Beispiel #1
0
def get_articles(matches=(), sender=None):
    """Scrape the Placera telegram feed and yield unseen news articles.

    Args:
        matches: iterable of lower-cased topic strings; when non-empty,
            only articles whose topic is in this collection are yielded.
        sender: opaque identifier forwarded to the seen-articles tracking.

    Yields:
        NewsArticle instances not yet marked as seen for *sender*.
    """
    req = requests.get("https://www.avanza.se/placera/telegram.plc.html")
    req.raise_for_status()
    tree = html.fromstring(req.content)
    items = tree.xpath(
        '//ul[@class="feedArticleList XSText"]/li[@class="item"]/a')
    for item in items:
        url_path = item.attrib["href"]
        absolute_url = "https://avanza.se{}".format(url_path)
        headline_date = item.find('span').text_content().strip()
        headline = item.find('div').text_content()
        # Split on the FIRST colon only so a message that itself contains
        # colons (e.g. "FOO: bar: baz") is not truncated to "bar".
        headline_split = headline.split(":", 1)
        if len(headline_split) == 1:
            headline_topic = "unknown"
            headline_message = headline_split[0].strip()
        else:
            headline_topic = headline_split[0].lower().strip()
            headline_message = headline_split[1].strip()
        if not matches or headline_topic in matches:
            session = Session()
            article = NewsArticle(headline_topic, absolute_url,
                                  headline_message, headline_date)
            try:
                if not NewsArticleSeen.is_seen(session, article, sender):
                    yield article
                    NewsArticleSeen.mark_as_seen(session, article, sender)
                    session.commit()
            except Exception:
                LOGGER.exception(
                    "Something went wrong when fetching news articles")
            finally:
                session.close()
Beispiel #2
0
def stock_scrape_task(*args, **kwargs):
    """Refresh StockDomain quotes for every company in a segment/currency.

    Args:
        args: args[0] is the currency code (case-insensitive); the
            remaining args joined with spaces form the segment name.
        kwargs: 'service' supplies an object with get_quote(ticker).

    Returns:
        A human-readable status string.
    """
    currency = args[0].upper()
    segment = " ".join(args[1:])
    service = kwargs.get('service')
    session = Session()
    scraped = 0

    # TODO: not so pretty but what to do when there's no universal ticker naming scheme
    prefix_wrapper = {'SEK': 'STO'}
    prefix = prefix_wrapper.get(currency, None)

    try:
        result = session.query(NasdaqCompany.ticker)\
            .filter(NasdaqCompany.segment == segment)\
            .filter(NasdaqCompany.currency == currency)

        for r in result:

            # delete the record first if already existing
            try:
                session.query(StockDomain).filter(
                    StockDomain.ticker == r.ticker).delete()
                session.commit()
            except Exception:
                LOGGER.exception("failed to delete stock ticker '{}'".format(
                    r.ticker))
                session.rollback()

            # fetch the updated quote from interweb
            try:
                if prefix is not None:
                    ticker = "{}:{}".format(prefix, r.ticker)
                else:
                    ticker = r.ticker
                quote = service.get_quote(ticker)
                stock = StockDomain()
                stock.from_google_finance_quote(quote)
                session.add(stock)
                session.commit()
            except Exception:
                LOGGER.exception(
                    "failed to fetch and store stock ticker '{}'".format(
                        r.ticker))
                # roll back so the failed transaction does not poison the
                # session for the next iteration
                session.rollback()
            else:
                scraped += 1

            # arbitrary sleep, avoid getting us blocked, rate-limited etc
            time.sleep(3)

    except Exception:
        LOGGER.exception("failed to scrape stocks")
        return "Failed to scrape stocks"
    else:
        return "Done scraping segment '{segment}' currency '{currency}' - scraped {scraped} companies".format(
            segment=segment, currency=currency, scraped=scraped)
    finally:
        session.close()
Beispiel #3
0
def add_quote_hint(*args, **kwargs):
    """Store a provider hint mapping free text to a ticker.

    Args:
        args: args[0] is the provider name, args[1] the destination
            ticker; the remaining args joined with spaces are the
            free-text source.

    Returns:
        "Added hint" on success, None when the insert failed.
    """
    provider = args[0]
    dst_ticker = args[1]
    src_text = " ".join(args[2:])
    session = Session()
    try:
        hint = ProviderHints(provider=provider, src=src_text, dst=dst_ticker)
        session.add(hint)
        session.commit()
        return "Added hint"
    except Exception:
        LOGGER.exception("failed to add hint")
        # leave the session clean even though it is closed right after
        session.rollback()
    finally:
        session.close()
Beispiel #4
0
def remove_quote_hint(*args, **kwargs):
    """Delete the hint matching provider (args[0]) and ticker (args[1]).

    Returns:
        "Removed hint" when a row was deleted, "No matching hint to
        remove" when none matched, None on failure.
    """
    provider = args[0]
    dst_ticker = args[1]
    session = Session()
    try:
        hint = session.query(ProviderHints).filter(and_(ProviderHints.provider == provider, ProviderHints.dst == dst_ticker)).one_or_none()
        if hint:
            session.delete(hint)
            session.commit()
            return "Removed hint"
        else:
            return "No matching hint to remove"
    except Exception:
        LOGGER.exception("failed to remove hint")
        # undo the failed delete so the session is not left dirty
        session.rollback()
    finally:
        session.close()
Beispiel #5
0
def list_quote_hint(*args, **kwargs):
    """Return formatted lines for every hint of provider args[0].

    Returns:
        A list of description strings, "no hints found" when the
        provider has none, or "broken" when the query fails.
    """
    provider = args[0]
    session = Session()
    try:
        hints = session.query(ProviderHints).filter(
            ProviderHints.provider == provider).all()
        lines = [
            "Provider: {}, Ticker: {}, Free-text: {}".format(
                hint.provider, hint.dst, hint.src)
            for hint in hints
        ]
        return lines if lines else "no hints found"
    except Exception:
        LOGGER.exception("failed to list hints")
        return "broken"
    finally:
        session.close()
Beispiel #6
0
def ticker_hint(provider, ticker):
    """Translate a free-text ticker through ProviderHints.

    Args:
        provider: provider name the hint was stored under.
        ticker: free-text source string to translate.

    Returns:
        The mapped destination ticker, or *ticker* unchanged when no
        hint exists or the lookup fails.
    """
    session = None
    try:
        session = Session()
        hint_ticker = session.query(ProviderHints).filter(
            and_(ProviderHints.provider == provider, ProviderHints.src == ticker)) \
            .one_or_none()
        if hint_ticker:
            return hint_ticker.dst
        else:
            return ticker
    except Exception:
        # LOGGER.exception records the traceback by itself; passing the
        # exception as an extra argument broke %-style log formatting.
        LOGGER.exception("failed to query hints")
        return ticker
    finally:
        if session:
            session.close()
Beispiel #7
0
def stock_analytics_top(*args, **kwargs):
    """List the top-N stocks ordered by a StockDomain column.

    Args:
        args: (count, field_name[, "desc"]). *count* falls back to 5
            with an error line when not numeric; "desc" reverses order.

    Returns:
        A list of result strings; on empty results or query failure the
        list contains "Nothing found" (plus any earlier error line).
    """
    rv = []

    if len(args) < 2:
        return ["Error: need moar args"]

    try:
        count = int(args[0])
    except ValueError:
        # keep the complaint in the output but still run with a default
        rv.append("Error: {} is not a number sherlock".format(args[0]))
        count = 5

    sort_field_name = args[1]
    try:
        sort_field = getattr(StockDomain, sort_field_name)
    except AttributeError:
        return ["Error: '{}' is not a valid field".format(args[1])]

    sort_descending = len(args) == 3 and args[2] == "desc"
    order_by = sort_field.desc() if sort_descending else sort_field

    session = Session()
    try:
        result = session.query(StockDomain)\
            .filter(sort_field != 0.0)\
            .order_by(order_by).limit(count)
        rv.extend(["Top {}: Ticker: {}, Name: {}, Value: {}".format(
            i + 1, x.ticker, x.name, getattr(x, sort_field_name))
            for i, x in enumerate(result)])
    except Exception:
        LOGGER.exception("failed to query stockdomain for top '{}'".format(sort_field_name))
    finally:
        session.close()
    # a `return` inside `finally` silently swallows in-flight exceptions,
    # so the fallback and return live after the try/finally instead
    if not rv:
        rv.append("Nothing found")
    return rv
Beispiel #8
0
def nasdaq_scraper_task(*args, **kwargs):
    """Wipe the NasdaqCompany table and repopulate it from the scraper.

    Returns:
        A summary string on success, None when any step failed.
    """
    session = Session()
    scraper = NasdaqIndexScraper()
    try:
        # start from a clean slate; abort the refresh if the wipe fails
        try:
            session.query(NasdaqCompany).delete()
            session.commit()
        except Exception:
            LOGGER.exception("Failed to delete all records")
            session.rollback()
            return

        try:
            companies = []
            for index in scraper.indexes.keys():
                companies.extend(scraper.scrape(index))
            session.add_all(companies)
            session.commit()
            return "Scraped {} companies from Nasdaq".format(len(companies))
        except Exception:
            LOGGER.exception("Failed to fetch and store nasdaq companies")
            session.rollback()
    finally:
        session.close()
Beispiel #9
0
def scrape_stats(*args, **kwargs):
    """Summarize how many scraped companies exist per Nasdaq segment.

    Returns:
        A string like "Scraped: Large Cap=110, Mid Cap=130".
    """
    session = Session()
    try:
        result = session.query(NasdaqCompany.segment,
                               func.count(NasdaqCompany.segment))\
            .group_by(NasdaqCompany.segment).all()
        return "Scraped: {}".format(", ".join(
            ["{k}={v}".format(k=x[0], v=x[1]) for x in result]))
    finally:
        # the original never closed the session — a connection leak;
        # every sibling task closes it in finally, so do the same here
        session.close()
Beispiel #10
0
    def setUp(self):
        """Create fresh fake collaborators and a new DB session before each test."""

        self.ircbot = FakeIrcBot()
        self.service = FakeQuoteService()
        self.session = Session()
        create_tables()