コード例 #1
0
ファイル: worker.py プロジェクト: apocsve/robinhood-scrapper
def fetch_quote(
    symbols: str,
    collection: pymongo.collection.Collection,
    sleep,
    worker_request_cooldown_seconds=1.0,
):
    """Fetch quotes for ``symbols`` and hand them to ``store_quotes``.

    Failed attempts are retried in a loop rather than by calling this function
    recursively, so a long run of throttled or timed-out requests cannot grow
    the call stack until ``RecursionError``.

    Args:
        symbols: Value for the ``symbols`` query parameter of the quotes
            endpoint, or the sentinel ``"__DONE"``, which calls
            ``set_quotes_finished()`` and returns without fetching.
        collection: Collection passed through to ``store_quotes``.
        sleep: Callable taking a number of seconds; used for every wait so the
            caller controls how waiting is performed.
        worker_request_cooldown_seconds: Pause after a successful request.

    Returns:
        None.
    """
    print(" [fetch_quote] symbols %r" % symbols)

    if symbols == "__DONE":
        print(
            "Received DONE message for quote fetching; marking as complete in Redis..."
        )
        set_quotes_finished()
        return

    # The URL does not change between attempts, so build it once.
    url = f"{endpoints.quotes()}?symbols={symbols}"

    while True:
        # Reset per attempt so the KeyError handler never inspects a stale
        # payload left over from a previous iteration.
        payload = {}
        try:
            payload = requests.get(url, headers=TRADER.headers,
                                   timeout=15).json()
            store_quotes(payload["results"], collection)

            sleep(worker_request_cooldown_seconds)
            return
        except KeyError:  # Likely a ratelimit issue; cooldown.
            if not payload.get("detail"):
                print("ERROR: Unexpected response received from quote request: {}".
                      format(payload))
                sleep(120)
                return

            cooldown_seconds = parse_throttle_res(payload["detail"])
            print("Quote fetch request failed; waiting for {} second cooldown...".
                  format(cooldown_seconds))
            sleep(cooldown_seconds)
            # Fall through to the next loop iteration to retry the request.
        except InvalidTickerSymbol:
            print("Error while fetching symbols: {}".format(symbols))
            return
        except requests.exceptions.ReadTimeout:
            print(
                "Read timeout while fetching quotes... Sleeping 30 seconds and re-trying."
            )
            sleep(30)
コード例 #2
0
def fetch_fundamentals(
    instrument_ids: str,
    collection: pymongo.collection.Collection,
    sleep,
    worker_request_cooldown_seconds=1.0,
):
    """Fetch fundamentals for a batch of instruments and store them.

    Each id in ``instrument_ids`` is turned into an instrument URL with
    ``build_instrument_url``; the URLs are sent as the ``instruments`` query
    parameter of the fundamentals endpoint and the ``results`` of the response
    are passed to ``store_fundamentals``.

    Failed attempts are retried in a loop rather than by recursion, so repeated
    throttling or timeouts cannot exhaust the call stack.

    Args:
        instrument_ids: Comma-separated instrument ids, or the sentinel
            ``"__DONE"``, which only logs a message and returns.
        collection: Collection passed through to ``store_fundamentals``.
        sleep: Callable taking a number of seconds; used for every wait.
        worker_request_cooldown_seconds: Pause after a successful request.

    Returns:
        None.
    """
    if instrument_ids == "__DONE":
        print("Received DONE message for fundamentals fetching.")
        return

    while True:
        # Reset per attempt so the KeyError handler never inspects a stale
        # payload left over from a previous iteration.
        payload = {}
        try:
            instrument_urls = ",".join(
                build_instrument_url(instrument_id)
                for instrument_id in instrument_ids.split(","))
            url = f"https://api.robinhood.com/fundamentals/?instruments={instrument_urls}"
            payload = requests.get(url, headers=TRADER.headers,
                                   timeout=15).json()
            store_fundamentals(payload["results"], collection)

            sleep(worker_request_cooldown_seconds)
            return
        except KeyError:  # Likely a ratelimit issue; cooldown.
            if not payload.get("detail"):
                print(
                    "ERROR: Unexpected response received from fundamentals request: {}"
                    .format(payload))
                sleep(120)
                return

            cooldown_seconds = parse_throttle_res(payload["detail"])
            print(
                "Fundamentals fetch request failed; waiting for {} second cooldown..."
                .format(cooldown_seconds))
            sleep(cooldown_seconds)
            # Fall through to the next loop iteration to retry the request.
        except InvalidTickerSymbol:
            print("Error while fetching instrument ids: {}".format(instrument_ids))
            return
        except requests.exceptions.ReadTimeout:
            # The message previously said "quotes" (copy-paste from
            # fetch_quote), which made timeouts here look like quote failures.
            print(
                "Read timeout while fetching fundamentals... Sleeping 30 seconds and re-trying."
            )
            sleep(30)
コード例 #3
0
def cli(rabbitmq_host: str, rabbitmq_port: int,
        scraper_request_cooldown_seconds: float):
    """Scrape the instruments listing and publish work batches to RabbitMQ.

    Walks the paginated ``/instruments/`` endpoint, updates the ``index``
    collection for every tradable instrument, and publishes symbols and
    instrument ids in batches of 20 via ``publish_quotes_and_instrument_ids``.
    When the last page has been processed, a ``"__DONE"`` message is published
    to the ``symbols`` and ``instrument_ids`` routing keys.

    A throttled response (one carrying ``detail``) is handled before its
    ``results`` are read, and the same page is requested again after the
    cooldown; previously the stale response was reused, so the throttle branch
    could never make progress.

    Args:
        rabbitmq_host: Host of the RabbitMQ server.
        rabbitmq_port: Port of the RabbitMQ server.
        scraper_request_cooldown_seconds: Pause between successive page
            requests.
    """
    print("init rabbitmq connection")
    rabbitmq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=rabbitmq_host, port=rabbitmq_port))

    # Close the connection even if scraping fails part-way through.
    try:
        rabbitmq_channel = rabbitmq_connection.channel()
        # NOTE(review): only "instrument_ids" is declared here although
        # "symbols" is published to below; presumably that queue is declared
        # elsewhere - confirm.
        rabbitmq_channel.queue_declare(queue="instrument_ids")
        print("rabbitmq connection init'd")

        # Lock and flush the existing cache
        print("Locking the cache in preparation for update...")
        set_update_started()

        trader = Robinhood()
        # Remember which page is being requested so it can be re-fetched
        # after a throttle cooldown.
        page_url = "https://api.robinhood.com/instruments/"
        res = trader.get_url(page_url)

        db = get_db()
        index_coll = db["index"]
        index_coll.create_index("instrument_id", unique=True)

        total_ids = 0
        quotes = []
        instrument_ids = []
        while True:
            if res.get("detail"):
                # Request was throttled; wait for the cooldown and then retry
                # the same page before touching `results`.
                cooldown_seconds = parse_throttle_res(res["detail"])
                print(
                    "Instruments fetch request failed; waiting for {} second cooldown..."
                    .format(cooldown_seconds))
                sleep(cooldown_seconds)
                res = trader.get_url(page_url)
                continue

            # A response with neither `detail` nor `results` is unexpected;
            # let the KeyError surface rather than silently ending the run.
            tradable_instruments = get_tradable_instruments(res["results"])

            for instrument_datum in tradable_instruments:
                total_ids += 1
                try_update_instrument(index_coll, instrument_datum)

                instrument_ids.append(instrument_datum["id"])
                quotes.append(instrument_datum["symbol"])

                if len(quotes) == 20:
                    publish_quotes_and_instrument_ids(rabbitmq_channel, quotes,
                                                      instrument_ids)

                    quotes = []
                    instrument_ids = []

            if res.get("next"):
                # There are more instruments to scrape.  Wait for the standard
                # cooldown and then continue by fetching the next request url.
                sleep(scraper_request_cooldown_seconds)
                page_url = res["next"]
                res = trader.get_url(page_url)
                continue

            # We're done scraping; there are no more instruments in the list.
            publish_quotes_and_instrument_ids(rabbitmq_channel, quotes,
                                              instrument_ids)

            # Publish a finished message over the channels to indicate that
            # there are no more items to process in this run.
            rabbitmq_channel.basic_publish(exchange="",
                                           routing_key="symbols",
                                           body="__DONE")
            rabbitmq_channel.basic_publish(exchange="",
                                           routing_key="instrument_ids",
                                           body="__DONE")

            # Mark the instrument scrape as finished
            set_instruments_finished()

            print(
                "Finished scraping; fetched a total of {} tradable instrument IDs."
                .format(total_ids))
            break
    finally:
        rabbitmq_connection.close()
コード例 #4
0
def fetch_popularity(
    instrument_ids: str,
    collection: pymongo.collection.Collection,
    sleep,
    worker_request_cooldown_seconds=1.0,
):
    """Fetch popularity data for a batch of instruments and store it.

    Requests the popularity endpoint with ``instrument_ids`` as the ``ids``
    query parameter, maps each result's instrument id (parsed from its
    ``instrument`` URL by ``parse_instrument_url``) to its
    ``num_open_positions``, and passes that mapping to ``store_popularities``.

    A response without ``results`` is treated as throttled when it carries a
    ``detail`` field, matching the other fetch workers. The handler previously
    tested ``results``, which is always missing in that situation, so the
    throttle cooldown could never run.

    Retries happen in a loop rather than by recursion, so repeated failures
    cannot exhaust the call stack.

    Args:
        instrument_ids: Value for the ``ids`` query parameter, or the sentinel
            ``"__DONE"``, which calls ``set_popularities_finished()`` and
            returns without fetching.
        collection: Collection passed through to ``store_popularities``.
        sleep: Callable taking a number of seconds; used for every wait.
        worker_request_cooldown_seconds: Pause after a successful request.

    Returns:
        None.
    """
    if instrument_ids == "__DONE":
        print(
            "Received DONE message for popularity fetching; marking as complete in Redis..."
        )
        set_popularities_finished()
        return

    url = "https://api.robinhood.com/instruments/popularity/?ids={}".format(
        instrument_ids)

    while True:
        # Reset per attempt so the KeyError handler never inspects a stale
        # payload left over from a previous iteration.
        res = {}
        try:
            res = TRADER.get_url(url)
            # Built in one pass; a later duplicate id overwrites an earlier one.
            popularities = {
                parse_instrument_url(datum["instrument"]):
                datum["num_open_positions"]
                for datum in res["results"]
            }
            store_popularities(popularities, collection)
            sleep(worker_request_cooldown_seconds)
            return
        except KeyError:  # Likely a ratelimit issue; cooldown.
            if not res.get("detail"):
                print(
                    "ERROR: Unexpected response received from popularity request: {}"
                    .format(res))
                sleep(120)
                return

            print(res)
            cooldown_seconds = parse_throttle_res(res["detail"])
            print(
                "Popularity fetch request failed; waiting for {} second cooldown..."
                .format(cooldown_seconds))
            sleep(cooldown_seconds)
            # Fall through to the next loop iteration to retry the request.
        except requests.exceptions.ReadTimeout:
            print(
                "Read timeout while fetching popularity... Sleeping 30 seconds and re-trying."
            )
            sleep(30)
        except TypeError:  # They sent back some broken data; wait and retry.
            print("Robinhood sent back garbage; ignoring.")
            sleep(30)
        except JSONDecodeError:
            print("Robinhood API sending back HTML; backing off.")
            sleep(30)