def fetch_quote(
    symbols: str,
    collection: pymongo.collection.Collection,
    sleep,
    worker_request_cooldown_seconds=1.0,
):
    """Fetch quotes for ``symbols`` and hand the results to ``store_quotes``.

    The sentinel value ``"__DONE"`` is not fetched; it triggers
    ``set_quotes_finished()`` and returns immediately.

    Args:
        symbols: Value placed in the ``symbols`` query parameter of the
            quotes endpoint, or the ``"__DONE"`` sentinel.
        collection: Collection passed through to ``store_quotes``.
        sleep: Callable invoked with a number of seconds to wait.
        worker_request_cooldown_seconds: Pause after a successful fetch.

    Failure handling:
        * ``KeyError`` with a truthy ``"detail"`` in the response: wait for
          the cooldown computed by ``parse_throttle_res`` and retry.
        * ``KeyError`` without ``"detail"``: log, wait 120 seconds, give up.
        * ``InvalidTickerSymbol``: log and give up.
        * ``requests.exceptions.ReadTimeout``: wait 30 seconds and retry.
    """
    print(f" [fetch_quote] symbols {symbols!r}")

    if symbols == "__DONE":
        print(
            "Received DONE message for quote fetching; marking as complete in Redis..."
        )
        set_quotes_finished()
        return

    def retry():
        # Re-enter with exactly the arguments this invocation received.
        fetch_quote(
            symbols,
            collection,
            sleep,
            worker_request_cooldown_seconds=worker_request_cooldown_seconds,
        )

    try:
        request_url = f"{endpoints.quotes()}?symbols={symbols}"
        payload = requests.get(
            request_url, headers=TRADER.headers, timeout=15
        ).json()
        store_quotes(payload["results"], collection)
        sleep(worker_request_cooldown_seconds)
    except KeyError:
        # Likely a ratelimit issue; cooldown.
        throttle_detail = payload.get("detail")
        if throttle_detail:
            wait_seconds = parse_throttle_res(throttle_detail)
            print(
                f"Quote fetch request failed; waiting for {wait_seconds} second cooldown..."
            )
            sleep(wait_seconds)
            retry()
        else:
            print(
                f"ERROR: Unexpected response received from quote request: {payload}"
            )
            sleep(120)
    except InvalidTickerSymbol:
        print(f"Error while fetching symbols: {symbols}")
    except requests.exceptions.ReadTimeout:
        print(
            "Read timeout while fetching quotes... Sleeping 30 seconds and re-trying."
        )
        sleep(30)
        retry()
def fetch_fundamentals(
    instrument_ids: str,
    collection: pymongo.collection.Collection,
    sleep,
    worker_request_cooldown_seconds=1.0,
):
    """Fetch fundamentals for a batch of instrument ids and store them.

    ``instrument_ids`` is split on ``","``; each id is mapped through
    ``build_instrument_url`` and the results are re-joined with ``","`` to
    form the request URL. The ``"results"`` entry of the JSON response is
    passed to ``store_fundamentals``.

    Args:
        instrument_ids: Comma-separated instrument ids, or the ``"__DONE"``
            sentinel, which is only logged and otherwise ignored.
        collection: Collection passed through to ``store_fundamentals``.
        sleep: Callable invoked with a number of seconds to wait.
        worker_request_cooldown_seconds: Pause after a successful fetch.

    Failure handling:
        * ``KeyError`` with a truthy ``"detail"`` in the response: wait for
          the cooldown computed by ``parse_throttle_res`` and retry.
        * ``KeyError`` without ``"detail"``: log, wait 120 seconds, give up.
        * ``InvalidTickerSymbol``: log and give up.
        * ``requests.exceptions.ReadTimeout``: wait 30 seconds and retry.
    """
    if instrument_ids == "__DONE":
        print("Received DONE message for fundamentals fetching.")
        return

    try:
        # NOTE(review): the request URL is nothing but the joined instrument
        # URLs -- a base fundamentals endpoint looks to be missing; confirm.
        url = ",".join(
            build_instrument_url(instrument_id)
            for instrument_id in instrument_ids.split(",")
        )
        res = requests.get(url, headers=TRADER.headers, timeout=15)
        res = res.json()
        fundamentals = res["results"]
        store_fundamentals(fundamentals, collection)
        sleep(worker_request_cooldown_seconds)
    except KeyError:
        # Likely a ratelimit issue; cooldown.
        if not res.get("detail"):
            print(
                "ERROR: Unexpected response received from fundamentals request: {}"
                .format(res))
            sleep(120)
            return

        cooldown_seconds = parse_throttle_res(res["detail"])
        print(
            "Fundamentals fetch request failed; waiting for {} second cooldown..."
            .format(cooldown_seconds))
        sleep(cooldown_seconds)
        fetch_fundamentals(
            instrument_ids,
            collection,
            sleep,
            worker_request_cooldown_seconds=worker_request_cooldown_seconds,
        )
    except InvalidTickerSymbol:
        print("Error while fetching instrument ids: {}".format(instrument_ids))
    except requests.exceptions.ReadTimeout:
        # The message used to say "quotes" (copied from the quote worker),
        # which made timeouts from this worker look like they came from
        # the other one.
        print(
            "Read timeout while fetching fundamentals... Sleeping 30 seconds and re-trying."
        )
        sleep(30)
        fetch_fundamentals(
            instrument_ids,
            collection,
            sleep,
            worker_request_cooldown_seconds=worker_request_cooldown_seconds,
        )
def cli(rabbitmq_host: str, rabbitmq_port: int,
        scraper_request_cooldown_seconds: float):
    """Scrape the instrument list and publish work batches over RabbitMQ.

    Walks the paginated instrument listing, upserts every tradable
    instrument via ``try_update_instrument``, and publishes symbols and
    instrument ids in batches of 20 through
    ``publish_quotes_and_instrument_ids``. When the last page has been
    processed, the remaining partial batch is published, followed by a
    ``"__DONE"`` message on the ``symbols`` and ``instrument_ids`` routing
    keys, and ``set_instruments_finished()`` is called.

    Args:
        rabbitmq_host: Host of the RabbitMQ broker.
        rabbitmq_port: Port of the RabbitMQ broker.
        scraper_request_cooldown_seconds: Pause between successive page
            requests.
    """
    print("init rabbitmq connection")
    rabbitmq_connection = pika.BlockingConnection(
        pika.ConnectionParameters(host=rabbitmq_host, port=rabbitmq_port))
    # The channel has to be opened from the connection first; the previous
    # code read ``rabbitmq_channel`` before it was ever assigned, which
    # raises UnboundLocalError.
    rabbitmq_channel = rabbitmq_connection.channel()
    # NOTE(review): "symbols" is also published to below but is not declared
    # here -- presumably it is declared by its consumer; verify.
    rabbitmq_channel.queue_declare(queue="instrument_ids")
    print("rabbitmq connection init'd")

    # Lock and flush the existing cache
    print("Locking the cache in preparation for update...")
    set_update_started()

    trader = Robinhood()
    # NOTE(review): the initial URL is an empty string -- looks like the
    # instruments endpoint is missing here; confirm.
    url = ""
    res = trader.get_url(url)

    db = get_db()
    index_coll = db["index"]
    index_coll.create_index("instrument_id", unique=True)

    total_ids = 0
    quotes = []
    instrument_ids = []

    while True:
        if res.get("detail"):
            # Request was throttled; wait for a cooldown and then re-issue the
            # SAME request. A throttled response is handled before touching
            # res["results"], and is replaced by a fresh response so the loop
            # cannot spin on stale data.
            cooldown_seconds = parse_throttle_res(res["detail"])
            print(
                "Instruments fetch request failed; waiting for {} second cooldown..."
                .format(cooldown_seconds))
            sleep(cooldown_seconds)
            res = trader.get_url(url)
            continue

        fetched_instruments: List[Dict[str, str]] = res["results"]
        tradable_instruments = get_tradable_instruments(fetched_instruments)

        for instrument_datum in tradable_instruments:
            total_ids += 1
            try_update_instrument(index_coll, instrument_datum)

            instrument_ids.append(instrument_datum["id"])
            quotes.append(instrument_datum["symbol"])

            if len(quotes) == 20:
                publish_quotes_and_instrument_ids(rabbitmq_channel, quotes,
                                                  instrument_ids)
                quotes = []
                instrument_ids = []

        if res.get("next"):
            # There are more instruments to scrape. Wait for the standard
            # cooldown and then continue by fetching the next request url.
            sleep(scraper_request_cooldown_seconds)
            url = res["next"]
            res = trader.get_url(url)
            continue

        # We're done scraping; there are no more instruments in the list.
        publish_quotes_and_instrument_ids(rabbitmq_channel, quotes,
                                          instrument_ids)

        # Publish a finished message over the channels to indicate that there
        # are no more items to process in this run.
        rabbitmq_channel.basic_publish(exchange="",
                                       routing_key="symbols",
                                       body="__DONE")
        rabbitmq_channel.basic_publish(exchange="",
                                       routing_key="instrument_ids",
                                       body="__DONE")

        # Mark the instrument scrape as finished
        set_instruments_finished()

        print(
            "Finished scraping; fetched a total of {} tradable instrument IDs."
            .format(total_ids))
        break

    rabbitmq_connection.close()
def fetch_popularity(
    instrument_ids: str,
    collection: pymongo.collection.Collection,
    sleep,
    worker_request_cooldown_seconds=1.0,
):
    """Fetch popularity figures for a batch of instruments and store them.

    Each entry of the response's ``"results"`` list is reduced to a mapping
    of ``parse_instrument_url(entry["instrument"])`` to
    ``entry["num_open_positions"]``, which is passed to
    ``store_popularities``.

    Args:
        instrument_ids: Value used to build the request URL, or the
            ``"__DONE"`` sentinel, which triggers
            ``set_popularities_finished()`` and returns immediately.
        collection: Collection passed through to ``store_popularities``.
        sleep: Callable invoked with a number of seconds to wait.
        worker_request_cooldown_seconds: Pause after a successful fetch.

    Failure handling:
        * ``KeyError`` with a truthy ``"detail"`` in the response: wait for
          the cooldown computed by ``parse_throttle_res`` and retry.
        * ``KeyError`` without ``"detail"``: log, wait 120 seconds, give up.
        * ``requests.exceptions.ReadTimeout``, ``TypeError`` and
          ``JSONDecodeError``: log, wait 30 seconds and retry.
    """
    if instrument_ids == "__DONE":
        print(
            "Received DONE message for popularity fetching; marking as complete in Redis..."
        )
        set_popularities_finished()
        return

    # NOTE(review): the URL is just the ids string -- a base popularity
    # endpoint looks to be missing here; confirm.
    url = "{}".format(instrument_ids)

    def call_self():
        """
        In the case of some kind of error, wait 30 seconds and then re-call
        ourself to try again.
        """
        sleep(30)
        fetch_popularity(
            instrument_ids,
            collection,
            sleep,
            worker_request_cooldown_seconds=worker_request_cooldown_seconds,
        )

    try:
        res = TRADER.get_url(url)
        # Built in one pass; later entries for the same instrument overwrite
        # earlier ones, exactly as the previous fold with dict-spread did.
        popularities = {
            parse_instrument_url(datum["instrument"]):
            datum["num_open_positions"]
            for datum in res["results"]
        }
        store_popularities(popularities, collection)
        sleep(worker_request_cooldown_seconds)
    except KeyError:
        # Likely a ratelimit issue; cooldown.
        # A throttled response is recognised by its "detail" entry, matching
        # the quote and fundamentals workers. Checking "results" here meant a
        # response without results could never reach the cooldown path.
        if not res.get("detail"):
            print(
                "ERROR: Unexpected response received from popularity request: {}"
                .format(res))
            sleep(120)
            return

        print(res)
        cooldown_seconds = parse_throttle_res(res["detail"])
        print(
            "Popularity fetch request failed; waiting for {} second cooldown..."
            .format(cooldown_seconds))
        sleep(cooldown_seconds)
        fetch_popularity(
            instrument_ids,
            collection,
            sleep,
            worker_request_cooldown_seconds=worker_request_cooldown_seconds,
        )
    except requests.exceptions.ReadTimeout:
        print(
            "Read timeout while fetching popularity... Sleeping 30 seconds and re-trying."
        )
        call_self()
    except TypeError:
        # They sent back some broken data; just ignore it.
        print("Robinhood sent back garbage; ignoring.")
        call_self()
    except JSONDecodeError:
        print("Robinhood API sending back HTML; backing off.")
        call_self()