def import_orders(
    ctx: Context,
    admin_url: str,
) -> Iterator[Records[ShopifyOrder]]:
    _, _, shop_name = split_admin_url(admin_url)
    url, auth = url_and_auth_from_admin_url(admin_url)
    endpoint_url = url + "/orders.json"
    latest_updated_at = ctx.get_state_value("latest_updated_at") or DEFAULT_MIN_DATE
    params = {
        "order": "updated_at asc",
        "updated_at_min": latest_updated_at,
        "status": "any",
        "limit": 250,
    }
    conn = JsonHttpApiConnection()
    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=auth)
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["orders"]
        if len(records) == 0:
            # All done
            break
        new_latest_updated_at = max([o["updated_at"] for o in records])
        ctx.emit_state({"latest_updated_at": new_latest_updated_at})
        yield records
        # Shopify has cursor-based pagination now, so we can safely paginate results
        next_page = get_next_page_link(resp.headers)
        if not next_page:
            # No more pages
            break
        endpoint_url = next_page
        params = {}
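# The Link-header parsing helper used above isn't shown in this excerpt. Below is a
# minimal sketch of what get_next_page_link might look like, assuming Shopify's
# standard cursor pagination where the next page URL is carried in a `Link` header
# with rel="next". The exact parsing is an assumption, not the library's confirmed
# implementation.
import re
from typing import Mapping, Optional


def get_next_page_link(headers: Mapping[str, str]) -> Optional[str]:
    # Example header value:
    #   <https://{shop}.myshopify.com/admin/api/orders.json?page_info=abc>; rel="next"
    link_header = headers.get("Link") or headers.get("link")
    if not link_header:
        return None
    for part in link_header.split(","):
        match = re.search(r'<([^>]+)>;\s*rel="next"', part)
        if match:
            return match.group(1)
    return None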
def marketstack_import_eod_prices(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    access_key: str,
    from_date: date = MIN_DATE,
    tickers: Optional[List] = None,
) -> Iterator[Records[EodPrice]]:
    # access_key = ctx.get_param("access_key")
    use_https = False  # TODO: when do we want this True?
    default_from_date = from_date
    assert access_key is not None
    if tickers_input is not None:
        tickers = list(tickers_input.as_dataframe()["symbol"])
    if not tickers:
        return
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {}
    )
    conn = JsonHttpApiConnection(date_format="%Y-%m-%d")
    if use_https:
        endpoint_url = HTTPS_MARKETSTACK_API_BASE_URL + "eod"
    else:
        endpoint_url = MARKETSTACK_API_BASE_URL + "eod"
    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_date(
            ticker_latest_dates_imported.get(ticker, default_from_date)
        )
        max_date = latest_date_imported
        params = {
            "limit": 1000,
            "offset": 0,
            "access_key": access_key,
            "symbols": ticker,
            "date_from": latest_date_imported,
        }
        while ctx.should_continue():
            resp = conn.get(endpoint_url, params)
            json_resp = resp.json()
            assert isinstance(json_resp, dict)
            records = json_resp["data"]
            if len(records) == 0:
                # All done
                break
            yield records
            # Update state
            max_date = max(max_date, max(ensure_date(r["date"]) for r in records))
            ticker_latest_dates_imported[ticker] = max_date + timedelta(days=1)
            ctx.emit_state_value(
                "ticker_latest_dates_imported", ticker_latest_dates_imported
            )
            # Setup for next page
            params["offset"] = params["offset"] + len(records)
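# ensure_date (like the ensure_datetime / ensure_utc helpers used elsewhere) comes from
# the surrounding library and isn't shown here. A rough sketch of the behavior this
# importer relies on, assuming it normalizes strings, datetimes, and dates to a plain
# `date`; this is an assumption about the helper, not its actual source.
from datetime import date, datetime

from dateutil import parser as date_parser  # assumed dependency for string parsing


def ensure_date(value) -> date:
    # Pass dates through, truncate datetimes, and parse ISO-ish strings
    if isinstance(value, datetime):
        return value.date()
    if isinstance(value, date):
        return value
    if isinstance(value, str):
        return date_parser.parse(value).date()
    raise TypeError(f"Cannot coerce {value!r} to a date")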
def alphavantage_import_eod_prices(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageEodPrice]]:
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if not tickers:
        return None
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {}
    )
    conn = JsonHttpApiConnection()

    def fetch_prices(params: Dict, tries: int = 0) -> Optional[Records]:
        if tries > 2:
            return None
        resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params, stream=True)
        try:
            record = resp.json()
            # Json response means error
            if is_alphavantage_error(record):
                # TODO: Log this failure?
                print(f"Error for {params} {record}")
                return None
            if is_alphavantage_rate_limit(record):
                time.sleep(60)
                return fetch_prices(params, tries=tries + 1)
        except:
            pass
        # print(resp.raw.read().decode("utf8"))
        # resp.raw.seek(0)
        records = list(read_csv(resp.iter_lines()))
        return records

    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME)
        )
        assert latest_date_imported is not None
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            # Only check once a day
            continue
        params = prepare_params_for_ticker(ticker, ticker_latest_dates_imported)
        params["apikey"] = api_key
        records = fetch_prices(params)
        if records:
            # Symbol not included
            for r in records:
                r["symbol"] = ticker
            yield records
        # Update state
        ticker_latest_dates_imported[ticker] = utcnow()
        ctx.emit_state_value(
            "ticker_latest_dates_imported", ticker_latest_dates_imported
        )
        if not ctx.should_continue():
            break
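# is_alphavantage_error and is_alphavantage_rate_limit are helpers from this package
# that aren't shown in the excerpt. A hedged sketch of how they might work, assuming
# the common Alphavantage behavior of returning HTTP 200 with an "Error Message" key
# on bad requests and a "Note" key when the call-frequency limit is hit; the key names
# are assumptions based on the public API, not this library's confirmed logic.
from typing import Any, Dict


def is_alphavantage_error(record: Dict[str, Any]) -> bool:
    # Errors come back as JSON bodies like {"Error Message": "Invalid API call ..."}
    return isinstance(record, dict) and "Error Message" in record


def is_alphavantage_rate_limit(record: Dict[str, Any]) -> bool:
    # Rate limiting comes back as {"Note": "Thank you for using Alpha Vantage! ..."}
    return isinstance(record, dict) and "Note" in record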
def import_observations(
    ctx: Context,
    api_key: str,
    series_id: str,
) -> Iterator[Records[FredObservation]]:
    latest_fetched_at = ctx.get_state_value("latest_fetched_at")
    if latest_fetched_at:
        # Two year curing window (to be safe)
        obs_start = ensure_datetime(latest_fetched_at) - timedelta(days=365 * 2)
    else:
        obs_start = MIN_DATE
    params = {
        "file_type": "json",
        "api_key": api_key,
        "series_id": series_id,
        "observation_start": obs_start,
        "offset": 0,
        "limit": 100000,
    }
    conn = JsonHttpApiConnection(date_format="%Y-%m-%d")
    endpoint_url = FRED_API_BASE_URL + "series/observations"
    while True:
        resp = conn.get(endpoint_url, params)
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["observations"]
        if len(records) == 0:
            # All done
            break
        for r in records:
            # Add series ID to data so we know what the data is
            r["series_id"] = params["series_id"]
            # FRED quirk: returns empty decimal number "." instead of null
            r["value"] = None if r["value"] == "." else r["value"]
        yield records
        num_returned = len(records)
        if num_returned < json_resp["limit"]:
            # We got back less than limit, so must be done (no other way to tell?)
            break
        params["offset"] += num_returned
    # We only update date if we have fetched EVERYTHING available as of now
    ctx.emit_state_value("latest_fetched_at", utcnow())
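# For reference, a trimmed example of the observations payload this importer pages
# through. The shape follows the public FRED series/observations endpoint; the values
# below are illustrative only, not real data.
EXAMPLE_FRED_OBSERVATIONS_RESPONSE = {
    "observation_start": "2020-01-01",
    "limit": 100000,
    "offset": 0,
    "count": 2,
    "observations": [
        {"date": "2020-01-01", "value": "3.5"},
        {"date": "2020-01-02", "value": "."},  # missing value, mapped to None above
    ],
}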
def marketstack_import_tickers(
    ctx: Context,
    access_key: str,
    exchanges: List = ["XNYS", "XNAS"],
) -> Iterator[Records[MarketstackTicker]]:
    use_https = False  # TODO: when do we want this True?
    # default_from_date = ctx.get_param("from_date", MIN_DATE)
    assert access_key is not None
    assert isinstance(exchanges, list)
    last_imported_at = ensure_datetime(
        ctx.get_state_value("last_imported_at") or "2020-01-01 00:00:00"
    )
    assert last_imported_at is not None
    last_imported_at = ensure_utc(last_imported_at)
    if utcnow() - last_imported_at < timedelta(days=1):  # TODO: from config
        return
    conn = JsonHttpApiConnection()
    if use_https:
        endpoint_url = HTTPS_MARKETSTACK_API_BASE_URL + "tickers"
    else:
        endpoint_url = MARKETSTACK_API_BASE_URL + "tickers"
    for exchange in exchanges:
        params = {
            "limit": 1000,
            "offset": 0,
            "access_key": access_key,
            "exchange": exchange,
        }
        while ctx.should_continue():
            resp = conn.get(endpoint_url, params)
            json_resp = resp.json()
            assert isinstance(json_resp, dict)
            records = json_resp["data"]
            if len(records) == 0:
                # All done
                break
            # Add a flattened exchange indicator
            for r in records:
                r["exchange_acronym"] = r.get("stock_exchange", {}).get("acronym")
            yield records
            # Setup for next page
            params["offset"] = params["offset"] + len(records)
    ctx.emit_state_value("last_imported_at", utcnow())
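# The flattening above assumes Marketstack ticker records nest the exchange under a
# "stock_exchange" key. A small illustrative example of the transform; the record
# values are made up for illustration and are not real API output.
def _example_flatten_exchange() -> None:
    raw_ticker = {
        "name": "Apple Inc",
        "symbol": "AAPL",
        "stock_exchange": {"name": "NASDAQ Stock Exchange", "acronym": "NASDAQ"},
    }
    # Same expression used in the importer: pull the acronym up to the top level
    raw_ticker["exchange_acronym"] = raw_ticker.get("stock_exchange", {}).get("acronym")
    assert raw_ticker["exchange_acronym"] == "NASDAQ"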
def alphavantage_import_company_overview(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageCompanyOverview]]:
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if tickers is None:
        # We didn't get an input block for tickers AND
        # the config is empty, so we are done
        return None
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {}
    )
    conn = JsonHttpApiConnection()
    batch_size = 50
    records = []
    tickers_updated = []

    def fetch_overview(params: Dict, tries: int = 0) -> Optional[Dict]:
        if tries > 2:
            return None
        resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params)
        record = resp.json()
        # Alphavantage returns 200 and json error message on failure
        if is_alphavantage_error(record):
            # TODO: Log this failure?
            # print(f"Error for ticker {params['symbol']}: {record}")
            return None
        if is_alphavantage_rate_limit(record):
            time.sleep(20)
            return fetch_overview(params, tries=tries + 1)
        return record

    for i, ticker in enumerate(tickers):
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME)
        )
        assert latest_date_imported is not None
        # Refresh at most once a day
        # TODO: make this configurable instead of hard-coded 1 day
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            continue
        params = {
            "apikey": api_key,
            "symbol": ticker,
            "function": "OVERVIEW",
        }
        record = fetch_overview(params)
        if not record:
            continue
        # Clean up json keys to be more DB friendly
        record = {title_to_snake_case(k): v for k, v in record.items()}
        records.append(record)
        tickers_updated.append(ticker)
        if len(records) >= batch_size or i == len(tickers) - 1:
            yield records
            # Update state
            for updated_ticker in tickers_updated:
                ticker_latest_dates_imported[updated_ticker] = utcnow()
            ctx.emit_state_value(
                "ticker_latest_dates_imported", ticker_latest_dates_imported
            )
            if not ctx.should_continue():
                break
            records = []
            tickers_updated = []
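# title_to_snake_case is a small utility from the surrounding package and isn't shown
# in this excerpt. A minimal sketch of the behavior the key cleanup above depends on,
# assuming it converts Alphavantage's TitleCase keys (e.g. "MarketCapitalization")
# into snake_case ("market_capitalization"); the exact handling of acronyms in the
# real helper may differ.
import re


def title_to_snake_case(name: str) -> str:
    # Insert an underscore before each interior capital, then lowercase everything
    return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()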
def import_subscription_items(
    ctx: Context, api_key: str, curing_window_days: int = 90
) -> Iterator[Records[StripeSubscriptionItemRaw]]:
    """
    # TODO: repeated code
    """
    latest_full_import_at = ctx.get_state_value("latest_full_import_at")
    latest_full_import_at = ensure_datetime(latest_full_import_at)
    current_starting_after = ctx.get_state_value("current_starting_after")
    params = {
        "limit": 100,
        "status": "all",
    }
    # if earliest_created_at_imported <= latest_full_import_at - timedelta(days=int(curing_window_days)):
    if latest_full_import_at and curing_window_days:
        # Import only more recent than latest imported at date, offset by a curing window
        # (default 90 days) to capture updates to objects (refunds, etc)
        params["created[gte]"] = int(
            (latest_full_import_at - timedelta(days=int(curing_window_days))).timestamp()
        )
    if current_starting_after:
        params["starting_after"] = current_starting_after
    conn = JsonHttpApiConnection()
    endpoint_url = STRIPE_API_BASE_URL + "subscriptions"
    all_done = False
    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=HTTPBasicAuth(api_key, ""))
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["data"]
        if len(records) == 0:
            # All done
            all_done = True
            break
        for record in records:
            item_params = {
                "limit": 100,
                "subscription": record["id"],
            }
            while True:
                items_url = STRIPE_API_BASE_URL + "subscription_items"
                items_resp = conn.get(
                    items_url, item_params, auth=HTTPBasicAuth(api_key, "")
                )
                items_json_resp = items_resp.json()
                assert isinstance(items_json_resp, dict)
                items = items_json_resp["data"]
                if len(items) == 0:
                    # All done
                    break
                yield items
                if not items_json_resp.get("has_more"):
                    break
                latest_item_id = items[-1]["id"]
                item_params["starting_after"] = latest_item_id
            if not ctx.should_continue():
                break
        latest_object_id = records[-1]["id"]
        if not json_resp.get("has_more"):
            all_done = True
            break
        params["starting_after"] = latest_object_id
        ctx.emit_state_value("current_starting_after", latest_object_id)
    else:
        # Don't update any state, we just timed out
        return
    # We only update state if we have fetched EVERYTHING available as of now
    if all_done:
        ctx.emit_state_value("latest_imported_at", utcnow())
        # IMPORTANT: we reset the starting after cursor so we start from the beginning again on next run
        ctx.emit_state_value("current_starting_after", None)
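# Both loops above page with Stripe's standard cursor scheme: list endpoints return an
# envelope like the one below, and passing the last object's id as `starting_after`
# fetches the next page until `has_more` is false. The shape follows Stripe's public
# list API; the ids and field subset are illustrative only.
EXAMPLE_STRIPE_LIST_RESPONSE = {
    "object": "list",
    "url": "/v1/subscription_items",
    "has_more": True,
    "data": [
        {"id": "si_000000000000", "object": "subscription_item"},
        # ... up to `limit` records per page
    ],
}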
def stripe_importer(
    endpoint: str,
    ctx: Context,
    api_key: str,
    curing_window_days: Optional[int] = None,
    extra_params: Optional[Dict] = None,
):
    """
    Stripe only allows fetching records in one order, from newest to oldest, so we use
    its cursor-based pagination to iterate once through all historical records.
    Stripe doesn't have a way to request by "updated at" times, so we must refresh old
    records according to our own logic, using a "curing window" to re-import records up
    to one year (the default) old.
    """
    latest_full_import_at = ctx.get_state_value("latest_full_import_at")
    latest_full_import_at = ensure_datetime(latest_full_import_at)
    current_starting_after = ctx.get_state_value("current_starting_after")
    params = {
        "limit": 100,
    }
    if extra_params:
        params.update(extra_params)
    # if earliest_created_at_imported <= latest_full_import_at - timedelta(days=int(curing_window_days)):
    if latest_full_import_at and curing_window_days:
        # Import only more recent than latest imported at date, offset by a curing window
        # (default 90 days) to capture updates to objects (refunds, etc)
        params["created[gte]"] = int(
            (latest_full_import_at - timedelta(days=int(curing_window_days))).timestamp()
        )
    if current_starting_after:
        params["starting_after"] = current_starting_after
    conn = JsonHttpApiConnection()
    endpoint_url = STRIPE_API_BASE_URL + endpoint
    all_done = False
    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=HTTPBasicAuth(api_key, ""))
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["data"]
        if len(records) == 0:
            # All done
            all_done = True
            break
        # Return actual data
        yield records
        latest_object_id = records[-1]["id"]
        if not json_resp.get("has_more"):
            all_done = True
            break
        params["starting_after"] = latest_object_id
        ctx.emit_state_value("current_starting_after", latest_object_id)
    else:
        # Don't update any state, we just timed out
        return
    # We only update state if we have fetched EVERYTHING available as of now
    if all_done:
        ctx.emit_state_value("latest_imported_at", utcnow())
        # IMPORTANT: we reset the starting after cursor so we start from the beginning again on next run
        ctx.emit_state_value("current_starting_after", None)
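# A hedged sketch of how the generic importer above might be specialized for a concrete
# endpoint. The wrapper name and the "charges" endpoint chosen here are illustrative
# assumptions, not part of this excerpt.
def import_charges(
    ctx: Context, api_key: str, curing_window_days: int = 90
) -> Iterator[Records]:
    # Delegate pagination, curing-window logic, and state handling to stripe_importer
    yield from stripe_importer(
        "charges",
        ctx,
        api_key,
        curing_window_days=curing_window_days,
    )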