Esempio n. 1
0
def import_orders(
    ctx: Context,
    admin_url: str,
) -> Iterator[Records[ShopifyOrder]]:
    """Yield pages of Shopify orders, resuming from the stored checkpoint.

    The first request asks for orders sorted by ``updated_at`` ascending,
    starting at the ``latest_updated_at`` state value (or ``DEFAULT_MIN_DATE``
    when no state exists). Subsequent requests follow the next-page link
    extracted from the response headers.

    Args:
        ctx: Execution context; supplies state and the continue/stop signal.
        admin_url: Shopify admin URL from which the endpoint URL and the
            request auth are derived.

    Yields:
        The list found under the ``"orders"`` key of each response.
    """
    # The split result is not used below; the call is kept unchanged.
    _, _, _shop_name = split_admin_url(admin_url)
    base_url, auth = url_and_auth_from_admin_url(admin_url)
    request_url = base_url + "/orders.json"
    checkpoint = ctx.get_state_value("latest_updated_at") or DEFAULT_MIN_DATE

    query = {
        "order": "updated_at asc",
        "updated_at_min": checkpoint,
        "status": "any",
        "limit": 250,
    }
    conn = JsonHttpApiConnection()
    while ctx.should_continue():
        resp = conn.get(request_url, query, auth=auth)
        payload = resp.json()
        assert isinstance(payload, dict)
        orders = payload["orders"]
        if not orders:
            # Empty page: nothing left to import
            break
        # The checkpoint is recorded before the page is handed downstream
        newest_update = max(order["updated_at"] for order in orders)
        ctx.emit_state({"latest_updated_at": newest_update})
        yield orders
        # Shopify has cursor-based pagination now, so we can safely paginate results
        next_link = get_next_page_link(resp.headers)
        if not next_link:
            # Last page reached
            break
        # The next-page link is requested on its own, so the query params
        # are cleared.
        request_url = next_link
        query = {}
Esempio n. 2
0
def marketstack_import_eod_prices(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    access_key: str,
    from_date: date = MIN_DATE,
    tickers: Optional[List] = None,
) -> Iterator[Records[EodPrice]]:
    """Yield pages of Marketstack end-of-day prices for each ticker.

    Tickers come from the ``symbol`` column of ``tickers_input`` when it is
    given, otherwise from ``tickers``. For every ticker the import starts at
    the date stored in the ``ticker_latest_dates_imported`` state (falling
    back to ``from_date``) and pages through the results by offset.

    The per-ticker checkpoint (latest date seen + 1 day) is persisted only
    after ALL pages for that ticker have been fetched. The request does not
    pin a sort order, so saving the maximum date after a partial fetch could
    permanently skip the pages that were never retrieved.
    NOTE(review): Marketstack documents newest-first as the default order —
    confirm; if so the first page already holds the latest date.

    Args:
        ctx: Execution context; supplies state and the continue/stop signal.
        tickers_input: Optional reference to a ticker table with a
            ``symbol`` column.
        access_key: Marketstack API access key.
        from_date: Start date for tickers with no stored checkpoint.
        tickers: Explicit ticker symbols, used when ``tickers_input`` is None.

    Yields:
        The list found under the ``"data"`` key of each response page.
    """
    use_https = False  # TODO: when do we want this True?
    assert access_key is not None
    if tickers_input is not None:
        tickers = list(tickers_input.as_dataframe()["symbol"])
    if not tickers:
        return
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {})
    conn = JsonHttpApiConnection(date_format="%Y-%m-%d")
    base_url = (HTTPS_MARKETSTACK_API_BASE_URL
                if use_https else MARKETSTACK_API_BASE_URL)
    endpoint_url = base_url + "eod"
    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_date(
            ticker_latest_dates_imported.get(ticker, from_date))
        max_date = latest_date_imported
        got_records = False
        params = {
            "limit": 1000,
            "offset": 0,
            "access_key": access_key,
            "symbols": ticker,
            "date_from": latest_date_imported,
        }
        while ctx.should_continue():
            resp = conn.get(endpoint_url, params)
            json_resp = resp.json()
            assert isinstance(json_resp, dict)
            records = json_resp["data"]
            if not records:
                # Empty page: this ticker is fully paginated
                break
            yield records
            got_records = True
            max_date = max(max_date,
                           max(ensure_date(r["date"]) for r in records))
            # Setup for next page
            params["offset"] += len(records)
        else:
            # Out of time before this ticker finished: leave its checkpoint
            # untouched so the next run re-fetches it in full, and stop
            # iterating the remaining tickers.
            return
        if got_records:
            # Ticker fully imported: safe to advance its checkpoint now
            ticker_latest_dates_imported[ticker] = max_date + timedelta(days=1)
            ctx.emit_state_value("ticker_latest_dates_imported",
                                 ticker_latest_dates_imported)
Esempio n. 3
0
def alphavantage_import_eod_prices(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageEodPrice]]:
    """Yield Alphavantage end-of-day price records, one batch per ticker.

    Each ticker is requested at most once a day, based on the timestamps
    kept in the ``ticker_latest_dates_imported`` state. The timestamp is
    refreshed after every attempt, whether or not records were returned.

    Args:
        ctx: Execution context; supplies state and the continue/stop signal.
        tickers_input: Optional reference to a ticker table.
        api_key: Alphavantage API key.
        tickers: Explicit ticker symbols.

    Yields:
        A list of parsed CSV rows per ticker, each with ``"symbol"`` added.
    """
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if not tickers:
        return
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {})
    conn = JsonHttpApiConnection()

    def fetch_prices(params: Dict, tries: int = 0) -> Optional[Records]:
        """Fetch and parse the CSV for one request; retry on rate limits.

        Returns None on an API error or after three attempts.
        """
        if tries > 2:
            return None
        resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params, stream=True)
        # Only the JSON parse is guarded. A body that is not JSON is the
        # expected CSV payload. Keeping the sleep and the retry outside the
        # ``try`` lets KeyboardInterrupt and retry failures propagate instead
        # of being swallowed.
        try:
            record = resp.json()
        except ValueError:
            record = None
        if record is not None:
            # Json response means error
            if is_alphavantage_error(record):
                # TODO: Log this failure?
                print(f"Error for {params} {record}")
                return None
            if is_alphavantage_rate_limit(record):
                time.sleep(60)
                return fetch_prices(params, tries=tries + 1)
        return list(read_csv(resp.iter_lines()))

    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME))
        assert latest_date_imported is not None
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            # Only check once a day
            continue
        params = prepare_params_for_ticker(ticker,
                                           ticker_latest_dates_imported)
        params["apikey"] = api_key
        records = fetch_prices(params)
        if records:
            # Symbol not included in the CSV rows, so add it
            for r in records:
                r["symbol"] = ticker
            yield records
        # Update state (also after a failed/empty fetch, so the ticker is
        # not retried until a day has passed)
        ticker_latest_dates_imported[ticker] = utcnow()
        ctx.emit_state_value("ticker_latest_dates_imported",
                             ticker_latest_dates_imported)
        if not ctx.should_continue():
            break
Esempio n. 4
0
def import_observations(
    ctx: Context,
    api_key: str,
    series_id: str,
) -> Iterator[Records[FredObservation]]:
    """Yield pages of observations for one FRED series.

    When a ``latest_fetched_at`` state value exists, observations are
    requested starting two years before it; otherwise from ``MIN_DATE``.
    The state is refreshed only after the pagination loop has finished.

    Args:
        ctx: Execution context used to read and write state.
        api_key: FRED API key.
        series_id: Identifier of the series to fetch.

    Yields:
        The ``"observations"`` list of each page, with ``series_id`` added
        to every record and ``"."`` values replaced by None.
    """
    previous_fetch = ctx.get_state_value("latest_fetched_at")
    if not previous_fetch:
        obs_start = MIN_DATE
    else:
        # Two year curing window (to be safe)
        obs_start = ensure_datetime(previous_fetch) - timedelta(days=2 * 365)
    params = {
        "file_type": "json",
        "api_key": api_key,
        "series_id": series_id,
        "observation_start": obs_start,
        "offset": 0,
        "limit": 100000,
    }
    conn = JsonHttpApiConnection(date_format="%Y-%m-%d")
    endpoint_url = FRED_API_BASE_URL + "series/observations"
    while True:
        payload = conn.get(endpoint_url, params).json()
        assert isinstance(payload, dict)
        observations = payload["observations"]
        if not observations:
            # Nothing (more) to import
            break
        for obs in observations:
            # Tag each row with its series so the data is self-describing
            obs["series_id"] = params["series_id"]
            # FRED quirk: an empty decimal comes back as "." instead of null
            if obs["value"] == ".":
                obs["value"] = None
        yield observations
        page_size = len(observations)
        if page_size < payload["limit"]:
            # A short page is the only available signal for the last page
            break
        params["offset"] += page_size
    # We only update date if we have fetched EVERYTHING available as of now
    ctx.emit_state_value("latest_fetched_at", utcnow())
Esempio n. 5
0
def marketstack_import_tickers(
    ctx: Context,
    access_key: str,
    exchanges: List = ["XNYS", "XNAS"],
) -> Iterator[Records[MarketstackTicker]]:
    """Yield pages of Marketstack tickers for each requested exchange.

    The import runs at most once a day, tracked by the ``last_imported_at``
    state value. That value is updated only when every exchange has been
    paginated to the end; a run that is out of time leaves it untouched so
    the next run retries instead of skipping the remaining exchanges for a
    day.

    Args:
        ctx: Execution context; supplies state and the continue/stop signal.
        access_key: Marketstack API access key.
        exchanges: Exchange codes to import. The list default is kept for
            interface compatibility; it is never mutated here.

    Yields:
        The ``"data"`` list of each page, with a flattened
        ``exchange_acronym`` field added to every record.
    """
    use_https = False  # TODO: when do we want this True?
    assert access_key is not None
    assert isinstance(exchanges, list)
    last_imported_at = ensure_datetime(
        ctx.get_state_value("last_imported_at") or "2020-01-01 00:00:00")
    assert last_imported_at is not None
    last_imported_at = ensure_utc(last_imported_at)
    if utcnow() - last_imported_at < timedelta(days=1):  # TODO: from config
        return
    conn = JsonHttpApiConnection()
    base_url = (HTTPS_MARKETSTACK_API_BASE_URL
                if use_https else MARKETSTACK_API_BASE_URL)
    endpoint_url = base_url + "tickers"
    for exchange in exchanges:
        params = {
            "limit": 1000,
            "offset": 0,
            "access_key": access_key,
            "exchange": exchange,
        }
        while ctx.should_continue():
            resp = conn.get(endpoint_url, params)
            json_resp = resp.json()
            assert isinstance(json_resp, dict)
            records = json_resp["data"]
            if not records:
                # Empty page: this exchange is fully paginated
                break
            # Add a flattened exchange indicator. ``or {}`` also covers a
            # "stock_exchange" key that is present but null.
            for r in records:
                stock_exchange = r.get("stock_exchange") or {}
                r["exchange_acronym"] = stock_exchange.get("acronym")
            yield records
            # Setup for next page
            params["offset"] += len(records)
        else:
            # Out of time before the import finished: do not mark it as
            # done, otherwise the rest would be skipped for a whole day.
            return
    ctx.emit_state_value("last_imported_at", utcnow())
Esempio n. 6
0
def alphavantage_import_company_overview(
    ctx: Context,
    tickers_input: Optional[Reference[Ticker]],
    api_key: str,
    tickers: Optional[List] = None,
) -> Iterator[Records[AlphavantageCompanyOverview]]:
    """Yield Alphavantage company overviews in batches of up to 50 records.

    Each ticker is refreshed at most once a day, based on the timestamps in
    the ``ticker_latest_dates_imported`` state. A ticker's timestamp is
    updated only after the batch containing its record has been yielded.
    Tickers whose fetch fails are skipped and keep their old timestamp.

    Args:
        ctx: Execution context; supplies state and the continue/stop signal.
        tickers_input: Optional reference to a ticker table.
        api_key: Alphavantage API key.
        tickers: Explicit ticker symbols.

    Yields:
        Lists of overview dicts whose keys were converted with
        ``title_to_snake_case``.
    """
    assert api_key is not None
    tickers = prepare_tickers(tickers, tickers_input)
    if tickers is None:
        # We didn't get an input block for tickers AND
        # the config is empty, so we are done
        return
    ticker_latest_dates_imported = (
        ctx.get_state_value("ticker_latest_dates_imported") or {})
    conn = JsonHttpApiConnection()
    batch_size = 50
    records = []
    tickers_updated = []

    def fetch_overview(params: Dict, tries: int = 0) -> Optional[Dict]:
        """Fetch one overview; retry on rate limits, give up after 3 tries."""
        if tries > 2:
            return None
        resp = conn.get(ALPHAVANTAGE_API_BASE_URL, params)
        record = resp.json()
        # Alphavantage returns 200 and json error message on failure
        if is_alphavantage_error(record):
            # TODO: Log this failure?
            return None
        if is_alphavantage_rate_limit(record):
            time.sleep(20)
            return fetch_overview(params, tries=tries + 1)
        return record

    def mark_imported(batch_tickers: List) -> None:
        """Stamp the given tickers as imported now and persist state once."""
        for updated_ticker in batch_tickers:
            ticker_latest_dates_imported[updated_ticker] = utcnow()
        ctx.emit_state_value("ticker_latest_dates_imported",
                             ticker_latest_dates_imported)

    for ticker in tickers:
        assert isinstance(ticker, str)
        latest_date_imported = ensure_datetime(
            ticker_latest_dates_imported.get(ticker, MIN_DATETIME))
        assert latest_date_imported is not None
        # Refresh at most once a day
        # TODO: make this configurable instead of hard-coded 1 day
        if utcnow() - ensure_utc(latest_date_imported) < timedelta(days=1):
            continue
        params = {
            "apikey": api_key,
            "symbol": ticker,
            "function": "OVERVIEW",
        }
        record = fetch_overview(params)
        if not record:
            continue

        # Clean up json keys to be more DB friendly
        record = {title_to_snake_case(k): v for k, v in record.items()}
        records.append(record)
        tickers_updated.append(ticker)
        if len(records) >= batch_size:
            yield records
            mark_imported(tickers_updated)
            records = []
            tickers_updated = []
            if not ctx.should_continue():
                break

    # Flush the final partial batch here rather than on "last index", so it
    # is not lost when the last ticker was skipped or its fetch failed.
    if records:
        yield records
        mark_imported(tickers_updated)
Esempio n. 7
0
def import_subscription_items(
    ctx: Context,
    api_key: str,
    curing_window_days: int = 90
) -> Iterator[Records[StripeSubscriptionItemRaw]]:
    """Yield Stripe subscription items for every subscription, page by page.

    Subscriptions are paged with the ``starting_after`` cursor, which is
    persisted in the ``current_starting_after`` state so an interrupted
    import can resume. For each subscription all of its item pages are
    fetched and yielded.

    Once a full pass has completed, its timestamp is stored under
    ``latest_full_import_at``. Later passes then request only subscriptions
    created after that timestamp minus ``curing_window_days``. The legacy
    ``latest_imported_at`` key is still written, and read as a fallback,
    because earlier versions stored the timestamp under that name only.

    # TODO: repeated code (largely duplicates ``stripe_importer``)

    Args:
        ctx: Execution context; supplies state and the continue/stop signal.
        api_key: Stripe API key, sent as the basic-auth username.
        curing_window_days: Days to look back before the last full import;
            a falsy value disables the ``created[gte]`` filter.

    Yields:
        The ``"data"`` list of each subscription-items response page.
    """
    latest_full_import_at = ensure_datetime(
        ctx.get_state_value("latest_full_import_at")
        or ctx.get_state_value("latest_imported_at"))
    current_starting_after = ctx.get_state_value("current_starting_after")
    params = {
        "limit": 100,
        "status": "all",
    }
    if latest_full_import_at and curing_window_days:
        # Import only more recent than latest imported at date, offset by a
        # curing window to capture updates to objects (refunds, etc)
        window_start = latest_full_import_at - timedelta(
            days=int(curing_window_days))
        params["created[gte]"] = int(window_start.timestamp())
    if current_starting_after:
        params["starting_after"] = current_starting_after
    conn = JsonHttpApiConnection()
    auth = HTTPBasicAuth(api_key, "")
    endpoint_url = STRIPE_API_BASE_URL + "subscriptions"
    items_url = STRIPE_API_BASE_URL + "subscription_items"

    def iter_item_pages(subscription_id):
        """Yield every page of items belonging to one subscription."""
        item_params = {
            "limit": 100,
            "subscription": subscription_id,
        }
        while True:
            items_json_resp = conn.get(items_url, item_params,
                                       auth=auth).json()
            assert isinstance(items_json_resp, dict)
            items = items_json_resp["data"]
            if not items:
                return
            yield items
            if not items_json_resp.get("has_more"):
                return
            item_params["starting_after"] = items[-1]["id"]

    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=auth)
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        subscriptions = json_resp["data"]
        if not subscriptions:
            # All done
            break

        for subscription in subscriptions:
            if not ctx.should_continue():
                # Timed out in the middle of a page: return WITHOUT moving
                # the cursor or marking the import complete, so the next run
                # re-fetches this page and no subscription is skipped.
                return
            yield from iter_item_pages(subscription["id"])

        if not json_resp.get("has_more"):
            # All done
            break
        latest_object_id = subscriptions[-1]["id"]
        params["starting_after"] = latest_object_id
        ctx.emit_state_value("current_starting_after", latest_object_id)
    else:
        # Don't update any state, we just timed out
        return
    # Reached only via ``break``, i.e. we have fetched EVERYTHING available
    finished_at = utcnow()
    ctx.emit_state_value("latest_full_import_at", finished_at)
    # Legacy key, kept so existing readers of the old name keep working
    ctx.emit_state_value("latest_imported_at", finished_at)
    # IMPORTANT: we reset the starting after cursor so we start from the beginning again on next run
    ctx.emit_state_value("current_starting_after", None)
Esempio n. 8
0
def stripe_importer(
    endpoint: str,
    ctx: Context,
    api_key: str,
    curing_window_days: int = None,
    extra_params: Dict = None,
):
    """Yield pages of records from a Stripe list endpoint.

    Stripe only allows fetching records in one order: from newest to oldest,
    so we use its cursor based pagination to iterate once through all
    historical records. The cursor is persisted in the
    ``current_starting_after`` state so an interrupted import can resume.

    Stripe doesn't have a way to request by "updated at" times, so old
    records are refreshed with a "curing window". Once a full pass has
    completed, later passes request only records created after the stored
    ``latest_full_import_at`` timestamp minus ``curing_window_days``. With
    the default ``curing_window_days=None`` no such filter is applied and
    every pass is a full import.

    The completion timestamp is written under ``latest_full_import_at``, the
    key that is read back. The legacy ``latest_imported_at`` key is still
    written, and read as a fallback, because earlier versions stored the
    timestamp under that name only.

    Args:
        endpoint: Path appended to ``STRIPE_API_BASE_URL``.
        ctx: Execution context; supplies state and the continue/stop signal.
        api_key: Stripe API key, sent as the basic-auth username.
        curing_window_days: Days to look back before the last full import.
        extra_params: Additional query parameters merged into the request.

    Yields:
        The ``"data"`` list of each response page.
    """
    latest_full_import_at = ensure_datetime(
        ctx.get_state_value("latest_full_import_at")
        or ctx.get_state_value("latest_imported_at"))
    current_starting_after = ctx.get_state_value("current_starting_after")
    params = {
        "limit": 100,
    }
    if extra_params:
        params.update(extra_params)
    if latest_full_import_at and curing_window_days:
        # Import only more recent than latest imported at date, offset by a
        # curing window to capture updates to objects (refunds, etc)
        window_start = latest_full_import_at - timedelta(
            days=int(curing_window_days))
        params["created[gte]"] = int(window_start.timestamp())
    if current_starting_after:
        params["starting_after"] = current_starting_after
    conn = JsonHttpApiConnection()
    auth = HTTPBasicAuth(api_key, "")
    endpoint_url = STRIPE_API_BASE_URL + endpoint
    while ctx.should_continue():
        resp = conn.get(endpoint_url, params, auth=auth)
        json_resp = resp.json()
        assert isinstance(json_resp, dict)
        records = json_resp["data"]
        if not records:
            # All done
            break

        # Return actual data
        yield records

        if not json_resp.get("has_more"):
            # All done
            break
        latest_object_id = records[-1]["id"]
        params["starting_after"] = latest_object_id
        ctx.emit_state_value("current_starting_after", latest_object_id)
    else:
        # Don't update any state, we just timed out
        return
    # Reached only via ``break``, i.e. we have fetched EVERYTHING available
    finished_at = utcnow()
    ctx.emit_state_value("latest_full_import_at", finished_at)
    # Legacy key, kept so existing readers of the old name keep working
    ctx.emit_state_value("latest_imported_at", finished_at)
    # IMPORTANT: we reset the starting after cursor so we start from the beginning again on next run
    ctx.emit_state_value("current_starting_after", None)