async def sync_trace_search(self, schema):
    """Emit hourly trace-search usage records and bookmark the last hour.

    Writes the ``trace_search`` schema (keyed on ``hour``), runs
    ``self.client.hourly_request`` in the event loop's default executor,
    writes every entry of the response's ``usage`` list as a record and
    stores the ``hour`` of the final entry under the ``since`` bookmark.

    Args:
        schema: Schema object exposing ``to_dict()``.
    """
    stream = "trace_search"
    loop = asyncio.get_event_loop()

    singer.write_schema(stream, schema.to_dict(), ["hour"])
    trace_search = await loop.run_in_executor(
        None, self.client.hourly_request, self.state, self.config, "traces",
        stream)
    if not trace_search:
        return

    # ``usage`` can be None (the bookmark step always guarded for it), so
    # check before iterating instead of after: iterating None raises
    # TypeError.
    usage = trace_search['usage']
    if not usage:
        return

    for trace in usage:
        singer.write_record(stream, trace)

    # Resume the next run from the most recent hour that was emitted.
    self.state = write_bookmark(self.state, stream, "since",
                                usage[-1]['hour'])
async def sync_custom_usage(self, schema):
    """Get hourly usage for custom metric.

    Writes the ``custom_usage`` schema (keyed on ``hour``), runs
    ``self.client.hourly_request`` in the event loop's default executor,
    writes every entry of the response's ``usage`` list as a record and
    stores the ``hour`` of the final entry under the ``since`` bookmark.

    Args:
        schema: Schema object exposing ``to_dict()``.
    """
    stream = "custom_usage"
    loop = asyncio.get_event_loop()

    singer.write_schema(stream, schema.to_dict(), ["hour"])
    custom_usage = await loop.run_in_executor(
        None, self.client.hourly_request, self.state, self.config,
        "timeseries", stream)
    if not custom_usage:
        return

    # ``usage`` can be None (the bookmark step always guarded for it), so
    # check before iterating instead of after: iterating None raises
    # TypeError.
    usage = custom_usage['usage']
    if not usage:
        return

    for entry in usage:
        singer.write_record(stream, entry)

    # Resume the next run from the most recent hour that was emitted.
    self.state = write_bookmark(self.state, stream, "since",
                                usage[-1]['hour'])
def append_data(response, state, config, stream, tokens, query):
    """Collect the records of a response, following pagination if present.

    The records live under the key ``get_ref()[stream.tap_stream_id]`` of
    the JSON body. When the body carries a ``pagecount`` entry, every page
    offset in ``range(pagecount)`` is requested through ``make_request``
    and its records are appended.

    Args:
        response: Initial response object exposing ``json()``; a falsy
            value yields an empty result.
        state: Bookmark state; updated only when a page request fails.
        config: Passed through to ``make_request``.
        stream: Stream object exposing ``tap_stream_id``.
        tokens: Passed through to ``make_request``.
        query: Request query dict; its ``pageoffset`` entry is overwritten
            for each page.

    Returns:
        tuple: ``(tap_data, state)``. If a page request returns a falsy
        response, the offset of that page is bookmarked under
        ``currentpage`` and the records gathered so far are returned.
    """
    ref = get_ref()
    key = "currentpage"
    if not response:
        return [], state

    data_key = ref[stream.tap_stream_id]
    # Decode the body once instead of re-parsing it for every lookup.
    payload = response.json()
    tap_data = payload[data_key]

    if "pagecount" in payload:
        # NOTE(review): offset 0 is requested again although the initial
        # response's records are already in tap_data - confirm against the
        # API's ``pageoffset`` semantics that this does not duplicate rows.
        for page in range(payload["pagecount"]):
            query["pageoffset"] = page
            page_response = make_request(config, stream, tokens, query)
            if not page_response:
                state = write_bookmark(state, stream.tap_stream_id, key, page)
                return tap_data, state
            # ``payload`` is local, so extending its list in place is safe
            # and avoids copying the accumulated records on every page.
            tap_data.extend(page_response.json()[data_key])

    return tap_data, state
async def sync_bills(self, schema, period: pendulum.period = None):
    """Output the `bills` in the period.

    One request is made through ``self.client.bill`` for every month step
    of ``period``; each non-empty result is written as a record and its
    ``end_time`` becomes the new ``start_time`` bookmark.

    Args:
        schema: Schema object exposing ``to_dict()``.
        period: Period to sync. When omitted, a period from the
            ``start_time`` bookmark until now is used.
    """
    stream = "bills"
    loop = asyncio.get_event_loop()

    # Compare against None explicitly: pendulum periods are timedelta
    # subclasses, so a caller-supplied zero-length period is falsy and
    # ``if not period`` would silently replace it with the default.
    if period is None:
        # build a default period from the last bookmark
        bookmark = get_bookmark(self.state, stream, "start_time")
        period = pendulum.period(pendulum.parse(bookmark), pendulum.now())

    singer.write_schema(stream, schema.to_dict(), ["invoice_id"])

    for at in period.range("months"):
        result = await loop.run_in_executor(None, self.client.bill, at)
        if not result:
            continue
        singer.write_record(stream, result)
        # Re-serialise the ``...Z`` timestamp as a plain ISO-8601 string.
        end = datetime.datetime.strptime(
            result["end_time"], "%Y-%m-%dT%H:%M:%SZ").isoformat()
        self.state = write_bookmark(self.state, stream, "start_time", end)
async def sync_stats(self, schema, period: pendulum.period = None):
    """Output the stats in the period.

    Requests stats from the ``from`` bookmark (or the configured
    ``start_date`` when no bookmark exists) up to now, decorates every row
    with ``service_*`` attributes fetched through ``self.client.service``
    and bookmarks the ``to`` timestamp reported in the response metadata.

    Args:
        schema: Schema object exposing ``to_dict()``.
        period: Accepted for signature compatibility; not read here.
    """
    stream = "stats"
    loop = asyncio.get_event_loop()
    legacy_format = '%Y-%m-%d %H:%M:%S UTC'
    # Service attributes copied verbatim onto each row, in output order.
    copied_fields = ('customer_id', 'publish_key', 'comment', 'deleted_at',
                     'updated_at', 'created_at')

    singer.write_schema(stream, schema.to_dict(),
                        ["service_id", "start_time"])

    bookmark = get_bookmark(self.state, stream, "from")
    if bookmark is None:
        since = self._config['start_date']
    elif "UTC" in bookmark:
        # Bookmarks carrying a "UTC" suffix are rewritten as ISO-8601.
        since = datetime.datetime.strptime(bookmark,
                                           legacy_format).isoformat()
    else:
        since = bookmark
    start_date = pendulum.parse(since).int_timestamp
    end_date = pendulum.now().int_timestamp

    result = await loop.run_in_executor(None, self.client.stats, start_date,
                                        end_date)
    if not result:
        return

    for service_id in result['data']:
        service = await loop.run_in_executor(None, self.client.service,
                                             service_id)
        for row in result['data'][service_id]:
            row['service_name'] = service['name']
            row['service_versions'] = json.dumps(service['versions'])
            for field in copied_fields:
                row['service_' + field] = service[field]
            singer.write_record(stream, row)

    window_end = datetime.datetime.strptime(result['meta']["to"],
                                            legacy_format).isoformat()
    self.state = write_bookmark(self.state, stream, "from", window_end)
def validate_state(config, catalog, state):
    """Reconcile ``state`` with the catalog's stream selection.

    Deselected streams are dropped as the "currently syncing" stream, and
    every selected stream that has a replication key but no bookmark for it
    is seeded with ``config["start_date"]``. The resulting state is emitted
    and returned.
    """
    for entry in catalog["streams"]:
        selected = entry["schema"].get("selected")
        stream_id = entry["tap_stream_id"]

        if not selected:
            # If a stream is deselected while it's the current stream,
            # unset the current stream.
            if stream_id == get_currently_syncing(state):
                set_currently_syncing(state, None)
            continue

        replication_key = determine_replication_key(stream_id)
        # If there's no bookmark for a stream (new integration, newly
        # selected, reset, etc) we need to use the default start date from
        # the config.
        if replication_key and get_bookmark(state, stream_id,
                                            replication_key) is None:
            state = write_bookmark(state, stream_id, replication_key,
                                   config["start_date"])

    singer.write_state(state)
    return state
def sync_a_day(stream_id, path, params, start_ymd, end_ymd, func_get_records,
               client, catalog, state, window_end):
    """Sync a single day's worth of data, paginating as often as necessary.

    Loop Guide
      1. Make the API call
      2. Process the response to get a list of records
      3. Persist the records
      4. Update the call parameters

    When we get back an empty result set, log a message that we are done,
    reset the page bookmark to page 1, and update the datetime bookmark to
    the day we just finished syncing.

    The length of the response can vary, but once we get back an empty
    response, it seems that we have requested all objects. Do not trust the
    `total` field on the response, it has varied from the actual total by
    O(10). The path to that field is response.json()['orders']['total'],
    where the response is in the request function in `client.py`.

    Args:
        stream_id: Stream name; used for bookmarks, logging and as the
            ``endpoint`` argument of ``client.get``.
        path: Request path handed to ``client.get``.
        params: Request parameters; ``params['page']`` is overwritten on
            every iteration.
        start_ymd: Start of the window, used for logging only.
        end_ymd: End of the window, used for logging only.
        func_get_records: Callable extracting the records from a response.
        client: API client exposing ``get``.
        catalog: Passed through to ``persist_records``.
        state: Bookmark state, updated in place.
        window_end: Value stored as the ``datetime`` bookmark once the day
            is exhausted.
    """
    # Page until we're done
    while True:
        # Set the page to start paginating on
        params['page'] = bookmarks.get_bookmark(state, stream_id, 'page', 1)
        LOGGER.info('Syncing %s from: %s to: %s - page %s', stream_id,
                    start_ymd, end_ymd, params['page'])

        data = client.get(path, params=params, endpoint=stream_id)
        records = func_get_records(data)

        if not records:
            LOGGER.info('Done daily pagination for endpoint %s at page %d',
                        stream_id, params['page'])
            bookmarks.write_bookmark(state, stream_id, 'page', 1)
            bookmarks.write_bookmark(state, stream_id, 'datetime',
                                     utils.strftime(window_end))
            singer.write_state(state)
            break

        persist_records(catalog, stream_id, records)
        # Advance the page bookmark BEFORE emitting state. Emitting first
        # published a checkpoint that still pointed at the page just
        # persisted, so an interrupted run re-synced that page on resume.
        bookmarks.write_bookmark(state, stream_id, 'page',
                                 params['page'] + 1)
        singer.write_state(state)
def sync_paginated(client, state, stream):
    """Sync a token-paginated stream, resuming from a stored paging token.

    Every page is requested with a batch size of 300. Rows are formatted
    and emitted when their replication-key value is at or after the
    stream's bookmark. The paging token is checkpointed after each page,
    cleared once the pages are exhausted, and the replication-key bookmark
    is set to the time this job started.

    Returns:
        tuple: ``(state, record_count)``.
    """
    # http://developers.marketo.com/rest-api/endpoint-reference/lead-database-endpoint-reference/#!/Campaigns/getCampaignsUsingGET
    # http://developers.marketo.com/rest-api/endpoint-reference/lead-database-endpoint-reference/#!/Static_Lists/getListsUsingGET
    #
    # Campaigns and Static Lists are paginated with a max return of 300
    # items per page. There are no filters that can be used to only
    # return updated records.
    stream_id = stream["tap_stream_id"]
    replication_key = determine_replication_key(stream_id)

    singer.write_schema(stream_id, stream["schema"],
                        stream["key_properties"],
                        bookmark_properties=[replication_key])
    start_date = bookmarks.get_bookmark(state, stream_id, replication_key)
    params = {"batchSize": 300}
    endpoint = "rest/v1/{}.json".format(stream_id)

    # Paginated requests use paging tokens for retrieving the next page
    # of results. These tokens are stored in the state for resuming
    # syncs. If a paging token exists in state, use it.
    resume_token = bookmarks.get_bookmark(state, stream_id,
                                          "next_page_token")
    if resume_token:
        params["nextPageToken"] = resume_token

    record_count = 0
    job_started = pendulum.utcnow().isoformat()

    # Keep querying pages of data until no next page token.
    more_pages = True
    while more_pages:
        data = client.request("GET", endpoint, endpoint_name=stream_id,
                              params=params)
        time_extracted = utils.now()

        # Each row just needs the values formatted. Rows at or after the
        # original start date are streamed.
        for row in data["result"]:
            record = format_values(stream, row)
            if record[replication_key] >= start_date:
                record_count += 1
                singer.write_record(stream_id, record,
                                    time_extracted=time_extracted)

        more_pages = "nextPageToken" in data
        if more_pages:
            # Store the next page token in state and continue.
            token = data["nextPageToken"]
            params["nextPageToken"] = token
            state = bookmarks.write_bookmark(state, stream_id,
                                             "next_page_token", token)
            singer.write_state(state)

    # Once all results are exhausted, unset the next page token bookmark
    # so the subsequent sync starts from the beginning.
    state = bookmarks.write_bookmark(state, stream_id, "next_page_token",
                                     None)
    state = bookmarks.write_bookmark(state, stream_id, replication_key,
                                     job_started)
    singer.write_state(state)
    return state, record_count
def write_metrics_state(atx, metric, date_to_resume):
    """Bookmark the resume date for ``metric`` and emit the state.

    ``date_to_resume`` is stored under the ``date_to_resume`` bookmark as
    the string returned by its ``to_datetime_string()`` method.
    """
    resume_at = date_to_resume.to_datetime_string()
    write_bookmark(atx.state, metric, 'date_to_resume', resume_at)
    atx.write_state()
def set_bookmark(self, path, val):
    """Write ``val`` to the bookmark addressed by ``path``.

    ``path[0]`` and ``path[1]`` are passed to ``write_bookmark`` as its
    second and third arguments. ``date`` instances are stored in their
    ``isoformat()`` representation; any other value is stored unchanged.
    """
    stored = val.isoformat() if isinstance(val, date) else val
    bks_.write_bookmark(self.state, path[0], path[1], stored)
def set_bookmark(self, path, val):
    """Write ``val`` to the bookmark addressed by ``path``.

    ``path[0]`` and ``path[1]`` are passed to ``write_bookmark`` as its
    second and third arguments. ``date`` instances are converted with
    ``isoformat()``; every string value (including one produced by that
    conversion) is then re-parsed by pendulum and stored as an ISO-8601
    string. Other values are stored unchanged.
    """
    normalized = val.isoformat() if isinstance(val, date) else val
    if isinstance(normalized, str):
        normalized = pendulum.parse(normalized).to_iso8601_string()
    bks_.write_bookmark(self.state, path[0], path[1], normalized)
def sync_report(stream_name, stream_metadata, sdk_client):
    """Sync one report stream window by window up to the report end date.

    Writes the stream's schema, resolves the selected fields, determines
    the start date (the day after an interrupted attribution-window
    bookmark if one exists, otherwise the conversion-window-adjusted stream
    start), then downloads the report in consecutive windows. After every
    window the ``last_attribution_window_date`` bookmark is updated and the
    state emitted; for non-incremental streams that bookmark is cleared
    once all windows are done.

    Args:
        stream_name: Report name, also used as the stream name.
        stream_metadata: Metadata map of the stream.
        sdk_client: SDK client exposing ``client_customer_id``.
    """
    report_window_days = CONFIG.get("MAX_REPORT_TIME_WINDOW", 365)
    is_incremental = metadata.get(stream_metadata, (),
                                  "replication-method") == "INCREMENTAL"

    customer_id = sdk_client.client_customer_id
    stream_schema, _ = create_schema_for_report(stream_name, sdk_client)
    stream_schema = add_synthetic_keys_to_stream_schema(stream_schema)
    xml_attribute_list = get_fields_to_sync(stream_schema, stream_metadata)

    primary_keys = metadata.get(
        stream_metadata, (), 'tap-adwords.report-key-properties') or []
    LOGGER.info("{} primary keys are {}".format(stream_name, primary_keys))
    write_schema(stream_name, stream_schema, primary_keys,
                 bookmark_properties=['day'])

    field_list = [
        stream_metadata[('properties', field)]['adwords.fieldName']
        for field in xml_attribute_list
    ]
    check_selected_fields(stream_name, field_list, sdk_client)

    # If an attribution window sync is interrupted, start where it left off
    start_date = get_attribution_window_bookmark(customer_id, stream_name)
    if start_date is not None:
        start_date = start_date + relativedelta(days=1)
    else:
        start_date = apply_conversion_window(
            get_start_for_stream(customer_id, stream_name))

    if stream_name in REPORTS_WITH_90_DAY_MAX:
        cutoff = utils.now() + relativedelta(days=-90)
        if start_date < cutoff:
            # Clamp first so the warning reports the date actually used;
            # previously it printed the rejected, too-early start date.
            start_date = cutoff
            LOGGER.warning(
                "report only supports up to 90 days, will start at {}".format(
                    start_date))

    start_date = start_date.replace(hour=0, minute=0, second=0,
                                    microsecond=0)
    LOGGER.info('Selected fields: %s', field_list)

    max_end_date = utils.now() - relativedelta(days=1)
    required_end_date = get_end_date()
    report_end_date = min(max_end_date, required_end_date)
    report_end_date = report_end_date.replace(hour=23, minute=59, second=59,
                                              microsecond=0)

    is_single_day_report = stream_name in REPORTS_REQUIRING_DAILY_REPORTS
    state_key = state_key_name(customer_id, stream_name)

    next_start_date = start_date
    while next_start_date <= report_end_date:
        # Single-day reports use a zero-length window; all others span
        # ``report_window_days``. Either way never run past the end date.
        if is_single_day_report:
            window_end = next_start_date
        else:
            window_end = next_start_date + relativedelta(
                days=report_window_days)
        actual_end_date = min(window_end, report_end_date)

        # Log the clamped end date, i.e. the range really requested.
        singer.log_info("syncing %s for %s - %s", stream_name,
                        next_start_date.strftime("%Y-%m-%d"),
                        actual_end_date.strftime("%Y-%m-%d"))
        # NOTE(review): the end date is passed as a sixth positional
        # argument - confirm sync_report_for_day accepts it.
        sync_report_for_day(stream_name, stream_schema, sdk_client,
                            next_start_date, field_list, actual_end_date)

        bookmarks.write_bookmark(
            STATE, state_key, 'last_attribution_window_date',
            actual_end_date.strftime(utils.DATETIME_FMT))
        singer.write_state(STATE)

        # Once the window was clamped this lands past ``report_end_date``
        # and ends the loop, exactly like stepping from the unclamped end.
        next_start_date = actual_end_date + relativedelta(days=1)

    if not is_incremental:
        bookmarks.clear_bookmark(STATE, state_key,
                                 'last_attribution_window_date')
        singer.write_state(STATE)

    LOGGER.info("Done syncing the %s report for customer_id %s", stream_name,
                customer_id)
def sync_report_for_day(stream_name, stream_schema, sdk_client, start,
                        field_list, end=None):  # pylint: disable=too-many-locals
    """Download one report date range and emit its rows as records.

    Args:
        stream_name: Report type, also used as the stream name.
        stream_schema: Schema used to map CSV headers and transform rows.
        sdk_client: SDK client exposing ``GetReportDownloader`` and
            ``client_customer_id``.
        start: First day of the requested date range.
        field_list: Report fields to select.
        end: Last day of the requested date range. Defaults to ``start``
            (a single-day report), which keeps five-argument callers
            working while allowing callers that pass an explicit end date.

    The ``date`` bookmark is advanced to ``start`` when ``start`` is later
    than the stream's current start; it deliberately does not jump to
    ``end``, so a resumed sync can only re-request data, never skip it.
    """
    if end is None:
        end = start

    report_downloader = sdk_client.GetReportDownloader(version=VERSION)
    customer_id = sdk_client.client_customer_id
    report = {
        'reportName': 'Seems this is required',
        'dateRangeType': 'CUSTOM_DATE',
        'reportType': stream_name,
        'downloadFormat': 'CSV',
        'selector': {
            'fields': field_list,
            'dateRange': {
                'min': start.strftime('%Y%m%d'),
                'max': end.strftime('%Y%m%d'),
            },
        },
    }

    # Fetch the report as a csv string
    with metrics.http_request_timer(stream_name):
        result = attempt_download_report(report_downloader, report)

    headers, csv_reader = parse_csv_stream(result)
    with metrics.record_counter(stream_name) as counter:
        time_extracted = utils.now()

        with Transformer(singer.UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING
                         ) as bumble_bee:
            # The hook never changes, so install it once, not per row.
            bumble_bee.pre_hook = transform_pre_hook
            for row in csv_reader:
                obj = dict(
                    zip(get_xml_attribute_headers(stream_schema, headers),
                        row))
                obj['_sdc_customer_id'] = customer_id
                obj['_sdc_report_datetime'] = REPORT_RUN_DATETIME

                obj = bumble_bee.transform(obj, stream_schema)
                singer.write_record(stream_name, obj,
                                    time_extracted=time_extracted)
                counter.increment()

        # Look the current start up once and reuse it for the comparison
        # and the log line.
        current_start = get_start_for_stream(customer_id, stream_name)
        if start > current_start:
            LOGGER.info('updating bookmark: %s > %s', start, current_start)
            bookmarks.write_bookmark(
                STATE, state_key_name(customer_id, stream_name), 'date',
                utils.strftime(start))
            singer.write_state(STATE)
        else:
            LOGGER.info('not updating bookmark: %s <= %s', start,
                        current_start)

        LOGGER.info(
            "Done syncing %s records for the %s report for customer_id %s on %s",
            counter.value, stream_name, customer_id, start)
def set_bookmark(self, path, val):
    """Write ``val`` unchanged to the bookmark addressed by ``path``.

    ``path[0]`` and ``path[1]`` are passed to ``write_bookmark`` as its
    second and third arguments.
    """
    first, second = path[0], path[1]
    bks_.write_bookmark(self.state, first, second, val)
def sync(config, state, catalog):
    """ Sync data from tap source

    For every selected stream the schema is written, then rows are fetched
    per parent row (or once with an empty parent when the stream has no
    parent stream), transformed and emitted. Bookmarks are kept under two
    keys, ``page_sync`` and ``full_sync``, optionally prefixed with the
    value returned by ``build_bookmark_key_prefix``.

    NOTE(review): this block was reconstructed from a whitespace-collapsed
    source; the nesting of the blocks following the row loop should be
    confirmed against the original file.
    """
    access_token = config['access_token']
    client = SurveyMonkeyClient(access_token)

    # Loop over selected streams in catalog
    for stream in catalog.get_selected_streams(state):
        stream_object = STREAMS[stream.tap_stream_id]
        mdata = metadata.to_map(stream.metadata)
        raw_schema = stream.schema.to_dict()
        LOGGER.info("Syncing stream: " + stream.tap_stream_id)

        bookmark_column = stream_object.replication_key
        is_sorted = stream_object.is_sorted  # indicate whether data is sorted ascending on bookmark value

        # Publish schema to singer.
        singer.write_schema(
            stream_name=stream.tap_stream_id,
            schema=raw_schema,
            key_properties=stream.key_properties,
        )

        with metrics.record_counter(stream.tap_stream_id) as counter:
            max_bookmark = None
            bookmark_value = None
            bookmark_key_prefix = ''
            with Transformer() as transformer:
                # When the replication key comes from the parent, the
                # stored bookmark is resolved once and handed to the parent
                # fetch; a ``full_sync`` value overrides ``page_sync``.
                if stream_object.replication_key_from_parent:
                    if bookmarks.get_bookmark(state, stream_object.stream_id,
                                              'page_sync'):
                        bookmark_value = bookmarks.get_bookmark(
                            state, stream_object.stream_id, 'page_sync')
                    if bookmarks.get_bookmark(state, stream_object.stream_id,
                                              'full_sync'):
                        bookmark_value = bookmarks.get_bookmark(
                            state, stream_object.stream_id, 'full_sync')

                # Streams without a parent iterate once over an empty dict.
                for parent_row in stream_object.parent_stream.fetch_data(
                        client, None, config, state,
                        bookmark_value=bookmark_value
                ) if stream_object.parent_stream else [{}]:
                    bookmark_value = None
                    if not stream_object.replication_key_from_parent:
                        bookmark_key_prefix = build_bookmark_key_prefix(
                            parent_row, stream_object.parent_stream)
                        # NOTE(review): the existence checks read the
                        # un-prefixed keys while the values are read from
                        # the prefixed keys - confirm this is intended.
                        if bookmarks.get_bookmark(state,
                                                  stream_object.stream_id,
                                                  'page_sync'):
                            bookmark_value = bookmarks.get_bookmark(
                                state, stream_object.stream_id,
                                f'{bookmark_key_prefix}page_sync')
                        if bookmarks.get_bookmark(state,
                                                  stream_object.stream_id,
                                                  'full_sync'):
                            bookmark_value = bookmarks.get_bookmark(
                                state, stream_object.stream_id,
                                f'{bookmark_key_prefix}full_sync')

                    for row in stream_object.fetch_data(
                            client, stream, config, state,
                            parent_row=parent_row,
                            bookmark_value=bookmark_value):
                        # write one or more rows to the stream:
                        singer.write_record(
                            stream.tap_stream_id,
                            transformer.transform(row, raw_schema, mdata))
                        if bookmark_column and not stream_object.replication_key_from_parent:
                            if is_sorted:
                                # update bookmark to latest value
                                state = bookmarks.write_bookmark(
                                    state, stream.tap_stream_id,
                                    f'{bookmark_key_prefix}page_sync',
                                    row[bookmark_column])
                                singer.write_state(state)
                            else:
                                # if data unsorted, save max value until end of writes
                                # NOTE(review): max_bookmark starts as None
                                # and max(None, value) raises TypeError on
                                # Python 3 - confirm whether this branch is
                                # ever exercised.
                                max_bookmark = max(max_bookmark,
                                                   row[bookmark_column])
                        counter.increment()

                    # Per-parent bookmark for streams whose replication key
                    # lives on the parent row.
                    if bookmark_column and parent_row and stream_object.replication_key_from_parent:
                        if is_sorted:
                            # update bookmark to latest value
                            state = bookmarks.write_bookmark(
                                state, stream.tap_stream_id,
                                f'{bookmark_key_prefix}page_sync',
                                parent_row[bookmark_column])
                            singer.write_state(state)
                        else:
                            # if data unsorted, save max value until end of writes
                            max_bookmark = max(max_bookmark,
                                               parent_row[bookmark_column])

                # NOTE(review): the unsorted path stores the value of the
                # last row seen rather than ``max_bookmark``, and ``row``
                # is unbound when nothing was fetched - verify intent.
                if bookmark_column and not is_sorted:
                    state = bookmarks.write_bookmark(
                        state, stream.tap_stream_id,
                        f'{bookmark_key_prefix}full_sync',
                        row[bookmark_column])
                    singer.write_state(state)
                if bookmark_column and not is_sorted and parent_row and stream_object.replication_key_from_parent:
                    state = bookmarks.write_bookmark(
                        state, stream.tap_stream_id,
                        f'{bookmark_key_prefix}full_sync',
                        parent_row[bookmark_column])
                    singer.write_state(state)

        LOGGER.info('Stream: {}, Processed {} records'.format(
            stream.tap_stream_id, counter.value))
def write_forms_state(atx, form, date_to_resume):
    """Bookmark the resume date for ``form`` and emit the state.

    ``date_to_resume`` is stored under the ``date_to_resume`` bookmark as
    the string returned by its ``to_datetime_string()`` method.
    """
    resume_at = date_to_resume.to_datetime_string()
    write_bookmark(atx.state, form, 'date_to_resume', resume_at)
    atx.write_state()
def sync_daily(client, catalog, state, start_date, end_date, stream_id,
               stream_config):
    """Syncs a given date range, bookmarking after each day.

    Argument Types:
      client: [ShipHeroClient]
      catalog: [Dictionary]
      state: [Dictionary]
      start_date: [String, UTC datetime]
      end_date: Optional, non-inclusive day [String, UTC datetime]
      stream_id: [String]
      stream_config: [Dictionary]

    ``stream_config`` must provide ``path``, ``from_col``, ``to_col`` and
    ``get_records``; ``params`` is optional and is copied, never modified.

    Raises:
        Exception: if the effective start date is later than the end date.
    """
    write_schema(catalog, stream_id)

    #######################################################################
    ### Set up datetime versions of the start_date, end_date
    #######################################################################

    # Rip this out once all bookmarks are converted
    if isinstance(state.get('bookmarks', {}).get(stream_id), str):
        # Old style bookmark found. Use it and delete it
        old_style_bookmark = state['bookmarks'].pop(stream_id)
        # Write this bookmark in the new style
        bookmarks.write_bookmark(state, stream_id, 'datetime',
                                 old_style_bookmark)

    start_date_bookmark = bookmarks.get_bookmark(state, stream_id,
                                                 'datetime', start_date)
    start_date_dt = strptime_to_utc(start_date_bookmark)

    # Since end_date is optional in the top level sync
    end_date_dt = strptime_to_utc(end_date) if end_date else utils.now()

    if start_date_dt > end_date_dt:
        raise Exception(
            '{} start_date is greater than end_date'.format(stream_id))

    #######################################################################
    ### Sync data by day
    #######################################################################

    # Extract params from config
    path = stream_config['path']
    # Work on a copy: the page number and date range written below must
    # not leak into the shared stream configuration.
    params = dict(stream_config.get('params', {}))
    from_col = stream_config['from_col']
    to_col = stream_config['to_col']
    records_fn = stream_config['get_records']

    # Set the page to start paginating on
    params['page'] = bookmarks.get_bookmark(state, stream_id, 'page', 1)

    # Loop over all the days
    while start_date_dt != end_date_dt:
        window_end = start_date_dt + timedelta(days=1)
        # Clamp the last window so the loop lands exactly on end_date_dt.
        if window_end > now() or window_end > end_date_dt:
            window_end = end_date_dt

        # The API expects the dates in %Y-%m-%d
        start_ymd = start_date_dt.strftime('%Y-%m-%d')
        end_ymd = window_end.strftime('%Y-%m-%d')
        if start_ymd == end_ymd:
            # NB: A range of 0 days will return 0 records.
            end_ymd = (utils.strptime_to_utc(end_ymd) +
                       timedelta(days=1)).strftime('%Y-%m-%d')

        params.update({from_col: start_ymd, to_col: end_ymd})
        sync_a_day(stream_id, path, params, start_ymd, end_ymd, records_fn,
                   client, catalog, state, window_end)

        start_date_dt = window_end
def sync_statistics_for_day(
    config,
    state,
    stream,
    sdk_client,
    token,
    start,
    report_metrics,
    report_dimensions,
):  # pylint: disable=too-many-locals
    """Sync and output Criteo Statistics endpoint for one day.

    Args:
        config: Tap configuration; ``advertiser_ids`` is optional.
        state: Bookmark state, updated in place.
        stream: Catalog stream exposing ``metadata``, ``schema``,
            ``stream`` and ``tap_stream_id``.
        sdk_client: Client handed to ``get_statistics_report``.
        token: Auth token handed to ``get_statistics_report``.
        start: Day to request; used as both start and end date.
        report_metrics: Metrics to request.
        report_dimensions: Dimensions to request.
    """
    mdata = metadata.to_map(stream.metadata)
    currency = metadata.get(mdata, (), "tap-criteo.currency")
    day = start.strftime("%Y-%m-%d")
    stats_query = {
        "report_type": stream.tap_stream_id,
        "dimensions": report_dimensions,
        "metrics": report_metrics,
        "start_date": day,
        "end_date": day,
        "currency": currency,
    }

    # Filter advertiser_ids if defined in config
    advertiser_ids = config.get("advertiser_ids")
    if advertiser_ids:
        stats_query["advertiserId"] = advertiser_ids

    # Add ignore_x_device if defined in metadata
    ignore_x_device = metadata.get(mdata, (), "tap-criteo.ignoreXDevice")
    if ignore_x_device:
        # NOTE(review): the query key carries the "tap-criteo." metadata
        # prefix, unlike "advertiserId" above - confirm the report helper
        # expects this key.
        stats_query["tap-criteo.ignoreXDevice"] = ignore_x_device

    # Fetch the report as a csv string
    with metrics.http_request_timer(stream.tap_stream_id):
        result = get_statistics_report(sdk_client, stats_query, token=token)

    csv_reader = parse_csv_string(mdata, result)
    # Loop-invariant: serialise the schema once instead of once per row.
    schema = stream.schema.to_dict()
    with metrics.record_counter(stream.tap_stream_id) as counter:
        time_extracted = utils.now()

        with Transformer() as bumble_bee:
            for row in csv_reader:
                row["_sdc_report_datetime"] = REPORT_RUN_DATETIME
                row["_sdc_report_currency"] = currency

                row = bumble_bee.transform(row, schema)
                singer.write_record(stream.stream, row,
                                    time_extracted=time_extracted)
                counter.increment()

        # Look the current start up once and reuse it for the comparison
        # and the log line.
        current_start = get_start_for_stream(config, state, advertiser_ids,
                                             stream.stream)
        if start > current_start:
            LOGGER.info("updating bookmark: %s > %s", start, current_start)
            bookmarks.write_bookmark(
                state,
                state_key_name(advertiser_ids, stream.stream),
                "date",
                utils.strftime(start),
            )
            singer.write_state(state)
        else:
            LOGGER.info("not updating bookmark: %s <= %s", start,
                        current_start)

        LOGGER.info(
            "Done syncing %s records for the %s report for " +
            "advertiser_ids %s on %s",
            counter.value,
            stream.stream,
            advertiser_ids,
            start,
        )
def sync_statistics_report(config, state, stream, sdk_client, token):
    """Sync a stream which is backed by the Criteo Statistics endpoint.

    Writes the schema, validates the selected dimensions and metrics, then
    syncs one day at a time until ``get_end_date(config)`` is passed. After
    each day the ``last_attribution_window_date`` bookmark is set to the
    next day to sync; it is cleared when the loop completes.

    Raises:
        ValueError: if more than 3 dimensions or no metric are selected.
    """
    advertiser_ids = config.get("advertiser_ids", "")
    mdata = metadata.to_map(stream.metadata)
    stream = add_synthetic_keys_to_stream_schema(stream)
    field_list = get_field_list(stream)

    def fields_with_behaviour(behaviour):
        """Return the selected fields whose metadata behaviour matches."""
        return [
            field for field in field_list
            if metadata.get(mdata, ("properties", field),
                            "tap-criteo.behaviour") == behaviour
        ]

    primary_keys = []
    LOGGER.info("{} primary keys are {}".format(stream.stream, primary_keys))
    singer.write_schema(
        stream.stream,
        stream.schema.to_dict(),
        primary_keys,
        bookmark_properties=["Day"],
    )

    # If an attribution window sync is interrupted, start where it left off
    current_day = get_attribution_window_bookmark(state, advertiser_ids,
                                                  stream.stream)
    if current_day is None:
        current_day = apply_conversion_window(
            config,
            get_start_for_stream(config, state, advertiser_ids,
                                 stream.stream),
        )

    # According to Criteo's documentation the StatisticsApi only supports
    # between one and three dimensions and at least one metric.
    report_dimensions = fields_with_behaviour("dimension")
    LOGGER.info("Selected dimensions: %s", report_dimensions)
    dimension_count = len(report_dimensions)
    if dimension_count < 0 or dimension_count > 3:
        raise ValueError(
            "%s stream only supports up to 3 selected dimensions" %
            stream.stream)

    report_metrics = fields_with_behaviour("metric")
    LOGGER.info("Selected metrics: %s", report_metrics)
    if len(report_metrics) < 1:
        raise ValueError("%s stream must have at least 1 selected metric" %
                         stream.stream)

    while current_day <= get_end_date(config):
        token = refresh_auth_token(sdk_client, token)
        sync_statistics_for_day(
            config,
            state,
            stream,
            sdk_client,
            token,
            current_day,
            report_metrics,
            report_dimensions,
        )
        current_day = current_day + relativedelta(days=1)
        # Checkpoint the next day to sync so an interrupted run resumes
        # from it.
        bookmarks.write_bookmark(
            state,
            state_key_name(advertiser_ids, stream.stream),
            "last_attribution_window_date",
            utils.strftime(current_day),
        )
        singer.write_state(state)

    bookmarks.clear_bookmark(
        state,
        state_key_name(advertiser_ids, stream.stream),
        "last_attribution_window_date",
    )
    singer.write_state(state)
    LOGGER.info(
        "Done syncing the %s report for advertiser_ids %s",
        stream.stream,
        advertiser_ids,
    )
def _write_initial_activate_version(stream_id, version, state):
    """On a stream's first run, emit ACTIVATE_VERSION and record that fact."""
    if not is_first_run(stream_id, state):
        return
    write_activate_version(
        stream_id,
        version,
    )
    write_bookmark(state, stream_id, "wrote_initial_activate_version", True)
    write_state(state)


def prepare_stream(
    tap_stream_id: str,
    stream_defs: _STREAM_DEFS,
    stream_versions: _STREAM_VERSIONS,
    catalog: Catalog,
    config: Dict[str, Any],
    state: Dict[str, Any],
) -> datetime:
    """Instantiate a stream and its selected substreams and emit their
    preliminary Singer messages.

    The stream and every selected substream are registered in
    ``stream_defs`` and ``stream_versions``, their schemas are written and,
    on a first run, an activate-version message is written for each stream
    that is not a valid incremental stream (substreams always qualify).

    Returns:
        The value of ``get_filter_datetime`` for the main stream.
    """
    # mypy isn't properly considering is_substream
    stream_def: "Stream" = AVAILABLE_STREAMS[tap_stream_id](
        catalog, config, filter_record)  # type: ignore
    main_id = stream_def.tap_stream_id
    stream_defs[main_id] = stream_def

    if stream_def.has_substreams:
        stream_def.instantiate_substreams(catalog, filter_record)
        for substream_def in stream_def.substreams:
            sub_id = substream_def.tap_stream_id
            if not substream_def.is_selected:
                LOGGER.info('Skipping sub-stream "%s"', sub_id)
                continue

            # ignored type errors below seem to be caused by same issue as
            # https://github.com/python/mypy/issues/8993
            stream_defs[sub_id] = substream_def
            substream_version = get_full_table_version()
            stream_versions[sub_id] = substream_version
            write_schema(
                stream_name=sub_id,
                schema=substream_def.schema_dict,
                key_properties=substream_def.key_properties,
            )
            # All substreams are necessarily FULL_TABLE, so no need to
            # check if they're INCREMENTAL
            _write_initial_activate_version(sub_id, substream_version, state)

    write_schema(
        stream_name=main_id,
        schema=stream_def.schema_dict,
        key_properties=stream_def.key_properties,
    )
    filter_datetime = get_filter_datetime(stream_def, config["start_date"],
                                          state)

    # Valid incremental streams carry no version.
    if stream_def.is_valid_incremental:
        stream_version = None
    else:
        stream_version = get_full_table_version()
    stream_versions[main_id] = stream_version

    if not stream_def.is_valid_incremental:
        _write_initial_activate_version(main_id, stream_version, state)

    return filter_datetime