def test_round_trip(self):
    record_message = singer.RecordMessage(record={'name': 'foo'},
                                          stream='users')

    schema_message = singer.SchemaMessage(
        stream='users',
        key_properties=['name'],
        schema={'type': 'object',
                'properties': {
                    'name': {'type': 'string'}}})

    state_message = singer.StateMessage(value={'seq': 1})

    self.assertEqual(
        record_message,
        singer.parse_message(singer.format_message(record_message)))
    self.assertEqual(
        schema_message,
        singer.parse_message(singer.format_message(schema_message)))
    self.assertEqual(
        state_message,
        singer.parse_message(singer.format_message(state_message)))
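# The round trip works because format_message serializes each Message to a
# single JSON object carrying a "type" discriminator, and parse_message
# dispatches on that field. A minimal sketch of the wire format (key order
# is illustrative, not guaranteed across versions):

import singer

def demo_wire_format():
    msg = singer.RecordMessage(stream='users', record={'name': 'foo'})
    line = singer.format_message(msg)
    # line is one JSON object, e.g.:
    #   {"type": "RECORD", "stream": "users", "record": {"name": "foo"}}
    assert singer.parse_message(line) == msg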
def sync(config, state, catalog):
    for catalog_entry in catalog.streams:
        catalog_metadata = metadata.to_map(catalog_entry.metadata)
        replication_key = catalog_metadata.get((), {}).get('replication-key')

        state = singer.set_currently_syncing(state, catalog_entry.tap_stream_id)
        _emit(singer.StateMessage(value=state))

        if catalog_entry.is_view:
            key_properties = catalog_metadata.get((), {}).get('view-key-properties', [])
        else:
            key_properties = catalog_metadata.get((), {}).get('table-key-properties', [])

        _emit(singer.SchemaMessage(stream=catalog_entry.stream,
                                   schema=catalog_entry.schema.to_dict(),
                                   key_properties=key_properties,
                                   bookmark_properties=replication_key))

        with metrics.job_timer("sync_table") as timer:
            timer.tags["schema"] = catalog_entry.database
            timer.tags["table"] = catalog_entry.table
            _sync_table(config, state, catalog_entry)

    state = singer.set_currently_syncing(state, None)
    _emit(singer.StateMessage(value=state))
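# The repeated `.get((), {})` lookups above read stream-level metadata:
# metadata.to_map keys each entry by its breadcrumb tuple, and the empty
# tuple () is the stream-level breadcrumb. A small illustration (the
# metadata keys shown are the conventional ones; values are made up):

from singer import metadata as singer_metadata

def demo_metadata_map():
    raw = [
        {'breadcrumb': [], 'metadata': {'table-key-properties': ['id'],
                                        'replication-key': 'updated_at'}},
        {'breadcrumb': ['properties', 'id'],
         'metadata': {'inclusion': 'automatic'}},
    ]
    md_map = singer_metadata.to_map(raw)
    assert md_map[()]['replication-key'] == 'updated_at'
    assert md_map[('properties', 'id')]['inclusion'] == 'automatic'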
def generate_messages(conn, db_schema, catalog, state):
    catalog = resolve.resolve_catalog(discover_catalog(conn, db_schema),
                                      catalog, state)

    for catalog_entry in catalog.streams:
        state = singer.set_currently_syncing(state, catalog_entry.tap_stream_id)
        catalog_md = metadata.to_map(catalog_entry.metadata)

        if catalog_md.get((), {}).get('is-view'):
            key_properties = catalog_md.get((), {}).get('view-key-properties')
        else:
            key_properties = catalog_md.get((), {}).get('table-key-properties')

        bookmark_properties = catalog_md.get((), {}).get('replication-key')

        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        # Emit a SCHEMA message before we sync any records
        yield singer.SchemaMessage(stream=catalog_entry.stream,
                                   schema=catalog_entry.schema.to_dict(),
                                   key_properties=key_properties,
                                   bookmark_properties=bookmark_properties)

        # Emit a RECORD message for each record in the result set
        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table
            for message in sync_table(conn, catalog_entry, state):
                yield message

    # If we get here, we've finished processing all the streams, so clear
    # currently_syncing from the state and emit a state message.
    state = singer.set_currently_syncing(state, None)
    yield singer.StateMessage(value=copy.deepcopy(state))
def write_schema_message(catalog_entry, bookmark_properties=None):
    # Avoid a mutable default argument; fall back to a fresh list per call.
    if bookmark_properties is None:
        bookmark_properties = []
    key_properties = common.get_key_properties(catalog_entry)

    singer.write_message(singer.SchemaMessage(
        stream=catalog_entry.stream,
        schema=catalog_entry.schema.to_dict(),
        key_properties=key_properties,
        bookmark_properties=bookmark_properties))
def write_schema(schema, row, stream):
    schema_build_start_time = time.time()
    if common.row_to_schema(schema, row):
        singer.write_message(singer.SchemaMessage(
            stream=common.calculate_destination_stream_name(stream),
            schema=schema,
            key_properties=['_id']))
        common.SCHEMA_COUNT[stream['tap_stream_id']] += 1
    common.SCHEMA_TIMES[stream['tap_stream_id']] += time.time() - schema_build_start_time
def test_parse_message_schema_good(self):
    message = singer.parse_message(
        '{"type": "SCHEMA", "stream": "users", "schema": '
        '{"type": "object", "properties": {"name": {"type": "string"}}}, '
        '"key_properties": ["name"]}')
    self.assertEqual(
        message,
        singer.SchemaMessage(
            stream='users',
            key_properties=['name'],
            schema={'type': 'object',
                    'properties': {'name': {'type': 'string'}}}))
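# For comparison, a RECORD message parses the same way; a minimal sketch
# (this test is hypothetical — the message literal is illustrative, with
# field names per the Singer spec):
def test_parse_message_record_sketch(self):
    message = singer.parse_message(
        '{"type": "RECORD", "stream": "users", "record": {"name": "foo"}}')
    self.assertEqual(
        message,
        singer.RecordMessage(stream='users', record={'name': 'foo'}))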
def write_schema_message(catalog_entry, bookmark_properties=None):
    if bookmark_properties is None:
        bookmark_properties = []
    key_properties = get_key_properties(catalog_entry)

    singer.write_message(singer.SchemaMessage(
        stream=catalog_entry.stream,
        schema=catalog_entry.schema.to_dict(),
        key_properties=key_properties,
        bookmark_properties=bookmark_properties))
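# Both write_schema_message variants use bookmark_properties=None rather
# than a `[]` default: Python evaluates default arguments once at function
# definition time, so a mutable default is shared across calls. A
# standalone illustration (hypothetical functions, not from the taps):

def _buggy(items=[]):          # one list object shared by every call
    items.append('x')
    return items

def _fixed(items=None):        # fresh list per call
    if items is None:
        items = []
    items.append('x')
    return items

assert _buggy() == ['x']
assert _buggy() == ['x', 'x']  # state leaked from the first call
assert _fixed() == ['x']
assert _fixed() == ['x']       # no leakage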
def send_schema_message(stream, bookmark_properties):
    s_md = metadata.to_map(stream.metadata)
    if s_md.get((), {}).get('is-view'):
        key_properties = s_md.get((), {}).get('view-key-properties')
    else:
        key_properties = s_md.get((), {}).get('table-key-properties')

    schema_message = singer.SchemaMessage(
        stream=(stream.tap_stream_id or stream.stream),
        schema=stream.schema.to_dict(),
        key_properties=key_properties,
        bookmark_properties=bookmark_properties)
    singer.write_message(schema_message)
def sync_stream(config, state, stream):
    table_name = stream['tap_stream_id']

    md_map = metadata.to_map(stream['metadata'])
    replication_method = metadata.get(md_map, (), 'replication-method')
    key_properties = metadata.get(md_map, (), 'table-key-properties')

    # write state message with currently_syncing bookmark
    state = clear_state_on_replication_change(stream, state)
    state = singer.set_currently_syncing(state, table_name)
    singer.write_state(state)

    singer.write_message(singer.SchemaMessage(
        stream=table_name,
        schema=stream['schema'],
        key_properties=key_properties))

    rows_saved = 0
    if replication_method == 'FULL_TABLE':
        LOGGER.info("Syncing full table for stream: %s", table_name)
        rows_saved += sync_full_table(config, state, stream)
    elif replication_method == 'LOG_BASED':
        LOGGER.info("Syncing log based for stream: %s", table_name)

        if has_stream_aged_out(config, state, stream):
            LOGGER.info("Clearing state because stream has aged out")
            # Pass a default so a missing bookmark doesn't raise KeyError
            state.get('bookmarks', {}).pop(table_name, None)

        # TODO Check to see if latest stream ARN has changed and wipe state if so

        if not singer.get_bookmark(state, table_name, 'initial_full_table_complete'):
            msg = 'Must complete full table sync before replicating from dynamodb streams for %s'
            LOGGER.info(msg, table_name)

            # only mark latest sequence numbers in dynamo streams on first sync so
            # tap has a starting point after the full table sync
            if not singer.get_bookmark(state, table_name, 'version'):
                latest_sequence_numbers = get_latest_seq_numbers(config, stream)
                state = singer.write_bookmark(state, table_name,
                                              'shard_seq_numbers',
                                              latest_sequence_numbers)

            rows_saved += sync_full_table(config, state, stream)

        rows_saved += sync_log_based(config, state, stream)
    else:
        LOGGER.info('Unknown replication method: %s for stream: %s',
                    replication_method, table_name)

    return rows_saved
def sync_stream(config, state, stream):
    table_name = stream['tap_stream_id']

    md_map = metadata.to_map(stream['metadata'])
    replication_method = metadata.get(md_map, (), 'replication-method')
    key_properties = metadata.get(md_map, (), 'table-key-properties')

    # write state message with currently_syncing bookmark
    state = clear_state_on_replication_change(stream, state)
    state = singer.set_currently_syncing(state, table_name)
    singer.write_state(state)

    singer.write_message(singer.SchemaMessage(
        stream=table_name,
        schema=stream['schema'],
        key_properties=key_properties))

    rows_saved = 0
    if replication_method == 'FULL_TABLE':
        LOGGER.info("Syncing full table for stream: %s", table_name)
        rows_saved += full_table.sync(config, state, stream)
    elif replication_method == 'QUERY':
        LOGGER.info("Syncing via a query for stream: %s", table_name)
        rows_saved += sync_query(config, state, stream)
    elif replication_method == 'LOG_BASED':
        LOGGER.info("Syncing log based for stream: %s", table_name)

        if log_based.has_stream_aged_out(state, table_name):
            LOGGER.info("Clearing state because stream has aged out")
            # Pass a default so a missing bookmark doesn't raise KeyError
            state.get('bookmarks', {}).pop(table_name, None)

        if not singer.get_bookmark(state, table_name, 'initial_full_table_complete'):
            msg = 'Must complete full table sync before replicating from dynamodb streams for %s'
            LOGGER.info(msg, table_name)

            state = log_based.get_initial_bookmarks(config, state, table_name)
            singer.write_state(state)

            rows_saved += full_table.sync(config, state, stream)

        rows_saved += log_based.sync(config, state, stream)
    else:
        LOGGER.info('Unknown replication method: %s for stream: %s',
                    replication_method, table_name)

    state = singer.write_bookmark(state, table_name, 'success_timestamp',
                                  singer.utils.strftime(singer.utils.now()))
    singer.write_state(state)

    return rows_saved
def sync_endpoint(url, state):
    '''Syncs the url and paginates through until there are no more "next"
    urls. Yields schema, record, and state messages. Modifies state by
    setting the NEXT field every time we get a next url from Shippo. This
    allows us to resume paginating if we're terminated.
    '''
    stream = parse_stream_from_url(url)
    yield singer.SchemaMessage(stream=stream,
                               schema=load_schema(stream),
                               key_properties=["object_id"])

    if LAST_START_DATE in state:
        start = pendulum.parse(state[LAST_START_DATE]).subtract(days=2)
    else:
        start = pendulum.parse(CONFIG[START_DATE])

    # The Shippo API does not return data from long ago, so we only try to
    # replicate the last 60 days
    sixty_days_ago = pendulum.now().subtract(days=60)
    bounded_start = max(start, sixty_days_ago)

    LOGGER.info("Replicating all %s from %s", stream, bounded_start)

    rows_read = 0
    rows_written = 0
    finished = False
    with metrics.record_counter(parse_stream_from_url(url)) as counter:
        while url and not finished:
            state[NEXT] = url
            yield singer.StateMessage(value=state)

            data = request(url)

            for row in data['results']:
                counter.increment()
                rows_read += 1
                updated = pendulum.parse(row[OBJECT_UPDATED])
                if updated >= bounded_start:
                    row = fix_extra_map(row)
                    yield singer.RecordMessage(stream=stream, record=row)
                    rows_written += 1
                else:
                    finished = True
                    break

            url = data.get(NEXT)

    if rows_read:
        LOGGER.info("Done syncing %s. Read %d records, wrote %d (%.2f%%)",
                    stream, rows_read, rows_written,
                    100.0 * rows_written / float(rows_read))
def generate_messages(con, catalog, state):
    catalog = resolve_catalog(con, catalog, state)

    for catalog_entry in catalog.streams:
        state = singer.set_currently_syncing(state, catalog_entry.tap_stream_id)

        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        md_map = metadata.to_map(catalog_entry.metadata)
        replication_method = md_map.get((), {}).get('replication-method')
        replication_key = singer.get_bookmark(state,
                                              catalog_entry.tap_stream_id,
                                              'replication_key')

        if catalog_entry.is_view:
            key_properties = md_map.get((), {}).get('view-key-properties')
        else:
            key_properties = md_map.get((), {}).get('table-key-properties')

        # Emit a SCHEMA message before we sync any records
        yield singer.SchemaMessage(
            stream=catalog_entry.stream,
            schema=catalog_entry.schema.to_dict(),
            key_properties=key_properties,
            bookmark_properties=replication_key
        )

        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table

            log_engine(con, catalog_entry)

            if replication_method == 'INCREMENTAL':
                for message in incremental.sync_table(con, catalog_entry, state):
                    yield message
            elif replication_method == 'FULL_TABLE':
                for message in full_table.sync_table(con, catalog_entry, state):
                    yield message
            else:
                raise Exception("only INCREMENTAL and FULL TABLE replication "
                                "methods are supported")

    # If we get here, we've finished processing all the streams, so clear
    # currently_syncing from the state and emit a state message.
    state = singer.set_currently_syncing(state, None)
    yield singer.StateMessage(value=copy.deepcopy(state))
def sync_table(conn, catalog_entry, state):
    LOGGER.info("Beginning sync for %s.%s table.",
                catalog_entry.database, catalog_entry.table)

    entity = catalog_entry.tap_stream_id
    if not entity:
        return

    # tell singer about the structure of this schema
    yield singer.SchemaMessage(
        stream=entity,
        schema=catalog_entry.schema.to_dict(),
        key_properties=catalog_entry.key_properties
    )

    start = get_start(entity)
    formatted_start = dateutil.parser.parse(start).strftime(DATETIME_FMT)
    params = {
        'start': formatted_start,
    }

    with metrics.record_counter(None) as counter:
        counter.tags['database'] = catalog_entry.database
        counter.tags['table'] = catalog_entry.table

        for rows_saved, row in enumerate(gen_request(conn, catalog_entry, params)):
            counter.increment()
            transform_data(row, catalog_entry.schema)
            yield singer.RecordMessage(
                stream=catalog_entry.stream,
                record=row
            )
            state = singer.write_bookmark(
                state,
                catalog_entry.tap_stream_id,
                'last_record',
                row['datemodified']
            )
            if rows_saved % 1000 == 0:
                yield singer.StateMessage(value=copy.deepcopy(state))

    yield singer.StateMessage(value=copy.deepcopy(state))
def generate_messages(conn, catalog, state):
    for catalog_entry in catalog.streams:
        # Emit a state message to indicate that we've started this stream
        yield singer.StateMessage(value=copy.deepcopy(state))

        # Emit a SCHEMA message before we sync any records
        yield singer.SchemaMessage(
            stream=catalog_entry.stream,
            schema=catalog_entry.schema.to_dict(),
            key_properties=catalog_entry.key_properties
        )

        # Emit a RECORD message for each record in the result set
        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table
            for message in sync_table(conn, catalog_entry, state):
                yield message

    # Emit a state message
    yield singer.StateMessage(value=copy.deepcopy(state))
def overloaded_parse_message(msg):
    """Parse a message string into a Message object."""
    # Parse numbers as Decimals (use_decimal=True) so numeric values
    # round-trip exactly instead of going through lossy float conversion.
    obj = simplejson.loads(msg, use_decimal=True)
    msg_type = _required_key(obj, 'type')

    if msg_type == 'RECORD':
        time_extracted = obj.get('time_extracted')
        if time_extracted:
            try:
                time_extracted = ciso8601.parse_datetime(time_extracted)
            except Exception:
                time_extracted = None

        return singer.RecordMessage(stream=_required_key(obj, 'stream'),
                                    record=_required_key(obj, 'record'),
                                    version=obj.get('version'),
                                    time_extracted=time_extracted)

    if msg_type == 'SCHEMA':
        return singer.SchemaMessage(
            stream=_required_key(obj, 'stream'),
            schema=_required_key(obj, 'schema'),
            key_properties=_required_key(obj, 'key_properties'),
            bookmark_properties=obj.get('bookmark_properties'))

    if msg_type == 'STATE':
        return singer.StateMessage(value=_required_key(obj, 'value'))

    if msg_type == 'ACTIVATE_VERSION':
        return singer.ActivateVersionMessage(
            stream=_required_key(obj, 'stream'),
            version=_required_key(obj, 'version'))

    return None
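# A quick sketch of what use_decimal=True buys (standard simplejson
# behavior; the message literal is illustrative):

from decimal import Decimal
import simplejson

def demo_decimal_parsing():
    with_decimal = simplejson.loads('{"amount": 1.1}', use_decimal=True)
    plain_float = simplejson.loads('{"amount": 1.1}')

    assert with_decimal['amount'] == Decimal('1.1')   # literal digits kept
    assert isinstance(plain_float['amount'], float)

    # The float path picks up binary rounding error; the Decimal path doesn't:
    assert with_decimal['amount'] + Decimal('2.2') == Decimal('3.3')
    assert 1.1 + 2.2 != 3.3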
def generate_messages(conn, catalog, state):
    for catalog_entry in catalog.streams:
        if not catalog_entry.is_selected():
            continue

        # Emit a SCHEMA message before we sync any records
        yield singer.SchemaMessage(
            stream=catalog_entry.stream,
            schema=catalog_entry.schema.to_dict(),
            key_properties=catalog_entry.key_properties,
            bookmark_properties=[REPLICATION_KEY]
        )

        # Emit a RECORD message for each record in the result set
        with metrics.job_timer('sync_table') as timer:
            timer.tags['database'] = catalog_entry.database
            timer.tags['table'] = catalog_entry.table
            for message in sync_table(conn, catalog_entry, state):
                yield message

    # Emit a state message
    yield singer.StateMessage(value=copy.deepcopy(state))
def generate_schema_message(catalog_entry, key_properties, bookmark_properties):
    return singer.SchemaMessage(stream=catalog_entry.stream,
                                schema=catalog_entry.schema.to_dict(),
                                key_properties=key_properties,
                                bookmark_properties=bookmark_properties)
def sync_collection(client, stream, state, projection):
    tap_stream_id = stream['tap_stream_id']
    LOGGER.info('Starting full table sync for %s', tap_stream_id)

    md_map = metadata.to_map(stream['metadata'])
    database_name = metadata.get(md_map, (), 'database-name')

    db = client[database_name]
    collection = db[stream['stream']]

    # before writing the table version to state, check if we had one to begin with
    first_run = singer.get_bookmark(state, stream['tap_stream_id'], 'version') is None

    # last run was interrupted if there is a last_id_fetched bookmark
    was_interrupted = singer.get_bookmark(state,
                                          stream['tap_stream_id'],
                                          'last_id_fetched') is not None

    # pick a new table version if last run wasn't interrupted
    if was_interrupted:
        stream_version = singer.get_bookmark(state, stream['tap_stream_id'], 'version')
    else:
        stream_version = int(time.time() * 1000)

    state = singer.write_bookmark(state, stream['tap_stream_id'], 'version', stream_version)
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

    activate_version_message = singer.ActivateVersionMessage(
        stream=common.calculate_destination_stream_name(stream),
        version=stream_version)

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if first_run:
        singer.write_message(activate_version_message)

    if singer.get_bookmark(state, stream['tap_stream_id'], 'max_id_value'):
        # There is a bookmark
        max_id_value = singer.get_bookmark(state, stream['tap_stream_id'], 'max_id_value')
        max_id_type = singer.get_bookmark(state, stream['tap_stream_id'], 'max_id_type')
        max_id_value = common.string_to_class(max_id_value, max_id_type)
    else:
        max_id_value = get_max_id_value(collection)

    last_id_fetched = singer.get_bookmark(state, stream['tap_stream_id'], 'last_id_fetched')

    if max_id_value:
        # Write the bookmark if max_id_value is defined
        state = singer.write_bookmark(
            state, stream['tap_stream_id'], 'max_id_value',
            common.class_to_string(max_id_value, max_id_value.__class__.__name__))
        state = singer.write_bookmark(state, stream['tap_stream_id'],
                                      'max_id_type',
                                      max_id_value.__class__.__name__)

    find_filter = {'$lte': max_id_value}

    if last_id_fetched:
        last_id_fetched_type = singer.get_bookmark(state,
                                                   stream['tap_stream_id'],
                                                   'last_id_fetched_type')
        find_filter['$gte'] = common.string_to_class(last_id_fetched, last_id_fetched_type)

    query_message = 'Querying {} with:\n\tFind Parameters: {}'.format(
        stream['tap_stream_id'], find_filter)
    if projection:
        query_message += '\n\tProjection: {}'.format(projection)
    # pylint: disable=logging-format-interpolation
    LOGGER.info(query_message)

    with collection.find({'_id': find_filter},
                         projection,
                         sort=[("_id", pymongo.ASCENDING)]) as cursor:
        rows_saved = 0
        time_extracted = utils.now()
        start_time = time.time()

        schema = stream['schema'] or {"type": "object", "properties": {}}
        for row in cursor:
            rows_saved += 1

            schema_build_start_time = time.time()
            if common.row_to_schema(schema, row):
                singer.write_message(singer.SchemaMessage(
                    stream=common.calculate_destination_stream_name(stream),
                    schema=schema,
                    key_properties=['_id']))
                common.SCHEMA_COUNT[stream['tap_stream_id']] += 1
            common.SCHEMA_TIMES[stream['tap_stream_id']] += time.time() - schema_build_start_time

            record_message = common.row_to_singer_record(stream,
                                                         row,
                                                         stream_version,
                                                         time_extracted)
            singer.write_message(record_message)

            state = singer.write_bookmark(
                state, stream['tap_stream_id'], 'last_id_fetched',
                common.class_to_string(row['_id'], row['_id'].__class__.__name__))
            state = singer.write_bookmark(state, stream['tap_stream_id'],
                                          'last_id_fetched_type',
                                          row['_id'].__class__.__name__)

            if rows_saved % common.UPDATE_BOOKMARK_PERIOD == 0:
                singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

        common.COUNTS[tap_stream_id] += rows_saved
        common.TIMES[tap_stream_id] += time.time() - start_time

    # clear max pk value and last pk fetched upon successful sync
    singer.clear_bookmark(state, stream['tap_stream_id'], 'max_id_value')
    singer.clear_bookmark(state, stream['tap_stream_id'], 'max_id_type')
    singer.clear_bookmark(state, stream['tap_stream_id'], 'last_id_fetched')
    singer.clear_bookmark(state, stream['tap_stream_id'], 'last_id_fetched_type')

    state = singer.write_bookmark(state,
                                  stream['tap_stream_id'],
                                  'initial_full_table_complete', True)

    singer.write_message(activate_version_message)

    LOGGER.info('Synced %s records for %s', rows_saved, tap_stream_id)
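# The bookmark dance above implements a resumable full-table scan: the tap
# pins the maximum _id when the sync starts (so mid-sync inserts don't
# extend the scan) and pages upward from the last _id it persisted. A
# condensed sketch of just the query-window construction (helper name and
# values are hypothetical):

def build_resume_filter(max_id_value, last_id_fetched=None):
    """Build the _id window for a resumable full-table scan."""
    id_window = {'$lte': max_id_value}
    if last_id_fetched is not None:
        id_window['$gte'] = last_id_fetched
    return {'_id': id_window}

# Against a live collection this would be used as:
#   collection.find(build_resume_filter(max_id, last_id),
#                   sort=[('_id', pymongo.ASCENDING)])
assert build_resume_filter(100, 42) == {'_id': {'$lte': 100, '$gte': 42}}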
def write_schema_message(stream):
    singer.write_message(singer.SchemaMessage(
        stream=common.calculate_destination_stream_name(stream),
        schema=stream['schema'],
        key_properties=['_id']))
def sync_collection(client, stream, state, projection):
    tap_stream_id = stream['tap_stream_id']
    LOGGER.info('Starting incremental sync for %s', tap_stream_id)

    stream_metadata = metadata.to_map(stream['metadata']).get(())
    collection = client[stream_metadata['database-name']][stream['stream']]

    # before writing the table version to state, check if we had one to begin with
    first_run = singer.get_bookmark(state, stream['tap_stream_id'], 'version') is None

    # pick a new table version on the first run; otherwise reuse the bookmarked one
    if first_run:
        stream_version = int(time.time() * 1000)
    else:
        stream_version = singer.get_bookmark(state, stream['tap_stream_id'], 'version')

    state = singer.write_bookmark(state,
                                  stream['tap_stream_id'],
                                  'version',
                                  stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream=common.calculate_destination_stream_name(stream),
        version=stream_version
    )

    # For the initial replication, emit an ACTIVATE_VERSION message
    # at the beginning so the records show up right away.
    if first_run:
        singer.write_message(activate_version_message)

    # get replication key, and bookmarked value/type
    stream_state = state.get('bookmarks', {}).get(tap_stream_id, {})

    replication_key_name = stream_metadata.get('replication-key')
    replication_key_value_bookmark = stream_state.get('replication_key_value')

    # write state message
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

    # create query
    find_filter = {}
    if replication_key_value_bookmark:
        find_filter[replication_key_name] = {}
        find_filter[replication_key_name]['$gte'] = \
            common.string_to_class(replication_key_value_bookmark,
                                   stream_state.get('replication_key_type'))

    # log query
    query_message = 'Querying {} with:\n\tFind Parameters: {}'.format(tap_stream_id, find_filter)
    if projection:
        query_message += '\n\tProjection: {}'.format(projection)
    LOGGER.info(query_message)

    # query collection
    schema = {"type": "object", "properties": {}}
    with collection.find(find_filter,
                         projection,
                         sort=[(replication_key_name, pymongo.ASCENDING)]) as cursor:
        rows_saved = 0
        time_extracted = utils.now()
        start_time = time.time()

        for row in cursor:
            schema_build_start_time = time.time()
            if common.row_to_schema(schema, row):
                singer.write_message(singer.SchemaMessage(
                    stream=common.calculate_destination_stream_name(stream),
                    schema=schema,
                    key_properties=['_id']))
                common.SCHEMA_COUNT[tap_stream_id] += 1
            common.SCHEMA_TIMES[tap_stream_id] += time.time() - schema_build_start_time

            record_message = common.row_to_singer_record(stream,
                                                         row,
                                                         stream_version,
                                                         time_extracted)
            # gen_schema = common.row_to_schema_message(schema, record_message.record, row)
            # if DeepDiff(schema, gen_schema, ignore_order=True) != {}:
            #     emit gen_schema
            #     schema = gen_schema
            singer.write_message(record_message)
            rows_saved += 1

            update_bookmark(row, state, tap_stream_id, replication_key_name)

            if rows_saved % common.UPDATE_BOOKMARK_PERIOD == 0:
                singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))

        common.COUNTS[tap_stream_id] += rows_saved
        common.TIMES[tap_stream_id] += time.time() - start_time

    singer.write_message(activate_version_message)

    LOGGER.info('Synced %s records for %s', rows_saved, tap_stream_id)
def sync_endpoint(initial_url, state):
    '''Syncs the url and paginates through until there are no more "next"
    urls. Yields schema, record, and state messages. Modifies state by
    setting the NEXT field every time we get a next url from Shippo. This
    allows us to resume paginating if we're terminated.
    '''
    stream = parse_stream_from_url(initial_url)
    yield singer.SchemaMessage(stream=stream,
                               schema=load_schema(stream),
                               key_properties=["object_id"])

    # The Shippo API does not return data from long ago, so we only try to
    # replicate the last 60 days
    # Some streams allow us to page by date, so we can request historical data for them
    sliding_window_key = SLIDING_WINDOW_STREAMS.get(stream)
    if sliding_window_key:
        bounded_start = get_start(state)
        sliding_query_start = bounded_start
        sliding_query_end = bounded_start.add(days=SLIDING_WINDOW_DAYS)
        # Use %H (24-hour clock); %I would silently fold PM hours into AM
        url = initial_url.format(
            sliding_window_key,
            sliding_query_start.strftime("%Y-%m-%dT%H:%M:%SZ"),
            sliding_query_end.strftime("%Y-%m-%dT%H:%M:%SZ"))
    else:
        bounded_start = max(get_start(state), pendulum.now().subtract(days=60))
        url = initial_url

    LOGGER.info("Replicating all %s from %s", stream, bounded_start)

    rows_read = 0
    rows_written = 0
    with metrics.record_counter(parse_stream_from_url(url)) as counter:
        endpoint_start = pendulum.now()
        while url:
            state[NEXT] = url
            yield singer.StateMessage(value=state)

            data = request(url)

            for row in data['results']:
                counter.increment()
                rows_read += 1
                updated = pendulum.parse(row[OBJECT_UPDATED])
                if updated >= bounded_start:
                    row = fix_extra_map(row)
                    yield singer.RecordMessage(stream=stream, record=row)
                    rows_written += 1

            if data.get(NEXT):
                url = data.get(NEXT)
            elif sliding_window_key and sliding_query_end < endpoint_start:
                sliding_query_start = sliding_query_end
                sliding_query_end = sliding_query_start.add(days=SLIDING_WINDOW_DAYS)
                url = initial_url.format(
                    sliding_window_key,
                    sliding_query_start.strftime("%Y-%m-%dT%H:%M:%SZ"),
                    sliding_query_end.strftime("%Y-%m-%dT%H:%M:%SZ"))
            else:
                url = None

    if rows_read:
        LOGGER.info("Done syncing %s. Read %d records, wrote %d (%.2f%%)",
                    stream, rows_read, rows_written,
                    100.0 * rows_written / float(rows_read))
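# The sliding-window branch above advances in fixed-size date slices until
# the window end catches up to the moment the sync started. A standalone
# sketch of just that stepping (window size and dates are illustrative,
# and the stop condition is simplified):

import pendulum

WINDOW_DAYS = 7  # hypothetical stand-in for SLIDING_WINDOW_DAYS

def date_windows(start, until):
    """Yield (window_start, window_end) slices from start up to until."""
    window_start = start
    window_end = start.add(days=WINDOW_DAYS)
    while window_start < until:
        yield window_start, window_end
        window_start = window_end
        window_end = window_start.add(days=WINDOW_DAYS)

for lo, hi in date_windows(pendulum.datetime(2020, 1, 1),
                           pendulum.datetime(2020, 1, 20)):
    print(lo.strftime('%Y-%m-%dT%H:%M:%SZ'), '->',
          hi.strftime('%Y-%m-%dT%H:%M:%SZ'))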
def write_schema_message(stream):
    singer.write_message(singer.SchemaMessage(
        stream=stream['tap_stream_id'],
        schema=stream['schema'],
        key_properties=['_id']))