def do_sync(client, catalog, state, start_date):
    """Sync every selected stream: write its SCHEMA message, then sync its records via sync_stream."""
    selected_stream_names = get_selected_streams(catalog)
    validate_dependencies(selected_stream_names)
    populate_class_schemas(catalog, selected_stream_names)
    all_sub_stream_names = get_sub_stream_names()

    skipped_streams = []

    for stream in catalog.streams:
        stream_name = stream.tap_stream_id
        mdata = metadata.to_map(stream.metadata)
        if stream_name not in selected_stream_names:
            skipped_streams.append(stream_name)
            continue

        # if starting_stream:
        #     if starting_stream == stream_name:
        #         LOGGER.info("%s: Resuming", stream_name)
        #         starting_stream = None
        #     else:
        #         LOGGER.info("%s: Skipping - already synced", stream_name)
        #         continue
        # else:
        #     LOGGER.info("%s: Starting", stream_name)

        # Write the SCHEMA message for the selected parent stream
        key_properties = metadata.get(mdata, (), "table-key-properties")
        singer.write_schema(stream_name, stream.schema.to_dict(), key_properties)

        # Write SCHEMA messages for any selected sub streams of this stream
        sub_stream_names = SUB_STREAMS.get(stream_name)
        if sub_stream_names:
            for sub_stream_name in sub_stream_names:
                if sub_stream_name not in selected_stream_names:
                    continue
                sub_stream = STREAMS[sub_stream_name].stream
                sub_mdata = metadata.to_map(sub_stream.metadata)
                sub_key_properties = metadata.get(sub_mdata, (), "table-key-properties")
                singer.write_schema(sub_stream.tap_stream_id,
                                    sub_stream.schema.to_dict(),
                                    sub_key_properties)

        # parent stream will sync sub stream
        if stream_name in all_sub_stream_names:
            continue

        LOGGER.info("%s: Starting sync", stream_name)
        instance = STREAMS[stream_name](client)
        counter_value = sync_stream(state, start_date, instance)
        # singer.write_state(state)
        LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)

    if skipped_streams:
        LOGGER.info("The following streams were skipped: {}".format(
            ", ".join(skipped_streams)))

    zendesk_metrics.log_aggregate_rates()
    singer.write_state(state)
    LOGGER.info("Finished sync")
    zendesk_metrics.log_aggregate_rates()

def do_sync(client, catalog, state, config):
    """Sync every selected stream: write its SCHEMA message (including any sideloaded objects), then sync its records via sync_stream."""
    selected_stream_names = get_selected_streams(catalog)
    validate_dependencies(selected_stream_names)
    populate_class_schemas(catalog, selected_stream_names)
    all_sub_stream_names = get_sub_stream_names()

    for stream in catalog.streams:
        stream_name = stream.tap_stream_id
        mdata = metadata.to_map(stream.metadata)
        if stream_name not in selected_stream_names:
            LOGGER.info("%s: Skipping - not selected", stream_name)
            continue

        key_properties = metadata.get(mdata, (), 'table-key-properties')

        # Extend the stream schema with any sideloaded objects requested in metadata
        sideload_objects = metadata.get(mdata, (), 'sideload-objects')
        if sideload_objects:
            stream_schema = get_side_load_schemas(sideload_objects, stream)
            stream.schema = Schema.from_dict(stream_schema)

        singer.write_schema(stream_name, stream.schema.to_dict(), key_properties)

        # Write SCHEMA messages for any selected sub streams of this stream
        sub_stream_names = SUB_STREAMS.get(stream_name)
        if sub_stream_names:
            for sub_stream_name in sub_stream_names:
                if sub_stream_name not in selected_stream_names:
                    continue
                sub_stream = STREAMS[sub_stream_name].stream
                sub_mdata = metadata.to_map(sub_stream.metadata)
                sub_key_properties = metadata.get(sub_mdata, (), 'table-key-properties')

                sideload_objects = metadata.get(mdata, (), 'sideload-objects')
                if sideload_objects:
                    sub_stream_schema = get_side_load_schemas(
                        sideload_objects, sub_stream)
                    sub_stream.schema = Schema.from_dict(sub_stream_schema)

                singer.write_schema(sub_stream.tap_stream_id,
                                    sub_stream.schema.to_dict(),
                                    sub_key_properties)

        # parent stream will sync sub stream
        if stream_name in all_sub_stream_names:
            continue

        LOGGER.info("%s: Starting sync", stream_name)
        instance = STREAMS[stream_name](client, config)
        counter_value = sync_stream(state, config.get('start_date'), instance)
        singer.write_state(state)
        LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)

    zendesk_metrics.log_aggregate_rates()
    singer.write_state(state)
    LOGGER.info("Finished sync")
    zendesk_metrics.log_aggregate_rates()
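

# Usage sketch (assumption, not part of the original module): a Singer tap's
# entry point typically parses config/state/catalog with singer-python's
# utils.parse_args and then calls do_sync. REQUIRED_CONFIG_KEYS and
# build_client() below are hypothetical placeholders for this tap's real
# required keys and client construction.
#
# from singer import utils
#
# REQUIRED_CONFIG_KEYS = ["start_date", "subdomain"]  # assumed keys
#
# def main():
#     args = utils.parse_args(REQUIRED_CONFIG_KEYS)
#     client = build_client(args.config)  # hypothetical helper returning the Zendesk API client
#     do_sync(client, args.catalog, args.state, args.config)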