def do_sync(sf, catalog, state):
    """Sync every selected stream in ``catalog`` and emit Singer messages.

    Iterates ``catalog["streams"]``, skipping unselected streams and (when
    resuming) streams that precede ``state["current_stream"]``. For each
    synced stream it writes the schema, resumes any in-flight Bulk Query job
    recorded in state, or runs a fresh sync via ``sync_stream``.

    :param sf: Salesforce client handed through to the sync helpers.
    :param catalog: discovered catalog dict with a ``streams`` list.
    :param state: mutable Singer state dict; updated and emitted as we go.
    """
    starting_stream = state.get("current_stream")

    if starting_stream:
        LOGGER.info("Resuming sync from %s", starting_stream)
    else:
        LOGGER.info("Starting sync")

    for catalog_entry in catalog["streams"]:
        stream_version = get_stream_version(catalog_entry, state)
        stream = catalog_entry['stream']
        stream_alias = catalog_entry.get('stream_alias')
        stream_name = catalog_entry["tap_stream_id"]
        activate_version_message = singer.ActivateVersionMessage(
            stream=(stream_alias or stream), version=stream_version)

        # Convert the catalog metadata once and reuse it for the selection
        # check, replication key, and key properties (previously converted
        # three separate times per stream).
        mdata = metadata.to_map(catalog_entry['metadata'])
        replication_key = mdata.get((), {}).get('replication-key')

        if not stream_is_selected(mdata):
            LOGGER.info("%s: Skipping - not selected", stream_name)
            continue

        if starting_stream:
            if starting_stream == stream_name:
                LOGGER.info("%s: Resuming", stream_name)
                starting_stream = None
            else:
                LOGGER.info("%s: Skipping - already synced", stream_name)
                continue
        else:
            LOGGER.info("%s: Starting", stream_name)

        state["current_stream"] = stream_name
        singer.write_state(state)

        key_properties = mdata.get((), {}).get('table-key-properties')
        singer.write_schema(
            stream,
            catalog_entry['schema'],
            key_properties,
            replication_key,
            stream_alias)

        job_id = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'JobID')
        if job_id:
            with metrics.record_counter(stream) as counter:
                LOGGER.info("Found JobID from previous Bulk Query. Resuming sync for job: %s", job_id)
                # Resuming a sync should clear out the remaining state once finished
                counter = resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter)
                LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter.value)

                # Remove Job info from state once we complete this resumed query.
                # One of a few cases could have occurred:
                # 1. The job succeeded, in which case make JobHighestBookmarkSeen
                #    the new bookmark
                # 2. The job partially completed, in which case make
                #    JobHighestBookmarkSeen the new bookmark, or the existing
                #    bookmark if no bookmark exists for the Job.
                # 3. The job completely failed, in which case maintain the
                #    existing bookmark, or None if no bookmark
                state.get('bookmarks', {}).get(catalog_entry['tap_stream_id'], {}).pop('JobID', None)
                state.get('bookmarks', {}).get(catalog_entry['tap_stream_id'], {}).pop('BatchIDs', None)
                bookmark = state.get('bookmarks', {}) \
                                .get(catalog_entry['tap_stream_id'], {}) \
                                .pop('JobHighestBookmarkSeen', None)
                existing_bookmark = state.get('bookmarks', {}) \
                                         .get(catalog_entry['tap_stream_id'], {}) \
                                         .pop(replication_key, None)
                # If job is removed, reset to existing bookmark or None
                state = singer.write_bookmark(
                    state,
                    catalog_entry['tap_stream_id'],
                    replication_key,
                    bookmark or existing_bookmark)
                singer.write_state(state)
        else:
            # Tables with a replication_key or an empty bookmark will emit an
            # activate_version at the beginning of their sync
            bookmark_is_empty = state.get('bookmarks', {}).get(
                catalog_entry['tap_stream_id']) is None

            if replication_key or bookmark_is_empty:
                singer.write_message(activate_version_message)
                state = singer.write_bookmark(state,
                                              catalog_entry['tap_stream_id'],
                                              'version',
                                              stream_version)
            counter = sync_stream(sf, catalog_entry, state)
            LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter.value)

    state["current_stream"] = None
    singer.write_state(state)
    LOGGER.info("Finished sync")
# NOTE(review): this is a SECOND definition of do_sync in the same file; at
# import time it silently shadows the earlier one. Presumably two revisions
# were pasted together — confirm which version is intended and delete the other.
def do_sync(sf, catalog, state):
    """Sync every selected stream in ``catalog`` and emit Singer messages.

    Older revision: reads ``replication_key`` / ``key_properties`` directly
    off the catalog entry rather than from the metadata map, and does not
    fall back to a pre-existing bookmark when finishing a resumed Bulk job.

    :param sf: Salesforce client passed through to the sync helpers.
    :param catalog: discovered catalog dict with a ``streams`` list.
    :param state: mutable Singer state dict; updated and emitted as we go.
    """
    starting_stream = state.get("current_stream")

    if starting_stream:
        LOGGER.info("Resuming sync from %s", starting_stream)
    else:
        LOGGER.info("Starting sync")

    for catalog_entry in catalog["streams"]:
        stream_version = get_stream_version(catalog_entry, state)
        stream = catalog_entry['stream']
        stream_alias = catalog_entry.get('stream_alias')
        stream_name = catalog_entry["tap_stream_id"]
        activate_version_message = singer.ActivateVersionMessage(
            stream=(stream_alias or stream), version=stream_version)

        replication_key = catalog_entry.get('replication_key')

        mdata = metadata.to_map(catalog_entry['metadata'])

        # Unselected streams are skipped entirely.
        if not stream_is_selected(mdata):
            LOGGER.info("%s: Skipping - not selected", stream_name)
            continue

        # When resuming, skip streams until we reach the one recorded in
        # state["current_stream"], then clear the marker and sync from there.
        if starting_stream:
            if starting_stream == stream_name:
                LOGGER.info("%s: Resuming", stream_name)
                starting_stream = None
            else:
                LOGGER.info("%s: Skipping - already synced", stream_name)
                continue
        else:
            LOGGER.info("%s: Starting", stream_name)

        # Record which stream is in flight so an interrupted run can resume.
        state["current_stream"] = stream_name
        singer.write_state(state)

        singer.write_schema(stream,
                            catalog_entry['schema'],
                            catalog_entry['key_properties'],
                            replication_key,
                            stream_alias)

        # A stored JobID means a Bulk Query was interrupted mid-flight.
        job_id = singer.get_bookmark(state, catalog_entry['tap_stream_id'], 'JobID')
        if job_id:
            with metrics.record_counter(stream) as counter:
                LOGGER.info("Resuming sync for stream: %s", stream_name)
                # Resuming a sync should clear out the remaining state once finished
                counter = resume_syncing_bulk_query(sf, catalog_entry, job_id, state, counter)
                LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter.value)
                # Drop the job bookkeeping and promote the highest bookmark
                # seen during the job to the stream's replication bookmark.
                # NOTE(review): unlike the other do_sync revision, this writes
                # the bookmark even when it is None — confirm that is intended.
                state.get('bookmarks', {}).get(catalog_entry['tap_stream_id'], {}).pop('JobID', None)
                state.get('bookmarks', {}).get(catalog_entry['tap_stream_id'], {}).pop('BatchIDs', None)
                bookmark = state.get('bookmarks', {}).get(catalog_entry['tap_stream_id'], {}).pop('JobHighestBookmarkSeen', None)
                state = singer.write_bookmark(state, catalog_entry['tap_stream_id'], replication_key, bookmark)
                singer.write_state(state)
        else:
            # Tables with a replication_key or an empty bookmark will emit an
            # activate_version at the beginning of their sync
            bookmark_is_empty = state.get('bookmarks', {}).get(
                catalog_entry['tap_stream_id']) is None

            if replication_key or bookmark_is_empty:
                singer.write_message(activate_version_message)
                state = singer.write_bookmark(state, catalog_entry['tap_stream_id'], 'version', stream_version)
            counter = sync_stream(sf, catalog_entry, state)
            LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter.value)

    # All streams done: clear the resume marker and emit final state.
    state["current_stream"] = None
    singer.write_state(state)
    LOGGER.info("Finished sync")