def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    """Full-table sync of one stream, resumable by primary key when possible.

    An ACTIVATE_VERSION message is written up front on the initial
    replication and written again once the query has been synced.

    Args:
        mysql_conn: connection object handed to ``connect_with_backoff``.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read and cleared.
        columns: columns passed to the SELECT generator.
        stream_version: version used for the ACTIVATE_VERSION message and
            forwarded to ``common.sync_query``.
    """
    stream_id = catalog_entry.tap_stream_id

    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), stream_id, state)

    stream_bookmark = state.get('bookmarks', {}).get(stream_id, {})
    has_version_key = 'version' in stream_bookmark

    first_sync_done = singer.get_bookmark(state, stream_id, 'initial_full_table_complete')
    bookmarked_version = singer.get_bookmark(state, stream_id, 'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit ACTIVATE_VERSION before any rows so
    # the records show up right away. A bookmark holding an explicit
    # 'version' of None suppresses this early message.
    version_explicitly_cleared = has_version_key and bookmarked_version is None
    if not (first_sync_done or version_explicitly_cleared):
        singer.write_message(activate_version_message)

    resumable = sync_is_resumable(mysql_conn, catalog_entry)
    pk_clause = ""

    with connect_with_backoff(mysql_conn) as conn, conn.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)

        if resumable:
            LOGGER.info(
                "Full table sync is resumable based on primary key definition, will replicate incrementally"
            )
            state = update_incremental_full_table_state(catalog_entry, state, cursor)
            pk_clause = generate_pk_clause(catalog_entry, state)

        select_sql += pk_clause

        # Best effort: when the temp-table helper succeeds, its return value
        # replaces the SELECT; on any failure the SELECT built above is kept.
        try:
            select_sql = _create_temp_table(mysql_conn, catalog_entry, columns, pk_clause)
        except Exception as err:
            logging.warning("creating temp table failed: {}".format(str(err)))

        common.sync_query(cursor, catalog_entry, state, select_sql, columns, stream_version, {})

    # Clear max pk value and last pk fetched upon successful sync.
    for finished_key in ('max_pk_values', 'last_pk_fetched'):
        singer.clear_bookmark(state, stream_id, finished_key)

    singer.write_message(activate_version_message)
def sync_table(mysql_conn, catalog_entry, state, columns, original_state_file=''):
    """Incrementally sync one stream, filtering and ordering by its replication key.

    The replication key comes from the stream-level catalog metadata. When it
    matches the key stored in state, the bookmarked value is used as the lower
    bound of the query; otherwise the new key is recorded and the stale value
    is cleared so the stream is read from the start.

    Args:
        mysql_conn: connection object handed to ``connect_with_backoff``.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read and updated.
        columns: columns passed to the SELECT generator.
        original_state_file: forwarded unchanged to ``common.sync_query``.
    """
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    replication_key_metadata = stream_metadata.get('replication-key')
    replication_key_state = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key')

    replication_key_value = None

    if replication_key_metadata == replication_key_state:
        replication_key_value = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'replication_key_value')
    else:
        # The key changed (or was never stored): remember the new key and
        # drop the value that belonged to the old one.
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'replication_key',
                                      replication_key_metadata)
        state = singer.clear_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'replication_key_value')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
    state = singer.write_bookmark(state,
                                  catalog_entry.tap_stream_id,
                                  'version',
                                  stream_version)

    # The emitted stream name is prefixed with the database name.
    activate_version_message = singer.ActivateVersionMessage(
        stream='%s_%s' % (common.get_database_name(catalog_entry), catalog_entry.stream),
        version=stream_version)

    singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            select_sql = common.generate_select_sql(catalog_entry, columns)
            params = {}

            if replication_key_value is not None:
                # Bookmarked date-time values are stored as strings; parse
                # them before binding them as a query parameter.
                if catalog_entry.schema.properties[replication_key_metadata].format == 'date-time':
                    replication_key_value = pendulum.parse(replication_key_value)

                select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                    replication_key_metadata,
                    replication_key_metadata)

                params['replication_key_value'] = replication_key_value
            elif replication_key_metadata is not None:
                # First run for this key: there is nothing to filter on, but
                # rows must still arrive in key order. MySQL gives no ordering
                # guarantee without ORDER BY, and the bookmark is presumably
                # advanced as rows stream through sync_query (TODO confirm),
                # so an unordered scan could leave a bookmark past rows that
                # were never emitted.
                select_sql += ' ORDER BY `{}` ASC'.format(replication_key_metadata)

            common.sync_query(cur,
                              catalog_entry,
                              state,
                              select_sql,
                              columns,
                              stream_version,
                              params,
                              original_state_file)
def sync_table(mysql_conn, catalog_entry, state, columns):
    """Incrementally sync one stream, ordered by its replication key.

    The replication key is read from the stream-level catalog metadata. If it
    equals the key stored in state, the bookmarked value becomes the lower
    bound of the query; otherwise the new key is stored and the old value is
    cleared.

    Args:
        mysql_conn: connection object handed to ``connect_with_backoff``.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read and updated.
        columns: columns passed to the SELECT generator.
    """
    stream_id = catalog_entry.tap_stream_id
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, stream_id, state)

    stream_metadata = metadata.to_map(catalog_entry.metadata).get((), {})
    catalog_key = stream_metadata.get("replication-key")
    bookmarked_key = singer.get_bookmark(state, stream_id, "replication_key")

    if catalog_key != bookmarked_key:
        # Key changed: record the new key and forget the old key's value.
        state = singer.write_bookmark(state, stream_id, "replication_key", catalog_key)
        state = singer.clear_bookmark(state, stream_id, "replication_key_value")
        resume_from = None
    else:
        resume_from = singer.get_bookmark(state, stream_id, "replication_key_value")

    stream_version = common.get_stream_version(stream_id, state)
    state = singer.write_bookmark(state, stream_id, "version", stream_version)

    singer.write_message(
        singer.ActivateVersionMessage(stream=catalog_entry.stream, version=stream_version)
    )

    with connect_with_backoff(mysql_conn) as conn, conn.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if resume_from is not None:
            # Date-time bookmarks are parsed before being bound to the query.
            if catalog_entry.schema.properties[catalog_key].format == "date-time":
                resume_from = pendulum.parse(resume_from)

            select_sql += " WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC".format(
                catalog_key, catalog_key
            )
            params["replication_key_value"] = resume_from
        elif catalog_key is not None:
            # No bookmark yet: read everything, still in key order.
            select_sql += " ORDER BY `{}` ASC".format(catalog_key)

        common.sync_query(
            cursor, catalog_entry, state, select_sql, columns, stream_version, params
        )
def sync_table(connection, catalog_entry, state, columns, stream_version):
    """Generate the Singer messages for a full-table sync of one stream.

    Yields an ACTIVATE_VERSION message first on the initial replication, then
    every message produced by ``common.sync_query``, then ACTIVATE_VERSION
    once more.

    Args:
        connection: open connection; a cursor is taken from it.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read.
        columns: columns passed to the SELECT generator.
        stream_version: version used for the ACTIVATE_VERSION message and
            forwarded to ``common.sync_query``.
    """
    stream_id = catalog_entry.tap_stream_id
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, stream_id, state)

    stream_bookmark = state.get('bookmarks', {}).get(stream_id, {})
    has_version_key = 'version' in stream_bookmark

    first_sync_done = singer.get_bookmark(state, stream_id, 'initial_full_table_complete')
    bookmarked_version = singer.get_bookmark(state, stream_id, 'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream, version=stream_version)

    # For the initial replication, emit ACTIVATE_VERSION before any rows so
    # the records show up right away. A bookmark holding an explicit
    # 'version' of None suppresses this early message.
    version_explicitly_cleared = has_version_key and bookmarked_version is None
    if not (first_sync_done or version_explicitly_cleared):
        yield activate_version_message

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        yield from common.sync_query(
            cursor, catalog_entry, state, select_sql, columns, stream_version, {})

    yield activate_version_message
def sync_table(mysql_conn, config, catalog_entry, state, columns, stream_version):
    """Full-table sync of one stream, emitted under a database-prefixed name.

    An ACTIVATE_VERSION message is written up front on the initial
    replication and written again once the query has been synced.

    Args:
        mysql_conn: connection object handed to ``connect_with_backoff``.
        config: accepted for the caller's benefit; not read in this function.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read and cleared.
        columns: columns passed to the SELECT generator.
        stream_version: version used for the ACTIVATE_VERSION message and
            forwarded to ``common.sync_query``.
    """
    stream_id = catalog_entry.tap_stream_id

    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), stream_id, state)

    stream_bookmark = state.get('bookmarks', {}).get(stream_id, {})
    has_version_key = 'version' in stream_bookmark

    first_sync_done = singer.get_bookmark(state, stream_id, 'initial_full_table_complete')
    bookmarked_version = singer.get_bookmark(state, stream_id, 'version')

    # The emitted stream name is "<database>_<stream>".
    prefixed_stream = '%s_%s' % (common.get_database_name(catalog_entry), catalog_entry.stream)
    activate_version_message = singer.ActivateVersionMessage(
        stream=prefixed_stream, version=stream_version)

    # For the initial replication, emit ACTIVATE_VERSION before any rows so
    # the records show up right away. A bookmark holding an explicit
    # 'version' of None suppresses this early message.
    version_explicitly_cleared = has_version_key and bookmarked_version is None
    if not (first_sync_done or version_explicitly_cleared):
        singer.write_message(activate_version_message)

    with connect_with_backoff(mysql_conn) as conn, conn.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        common.sync_query(cursor, catalog_entry, state, select_sql, columns, stream_version, {})

    # Clear max pk value and last pk fetched upon successful sync.
    for finished_key in ('max_pk_values', 'last_pk_fetched'):
        singer.clear_bookmark(state, stream_id, finished_key)

    singer.write_message(activate_version_message)
def sync_table(connection, catalog_entry, state, columns):
    """Generate the Singer messages for an incremental sync of one stream.

    The replication key is taken from the stream-level catalog metadata. When
    it matches the key stored in state, the bookmarked value is used as the
    lower bound of the query; otherwise the catalog's key is written to state
    and the stale value is cleared so the stream is read from the start.

    Args:
        connection: open connection; a cursor is taken from it.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read and updated.
        columns: columns passed to the SELECT generator.

    Yields:
        An ACTIVATE_VERSION message followed by every message produced by
        ``common.sync_query``.
    """
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    # The catalog decides the replication key. Preferring the key stored in
    # state would let a stale key override a changed catalog key, and the
    # stale key would be written straight back, so the change would never
    # take effect and the bookmark value would be cleared on every run.
    replication_key = stream_metadata.get('replication-key')
    replication_key_state = singer.get_bookmark(state,
                                                catalog_entry.tap_stream_id,
                                                'replication_key')

    replication_key_value = None

    if replication_key == replication_key_state:
        replication_key_value = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'replication_key_value')
    else:
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'replication_key',
                                      replication_key)
        state = singer.clear_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'replication_key_value')

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
    state = singer.write_bookmark(state,
                                  catalog_entry.tap_stream_id,
                                  'version',
                                  stream_version)

    yield singer.ActivateVersionMessage(stream=catalog_entry.stream, version=stream_version)

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if replication_key_value is not None:
            # Bookmarked date-time values are parsed before being bound to
            # the query.
            if catalog_entry.schema.properties[replication_key].format == 'date-time':
                replication_key_value = pendulum.parse(replication_key_value)

            select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                replication_key,
                replication_key)

            params['replication_key_value'] = replication_key_value
        elif replication_key is not None:
            # No bookmark yet: read everything, still in key order.
            select_sql += ' ORDER BY `{}` ASC'.format(replication_key)

        yield from common.sync_query(cursor,
                                     catalog_entry,
                                     state,
                                     select_sql,
                                     columns,
                                     stream_version,
                                     params)
def sync_table(connection, catalog_entry, state):
    """Generate the Singer messages for a full-table sync of one stream.

    Produces nothing (after logging a warning) when no columns are selected.
    Otherwise yields the messages from ``common.sync_query`` followed by an
    ACTIVATE_VERSION message and a STATE message carrying a deep copy of the
    state. On the very first replication ACTIVATE_VERSION is also yielded
    before the rows.

    Args:
        connection: open connection; a cursor is taken from it.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; the stream's 'version' bookmark is set for the
            duration of the sync and reset to None afterwards.
    """
    columns = common.generate_column_list(catalog_entry)
    if not columns:
        LOGGER.warning(
            'There are no columns selected for table %s, skipping it',
            catalog_entry.table)
        return

    stream_id = catalog_entry.tap_stream_id

    # Must be evaluated before the version bookmark is written below.
    first_replication = state.get('bookmarks', {}).get(stream_id) is None

    stream_version = common.get_stream_version(stream_id, state)
    state = singer.write_bookmark(state, stream_id, 'version', stream_version)

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version,
    )

    # If there is no bookmark at all for this stream, assume it is the very
    # first replication. Emit an ACTIVATE_VERSION message at the beginning so
    # the records show up right away.
    if first_replication:
        yield activate_version_message

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        yield from common.sync_query(
            cursor, catalog_entry, state, select_sql, columns, stream_version, {})

    # Clear the stream's version from the state so that subsequent
    # invocations will emit a distinct stream version.
    state = singer.write_bookmark(state, stream_id, 'version', None)

    yield activate_version_message
    yield singer.StateMessage(value=copy.deepcopy(state))
def sync_table(connection, catalog_entry, state):
    """Generate the Singer messages for an incremental sync of one stream.

    Both the replication key and its last value are read from the stream's
    bookmarks in ``state``. Produces nothing (after logging a warning) when no
    columns are selected.

    Args:
        connection: open connection; a cursor is taken from it.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; the stream's 'version' bookmark is written.

    Yields:
        An ACTIVATE_VERSION message followed by every message produced by
        ``common.sync_query``.
    """
    columns = common.generate_column_list(catalog_entry)
    if not columns:
        LOGGER.warning(
            'There are no columns selected for table %s, skipping it',
            catalog_entry.table)
        return

    stream_id = catalog_entry.tap_stream_id

    resume_from = singer.get_bookmark(state, stream_id, 'replication_key_value')
    key_column = singer.get_bookmark(state, stream_id, 'replication_key')

    stream_version = common.get_stream_version(stream_id, state)
    state = singer.write_bookmark(state, stream_id, 'version', stream_version)

    yield singer.ActivateVersionMessage(stream=catalog_entry.stream, version=stream_version)

    with connection.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)
        params = {}

        if resume_from is not None:
            # Date-time bookmarks are parsed before being bound to the query.
            if catalog_entry.schema.properties[key_column].format == 'date-time':
                resume_from = pendulum.parse(resume_from)

            select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                key_column, key_column)
            params['replication_key_value'] = resume_from
        elif key_column is not None:
            # No bookmarked value yet: read everything, still in key order.
            select_sql += ' ORDER BY `{}` ASC'.format(key_column)

        yield from common.sync_query(
            cursor, catalog_entry, state, select_sql, columns, stream_version, params)
def sync_table(mysql_conn, catalog_entry, state, columns, stream_version):
    """Full-table sync of one stream, bounded by primary key when possible.

    When the table's primary keys are auto-incrementing and a maximum PK
    value is available (from the bookmark or queried from the table), that
    maximum is bookmarked and a primary-key clause is appended to the SELECT.
    An ACTIVATE_VERSION message is written up front on the initial
    replication and written again once the query has been synced.

    Args:
        mysql_conn: connection object handed to ``connect_with_backoff``.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read, written and
            cleared.
        columns: columns passed to the SELECT generator.
        stream_version: version used for the ACTIVATE_VERSION message and
            forwarded to ``common.sync_query``.
    """
    stream_id = catalog_entry.tap_stream_id

    common.whitelist_bookmark_keys(generate_bookmark_keys(catalog_entry), stream_id, state)

    stream_bookmark = state.get('bookmarks', {}).get(stream_id, {})
    has_version_key = 'version' in stream_bookmark

    first_sync_done = singer.get_bookmark(state, stream_id, 'initial_full_table_complete')
    bookmarked_version = singer.get_bookmark(state, stream_id, 'version')

    activate_version_message = singer.ActivateVersionMessage(
        stream=catalog_entry.stream,
        version=stream_version,
    )

    # For the initial replication, emit ACTIVATE_VERSION before any rows so
    # the records show up right away. A bookmark holding an explicit
    # 'version' of None suppresses this early message.
    version_explicitly_cleared = has_version_key and bookmarked_version is None
    if not (first_sync_done or version_explicitly_cleared):
        singer.write_message(activate_version_message)

    auto_increment_pks = pks_are_auto_incrementing(mysql_conn, catalog_entry)

    with connect_with_backoff(mysql_conn) as conn, conn.cursor() as cursor:
        select_sql = common.generate_select_sql(catalog_entry, columns)

        if auto_increment_pks:
            LOGGER.info("Detected auto-incrementing primary key(s) - will replicate incrementally")

            # Prefer the bookmarked maximum; only query the table when the
            # bookmark has none.
            max_pk_values = (
                singer.get_bookmark(state, stream_id, 'max_pk_values')
                or get_max_pk_values(cursor, catalog_entry)
            )

            if max_pk_values:
                state = singer.write_bookmark(state, stream_id, 'max_pk_values', max_pk_values)
                select_sql += generate_pk_clause(catalog_entry, state)
            else:
                LOGGER.info("No max value for auto-incrementing PK found for table {}".format(catalog_entry.table))

        common.sync_query(cursor, catalog_entry, state, select_sql, columns, stream_version, {})

    # Clear max pk value and last pk fetched upon successful sync.
    for finished_key in ('max_pk_values', 'last_pk_fetched'):
        singer.clear_bookmark(state, stream_id, finished_key)

    singer.write_message(activate_version_message)
def sync_table(mysql_conn, catalog_entry, state, columns, limit=None):
    """Incrementally sync one stream, optionally in pages of ``limit`` rows.

    Each pass re-reads the replication-key bookmarks from ``state``, writes an
    ACTIVATE_VERSION message, and runs one query ordered by the replication
    key. With a ``limit``, passes repeat until a pass returns fewer than
    ``limit`` rows; without one, a single pass is made.

    Args:
        mysql_conn: connection object handed to ``connect_with_backoff``.
        catalog_entry: catalog entry of the stream being synced.
        state: Singer state; bookmarks for this stream are read and updated.
        columns: columns passed to the SELECT generator.
        limit: maximum rows per pass. ``None`` or ``0`` means no LIMIT clause
            and exactly one pass.
    """
    common.whitelist_bookmark_keys(BOOKMARK_KEYS, catalog_entry.tap_stream_id, state)

    catalog_metadata = metadata.to_map(catalog_entry.metadata)
    stream_metadata = catalog_metadata.get((), {})

    while True:
        replication_key_metadata = stream_metadata.get('replication-key')
        replication_key_state = singer.get_bookmark(state,
                                                    catalog_entry.tap_stream_id,
                                                    'replication_key')

        replication_key_value = None

        if replication_key_metadata == replication_key_state:
            replication_key_value = singer.get_bookmark(state,
                                                        catalog_entry.tap_stream_id,
                                                        'replication_key_value')
        else:
            # Key changed: record the new key and forget the old key's value.
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'replication_key',
                                          replication_key_metadata)
            state = singer.clear_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'replication_key_value')

        stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'version',
                                      stream_version)

        activate_version_message = singer.ActivateVersionMessage(
            stream=catalog_entry.stream,
            version=stream_version
        )

        singer.write_message(activate_version_message)

        with connect_with_backoff(mysql_conn) as open_conn:
            with open_conn.cursor() as cur:
                select_sql = common.generate_select_sql(catalog_entry, columns)
                params = {}

                if replication_key_value is not None:
                    # Bookmarked date-time values are parsed before being
                    # bound to the query.
                    if catalog_entry.schema.properties[replication_key_metadata].format == 'date-time':
                        replication_key_value = pendulum.parse(replication_key_value)

                    select_sql += ' WHERE `{}` >= %(replication_key_value)s ORDER BY `{}` ASC'.format(
                        replication_key_metadata,
                        replication_key_metadata)

                    params['replication_key_value'] = replication_key_value
                elif replication_key_metadata is not None:
                    select_sql += ' ORDER BY `{}` ASC'.format(replication_key_metadata)

                if limit:
                    select_sql += ' LIMIT {}'.format(limit)

                num_rows = common.sync_query(cur,
                                             catalog_entry,
                                             state,
                                             select_sql,
                                             columns,
                                             stream_version,
                                             params)

        # Stop when the query was unpaged or the page came back short. Use
        # the same truthiness test that decided whether LIMIT was appended:
        # testing `limit is None` instead would make limit=0 (no LIMIT
        # clause, and `num_rows < 0` never true) loop forever.
        # NOTE(review): the filter is `>=`, so a full page whose rows all
        # share one key value would presumably be fetched again on the next
        # pass -- confirm that the page size makes this impossible.
        if not limit or num_rows < limit:
            break