def get_max_pk_values(cursor, catalog_entry):
    """Return a dict mapping each primary-key column to its MAX() value.

    Runs a single ``SELECT max(pk1), max(pk2), ...`` against the stream's
    table and converts any temporal values to UTC datetime strings so they
    can be stored in a JSON state bookmark.

    :param cursor: open DB-API cursor used to execute the query.
    :param catalog_entry: singer catalog entry describing the table.
    :return: dict of {key_property: max value}; columns whose max is NULL
             are omitted, so the dict is empty for an empty table.
    """
    database_name = common.get_database_name(catalog_entry)
    escaped_db = common.escape(database_name)
    escaped_table = common.escape(catalog_entry.table)

    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    sql = """SELECT {}
               FROM {}.{}
            """

    select_column_clause = ", ".join(
        ["max(" + pk + ")" for pk in escaped_columns])

    cursor.execute(sql.format(select_column_clause,
                              escaped_db,
                              escaped_table))
    result = cursor.fetchone()

    # Pair each key with its own max BEFORE dropping NULLs. The previous
    # implementation filtered Nones into a flat list and only then zipped
    # with key_properties, which misaligned keys and values whenever an
    # earlier PK's max was NULL but a later one's was not.
    max_pk_values = {}
    for pk, bm in zip(key_properties, result):
        if isinstance(bm, (datetime.date,
                           datetime.datetime,
                           datetime.timedelta)):
            # Temporal maxes must be serialized for the JSON state file.
            max_pk_values[pk] = common.to_utc_datetime_str(bm)
        elif bm is not None:
            max_pk_values[pk] = bm

    return max_pk_values
def get_max_pk_values(cursor, catalog_entry):
    """Return the primary-key values of the table's "last" row.

    Selects the PK columns ordered descending by every key component and
    takes the first row, i.e. the maximum composite primary key.

    :param cursor: open DB-API cursor used to execute the query.
    :param catalog_entry: singer catalog entry describing the table.
    :return: dict of {key_property: value} for the max-PK row, or an
             empty dict when the table has no rows.
    """
    db_name = common.get_database_name(catalog_entry)
    escaped_db = common.escape(db_name)
    escaped_table = common.escape(catalog_entry.table)

    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    sql = """SELECT {}
               FROM {}.{}
              ORDER BY {}
              LIMIT 1
            """

    select_clause = ", ".join(escaped_columns)
    order_clause = ", ".join([col + " DESC" for col in escaped_columns])

    cursor.execute(sql.format(select_clause,
                              escaped_db,
                              escaped_table,
                              order_clause))
    row = cursor.fetchone()

    # An empty table yields no row -> no bookmark values to record.
    return dict(zip(key_properties, row)) if row else {}
def generate_pk_clause(catalog_entry, state):
    """Build the WHERE/ORDER BY suffix for a resumable primary-key scan.

    Bounds the scan above by the bookmarked ``max_pk_values`` and, when a
    ``last_pk_fetched`` bookmark exists, below by the last row already
    emitted, so an interrupted sync resumes where it left off.

    :param catalog_entry: singer catalog entry (provides PKs and schema).
    :param state: singer state dict holding the bookmarks.
    :return: SQL suffix string, or "" when no max_pk_values bookmark
             exists (nothing to constrain on).
    """
    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    # Guard: without an upper bound there is no clause to generate; the
    # previous version raised on the subscripts below in this case.
    if not max_pk_values:
        return ""

    def _literal(pk, value):
        # String (VARCHAR-like) columns need their literals quoted, or
        # the generated SQL is invalid for textual primary keys.
        column_type = catalog_entry.schema.properties.get(pk).type
        if 'string' in column_type:
            return "'{}'".format(value)
        return value

    if last_pk_fetched:
        pk_comparisons = [
            "({} > {} AND {} <= {})".format(common.escape(pk),
                                            _literal(pk, last_pk_fetched[pk]),
                                            common.escape(pk),
                                            _literal(pk, max_pk_values[pk]))
            for pk in key_properties]
    else:
        pk_comparisons = [
            "{} <= {}".format(common.escape(pk),
                              _literal(pk, max_pk_values[pk]))
            for pk in key_properties]

    sql = " WHERE {} ORDER BY {} ASC".format(" AND ".join(pk_comparisons),
                                             ", ".join(escaped_columns))

    return sql
def generate_pk_clause(catalog_entry, state):
    """Build the WHERE/ORDER BY suffix for a resumable primary-key scan.

    Bounds the scan above by the bookmarked ``max_pk_values`` and, when a
    ``last_pk_fetched`` bookmark exists, below by the last row already
    emitted, so an interrupted sync resumes where it left off.

    :param catalog_entry: singer catalog entry (provides PKs and schema).
    :param state: singer state dict holding the bookmarks.
    :return: SQL suffix string, or "" when no max_pk_values bookmark
             exists (nothing to constrain on).
    """
    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    # NOTE: removed the unused where_clause/order_by_clause locals that
    # held broken, never-formatted "`{}`" templates.
    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    # No recorded upper bound means there is nothing to constrain on.
    if not max_pk_values:
        return ""

    def _literal(pk, value):
        # Quote the value if the column is VARCHAR-like; shared by both
        # branches below instead of duplicating the type check.
        column_type = catalog_entry.schema.properties.get(pk).type
        if 'string' in column_type:
            return "'{}'".format(value)
        return value

    pk_comparisons = []
    if last_pk_fetched:
        for pk in key_properties:
            pk_comparisons.append("({} > {} AND {} <= {})".format(
                common.escape(pk),
                _literal(pk, last_pk_fetched[pk]),
                common.escape(pk),
                _literal(pk, max_pk_values[pk])))
    else:
        for pk in key_properties:
            pk_comparisons.append("{} <= {}".format(
                common.escape(pk),
                _literal(pk, max_pk_values[pk])))

    sql = " WHERE {} ORDER BY {} ASC".format(" AND ".join(pk_comparisons),
                                             ", ".join(escaped_columns))

    return sql
def generate_pk_clause(catalog_entry, state):
    """Build the WHERE/ORDER BY suffix for a resumable primary-key scan.

    Bounds the scan above by the bookmarked ``max_pk_values`` and, when a
    ``last_pk_fetched`` bookmark exists, prepends the resume clause from
    ``generate_pk_bookmark_clause`` so already-emitted rows are skipped.

    :param catalog_entry: singer catalog entry (provides PKs and schema).
    :param state: singer state dict holding the bookmarks.
    :return: SQL suffix string, or "" when no max_pk_values bookmark
             exists (nothing to constrain on).
    """
    key_properties = common.get_key_properties(catalog_entry)

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    # No recorded upper bound means there is nothing to constrain on.
    if not max_pk_values:
        return ""

    last_pk_clause = ''
    if last_pk_fetched:
        # Loop-invariant: the resume clause does not depend on any single
        # key column, so compute it once instead of once per key. The
        # trailing AND joins it to the max_pk_values comparisons below.
        last_pk_clause = '({}) AND '.format(
            generate_pk_bookmark_clause(key_properties,
                                        last_pk_fetched,
                                        catalog_entry))

    # Both bookmark states use the same upper-bound comparisons, so one
    # loop replaces the previously duplicated if/else bodies.
    max_pk_comparisons = []
    for pk in key_properties:
        column_type = catalog_entry.schema.properties.get(pk).type
        max_pk_comparisons.append("{} <= {}".format(
            common.escape(pk),
            quote_where_clause_value(max_pk_values[pk], column_type)))

    order_by_columns = [common.escape(c) for c in key_properties]
    sql = " WHERE {}{} ORDER BY {} ASC".format(
        last_pk_clause,
        " AND ".join(max_pk_comparisons),
        ", ".join(order_by_columns))

    return sql
def _create_temp_table(open_connection, catalog_entry, columns, pk_clause: str):
    """Snapshot the selected rows into a session temporary table.

    Creates ``tmp_<uuid>`` via ``CREATE TEMPORARY TABLE ... <select>`` so
    the subsequent read works against a stable copy, then returns a
    SELECT statement targeting that temporary table.

    :param open_connection: DB connection providing a ``cursor()``
        context manager.
    :param catalog_entry: singer catalog entry for the source table.
    :param columns: list of column names to select.
    :param pk_clause: optional WHERE/ORDER BY suffix appended to both the
        snapshot SELECT and the returned SELECT; may be "".
    :return: SELECT statement (str) reading from the temporary table.
    """
    with open_connection.cursor() as c:
        temporary_catalog_entry = copy.deepcopy(catalog_entry)
        logging.info("try creating temp table")
        db_name = common.get_database_name(temporary_catalog_entry)
        select_sql = common.generate_select_sql(catalog_entry, columns)
        if pk_clause:
            select_sql += pk_clause

        # Random hex suffix avoids name collisions between sync runs.
        # uuid4().hex is the dashless form previously built by hand with
        # str(...).replace("-", "").
        temp_table_name = "tmp_" + uuid.uuid4().hex
        temp_sql = "CREATE TEMPORARY TABLE {}.{} {}".format(
            common.escape(db_name),
            common.escape(temp_table_name),
            select_sql)

        # Lazy %-args: the message is only rendered when INFO is enabled,
        # instead of eagerly formatting on every call.
        logging.info("write data to temporary table: %s", temp_sql)
        c.execute(temp_sql)
        logging.info("created temporary table successful")

        # Point a copy of the catalog entry at the temp table so the
        # generated SELECT reads the snapshot, not the live table.
        temporary_catalog_entry.table = temp_table_name
        select_temp_sql = common.generate_select_sql(
            temporary_catalog_entry, columns)
        if pk_clause:
            select_temp_sql += pk_clause
        return select_temp_sql
def generate_pk_bookmark_clause(key_properties, last_pk_fetched, catalog_entry):
    """Build the strictly-greater-than resume clause for a composite PK.

    Given the `last_pk_fetched` bookmark, produces the lexicographic
    "row comes after the bookmark" predicate: for each key component, all
    earlier components are pinned equal and that component is strictly
    greater. Strict comparison is safe because primary keys are unique.

    Example:
      key_properties = ['name','birthday']
      last_pk_fetched = {'name': "Phil Collins", 'birthday': "1951-01-30"}

      Returns
      "(`name` > 'Phil Collins') OR (`name` = 'Phil Collins' AND `birthday` > '1951-01-30')"
    """
    assert last_pk_fetched is not None, \
        "Must call generate_pk_bookmark with a non-null 'last_pk_fetched' dict"

    clause_terms = []
    # Equality comparisons for every key column handled so far; each new
    # term pins these before adding its own strict '>' comparison.
    prefix_equalities = []

    for pk in key_properties:
        column_type = catalog_entry.schema.properties.get(pk).type
        quoted_val = quote_where_clause_value(last_pk_fetched[pk], column_type)
        escaped_pk = common.escape(pk)

        comparisons = prefix_equalities + [escaped_pk + ' > ' + quoted_val]
        clause_terms.append(' AND '.join(comparisons))

        prefix_equalities.append(escaped_pk + ' = ' + quoted_val)

    if not clause_terms:
        return ''
    return '({})'.format(') OR ('.join(clause_terms))