Example #1
0
def get_max_pk_values(cursor, catalog_entry):
    """Query the maximum value of every key-property column of a table.

    Executes one ``SELECT max(pk1), max(pk2), ... FROM db.table`` statement
    on ``cursor`` and reads a single row back.

    Values that are ``date``, ``datetime`` or ``timedelta`` instances are
    passed through ``common.to_utc_datetime_str``; ``None`` values are
    dropped; everything else is kept unchanged.

    Returns a dict pairing key property names with the retained values, or
    an empty dict when no value was retained.

    NOTE(review): pairing is positional after the ``None`` values have been
    dropped, so a ``None`` in the middle of the row would shift later values
    onto earlier keys.
    """
    db_identifier = common.escape(common.get_database_name(catalog_entry))
    table_identifier = common.escape(catalog_entry.table)

    pk_names = common.get_key_properties(catalog_entry)
    max_expressions = [
        "max({})".format(common.escape(name)) for name in pk_names
    ]

    query_template = """SELECT {}
               FROM {}.{}
    """
    cursor.execute(
        query_template.format(", ".join(max_expressions), db_identifier,
                              table_identifier))
    row = cursor.fetchone()

    temporal_types = (datetime.date, datetime.datetime, datetime.timedelta)
    bookmark_values = [
        common.to_utc_datetime_str(value)
        if isinstance(value, temporal_types) else value
        for value in row
        if value is not None
    ]

    if not bookmark_values:
        return {}

    return dict(zip(pk_names, bookmark_values))
Example #2
0
def get_max_pk_values(cursor, catalog_entry):
    """Read the key-property values of the row that sorts last by its key.

    Selects the key-property columns from the table, ordered by every key
    column descending and limited to one row, and executes that statement on
    ``cursor``.

    Returns a dict mapping each key property name to the fetched value, or
    an empty dict when no row comes back.
    """
    db_identifier = common.escape(common.get_database_name(catalog_entry))
    table_identifier = common.escape(catalog_entry.table)

    pk_names = common.get_key_properties(catalog_entry)
    quoted_pks = [common.escape(name) for name in pk_names]

    query = """SELECT {}
               FROM {}.{}
              ORDER BY {}
              LIMIT 1
    """.format(
        ", ".join(quoted_pks),
        db_identifier,
        table_identifier,
        ", ".join(column + " DESC" for column in quoted_pks),
    )

    cursor.execute(query)
    top_row = cursor.fetchone()

    return dict(zip(pk_names, top_row)) if top_row else {}
Example #3
0
def generate_pk_clause(catalog_entry, state):
    """Build the ``WHERE ... ORDER BY ...`` suffix that bounds a sync by key.

    Reads the ``max_pk_values`` and ``last_pk_fetched`` bookmarks of the
    stream from ``state``.

    With a ``last_pk_fetched`` bookmark, every key column is constrained to
    ``(col > last AND col <= max)``; without one, to ``col <= max``. The
    per-column conditions are joined with ``AND`` and the result is ordered
    by the key columns ascending.

    NOTE(review): bookmark values are interpolated into the SQL text as-is,
    without quoting, and ``max_pk_values`` is indexed without a ``None``
    check -- confirm callers only reach this with numeric keys and a
    populated ``max_pk_values`` bookmark.

    Returns the SQL fragment as a string, starting with a leading space.
    """
    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    if last_pk_fetched:
        # Resume strictly after the last fetched key, up to the recorded max.
        pk_comparisons = [
            "({} > {} AND {} <= {})".format(column, last_pk_fetched[pk],
                                            column, max_pk_values[pk])
            for pk, column in zip(key_properties, escaped_columns)
        ]
    else:
        pk_comparisons = [
            "{} <= {}".format(column, max_pk_values[pk])
            for pk, column in zip(key_properties, escaped_columns)
        ]

    return " WHERE {} ORDER BY {} ASC".format(" AND ".join(pk_comparisons),
                                              ", ".join(escaped_columns))
Example #4
0
def pks_are_auto_incrementing(mysql_conn, catalog_entry):
    """Tell whether every key-property column is flagged auto_increment.

    For each key property, looks for a row in ``information_schema.columns``
    with matching schema, table and column whose ``extra`` contains
    ``auto_increment``.

    Returns False when the stream has no key properties or as soon as one
    key column has no such row; True otherwise.
    """
    schema_name = common.get_database_name(catalog_entry)
    pk_names = common.get_key_properties(catalog_entry)

    if not pk_names:
        return False

    query_template = """SELECT 1
               FROM information_schema.columns
              WHERE table_schema = '{}'
                AND table_name = '{}'
                AND column_name = '{}'
                AND extra LIKE '%auto_increment%'
    """

    with connect_with_backoff(mysql_conn) as open_conn, \
            open_conn.cursor() as cur:
        for column_name in pk_names:
            query = query_template.format(schema_name, catalog_entry.table,
                                          column_name)
            cur.execute(query)

            # No matching row: this key column is not auto-incrementing.
            if not cur.fetchone():
                return False

    return True
Example #5
0
def pks_are_integer_or_varchar(mysql_conn, config, catalog_entry):
    """Tell whether every key-property column has an accepted data type.

    The check only runs when ``config['allow_non_auto_increment_pks']`` is
    the string ``'true'`` and the stream has key properties; otherwise the
    function returns False without opening a connection.

    Each key column's ``data_type`` is read from
    ``information_schema.columns`` and compared against the accepted integer
    and character types.

    Returns True when all key columns have an accepted type, False
    otherwise.

    Raises:
        Exception: if a key column has no row in
            ``information_schema.columns``.
    """
    database_name = common.get_database_name(catalog_entry)
    key_properties = common.get_key_properties(catalog_entry)

    if config.get('allow_non_auto_increment_pks') == 'true' and key_properties:
        # One entry per type. A missing comma here silently concatenates
        # adjacent string literals ('smallint' 'mediumint' would become
        # 'smallintmediumint') and drops both types from the set.
        valid_column_types = {
            'tinyint',
            'smallint',
            'mediumint',
            'int',
            'bigint',
            'varchar',
            'char',
        }

        sql = """SELECT data_type
                   FROM information_schema.columns
                  WHERE table_schema = '{}'
                    AND table_name = '{}'
                    AND column_name = '{}'
        """

        with connect_with_backoff(mysql_conn) as open_conn:
            with open_conn.cursor() as cur:
                for pk in key_properties:
                    cur.execute(
                        sql.format(database_name, catalog_entry.table, pk))

                    result = cur.fetchone()

                    if not result:
                        raise Exception(
                            "Primary key column {} does not exist.".format(pk))

                    if result[0] not in valid_column_types:
                        return False

        return True

    return False
Example #6
0
def write_schema_message(catalog_entry, bookmark_properties=None):
    """Write a Singer schema message for the stream of ``catalog_entry``.

    Args:
        catalog_entry: catalog entry providing the stream name, the schema
            (serialised via ``schema.to_dict()``) and the key properties.
        bookmark_properties: bookmark property names to include in the
            message. Defaults to a fresh empty list on every call, so no
            list object is shared between calls.
    """
    if bookmark_properties is None:
        bookmark_properties = []

    key_properties = common.get_key_properties(catalog_entry)

    singer.write_message(
        singer.SchemaMessage(stream=catalog_entry.stream,
                             schema=catalog_entry.schema.to_dict(),
                             key_properties=key_properties,
                             bookmark_properties=bookmark_properties))
Example #7
0
def generate_pk_clause(catalog_entry, state):
    """Build the ``WHERE ... ORDER BY ...`` suffix that bounds a sync by key.

    Reads the ``max_pk_values`` and ``last_pk_fetched`` bookmarks of the
    stream from ``state``.

    Returns an empty string when there is no ``max_pk_values`` bookmark.
    Otherwise every key column is constrained to ``(col > last AND col <=
    max)`` when a ``last_pk_fetched`` bookmark exists, or to ``col <= max``
    when it does not. The conditions are joined with ``AND`` and the result
    is ordered by the key columns ascending.

    Values of columns whose schema type contains ``'string'`` are wrapped
    in single quotes; other values are interpolated as-is.

    NOTE(review): quoted values are not escaped, so a key value containing
    a single quote would produce broken SQL -- confirm this cannot happen.
    """
    key_properties = common.get_key_properties(catalog_entry)
    escaped_columns = [common.escape(c) for c in key_properties]

    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    if not max_pk_values:
        return ""

    def _quote_if_string(value, column_type):
        # quote PK val if column is VARCHAR
        if 'string' in column_type:
            return "'{}'".format(value)
        return value

    pk_comparisons = []

    for pk, escaped_pk in zip(key_properties, escaped_columns):
        column_type = catalog_entry.schema.properties.get(pk).type

        if last_pk_fetched:
            # Resume strictly after the last fetched key, up to the max.
            last_pk_val = _quote_if_string(last_pk_fetched[pk], column_type)
            max_pk_val = _quote_if_string(max_pk_values[pk], column_type)
            pk_comparisons.append("({} > {} AND {} <= {})".format(
                escaped_pk, last_pk_val, escaped_pk, max_pk_val))
        else:
            max_pk_val = _quote_if_string(max_pk_values[pk], column_type)
            pk_comparisons.append("{} <= {}".format(escaped_pk, max_pk_val))

    return " WHERE {} ORDER BY {} ASC".format(" AND ".join(pk_comparisons),
                                              ", ".join(escaped_columns))
Example #8
0
def do_sync_full_table(mysql_conn, catalog_entry, state, columns):
    """Run a full-table sync of one stream and record its completion.

    Writes the schema message, syncs the table through
    ``full_table.sync_table`` using the stream version obtained from
    ``common.get_stream_version``, then clears the ``version`` bookmark,
    sets ``initial_full_table_complete`` to True and writes a state message
    carrying a deep copy of the state.
    """
    LOGGER.info("Stream %s is using full table replication", catalog_entry.stream)

    write_schema_message(catalog_entry)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)

    full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)

    # Prefer initial_full_table_complete going forward
    singer.clear_bookmark(state, catalog_entry.tap_stream_id, "version")

    state = singer.write_bookmark(
        state, catalog_entry.tap_stream_id, "initial_full_table_complete", True
    )

    # Deep copy so the written message does not share the live state object.
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
Example #9
0
def generate_pk_clause(catalog_entry, state):
    """Build the ``WHERE ... ORDER BY ...`` suffix that bounds a sync by key.

    Reads the ``max_pk_values`` and ``last_pk_fetched`` bookmarks of the
    stream from ``state``.

    Returns an empty string when there is no ``max_pk_values`` bookmark.
    Otherwise every key column is constrained to ``col <= max``, with the
    max value rendered by ``quote_where_clause_value`` for the column's
    schema type. When a ``last_pk_fetched`` bookmark exists, the clause
    produced by ``generate_pk_bookmark_clause`` is placed in front, wrapped
    in parentheses and joined with ``AND``. The result is ordered by the
    key columns ascending.
    """
    key_properties = common.get_key_properties(catalog_entry)

    max_pk_values = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                        'max_pk_values')

    last_pk_fetched = singer.get_bookmark(state, catalog_entry.tap_stream_id,
                                          'last_pk_fetched')

    if not max_pk_values:
        return ""

    last_pk_clause = ''
    if last_pk_fetched and key_properties:
        # The bookmark clause does not depend on the individual key, so it
        # is built once rather than once per key column.
        # Add AND to interpolate along with max_pk_values clauses
        last_pk_clause = '({}) AND '.format(
            generate_pk_bookmark_clause(key_properties, last_pk_fetched,
                                        catalog_entry))

    max_pk_comparisons = []
    for pk in key_properties:
        column_type = catalog_entry.schema.properties.get(pk).type
        pk_val = quote_where_clause_value(max_pk_values[pk], column_type)

        max_pk_comparisons.append("{} <= {}".format(
            common.escape(pk), pk_val))

    order_by_columns = [common.escape(c) for c in key_properties]
    sql = " WHERE {}{} ORDER BY {} ASC".format(
        last_pk_clause, " AND ".join(max_pk_comparisons),
        ", ".join(order_by_columns))

    return sql
Example #10
0
def sync_is_resumable(mysql_conn, catalog_entry):
    """Tell whether an interrupted full table sync of this table can resume.

    In order to resume a full table sync, a table requires at least one key
    property, and the ``data_type`` of every key column (as reported by
    ``information_schema.columns``) must be listed in
    ``RESUMABLE_PK_TYPES``.

    Returns False when there are no key properties or when any key column
    has a type outside ``RESUMABLE_PK_TYPES`` (a warning is logged for that
    column); True otherwise.

    Raises:
        Exception: if a key column has no row in
            ``information_schema.columns``.
    """
    database_name = common.get_database_name(catalog_entry)
    key_properties = common.get_key_properties(catalog_entry)

    if not key_properties:
        return False

    sql = """SELECT data_type
               FROM information_schema.columns
              WHERE table_schema = '{}'
                AND table_name = '{}'
                AND column_name = '{}'
    """

    with connect_with_backoff(mysql_conn) as open_conn:
        with open_conn.cursor() as cur:
            for pk in key_properties:
                cur.execute(sql.format(database_name, catalog_entry.table, pk))

                result = cur.fetchone()

                if not result:
                    raise Exception(
                        "Primary key column {} does not exist.".format(pk))

                if result[0] not in RESUMABLE_PK_TYPES:
                    # Logger.warn is a deprecated alias of Logger.warning
                    # and no longer exists on Python 3.13+.
                    LOGGER.warning(
                        "Found primary key column %s with type %s. Will not be able "
                        "to resume interrupted FULL_TABLE sync using this key.",
                        pk, result[0])
                    return False

    return True
Example #11
0
def do_sync_historical_binlog(mysql_conn, config, catalog_entry, state, columns):
    """Run (or resume) the initial full table sync of a LOG_BASED stream.

    Verifies the binlog configuration, rejects views, writes the schema
    message, and then either:

    * resumes a previous full table sync when the ``log_file``, ``log_pos``
      and ``max_pk_values`` bookmarks are all set, or
    * starts a new one: marks ``initial_binlog_complete`` False, captures
      the current binlog file and position, stores the stream version, and
      syncs the table.

    For a resumable table the binlog coordinates are bookmarked before the
    table sync; otherwise they are bookmarked after it.

    ``config`` is accepted but not read by this function.

    Raises:
        Exception: if the stream is a view.
    """
    binlog.verify_binlog_config(mysql_conn)

    is_view = common.get_is_view(catalog_entry)

    if is_view:
        raise Exception("Unable to replicate stream({}) with binlog because it is a view.".format(catalog_entry.stream))

    log_file = singer.get_bookmark(state,
                                   catalog_entry.tap_stream_id,
                                   'log_file')

    log_pos = singer.get_bookmark(state,
                                  catalog_entry.tap_stream_id,
                                  'log_pos')

    max_pk_values = singer.get_bookmark(state,
                                        catalog_entry.tap_stream_id,
                                        'max_pk_values')

    write_schema_message(catalog_entry)

    stream_version = common.get_stream_version(catalog_entry.tap_stream_id, state)

    if log_file and log_pos and max_pk_values:
        LOGGER.info("Resuming initial full table sync for LOG_BASED stream %s", catalog_entry.tap_stream_id)
        full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)

    else:
        LOGGER.info("Performing initial full table sync for LOG_BASED stream %s", catalog_entry.tap_stream_id)

        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'initial_binlog_complete',
                                      False)

        # Capture the binlog coordinates before the table is read.
        current_log_file, current_log_pos = binlog.fetch_current_log_file_and_pos(mysql_conn)
        state = singer.write_bookmark(state,
                                      catalog_entry.tap_stream_id,
                                      'version',
                                      stream_version)

        if full_table.sync_is_resumable(mysql_conn, catalog_entry):
            # We must save log_file and log_pos across FULL_TABLE syncs when performing
            # a resumable full table sync
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_file',
                                          current_log_file)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_pos',
                                          current_log_pos)

            full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)
        else:
            # Not resumable: bookmark the coordinates only after the table
            # sync has run.
            full_table.sync_table(mysql_conn, catalog_entry, state, columns, stream_version)
            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_file',
                                          current_log_file)

            state = singer.write_bookmark(state,
                                          catalog_entry.tap_stream_id,
                                          'log_pos',
                                          current_log_pos)