Example #1
0
def sync_traditional_stream(conn_config, stream, state, sync_method, end_lsn):
    """Run one non-streaming sync pass for a single stream and emit state.

    Args:
        conn_config: Connection settings dict. It is mutated in place:
            ``"dbname"`` is overwritten with the ``database-name`` value from
            the stream's top-level metadata.
        stream: Catalog entry; this function reads ``tap_stream_id``,
            ``metadata`` and ``schema["properties"]`` from it.
        state: Singer state, threaded through every helper call.
        sync_method: One of ``"full"``, ``"incremental"``,
            ``"logical_initial"`` or ``"logical_initial_interrupted"``.
        end_lsn: Value written to the ``lsn`` bookmark before the
            ``"logical_initial"`` table copy starts.

    Returns:
        The updated state. When no columns are selected, the incoming state
        is returned untouched and no state message is written.

    Raises:
        Exception: If ``sync_method`` is not one of the values above.
    """
    stream_id = stream["tap_stream_id"]
    LOGGER.info("Beginning sync of stream(%s) with sync method(%s)",
                stream_id, sync_method)

    md_map = metadata.to_map(stream["metadata"])
    # NOTE(review): no default on the first .get(), so a stream lacking a
    # top-level () metadata entry would presumably fail here -- confirm that
    # discovery always emits one.
    conn_config["dbname"] = md_map.get(()).get("database-name")

    desired_columns = sorted(
        column for column in stream["schema"]["properties"].keys()
        if sync_common.should_sync_column(md_map, column))

    if not desired_columns:
        LOGGER.warning(
            "There are no columns selected for stream %s, skipping it",
            stream_id)
        return state

    register_type_adapters(conn_config)

    # Reject unknown methods before touching state, so a bad method never
    # leaves the stream marked as currently syncing.
    known_methods = ("full", "incremental", "logical_initial",
                     "logical_initial_interrupted")
    if sync_method not in known_methods:
        raise Exception("unknown sync method {} for stream {}".format(
            sync_method, stream_id))

    state = singer.set_currently_syncing(state, stream_id)

    if sync_method == "full":
        state = do_sync_full_table(conn_config, stream, state,
                                   desired_columns, md_map)
    elif sync_method == "incremental":
        state = do_sync_incremental(conn_config, stream, state,
                                    desired_columns, md_map)
    elif sync_method == "logical_initial":
        LOGGER.info("Performing initial full table sync")
        # The lsn bookmark is written before the table copy begins.
        state = singer.write_bookmark(state, stream_id, "lsn", end_lsn)

        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)
        # Resetting xmin to None marks the initial copy as finished.
        state = singer.write_bookmark(state, stream_id, "xmin", None)
    else:  # "logical_initial_interrupted"
        LOGGER.info(
            "Initial stage of full table sync was interrupted. resuming...")
        sync_common.send_schema_message(stream, [])
        state = full_table.sync_table(conn_config, stream, state,
                                      desired_columns, md_map)

    state = singer.set_currently_syncing(state, None)
    # Emit a deep copy of the final state as the state message.
    singer.write_message(singer.StateMessage(value=copy.deepcopy(state)))
    return state
Example #2
0
def sync_method_for_streams(streams, state, default_replication_method):
    """Decide which sync method each stream needs and group the streams.

    For every stream the replication method is read from its top-level
    metadata (falling back to ``default_replication_method``) and mapped to
    one of ``"full"``, ``"incremental"``, ``"logical_initial"``,
    ``"logical_initial_interrupted"`` or ``"pure_logical"``. For LOG_BASED
    streams the choice depends on which of the ``xmin`` / ``lsn`` bookmarks
    are currently set (truthy) in ``state``.

    Args:
        streams: Iterable of catalog entries; each must provide
            ``tap_stream_id``, ``metadata`` and ``schema["properties"]``.
        state: Singer state. It is passed through
            ``clear_state_on_replication_change`` for every stream; the
            result is used for the bookmark lookups here but is NOT part of
            the return value.
        default_replication_method: Method used when a stream's metadata
            does not define ``replication-method``.

    Returns:
        A ``(lookup, traditional_streams, logical_streams)`` tuple, where
        ``lookup`` maps ``tap_stream_id`` to the chosen sync method,
        ``traditional_streams`` holds every stream that is not
        ``"pure_logical"``, and ``logical_streams`` holds the
        ``"pure_logical"`` ones. Streams with no selected columns are
        skipped with a warning and appear in none of the three.

    Raises:
        Exception: If the replication method is not LOG_BASED, FULL_TABLE or
            INCREMENTAL; if LOG_BASED is requested for a view; or if the
            state has an ``xmin`` bookmark without an ``lsn`` bookmark.
    """
    lookup = {}
    traditional_streams = []
    logical_streams = []

    for stream in streams:
        # One metadata map serves both the stream-level settings and the
        # per-column selection below.
        md_map = metadata.to_map(stream["metadata"])
        stream_id = stream["tap_stream_id"]
        stream_md = md_map.get((), {})

        replication_method = stream_md.get("replication-method",
                                           default_replication_method)
        replication_key = stream_md.get("replication-key")

        state = clear_state_on_replication_change(state, stream_id,
                                                  replication_key,
                                                  replication_method)

        if replication_method not in {"LOG_BASED", "FULL_TABLE",
                                      "INCREMENTAL"}:
            raise Exception(
                "Unrecognized replication_method {} for stream {}".format(
                    replication_method, stream_id))

        desired_columns = sorted(
            column for column in stream["schema"]["properties"].keys()
            if sync_common.should_sync_column(md_map, column))

        if not desired_columns:
            LOGGER.warning(
                "There are no columns selected for stream %s, skipping it",
                stream_id)
            continue

        if replication_method == "LOG_BASED" and stream_md.get("is-view"):
            raise Exception(
                "Logical Replication is NOT supported for views. "
                "Please change the replication method for {}".format(
                    stream_id))

        if replication_method == "FULL_TABLE":
            lookup[stream_id] = "full"
            traditional_streams.append(stream)
            continue

        if replication_method == "INCREMENTAL":
            lookup[stream_id] = "incremental"
            traditional_streams.append(stream)
            continue

        # LOG_BASED: the combination of xmin / lsn bookmarks selects the phase.
        xmin = get_bookmark(state, stream_id, "xmin")
        lsn = get_bookmark(state, stream_id, "lsn")

        if xmin and lsn:
            # finishing previously interrupted full-table
            # (first stage of logical replication)
            lookup[stream_id] = "logical_initial_interrupted"
            traditional_streams.append(stream)
        elif xmin:
            # inconsistent state: a full-table pass started without an lsn
            raise Exception(
                "Xmin found({}) in state implying full-table replication "
                "but no lsn is present for stream {}".format(xmin, stream_id))
        elif not lsn:
            # initial full-table phase of logical replication
            lookup[stream_id] = "logical_initial"
            traditional_streams.append(stream)
        else:
            # no xmin but we have an lsn: the initial full-table stage has
            # been completed, moving on to pure logical replication
            lookup[stream_id] = "pure_logical"
            logical_streams.append(stream)

    return lookup, traditional_streams, logical_streams