コード例 #1
0
def validate_state(config, catalog, state):
    """Reconcile ``state`` with the catalog, then write and return it.

    For each stream in ``catalog["streams"]``:

    * If a root-level metadata entry (``breadcrumb == []``) has a
      ``selected`` value that is not equal to ``True`` and the stream is the
      one recorded as currently syncing, the currently-syncing marker is
      cleared.
    * If the stream has a replication key but no bookmark for it, the
      bookmark is initialised to ``config["start_date"]``. This step runs
      regardless of the selection check above.

    The final state is emitted with ``singer.write_state`` and returned.
    """
    for stream in catalog["streams"]:
        # Stops at the first root-level entry that is not marked selected.
        root_deselected = any(
            entry['breadcrumb'] == [] and entry['metadata'].get('selected') != True
            for entry in stream['metadata']
        )
        # A stream deselected while it was the current stream must not stay
        # recorded as the current stream.
        if root_deselected and stream["tap_stream_id"] == get_currently_syncing(state):
            set_currently_syncing(state, None)

        stream_id = stream['tap_stream_id']
        replication_key = determine_replication_key(stream_id)
        if replication_key:
            # A missing bookmark (new integration, newly selected, reset,
            # etc.) falls back to the default start date from the config.
            if get_bookmark(state, stream_id, replication_key) is None:
                state = write_bookmark(state,
                                       stream_id,
                                       replication_key,
                                       config["start_date"])

    singer.write_state(state)
    return state
コード例 #2
0
def sync(client, catalog, config, state):
    """Sync every selected stream in ``catalog["streams"]``, resuming if needed.

    When the state names a currently-syncing stream, selected streams are
    skipped until that stream is reached; from there on every selected stream
    is synced. Each stream is recorded in the state as the current stream
    before its sync starts and the marker is cleared when it finishes; the
    state is written after both changes. A record-count metric is emitted per
    synced stream.

    Raises:
        Exception: if a selected stream's ``tap_stream_id`` matches none of
            the sync routines dispatched below.
    """
    resume_from = bookmarks.get_currently_syncing(state)
    if not resume_from:
        singer.log_info("Starting sync")
    else:
        singer.log_info("Resuming sync from %s", resume_from)

    for stream in catalog["streams"]:
        is_selected = metadata.get(metadata.to_map(stream['metadata']),
                                   (), 'selected')
        stream_id = stream["tap_stream_id"]

        # Unselected streams are never synced.
        if not is_selected:
            singer.log_info("%s: not selected", stream_id)
            continue

        # While resuming, streams ahead of the interrupted one are treated as
        # already synced.
        if resume_from and stream_id != resume_from:
            singer.log_info("%s: already synced", stream_id)
            continue

        singer.log_info("%s: starting sync", stream_id)

        # The resume point has been reached (or there was none), so stop
        # skipping and record this stream as the one to resume on next run.
        resume_from = None
        state = bookmarks.set_currently_syncing(state, stream_id)
        singer.write_state(state)

        # Pick the sync routine by stream id.
        if stream_id == "activity_types":
            result = sync_activity_types(client, state, stream)
        elif stream_id == "leads":
            result = sync_leads(client, state, stream, config)
        elif stream_id.startswith("activities_"):
            result = sync_activities(client, state, stream, config)
        elif stream_id in ("campaigns", "lists"):
            result = sync_paginated(client, state, stream)
        elif stream_id == "programs":
            result = sync_programs(client, state, stream)
        else:
            raise Exception("Stream %s not implemented" % stream_id)
        state, record_count = result

        # Report how many records this stream produced.
        record_metric = singer.metrics.record_counter(stream_id)
        record_metric.value = record_count
        record_metric._pop()  # pylint: disable=protected-access

        # The stream is done, so nothing is "currently syncing" any more.
        state = bookmarks.set_currently_syncing(state, None)
        singer.write_state(state)
        singer.log_info("%s: finished sync", stream_id)

    # When Corona is not supported, the warning with instructions on how to
    # get it supported is logged near the end of the tap log.
    singer.log_info("Finished sync.")
    if not client.use_corona:
        singer.log_warning(NO_CORONA_WARNING)
コード例 #3
0
def sync(config: Dict[str, Any], state: Dict[str, Any],
         catalog: Catalog) -> None:
    """Sync each selected top-level stream together with the records it yields.

    Substreams returned by ``catalog.get_selected_streams`` are skipped in
    the outer loop. Every ``(tap_stream_id, record)`` pair produced by a
    parent stream's ``sync`` is passed to ``handle_record``. After a parent
    finishes, the state is written and ACTIVATE_VERSION messages are written
    for its selected substreams and, when it has a version, for the parent
    itself. Finally the currently-syncing marker is cleared and the state is
    written once more.
    """
    # Singer stores catalog entries as a list and iterates over it in
    # .get_stream(), so definitions and versions are kept here keyed by
    # tap_stream_id for cheaper lookups later.
    stream_defs: Dict[str, Union["Stream", "Substream"]] = {}
    stream_versions: Dict[str, Optional[int]] = {}

    check_dependency_conflicts(catalog)

    for selected in catalog.get_selected_streams(state):
        stream_id = selected.tap_stream_id

        if is_substream(AVAILABLE_STREAMS[stream_id]):
            LOGGER.info(
                'Skipping substream "%s" until parent stream is reached',
                stream_id,
            )
            continue

        LOGGER.info("Syncing stream: %s", stream_id)

        since = prepare_stream(stream_id, stream_defs, stream_versions,
                               catalog, config, state)
        parent_def = stream_defs[stream_id]

        LOGGER.info("Querying since: %s", since)

        # Each yielded record carries the id of the stream it belongs to.
        for record_stream_id, record in parent_def.sync(since):  # type: ignore
            state = handle_record(
                record_stream_id,
                record,
                stream_defs[record_stream_id],
                stream_versions[record_stream_id],
                state,
            )

        write_state(state)

        # All substreams are necessarily FULL_TABLE and thus have a version,
        # so their ACTIVATE_VERSION messages are written without a check.
        for child_def in parent_def.substreams:  # type: ignore
            if child_def.is_selected:
                write_activate_version(
                    child_def.tap_stream_id,
                    stream_versions[child_def.tap_stream_id],
                )

        # The parent only gets an ACTIVATE_VERSION when it has a version.
        parent_version = stream_versions[parent_def.tap_stream_id]
        if parent_version is not None:
            write_activate_version(parent_def.tap_stream_id, parent_version)

    state = set_currently_syncing(state, None)
    write_state(state)
コード例 #4
0
def handle_record(
    tap_stream_id: str,
    record: Dict[str, Any],
    stream_def: Union["Stream", "Substream"],
    stream_version: Optional[int],
    state: Dict[str, Any],
) -> Dict[str, Any]:
    """Emit one record and, when possible, advance the stream's bookmark.

    The record is always printed. For a stream that is not a substream, the
    stream is marked as currently syncing. When ``stream_def`` is a valid
    incremental stream and the record carries a non-``None`` value under the
    replication key, that value is stored as the bookmark and the state is
    written; if the value is missing, a warning is logged and the state is
    left as it is.

    Returns:
        The (possibly updated) state.
    """
    print_record(tap_stream_id, record, version=stream_version)

    if not is_substream(stream_def):
        state = set_currently_syncing(state, tap_stream_id)

    if stream_def.is_valid_incremental:
        key = stream_def.replication_key

        # mypy does not take the is_valid_incremental check above into account
        new_bookmark = record.get(key)  # type: ignore

        if new_bookmark is not None:
            LOGGER.debug("Adding bookmark for %s at %s", tap_stream_id, new_bookmark)
            state = write_bookmark(state, tap_stream_id, key, new_bookmark)
            write_state(state)
        else:
            LOGGER.warning(
                'State not updated. Replication key "%s" not found in record for stream "%s": %s',
                key,
                tap_stream_id,
                record,
            )

    return state
コード例 #5
0
def sync(config: Dict[str, Any], state: Dict[str, Any],
         catalog: Catalog) -> None:
    """Sync each selected top-level stream together with the records it yields.

    Substreams returned by ``catalog.get_selected_streams`` are skipped in
    the outer loop. Every ``(tap_stream_id, record)`` pair produced by a
    parent stream's ``sync`` is passed to ``handle_record``, and the state is
    written after each parent finishes. Finally the currently-syncing marker
    is cleared and the state is written once more.
    """
    # Singer stores catalog entries as a list and iterates over it in
    # .get_stream(), so definitions and versions are kept here keyed by
    # tap_stream_id for cheaper lookups later.
    stream_defs: Dict[str, Union["Stream", "Substream"]] = {}
    stream_versions: Dict[str, Optional[int]] = {}

    check_dependency_conflicts(catalog)

    for selected in catalog.get_selected_streams(state):
        stream_id = selected.tap_stream_id

        if is_substream(AVAILABLE_STREAMS[stream_id]):
            LOGGER.info(
                'Skipping substream "%s" until parent stream is reached',
                stream_id,
            )
            continue

        LOGGER.info("Syncing stream: %s", stream_id)

        since = prepare_stream(stream_id, stream_defs, stream_versions,
                               catalog, config, state)
        parent_def = stream_defs[stream_id]

        LOGGER.info("Querying since: %s", since)

        # Each yielded record carries the id of the stream it belongs to.
        for record_stream_id, record in parent_def.sync(since):  # type: ignore
            state = handle_record(
                record_stream_id,
                record,
                stream_defs[record_stream_id],
                stream_versions[record_stream_id],
                state,
            )

        write_state(state)

    state = set_currently_syncing(state, None)
    write_state(state)
コード例 #6
0
def update_current_stream(state, stream_name=None):
    """Set the currently syncing stream in ``state`` and write the state out.

    ``stream_name`` is passed straight to ``set_currently_syncing``; with the
    default of ``None`` that call receives ``None``.
    """
    # The helper's return value is not used, so the state written below is the
    # object passed in. NOTE(review): this relies on set_currently_syncing
    # updating ``state`` in place -- confirm.
    set_currently_syncing(state, stream_name)
    singer.write_state(state)