Beispiel #1
0
def do_sync(client, catalog, state, start_date):
    """Sync every selected stream of the catalog, then write the state.

    For each selected stream its schema is written first, followed by the
    schemas of its selected sub streams (as listed in ``SUB_STREAMS``).
    Streams that are themselves sub streams are not synced directly; the
    parent stream's sync is responsible for them.

    Args:
        client: Client object handed to each stream class constructor.
        catalog: Catalog whose ``streams`` are iterated.
        state: State passed to ``sync_stream`` and written once at the end.
        start_date: Start date forwarded to ``sync_stream``.
    """
    selected_stream_names = get_selected_streams(catalog)
    validate_dependencies(selected_stream_names)
    populate_class_schemas(catalog, selected_stream_names)
    all_sub_stream_names = get_sub_stream_names()
    skipped_streams = []

    for stream in catalog.streams:
        stream_name = stream.tap_stream_id
        if stream_name not in selected_stream_names:
            skipped_streams.append(stream_name)
            continue

        mdata = metadata.to_map(stream.metadata)
        key_properties = metadata.get(mdata, (), "table-key-properties")
        singer.write_schema(stream_name, stream.schema.to_dict(),
                            key_properties)

        # Emit the schemas of the selected sub streams right after the
        # parent's schema, before any record is produced.
        for sub_stream_name in SUB_STREAMS.get(stream_name) or ():
            if sub_stream_name not in selected_stream_names:
                continue
            sub_stream = STREAMS[sub_stream_name].stream
            sub_mdata = metadata.to_map(sub_stream.metadata)
            sub_key_properties = metadata.get(sub_mdata, (),
                                              "table-key-properties")
            singer.write_schema(sub_stream.tap_stream_id,
                                sub_stream.schema.to_dict(),
                                sub_key_properties)

        # parent stream will sync sub stream
        if stream_name in all_sub_stream_names:
            continue

        LOGGER.info("%s: Starting sync", stream_name)
        instance = STREAMS[stream_name](client)
        counter_value = sync_stream(state, start_date, instance)
        LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)
        zendesk_metrics.log_aggregate_rates()

    # Report skipped streams once, after the whole catalog has been walked,
    # so the list is complete and is not repeated after every synced stream.
    if skipped_streams:
        LOGGER.info("The following streams were skipped: %s",
                    ", ".join(skipped_streams))

    singer.write_state(state)
    LOGGER.info("Finished sync")
    zendesk_metrics.log_aggregate_rates()
def do_sync(client, catalog, state, config):
    """Sync every selected stream of the catalog, writing state as it goes.

    For each selected stream its schema is written first, followed by the
    schemas of its selected sub streams (as listed in ``SUB_STREAMS``).
    When a stream's root metadata carries ``sideload-objects``, its schema
    is replaced by the result of ``get_side_load_schemas`` before being
    written. Streams that are themselves sub streams are not synced
    directly; the parent stream's sync is responsible for them.

    Args:
        client: Client object handed to each stream class constructor.
        catalog: Catalog whose ``streams`` are iterated.
        state: State passed to ``sync_stream``; written after each synced
            stream and once more at the end.
        config: Configuration passed to each stream class constructor;
            its ``start_date`` entry is forwarded to ``sync_stream``.
    """
    selected_stream_names = get_selected_streams(catalog)
    validate_dependencies(selected_stream_names)
    populate_class_schemas(catalog, selected_stream_names)
    all_sub_stream_names = get_sub_stream_names()

    for stream in catalog.streams:
        stream_name = stream.tap_stream_id
        mdata = metadata.to_map(stream.metadata)
        if stream_name not in selected_stream_names:
            LOGGER.info("%s: Skipping - not selected", stream_name)
            continue

        key_properties = metadata.get(mdata, (), 'table-key-properties')
        sideload_objects = metadata.get(mdata, (), 'sideload-objects')
        if sideload_objects:
            stream_schema = get_side_load_schemas(sideload_objects, stream)
            stream.schema = Schema.from_dict(stream_schema)

        singer.write_schema(stream_name, stream.schema.to_dict(),
                            key_properties)

        # Emit the schemas of the selected sub streams right after the
        # parent's schema, before any record is produced.
        for sub_stream_name in SUB_STREAMS.get(stream_name) or ():
            if sub_stream_name not in selected_stream_names:
                continue
            sub_stream = STREAMS[sub_stream_name].stream
            sub_mdata = metadata.to_map(sub_stream.metadata)
            sub_key_properties = metadata.get(sub_mdata, (),
                                              'table-key-properties')
            # Read the sideload selection from the sub stream's own
            # metadata; the parent's map describes the parent stream only.
            sub_sideload_objects = metadata.get(sub_mdata, (),
                                                'sideload-objects')
            if sub_sideload_objects:
                sub_stream_schema = get_side_load_schemas(
                    sub_sideload_objects, sub_stream)
                sub_stream.schema = Schema.from_dict(sub_stream_schema)
            singer.write_schema(sub_stream.tap_stream_id,
                                sub_stream.schema.to_dict(),
                                sub_key_properties)

        # parent stream will sync sub stream
        if stream_name in all_sub_stream_names:
            continue

        LOGGER.info("%s: Starting sync", stream_name)
        instance = STREAMS[stream_name](client, config)
        counter_value = sync_stream(state, config.get('start_date'), instance)
        # Write state after each stream so progress made so far is emitted
        # even if a later stream fails.
        singer.write_state(state)
        LOGGER.info("%s: Completed sync (%s rows)", stream_name, counter_value)
        zendesk_metrics.log_aggregate_rates()

    singer.write_state(state)
    LOGGER.info("Finished sync")
    zendesk_metrics.log_aggregate_rates()