Esempio n. 1
0
def sync():
    """Sync every selected stream in the catalog and emit Singer messages.

    Steps, in order:

    1. Initialize the Shopify client and build the ``_sdc_shop_*`` fields
       (one per key in ``SDC_KEYS``) that are merged into every record.
    2. Write the schema of every selected stream and reset its record count.
    3. If the state holds a ``currently_sync_stream`` bookmark, call
       ``shuffle_streams`` so that stream gets synced first.
    4. For each selected stream: mark it as currently syncing, transform and
       write each record, then clear the marker and write the state.
    5. Log the number of records written per stream.
    """
    shop_attributes = initialize_shopify_client()
    sdc_fields = {"_sdc_shop_" + key: shop_attributes[key] for key in SDC_KEYS}

    # Emit all schemas first so we have them for child streams
    for catalog_entry in Context.catalog["streams"]:
        stream_id = catalog_entry["tap_stream_id"]
        if Context.is_selected(stream_id):
            singer.write_schema(
                stream_id,
                catalog_entry["schema"],
                catalog_entry["key_properties"],
                bookmark_properties=catalog_entry["replication_key"])
            Context.counts[stream_id] = 0

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        # Build the stream object only once we know it will be synced, so
        # unselected catalog entries never need a registered stream class.
        stream = Context.stream_objects[stream_id]()

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        # Schema and metadata map are identical for every record of the
        # stream, so resolve them once instead of once per record.
        record_schema = catalog_entry['schema']
        record_metadata = metadata.to_map(catalog_entry['metadata'])

        # some fields have epoch-time as date, hence transform into UTC date
        with Transformer(
                singer.UNIX_SECONDS_INTEGER_DATETIME_PARSING) as transformer:
            for rec in stream.sync():
                extraction_time = singer.utils.now()
                # On a key clash the _sdc_shop_* fields win over the record.
                rec = transformer.transform({
                    **rec,
                    **sdc_fields
                }, record_schema, record_metadata)
                singer.write_record(stream_id,
                                    rec,
                                    time_extracted=extraction_time)
                Context.counts[stream_id] += 1

        # Stream finished: clear the in-progress marker and persist state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

    LOGGER.info('----------------------')
    for stream_id, stream_count in Context.counts.items():
        LOGGER.info('%s: %d', stream_id, stream_count)
    LOGGER.info('----------------------')
Esempio n. 2
0
def sync():
    """Sync every selected stream in the catalog and emit Singer messages.

    Steps, in order:

    1. Initialize the Shopify client.
    2. Write the schema of every selected stream and reset its record count.
    3. If the state holds a ``currently_sync_stream`` bookmark, call
       ``shuffle_streams`` so that stream gets synced first.
    4. For each selected stream: mark it as currently syncing, transform and
       write each record, then clear the marker and write the state.
    5. Log the number of records written per stream.
    """
    initialize_shopify_client()

    # Emit all schemas first so we have them for child streams
    for catalog_entry in Context.catalog["streams"]:
        stream_id = catalog_entry["tap_stream_id"]
        if Context.is_selected(stream_id):
            singer.write_schema(
                stream_id,
                catalog_entry["schema"],
                catalog_entry["key_properties"],
                bookmark_properties=catalog_entry["replication_key"])
            Context.counts[stream_id] = 0

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        # Build the stream object only once we know it will be synced, so
        # unselected catalog entries never need a registered stream class.
        stream = Context.stream_objects[stream_id]()

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        # Schema and metadata map are identical for every record of the
        # stream, so resolve them once instead of once per record.
        record_schema = catalog_entry['schema']
        record_metadata = metadata.to_map(catalog_entry['metadata'])

        with Transformer() as transformer:
            for rec in stream.sync():
                extraction_time = singer.utils.now()
                rec = transformer.transform(rec, record_schema,
                                            record_metadata)
                singer.write_record(stream_id,
                                    rec,
                                    time_extracted=extraction_time)
                Context.counts[stream_id] += 1

        # Stream finished: clear the in-progress marker and persist state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

    LOGGER.info('----------------------')
    for stream_id, stream_count in Context.counts.items():
        LOGGER.info('%s: %d', stream_id, stream_count)
    LOGGER.info('----------------------')
Esempio n. 3
0
def get_selected_parents():
    """Lazily yield a fresh stream object for each selected parent stream.

    The candidate ids are checked in a fixed order; for every id that
    ``Context.is_selected`` accepts, the entry registered under that id in
    ``Context.stream_objects`` is called and its result is yielded.
    """
    candidate_ids = ('orders', 'customers', 'products', 'custom_collections')
    for stream_id in candidate_ids:
        if not Context.is_selected(stream_id):
            continue
        stream_factory = Context.stream_objects[stream_id]
        yield stream_factory()
Esempio n. 4
0
def sync():
    """Sync every selected stream and log per-stream counts and durations.

    Steps, in order:

    1. Initialize the Shopify client.
    2. Write the schema of every selected stream, reset its record count
       and set its duration to ``None``.
    3. If the state holds a ``currently_sync_stream`` bookmark, call
       ``shuffle_streams`` so that stream gets synced first.
    4. For each selected stream: mark it as currently syncing, then either
       delegate to ``stream.sync_async()`` (when the ``use_async`` config
       flag is truthy and the stream reports ``async_available``) or
       transform and write each record from ``stream.sync()``. Afterwards
       clear the marker, write the state and record the elapsed time.
    5. Log one summary message with the shop name and, per stream, the
       record count and duration.

    Durations are stored as ``HH:MM:SS`` strings; the hour field keeps
    growing past 23 instead of wrapping around for very long syncs.
    """
    initialize_shopify_client()

    # Emit all schemas first so we have them for child streams
    for catalog_entry in Context.catalog["streams"]:
        stream_id = catalog_entry["tap_stream_id"]
        if Context.is_selected(stream_id):
            singer.write_schema(
                stream_id,
                catalog_entry["schema"],
                catalog_entry["key_properties"],
                bookmark_properties=catalog_entry["replication_key"])
            Context.counts[stream_id] = 0
            Context.durations[stream_id] = None

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get('bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        # A monotonic clock is immune to wall-clock adjustments that could
        # otherwise produce a negative or inflated duration.
        stream_start_time = time.monotonic()

        # Build the stream object only once we know it will be synced.
        stream = Context.stream_objects[stream_id]()
        stream.schema = catalog_entry['schema']

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        if Context.config.get("use_async", False) and stream.async_available:
            # The async path reports its own record count.
            Context.counts[stream_id] = stream.sync_async()
        else:
            # The metadata map is identical for every record of the stream,
            # so build it once instead of once per record.
            record_metadata = metadata.to_map(catalog_entry['metadata'])
            with Transformer() as transformer:
                for rec in stream.sync():
                    extraction_time = singer.utils.now()
                    rec = transformer.transform(rec, stream.schema, record_metadata)
                    singer.write_record(stream_id,
                                        rec,
                                        time_extracted=extraction_time)
                    Context.counts[stream_id] += 1

        # Stream finished: clear the in-progress marker and persist state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

        # Format by hand: strftime("%H") on gmtime() wraps back to 00 once
        # the elapsed time reaches 24 hours.
        elapsed_seconds = int(time.monotonic() - stream_start_time)
        hours, remainder = divmod(elapsed_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        Context.durations[stream_id] = "{:02d}:{:02d}:{:02d}".format(
            hours, minutes, seconds)

    # Assemble the summary as a list of lines and join once; the leading
    # and trailing empty strings produce the surrounding newlines.
    div = "-"*50
    summary_lines = ["", div, "Shop: {}".format(Context.config['shop']), div, ""]
    for stream_id, stream_count in Context.counts.items():
        summary_lines.append("{}: {}".format(stream_id, stream_count))
        summary_lines.append("Duration: {}".format(Context.durations[stream_id]))
    summary_lines.append(div)
    summary_lines.append("")
    LOGGER.info("\n".join(summary_lines))
Esempio n. 5
0
def sync():
    """Sync every selected stream, wrapping failures in ``ShopifyError``.

    Steps, in order:

    1. Initialize the Shopify client.
    2. Write the schema of every selected stream and reset its record count.
    3. If the state holds a ``currently_sync_stream`` bookmark, call
       ``shuffle_streams`` so that stream gets synced first.
    4. For each selected stream: mark it as currently syncing, transform and
       write each record, then clear the marker and write the state.
    5. Log the number of records written per stream.

    Raises:
        ShopifyError: for any exception raised while a stream's records are
            being fetched, transformed or written. The original exception
            is chained as the cause. For connection errors the ``errors``
            value of the JSON response body is attached as the message when
            it can be extracted.
    """
    initialize_shopify_client()

    # Emit all schemas first so we have them for child streams
    for catalog_entry in Context.catalog["streams"]:
        stream_id = catalog_entry["tap_stream_id"]
        if Context.is_selected(stream_id):
            singer.write_schema(
                stream_id,
                catalog_entry["schema"],
                catalog_entry["key_properties"],
                bookmark_properties=catalog_entry["replication_key"])
            Context.counts[stream_id] = 0

    # If there is a currently syncing stream bookmark, shuffle the
    # stream order so it gets sync'd first
    currently_sync_stream_name = Context.state.get(
        'bookmarks', {}).get('currently_sync_stream')
    if currently_sync_stream_name:
        shuffle_streams(currently_sync_stream_name)

    # Loop over streams in catalog
    for catalog_entry in Context.catalog['streams']:
        stream_id = catalog_entry['tap_stream_id']

        if not Context.is_selected(stream_id):
            LOGGER.info('Skipping stream: %s', stream_id)
            continue

        LOGGER.info('Syncing stream: %s', stream_id)

        # Build the stream object only once we know it will be synced, so
        # unselected catalog entries never need a registered stream class.
        stream = Context.stream_objects[stream_id]()

        if not Context.state.get('bookmarks'):
            Context.state['bookmarks'] = {}
        Context.state['bookmarks']['currently_sync_stream'] = stream_id

        with Transformer() as transformer:
            try:
                # Schema and metadata map are identical for every record of
                # the stream, so resolve them once. This stays inside the
                # try so a failure here is still wrapped in ShopifyError.
                record_schema = catalog_entry['schema']
                record_metadata = metadata.to_map(
                    catalog_entry['metadata'])
                for rec in stream.sync():
                    extraction_time = singer.utils.now()
                    rec = transformer.transform(rec, record_schema,
                                                record_metadata)
                    singer.write_record(stream_id,
                                        rec,
                                        time_extracted=extraction_time)
                    Context.counts[stream_id] += 1
            except pyactiveresource.connection.ResourceNotFound as exc:
                raise ShopifyError(exc,
                                   'Ensure shop is entered correctly') from exc
            except pyactiveresource.connection.UnauthorizedAccess as exc:
                raise ShopifyError(exc, 'Invalid access token - Re-authorize the connection') \
                    from exc
            except pyactiveresource.connection.ConnectionError as exc:
                # Best effort: use the API's own error text when the
                # response body is JSON carrying an 'errors' key.
                msg = ''
                try:
                    body = json.loads(exc.response.body.decode())
                    msg = body.get('errors')
                except (AttributeError, TypeError, ValueError):
                    # Missing response/body, undecodable bytes, invalid
                    # JSON or a non-mapping payload: keep the empty message.
                    pass
                # Raised after the try rather than from a `finally`, which
                # would silently discard any in-flight exception (including
                # KeyboardInterrupt/SystemExit).
                raise ShopifyError(exc, msg) from exc
            except Exception as exc:
                raise ShopifyError(exc) from exc

        # Stream finished: clear the in-progress marker and persist state.
        Context.state['bookmarks'].pop('currently_sync_stream')
        singer.write_state(Context.state)

    LOGGER.info('----------------------')
    for stream_id, stream_count in Context.counts.items():
        LOGGER.info('%s: %d', stream_id, stream_count)
    LOGGER.info('----------------------')