Example #1
    def sync(self):
        bookmark = bookmarks.get_bookmark(
            self.state,
            self.stream_name,
            self.replication_keys[0],
            MIN_TIME,
        )

        start_dt = utils.strptime_to_utc(bookmark)
        resp = self.client.request_feed(self.stream_name)

        # Emit only stations updated since the bookmark, tracking the newest
        # last_reported value so the bookmark can be advanced afterwards.
        max_dt = start_dt
        for station in resp.get("data").get("stations"):
            last_reported = station.get("last_reported")
            # last_reported is a Unix timestamp; interpret it as UTC directly
            # instead of converting via local time and relabeling it.
            last_reported_dt = datetime.fromtimestamp(last_reported, tz=pytz.UTC)
            if last_reported_dt > max_dt:
                max_dt = last_reported_dt
            if last_reported_dt > start_dt:
                yield station

        bookmarks.write_bookmark(
            self.state,
            self.stream_name,
            self.replication_keys[0],
            utils.strftime(max_dt),
        )
        messages.write_state(self.state)
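The bookmark helpers used above follow the standard singer-python state layout. A minimal sketch, assuming the stream is named "station_status" and the replication key is "last_reported" (both names are illustrative):

from singer import bookmarks

state = {}

# write_bookmark nests the value under state["bookmarks"][stream][key]
# and returns the updated state dict.
state = bookmarks.write_bookmark(
    state, "station_status", "last_reported", "2021-01-01T00:00:00.000000Z")
# state == {"bookmarks": {"station_status":
#                         {"last_reported": "2021-01-01T00:00:00.000000Z"}}}

# get_bookmark returns the stored value, or the supplied default
# (MIN_TIME above) when no bookmark has been written yet.
print(bookmarks.get_bookmark(
    state, "station_status", "last_reported", "1970-01-01T00:00:00Z"))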
Example #2
def sync_records(config, state, stream):
    write_schema(
        stream.tap_stream_id,
        stream.schema.to_dict(),
        stream.key_properties,
    )

    client = Client(config['subdomain'], config['api_key'])
    model_name = STREAM_MODEL_MAP[stream.tap_stream_id]
    model = client.model(model_name)
    domain = get_sync_domain(state, stream, model_name)
    sort_order = [
        ('write_date', 'asc'),
        ('create_date', 'asc'),
        ('id', 'asc'),
    ]

    # Get all fields defined in the schema
    fields = list(stream.schema.properties.keys())

    # Make sure id, create_date and write_date are requested so state can be
    # tracked per record
    fields.extend(['id', 'create_date', 'write_date'])
    for record in model.search_read_all(domain, sort_order, fields):
        transform(record)
        write_record(stream.tap_stream_id, record, time_extracted=utils.now())
        state = write_bookmark(state, stream.tap_stream_id, 'last_updated_at',
                               record['write_date'] or record['create_date'])
        state = write_bookmark(state, stream.tap_stream_id, 'last_record_id',
                               record['id'])
        write_state(state)
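Because write_state runs after every record, an interrupted run can resume from the last emitted row. A sketch of the state this loop leaves behind, assuming a stream named "res_partner" (the real stream ids come from STREAM_MODEL_MAP):

from singer.bookmarks import get_bookmark

# Illustrative state after a few records have been emitted.
state = {
    "bookmarks": {
        "res_partner": {
            "last_updated_at": "2021-06-01 12:34:56",
            "last_record_id": 1042,
        }
    }
}

# get_sync_domain (not shown here) can read both bookmarks back and build
# a filter that skips rows already emitted on the previous run.
last_updated_at = get_bookmark(state, "res_partner", "last_updated_at")
last_record_id = get_bookmark(state, "res_partner", "last_record_id")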
Example #3
    def sync(self):
        bookmark = bookmarks.get_bookmark(
            self.state,
            self.stream_name,
            self.replication_keys[0],
            MIN_TIME,
        )

        start_dt = utils.strptime_to_utc(bookmark)
        resp = self.client.request_feed(self.stream_name)
        last_updated = resp.get("last_updated").replace(tzinfo=pytz.UTC)

        if start_dt >= last_updated:
            return

        for station in resp.get("data").get("stations"):
            # Delete array/complex properties because we're not powerful enough yet
            if "rental_methods" in station:
                del station["rental_methods"]
            if "rental_uris" in station:
                del station["rental_uris"]
            yield station

        bookmarks.write_bookmark(
            self.state,
            self.stream_name,
            self.replication_keys[0],
            utils.strftime(last_updated),
        )
        messages.write_state(self.state)
Example #4
def sync(config: Dict[str, Any], state: Dict[str, Any],
         catalog: Catalog) -> None:
    # For looking up catalog-configured streams more efficiently later;
    # Singer stores catalog entries in a list and iterates over it with
    # .get_stream()
    stream_defs: Dict[str, Union["Stream", "Substream"]] = {}
    stream_versions: Dict[str, Optional[int]] = {}

    check_dependency_conflicts(catalog)

    for stream in catalog.get_selected_streams(state):
        if is_substream(AVAILABLE_STREAMS[stream.tap_stream_id]):
            LOGGER.info(
                'Skipping substream "%s" until parent stream is reached',
                stream.tap_stream_id,
            )

            continue

        LOGGER.info("Syncing stream: %s", stream.tap_stream_id)

        filter_datetime = prepare_stream(stream.tap_stream_id, stream_defs,
                                         stream_versions, catalog, config,
                                         state)
        stream_def = stream_defs[stream.tap_stream_id]

        LOGGER.info("Querying since: %s", filter_datetime)

        for tap_stream_id, record in stream_def.sync(
                filter_datetime):  # type: ignore
            state = handle_record(
                tap_stream_id,
                record,
                stream_defs[tap_stream_id],
                stream_versions[tap_stream_id],
                state,
            )

        write_state(state)

        for substream_def in stream_def.substreams:  # type: ignore
            if not substream_def.is_selected:
                continue

            # All substreams are necessarily FULL_TABLE and thus have a version,
            # so write their ACTIVATE_VERSION messages without check.
            write_activate_version(
                substream_def.tap_stream_id,
                stream_versions[substream_def.tap_stream_id],
            )

        if stream_versions[stream_def.tap_stream_id] is not None:
            write_activate_version(
                stream_def.tap_stream_id,
                stream_versions[stream_def.tap_stream_id],
            )

    state = set_currently_syncing(state, None)
    write_state(state)
Example #5
def handle_record(
    tap_stream_id: str,
    record: Dict[str, Any],
    stream_def: Union["Stream", "Substream"],
    stream_version: Optional[int],
    state: Dict[str, Any],
) -> Dict[str, Any]:
    """Handles a single record's emission"""

    print_record(tap_stream_id, record, version=stream_version)

    if not is_substream(stream_def):
        state = set_currently_syncing(state, tap_stream_id)

    if not stream_def.is_valid_incremental:
        return state

    replication_key = stream_def.replication_key

    # mypy doesn't narrow on the is_valid_incremental check above
    bookmark_date = record.get(replication_key)  # type: ignore

    if bookmark_date is None:
        LOGGER.warning(
            'State not updated. Replication key "%s" not found in record for stream "%s": %s',
            replication_key,
            tap_stream_id,
            record,
        )

        return state

    LOGGER.debug("Adding bookmark for %s at %s", tap_stream_id, bookmark_date)

    state = write_bookmark(
        state,
        tap_stream_id,
        replication_key,
        bookmark_date,
    )

    write_state(state)

    return state
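set_currently_syncing comes from singer.bookmarks and records which stream is in flight so a resumed run knows where to pick up; the sync functions above clear it with None once all streams are done. A minimal sketch (the stream name is illustrative):

from singer.bookmarks import set_currently_syncing, get_currently_syncing

state = set_currently_syncing({}, "my_stream")
# state == {"currently_syncing": "my_stream"}
print(get_currently_syncing(state))

# Clearing the marker signals that the previous sync finished cleanly.
state = set_currently_syncing(state, None)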
Example #6
def sync(config: Dict[str, Any], state: Dict[str, Any],
         catalog: Catalog) -> None:
    # For looking up catalog-configured streams more efficiently later;
    # Singer stores catalog entries in a list and iterates over it with
    # .get_stream()
    stream_defs: Dict[str, Union["Stream", "Substream"]] = {}
    stream_versions: Dict[str, Optional[int]] = {}

    check_dependency_conflicts(catalog)

    for stream in catalog.get_selected_streams(state):
        if is_substream(AVAILABLE_STREAMS[stream.tap_stream_id]):
            LOGGER.info(
                'Skipping substream "%s" until parent stream is reached',
                stream.tap_stream_id,
            )

            continue

        LOGGER.info("Syncing stream: %s", stream.tap_stream_id)

        filter_datetime = prepare_stream(stream.tap_stream_id, stream_defs,
                                         stream_versions, catalog, config,
                                         state)
        stream_def = stream_defs[stream.tap_stream_id]

        LOGGER.info("Querying since: %s", filter_datetime)

        for tap_stream_id, record in stream_def.sync(
                filter_datetime):  # type: ignore
            state = handle_record(
                tap_stream_id,
                record,
                stream_defs[tap_stream_id],
                stream_versions[tap_stream_id],
                state,
            )

        write_state(state)

    state = set_currently_syncing(state, None)
    write_state(state)
Example #7
def prepare_stream(
    tap_stream_id: str,
    stream_defs: _STREAM_DEFS,
    stream_versions: _STREAM_VERSIONS,
    catalog: Catalog,
    config: Dict[str, Any],
    state: Dict[str, Any],
) -> datetime:
    """Prepares a stream and any of its substreams by instantiating them and
    handling their preliminary Singer messages
    """

    # mypy isn't properly considering is_substream
    stream_def: "Stream" = AVAILABLE_STREAMS[tap_stream_id](
        catalog, config, filter_record)  # type: ignore
    stream_defs[stream_def.tap_stream_id] = stream_def

    if stream_def.has_substreams:
        stream_def.instantiate_substreams(catalog, filter_record)

        for substream_def in stream_def.substreams:
            if not substream_def.is_selected:
                LOGGER.info('Skipping sub-stream "%s"',
                            substream_def.tap_stream_id)

                continue

            # ignored type errors below seem to be caused by same issue as
            # https://github.com/python/mypy/issues/8993
            stream_defs[substream_def.tap_stream_id] = substream_def
            substream_version = get_full_table_version()
            stream_versions[substream_def.tap_stream_id] = substream_version

            write_schema(
                stream_name=substream_def.tap_stream_id,
                schema=substream_def.schema_dict,
                key_properties=substream_def.key_properties,
            )

            # All substreams are necessarily FULL_TABLE, so no need to
            # check if they're INCREMENTAL
            if is_first_run(substream_def.tap_stream_id, state):
                write_activate_version(
                    substream_def.tap_stream_id,
                    substream_version,
                )

                write_bookmark(state, substream_def.tap_stream_id,
                               "wrote_initial_activate_version", True)
                write_state(state)

    write_schema(
        stream_name=stream_def.tap_stream_id,
        schema=stream_def.schema_dict,
        key_properties=stream_def.key_properties,
    )

    filter_datetime = get_filter_datetime(stream_def, config["start_date"],
                                          state)
    stream_version = (None if stream_def.is_valid_incremental else
                      get_full_table_version())
    stream_versions[stream_def.tap_stream_id] = stream_version

    if not stream_def.is_valid_incremental and is_first_run(
            stream_def.tap_stream_id, state):
        write_activate_version(
            stream_def.tap_stream_id,
            stream_version,
        )

        write_bookmark(state, stream_def.tap_stream_id,
                       "wrote_initial_activate_version", True)
        write_state(state)

    return filter_datetime
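get_full_table_version and write_activate_version are not shown above. A common convention in Singer taps, sketched here as an assumption rather than this tap's actual code, is to use a millisecond timestamp as the full-table version and emit it with an ACTIVATE_VERSION message:

import time
import singer

def get_full_table_version() -> int:
    # A value that increases between runs, so the target can drop rows
    # belonging to older versions once the new full-table sync is activated.
    return int(time.time() * 1000)

def write_activate_version(stream_name: str, version: int) -> None:
    # singer-python emits an ACTIVATE_VERSION message for the stream.
    singer.write_version(stream_name, version)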