Esempio n. 1
0
    def test_with_incremental_substream(self, _):
        """Ensure substreams don't set currently_syncing.

        The second positional argument is unused here; presumably it is
        injected by a patch decorator that sits outside this excerpt
        (TODO confirm).
        """

        # Marking a substream as currently_syncing is pointless: syncing
        # relies on the parent stream, so we could not resume directly
        # from the substream anyway.
        incoming_record = {"bar": "biz", "modified_at": "2020-01-01"}
        incremental_def = MagicMock(
            is_valid_incremental=True,
            replication_key="modified_at",
        )

        resulting_state = handle_record(
            "foo",
            record=incoming_record,
            stream_def=incremental_def,
            stream_version=1,
            state={"foo": "bar"},
        )

        # The bookmark is recorded, the pre-existing key survives, and no
        # "currently_syncing" entry is added.
        expected_state = {
            "bookmarks": {"foo": {"modified_at": "2020-01-01"}},
            "foo": "bar",
        }
        self.assertDictEqual(resulting_state, expected_state)
Esempio n. 2
0
    def test_with_incremental_stream(self):
        """Ensure currently_syncing is set and the bookmark is added/updated."""

        incoming_record = {"bar": "biz", "modified_at": "2020-01-01"}
        incremental_def = MagicMock(
            is_valid_incremental=True,
            replication_key="modified_at",
        )

        resulting_state = handle_record(
            "foo",
            record=incoming_record,
            stream_def=incremental_def,
            stream_version=1,
            state={"foo": "bar"},
        )

        # Expect the stream to be flagged as currently syncing, a bookmark
        # keyed by the replication key, and the pre-existing key preserved.
        expected_state = {
            "currently_syncing": "foo",
            "bookmarks": {"foo": {"modified_at": "2020-01-01"}},
            "foo": "bar",
        }
        self.assertDictEqual(resulting_state, expected_state)
Esempio n. 3
0
    def test_with_full_table_stream(self):
        """Ensure a non-incremental stream sets currently_syncing only.

        With ``is_valid_incremental=False`` and no replication key, the
        expected state carries no "bookmarks" entry.
        """

        full_table_def = MagicMock(
            is_valid_incremental=False,
            replication_key=None,
        )

        resulting_state = handle_record(
            "foo",
            record={"bar": "biz"},
            stream_def=full_table_def,
            stream_version=1234,
            state={"foo": "bar"},
        )

        expected_state = {"currently_syncing": "foo", "foo": "bar"}
        self.assertDictEqual(resulting_state, expected_state)
Esempio n. 4
0
    def test_with_missing_replication_key(self):
        """Ensure bookmarks stay untouched when the record lacks the
        replication key.
        """

        # The stream is incremental on "modified_at", but the record below
        # deliberately omits that field.
        record_without_key = {"bar": "biz"}
        incremental_def = MagicMock(
            is_valid_incremental=True,
            replication_key="modified_at",
        )

        resulting_state = handle_record(
            "foo",
            record=record_without_key,
            stream_def=incremental_def,
            stream_version=1,
            state={"foo": "bar"},
        )

        # No "bookmarks" entry is expected; only currently_syncing is added.
        expected_state = {
            "currently_syncing": "foo",
            "foo": "bar",
        }
        self.assertDictEqual(resulting_state, expected_state)
def process_stream(
    stream_def: Union[Stream, ResponseSubstream, EndpointSubstream],
    stream_version: Optional[int],
    state: Dict[str, Any],
    json_message: Dict[str, Any],
    filter_datetime: "datetime",
) -> None:
    """Process one consumed message for ``stream_def``.

    The message is logged, its record is run through ``filter_record`` and,
    unless filtered out, handed to ``handle_record``. When the stream is
    selected, sub-records (for ``Stream`` definitions) and the transformed
    records are handed to ``handle_record`` as well, and the resulting state
    is emitted with ``write_state``.

    Args:
        stream_def: Definition of the stream the message belongs to.
        stream_version: Version forwarded unchanged to ``handle_record``.
        state: Current state; rebound locally to each ``handle_record``
            return value. Whether the caller's dict is mutated depends on
            ``handle_record``, which is not visible here.
        json_message: Message payload; must contain the keys ``"object"``
            (used to derive the stream id) and ``"record"``.
        filter_datetime: Passed to ``DataContext`` and ``sync_sub_records``.

    Returns:
        Always ``None``.
    """
    LOGGER.info("Message: %s", json.dumps(json_message))

    stream_id = pluralize(underscore(json_message["object"]))
    raw_record = json_message["record"]

    # Open question carried over from the original author: should filtering
    # be based on the message timestamp or on the replication key?
    filter_context = DataContext(
        tap_stream_id=stream_id,
        stream=stream_def,
        filter_datetime=filter_datetime,
    )
    if filter_record(raw_record, filter_context):
        return None

    state = handle_record(stream_id, raw_record, stream_def, stream_version,
                          state)

    # Only selected streams go on to the transform step and state write.
    if not stream_def.is_selected:
        return None

    if isinstance(stream_def, Stream):
        for substream in stream_def.substreams:
            # EndpointSubstreams cannot be handled this way; the original
            # author assumes the producer pushes their data much like the
            # API does, i.e. as separate messages.
            if not substream.is_selected or not isinstance(
                    substream, ResponseSubstream):
                continue

            # sync_sub_records already performs the transformations, so no
            # transformer is invoked for sub-records here.
            sub_records = stream_def.sync_sub_records(substream, raw_record,
                                                      filter_datetime)
            for tap_substream_id, sub_record in sub_records:
                # NOTE(review): the parent ``stream_def`` (not ``substream``)
                # is passed to handle_record for sub-records -- confirm this
                # is intended.
                state = handle_record(tap_substream_id, sub_record,
                                      stream_def, stream_version, state)

        with stream_def.transformer_class() as transformer:
            transformed_records = transformer.transform(
                raw_record,
                stream_def.schema_dict,
                context=DataContext(
                    stream=stream_def,
                    filter_datetime=filter_datetime,
                    tap_stream_id=stream_id,
                ),
                metadata=stream_def.mapped_metadata,
            )
            # NOTE(review): handle_record already ran on the raw record
            # above and runs again for each transformed record -- confirm
            # the double handling is intended.
            for transformed in transformed_records:
                state = handle_record(stream_id, transformed, stream_def,
                                      stream_version, state)

    elif isinstance(stream_def, EndpointSubstream):
        # Assumes the consumed data mirrors the API, e.g.
        # /customer/<id>/notes arrives as its own individual message.
        endpoint_context = DataContext(
            tap_stream_id=stream_def.tap_stream_id,
            stream=stream_def,
            filter_datetime=filter_datetime,
        )

        with stream_def.transformer_class() as transformer:
            for transformed in transformer.transform(
                    raw_record,
                    stream_def.schema_dict,
                    context=endpoint_context,
                    metadata=stream_def.mapped_metadata,
            ):
                state = handle_record(stream_id, transformed, stream_def,
                                      stream_version, state)

    write_state(state)
    return None