Ejemplo n.º 1
0
def test_incremental_sync(config, configured_catalog):
    """Check that incremental reads of ``ad_group_ad_report`` honour the cursor.

    First sync: runs with ``start_date`` set to one month ago. It must emit at
    least one STATE message, and every record's ``segments.date`` must lie
    between ``start_date - GAP_DAYS`` and the last emitted state.

    Second sync: starts from that last state. Records may not be older than
    ``state - GAP_DAYS`` and emitted states may not move backwards.

    Dates are compared as strings; this assumes ``segments.date`` uses the
    same format as ``to_date_string()`` -- TODO confirm against the stream.
    """
    today = pendulum.now().date()
    start_date = today.subtract(months=1)
    config["start_date"] = start_date.to_date_string()
    customer_id = config["customer_id"]

    google_ads_client = SourceGoogleAds()
    records = list(google_ads_client.read(AirbyteLogger(), config, ConfiguredAirbyteCatalog.parse_obj(configured_catalog)))

    # Walk backwards: the last STATE message holds the final cursor value.
    latest_state = None
    for message in reversed(records):
        if message and message.type == Type.STATE:
            latest_state = message.state.data["ad_group_ad_report"][customer_id]["segments.date"]
            break
    # Fail loudly here rather than with a TypeError in the comparisons below.
    assert latest_state is not None, "first sync did not emit any STATE message"

    # Loop-invariant lower bound for the first sync.
    earliest_allowed = start_date.subtract(days=GAP_DAYS).to_date_string()
    for message in records:
        if not message or message.type != Type.RECORD:
            continue
        cursor_value = message.record.data["segments.date"]
        assert cursor_value <= latest_state
        assert cursor_value >= earliest_allowed

    #  next sync
    records = list(
        google_ads_client.read(
            AirbyteLogger(),
            config,
            ConfiguredAirbyteCatalog.parse_obj(configured_catalog),
            {"ad_group_ad_report": {customer_id: {"segments.date": latest_state}}},
        )
    )

    # Loop-invariant lower bound for the second sync.
    earliest_allowed = pendulum.parse(latest_state).subtract(days=GAP_DAYS).to_date_string()
    for message in records:
        # Same falsy-message guard as the first sync, for consistency.
        if not message:
            continue
        if message.type == Type.RECORD:
            assert message.record.data["segments.date"] >= earliest_allowed
        elif message.type == Type.STATE:
            assert message.state.data["ad_group_ad_report"][customer_id]["segments.date"] >= latest_state
Ejemplo n.º 2
0
def _assert_records_not_older_than_state(records, initial_state):
    """Assert each RECORD's ``segments.date`` is >= the most recent state.

    ``initial_state`` is the bound used until the first STATE message is seen;
    after that, the bound is whatever the latest STATE message reported.
    """
    current_state = initial_state
    for record in records:
        if not record:
            continue
        if record.type == Type.STATE:
            current_state = record.state.data["ad_group_ad_report"]["segments.date"]
        elif record.type == Type.RECORD:
            assert record.record.data["segments.date"] >= current_state


def test_incremental_sync(config):
    """Run incremental reads from several input states and check the output.

    For the two regular states, no record may be older than the current state,
    which starts at the input state minus 14 days and follows emitted STATE
    messages. For the abnormal state "2029-06-04", no RECORD message may be
    emitted and every STATE message must echo the input state unchanged.
    """
    google_ads_client = SourceGoogleAds()

    # First sync, then "Next sync": identical checks from two different states.
    for state in ("2021-05-24", "2021-06-04"):
        records = google_ads_client.read(
            AirbyteLogger(),
            config,
            ConfiguredAirbyteCatalog.parse_obj(SAMPLE_CATALOG),
            {"ad_group_ad_report": {"segments.date": state}},
        )
        # 14-day look-back before the state; presumably mirrors the
        # connector's own re-read window -- TODO confirm.
        lookback_start = pendulum.parse(state).subtract(days=14).to_date_string()
        _assert_records_not_older_than_state(records, lookback_start)

    # Abnormal state
    # NOTE(review): this date was presumably chosen to be far in the future
    # when the test was written; the assertions depend on it staying so.
    state = "2029-06-04"
    records = google_ads_client.read(
        AirbyteLogger(),
        config,
        ConfiguredAirbyteCatalog.parse_obj(SAMPLE_CATALOG),
        {"ad_group_ad_report": {"segments.date": state}},
    )

    no_records = True
    for record in records:
        if not record:
            continue
        if record.type == Type.STATE:
            assert record.state.data["ad_group_ad_report"]["segments.date"] == state
        elif record.type == Type.RECORD:
            no_records = False

    assert no_records
Ejemplo n.º 3
0
    def read(
        self, logger: AirbyteLogger, config_container: ConfigContainer, catalog_path: str, state_path: str = None
    ) -> Generator[AirbyteMessage, None, None]:
        """
        Implements the parent class read method.

        Runs internal discovery, parses the configured catalog found at
        ``catalog_path``, derives a Singer catalog with that selection applied,
        builds the read command and returns the result of ``SingerHelper.read``.
        """
        config_path = config_container.config_path
        discovered = self._discover_internal(logger, config_path)

        configured = ConfiguredAirbyteCatalog.parse_obj(self.read_config(catalog_path))
        singer_catalog_path = SingerHelper.create_singer_catalog_with_selection(configured, discovered.singer_catalog)

        command = self.read_cmd(logger, config_path, singer_catalog_path, state_path)
        return SingerHelper.read(logger, command)
Ejemplo n.º 4
0
def configured_catalog_fixture():
    """Return a configured catalog with a single incremental ``quotes`` stream.

    The stream declares ``updatedAt`` as both its default and its configured
    cursor field and uses the ``append`` destination sync mode.
    """
    quotes_stream = {
        "name": "quotes",
        "json_schema": {},
        "supported_sync_modes": ["full_refresh", "incremental"],
        "source_defined_cursor": True,
        "default_cursor_field": ["updatedAt"],
    }
    configured_stream = {
        "stream": quotes_stream,
        "sync_mode": "incremental",
        "cursor_field": ["updatedAt"],
        "destination_sync_mode": "append",
    }
    return ConfiguredAirbyteCatalog.parse_obj({"streams": [configured_stream]})
Ejemplo n.º 5
0
def test_read_catalog(source):
    """Write a configured catalog to a temp file and check ``source.read_catalog`` returns an equal object."""
    stream_definition = {"name": "mystream", "json_schema": {"type": "object", "properties": {"k": "v"}}}
    raw_catalog = {
        "streams": [
            {
                "stream": stream_definition,
                "destination_sync_mode": "overwrite",
                "sync_mode": "full_refresh",
            }
        ]
    }
    expected = ConfiguredAirbyteCatalog.parse_obj(raw_catalog)
    serialized = expected.json(exclude_unset=True)

    with tempfile.NamedTemporaryFile("w") as tmp:
        tmp.write(serialized)
        # Flush so the content is visible when the file is re-read by name.
        tmp.flush()
        loaded = source.read_catalog(tmp.name)

    assert loaded == expected
Ejemplo n.º 6
0
def catalog():
    """Return a configured catalog with two full-refresh, overwrite streams.

    The streams are named ``mock_http_stream`` and ``mock_stream`` and both
    carry an empty JSON schema.
    """
    stream_names = ("mock_http_stream", "mock_stream")
    streams = [
        {
            "stream": {"name": stream_name, "json_schema": {}},
            "destination_sync_mode": "overwrite",
            "sync_mode": "full_refresh",
        }
        for stream_name in stream_names
    ]
    return ConfiguredAirbyteCatalog.parse_obj({"streams": streams})
Ejemplo n.º 7
0
def test_abnormally_large_state(config, configured_catalog):
    """Read from the state "2222-06-04": expect STATE messages but no RECORD messages."""
    client = SourceGoogleAds()
    messages = client.read(
        AirbyteLogger(),
        config,
        ConfiguredAirbyteCatalog.parse_obj(configured_catalog),
        {"ad_group_ad_report": {"segments.date": "2222-06-04"}},
    )

    # Consume the stream once, keeping only the type of each non-empty message.
    seen_types = [message.type for message in messages if message]

    assert Type.RECORD not in seen_types
    assert Type.STATE in seen_types
Ejemplo n.º 8
0
def configured_catalog():
    """Parse the JSON text that ``read_file`` returns for ``./configured_catalog.json`` into a ConfiguredAirbyteCatalog."""
    raw_json = read_file("./configured_catalog.json")
    catalog_dict = json.loads(raw_json)
    return ConfiguredAirbyteCatalog.parse_obj(catalog_dict)
Ejemplo n.º 9
0
def configured_catalog():
    """Load ``unit_tests/configured_catalog.json`` and parse it into a ConfiguredAirbyteCatalog.

    The path is relative, so it is resolved against the current working
    directory.
    """
    # Decode as UTF-8 explicitly rather than relying on the platform's
    # default text encoding.
    with open("unit_tests/configured_catalog.json", encoding="utf-8") as f:
        data = json.load(f)
    return ConfiguredAirbyteCatalog.parse_obj(data)
Ejemplo n.º 10
0
 def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog:
     """Pass *catalog_path* to ``self.read_config`` and parse the result into a ConfiguredAirbyteCatalog."""
     raw_catalog = self.read_config(catalog_path)
     return ConfiguredAirbyteCatalog.parse_obj(raw_catalog)