def test_incremental_sync(config, configured_catalog):
    """Run a full sync, then a second sync seeded with the captured state.

    Checks that every record's ``segments.date`` cursor stays within the
    expected window on the first pass, and that the second pass only emits
    records at or after ``latest_state`` minus the GAP_DAYS lookback.
    """
    today = pendulum.now().date()
    start_date = today.subtract(months=1)
    config["start_date"] = start_date.to_date_string()

    google_ads_client = SourceGoogleAds()
    records = list(google_ads_client.read(AirbyteLogger(), config, ConfiguredAirbyteCatalog.parse_obj(configured_catalog)))

    # Walk the messages backwards so we pick up the most recent STATE emitted.
    latest_state = None
    for record in records[::-1]:
        if record and record.type == Type.STATE:
            latest_state = record.state.data["ad_group_ad_report"][config["customer_id"]]["segments.date"]
            break
    # Fail fast with a clear message instead of a TypeError in the string
    # comparisons below if the sync never produced a STATE message.
    assert latest_state is not None, "first sync emitted no STATE message"

    for message in records:
        if not message or message.type != Type.RECORD:
            continue
        cursor_value = message.record.data["segments.date"]
        assert cursor_value <= latest_state
        # GAP_DAYS is the connector's conversion-window lookback.
        assert cursor_value >= start_date.subtract(days=GAP_DAYS).to_date_string()

    # Next sync: seed the read with the state captured above.
    records = list(
        google_ads_client.read(
            AirbyteLogger(),
            config,
            ConfiguredAirbyteCatalog.parse_obj(configured_catalog),
            {"ad_group_ad_report": {config["customer_id"]: {"segments.date": latest_state}}},
        )
    )
    for record in records:
        if record.type == Type.RECORD:
            assert record.record.data["segments.date"] >= pendulum.parse(latest_state).subtract(days=GAP_DAYS).to_date_string()
        if record.type == Type.STATE:
            assert record.state.data["ad_group_ad_report"][config["customer_id"]]["segments.date"] >= latest_state
def test_incremental_sync(config):
    """Incremental reads honor the supplied state (with a 14-day lookback);
    an abnormally large (future) state yields STATE echoes but no records.

    Removes a leftover debug ``print`` and factors the three near-identical
    sync passes through local helpers.
    """

    def run_sync(client, state):
        # One incremental read of the sample catalog starting from *state*.
        return client.read(
            AirbyteLogger(),
            config,
            ConfiguredAirbyteCatalog.parse_obj(SAMPLE_CATALOG),
            {"ad_group_ad_report": {"segments.date": state}},
        )

    def assert_cursor_monotonic(records, state):
        # Every RECORD's cursor must be >= the most recent STATE seen;
        # the initial floor is state minus the 14-day lookback window.
        current_state = pendulum.parse(state).subtract(days=14).to_date_string()
        for record in records:
            if record and record.type == Type.STATE:
                current_state = record.state.data["ad_group_ad_report"]["segments.date"]
            if record and record.type == Type.RECORD:
                assert record.record.data["segments.date"] >= current_state

    google_ads_client = SourceGoogleAds()

    # First sync.
    assert_cursor_monotonic(run_sync(google_ads_client, "2021-05-24"), "2021-05-24")

    # Next sync from a later state.
    assert_cursor_monotonic(run_sync(google_ads_client, "2021-06-04"), "2021-06-04")

    # Abnormal state: far in the future, so no data records are expected and
    # the state must be echoed back unchanged.
    state = "2029-06-04"
    no_records = True
    for record in run_sync(google_ads_client, state):
        if record and record.type == Type.STATE:
            assert record.state.data["ad_group_ad_report"]["segments.date"] == state
        if record and record.type == Type.RECORD:
            no_records = False
    assert no_records
def read(
    self, logger: AirbyteLogger, config_container: ConfigContainer, catalog_path: str, state_path: str = None
) -> Generator[AirbyteMessage, None, None]:
    """Implements the parent class read method.

    Discovers the Singer catalog, applies the user's stream selection from the
    configured Airbyte catalog on disk, builds the tap invocation, and streams
    the resulting Airbyte messages.
    """
    discovered = self._discover_internal(logger, config_container.config_path)
    configured_catalog = ConfiguredAirbyteCatalog.parse_obj(self.read_config(catalog_path))
    # Project the Airbyte-level stream selection onto the raw Singer catalog.
    selected_catalog_path = SingerHelper.create_singer_catalog_with_selection(configured_catalog, discovered.singer_catalog)
    cmd = self.read_cmd(logger, config_container.config_path, selected_catalog_path, state_path)
    return SingerHelper.read(logger, cmd)
def configured_catalog_fixture():
    """Catalog with a single incremental "quotes" stream cursored on ``updatedAt``."""
    stream = {
        "name": "quotes",
        "json_schema": {},
        "supported_sync_modes": ["full_refresh", "incremental"],
        "source_defined_cursor": True,
        "default_cursor_field": ["updatedAt"],
    }
    return ConfiguredAirbyteCatalog.parse_obj(
        {
            "streams": [
                {
                    "stream": stream,
                    "sync_mode": "incremental",
                    "cursor_field": ["updatedAt"],
                    "destination_sync_mode": "append",
                }
            ]
        }
    )
def test_read_catalog(source):
    """``read_catalog`` round-trips a configured catalog written to disk."""
    expected = ConfiguredAirbyteCatalog.parse_obj(
        {
            "streams": [
                {
                    "stream": {"name": "mystream", "json_schema": {"type": "object", "properties": {"k": "v"}}},
                    "destination_sync_mode": "overwrite",
                    "sync_mode": "full_refresh",
                }
            ]
        }
    )
    with tempfile.NamedTemporaryFile("w") as catalog_file:
        catalog_file.write(expected.json(exclude_unset=True))
        catalog_file.flush()
        # Reading the file back must reproduce the original catalog object.
        assert source.read_catalog(catalog_file.name) == expected
def catalog():
    """Full-refresh catalog containing the two mock streams."""
    streams = [
        {
            "stream": {"name": stream_name, "json_schema": {}},
            "destination_sync_mode": "overwrite",
            "sync_mode": "full_refresh",
        }
        for stream_name in ("mock_http_stream", "mock_stream")
    ]
    return ConfiguredAirbyteCatalog.parse_obj({"streams": streams})
def test_abnormally_large_state(config, configured_catalog):
    """A state far in the future must produce STATE messages but no data RECORDs."""
    source = SourceGoogleAds()
    messages = source.read(
        AirbyteLogger(),
        config,
        ConfiguredAirbyteCatalog.parse_obj(configured_catalog),
        {"ad_group_ad_report": {"segments.date": "2222-06-04"}},
    )
    saw_state = False
    saw_record = False
    for message in messages:
        if not message:
            continue
        if message.type == Type.STATE:
            saw_state = True
        if message.type == Type.RECORD:
            saw_record = True
    assert not saw_record
    assert saw_state
def configured_catalog():
    """Load the configured catalog fixture shipped next to the tests."""
    raw_catalog = json.loads(read_file("./configured_catalog.json"))
    return ConfiguredAirbyteCatalog.parse_obj(raw_catalog)
def configured_catalog():
    """Load the configured catalog fixture used by the unit tests."""
    with open("unit_tests/configured_catalog.json") as f:
        # json.load parses the stream directly; no intermediate string needed.
        data = json.load(f)
    return ConfiguredAirbyteCatalog.parse_obj(data)
def read_catalog(self, catalog_path: str) -> ConfiguredAirbyteCatalog:
    """Parse the configured catalog stored at *catalog_path*."""
    raw_catalog = self.read_config(catalog_path)
    return ConfiguredAirbyteCatalog.parse_obj(raw_catalog)