def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
    catalog, and state.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file
    :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as AirbyteCatalog
        returned by discover(), but in addition, it's been configured in the UI! For each particular stream and
        field, extra modifications may have been applied, such as filtering streams and/or columns out, renaming
        some entities, etc.
    :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
        replication in the future from that saved checkpoint. This is the object that is provided with state from
        previous runs and avoids replicating the entire set of data every time.

    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """
    client = self._client(config)

    logger.info("Starting syncing Dawa")
    yield from client.get_records(catalog, logger, state)
    logger.info("Finished syncing Dawa")
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    stream_name = None
    try:
        for configured_stream in catalog.streams:
            # assign the name up front so the incremental branch and the
            # exception handler below never reference an unbound variable
            stream_name = configured_stream.stream.name
            if configured_stream.sync_mode == SyncMode.full_refresh:
                reader = Reader(logger, config)
                table_client = reader.get_table_client(stream_name)
                logger.info(f"Reading data from stream '{stream_name}'")

                for row in reader.read(table_client, None):
                    # Timestamp property is in metadata object
                    # row.metadata.timestamp
                    row["additionalProperties"] = True
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=AirbyteRecordMessage(stream=stream_name, data=row,
                                                    emitted_at=int(datetime.now().timestamp()) * 1000),
                    )
            if configured_stream.sync_mode == SyncMode.incremental:
                logger.warn(f"Incremental sync is not supported by stream {stream_name}")
    except Exception as err:
        reason = f"Failed to read data of {stream_name}: {repr(err)}\n{traceback.format_exc()}"
        logger.error(reason)
        raise err
def run_in_external_process(fn: Callable, timeout: int, max_timeout: int, logger: AirbyteLogger, args: List[Any]) -> Mapping[str, Any]:
    """
    fn passed in must return a tuple of (desired return value, Exception OR None)
    This allows propagating any errors from the process up and raising accordingly
    """
    result = None
    while result is None:
        q_worker: Queue = mp.Queue()
        proc = mp.Process(
            target=multiprocess_queuer,
            # use dill to pickle the function for Windows-compatibility
            args=(dill.dumps(fn), q_worker, *args),
        )
        proc.start()
        try:
            # this attempts to get return value from function with our specified timeout up to max
            result, potential_error = q_worker.get(timeout=min(timeout, max_timeout))
        except mp.queues.Empty:  # type: ignore[attr-defined]
            if timeout >= max_timeout:  # if we've got to max_timeout and tried once with that value
                raise TimeoutError(f"Timed out too many times while running {fn.__name__}, max timeout of {max_timeout} seconds reached.")
            logger.info(f"timed out while running {fn.__name__} after {timeout} seconds, retrying...")
            timeout *= 2  # double timeout and try again
        else:
            if potential_error is None:
                return result  # type: ignore[no-any-return]
            traceback.print_exception(type(potential_error), potential_error, potential_error.__traceback__)
            raise potential_error
        finally:
            try:
                proc.terminate()
            except Exception as e:
                logger.info(f"'{fn.__name__}' proc unterminated, error: {e}")
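# Usage sketch (illustrative only, not part of the source above): how a caller
# might drive run_in_external_process. infer_schema, the file path, and the
# timeout values are hypothetical; the (value, error-or-None) tuple contract
# is the one documented in the docstring above.
from typing import Any, Mapping, Optional, Tuple


def infer_schema(path: str) -> Tuple[Optional[Mapping[str, Any]], Optional[Exception]]:
    try:
        # pretend we inspected `path` and derived a JSON schema
        return {"type": "object"}, None
    except Exception as err:
        return None, err


# schema = run_in_external_process(
#     fn=infer_schema,   # must be picklable by dill
#     timeout=4,         # initial wait in seconds; doubled on each retry
#     max_timeout=60,    # ceiling, after which TimeoutError is raised
#     logger=logger,
#     args=["/tmp/sample.csv"],
# )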
def read(
    self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog,
    state: MutableMapping[str, Any] = None,
) -> Iterator[AirbyteMessage]:
    """Implements the Read operation from the Airbyte Specification.
    See https://docs.airbyte.io/architecture/airbyte-specification."""
    connector_state = copy.deepcopy(state or {})
    logger.info(f"Starting syncing {self.name}")
    # TODO assert all streams exist in the connector
    # get the streams once in case the connector needs to make any queries to generate them
    stream_instances = {s.name: s for s in self.streams(config)}
    for configured_stream in catalog.streams:
        stream_instance = stream_instances.get(configured_stream.stream.name)
        if not stream_instance:
            raise KeyError(
                f"The requested stream {configured_stream.stream.name} was not found in the source. Available streams: {stream_instances.keys()}"
            )
        try:
            yield from self._read_stream(
                logger=logger,
                stream_instance=stream_instance,
                configured_stream=configured_stream,
                connector_state=connector_state,
            )
        except Exception as e:
            logger.exception(f"Encountered an exception while reading stream {self.name}")
            raise e

    logger.info(f"Finished syncing {self.name}")
def parse_config(config: json, logger: AirbyteLogger) -> Dict[str, Any]:
    """
    Convert dict of config values to firebolt.db.Connection arguments

    :param config: json-compatible dict of settings
    :param logger: AirbyteLogger instance to print logs.

    :return: dictionary of firebolt.db.Connection-compatible kwargs
    """
    connection_args = {
        "database": config["database"],
        "auth": UsernamePassword(config["username"], config["password"]),
        "api_endpoint": config.get("host", DEFAULT_API_URL),
        "account_name": config.get("account"),
    }
    # engine can be a name or a full URL of a cluster
    engine = config.get("engine")
    if engine:
        if "." in engine:
            connection_args["engine_url"] = engine
        else:
            connection_args["engine_name"] = engine
    else:
        logger.info("Engine parameter was not provided. Connecting to the default engine.")
    return connection_args
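# Illustration (made-up values): parse_config routes a bare engine name to
# engine_name, while anything containing a dot is treated as a full engine URL.
#
#   parse_config({"database": "db", "username": "u", "password": "p",
#                 "engine": "my_engine"}, logger)
#   # -> {..., "engine_name": "my_engine"}
#
#   parse_config({"database": "db", "username": "u", "password": "p",
#                 "engine": "my-engine.mycompany.region.app.firebolt.io"}, logger)
#   # -> {..., "engine_url": "my-engine.mycompany.region.app.firebolt.io"}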
def _read_stream(
    self,
    logger: AirbyteLogger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
) -> Iterator[AirbyteMessage]:
    use_incremental = configured_stream.sync_mode == SyncMode.incremental and stream_instance.supports_incremental
    if use_incremental:
        record_iterator = self._read_incremental(logger, stream_instance, configured_stream, connector_state)
    else:
        record_iterator = self._read_full_refresh(stream_instance, configured_stream)

    record_counter = 0
    stream_name = configured_stream.stream.name
    logger.info(f"Syncing stream: {stream_name}")
    for record in record_iterator:
        if record.type == MessageType.RECORD:
            record_counter += 1
        yield record

    logger.info(f"Read {record_counter} records from {stream_name} stream")
def read(
    self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog,
    state: MutableMapping[str, Any] = None,
) -> Iterable[AirbyteMessage]:
    state = state or {}
    client = self._get_client(config)

    logger.info(f"Starting syncing {self.name}")
    total_state = copy.deepcopy(state)
    for configured_stream in catalog.streams:
        try:
            yield from self._read_stream(logger=logger, client=client, configured_stream=configured_stream, state=total_state)
        except Exception:
            logger.exception(f"Encountered an exception while reading stream {self.name}")
            raise

    logger.info(f"Finished syncing {self.name}")
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    """
    Returns an AirbyteCatalog representing the available streams and fields in this integration.
    For example, given valid credentials to a Postgres database,
    returns an Airbyte catalog where each postgres table is a stream, and each table column is a field.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file

    :return: AirbyteCatalog is an object describing a list of all available streams in this source.
        A stream is an AirbyteStream object that includes:
        - its stream name (or table name in the case of Postgres)
        - json_schema providing the specifications of expected schema for this stream
          (a list of columns described by their names and types)
    """

    async def get_streams():
        async with await establish_async_connection(config, logger) as connection:
            tables = await get_firebolt_tables(connection)
            logger.info(f"Found {len(tables)} available tables.")
            return await gather(*[get_table_stream(connection, table) for table in tables])

    loop = get_event_loop()
    streams = loop.run_until_complete(get_streams())
    logger.info(f"Provided {len(streams)} streams to the Airbyte Catalog.")
    return AirbyteCatalog(streams=streams)
def _read_stream(self, logger: AirbyteLogger, client: BaseClient, configured_stream: ConfiguredAirbyteStream,
                 state: MutableMapping[str, Any]):
    stream_name = configured_stream.stream.name
    use_incremental = configured_stream.sync_mode == SyncMode.incremental and client.stream_has_state(stream_name)

    if use_incremental and state.get(stream_name):
        logger.info(f"Set state of {stream_name} stream to {state.get(stream_name)}")
        client.set_stream_state(stream_name, state.get(stream_name))

    logger.info(f"Syncing {stream_name} stream")
    for record in client.read_stream(configured_stream.stream):
        now = int(datetime.now().timestamp()) * 1000
        message = AirbyteRecordMessage(stream=stream_name, data=record, emitted_at=now)
        yield AirbyteMessage(type=MessageType.RECORD, record=message)

    if use_incremental and client.get_stream_state(stream_name):
        state[stream_name] = client.get_stream_state(stream_name)
        # output state object only together with other stream states
        yield AirbyteMessage(type=MessageType.STATE, state=AirbyteStateMessage(data=state))
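# Sketch of the message sequence _read_stream yields (field values invented,
# assuming the airbyte_cdk models imported by this module): each record is
# wrapped in a RECORD message, and at most one STATE message per stream
# follows after the records.
#
#   AirbyteMessage(
#       type=MessageType.RECORD,
#       record=AirbyteRecordMessage(stream="orders", data={"id": 1},
#                                   emitted_at=1609459200000),  # epoch millis
#   )
#   AirbyteMessage(
#       type=MessageType.STATE,
#       state=AirbyteStateMessage(data={"orders": {"updated_at": "2021-01-01"}}),
#   )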
def discover(self, logger: AirbyteLogger, config: json) -> AirbyteCatalog:
    client = GoogleSheetsClient(self.get_credentials(config))
    spreadsheet_id = Helpers.get_spreadsheet_id(config["spreadsheet_id"])
    try:
        logger.info(f"Running discovery on sheet {spreadsheet_id}")
        spreadsheet_metadata = Spreadsheet.parse_obj(client.get(spreadsheetId=spreadsheet_id, includeGridData=False))
        grid_sheets = Helpers.get_grid_sheets(spreadsheet_metadata)
        streams = []
        for sheet_name in grid_sheets:
            try:
                header_row_data = Helpers.get_first_row(client, spreadsheet_id, sheet_name)
                stream = Helpers.headers_to_airbyte_stream(logger, sheet_name, header_row_data)
                streams.append(stream)
            except Exception as err:
                if str(err).startswith("Expected data for exactly one row for sheet"):
                    logger.warn(f"Skip empty sheet: {sheet_name}")
                else:
                    logger.error(str(err))
        return AirbyteCatalog(streams=streams)
    except errors.HttpError as err:
        reason = str(err)
        if err.resp.status == status_codes.NOT_FOUND:
            reason = "Requested spreadsheet was not found."
        raise Exception(f"Could not run discovery: {reason}")
def check(self, logger: AirbyteLogger, config: json) -> AirbyteConnectionStatus:
    client = self._get_client(config)
    logger.info("Checking access to Amazon SP-API")
    try:
        client.check_connection()
        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
    except Exception as e:
        return AirbyteConnectionStatus(status=Status.FAILED, message=f"An exception occurred: {str(e)}")
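# For reference, the two outcomes check() can produce (the FAILED message text
# follows the connector's own format string above):
#   AirbyteConnectionStatus(status=Status.SUCCEEDED)
#   AirbyteConnectionStatus(status=Status.FAILED, message="An exception occurred: <details>")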
def read_reports(
    self, logger: AirbyteLogger, stream_name: str, state: MutableMapping[str, Any]
) -> Generator[AirbyteMessage, None, None]:
    cursor_field = self._amazon_client.get_cursor_for_stream(stream_name)
    cursor_value = self._get_cursor_or_none(state, stream_name, cursor_field) or self.start_date

    if pendulum.parse(cursor_value) > pendulum.now():
        yield self._state(state)
        return

    current_date = cursor_value
    while pendulum.parse(current_date) < pendulum.yesterday():
        logger.info(f"Started pulling data from {current_date}")
        start_date, end_date = self._get_date_parameters(current_date)

        # Request for the report
        logger.info(f"Requested report from {start_date} to {end_date}")
        response = self._amazon_client.request_report(stream_name, start_date, end_date)
        reportId = response["reportId"]

        # Wait for the report status
        status, document_id = BaseClient._wait_for_report(logger, self._amazon_client, reportId)

        # Move to next month when the report is CANCELLED
        if status is False:
            current_date = self._increase_date_by_month(current_date)
            continue

        # Pull data for a report
        data = self._amazon_client.get_report_document(document_id)

        # Loop through all records and yield
        for row in self._get_records(data):
            current_cursor_value = pendulum.parse(row[cursor_field]).to_date_string()
            cursor_value = max(current_cursor_value, cursor_value) if cursor_value else current_cursor_value
            yield self._record(stream=stream_name, data=row, seller_id=self.seller_id)

        if cursor_value:
            state[stream_name][cursor_field] = pendulum.parse(cursor_value).add(days=1).to_date_string()
            yield self._state(state)

        current_date = self._increase_date_by_month(current_date)
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    client = GoogleSheetsClient(self.get_credentials(config))

    sheet_to_column_name = Helpers.parse_sheet_and_column_names_from_catalog(catalog)
    spreadsheet_id = Helpers.get_spreadsheet_id(config["spreadsheet_id"])

    logger.info(f"Starting syncing spreadsheet {spreadsheet_id}")
    # For each sheet in the spreadsheet, get a batch of rows, and as long as there hasn't been
    # a blank row, emit the row batch
    sheet_to_column_index_to_name = Helpers.get_available_sheets_to_column_index_to_name(
        client, spreadsheet_id, sheet_to_column_name
    )
    sheet_row_counts = Helpers.get_sheet_row_count(client, spreadsheet_id)
    logger.info(f"Row counts: {sheet_row_counts}")
    for sheet in sheet_to_column_index_to_name.keys():
        logger.info(f"Syncing sheet {sheet}")
        column_index_to_name = sheet_to_column_index_to_name[sheet]
        row_cursor = 2  # we start syncing past the header row
        # For the loop, it is necessary that the initial row exists when we send a request to the API;
        # if the last row of the interval goes outside the sheet, this is normal: we will get back
        # only the real data of the sheet and exit the loop on the next iteration.
        while row_cursor <= sheet_row_counts[sheet]:
            range = f"{sheet}!{row_cursor}:{row_cursor + ROW_BATCH_SIZE}"
            logger.info(f"Fetching range {range}")
            row_batch = SpreadsheetValues.parse_obj(
                client.get_values(spreadsheetId=spreadsheet_id, ranges=range, majorDimension="ROWS")
            )

            row_cursor += ROW_BATCH_SIZE + 1
            # there should always be one range since we requested only one
            value_ranges = row_batch.valueRanges[0]

            if not value_ranges.values:
                break

            row_values = value_ranges.values
            if len(row_values) == 0:
                break

            for row in row_values:
                if not Helpers.is_row_empty(row) and Helpers.row_contains_relevant_data(row, column_index_to_name.keys()):
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=Helpers.row_data_to_record_message(sheet, row, column_index_to_name),
                    )
    logger.info(f"Finished syncing spreadsheet {spreadsheet_id}")
def read_reports(
    self, logger: AirbyteLogger, stream_name: str, state: MutableMapping[str, Any]
) -> Generator[AirbyteMessage, None, None]:
    cursor_field = self._amazon_client.get_cursor_for_stream(stream_name)
    cursor_value = self._get_cursor_or_none(state, stream_name, cursor_field) or self.start_date

    if cursor_value > date.today().isoformat():
        state[stream_name][cursor_field] = date.today().isoformat()
        yield self._state(state)
        return

    current_date = cursor_value
    while current_date < date.today().isoformat():
        logger.info(f"Started pulling data from {current_date}")
        start_date, end_date = self._get_date_parameters(current_date)

        # Request for the report
        logger.info(f"Requested report from {start_date} to {end_date}")
        response = self._amazon_client.request_report(stream_name, start_date, end_date)
        reportId = response["reportId"]

        # Wait for the report status
        document_id = self._wait_for_report(logger, self._amazon_client, reportId)

        # Pull data for a report
        data = self._amazon_client.get_report_document(document_id)

        # Loop through all records and yield
        for row in self._get_records(data):
            current_cursor_value = datetime.fromisoformat(row[cursor_field]).date().isoformat()
            cursor_value = max(current_cursor_value, cursor_value) if cursor_value else current_cursor_value
            yield self._record(stream=stream_name, data=row)

        if cursor_value:
            state[stream_name][cursor_field] = self._get_cursor_state(cursor_value, end_date)
            yield self._state(state)

        current_date = self._increase_date_by_month(current_date)
def _read_incremental(
    self,
    logger: AirbyteLogger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
    internal_config: InternalConfig,
) -> Iterator[AirbyteMessage]:
    stream_name = configured_stream.stream.name
    stream_state = connector_state.get(stream_name, {})
    if stream_state:
        logger.info(f"Setting state of {stream_name} stream to {stream_state}")

    checkpoint_interval = stream_instance.state_checkpoint_interval
    slices = stream_instance.stream_slices(
        cursor_field=configured_stream.cursor_field, sync_mode=SyncMode.incremental, stream_state=stream_state
    )
    total_records_counter = 0
    for slice in slices:
        records = stream_instance.read_records(
            sync_mode=SyncMode.incremental,
            stream_slice=slice,
            stream_state=stream_state,
            cursor_field=configured_stream.cursor_field or None,
        )
        for record_counter, record_data in enumerate(records, start=1):
            yield self._as_airbyte_record(stream_name, record_data)
            stream_state = stream_instance.get_updated_state(stream_state, record_data)
            if checkpoint_interval and record_counter % checkpoint_interval == 0:
                yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)

            total_records_counter += 1
            # This functionality should ideally live outside of this method
            # but since state is managed inside this method, we keep track
            # of it here.
            if self._limit_reached(internal_config, total_records_counter):
                # Break from slice loop to save state and exit from _read_incremental function.
                break

    yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)
    if self._limit_reached(internal_config, total_records_counter):
        return
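# Standalone sketch (not connector code) of the checkpoint cadence above,
# simplified to a single flat stream of records rather than per-slice
# counters: with state_checkpoint_interval = N, a STATE message is emitted
# after every N records and once more at the end. emit_state stands in for
# _checkpoint_state.
def checkpointed(records, checkpoint_interval, emit_state):
    counter = 0
    for record in records:
        counter += 1
        yield record
        if checkpoint_interval and counter % checkpoint_interval == 0:
            yield emit_state()
    yield emit_state()  # final checkpoint once all records are emitted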
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    client = self._client(config)

    logger.info("Starting syncing recurly")
    for configured_stream in catalog.streams:
        # TODO handle incremental syncs
        stream = configured_stream.stream
        if stream.name not in client.ENTITIES:
            logger.warn(f"Stream '{stream.name}' not found in the recognized entities")
            continue
        for record in self._read_record(client=client, stream=stream.name):
            yield AirbyteMessage(type=Type.RECORD, record=record)

    logger.info("Finished syncing recurly")
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
    catalog, and state.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file
    :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as AirbyteCatalog
        returned by discover(), but in addition, it's been configured in the UI! For each particular stream and
        field, extra modifications may have been applied, such as filtering streams and/or columns out, renaming
        some entities, etc.
    :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
        replication in the future from that saved checkpoint. This is the object that is provided with state from
        previous runs and avoids replicating the entire set of data every time.

    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """
    logger.info("Reading data from Apify dataset")

    dataset_id = config["datasetId"]
    clean = config.get("clean", False)

    client = ApifyClient()
    dataset_client = client.dataset(dataset_id)

    # Get total number of items in dataset. This will be used in pagination
    dataset = dataset_client.get()
    num_items = dataset["itemCount"]

    with concurrent.futures.ThreadPoolExecutor() as executor:
        for result in executor.map(partial(self._apify_get_dataset_items, dataset_client, clean), range(0, num_items, BATCH_SIZE)):
            for data in result.items:
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(stream=DATASET_ITEMS_STREAM_NAME, data=data,
                                                emitted_at=int(datetime.now().timestamp()) * 1000),
                )
def read(
    self, logger: AirbyteLogger, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog,
    state: MutableMapping[str, Any] = None,
) -> Generator[AirbyteMessage, None, None]:
    client = self._get_client(config)

    logger.info("Starting syncing Amazon Seller API")
    for configured_stream in catalog.streams:
        yield from self._read_record(logger=logger, client=client, configured_stream=configured_stream, state=state)

    logger.info("Finished syncing Amazon Seller API")
def read_stream(
    self, logger: AirbyteLogger, stream_name: str, state: MutableMapping[str, Any]
) -> Generator[AirbyteMessage, None, None]:
    cursor_field = self._amazon_client.get_cursor_for_stream(stream_name)
    cursor_value = self._get_cursor_or_none(state, stream_name, cursor_field) or self.start_date

    if pendulum.parse(cursor_value) > pendulum.now():
        yield self._state(state)
        return

    current_date = self._apply_conversion_window(cursor_value)

    logger.info(f"Started pulling data from {current_date}")
    HAS_NEXT = True
    NEXT_TOKEN = None
    PAGE = 1
    while HAS_NEXT:
        logger.info(f"Pulling for page: {PAGE}")
        response = self._amazon_client.fetch_orders(current_date, self._amazon_client.PAGECOUNT, NEXT_TOKEN)
        orders = response["Orders"]
        # re-read the token on every page so the loop terminates once the
        # API stops returning a NextToken
        NEXT_TOKEN = response.get("NextToken")
        HAS_NEXT = NEXT_TOKEN is not None
        PAGE = PAGE + 1
        for order in orders:
            current_date = pendulum.parse(order[cursor_field]).to_date_string()
            cursor_value = max(current_date, cursor_value) if cursor_value else current_date
            yield self._record(stream=stream_name, data=order, seller_id=self.seller_id)

        if cursor_value:
            state[stream_name][cursor_field] = pendulum.parse(cursor_value).add(days=1)
            yield self._state(state)

        # Sleep for 2 seconds
        time.sleep(2)
def _read_stream(
    self,
    logger: AirbyteLogger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
    internal_config: InternalConfig,
) -> Iterator[AirbyteMessage]:
    if internal_config.page_size and isinstance(stream_instance, HttpStream):
        logger.info(f"Setting page size for {stream_instance.name} to {internal_config.page_size}")
        stream_instance.page_size = internal_config.page_size

    use_incremental = configured_stream.sync_mode == SyncMode.incremental and stream_instance.supports_incremental
    if use_incremental:
        record_iterator = self._read_incremental(logger, stream_instance, configured_stream, connector_state, internal_config)
    else:
        record_iterator = self._read_full_refresh(stream_instance, configured_stream, internal_config)

    record_counter = 0
    stream_name = configured_stream.stream.name
    logger.info(f"Syncing stream: {stream_name}")
    for record in record_iterator:
        if record.type == MessageType.RECORD:
            record_counter += 1
        yield record

    logger.info(f"Read {record_counter} records from {stream_name} stream")
def _read_incremental(
    self,
    logger: AirbyteLogger,
    stream_instance: Stream,
    configured_stream: ConfiguredAirbyteStream,
    connector_state: MutableMapping[str, Any],
) -> Iterator[AirbyteMessage]:
    stream_name = configured_stream.stream.name
    stream_state = connector_state.get(stream_name, {})
    if stream_state:
        # stream_state is already the per-stream dict, so log it directly
        logger.info(f"Setting state of {stream_name} stream to {stream_state}")

    checkpoint_interval = stream_instance.state_checkpoint_interval
    slices = stream_instance.stream_slices(
        cursor_field=configured_stream.cursor_field, sync_mode=SyncMode.incremental, stream_state=stream_state
    )
    for slice in slices:
        record_counter = 0
        records = stream_instance.read_records(
            sync_mode=SyncMode.incremental,
            stream_slice=slice,
            stream_state=stream_state,
            cursor_field=configured_stream.cursor_field or None,
        )
        for record_data in records:
            record_counter += 1
            yield self._as_airbyte_record(stream_name, record_data)
            stream_state = stream_instance.get_updated_state(stream_state, record_data)
            if checkpoint_interval and record_counter % checkpoint_interval == 0:
                yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)

    yield self._checkpoint_state(stream_name, stream_state, connector_state, logger)
def read(
    self,
    logger: AirbyteLogger,
    config: json,
    catalog: ConfiguredAirbyteCatalog,
    state: Dict[str, any],
) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
    catalog, and state.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file
    :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as AirbyteCatalog
        returned by discover(), but in addition, it's been configured in the UI! For each particular stream and
        field, extra modifications may have been applied, such as filtering streams and/or columns out, renaming
        some entities, etc.
    :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
        replication in the future from that saved checkpoint. This is the object that is provided with state from
        previous runs and avoids replicating the entire set of data every time.

    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """
    logger.info(f"Reading data from {len(catalog.streams)} Firebolt tables.")
    with establish_connection(config, logger) as connection:
        with connection.cursor() as cursor:
            for c_stream in catalog.streams:
                table_name = c_stream.stream.name
                table_properties = c_stream.stream.json_schema["properties"]
                columns = list(table_properties.keys())
                # Escape columns with " to avoid reserved keywords e.g. id
                escaped_columns = ['"{}"'.format(col) for col in columns]
                query = "SELECT {columns} FROM {table}".format(columns=",".join(escaped_columns), table=table_name)
                cursor.execute(query)
                logger.info(f"Fetched {cursor.rowcount} rows from table {table_name}.")
                for result in cursor.fetchall():
                    message = airbyte_message_from_data(result, columns, table_name)
                    if message:
                        yield message
    logger.info("Data read complete.")
def read(self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog,
         state: Dict[str, any]) -> Generator[AirbyteMessage, None, None]:
    client = self._get_client(config)

    logger.info(f"Starting syncing {self.__class__.__name__}")
    for configured_stream in catalog.streams:
        stream = configured_stream.stream
        if stream.name not in client.ENTITY_MAP.keys():
            continue
        logger.info(f"Syncing {stream.name} stream")
        for record in self._read_record(client=client, stream=stream.name):
            yield AirbyteMessage(type=Type.RECORD, record=record)

    logger.info(f"Finished syncing {self.__class__.__name__}")
def _write_config(self, token):
    logger = AirbyteLogger()
    logger.info("Credentials Refreshed")