def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
    catalog, and state.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via the Airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file
    :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as the AirbyteCatalog
        returned by discover(), but in addition it has been configured in the UI. For each particular stream and
        field, extra modifications may have been applied, such as filtering streams and/or columns out, renaming
        some entities, etc.
    :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
        replication in the future from that saved checkpoint. This is the object that is provided with state from
        previous runs so the connector can avoid replicating the entire set of data every time.

    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """
    for stream in catalog.streams:
        name = stream.stream.name
        key = stream.stream.name
        logger.debug(f'****** mode {stream.sync_mode} state={state}')

        # map each configured stream to its source endpoint
        if key == 'SiteMetaData':
            url = sitemetadata_url(config)
        elif key == 'WellScreens':
            url = screens_url(config)
        elif key == 'ManualGWL':
            url = manual_water_levels_url(config)
        elif key == 'PressureGWL':
            url = pressure_water_levels_url(config)
        elif key == 'AcousticGWL':
            url = acoustic_water_levels_url(config)
        else:
            continue

        # page through the endpoint, resuming from the last OBJECTID saved in state
        while True:
            objectid = state.get(key)  # .get() avoids a KeyError on the first run for a stream
            curl = f'{url}?objectid={objectid}' if objectid else url

            logger.info(f'fetching url={curl}')
            jobj = get_json(logger, curl)
            if jobj:
                # remember the last OBJECTID seen so the next request resumes after it
                state[key] = jobj[-1]['OBJECTID']
            else:
                break

            for di in jobj:
                di['import_uuid'] = str(uuid.uuid4())
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(stream=name, data=di, emitted_at=int(datetime.now().timestamp()) * 1000),
                )
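# --- A minimal sketch of the helpers referenced above (sitemetadata_url, get_json, and
# --- the other *_url builders are defined elsewhere in this connector). The 'base_url'
# --- config key and the bare requests.get() call are assumptions for illustration,
# --- not the connector's actual implementation.
import requests


def sitemetadata_url(config: dict) -> str:
    # assumed: the service base URL is carried in the connector config under 'base_url'
    return f"{config['base_url']}/SiteMetaData"


def get_json(logger, url: str):
    # assumed: a plain GET that returns a JSON array of rows ordered by OBJECTID,
    # or None/[] once the objectid cursor has passed the last row
    resp = requests.get(url)
    if resp.status_code != 200:
        logger.error(f'request failed with status {resp.status_code} for {url}')
        return None
    return resp.json()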
def read(
    self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
) -> Generator[AirbyteMessage, None, None]:
    """
    Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
    catalog, and state.

    :param logger: Logging object to display debug/info/error to the logs
        (logs will not be accessible via the Airbyte UI if they are not passed to this logger)
    :param config: Json object containing the configuration of this source, content of this json is as specified in
        the properties of the spec.json file
    :param catalog: The input catalog is a ConfiguredAirbyteCatalog which is almost the same as the AirbyteCatalog
        returned by discover(), but in addition it has been configured in the UI. For each particular stream and
        field, extra modifications may have been applied, such as filtering streams and/or columns out, renaming
        some entities, etc.
    :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
        replication in the future from that saved checkpoint. This is the object that is provided with state from
        previous runs so the connector can avoid replicating the entire set of data every time.

    :return: A generator that produces a stream of AirbyteRecordMessage contained in AirbyteMessage object.
    """
    # iterate configured streams and fetch their data
    for stream in catalog.streams:
        # logger.debug(f"configured catalog stream: {stream}")
        stream_name = stream.stream.name
        is_incremental = stream.sync_mode == SyncMode.incremental
        logger.info(
            f"incremental state for stream {stream_name}: {is_incremental}: "
            f"stream.sync_mode = '{stream.sync_mode}', SyncMode.incremental = '{SyncMode.incremental}'"
        )

        req_url = get_request_url(stream_name, config)
        if stream_name == StreamGetSiteMetaData:
            data = get_site_metadata(req_url, logger, state, config, stream_name, is_incremental)
        elif stream_name == StreamGetSensorMetaData:
            data = get_sensor_metadata(req_url, logger, state, config, stream_name, is_incremental)
        elif stream_name == StreamGetSensorData:
            data = get_sensor_data(logger, state, config, stream_name, is_incremental)
        else:
            raise NotImplementedError(f"read(): don't handle stream {stream_name} found in catalog")

        result_count = 0
        for d in data:
            result_count += 1
            yield AirbyteMessage(
                type=Type.RECORD,
                record=AirbyteRecordMessage(stream=stream_name, data=d, emitted_at=int(datetime.now().timestamp()) * 1000),
            )
        if result_count < 1:
            logger.debug(f'no new data for {stream_name}: state={state.get(stream_name)}')
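# --- A minimal sketch of get_request_url and one of the per-stream fetchers used above.
# --- The constant's value, the 'base_url' config key, the 'since' query parameter, and
# --- the OBJECTID cursor field are assumptions for illustration; the real helpers live
# --- elsewhere in this connector.
import requests

StreamGetSiteMetaData = 'GetSiteMetaData'  # assumed constant value


def get_request_url(stream_name: str, config: dict) -> str:
    # assumed: each stream name maps directly to an endpoint under the configured base URL
    return f"{config['base_url']}/{stream_name}"


def get_site_metadata(req_url, logger, state, config, stream_name, is_incremental):
    # assumed behavior: yield one dict per site row; on incremental syncs, resume from the
    # cursor stored under state[stream_name] and advance it as rows are emitted
    cursor = state.get(stream_name) if is_incremental else None
    url = f'{req_url}?since={cursor}' if cursor else req_url
    logger.info(f'fetching url={url}')
    rows = requests.get(url).json() or []
    for row in rows:
        if 'OBJECTID' in row:
            state[stream_name] = row['OBJECTID']
        yield row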