Example #1
    def read(
            self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
    ) -> Generator[AirbyteMessage, None, None]:
        """
        Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
        catalog, and state.

        :param logger: Logging object to display debug/info/error to the logs
            (logs will not be accessible via airbyte UI if they are not passed to this logger)
        :param config: Json object containing the configuration of this source, content of this json is as specified in
            the properties of the spec.json file
        :param catalog: The input catalog is a ConfiguredAirbyteCatalog, which is almost the same as the AirbyteCatalog
            returned by discover(), but in addition it has been configured in the UI. For each stream and field, extra
            modifications may have been applied, such as filtering streams and/or columns out, renaming some entities, etc.
        :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
            replication in the future from that saved checkpoint.
            This is the object that carries the state from previous runs and avoids replicating the entire set of
            data every time.

        :return: A generator that produces a stream of AirbyteRecordMessage objects wrapped in AirbyteMessage objects.
        """

        for stream in catalog.streams:
            # the stream name doubles as the key into the per-stream state dict
            name = stream.stream.name
            key = name
            logger.debug(f'****** mode {stream.sync_mode} state={state}')
            if key == 'SiteMetaData':
                url = sitemetadata_url(config)
            elif key == 'WellScreens':
                url = screens_url(config)
            elif key == 'ManualGWL':
                url = manual_water_levels_url(config)
            elif key == 'PressureGWL':
                url = pressure_water_levels_url(config)
            elif key == 'AcousticGWL':
                url = acoustic_water_levels_url(config)
            else:
                continue

            # page through the endpoint, using the last seen OBJECTID as a cursor
            while True:
                objectid = state.get(key)
                if objectid:
                    curl = f'{url}?objectid={objectid}'
                else:
                    curl = url

                logger.info(f'fetching url={curl}')
                jobj = get_json(logger, curl)
                if jobj:
                    # remember the highest OBJECTID so the next request (and the next sync) resumes after it
                    state[key] = jobj[-1]['OBJECTID']
                else:
                    # an empty page means this stream is exhausted
                    break

                for di in jobj:
                    # tag each record with a unique import_uuid before emitting it
                    di['import_uuid'] = str(uuid.uuid4())
                    yield AirbyteMessage(
                        type=Type.RECORD,
                        record=AirbyteRecordMessage(stream=name, data=di,
                                                    emitted_at=int(datetime.now().timestamp()) * 1000))
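A note on checkpointing: Example #1 advances the in-memory state dict after every page, but it never emits that state back to Airbyte, so an interrupted sync loses its checkpoint. Below is a minimal sketch, not part of the original example, of how a STATE message could be interleaved with the records; it assumes the protocol models are importable from airbyte_cdk.models (the example's own imports are not shown on this page).

from typing import Any, Dict, Generator

from airbyte_cdk.models import AirbyteMessage, AirbyteStateMessage, Type


def checkpoint(state: Dict[str, Any]) -> Generator[AirbyteMessage, None, None]:
    # Yield a STATE message carrying the current cursor values; Airbyte persists it,
    # so the next sync can resume from the last saved OBJECTID instead of page one.
    # Sketch only: assumes the legacy dict-shaped state used in the example above.
    yield AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state))

In Example #1 this could be driven with "yield from checkpoint(state)" right after the inner "for di in jobj:" loop finishes a page.
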
Example #2
    def read(
        self, logger: AirbyteLogger, config: json, catalog: ConfiguredAirbyteCatalog, state: Dict[str, any]
    ) -> Generator[AirbyteMessage, None, None]:
        """
        Returns a generator of the AirbyteMessages generated by reading the source with the given configuration,
        catalog, and state.

        :param logger: Logging object to display debug/info/error to the logs
            (logs will not be accessible via airbyte UI if they are not passed to this logger)
        :param config: Json object containing the configuration of this source, content of this json is as specified in
            the properties of the spec.json file
        :param catalog: The input catalog is a ConfiguredAirbyteCatalog, which is almost the same as the AirbyteCatalog
            returned by discover(), but in addition it has been configured in the UI. For each stream and field, extra
            modifications may have been applied, such as filtering streams and/or columns out, renaming some entities, etc.
        :param state: When Airbyte reads data from a source, it might need to keep a checkpoint cursor to resume
            replication in the future from that saved checkpoint.
            This is the object that carries the state from previous runs and avoids replicating the entire set of
            data every time.

        :return: A generator that produces a stream of AirbyteRecordMessage objects wrapped in AirbyteMessage objects.
        """
        # iterate configured streams and fetch their data
        for stream in catalog.streams:
            stream_name = stream.stream.name
            is_incremental = stream.sync_mode == SyncMode.incremental

            logger.info(
                f"incremental sync for stream {stream_name}: {is_incremental} "
                f"(stream.sync_mode='{stream.sync_mode}', SyncMode.incremental='{SyncMode.incremental}')"
            )
            req_url = get_request_url(stream_name, config)
            if stream_name == StreamGetSiteMetaData:
                data = get_site_metadata(req_url, logger, state, config, stream_name, is_incremental)
            elif stream_name == StreamGetSensorMetaData:
                data = get_sensor_metadata(req_url, logger, state, config, stream_name, is_incremental)
            elif stream_name == StreamGetSensorData:
                data = get_sensor_data(logger, state, config, stream_name, is_incremental)
            else:
                raise NotImplementedError(f"read(): don't handle stream {stream_name} found in catalog")

            result_count = 0
            for d in data:
                result_count += 1
                yield AirbyteMessage(
                    type=Type.RECORD,
                    record=AirbyteRecordMessage(stream=stream_name, data=d, emitted_at=int(datetime.now().timestamp()) * 1000),
                )

            if result_count < 1:
                logger.debug(f'no new data for {stream_name}: state={state.get(stream_name)}')



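
Example #2 relies on module-level names that are not shown on this page: the stream-name constants (StreamGetSiteMetaData, StreamGetSensorMetaData, StreamGetSensorData) and the get_request_url() helper. The sketch below shows one plausible shape for them; the constant values, the endpoint paths, and the base_url config key are illustrative assumptions, not the connector's actual definitions.

# Hypothetical definitions, for illustration only; see the note above.
StreamGetSiteMetaData = 'GetSiteMetaData'
StreamGetSensorMetaData = 'GetSensorMetaData'
StreamGetSensorData = 'GetSensorData'


def get_request_url(stream_name: str, config: dict) -> str:
    # Map each stream to an endpoint under the configured base URL
    # ('base_url' is an assumed key from spec.json, not confirmed by the example).
    endpoints = {
        StreamGetSiteMetaData: 'sites',
        StreamGetSensorMetaData: 'sensors',
        StreamGetSensorData: 'sensordata',
    }
    return f"{config['base_url']}/{endpoints[stream_name]}"

With definitions like these, get_request_url(StreamGetSiteMetaData, config) simply returns f"{config['base_url']}/sites", which the per-stream fetch helpers then page through.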