Example #1
0
    def on_event_batch(self, partition_context, events):
        """Queue every value in a batch of events as a data point, then upload.

        Each event body is read with ``body_as_json()`` and treated as a
        mapping. Every key becomes one data point on the time series whose
        external ID is ``<device>_<key>``, stamped with the event's
        ``iothub-enqueuedtime`` system property. External IDs not yet present
        in ``self.time_series_ensured`` are passed to ``ensure_time_series``
        first. Once the whole batch is queued, the queue is uploaded and the
        checkpoint is updated.

        Args:
            partition_context: Object whose ``update_checkpoint()`` is called
                after the upload.
            events: Iterable of events exposing ``body_as_json()`` and
                ``system_properties``.
        """
        for event in events:
            self.metrics.messages_consumed.inc()
            values = event.body_as_json()

            # Device ID and enqueued time belong to the event, not to the
            # individual keys, so look them up once per event.
            properties = event.system_properties
            device = properties[b"iothub-connection-device-id"].decode("utf-8")
            timestamp = properties[b"iothub-enqueuedtime"]

            for key, value in values.items():
                ext_id = f"{device}_{key}"

                # Only ensure a time series the first time its ID is seen
                if ext_id not in self.time_series_ensured:
                    ensure_time_series(
                        self.cdf_client,
                        [
                            TimeSeries(
                                external_id=ext_id,
                                name=f"{device} {key}",
                                asset_id=self.asset_id,
                            )
                        ],
                    )
                    self.time_series_ensured.add(ext_id)
                    self.metrics.iouthub_timeseries_ensured.inc()

                self.queue.add_to_upload_queue(
                    external_id=ext_id,
                    datapoints=[(timestamp, value)],
                )

        self.queue.upload()  # upload to CDF
        # Checkpoint only after the upload call has returned
        partition_context.update_checkpoint()
Example #2
0
    def test_all_in_cdf(self):
        """Ensuring time series must not call ``create`` with the stock client."""
        # NOTE(review): presumably the fixture's client reports every external
        # ID as found; its setup is not visible here.
        wanted = [TimeSeries(external_id=ext_id) for ext_id in ("a", "b")]

        ensure_time_series(self.client, wanted)

        create_call = self.client.time_series.create
        create_call.assert_not_called()
Example #3
0
    def test_some_in_cdf(self):
        """Only the time series reported as not found should be created."""
        present = [TimeSeries(external_id="a")]
        missing = [TimeSeries(external_id="b")]

        # Make the lookup raise a not-found error naming only the missing IDs
        not_found = CogniteNotFoundError(
            [{"externalId": ts.external_id} for ts in missing]
        )
        self.client.time_series.retrieve_multiple = Mock(side_effect=not_found)

        ensure_time_series(self.client, present + missing)

        create_call = self.client.time_series.create
        create_call.assert_called_once_with(missing)
Example #4
0
    def test_nothing_in_cdf(self):
        """Every time series should be created when all are reported missing."""
        wanted = [TimeSeries(external_id=ext_id) for ext_id in ("a", "b")]

        # Make the lookup raise a not-found error naming every requested ID
        not_found = CogniteNotFoundError(
            [{"externalId": ts.external_id} for ts in wanted]
        )
        self.client.time_series.retrieve_multiple = Mock(side_effect=not_found)

        ensure_time_series(self.client, wanted)

        create_call = self.client.time_series.create
        create_call.assert_called_once_with(wanted)
Example #5
0
    # Build the Cognite client and a state store from the configuration, then
    # initialize the store before any extraction starts.
    cdf = config.cognite.get_cognite_client("weather-extractor")
    state_store = config.extractor.state_store.create_state_store(cdf)
    state_store.initialize()

    logger.info("Getting info about weather stations")
    weather_stations = init_stations(config.locations, frost)

    # Assets are only created when the extractor config asks for it; otherwise
    # the time series are listed with assets set to None.
    if config.extractor.create_assets:
        assets = create_assets(weather_stations, config, cdf)
    else:
        assets = None

    time_series = list_time_series(weather_stations, config, assets)

    logger.info(f"Ensuring that {len(time_series)} time series exist in CDF")
    ensure_time_series(cdf, time_series)

    # Create a stopping condition
    stop = Event()

    # Reroute ctrl-C to trigger the stopping condition instead of exiting uncleanly
    def sigint_handler(sig, frame):
        """Handle SIGINT by logging and setting the ``stop`` event.

        ``sig`` and ``frame`` are the standard signal-handler arguments and
        are not used.
        """
        print()  # ensure newline before log
        logger.warning("Interrupt signal received, stopping")
        stop.set()
        logger.info("Waiting for threads to complete")

    signal.signal(signal.SIGINT, sigint_handler)

    # Metrics pushers are optional and only started when metrics are configured
    if config.metrics:
        config.metrics.start_pushers(cdf)