def test_row_metadata_dictionary():
    """Check the dictionary metadata of series "test-tag-6" in the "row" source."""
    source = get_source("row")
    requested = SeriesSelector("row", "test-tag-6")
    metadata = source.get_metadata(requested)

    # Compare against a freshly built selector, not the one that was passed in.
    assert metadata.series == SeriesSelector("row", "test-tag-6")
    assert metadata.data_type == DataType.DICTIONARY
    assert metadata.dictionary_name == "Active"
    assert isinstance(metadata.dictionary, Dictionary)
def search(
    self, selector: SeriesSelector
) -> Generator[Union[Metadata, SeriesSelector], None, None]:
    """Search time series using the Flight service.

    The query is sent with this object's own source name. Each result is
    yielded with the source of the selector that was passed in.
    """
    remote_query = SeriesSelector(self.__source_name, selector.name)
    for found in self.__client.search(remote_query):
        if not isinstance(found, SeriesSelector):
            # Metadata result: swap in a selector carrying the caller's source.
            found.series = SeriesSelector(selector.source, found.series.name)
            yield found
            continue
        yield SeriesSelector(selector.source, found.name)
def __search_metadata(
        self, selector: SeriesSelector) -> Generator[Metadata, None, None]:
    """Yield one ``Metadata`` per row returned by the configured list query.

    Each configured list column is matched by position to a row value. The
    column named "series name" supplies the series name; every other
    non-``None`` value is mapped through the metadata value mapper and stored
    as a metadata field. When a dictionary query is configured and a row names
    a dictionary, that dictionary is looked up and attached.

    Args:
        selector: only its ``source`` is used, to label the returned series.

    Raises:
        InvalidMetadataError: when no list column is named "series name".
    """
    columns = self._config.list_columns

    # Validate the configuration before touching the database, so a missing
    # column does not leave open connections and an executed query behind.
    series_name_index = None
    for i, name in enumerate(columns):
        if name == "series name":
            series_name_index = i
    if series_name_index is None:
        raise InvalidMetadataError('column "series name" not found')

    cursor = self.connect().cursor()
    # The dictionary lookups get their own connection and cursor because the
    # list cursor is still being iterated while they run.
    dictionary_cursor = None
    if self._config.dictionary_query is not None:
        dictionary_cursor = self.connect().cursor()

    cursor.execute(self._config.list_query)
    for row in cursor:
        # Use a new name here: the ``selector`` parameter is left untouched.
        series = SeriesSelector(selector.source, row[series_name_index])
        metadata = Metadata(series)
        for i, name in enumerate(columns):
            if i == series_name_index or row[i] is None:
                continue
            metadata.set_field(
                name, self._metadata_value_mapper.from_source(name, row[i]))
        if (metadata.dictionary_name is not None
                and dictionary_cursor is not None):
            metadata.dictionary = self.__query_dictionary(
                dictionary_cursor, metadata.dictionary_name)
        yield metadata
def search(
    self, selector: SeriesSelector
) -> Generator[Union[SeriesSelector, Metadata], None, None]:
    """Search for all time series matching the given selector.

    Yields either selectors or metadata entries, depending on what the
    underlying source search returns.

    When extra metadata sources are configured, each metadata result with a
    series name is replaced by the outcome of ``get_metadata`` for that
    series. The non-empty fields of the search result are then written on top
    of it. This costs one extra ``get_metadata`` call per series.
    """
    results = self.__source.metadata.search(selector)
    if results is None:
        return

    for found in results:
        pass_through = (
            len(self.__metadata) == 0
            or isinstance(found, SeriesSelector)
            or found.series.name is None
        )
        if pass_through:
            yield found
            continue

        merged = self.get_metadata(
            SeriesSelector(found.series.source, found.series.name)
        )
        for field_name, field_value in found:
            has_value = field_value is not None and field_value != ""
            if has_value:
                merged.set_field(field_name, field_value)
        yield merged
def test_metadata_no_dictionary_query(client: Client, suffix_source):
    """The dictionary name is reported, but no dictionary is attached."""
    source_name = suffix_source("sql-no-dictionary-query")
    selector = SeriesSelector(source_name, "test-tag-6")
    dictionary_series = client.get_metadata(selector)

    assert dictionary_series.description == "A dictionary series"
    assert dictionary_series.interpolation_type == InterpolationType.STEPPED
    assert dictionary_series.dictionary_name == "Active"
    assert dictionary_series.dictionary is None
def test_interpolation_type_mapping(client: Client, suffix_source):
    """Check the interpolation types returned by a search of "sql-list"."""
    selector = SeriesSelector(suffix_source("sql-list"))
    many_series = list(client.search(selector))

    interpolation_types = []
    for metadata in many_series:
        interpolation_types.append(
            (metadata.series.name, metadata.interpolation_type))

    assert ("test-tag-1", InterpolationType.LINEAR) in interpolation_types
    assert ("test-tag-4", InterpolationType.STEPPED) in interpolation_types
def get_data(self, _, request) -> Any:
    """Return time series data as Arrow data.

    ``request`` is indexed for "selector" (with "source" and "name"),
    "start_date" and "end_date". Both dates go through ``parse_date``.
    """
    requested_series = request["selector"]
    selector = SeriesSelector(
        requested_series["source"], requested_series["name"])
    start_date = parse_date(request["start_date"])
    end_date = parse_date(request["end_date"])
    return fl.RecordBatchStream(
        self.__source.get_data(selector, start_date, end_date))
def test_metadata(client: Client):
    """Check the metadata and dictionary of "test-tag-6" in the "row" source."""
    selector = SeriesSelector("row", "test-tag-6")
    dictionary_series = client.get_metadata(selector)

    assert dictionary_series.description == "Valve X"
    assert dictionary_series.dictionary_name == "Active"
    assert dictionary_series.dictionary is not None
    mapping = dictionary_series.dictionary.mapping
    assert len(mapping) == 2
    assert mapping[0] == "OFF"
    assert mapping[1] == "ON"
def get_metadata(self, selector: SeriesSelector) -> Metadata:
    """Get metadata from the Flight service.

    The request uses this object's own source name. The returned metadata has
    its series replaced by the selector that was passed in.

    Raises:
        InvalidDataError: when the selector has no series name.
    """
    series_name = selector.name
    if series_name is None:
        raise InvalidDataError("No series name")

    result = self.__client.get_metadata(
        SeriesSelector(self.__source_name, series_name))
    result.series = selector
    return result
def test_data(client: Client):
    """Check the points returned for "test-tag-6" of the "row" source."""
    start_date = datetime.fromisoformat("2020-01-01T00:00:00+00:00")
    end_date = datetime.fromisoformat("2021-01-01T00:00:00+00:00")
    selector = SeriesSelector("row", "test-tag-6")

    data = client.get_data(selector, start_date, end_date)

    assert len(data) == 7
    timestamps = data["ts"]
    values = data["value"]
    assert timestamps[0].as_py() == start_date
    assert values[0].as_py() == 1.0
    assert timestamps[6].as_py() == datetime.fromisoformat(
        "2020-07-01T00:00:00+00:00")
    assert values[6].as_py() == 1.0
def metadata(source: Source, source_name: str,
             series_name: str) -> Generator[List[Any], None, None]:
    """Test fetching metadata from a source.

    This does not store the metadata. Two rows are yielded: a header row,
    then the row of values for the requested series.
    """
    logger.info('Requesting metadata for "%s (%s)"', series_name, source_name)
    selector = SeriesSelector(source_name, series_name)
    fetched = source.get_metadata(selector)
    yield _get_metadata_header(fetched)
    yield _get_metadata(fetched)
def __search_names(
        self, selector: SeriesSelector) -> Generator[SeriesSelector, None, None]:
    """Yield a selector for every row returned by the configured list query.

    Each row must contain exactly one value, which is used as the series
    name. The source is taken from the given selector.
    """
    cursor = self.connect().cursor()
    cursor.execute(self._config.list_query)
    for row in cursor:
        (series_name,) = row
        yield SeriesSelector(selector.source, series_name)
def test_search(client: Client):
    """Check the search results and limits for the "noaa" source."""
    selector = SeriesSelector(suffix_source("noaa"))
    many_series = list(client.search(selector))
    assert len(many_series) == 16

    wanted_name = "h2o_feet,location=coyote_creek::water_level"
    matches = [
        candidate for candidate in many_series
        if candidate.series.name == wanted_name
    ]
    series = matches[0]
    assert series.limit_low == 6
    assert series.limit_high == 9
def test_metadata_string_query(client: Client, suffix_source):
    """Check the metadata of "test-tag-6" in the "sql-string" source."""
    selector = SeriesSelector(suffix_source("sql-string"), "test-tag-6")
    dictionary_series = client.get_metadata(selector)

    assert dictionary_series.description == "A dictionary series"
    assert dictionary_series.interpolation_type == InterpolationType.STEPPED
    assert dictionary_series.dictionary_name == "Active"
    assert dictionary_series.dictionary is not None
    mapping = dictionary_series.dictionary.mapping
    assert len(mapping) == 2
    assert mapping[0] == "OFF"
    assert mapping[1] == "ON"
def get_metadata(self, _, action: fl.Action) -> List[bytes]:
    """Return metadata for the given time series as JSON.

    The action body is parsed as JSON with "source" and "name" keys. The
    reply is a single JSON document whose "series" entry repeats the
    requested source and name.
    """
    payload = json.loads(action.body.to_pybytes())
    selector = SeriesSelector(payload["source"], payload["name"])

    document = self.__source.get_metadata(selector).camelcase()
    document["series"] = dict(source=selector.source, name=selector.name)

    encoded = json.dumps(document).encode()
    return [encoded]
def test_data_string_query(client: Client, suffix_source):
    """Check the points returned for "test-tag-6" of the "sql-string" source."""
    start_date = datetime.fromisoformat("2020-01-01T00:00:00+00:00")
    end_date = datetime.fromisoformat("2021-01-01T00:00:00+00:00")
    selector = SeriesSelector(suffix_source("sql-string"), "test-tag-6")

    data = client.get_data(selector, start_date, end_date)

    assert len(data) == 5
    timestamps = data["ts"]
    values = data["value"]
    assert timestamps[0].as_py() == start_date
    assert values[0].as_py() == 1.0
    assert timestamps[4].as_py() == datetime.fromisoformat(
        "2020-05-01T00:00:00+00:00")
    assert values[4].as_py() == 1.0
def search(self, selector: SeriesSelector) -> Generator[Metadata, None, None]:
    """Search for series matching the given selector.

    One ``Metadata`` is yielded per (series, field key) pair, named
    ``<series>::<field key>``. Only the selector's ``source`` is read here;
    its ``name`` is not used for filtering.
    """
    listed_series = self.__client.get_list_series()
    field_keys = self.__client.query("SHOW FIELD KEYS")
    for raw_name in listed_series:
        # Drop backslashes from the listed series name.
        series_name = raw_name.replace("\\", "")
        # The measurement is everything before the first comma.
        measurement, _, _ = series_name.partition(",")
        for point in field_keys.get_points(measurement=measurement):
            field_key = point["fieldKey"]
            full_name = f"{series_name}::{field_key}"
            yield Metadata(SeriesSelector(selector.source, full_name))
def test_search(client: Client):
    """Check the search results and dictionary for the "row" source."""
    selector = SeriesSelector("row")
    many_series = list(client.search(selector))
    assert len(many_series) == 5

    matches = [
        candidate for candidate in many_series
        if candidate.series.name == "test-tag-6"
    ]
    dictionary_series = matches[0]
    assert dictionary_series.description == "Valve X"
    assert dictionary_series.dictionary_name == "Active"
    assert dictionary_series.dictionary is not None
    mapping = dictionary_series.dictionary.mapping
    assert len(mapping) == 2
    assert mapping[0] == "OFF"
    assert mapping[1] == "ON"
def search(self, selector: SeriesSelector) -> Generator[Metadata, None, None]:
    """Search for series matching the given selector.

    Reads the metadata file as CSV and yields one ``Metadata`` per row. When
    ``selector.name`` is set, only rows with that series name are yielded;
    otherwise every row is. Nothing is yielded when no metadata loader is
    configured.

    Args:
        selector: supplies the source for the results and, optionally, the
            series name to filter on.

    Raises:
        InvalidMetadataError: when a row lacks the mapped "series name"
            column.
    """
    if self.__loaders.metadata is None:
        return

    with self.__loaders.metadata.open() as metadata_file:
        reader = csv.DictReader(metadata_file)
        # The mapped column name is the same for every row: resolve it once.
        series_column = self.__metadata_mapper.from_kukur("series name")
        for row in reader:
            if series_column not in row:
                raise InvalidMetadataError('column "series name" not found')

            series_name = row[series_column]
            if selector.name is not None and series_name != selector.name:
                continue

            metadata = Metadata(SeriesSelector(selector.source, series_name))
            for field, _ in metadata:
                column = self.__metadata_mapper.from_kukur(field)
                if column not in row:
                    continue
                try:
                    metadata.set_field(
                        field,
                        self.__metadata_value_mapper.from_source(
                            field, row[column]),
                    )
                except ValueError:
                    # Deliberate best effort: a value that cannot be
                    # converted leaves the field at its existing value.
                    pass
            if metadata.dictionary_name is not None:
                metadata.dictionary = self.__get_dictionary(
                    metadata.dictionary_name)
            yield metadata
def _read_metadata(data: Dict[str, Any]) -> Metadata:
    """Build a ``Metadata`` object from a decoded dictionary.

    ``data["series"]`` supplies the source and name of the series. Every
    other entry with a non-``None`` value is set as a metadata field. The
    "dictionary" entry is wrapped in a ``Dictionary`` first.
    """
    series_data = data["series"]
    metadata = Metadata(
        SeriesSelector(series_data["source"], series_data["name"]))

    for key, value in data.items():
        if value is None or key == "series":
            continue
        if key == "dictionary":
            value = Dictionary(dict(value))
        metadata.set_field(key, value)
    return metadata
def test_search(client: Client, suffix_source):
    """Check the search results and dictionary for the "sql-list" source."""
    selector = SeriesSelector(suffix_source("sql-list"))
    many_series = list(client.search(selector))
    assert len(many_series) == 4

    matches = [
        candidate for candidate in many_series
        if candidate.series.name == "test-tag-6"
    ]
    dictionary_series = matches[0]
    assert dictionary_series.description == "A dictionary series"
    assert dictionary_series.interpolation_type == InterpolationType.STEPPED
    assert dictionary_series.dictionary_name == "Active"
    assert dictionary_series.dictionary is not None
    mapping = dictionary_series.dictionary.mapping
    assert len(mapping) == 2
    assert mapping[0] == "OFF"
    assert mapping[1] == "ON"
def test_data(client: Client):
    """Check the points returned for the coyote creek water level series."""
    start_date = datetime.fromisoformat("2019-09-17T00:00:00+00:00")
    end_date = datetime.fromisoformat("2019-09-17T16:24:00+00:00")
    selector = SeriesSelector(
        suffix_source("noaa"),
        "h2o_feet,location=coyote_creek::water_level",
    )

    data = client.get_data(selector, start_date, end_date)

    assert len(data) == 165
    timestamps = data["ts"]
    values = data["value"]
    assert timestamps[0].as_py() == start_date
    assert values[0].as_py() == 8.412
    assert timestamps[164].as_py() == end_date
    assert values[164].as_py() == 3.235
def search(source: Source,
           source_name: str) -> Generator[List[Any], None, None]:
    """Test listing all series (or metadata) in a source.

    A header row is yielded once, before the first result. Its shape depends
    on whether that first result is a selector or a metadata entry. After
    that, one row is yielded per result.
    """
    logger.info('Searching for time series in "%s"', source_name)
    header_printed = False
    for found in source.search(SeriesSelector(source_name)):
        is_selector = isinstance(found, SeriesSelector)
        if not header_printed:
            if is_selector:
                yield ["series name"]
            else:
                yield _get_metadata_header(found)
            header_printed = True
        if is_selector:
            yield [found.name]
        else:
            yield _get_metadata(found)
def data(
    source: Source,
    source_name: str,
    series_name: str,
    start_date: datetime,
    end_date: datetime,
) -> Generator[List[Any], None, None]:
    """Test fetching data for a time series.

    Both dates are passed through ``_make_aware`` before the request. One
    ``[ISO timestamp, value]`` row is yielded per point in the result.
    """
    aware_start = _make_aware(start_date)
    aware_end = _make_aware(end_date)
    logger.info(
        'Requesting data for "%s (%s)" from %s to %s',
        series_name,
        source_name,
        aware_start,
        aware_end,
    )

    selector = SeriesSelector(source_name, series_name)
    table = source.get_data(selector, aware_start, aware_end)
    for timestamp, point_value in zip(table["ts"], table["value"]):
        yield [timestamp.as_py().isoformat(), point_value.as_py()]
def search(
    self, selector: SeriesSelector
) -> Generator[Union[Metadata, SeriesSelector], None, None]:
    """Search Kukur for time series matching the given ``SeriesSelector``.

    Args:
        selector: return time series matching the given selector.
            Use ``name = None`` (the default) to select all series in a source.

    Returns:
        A generator that returns either ``Metadata`` or ``SeriesSelector``s.
        The return value depends on the search that is supported by the source.
    """
    request_body = {"source": selector.source, "name": selector.name}
    action = ("search", json.dumps(request_body).encode())
    # All results are collected into a list before the first one is yielded.
    responses = list(self._get_client().do_action(action))
    for response in responses:
        decoded = json.loads(response.body.to_pybytes())
        if "series" in decoded:
            yield _read_metadata(decoded)
        else:
            yield SeriesSelector(decoded["source"], decoded["name"])
def search(self, _, action: fl.Action) -> Generator[bytes, None, None]:
    """Search a data source for time series.

    This returns either a SeriesSelector or Metadata as JSON, depending on
    what is supported by the source. The action body is parsed as JSON with
    "source" and "name" keys.
    """
    payload = json.loads(action.body.to_pybytes())
    query = SeriesSelector(payload["source"], payload["name"])
    for found in self.__source.search(query):
        if not isinstance(found, Metadata):
            assert found.name is not None
            yield json.dumps(
                {"source": found.source, "name": found.name}).encode()
            continue

        assert found.series.name is not None
        document = found.camelcase()
        # Remove the existing entry, then add the plain source/name mapping.
        del document["series"]
        document["series"] = {
            "source": found.series.source,
            "name": found.series.name,
        }
        yield json.dumps(document).encode()
def test_metadata_mapping():
    """Check the metadata fields returned by the "mapping" source."""
    source = get_source("mapping")
    selector = make_series("mapping")
    metadata = source.get_metadata(selector)

    assert metadata.series == SeriesSelector("mapping", "test-tag-1")
    assert metadata.unit == "kg"
    assert metadata.limit_low == 1
    assert metadata.interpolation_type == InterpolationType.LINEAR
def get_data(self, selector: SeriesSelector, start_date: datetime,
             end_date: datetime) -> pa.Table:
    """Get data from the Flight service.

    The request uses this object's own source name with the series name of
    the given selector.
    """
    return self.__client.get_data(
        SeriesSelector(self.__source_name, selector.name),
        start_date,
        end_date,
    )
def make_series(source: str, name: str = "test-tag-1") -> SeriesSelector:
    """Build a selector for ``name`` (default "test-tag-1") in ``source``."""
    selector = SeriesSelector(source, name)
    return selector
def test_metadata(client: Client):
    """Check the limits of the coyote creek water level series."""
    selector = SeriesSelector(
        suffix_source("noaa"),
        "h2o_feet,location=coyote_creek::water_level",
    )
    series = client.get_metadata(selector)

    assert series.limit_low == 6
    assert series.limit_high == 9