예제 #1
0
    def open_child(self, name: str):
        """Give access to the entry <name> inside the directory at path.

        Returns the child's path when files_as_path is set; otherwise the
        child is opened with the stored mode and the open file is returned.

        Raises InvalidDataError when path is not a directory or when the
        child does not exist.
        """
        directory = self.__path
        if not directory.is_dir():
            raise InvalidDataError(f'"{directory}" is not a directory')
        child = directory / name
        if not child.exists():
            raise InvalidDataError(f"'{child}' does not exist")
        if not self.__files_as_path:
            return child.open(mode=self.__mode)
        return child
예제 #2
0
    def get_data(self, selector: SeriesSelector, start_date: datetime,
                 end_date: datetime) -> pa.Table:
        """Return data for the given time series in the given time period.

        The series name is split by `_parse_influx_series` into a measurement,
        tag key/value pairs and a field key. The query selects that field from
        the measurement for points whose time lies between start_date and
        end_date (both bounds inclusive) and whose tags all match.

        Timezone-aware bounds are converted to UTC before formatting, because
        the formatted timestamp always ends in a literal 'Z'. Naive bounds are
        formatted unchanged.

        Returns a table with the columns "ts" and "value".

        Raises InvalidDataError when the selector has no series name.
        """
        # Imported locally so this method does not depend on the import block
        # of the file providing `timezone`.
        from datetime import timezone

        if selector.name is None:
            raise InvalidDataError("No series name")
        measurement, tags, field_key = _parse_influx_series(selector.name)

        def _as_influx_time(moment):
            # Without this conversion an aware datetime in another zone would
            # be sent as its local wall-clock time mislabelled as UTC.
            if moment.utcoffset() is not None:
                moment = moment.astimezone(timezone.utc)
            return moment.strftime("%Y-%m-%dT%H:%M:%SZ")

        query = f"""SELECT time, "{_escape(field_key)}"
                    FROM "{_escape(measurement)}"
                    WHERE time >= $start_date and time <= $end_date"""

        bind_params = {
            "start_date": _as_influx_time(start_date),
            "end_date": _as_influx_time(end_date),
        }

        # Tag values go through bind parameters named after their position;
        # tag keys are identifiers and are escaped into the query text.
        for i, (tag_key, tag_value) in enumerate(tags):
            bind_params[str(i)] = tag_value
            query = query + f' and "{_escape(tag_key)}" = ${str(i)}'

        timestamps = []
        values = []
        for item in self.__client.query(query=query,
                                        bind_params=bind_params).get_points():
            timestamps.append(dateutil.parser.parse(item["time"]))
            values.append(item[field_key])

        return pa.Table.from_pydict({"ts": timestamps, "value": values})
예제 #3
0
def _read_pivot_data(loader: Loader, selector: SeriesSelector) -> pa.Table:
    """Read one series from a CSV file that has one column per series.

    The first column of the file becomes "ts" (UTC timestamps with
    microsecond resolution) and the column named after the selector becomes
    "value" (float64).

    Raises InvalidDataError when no column carries the series name.
    """
    table = pyarrow.csv.read_csv(loader.open())
    series_name = selector.name
    if series_name not in table.column_names:
        raise InvalidDataError(f'column "{series_name}" not found')
    target_schema = pa.schema([
        ("ts", pa.timestamp("us", "utc")),
        ("value", pa.float64()),
    ])
    picked = table.select([0, series_name])
    renamed = picked.rename_columns(["ts", "value"])
    return renamed.cast(target_schema)
예제 #4
0
 def get_metadata(self, selector: SeriesSelector) -> Metadata:
     """Fetch the metadata of a series through the Flight client.

     The lookup uses a selector built from the stored source name and the
     series name. The returned metadata has its series replaced by the
     selector that was passed in.

     Raises InvalidDataError when the selector has no series name.
     """
     series_name = selector.name
     if series_name is None:
         raise InvalidDataError("No series name")
     remote = SeriesSelector(self.__source_name, series_name)
     result = self.__client.get_metadata(remote)
     # Hand back the caller's own selector instead of the remote one.
     result.series = selector
     return result
예제 #5
0
 def _read_pivot_data(self, selector: SeriesSelector) -> pa.Table:
     """Read one series from a file that has one column per series.

     The first column becomes "ts" (UTC timestamps with microsecond
     resolution) and the column named after the selector becomes "value",
     cast to the type chosen by `_get_value_schema_type`.

     Raises InvalidDataError when no column carries the series name.
     """
     table = self.read_file(self.__loader.open())
     wanted = selector.name
     if wanted not in table.column_names:
         raise InvalidDataError(f'column "{wanted}" not found')
     series = table.select([0, wanted]).rename_columns(["ts", "value"])
     ts_type = pa.timestamp("us", "utc")
     value_type = _get_value_schema_type(series)
     target_schema = pa.schema([("ts", ts_type), ("value", value_type)])
     return series.cast(target_schema)
예제 #6
0
    def open(self):
        """Give access to the file at path.

        Returns the path itself when files_as_path is set; otherwise the
        file is opened with the stored mode and the open file is returned.

        Raises InvalidDataError when path does not exist.
        """
        target = self.__path
        if not target.exists():
            raise InvalidDataError(f"'{target}' does not exist")
        if not self.__files_as_path:
            return target.open(mode=self.__mode)
        return target
예제 #7
0
 def has_child(self, name: str) -> bool:
     """Report whether an entry called <name> exists in the directory at path.

     Raises InvalidDataError when path is not a directory.
     """
     directory = self.__path
     if directory.is_dir():
         return (directory / name).exists()
     raise InvalidDataError(f'"{directory}" is not a directory')