def to_excel(self, path: str, *, sheet_name: str = "Analytics") -> None:
    """Write the report data to an Excel spreadsheet.

    Args:
        path: The path the file should be saved to.

    Keyword Args:
        sheet_name: The name for the worksheet.

    Raises:
        MissingOptionalComponents: If openpyxl is not installed.

    .. versionadded:: 3.1.0
    """
    # Guard clause: openpyxl is an optional extra.
    if not analytix.can_use("openpyxl"):
        raise errors.MissingOptionalComponents("openpyxl")

    from openpyxl import Workbook

    if not path.endswith(".xlsx"):
        path += ".xlsx"

    workbook = Workbook()
    sheet = workbook.active
    sheet.title = sheet_name

    # Header row first, then one row per data record.
    sheet.append(self.columns)
    for record in self.data["rows"]:
        sheet.append(record)

    workbook.save(path)
    _log.info(f"Saved report as spreadsheet to {Path(path).resolve()}")
def to_dataframe(self, *, skip_date_conversion: bool = False) -> pd.DataFrame:
    """Export the report data to a pandas or Modin DataFrame.

    If you wish to use Modin, you are responsible for selecting and
    initialising your desired engine.

    Keyword Args:
        skip_date_conversion: Whether to skip automatically converting
            date columns to the ``datetime64[ns]`` format. Defaults to
            ``False``.

    Returns:
        The newly created DataFrame.

    Raises:
        MissingOptionalComponents: If neither Modin nor pandas is
            installed.
        DataFrameConversionError: If the report has no data rows.
    """
    # Prefer Modin when available; it is pandas API-compatible.
    if analytix.can_use("modin"):
        import modin.pandas as pd
    elif analytix.can_use("pandas"):
        import pandas as pd
    else:
        raise errors.MissingOptionalComponents("pandas")

    if not self._shape[0]:
        raise errors.DataFrameConversionError(
            "cannot convert to DataFrame as the returned data has no rows")

    df = pd.DataFrame(self.data["rows"], columns=self.columns)

    if not skip_date_conversion:
        s = {"day", "month"} & set(df.columns)
        if len(s):
            col = next(iter(s))
            # BUG FIX: "month" values are formatted "YYYY-MM", not
            # "YYYY-MM-DD" — the old code passed "%Y-%m-%d" for both
            # columns, which fails on month data. Use a per-column
            # format, matching to_arrow_table.
            fmt = {"day": "%Y-%m-%d", "month": "%Y-%m"}[col]
            df[col] = pd.to_datetime(df[col], format=fmt)
            _log.info(f"Converted {col!r} column to datetime64[ns] format")

    return df
def to_parquet(self, path: str) -> None:
    """Write the report data to an Apache Parquet file.

    Args:
        path: The path the file should be saved to.

    Raises:
        MissingOptionalComponents: If PyArrow is not installed.

    .. versionadded:: 3.2.0
    """
    # Guard clause: pyarrow is an optional extra.
    if not analytix.can_use("pyarrow"):
        raise errors.MissingOptionalComponents("pyarrow")

    import pyarrow.parquet as pq

    if not path.endswith(".parquet"):
        path += ".parquet"

    # Delegate the heavy lifting to the Arrow conversion, then persist.
    pq.write_table(self.to_arrow_table(), path)
    _log.info(
        f"Saved report as Apache Parquet file to {Path(path).resolve()}")
def to_polars(self, *, skip_date_conversion: bool = False) -> pl.DataFrame:
    """Export the report data to a Polars DataFrame.

    Keyword Args:
        skip_date_conversion: Whether to skip automatically converting
            date columns to the ``datetime[ns]`` format. Defaults to
            ``False``.

    Returns:
        The newly created DataFrame.

    Raises:
        MissingOptionalComponents: If Polars is not installed.

    .. versionadded:: 3.6.0
    """
    # Guard clause: polars is an optional extra.
    if not analytix.can_use("polars"):
        raise errors.MissingOptionalComponents("polars")

    import polars as pl

    # Build via Arrow so date conversion logic lives in one place.
    table = self.to_arrow_table(skip_date_conversion=skip_date_conversion)
    return pl.from_arrow(table)
def to_arrow_table(self, *, skip_date_conversion: bool = False) -> pa.Table:
    """Export the report data to an Apache Arrow Table.

    Keyword Args:
        skip_date_conversion: Whether to skip automatically converting
            date columns to the ``timestamp[ns]`` format. Defaults to
            ``False``.

    Returns:
        The newly constructed Apache Arrow Table.

    Raises:
        MissingOptionalComponents: If PyArrow is not installed.

    .. versionadded:: 3.2.0

    .. versionchanged:: 3.6.0
        Time series columns are now converted to ``timestamp[ns]``
        format instead of ``timestamp[us]`` format.
    """
    if analytix.can_use("pyarrow"):
        import pyarrow as pa
        import pyarrow.compute as pc
    else:
        raise errors.MissingOptionalComponents("pyarrow")

    # Rows arrive row-major; pa.table wants one sequence per column.
    table = pa.table(list(zip(*self.data["rows"])), names=self.columns)

    if not skip_date_conversion:
        s = {"day", "month"} & set(table.column_names)
        if len(s):
            col = next(iter(s))
            fmt = {"day": "%Y-%m-%d", "month": "%Y-%m"}[col]
            dt_series = pc.strptime(table.column(col), format=fmt, unit="ns")
            # BUG FIX: previously this always replaced column 0 under
            # the hardcoded name "day", corrupting the table when the
            # matched column was "month" or not in the first position.
            idx = table.column_names.index(col)
            table = table.set_column(idx, col, dt_series)
            _log.info(f"Converted {col!r} column to timestamp[ns] format")

    return table
DataType, JSONReportWriter, Report, ) from tests.paths import ( CSV_OUTPUT_PATH, EXCEL_OUTPUT_PATH, FEATHER_OUTPUT_PATH, JSON_OUTPUT_PATH, MOCK_CSV_PATH, MOCK_DATA_PATH, PARQUET_OUTPUT_PATH, TSV_OUTPUT_PATH, ) if analytix.can_use("openpyxl"): from openpyxl import load_workbook if analytix.can_use("pandas"): import pandas as pd if analytix.can_use("polars"): import polars as pl @pytest.fixture() def request_data(): with open(MOCK_DATA_PATH) as f: return json.load(f)