Ejemplo n.º 1
0
    def to_excel(self, path: str, *, sheet_name: str = "Analytics") -> None:
        """Save the report as a single-worksheet Excel workbook.

        The column names are written as the first row, followed by one
        row per entry in the report data. If ``path`` does not already
        end in ``.xlsx``, that extension is appended before saving.

        Args:
            path:
                Where the workbook should be written.

        Keyword Args:
            sheet_name:
                The title given to the worksheet.

        Raises:
            MissingOptionalComponents:
                If openpyxl cannot be used.

        .. versionadded:: 3.1.0
        """

        if not analytix.can_use("openpyxl"):
            raise errors.MissingOptionalComponents("openpyxl")

        from openpyxl import Workbook

        target = path if path.endswith(".xlsx") else f"{path}.xlsx"

        workbook = Workbook()
        sheet = workbook.active
        sheet.title = sheet_name

        # Header row first, then the data rows in their original order.
        for record in (self.columns, *self.data["rows"]):
            sheet.append(record)

        workbook.save(target)
        _log.info(f"Saved report as spreadsheet to {Path(target).resolve()}")
Ejemplo n.º 2
0
    def to_dataframe(self,
                     *,
                     skip_date_conversion: bool = False) -> pd.DataFrame:
        """Export the report data to a pandas or Modin DataFrame. If you
        wish to use Modin, you are responsible for selecting and
        initialising your desired engine.

        Modin is preferred when it can be used; pandas is the fallback.

        Keyword Args:
            skip_date_conversion:
                Whether to skip automatically converting date columns to
                the ``datetime64[ns]`` format. Defaults to ``False``.

        Returns:
            The newly created DataFrame.

        Raises:
            MissingOptionalComponents:
                If neither Modin nor pandas can be used.
            DataFrameConversionError:
                If the report contains no rows.
        """

        if analytix.can_use("modin"):
            import modin.pandas as pd
        elif analytix.can_use("pandas"):
            import pandas as pd
        else:
            raise errors.MissingOptionalComponents("pandas")

        if not self._shape[0]:
            raise errors.DataFrameConversionError(
                "cannot convert to DataFrame as the returned data has no rows")

        df = pd.DataFrame(self.data["rows"], columns=self.columns)

        if not skip_date_conversion:
            # Each time-series column needs its own format: parsing a
            # "month" column with the "day" format fails under strict
            # format matching. These formats mirror the ones used by
            # ``to_arrow_table``.
            formats = {"day": "%Y-%m-%d", "month": "%Y-%m"}
            for col, fmt in formats.items():
                if col in df.columns:
                    df[col] = pd.to_datetime(df[col], format=fmt)
                    _log.info(
                        f"Converted {col!r} column to datetime64[ns] format")
                    # Only a single time-series column is converted.
                    break

        return df
Ejemplo n.º 3
0
    def to_parquet(self, path: str) -> None:
        """Save the report as an Apache Parquet file.

        The report is first converted with ``to_arrow_table`` and the
        resulting table is then written out. If ``path`` does not
        already end in ``.parquet``, that extension is appended.

        Args:
            path:
                Where the Parquet file should be written.

        Raises:
            MissingOptionalComponents:
                If PyArrow cannot be used.

        .. versionadded:: 3.2.0
        """

        if not analytix.can_use("pyarrow"):
            raise errors.MissingOptionalComponents("pyarrow")

        import pyarrow.parquet as pq

        target = path if path.endswith(".parquet") else f"{path}.parquet"

        arrow_table = self.to_arrow_table()
        pq.write_table(arrow_table, target)

        resolved = Path(target).resolve()
        _log.info(f"Saved report as Apache Parquet file to {resolved}")
Ejemplo n.º 4
0
    def to_polars(self, *, skip_date_conversion: bool = False) -> pl.DataFrame:
        """Build a Polars DataFrame from the report data.

        The report is converted with ``to_arrow_table`` and the result
        is handed to ``polars.from_arrow``.

        Keyword Args:
            skip_date_conversion:
                Whether to skip automatically converting date columns to
                the ``datetime[ns]`` format. Defaults to ``False``.

        Returns:
            The newly created DataFrame.

        Raises:
            MissingOptionalComponents:
                If Polars cannot be used.

        .. versionadded:: 3.6.0
        """

        if not analytix.can_use("polars"):
            raise errors.MissingOptionalComponents("polars")

        import polars as pl

        arrow_table = self.to_arrow_table(
            skip_date_conversion=skip_date_conversion)
        return pl.from_arrow(arrow_table)
Ejemplo n.º 5
0
    def to_arrow_table(self,
                       *,
                       skip_date_conversion: bool = False) -> pa.Table:
        """Export the report data to an Apache Arrow Table.

        Keyword Args:
            skip_date_conversion:
                Whether to skip automatically converting date columns to
                the ``timestamp[ns]`` format. Defaults to ``False``.

        Returns:
            The newly constructed Apache Arrow Table.

        Raises:
            MissingOptionalComponents:
                If PyArrow cannot be used.

        .. versionadded:: 3.2.0

        .. versionchanged:: 3.6.0
            Time series columns are now converted to ``timestamp[ns]``
            format instead of ``timestamp[us]`` format.
        """

        if analytix.can_use("pyarrow"):
            import pyarrow as pa
            import pyarrow.compute as pc
        else:
            raise errors.MissingOptionalComponents("pyarrow")

        # The rows are transposed so each Arrow column receives its values.
        table = pa.table(list(zip(*self.data["rows"])), names=self.columns)

        if not skip_date_conversion:
            formats = {"day": "%Y-%m-%d", "month": "%Y-%m"}
            s = formats.keys() & set(table.column_names)
            if s:
                col = next(iter(s))
                dt_series = pc.strptime(table.column(col),
                                        format=formats[col],
                                        unit="ns")
                # Replace the column where it actually lives and keep its
                # own name, rather than assuming it is a "day" column at
                # index 0 (which would rename "month" and could overwrite
                # an unrelated column).
                idx = table.column_names.index(col)
                table = table.set_column(idx, col, dt_series)
                _log.info(f"Converted {col!r} column to timestamp[ns] format")

        return table
Ejemplo n.º 6
0
    DataType,
    JSONReportWriter,
    Report,
)
from tests.paths import (
    CSV_OUTPUT_PATH,
    EXCEL_OUTPUT_PATH,
    FEATHER_OUTPUT_PATH,
    JSON_OUTPUT_PATH,
    MOCK_CSV_PATH,
    MOCK_DATA_PATH,
    PARQUET_OUTPUT_PATH,
    TSV_OUTPUT_PATH,
)

if analytix.can_use("openpyxl"):
    from openpyxl import load_workbook

if analytix.can_use("pandas"):
    import pandas as pd

if analytix.can_use("polars"):
    import polars as pl


@pytest.fixture()
def request_data():
    """Provide the mock data parsed from the JSON file at MOCK_DATA_PATH."""
    with open(MOCK_DATA_PATH) as mock_file:
        payload = json.load(mock_file)
    return payload