Example #1
0
def test_strptime():
    """Check that ``pc.strptime`` parses ``%m/%d/%Y`` strings into timestamps.

    The input contains a null entry, which is expected to stay null in the
    resulting second-resolution timestamp array.
    """
    raw_dates = pa.array(["5/1/2020", None, "12/13/1900"])
    want = pa.array(
        [datetime(2020, 5, 1), None, datetime(1900, 12, 13)],
        type=pa.timestamp('s'),
    )

    parsed = pc.strptime(raw_dates, format='%m/%d/%Y', unit='s')

    assert parsed == want
Example #2
0
    def __init__(self, schema, table):
        """Initialise the table from ``table``, normalising column types.

        Columns are taken from ``table.columns`` and adjusted as follows:

        * ``int64`` columns are cast to ``float64``.
        * ``string`` columns whose first value looks like ``YYYY-MM-DD`` are
          parsed into microsecond timestamps with ``compute.strptime``.

        Only the first value of a string column is inspected, so a column
        is either converted as a whole or left untouched. Empty columns and
        columns whose first value is null are left as strings.

        Args:
            schema: Passed to the parent initialiser; iterated afterwards to
                build ``attr_to_idx`` from each element's ``aname``.
            table: Object exposing ``num_rows`` and ``columns``
                (presumably a ``pyarrow.Table`` -- confirm against callers).
        """
        super(InMemoryColumnarTable, self).__init__(schema)
        self.num_rows = table.num_rows

        # Raw string so that ``\-`` reaches the regex engine verbatim
        # instead of being an invalid string escape; compiled once because
        # it is loop-invariant.
        iso_date = re.compile(r"^[0-9]{4}[\-][0-9]{2}[\-][0-9]{2}$")

        # Work on our own list so the replacements below never touch a
        # sequence owned by the caller.
        columns = list(table.columns)
        self.columns = columns

        for idx, column in enumerate(columns):
            if column.type.equals(pa.int64()):
                columns[idx] = column.cast(pa.float64())
            elif column.type.equals(pa.string()):
                # Sniff the first value only; guard against empty columns
                # and a leading null, which cannot be matched.
                first = column[0].as_py() if len(column) > 0 else None
                if isinstance(first, str) and iso_date.match(first):
                    columns[idx] = compute.strptime(column,
                                                    format="%Y-%m-%d",
                                                    unit='us')

        self.attr_to_idx = {a.aname: i for i, a in enumerate(self.schema)}
Example #3
0
    def to_arrow_table(self,
                       *,
                       skip_date_conversion: bool = False) -> pa.Table:
        """Export the report data to an Apache Arrow Table.

        The table is built column-wise from ``self.data["rows"]`` using
        ``self.columns`` as the column names. Unless disabled, a ``day``
        or ``month`` column is parsed in place into timestamps, keeping
        its original name and position.

        Keyword Args:
            skip_date_conversion:
                Whether to skip automatically converting date columns to
                the ``timestamp[ns]`` format. Defaults to ``False``.

        Returns:
            The newly constructed Apache Arrow Table.

        Raises:
            MissingOptionalComponents:
                If ``pyarrow`` is not available.

        .. versionadded:: 3.2.0

        .. versionchanged:: 3.6.0
            Time series columns are now converted to ``timestamp[ns]``
            format instead of ``timestamp[us]`` format.
        """

        if analytix.can_use("pyarrow"):
            import pyarrow as pa
            import pyarrow.compute as pc
        else:
            raise errors.MissingOptionalComponents("pyarrow")

        # Rows are transposed into columns before being handed to Arrow.
        table = pa.table(list(zip(*self.data["rows"])), names=self.columns)

        if skip_date_conversion:
            return table

        # Iterating a dict (rather than a set intersection) keeps the
        # processing order deterministic.
        date_formats = {"day": "%Y-%m-%d", "month": "%Y-%m"}
        for col, fmt in date_formats.items():
            if col not in table.column_names:
                continue

            dt_series = pc.strptime(table.column(col),
                                    format=fmt,
                                    unit="ns")
            # Replace the column where it actually lives and keep its
            # name; a fixed index/name would rename "month" to "day" or
            # overwrite an unrelated leading column.
            position = table.column_names.index(col)
            table = table.set_column(position, col, dt_series)
            _log.info(f"Converted {col!r} column to timestamp[ns] format")

        return table