def test_arrow_timestamp_column(self):
    # An Arrow timestamp column (unit "ns", no timezone) holding one value
    # and one null is converted; the resulting frame must equal a
    # datetime64[ns] pandas frame and the returned column metadata must
    # report a Timestamp type.
    timestamps = pyarrow.array(
        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
        type=pyarrow.timestamp(unit="ns", tz=None),
    )
    source_table = arrow_table(
        {"A": timestamps},
        [atypes.Column("A", ColumnType.Timestamp())],
    )

    dataframe, columns = arrow_table_to_dataframe(source_table)

    expected_frame = pd.DataFrame(
        {"A": ["2019-09-17T21:21:00.123456Z", None]},
        dtype="datetime64[ns]",
    )
    assert_frame_equal(dataframe, expected_frame)
    self.assertEqual(columns, [Column("A", ColumnType.Timestamp())])
def test_dataframe_datetime_column(self):
    # A datetime64[ns] frame column (one value, one null) declared as a
    # Timestamp column is checked against the equivalent table built with
    # make_table/make_column.
    input_frame = pd.DataFrame(
        {"A": ["2019-09-17T21:21:00.123456Z", None]},
        dtype="datetime64[ns]",
    )
    input_columns = [Column("A", ColumnType.Timestamp())]
    expected_table = make_table(
        make_column(
            "A",
            [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
        )
    )

    self._test_dataframe_to_arrow_table(
        input_frame,
        input_columns,
        expected_table,
    )
def test_dataframe_datetime_column(self):
    # Convert a datetime64[ns] frame (one value, one null) to an Arrow table
    # and compare it with a hand-built timestamp[ns] table that has no
    # timezone.
    input_frame = pd.DataFrame(
        {"A": ["2019-09-17T21:21:00.123456Z", None]},
        dtype="datetime64[ns]",
    )
    actual_table = dataframe_to_arrow_table(
        input_frame,
        [Column("A", ColumnType.Timestamp())],
        self.path,
    )

    expected_values = pyarrow.array(
        [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
        type=pyarrow.timestamp(unit="ns", tz=None),
    )
    expected_table = arrow_table(
        {"A": expected_values},
        [atypes.Column("A", ColumnType.Timestamp())],
    )

    assert_arrow_table_equals(actual_table, expected_table)
def test_ctor_infer_columns(self):
    # ProcessResult is built from a bare DataFrame, with no explicit column
    # list; its `columns` must report Number, Text and Timestamp for the
    # int, str and datetime inputs respectively.
    frame = pd.DataFrame(
        {
            "A": [1, 2],
            "B": ["x", "y"],
            "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
        }
    )
    result = ProcessResult(frame)

    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
        Column("C", ColumnType.Timestamp()),
    ]
    self.assertEqual(result.columns, expected_columns)
def test_columns(self):
    # Checks both `column_names` and `columns` of a ProcessResult built from
    # a one-row frame; the categorical column "D" is expected to be reported
    # as Text.
    frame = pd.DataFrame(
        {
            "A": [1],  # numeric input
            "B": ["foo"],  # string input
            "C": dt(2018, 8, 20),  # datetime input
        }
    )
    frame["D"] = pd.Series(["cat"], dtype="category")

    result = ProcessResult(frame)

    expected_names = ["A", "B", "C", "D"]
    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
        Column("C", ColumnType.Timestamp()),
        Column("D", ColumnType.Text()),
    ]
    self.assertEqual(result.column_names, expected_names)
    self.assertEqual(result.columns, expected_columns)
def test_ctor_infer_columns(self):
    # ProcessResult is built from a bare DataFrame, with no explicit column
    # list; its `columns` must report Number, Text, Timestamp and
    # Date("day") for the int, str, datetime and daily-Period inputs.
    frame = pd.DataFrame(
        {
            "A": [1, 2],
            "B": ["x", "y"],
            "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
            "D": [pd.Period("2021-01-01", freq="D"), pd.NaT],
        }
    )
    result = ProcessResult(frame)

    expected_columns = [
        Column("A", ColumnType.Number()),
        Column("B", ColumnType.Text()),
        Column("C", ColumnType.Timestamp()),
        Column("D", ColumnType.Date("day")),
    ]
    self.assertEqual(result.columns, expected_columns)