Example #1
0
 def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
     """Expect fallback columns with mismatched types to be ignored.

     The fallbacks declare A as text and B as number, while the data holds
     ints in A and strings in B; the coerced result is expected to report
     A as number and B as text.
     """
     frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     mismatched_fallbacks = [
         Column("A", ColumnType.TEXT()),
         Column("B", ColumnType.NUMBER()),
     ]
     coerced = ProcessResult.coerce(
         frame, try_fallback_columns=mismatched_fallbacks
     )
     actual = coerced.columns
     expected = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
     ]
     self.assertEqual(actual, expected)
Example #2
0
 def test_coerce_infer_columns(self):
     """Expect coerce() on a bare dataframe to report number and text columns."""
     frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     coerced = ProcessResult.coerce(frame)
     actual = coerced.columns
     expected = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
     ]
     self.assertEqual(actual, expected)
Example #3
0
 def test_arrow_all_null_text_column(self):
     """Convert an Arrow text column containing a null into a dataframe."""
     # NOTE(review): the test name says "all_null", but the fixture holds
     # three non-null strings and a single null -- confirm the intended input.
     text_values = pyarrow.array(["a", "b", None, "c"])
     text_columns = [atypes.Column("A", atypes.ColumnType.Text())]
     source = arrow_table({"A": text_values}, columns=text_columns)

     frame, column_list = arrow_table_to_dataframe(source)

     expected_frame = pd.DataFrame({"A": ["a", "b", np.nan, "c"]})
     assert_frame_equal(frame, expected_frame)
     self.assertEqual(column_list, [Column("A", ColumnType.TEXT())])
Example #4
0
 def test_dataframe_all_null_text_column(self):
     """Expect a single-null str dataframe column to map to an Arrow string array."""
     null_frame = pd.DataFrame({"A": [None]}, dtype=str)
     actual = dataframe_to_arrow_table(
         null_frame, [Column("A", ColumnType.TEXT())], self.path
     )
     expected = arrow_table({"A": pyarrow.array([None], pyarrow.string())})
     assert_arrow_table_equals(actual, expected)
Example #5
0
 def test_columns(self):
     """Check column names and reported types for a mixed-dtype dataframe."""
     data = {
         "A": [1],  # number
         "B": ["foo"],  # str
         "C": dt(2018, 8, 20),  # datetime64
     }
     frame = pd.DataFrame(data)
     # The categorical column is attached after construction.
     frame["D"] = pd.Series(["cat"], dtype="category")

     processed = ProcessResult(frame)

     self.assertEqual(processed.column_names, ["A", "B", "C", "D"])
     actual = processed.columns
     expected = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
         Column("C", ColumnType.DATETIME()),
         Column("D", ColumnType.TEXT()),
     ]
     self.assertEqual(actual, expected)
Example #6
0
 def test_coerce_infer_columns_with_format(self):
     """Expect a column_formats entry to be carried onto the number column."""
     frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     payload = {"dataframe": frame, "column_formats": {"A": "{:,d}"}}
     coerced = ProcessResult.coerce(payload)
     actual = coerced.columns
     expected = [
         Column("A", ColumnType.NUMBER(format="{:,d}")),
         Column("B", ColumnType.TEXT()),
     ]
     self.assertEqual(actual, expected)
Example #7
0
 def test_arrow_all_null_category_column(self):
     """Convert a dictionary array with one null index and no values."""
     null_indices = pyarrow.array([None], type=pyarrow.int8())
     empty_values = pyarrow.array([], type=pyarrow.string())
     encoded = pyarrow.DictionaryArray.from_arrays(null_indices, empty_values)

     frame, column_list = arrow_table_to_dataframe(arrow_table({"A": encoded}))

     self.assertEqual(column_list, [Column("A", ColumnType.TEXT())])
     expected_frame = pd.DataFrame({"A": [None]}, dtype=str).astype("category")
     assert_frame_equal(frame, expected_frame)
Example #8
0
 def test_arrow_category_column(self):
     """Convert a dictionary-encoded Arrow column into a categorical dataframe."""
     indices = pyarrow.array([0, 1, None, 0], type=pyarrow.int8())
     values = pyarrow.array(["A", "B"], type=pyarrow.string())
     encoded = pyarrow.DictionaryArray.from_arrays(indices, values)

     frame, column_list = arrow_table_to_dataframe(arrow_table({"A": encoded}))

     self.assertEqual(column_list, [Column("A", ColumnType.TEXT())])
     expected_frame = pd.DataFrame(
         {"A": ["A", "B", None, "A"]}, dtype="category"
     )
     assert_frame_equal(frame, expected_frame)
Example #9
0
 def test_dataframe_all_null_category_column(self):
     """Expect a single-null categorical column to map to an empty-dictionary array."""
     null_categories = pd.DataFrame({"A": [None]}, dtype=str).astype("category")
     actual = dataframe_to_arrow_table(
         null_categories, [Column("A", ColumnType.TEXT())], self.path
     )

     # Expected encoding: one null index over a dictionary with no values.
     null_indices = pyarrow.array([None], type=pyarrow.int8())
     empty_values = pyarrow.array([], type=pyarrow.string())
     expected = arrow_table(
         {"A": pyarrow.DictionaryArray.from_arrays(null_indices, empty_values)}
     )
     assert_arrow_table_equals(actual, expected)
Example #10
0
 def test_dataframe_category_column(self):
     """Expect a categorical dataframe column to map to a dictionary array."""
     categorical = pd.DataFrame({"A": ["A", "B", None, "A"]}, dtype="category")
     actual = dataframe_to_arrow_table(
         categorical, [Column("A", ColumnType.TEXT())], self.path
     )

     # Expected encoding: int8 indices (with a null) over the values "A", "B".
     indices = pyarrow.array([0, 1, None, 0], type=pyarrow.int8())
     values = pyarrow.array(["A", "B"], type=pyarrow.string())
     expected = arrow_table(
         {"A": pyarrow.DictionaryArray.from_arrays(indices, values)}
     )
     assert_arrow_table_equals(actual, expected)
Example #11
0
 def test_to_arrow(self):
     """Expect TableShape.to_arrow() to equal the matching atypes.TableMetadata."""
     shape = TableShape(
         3,
         [
             Column("A", ColumnType.NUMBER("{:,d}")),
             Column("B", ColumnType.TEXT()),
         ],
     )
     actual = shape.to_arrow()
     expected = atypes.TableMetadata(
         3,
         [
             atypes.Column("A", atypes.ColumnType.Number("{:,d}")),
             atypes.Column("B", atypes.ColumnType.Text()),
         ],
     )
     self.assertEqual(actual, expected)
Example #12
0
 def test_ctor_infer_columns(self):
     """Expect the constructor to report number, text and datetime columns."""
     data = {
         "A": [1, 2],
         "B": ["x", "y"],
         "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
     }
     processed = ProcessResult(pd.DataFrame(data))
     actual = processed.columns
     expected = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
         Column("C", ColumnType.DATETIME()),
     ]
     self.assertEqual(actual, expected)
Example #13
0
 def test_to_arrow(self):
     """Expect ColumnType.TEXT().to_arrow() to equal atypes.ColumnType.Text()."""
     actual = ColumnType.TEXT().to_arrow()
     expected = atypes.ColumnType.Text()
     self.assertEqual(actual, expected)
Example #14
0
 def test_from_arrow(self):
     """Expect ColumnType.from_arrow() on an Arrow text type to equal TEXT()."""
     actual = ColumnType.from_arrow(atypes.ColumnType.Text())
     expected = ColumnType.TEXT()
     self.assertEqual(actual, expected)
Example #15
0
 def test_text_type(self):
     """Expect TEXT formatting to return the text series unchanged, NaN included."""
     raw = pd.Series(["x", np.nan, "z"])
     formatted = ColumnType.TEXT().format_series(raw)
     expected = pd.Series(["x", np.nan, "z"])
     assert_series_equal(formatted, expected)