Example #1
0
 def test_coerce_infer_columns_format_supercedes_try_fallback_columns(self):
     """A format given in ``column_formats`` wins over the fallback column's."""
     frame = pd.DataFrame({"A": [1, 2]})
     fallback = [Column("A", ColumnType.NUMBER("{:,.2f}"))]
     coerced = ProcessResult.coerce(
         {"dataframe": frame, "column_formats": {"A": "{:,d}"}},
         try_fallback_columns=fallback,
     )
     # The fallback's "{:,.2f}" must not leak into the resulting column.
     expected = [Column("A", ColumnType.NUMBER("{:,d}"))]
     self.assertEqual(coerced.columns, expected)
Example #2
0
 def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
     """Fallback columns whose type disagrees with the data are not applied."""
     frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     # Deliberately mismatched: "A" holds numbers and "B" holds text.
     mismatched = [
         Column("A", ColumnType.TEXT()),
         Column("B", ColumnType.NUMBER()),
     ]
     coerced = ProcessResult.coerce(frame, try_fallback_columns=mismatched)
     expected = [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.TEXT())]
     self.assertEqual(coerced.columns, expected)
Example #3
0
 def test_coerce_infer_columns(self):
     """Coercing a bare DataFrame yields columns inferred from its data."""
     coerced = ProcessResult.coerce(
         pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     )
     expected = [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.TEXT())]
     self.assertEqual(coerced.columns, expected)
Example #4
0
 def test_format_whole_float_as_int(self):
     """
     Whole-valued floats format without a fractional part.

     This mimics d3-format, which cannot differentiate between float and int.
     """
     values = pd.Series([1.1, 2.0, 123456789.0])
     formatted = ColumnType.NUMBER("{:,}").format_series(values)
     expected = pd.Series(["1.1", "2", "123,456,789"])
     assert_series_equal(formatted, expected)
Example #5
0
 def test_arrow_uint8_column(self):
     """A uint8 Arrow column converts to a uint8 DataFrame column."""
     source = arrow_table(
         {"A": pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())},
         columns=[atypes.Column("A", atypes.ColumnType.Number("{:,d}"))],
     )
     frame, frame_columns = arrow_table_to_dataframe(source)
     expected_frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
     assert_frame_equal(frame, expected_frame)
     # The number format must survive the conversion.
     expected_columns = [Column("A", ColumnType.NUMBER("{:,d}"))]
     self.assertEqual(frame_columns, expected_columns)
Example #6
0
 def test_coerce_infer_columns_with_format(self):
     """``column_formats`` sets the format of the inferred number column."""
     frame = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})
     payload = {"dataframe": frame, "column_formats": {"A": "{:,d}"}}
     coerced = ProcessResult.coerce(payload)
     number_column = Column("A", ColumnType.NUMBER(format="{:,d}"))
     text_column = Column("B", ColumnType.TEXT())
     self.assertEqual(coerced.columns, [number_column, text_column])
Example #7
0
 def test_dataframe_uint8_column(self):
     """A uint8 DataFrame column converts to a uint8 Arrow column."""
     frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
     frame_columns = [Column("A", ColumnType.NUMBER("{:,d}"))]
     actual = dataframe_to_arrow_table(frame, frame_columns, self.path)
     expected = arrow_table(
         {"A": pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())},
         [atypes.Column("A", atypes.ColumnType.Number("{:,d}"))],
     )
     assert_arrow_table_equals(actual, expected)
Example #8
0
 def test_to_arrow(self):
     """``TableShape.to_arrow()`` yields the matching ``atypes.TableMetadata``."""
     shape = TableShape(
         3,
         [
             Column("A", ColumnType.NUMBER("{:,d}")),
             Column("B", ColumnType.TEXT()),
         ],
     )
     converted = shape.to_arrow()
     expected = atypes.TableMetadata(
         3,
         [
             atypes.Column("A", atypes.ColumnType.Number("{:,d}")),
             atypes.Column("B", atypes.ColumnType.Text()),
         ],
     )
     self.assertEqual(converted, expected)
Example #9
0
 def test_ctor_infer_columns(self):
     """The constructor infers number, text and datetime columns from data."""
     frame = pd.DataFrame(
         {
             "A": [1, 2],
             "B": ["x", "y"],
             "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
         }
     )
     inferred = ProcessResult(frame).columns
     expected = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
         Column("C", ColumnType.DATETIME()),
     ]
     self.assertEqual(inferred, expected)
Example #10
0
 def test_columns(self):
     """Column names and types are reported, with a category column as text."""
     frame = pd.DataFrame(
         {
             "A": [1],  # number
             "B": ["foo"],  # str
             "C": dt(2018, 8, 20),  # datetime64
         }
     )
     # The categorical column is attached after construction.
     frame["D"] = pd.Series(["cat"], dtype="category")
     processed = ProcessResult(frame)
     self.assertEqual(processed.column_names, ["A", "B", "C", "D"])
     expected_columns = [
         Column("A", ColumnType.NUMBER()),
         Column("B", ColumnType.TEXT()),
         Column("C", ColumnType.DATETIME()),
         Column("D", ColumnType.TEXT()),
     ]
     self.assertEqual(processed.columns, expected_columns)
Example #11
0
 def test_format_float_as_int(self):
     """A float formatted with an integer format ("{:d}") loses its fraction."""
     formatter = ColumnType.NUMBER(format="{:d}")
     formatted = formatter.format_series(pd.Series([1.1]))
     assert_series_equal(formatted, pd.Series(["1"]))
Example #12
0
 def test_format_int_as_float(self):
     """Integers formatted with a float format ("{:.1f}") gain one decimal."""
     values = pd.Series([1, 2, 3, 4], dtype=int)
     formatter = ColumnType.NUMBER(format="{:.1f}")
     formatted = formatter.format_series(values)
     expected = pd.Series(["1.0", "2.0", "3.0", "4.0"])
     assert_series_equal(formatted, expected)
Example #13
0
 def test_table_shape(self):
     """``table_shape`` reports the row count and the inferred columns."""
     processed = ProcessResult(pd.DataFrame({"A": [1, 2, 3]}))
     expected = TableShape(3, [Column("A", ColumnType.NUMBER())])
     self.assertEqual(processed.table_shape, expected)
Example #14
0
 def test_custom_format(self):
     """A custom format with a literal prefix is applied; NaN stays NaN."""
     values = pd.Series([1.1, 2231, np.nan, 0.123])
     formatter = ColumnType.NUMBER(format="${:0,.2f}")
     formatted = formatter.format_series(values)
     expected = pd.Series(["$1.10", "$2,231.00", np.nan, "$0.12"])
     assert_series_equal(formatted, expected)
Example #15
0
 def test_to_arrow(self):
     """``Column.to_arrow()`` yields the matching ``atypes.Column``."""
     converted = Column("A", ColumnType.NUMBER("{:,d}")).to_arrow()
     expected = atypes.Column("A", atypes.ColumnType.Number("{:,d}"))
     self.assertEqual(converted, expected)
Example #16
0
 def test_format_zero_length_becomes_str(self):
     """Formatting an empty int64 series yields an empty ``object`` series."""
     # pandas has no values to detect the result type from; luckily it
     # defaults to `object`.
     empty = pd.Series([], dtype=np.int64)
     formatted = ColumnType.NUMBER().format_series(empty)
     expected = pd.Series([], dtype=object)
     assert_series_equal(formatted, expected)
Example #17
0
 def test_format_disallow_field_converter(self):
     """A format containing a field converter ("!r") raises ValueError."""
     self.assertRaisesRegex(
         ValueError,
         "Field converters are not allowed",
         ColumnType.NUMBER,
         "{!r:f}",
     )
Example #18
0
 def test_format_disallow_invalid_type(self):
     """A format with an unknown type code ("T") raises ValueError."""
     self.assertRaisesRegex(
         ValueError,
         "Unknown format code 'T'",
         ColumnType.NUMBER,
         "{:T}",
     )
Example #19
0
 def test_format_percent(self):
     """A percent format scales floats by 100 and appends "%"; NaN stays NaN."""
     values = pd.Series([0.3, 11.111, 0.0001, np.nan])
     formatter = ColumnType.NUMBER(format="{:,.1%}")
     formatted = formatter.format_series(values)
     expected = pd.Series(["30.0%", "1,111.1%", "0.0%", np.nan])
     assert_series_equal(formatted, expected)
Example #20
0
 def test_format_disallow_non_format(self):
     """A string without a "{:...}" replacement field raises ValueError."""
     self.assertRaisesRegex(
         ValueError,
         'Format must look like "{:...}"',
         ColumnType.NUMBER,
         "%d",
     )
Example #21
0
 def test_format_int_as_percent(self):
     """Integers can be formatted with a percent format."""
     values = pd.Series([1, 11])
     formatter = ColumnType.NUMBER(format="{:,.1%}")
     formatted = formatter.format_series(values)
     expected = pd.Series(["100.0%", "1,100.0%"])
     assert_series_equal(formatted, expected)
Example #22
0
 def test_format_disallow_field_name(self):
     """A format that names its field ("{value:f}") raises ValueError."""
     self.assertRaisesRegex(
         ValueError,
         "Field names or numbers are not allowed",
         ColumnType.NUMBER,
         "{value:f}",
     )
Example #23
0
 def test_default_format(self):
     """With no explicit format, floats render as written and NaN stays NaN."""
     values = pd.Series([1.1, 2.231, np.nan])
     formatted = ColumnType.NUMBER().format_series(values)
     expected = pd.Series(["1.1", "2.231", np.nan])
     assert_series_equal(formatted, expected)
Example #24
0
 def test_from_arrow(self):
     """``Column.from_arrow()`` builds the matching ``Column``."""
     arrow_column = atypes.Column("A", atypes.ColumnType.Number("{:,d}"))
     converted = Column.from_arrow(arrow_column)
     expected = Column("A", ColumnType.NUMBER("{:,d}"))
     self.assertEqual(converted, expected)
Example #25
0
 def test_format_nulls_becomes_str(self):
     """Formatting an all-NaN float series yields an ``object`` series of NaN."""
     all_null = pd.Series([np.nan, np.nan], dtype=np.float64)
     formatted = ColumnType.NUMBER().format_series(all_null)
     expected = pd.Series([np.nan, np.nan], dtype=object)
     assert_series_equal(formatted, expected)
Example #26
0
 def test_format_too_many_arguments(self):
     """A format with more than one replacement field raises ValueError."""
     self.assertRaisesRegex(
         ValueError,
         "Can only format one number",
         ColumnType.NUMBER,
         "{:d}{:f}",
     )