Example #1
0
 def test_to_arrow_normal_dataframe(self):
     """Check ProcessResult.to_arrow() for a result coerced from a DataFrame.

     The returned value must equal a RenderResult wrapping an ArrowTable at
     the requested path, with one Number column "A" and two rows, followed
     by an empty list and an empty dict.
     """
     fd, filename = tempfile.mkstemp()
     os.close(fd)
     # mkstemp() is only used to reserve a unique path. Delete the empty
     # placeholder so that any file found at `filename` afterwards can only
     # have been created by ProcessResult.to_arrow().
     os.unlink(filename)
     try:
         process_result = ProcessResult.coerce(pd.DataFrame({"A": [1, 2]}))
         result = process_result.to_arrow(Path(filename))
         expected_column = atypes.Column(
             "A",
             ColumnType.Number(
                 # Whatever .format ProcessResult.coerce() gave
                 process_result.columns[0].type.format
             ),
         )
         self.assertEqual(
             result,
             atypes.RenderResult(
                 atypes.ArrowTable(
                     Path(filename),
                     pyarrow.table({"A": [1, 2]}),
                     atypes.TableMetadata(2, [expected_column]),
                 ),
                 [],
                 {},
             ),
         )
         # The cleanup below used to fail when the file was missing; state
         # that requirement as an explicit assertion instead.
         self.assertTrue(
             os.path.exists(filename), "to_arrow() should have written the file"
         )
     finally:
         # When the body failed before the file was written there is nothing
         # to remove. Swallow only that case so the original failure is the
         # one that gets reported.
         try:
             os.unlink(filename)
         except FileNotFoundError:
             pass
 def test_column_from_thrift(self):
     """A thrift text column converts to the equivalent types.Column."""
     thrift_column = ttypes.Column(
         "A", ttypes.ColumnType(text_type=ttypes.ColumnTypeText())
     )
     converted = types.Column.from_thrift(thrift_column)
     expected = types.Column("A", types.ColumnType.Text())
     self.assertEqual(converted, expected)
Example #3
0
 def test_params_to_thrift(self):
     """Params.to_thrift() wraps each value in the matching ttypes.ParamValue.

     Covers string, integer, float, None, boolean, column, and a list of
     string-to-string maps.
     """

     def text(value):
         # Shorthand for the string-valued ParamValue used repeatedly below.
         return ttypes.ParamValue(string_value=value)

     params = types.Params(
         {
             "str": "s",
             "int": 2,
             "float": 1.2,
             "null": None,
             "bool": False,
             "column": types.Column("A", types.ColumnType.Number(format="{:,.2f}")),
             "listofmaps": [{"A": "a", "B": "b"}, {"C": "c", "D": "d"}],
             "tab": "TODO tabs",
         }
     )
     actual = params.to_thrift()

     thrift_column = ttypes.Column(
         "A",
         ttypes.ColumnType(number_type=ttypes.ColumnTypeNumber(format="{:,.2f}")),
     )
     thrift_maps = [
         ttypes.ParamValue(map_value={"A": text("a"), "B": text("b")}),
         ttypes.ParamValue(map_value={"C": text("c"), "D": text("d")}),
     ]
     expected = {
         "str": text("s"),
         "int": ttypes.ParamValue(integer_value=2),
         "float": ttypes.ParamValue(float_value=1.2),
         "null": ttypes.ParamValue(),
         "bool": ttypes.ParamValue(boolean_value=False),
         "column": ttypes.ParamValue(column_value=thrift_column),
         "listofmaps": ttypes.ParamValue(list_value=thrift_maps),
         "tab": text("TODO tabs"),
     }
     self.assertEqual(actual, expected)
Example #4
0
 def test_to_arrow(self):
     """TableShape.to_arrow() yields the equivalent atypes.TableMetadata."""
     shape = TableShape(
         3,
         [
             Column("A", ColumnType.NUMBER("{:,d}")),
             Column("B", ColumnType.TEXT()),
         ],
     )
     actual = shape.to_arrow()
     expected = atypes.TableMetadata(
         3,
         [
             atypes.Column("A", atypes.ColumnType.Number("{:,d}")),
             atypes.Column("B", atypes.ColumnType.Text()),
         ],
     )
     self.assertEqual(actual, expected)
Example #5
0
 def test_arrow_all_null_text_column(self):
     """Convert an Arrow text column that contains a null into a DataFrame.

     NOTE(review): despite the test name, only one of the four input values
     is null -- confirm whether an all-null input was intended.
     """
     source = arrow_table(
         {"A": pyarrow.array(["a", "b", None, "c"])},
         columns=[atypes.Column("A", atypes.ColumnType.Text())],
     )
     frame, frame_columns = arrow_table_to_dataframe(source)

     expected_frame = pd.DataFrame({"A": ["a", "b", np.nan, "c"]})
     assert_frame_equal(frame, expected_frame)
     self.assertEqual(frame_columns, [Column("A", ColumnType.TEXT())])
Example #6
0
 def test_arrow_uint8_column(self):
     """An Arrow uint8 column converts to a DataFrame column of dtype uint8."""
     values = pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())
     source = arrow_table(
         {"A": values},
         columns=[atypes.Column("A", ColumnType.Number("{:,d}"))],
     )
     frame, frame_columns = arrow_table_to_dataframe(source)

     expected_frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
     assert_frame_equal(frame, expected_frame)
     self.assertEqual(frame_columns, [Column("A", ColumnType.Number("{:,d}"))])
Example #7
0
    def test_render_happy_path(self):
        module = self.kernel.compile(
            MockPath(
                ["foo.py"],
                b"import pandas as pd\ndef render(table, params): return pd.DataFrame({'A': table['A'] * params['m'], 'B': table['B'] + params['s']})",
            ),
            "foo",
        )
        with arrow_table_context(
            {
                "A": [1, 2, 3],
                "B": ["a", "b", "c"]
            },
                columns=[
                    types.Column("A", types.ColumnType.Number("{:,d}")),
                    types.Column("B", types.ColumnType.Text()),
                ],
                dir=self.basedir,
        ) as input_table:
            input_table.path.chmod(0o644)
            with self.chroot_context.tempfile_context(
                    prefix="output-", dir=self.basedir) as output_path:
                result = self.kernel.render(
                    module,
                    self.chroot_context,
                    self.basedir,
                    input_table,
                    types.Params({
                        "m": 2.5,
                        "s": "XX"
                    }),
                    types.Tab("tab-1", "Tab 1"),
                    None,
                    output_filename=output_path.name,
                )

                self.assertEquals(
                    result.table.table.to_pydict(),
                    {
                        "A": [2.5, 5.0, 7.5],
                        "B": ["aXX", "bXX", "cXX"]
                    },
                )
 def test_table_metadata_to_thrift(self):
     """TableMetadata.to_thrift() converts row count and text columns."""
     names = ("A", "B")
     metadata = types.TableMetadata(
         4, [types.Column(name, types.ColumnType.Text()) for name in names]
     )
     actual = metadata.to_thrift()
     expected = ttypes.TableMetadata(
         4,
         [
             ttypes.Column(
                 name, ttypes.ColumnType(text_type=ttypes.ColumnTypeText())
             )
             for name in names
         ],
     )
     self.assertEqual(actual, expected)
Example #9
0
 def test_dataframe_uint8_column(self):
     """A uint8 DataFrame column is written as an Arrow uint8 column."""
     frame = pd.DataFrame({"A": [1, 2, 3, 253]}, dtype=np.uint8)
     actual = dataframe_to_arrow_table(
         frame,
         [Column("A", ColumnType.NUMBER("{:,d}"))],
         self.path,
     )
     expected = arrow_table(
         {"A": pyarrow.array([1, 2, 3, 253], type=pyarrow.uint8())},
         [atypes.Column("A", atypes.ColumnType.Number("{:,d}"))],
     )
     assert_arrow_table_equals(actual, expected)
Example #10
0
 def test_arrow_timestamp_column(self):
     """An Arrow ns-timestamp column (with a null) becomes datetime64[ns]."""
     timestamps = pyarrow.array(
         [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
         type=pyarrow.timestamp(unit="ns", tz=None),
     )
     source = arrow_table(
         {"A": timestamps},
         [atypes.Column("A", ColumnType.Timestamp())],
     )
     frame, frame_columns = arrow_table_to_dataframe(source)

     expected_frame = pd.DataFrame(
         {"A": ["2019-09-17T21:21:00.123456Z", None]},
         dtype="datetime64[ns]",
     )
     assert_frame_equal(frame, expected_frame)
     self.assertEqual(frame_columns, [Column("A", ColumnType.Timestamp())])
Example #11
0
 def test_dataframe_datetime_column(self):
     """A datetime64[ns] DataFrame column is written as an Arrow ns timestamp."""
     frame = pd.DataFrame(
         {"A": ["2019-09-17T21:21:00.123456Z", None]}, dtype="datetime64[ns]"
     )
     actual = dataframe_to_arrow_table(
         frame,
         [Column("A", ColumnType.DATETIME())],
         self.path,
     )

     timestamps = pyarrow.array(
         [dt.fromisoformat("2019-09-17T21:21:00.123456"), None],
         type=pyarrow.timestamp(unit="ns", tz=None),
     )
     expected = arrow_table(
         {"A": timestamps},
         [atypes.Column("A", atypes.ColumnType.Datetime())],
     )
     assert_arrow_table_equals(actual, expected)
Example #12
0
 def test_column_from_dict(self):
     """fields._dict_to_column() builds a Number column from its dict form."""
     raw = {"name": "A", "type": "number", "format": "{:d}"}
     actual = fields._dict_to_column(raw)
     expected = types.Column("A", types.ColumnType.Number("{:d}"))
     self.assertEqual(actual, expected)
Example #13
0
 def test_to_arrow(self):
     """Column.to_arrow() yields the equivalent atypes.Column."""
     column = Column("A", ColumnType.NUMBER("{:,d}"))
     actual = column.to_arrow()
     expected = atypes.Column("A", atypes.ColumnType.Number("{:,d}"))
     self.assertEqual(actual, expected)
Example #14
0
 def test_from_arrow(self):
     """Column.from_arrow() rebuilds a Column from an atypes.Column."""
     arrow_column = atypes.Column("A", atypes.ColumnType.Number("{:,d}"))
     actual = Column.from_arrow(arrow_column)
     expected = Column("A", ColumnType.NUMBER("{:,d}"))
     self.assertEqual(actual, expected)