コード例 #1
0
    def test_metadata_does_not_require_file_read(self):
        """Cached table metadata stays readable after the stored file is removed.

        Caches a one-row, four-column render result, removes the cached object
        via ``s3.remove``, re-fetches the step from the database and checks
        that ``table_metadata`` still matches the cached columns.
        """
        expected_columns = [
            Column("A", ColumnType.Number(format="{:,.2f}")),
            Column("B", ColumnType.Timestamp()),
            Column("C", ColumnType.Text()),
            Column("D", ColumnType.Date("month")),
        ]
        arrow_columns = (
            make_column("A", [1], format="{:,.2f}"),
            make_column("B", [datetime.datetime(2021, 4, 13)]),
            make_column("C", ["c"]),
            make_column("D", [datetime.date(2021, 4, 1)], unit="month"),
        )
        with arrow_table_context(*arrow_columns) as (path, table):
            loaded = LoadedRenderResult(
                path=path,
                table=table,
                columns=expected_columns,
                errors=[],
                json={},
            )
            cache_render_result(self.workflow, self.step, 1, loaded)

        # Remove the cached file entirely: if the assertion below still
        # passes, the metadata cannot have come from reading that file.
        parquet_key = crr_parquet_key(self.step.cached_render_result)
        s3.remove(BUCKET, parquet_key)

        # Build a _new_ CachedRenderResult from the DB row rather than reusing
        # whatever is still held in memory on self.step.
        reloaded_step = Step.objects.get(id=self.step.id)
        reloaded_result = reloaded_step.cached_render_result

        self.assertEqual(
            reloaded_result.table_metadata, TableMetadata(1, expected_columns)
        )
コード例 #2
0
    def test_render_with_input_columns(self):
        """A render() that accepts ``input_columns`` gets one RenderColumn per column.

        The nested ``render`` asserts on the mapping it receives; the outer
        test only has to run it against a three-column Arrow table.
        """

        def render(table, params, *, input_columns):
            # Parameter names are kept as-is: the caller passes
            # ``input_columns`` by keyword.
            expected = {
                "A": ptypes.RenderColumn("A", "text", None),
                "B": ptypes.RenderColumn("B", "number", "{:,.3f}"),
                "C": ptypes.RenderColumn("C", "timestamp", None),
            }
            self.assertEqual(input_columns, expected)

        table_data = {
            "A": ["x"],
            "B": [1],
            "C": pa.array([datetime.now()], pa.timestamp("ns")),
        }
        column_specs = [
            Column("A", ColumnType.Text()),
            Column("B", ColumnType.Number("{:,.3f}")),
            Column("C", ColumnType.Timestamp()),
        ]
        with arrow_table_context(
            table_data, columns=column_specs, dir=self.basedir
        ) as arrow_table:
            self._test_render(render, arrow_table=arrow_table)
コード例 #3
0
 def render(arrow_table, params, output_path, *, columns, **kwargs):
     """Check the ``columns`` kwarg, then write a 12-column table to *output_path*.

     The written schema is the inferred one, except that field "J" is replaced
     by a ``date32`` field carrying ``{"unit": "month"}`` metadata. Returns an
     empty list.
     """
     # Test the "columns" kwarg
     #
     # TODO nix this! The only module that uses it is `converttotext`.
     expected_columns = [
         Column("A", ColumnType.Number("{:,.3f}")),
         Column("B", ColumnType.Number("{:,.3f}")),
         Column("C", ColumnType.Number("{:,.3f}")),
         Column("D", ColumnType.Timestamp()),
         Column("E", ColumnType.Timestamp()),
         Column("F", ColumnType.Timestamp()),
         Column("G", ColumnType.Text()),
         Column("H", ColumnType.Text()),
         Column("I", ColumnType.Text()),
         Column("J", ColumnType.Date(unit="day")),
         Column("K", ColumnType.Date(unit="week")),
         Column("L", ColumnType.Text()),
     ]
     self.assertEqual(columns, expected_columns)

     def march_8_2020():
         # A fresh single-value nanosecond timestamp array on every call.
         return pa.array([datetime(2020, 3, 8)], pa.timestamp("ns"))

     data = {
         "A": [1],
         "B": march_8_2020(),
         "C": ["a"],
         "D": [1],
         "E": march_8_2020(),
         "F": ["a"],
         "G": [1],
         "H": march_8_2020(),
         "I": ["a"],
         "J": pa.array([date(2021, 4, 1)]),
         "K": pa.array([date(2021, 4, 12)]),
         "L": pa.array([date(2021, 4, 1)]),
     }
     table = pa.table(data)

     # Swap in a "J" field that carries unit metadata; every other field keeps
     # the schema pyarrow inferred for it.
     j_index = table.schema.get_field_index("J")
     j_field = pa.field("J", pa.date32(), metadata={"unit": "month"})
     schema = table.schema.set(j_index, j_field)

     with pa.ipc.RecordBatchFileWriter(output_path, schema) as writer:
         # Rebuild the table from its existing columns under the patched schema.
         writer.write_table(pa.table(table.columns, schema=schema))
     return []
コード例 #4
0
def _arrow_column_to_column(name: str, column: pyarrow.ChunkedArray) -> Column:
    """Build the ``Column`` called *name* that matches *column*'s Arrow type.

    Floating-point and integer types become ``Number("{:,}")``, timestamp
    types become ``Timestamp()``, and string or dictionary types become
    ``Text()``.

    Raises:
        RuntimeError: if the Arrow type is none of the above.
    """
    arrow_type = column.type
    checks = pyarrow.types

    if checks.is_floating(arrow_type) or checks.is_integer(arrow_type):
        return Column(name, ColumnType.Number("{:,}"))
    if checks.is_timestamp(arrow_type):
        return Column(name, ColumnType.Timestamp())
    if checks.is_string(arrow_type) or checks.is_dictionary(arrow_type):
        return Column(name, ColumnType.Text())

    raise RuntimeError(f"Unknown column type {arrow_type!r}")
コード例 #5
0
 def test_read_cached_render_result_slice_as_text_timestamp(self):
     """Reading a cached timestamp column as "csv" text gives the expected string.

     Caches a nanosecond-timestamp column "A" holding one value and one null,
     then compares the "csv" slice with the literal expected below.
     """
     timestamp_column = make_column(
         "A", [2134213412341232967, None], pa.timestamp("ns")
     )
     with arrow_table_context(timestamp_column) as (path, table):
         loaded = LoadedRenderResult(
             path=path,
             table=table,
             columns=[Column("A", ColumnType.Timestamp())],
             errors=[],
             json={},
         )
         cache_render_result(self.workflow, self.step, 1, loaded)

     cached = self.step.cached_render_result
     csv_text = read_cached_render_result_slice_as_text(
         cached, "csv", range(2), range(3)
     )
     self.assertEqual(csv_text, "A\n2037-08-18T13:03:32.341232967Z\n")
コード例 #6
0
    def test_clean_multicolumn_prompting_error_convert_to_text(self):
        """Choosing non-text columns for a text-only multicolumn raises PromptingError.

        Columns "A" (number) and "B" (timestamp) are selected where only text
        is allowed; a single ``WrongColumnType`` error naming both is expected.
        """
        # TODO make this _automatic_ instead of quick-fix?
        # ... but for now: prompt for a Quick Fix.
        metadata = TableMetadata(
            3,
            [
                Column("A", ColumnType.Number()),
                Column("B", ColumnType.Timestamp()),
                Column("C", ColumnType.Text()),
            ],
        )
        with self.assertRaises(PromptingError) as raised:
            text_only = ParamDType.Multicolumn(column_types=frozenset({"text"}))
            clean_value(text_only, "A,B", metadata)

        actual_errors = raised.exception.errors
        expected_errors = [
            PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
        ]
        self.assertEqual(actual_errors, expected_errors)
コード例 #7
0
def Timestamp(name: str) -> Column:
    """Return a ``Column`` called *name* whose type is ``ColumnType.Timestamp()``."""
    timestamp_type = ColumnType.Timestamp()
    return Column(name, timestamp_type)
コード例 #8
0
 def test_timestamp_ok(self):
     """read_columns() reports a ``timestamp("ns")`` Arrow column as Timestamp."""
     timestamps = pa.array([12312312314512], pa.timestamp("ns"))
     table = pa.table({"A": timestamps})

     actual = read_columns(table)

     self.assertEqual(actual, [Column("A", ColumnType.Timestamp())])
コード例 #9
0
    def test_render_arrow_table_infer_output_column_formats_from_input(self):
        """Output column types are either recalled from the input or inferred.

        The input has three number, three timestamp and three text columns.
        The nested ``render`` writes a table whose columns cycle through
        number / timestamp / text, so only "A", "E" and "I" keep their input
        type. The expected list at the end marks each column as recalled or
        inferred.
        """

        def march_8_2020():
            # A fresh single-value nanosecond timestamp array on every call.
            return pa.array([datetime(2020, 3, 8)], pa.timestamp("ns"))

        input_columns = (
            [Column(name, ColumnType.Number("{:,.3f}")) for name in "ABC"]
            + [Column(name, ColumnType.Timestamp()) for name in "DEF"]
            + [Column(name, ColumnType.Text()) for name in "GHI"]
        )

        # The param name "arrow_table" is a special case
        def render(arrow_table, params, output_path, *, columns, **kwargs):
            # Test the "columns" kwarg
            self.assertEqual(columns, input_columns)
            output = pa.table(
                {
                    "A": [1],
                    "B": march_8_2020(),
                    "C": ["a"],
                    "D": [1],
                    "E": march_8_2020(),
                    "F": ["a"],
                    "G": [1],
                    "H": march_8_2020(),
                    "I": ["a"],
                }
            )
            with pa.ipc.RecordBatchFileWriter(output_path, output.schema) as writer:
                writer.write_table(output)
            return []

        input_data = {
            "A": [1],
            "B": [1],
            "C": [1],
            "D": march_8_2020(),
            "E": march_8_2020(),
            "F": march_8_2020(),
            "G": ["a"],
            "H": ["a"],
            "I": ["a"],
        }
        with arrow_table_context(
            input_data, columns=input_columns, dir=self.basedir
        ) as arrow_table:
            result = self._test_render(render, arrow_table=arrow_table)
            actual_columns = result.table.metadata.columns
            expected_columns = [
                Column("A", ColumnType.Number("{:,.3f}")),  # recalled
                Column("B", ColumnType.Timestamp()),  # inferred
                Column("C", ColumnType.Text()),  # inferred
                Column("D", ColumnType.Number("{:,}")),  # inferred
                Column("E", ColumnType.Timestamp()),  # recalled
                Column("F", ColumnType.Text()),  # inferred
                Column("G", ColumnType.Number("{:,}")),  # inferred
                Column("H", ColumnType.Timestamp()),  # inferred
                Column("I", ColumnType.Text()),  # recalled
            ]
            self.assertEqual(actual_columns, expected_columns)
コード例 #10
0
def TIMESTAMP(name: str):
    """Return a ``Column`` called *name* whose type is ``ColumnType.Timestamp()``."""
    timestamp_type = ColumnType.Timestamp()
    return Column(name, timestamp_type)