Ejemplo n.º 1
0
    def test_render_using_tab_output(self):
        """A tab param reaches ``render()`` with its name, columns and data.

        The stub ``render()`` below performs the assertions; the test body only
        wires a two-column table in as the output registered under "tab-1".
        """

        def render(table, params):
            tab_param = params["tabparam"]
            self.assertEqual(tab_param.name, "Tab 1")

            expected_columns = {
                "X": ptypes.RenderColumn("X", "number", "{:,d}"),
                "Y": ptypes.RenderColumn("Y", "text", None),
            }
            self.assertEqual(tab_param.columns, expected_columns)

            expected_frame = pd.DataFrame({"X": [1], "Y": ["y"]})
            assert_frame_equal(tab_param.dataframe, expected_frame)

        schema = ParamSchema.Dict({"tabparam": ParamSchema.Tab()})
        with ModuleTestEnv(param_schema=schema, render=render) as env:
            x_column = make_column("X", [1], format="{:,d}")
            y_column = make_column("Y", ["y"])
            with arrow_table_context(
                x_column, y_column, dir=env.basedir
            ) as (tab_path, _):
                # The tab output refers to the file by its name only.
                tab_outputs = {
                    "tab-1": TabOutput(
                        tab_name="Tab 1", table_filename=tab_path.name
                    )
                }
                env.call_render(
                    make_table(),
                    params={"tabparam": "tab-1"},
                    tab_outputs=tab_outputs,
                )
Ejemplo n.º 2
0
    def test_render_using_tab_output(self):
        """A ``TabOutput`` param reaches ``render()`` as a loaded tab.

        The stub ``render()`` asserts the slug, name, column metadata and
        dataframe it finds under ``params["tabparam"]``.
        """

        def render(table, params):
            tab_param = params["tabparam"]
            self.assertEqual(tab_param.slug, "tab-1")
            self.assertEqual(tab_param.name, "Tab 1")

            expected_columns = {
                "X": ptypes.RenderColumn("X", "number", "{:,d}"),
                "Y": ptypes.RenderColumn("Y", "text", None),
            }
            self.assertEqual(tab_param.columns, expected_columns)

            expected_frame = pd.DataFrame({"X": [1], "Y": ["y"]})
            assert_frame_equal(tab_param.dataframe, expected_frame)

        table_data = {"X": [1], "Y": ["y"]}
        table_columns = [
            Column("X", ColumnType.Number("{:,d}")),
            Column("Y", ColumnType.Text()),
        ]
        with arrow_table_context(
            table_data, columns=table_columns, dir=self.basedir
        ) as atable:
            tab_output = TabOutput(Tab("tab-1", "Tab 1"), atable)
            self._test_render(render, params={"tabparam": tab_output})
Ejemplo n.º 3
0
    def test_render_exception(self):
        """A module whose ``render()`` raises surfaces as ``ModuleExitedError``.

        The error is expected to carry exit code 1 and a log that mentions the
        ``RuntimeError`` and its message, without "Bad file descriptor" noise.
        """
        module = self.kernel.compile(
            MockPath(
                ["foo.py"],
                b"import os\ndef render(table, params): raise RuntimeError('fail')",
            ),
            "foo",
        )
        with self.assertRaises(ModuleExitedError) as cm:
            with arrow_table_context({"A": [1]},
                                     dir=self.basedir) as input_table:
                # NOTE(review): presumably widens permissions so the module
                # process can read the input file -- confirm.
                input_table.path.chmod(0o644)
                with self.chroot_context.tempfile_context(
                        prefix="output-", dir=self.basedir) as output_path:
                    self.kernel.render(
                        module,
                        self.chroot_context,
                        self.basedir,
                        input_table,
                        types.Params({
                            "m": 2.5,
                            "s": "XX"
                        }),
                        types.Tab("tab-1", "Tab 1"),
                        None,
                        output_filename=output_path.name,
                    )

        # assertEqual: the assertEquals alias is deprecated and was removed in
        # Python 3.12.
        self.assertEqual(cm.exception.exit_code, 1)  # Python exit code
        self.assertRegex(cm.exception.log, r"\bRuntimeError\b")
        self.assertRegex(cm.exception.log, r"\bfail\b")
        # Regression test: [2019-10-02], the "pyspawner_main()->spawn_child()"
        # process would raise _another_ exception while exiting. It would try to
        # close an already-closed socket.
        self.assertNotRegex(cm.exception.log, r"Bad file descriptor")
Ejemplo n.º 4
0
 def _test_render(
     self,
     render_fn,
     arrow_table_dict=None,
     arrow_table=None,
     params=None,
     tab=Tab("tab-1", "Tab 1"),
     fetch_result=None,
     output_filename=None,
 ):
     """Run ``module.render_thrift()`` with ``module.render`` patched.

     Args:
         render_fn: replacement installed as ``module.render`` for the call.
         arrow_table_dict: data used to build the input table when
             ``arrow_table`` is not given. Defaults to an empty dict.
         arrow_table: ready-made input table; when ``None`` one is created
             from ``arrow_table_dict`` inside ``self.basedir``.
         params: raw params wrapped in ``Params``. Defaults to an empty dict.
         tab: tab passed along with the request.
         fetch_result: optional fetch result; converted to thrift when given.
         output_filename: not read by this helper; the output file is always
             a fresh temporary file in ``self.basedir``.

     Returns:
         The thrift render result converted by
         ``thrift_render_result_to_arrow()``.
     """
     # None sentinels instead of `{}` defaults: a mutable default object is
     # shared between every call of the function.
     if arrow_table_dict is None:
         arrow_table_dict = {}
     if params is None:
         params = {}

     with ExitStack() as ctx:
         if arrow_table is None:
             arrow_table = ctx.enter_context(
                 arrow_table_context(arrow_table_dict, dir=self.basedir))
         ctx.enter_context(patch.object(module, "render", render_fn))
         out_filename = ctx.enter_context(
             tempfile_context(dir=self.basedir)).name
         thrift_result = module.render_thrift(
             ttypes.RenderRequest(
                 str(self.basedir),
                 arrow_arrow_table_to_thrift(arrow_table),
                 arrow_params_to_thrift(Params(params)),
                 arrow_tab_to_thrift(tab),
                 arrow_fetch_result_to_thrift(fetch_result)
                 if fetch_result is not None else None,
                 out_filename,
             ))
         return thrift_render_result_to_arrow(thrift_result, self.basedir)
Ejemplo n.º 5
0
    def test_metadata_does_not_require_file_read(self):
        """``table_metadata`` is still available once the stored file is gone."""
        expected_columns = [
            Column("A", ColumnType.Number(format="{:,.2f}")),
            Column("B", ColumnType.Timestamp()),
            Column("C", ColumnType.Text()),
            Column("D", ColumnType.Date("month")),
        ]
        with arrow_table_context(
            make_column("A", [1], format="{:,.2f}"),
            make_column("B", [datetime.datetime(2021, 4, 13)]),
            make_column("C", ["c"]),
            make_column("D", [datetime.date(2021, 4, 1)], unit="month"),
        ) as (arrow_path, loaded_table):
            cache_render_result(
                self.workflow,
                self.step,
                1,
                LoadedRenderResult(
                    path=arrow_path,
                    table=loaded_table,
                    columns=expected_columns,
                    errors=[],
                    json={},
                ),
            )

        # Remove the stored file: a passing test then proves nothing read it.
        stored_key = crr_parquet_key(self.step.cached_render_result)
        s3.remove(BUCKET, stored_key)

        # Re-fetch the step so the cached result is built from DB columns
        # rather than from objects already in memory.
        reloaded = Step.objects.get(id=self.step.id).cached_render_result

        self.assertEqual(
            reloaded.table_metadata, TableMetadata(1, expected_columns)
        )
Ejemplo n.º 6
0
    def test_render_with_input_columns(self):
        """``render()`` receives the input column metadata as ``input_columns``."""

        def render(*args, input_columns):
            expected = {
                "A": ptypes.RenderColumn("A", "text", None),
                "B": ptypes.RenderColumn("B", "number", "{:,.3f}"),
                "C": ptypes.RenderColumn("C", "datetime", None),
            }
            self.assertEqual(input_columns, expected)

        table_data = {
            "A": ["x"],
            "B": [1],
            "C": pa.array([datetime.now()], pa.timestamp("ns")),
        }
        column_specs = [
            Column("A", ColumnType.Text()),
            Column("B", ColumnType.Number("{:,.3f}")),
            Column("C", ColumnType.Datetime()),
        ]
        with arrow_table_context(
            table_data, columns=column_specs, dir=self.basedir
        ) as input_table:
            self._test_render(render, arrow_table=input_table)
Ejemplo n.º 7
0
    def test_render_tab_outputs(self):
        """``render_arrow_v1()`` receives ``tab_outputs`` with name and table.

        The stub looks up the entry registered under "tab-x" and compares its
        tab name and table against the values wired in by the test body.
        """

        def render_arrow_v1(table, params, *, tab_outputs, **kwargs):
            self.assertEqual(params["tab"], "tab-x")

            tab_output = tab_outputs["tab-x"]
            self.assertEqual(tab_output.tab_name, "Tab X")

            expected_table = make_table(
                make_column("X", [1], format="{:,d}"),
                make_column("Y", ["y"]),
            )
            assert_arrow_table_equals(tab_output.table, expected_table)
            return ArrowRenderResult(make_table())

        schema = ParamSchema.Dict({"tab": ParamSchema.Tab()})
        with ModuleTestEnv(
            param_schema=schema, render_arrow_v1=render_arrow_v1
        ) as env:
            x_column = make_column("X", [1], format="{:,d}")
            y_column = make_column("Y", ["y"])
            with arrow_table_context(
                x_column, y_column, dir=env.basedir
            ) as (tab_path, _):
                tab_outputs = {
                    "tab-x": TabOutput(
                        tab_name="Tab X", table_filename=tab_path.name
                    )
                }
                env.call_render(
                    make_table(),
                    params={"tab": "tab-x"},
                    tab_outputs=tab_outputs,
                )
Ejemplo n.º 8
0
 def test_default_render_returns_fetch_result(self):
     """The render result is expected to mirror the fetch result.

     The request carries a fetched parquet file ({"A": [2]}) plus one
     warning; the result must contain that table and that warning.
     """
     # Functionality used by libraryofcongress
     with ExitStack() as stack:
         input_table = stack.enter_context(
             arrow_table_context({"A": [1]}, dir=self.basedir)
         )
         fetched_file = stack.enter_context(
             parquet_file({"A": [2]}, dir=self.basedir)
         )
         # Only the bare filename goes into the request.
         parquet_filename = Path(fetched_file.name).name
         output_file = stack.enter_context(tempfile_context(dir=self.basedir))

         fetch_warning = RenderError(I18nMessage.TODO_i18n("A warning"))
         request = ttypes.RenderRequest(
             str(self.basedir),
             input_table.to_thrift(),
             Params({}).to_thrift(),
             ttypes.Tab("tab-1", "Tab 1"),
             ttypes.FetchResult(parquet_filename, [fetch_warning.to_thrift()]),
             output_file.name,
         )
         thrift_result = module.render_thrift(request)

         expected = RenderResult(
             arrow_table({"A": [2]}),
             [RenderError(I18nMessage.TODO_i18n("A warning"))],
         )
         assert_render_result_equals(
             RenderResult.from_thrift(thrift_result, self.basedir), expected
         )
Ejemplo n.º 9
0
    def test_render_use_input_columns_as_try_fallback_columns(self):
        """The output column keeps the input column's number format.

        ``render()`` returns a bare DataFrame; the resulting column metadata is
        expected to equal the input column (format ``{:,.3f}``).
        """

        def render(*args, input_columns):
            return pd.DataFrame({"A": [1]})

        column_specs = [Column("A", ColumnType.Number("{:,.3f}"))]
        with arrow_table_context(
            {"A": [1]}, column_specs, dir=self.basedir
        ) as input_table:
            result = self._test_render(render, arrow_table=input_table)
            expected_columns = [Column("A", ColumnType.Number("{:,.3f}"))]
            self.assertEqual(result.table.metadata.columns, expected_columns)
Ejemplo n.º 10
0
 def inner(
     module_zipfile,
     *,
     chroot_context,
     basedir,
     input_table,
     params,
     tab,
     fetch_result,
     output_filename,
 ):
     """Stand-in render: copy a prepared table to the requested output file.

     The table is built from ``arrow_table_dict`` (taken from the enclosing
     scope) and the returned ``RenderResult`` points at the copied file.
     """
     destination = basedir / output_filename
     with arrow_table_context(arrow_table_dict) as source_table:
         shutil.copy(source_table.path, destination)
         # Same table, but with its path swapped for the copy's location.
         relocated = replace(source_table, path=destination)
         return RenderResult(table=relocated)
Ejemplo n.º 11
0
 def test_invalid_parquet_is_corrupt_cache_error(self):
     """Opening a cached result whose stored bytes are invalid must raise."""
     with arrow_table_context(make_column("A", ["x"])) as (arrow_file, loaded):
         cache_render_result(
             self.workflow,
             self.step,
             1,
             LoadedRenderResult(
                 path=arrow_file,
                 table=loaded,
                 columns=[Column("A", ColumnType.Text())],
                 errors=[],
                 json={},
             ),
         )

     cached = self.step.cached_render_result
     # Overwrite the stored object with bytes that are not a Parquet file.
     s3.put_bytes(BUCKET, crr_parquet_key(cached), b"NOT PARQUET")

     with tempfile_context():
         with self.assertRaises(CorruptCacheError), open_cached_render_result(
             cached
         ):
             pass
Ejemplo n.º 12
0
    def call_render(
        self,
        table: pa.Table,
        params: Dict[str, Any],
        tab_name: str = "Tab 1",
        tab_outputs: Optional[Dict[str, TabOutput]] = None,
        fetch_result: Optional[FetchResult] = None,
        uploaded_files: Optional[Dict[str, UploadedFile]] = None,
    ) -> RenderOutcome:
        """Conveniently call the module's `render_thrift()`.

        The calling convention is designed for ease of testing.

        Args:
            table: input table; written to a file inside ``self.basedir``.
            params: params, converted with ``pydict_to_thrift_json_object()``.
            tab_name: tab name passed in the request.
            tab_outputs: tab outputs by key; ``None`` means no tab outputs.
            fetch_result: optional fetch result; converted to thrift if given.
            uploaded_files: uploaded files by key; ``None`` means none.

        Returns:
            A ``RenderOutcome`` holding the converted render result and the
            path of the output file.
        """
        # None sentinels instead of `{}` defaults: a mutable default object is
        # shared between every call of the method.
        if tab_outputs is None:
            tab_outputs = {}
        if uploaded_files is None:
            uploaded_files = {}

        # tempfile will be deleted in __exit__().
        fd, output_filename = mkstemp(prefix="out-",
                                      suffix=".arrow",
                                      dir=self.basedir)
        # Only the file itself is needed, not the open descriptor.
        os.close(fd)
        output_path = Path(output_filename)

        with arrow_table_context(table, dir=self.basedir) as (input_path, _):
            # The request names files relative to basedir, so render from
            # there and always restore the previous working directory.
            old_cwd = os.getcwd()
            os.chdir(self.basedir)
            try:
                thrift_result = cjwkernel.pandas.module.render_thrift(
                    ttypes.RenderRequest(
                        basedir=self.basedir,
                        input_filename=input_path.name,
                        params=pydict_to_thrift_json_object(params),
                        tab_name=tab_name,
                        tab_outputs={
                            k: arrow_tab_output_to_thrift(v)
                            for k, v in tab_outputs.items()
                        },
                        fetch_result=(
                            arrow_fetch_result_to_thrift(fetch_result)
                            if fetch_result is not None else None),
                        uploaded_files={
                            k: arrow_uploaded_file_to_thrift(v)
                            for k, v in uploaded_files.items()
                        },
                        output_filename=output_path.name,
                    ))
            finally:
                os.chdir(old_cwd)
            arrow_result = thrift_render_result_to_arrow(thrift_result)
            return RenderOutcome(arrow_result, output_path)
Ejemplo n.º 13
0
 def test_read_cached_render_result_slice_as_text_timestamp(self):
     """Check the CSV text produced for a nanosecond timestamp column."""
     timestamp_column = make_column(
         "A", [2134213412341232967, None], pa.timestamp("ns")
     )
     with arrow_table_context(timestamp_column) as (arrow_file, loaded):
         cache_render_result(
             self.workflow,
             self.step,
             1,
             LoadedRenderResult(
                 path=arrow_file,
                 table=loaded,
                 columns=[Column("A", ColumnType.Timestamp())],
                 errors=[],
                 json={},
             ),
         )

     cached = self.step.cached_render_result
     csv_text = read_cached_render_result_slice_as_text(
         cached, "csv", range(2), range(3)
     )
     self.assertEqual(csv_text, "A\n2037-08-18T13:03:32.341232967Z\n")
Ejemplo n.º 14
0
    def test_email_delta(self, email_delta):
        """Re-rendering a notifying step with new data calls ``email_delta``.

        The delta passed to ``email_delta`` is expected to reference the
        workflow, its owner, the step, and both the old and new results.
        """
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        previous_delta_id = workflow.last_delta_id - 1
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # Cache a result for the previous delta ...
        old_result = RenderResult(arrow_table({"A": [1]}))
        rendercache.cache_render_result(
            workflow, wf_module, previous_delta_id, old_result
        )
        # ... then move the step on to the workflow's current delta.
        wf_module.last_relevant_delta_id = workflow.last_delta_id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                return RenderResult(table2)

            with self._stub_module(render):
                pending = execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
                self.run_with_async_db(pending)

        email_delta.assert_called()
        # First positional argument of the most recent call.
        sent_delta = email_delta.call_args[0][0]
        self.assertEqual(sent_delta.user, workflow.owner)
        self.assertEqual(sent_delta.workflow, workflow)
        self.assertEqual(sent_delta.wf_module, wf_module)
        self.assertEqual(
            sent_delta.old_result, RenderResult(arrow_table({"A": [1]}))
        )
        self.assertEqual(
            sent_delta.new_result, RenderResult(arrow_table({"A": [2]}))
        )
Ejemplo n.º 15
0
 def inner(
     module_zipfile,
     *,
     chroot_context,
     basedir,
     input_filename,
     params,
     tab_name,
     tab_outputs,
     uploaded_files,
     fetch_result,
     output_filename,
 ):
     """Stand-in render: copy a prepared table file to the output location.

     The table comes from ``arrow_table`` in the enclosing scope; the returned
     ``RenderResult`` carries an empty error list.
     """
     destination = basedir / output_filename
     with arrow_table_context(arrow_table) as (source_path, _):
         shutil.copy(source_path, destination)
         return RenderResult(errors=[])
Ejemplo n.º 16
0
    def test_clear(self):
        """Clearing a step's cached result empties the DB field and storage."""
        with arrow_table_context(make_column("A", [1])) as (arrow_file, loaded):
            cache_render_result(
                self.workflow,
                self.step,
                1,
                LoadedRenderResult(
                    path=arrow_file,
                    table=loaded,
                    columns=[Column("A", ColumnType.Number(format="{:,}"))],
                    errors=[],
                    json={},
                ),
            )

        # Work out the storage key while the cached result still exists.
        stored_key = crr_parquet_key(self.step.cached_render_result)
        clear_cached_render_result_for_step(self.step)

        reloaded_step = Step.objects.get(id=self.step.id)
        self.assertIsNone(reloaded_step.cached_render_result)

        self.assertFalse(s3.exists(BUCKET, stored_key))
Ejemplo n.º 17
0
    def test_email_delta_ignore_corrupt_cache_error(self, email_delta,
                                                    read_cache):
        """A corrupt cache logs an error and sends no delta email.

        ``read_cache`` is made to raise ``CorruptCacheError``; the render is
        expected to log at ERROR level and never call ``email_delta``.
        """
        read_cache.side_effect = rendercache.CorruptCacheError
        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        previous_delta_id = workflow.last_delta_id - 1
        wf_module = tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="x",
            last_relevant_delta_id=previous_delta_id,
            notifications=True,
        )
        # The cache must really be populated: the code under test only tries
        # to open the render result when the database says one exists.
        rendercache.cache_render_result(
            workflow,
            wf_module,
            previous_delta_id,
            RenderResult(arrow_table({"A": [1]})),
        )
        wf_module.last_relevant_delta_id = workflow.last_delta_id
        wf_module.save(update_fields=["last_relevant_delta_id"])

        with arrow_table_context({"A": [2]}) as table2:

            def render(*args, **kwargs):
                return RenderResult(table2)

            with self._stub_module(render), self.assertLogs(level=logging.ERROR):
                pending = execute_wfmodule(
                    self.chroot_context,
                    workflow,
                    wf_module,
                    {},
                    Tab(tab.slug, tab.name),
                    RenderResult(),
                    {},
                    self.output_path,
                )
                self.run_with_async_db(pending)

        email_delta.assert_not_called()
Ejemplo n.º 18
0
    def test_cache_render_result(self):
        """Cache a result with errors and JSON, then read it back.

        Checks the cached result's ids, its storage key, equality with a copy
        loaded from the database, and the table/columns of the reopened file.
        """
        with arrow_table_context(make_column("A", [1])) as (table_path, table):
            error_with_quick_fix = RenderError(
                I18nMessage("e1", {"text": "hi"}, None),
                [
                    QuickFix(
                        I18nMessage("q1", {"var": 2}, None),
                        QuickFixAction.PrependStep("filter", {"a": "x"}),
                    )
                ],
            )
            plain_error = RenderError(I18nMessage("e2", {}, None), [])
            cache_render_result(
                self.workflow,
                self.step,
                1,
                LoadedRenderResult(
                    path=table_path,
                    table=table,
                    columns=[Column("A", ColumnType.Number(format="{:,}"))],
                    errors=[error_with_quick_fix, plain_error],
                    json={"foo": "bar"},
                ),
            )

        cached = self.step.cached_render_result
        self.assertEqual(cached.step_id, self.step.id)
        self.assertEqual(cached.delta_id, 1)

        expected_key = f"wf-{self.workflow.id}/wfm-{self.step.id}/delta-1.dat"
        self.assertEqual(crr_parquet_key(cached), expected_key)

        # A step fetched fresh from the database must report the same result.
        from_db = Step.objects.get(id=self.step.id).cached_render_result
        self.assertEqual(from_db, cached)

        with open_cached_render_result(from_db) as reopened:
            expected_table = make_table(make_column("A", [1], format="{:,}"))
            assert_arrow_table_equals(reopened.table, expected_table)
            self.assertEqual(
                reopened.columns,
                [Column("A", ColumnType.Number(format="{:,}"))],
            )
Ejemplo n.º 19
0
    def test_load_input_cached_render_result(self):
        """Loading objects for step 2 yields step 1's cached render result.

        The value is checked both by position (index 4) and by the attribute
        ``input_cached_render_result``.
        """
        with arrow_table_context({"A": [1]}) as atable:
            workflow = Workflow.create_and_init()
            step1 = workflow.tabs.first().steps.create(
                order=0,
                slug="step-1",
                last_relevant_delta_id=workflow.last_delta_id,
            )
            step2 = workflow.tabs.first().steps.create(order=1, slug="step-2")
            rendercache.cache_render_result(
                workflow, step1, workflow.last_delta_id, RenderResult(atable)
            )

            loaded = self.run_with_async_db(
                fetch.load_database_objects(workflow.id, step2.id)
            )

            expected_crr = step1.cached_render_result
            assert expected_crr is not None
            self.assertEqual(loaded[4], expected_crr)
            self.assertEqual(loaded.input_cached_render_result, expected_crr)
Ejemplo n.º 20
0
 def test_default_render_returns_fetch_result(self):
     """The render result is expected to mirror the fetch result.

     The request carries a fetched parquet file ({"A": [2]}) plus one
     warning; the result must contain that table and that warning.
     """
     # Functionality used by libraryofcongress
     with ExitStack() as stack:
         input_table = stack.enter_context(
             arrow_table_context({"A": [1]}, dir=self.basedir)
         )
         fetched_file = stack.enter_context(
             parquet_file({"A": [2]}, dir=self.basedir)
         )
         # Only the bare filename goes into the request.
         parquet_filename = Path(fetched_file.name).name
         output_file = stack.enter_context(tempfile_context(dir=self.basedir))

         thrift_warning = ttypes.RenderError(
             ttypes.I18nMessage(
                 "TODO_i18n",
                 {"text": ttypes.I18nArgument(string_value="A warning")},
             ),
             [],
         )
         request = ttypes.RenderRequest(
             str(self.basedir),
             arrow_arrow_table_to_thrift(input_table),
             {},  # params
             ttypes.Tab("tab-1", "Tab 1"),
             ttypes.FetchResult(parquet_filename, [thrift_warning]),
             output_file.name,
         )
         thrift_result = module.render_thrift(request)

         expected = RenderResult(
             arrow_table({"A": [2]}),
             [RenderError(I18nMessage.TODO_i18n("A warning"))],
         )
         assert_render_result_equals(
             thrift_render_result_to_arrow(thrift_result, self.basedir),
             expected,
         )
Ejemplo n.º 21
0
    def test_render_happy_path(self):
        """Compile a small module, render it, and check the output table.

        The module multiplies column A by ``params['m']`` and appends
        ``params['s']`` to column B.
        """
        module = self.kernel.compile(
            MockPath(
                ["foo.py"],
                b"import pandas as pd\ndef render(table, params): return pd.DataFrame({'A': table['A'] * params['m'], 'B': table['B'] + params['s']})",
            ),
            "foo",
        )
        with arrow_table_context(
            {
                "A": [1, 2, 3],
                "B": ["a", "b", "c"]
            },
                columns=[
                    types.Column("A", types.ColumnType.Number("{:,d}")),
                    types.Column("B", types.ColumnType.Text()),
                ],
                dir=self.basedir,
        ) as input_table:
            # NOTE(review): presumably widens permissions so the module
            # process can read the input file -- confirm.
            input_table.path.chmod(0o644)
            with self.chroot_context.tempfile_context(
                    prefix="output-", dir=self.basedir) as output_path:
                result = self.kernel.render(
                    module,
                    self.chroot_context,
                    self.basedir,
                    input_table,
                    types.Params({
                        "m": 2.5,
                        "s": "XX"
                    }),
                    types.Tab("tab-1", "Tab 1"),
                    None,
                    output_filename=output_path.name,
                )

                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(
                    result.table.table.to_pydict(),
                    {
                        "A": [2.5, 5.0, 7.5],
                        "B": ["aXX", "bXX", "cXX"]
                    },
                )
Ejemplo n.º 22
0
 def test_render_kill_timeout(self):
     """A render that outlasts ``render_timeout`` raises ``ModuleTimeoutError``.

     The module sleeps for 2 seconds while the kernel's ``render_timeout`` is
     patched down to 0.001.
     """
     source = "import time\ndef render(table, params):\n  time.sleep(2)"
     mod = _compile("foo", source)
     short_timeout = patch.object(self.kernel, "render_timeout", 0.001)
     with short_timeout, self.assertRaises(ModuleTimeoutError):
         with arrow_table_context(
             {"A": [1]}, dir=self.basedir
         ) as input_table:
             input_table.path.chmod(0o644)
             with self.chroot_context.tempfile_context(
                 prefix="output-", dir=self.basedir
             ) as output_path:
                 self.kernel.render(
                     mod,
                     self.chroot_context,
                     self.basedir,
                     input_table,
                     types.Params({}),
                     types.Tab("tab-1", "Tab 1"),
                     None,
                     output_filename=output_path.name,
                 )
Ejemplo n.º 23
0
    def test_render_killed_hard_out_of_memory(self):
        """A module killed by SIGKILL surfaces as ``ModuleExitedError``.

        The error is expected to carry exit code -9 and an empty log.
        """
        # This is similar to out-of-memory kill (but with different exit_code).
        # Testing out-of-memory is slow because we have to force the kernel to,
        # er, run out of memory. On a typical dev machine, that means filling
        # swap space -- gumming up the whole system. Not practical.
        #
        # In case of out-of-memory, the Linux out-of-memory killer will find
        # and kill a process using SIGKILL.
        #
        # So let's simulate that SIGKILL.
        module = self.kernel.compile(
            MockPath(
                ["foo.py"],
                b"import os\nimport time\ndef render(table, params): os.kill(os.getpid(), 9); time.sleep(1)",
            ),
            "foo",
        )
        with self.assertRaises(ModuleExitedError) as cm:
            with arrow_table_context({"A": [1]},
                                     dir=self.basedir) as input_table:
                input_table.path.chmod(0o644)
                with tempfile_context(prefix="output-",
                                      dir=self.basedir) as output_path:
                    result = self.kernel.render(
                        module,
                        self.basedir,
                        input_table,
                        types.Params({
                            "m": 2.5,
                            "s": "XX"
                        }),
                        types.Tab("tab-1", "Tab 1"),
                        None,
                        output_filename=output_path.name,
                    )
                    # Only reached if render() unexpectedly returns; shows
                    # what came back before assertRaises fails the test.
                    print(repr(result))

        # assertEqual: the assertEquals alias is deprecated and was removed in
        # Python 3.12.
        self.assertEqual(cm.exception.exit_code, -9)  # SIGKILL
        self.assertEqual(cm.exception.log, "")
Ejemplo n.º 24
0
    def test_load_dynamic(self):
        """Load a module whose code was uploaded to minio, then render with it.

        The uploaded ``render()`` doubles the table, so input {"A": [1]} is
        expected to come back as {"A": [2]}.
        """
        code = b"def render(table, params):\n    return table * 2"
        minio.client.put_object(
            Bucket=minio.ExternalModulesBucket,
            Key="imported/abcdef/imported.py",
            Body=code,
            ContentLength=len(code),
        )

        with self.assertLogs("cjwstate.modules.loaded_module"):
            module_version = MockModuleVersion(
                "imported", "abcdef", ParamDType.Dict({}), "now"
            )
            lm = LoadedModule.for_module_version(module_version)

        self.assertEqual(lm.name, "imported:abcdef")

        # This ends up being kinda an integration test.
        with ExitStack() as stack:
            basedir = Path(
                stack.enter_context(tempdir_context(prefix="test-basedir-"))
            )
            basedir.chmod(0o755)

            input_table = stack.enter_context(
                arrow_table_context({"A": [1]}, dir=basedir)
            )
            input_table.path.chmod(0o644)

            output_file = stack.enter_context(
                tempfile.NamedTemporaryFile(dir=basedir)
            )

            stack.enter_context(self.assertLogs("cjwstate.modules.loaded_module"))

            # render() is given the output file's bare name, not its full path.
            output_name = Path(output_file.name).name
            result = lm.render(
                basedir=basedir,
                input_table=input_table,
                params=Params({"col": "A"}),
                tab=Tab("tab-1", "Tab 1"),
                fetch_result=None,
                output_filename=output_name,
            )

        expected = RenderResult(arrow_table({"A": [2]}))
        assert_render_result_equals(result, expected)
Ejemplo n.º 25
0
def write_to_rendercache(
    workflow: Workflow,
    step: Step,
    delta_id: int,
    table: pa.Table,
    errors: List[RenderError] = [],
    json: Dict[str, Any] = {},
) -> None:
    """Cache ``table`` as the render result of ``step`` for ``delta_id``.

    Args:
        workflow: workflow passed through to ``cache_render_result()``.
        step: step whose result is cached. Its ``last_relevant_delta_id`` is
            set to ``delta_id`` for the duration of the call and restored
            afterwards, even if caching raises.
        delta_id: delta ID to cache the result under.
        table: table to cache; its columns are read with
            ``read_columns(..., full=False)``.
        errors: render errors to store with the result.
        json: JSON payload to store with the result.
    """
    with arrow_table_context(table) as (path, loaded_table):
        result = LoadedRenderResult(
            path=path,
            table=loaded_table,
            columns=read_columns(loaded_table, full=False),
            # Copy both containers: the signature's default objects are shared
            # between calls, so they must never be stored in a result.
            errors=list(errors),
            json=dict(json),
        )

        # use the caller-provided delta ID: no assertion
        # (presumably cache_render_result() checks the step's delta ID against
        # `delta_id` -- hence the temporary override; confirm.)
        old_last_relevant_delta_id = step.last_relevant_delta_id
        step.last_relevant_delta_id = delta_id
        try:
            cache_render_result(workflow, step, delta_id, result)
        finally:
            step.last_relevant_delta_id = old_last_relevant_delta_id
Ejemplo n.º 26
0
    def test_render_arrow_table_infer_output_column_formats_from_input(self):
        """Output column types are recalled from the input or inferred.

        Nine input columns (three number, three datetime, three text) are
        rendered into number/datetime/text outputs in every combination. A
        column whose type is unchanged is expected to keep its input metadata;
        the others are expected to get a type inferred from the output.
        """

        def march_8_timestamps():
            # One-row nanosecond timestamp array, built fresh for each column.
            return pa.array([datetime(2020, 3, 8)], pa.timestamp("ns"))

        input_columns = [
            Column("A", ColumnType.Number("{:,.3f}")),
            Column("B", ColumnType.Number("{:,.3f}")),
            Column("C", ColumnType.Number("{:,.3f}")),
            Column("D", ColumnType.Datetime()),
            Column("E", ColumnType.Datetime()),
            Column("F", ColumnType.Datetime()),
            Column("G", ColumnType.Text()),
            Column("H", ColumnType.Text()),
            Column("I", ColumnType.Text()),
        ]

        # The param name "arrow_table" is a special case
        def render(arrow_table, params, output_path, *, columns, **kwargs):
            # Test the "columns" kwarg
            self.assertEqual(columns, input_columns)
            output_data = {
                "A": [1],
                "B": march_8_timestamps(),
                "C": ["a"],
                "D": [1],
                "E": march_8_timestamps(),
                "F": ["a"],
                "G": [1],
                "H": march_8_timestamps(),
                "I": ["a"],
            }
            table = pa.table(output_data)
            with pa.ipc.RecordBatchFileWriter(output_path, table.schema) as writer:
                writer.write_table(table)
            return []

        input_data = {
            "A": [1],
            "B": [1],
            "C": [1],
            "D": march_8_timestamps(),
            "E": march_8_timestamps(),
            "F": march_8_timestamps(),
            "G": ["a"],
            "H": ["a"],
            "I": ["a"],
        }
        with arrow_table_context(
            input_data, columns=input_columns, dir=self.basedir
        ) as input_table:
            result = self._test_render(render, arrow_table=input_table)
            expected_columns = [
                Column("A", ColumnType.Number("{:,.3f}")),  # recalled
                Column("B", ColumnType.Datetime()),  # inferred
                Column("C", ColumnType.Text()),  # inferred
                Column("D", ColumnType.Number("{:,}")),  # inferred
                Column("E", ColumnType.Datetime()),  # recalled
                Column("F", ColumnType.Text()),  # inferred
                Column("G", ColumnType.Number("{:,}")),  # inferred
                Column("H", ColumnType.Datetime()),  # inferred
                Column("I", ColumnType.Text()),  # recalled
            ]
            self.assertEqual(result.table.metadata.columns, expected_columns)
Ejemplo n.º 27
0
 def test_happy_path(self):
     """The file produced by ``arrow_table_context()`` passes validation."""
     text_column = make_column("A", ["x"])
     with arrow_table_context(text_column) as (arrow_path, _):
         # Passing means simply that no exception is raised.
         validate_arrow_file(arrow_path)