Esempio n. 1
0
    def test_fetch_integration(self, send_update, queue_render):
        # Integration test: run a real fetch for a step backed by a zipfile
        # module, then verify the stored Parquet file and the notifications.
        #
        # Both injected mocks get an async_value(None) side effect
        # (presumably so they can be awaited -- confirm against the helper).
        for mocked in (queue_render, send_update):
            mocked.side_effect = async_value(None)

        workflow = Workflow.create_and_init()

        # Minimal module: fetch() yields a one-cell table, render() echoes it.
        module_code = "import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
        create_module_zipfile("mod", python_code=module_code)

        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
        )
        cjwstate.modules.init_module_system()
        fetch_time = timezone.now()

        # The fetch must emit at least one log record at INFO or above.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                fetch.fetch(
                    workflow_id=workflow.id,
                    wf_module_id=step.id,
                    now=fetch_time,
                )
            )

        # Look up the stored object that matches the step's current version.
        step.refresh_from_db()
        stored = step.stored_objects.get(stored_at=step.stored_data_version)
        with minio.temporarily_download(
            minio.StoredObjectsBucket, stored.key
        ) as local_path:
            fetched = pyarrow.parquet.read_table(
                str(local_path), use_threads=False
            )
            assert_arrow_table_equals(fetched, {"A": [1]})

        # A render must be queued for the latest delta, and clients notified.
        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
Esempio n. 2
0
    def test_fetch_integration(self, send_update, queue_render):
        # Integration test: run a real fetch for a step backed by a zipfile
        # module, then verify the stored Parquet file and the notifications.
        #
        # Both injected mocks get an async_value(None) side effect
        # (presumably so they can be awaited -- confirm against the helper).
        for mocked in (queue_render, send_update):
            mocked.side_effect = async_value(None)

        workflow = Workflow.create_and_init()

        # Minimal module: fetch() yields a one-cell table, render() echoes it.
        module_code = "import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
        create_module_zipfile("mod", python_code=module_code)

        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
        )
        cjwstate.modules.init_module_system()
        fetch_time = datetime.datetime.now()

        # The fetch must emit at least one log record at INFO or above.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                fetch.fetch(
                    workflow_id=workflow.id,
                    step_id=step.id,
                    now=fetch_time,
                )
            )

        # Look up the stored object that matches the step's current version.
        step.refresh_from_db()
        stored = step.stored_objects.get(stored_at=step.stored_data_version)
        with s3.temporarily_download(
            s3.StoredObjectsBucket, stored.key
        ) as local_path:
            # Fetch results carry no schema of their own, so supply a
            # hard-coded one purely to compare the table's data.
            expected_columns = [Column("A", ColumnType.Number())]
            fetched = read_parquet_as_arrow(local_path, expected_columns)
            assert_arrow_table_equals(
                fetched, make_table(make_column("A", [1]))
            )

        # A render must be queued for the latest delta, and clients notified.
        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
Esempio n. 3
0
    def test_fetch_integration(self, send_update, queue_render):
        # Integration test: register a module from a spec, upload its code,
        # run a real fetch, then verify the stored Parquet file and the
        # notifications.
        #
        # Both injected mocks get an async_value(None) side effect
        # (presumably so they can be awaited -- confirm against the helper).
        for mocked in (queue_render, send_update):
            mocked.side_effect = async_value(None)

        workflow = Workflow.create_and_init()

        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        ModuleVersion.create_or_replace_from_spec(
            module_spec, source_version_hash="abc123"
        )

        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
        )

        # Upload the module source under the key matching the spec's id_name
        # and version hash. fetch() yields a one-cell table; render() echoes.
        module_code = b"import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
        minio.put_bytes(
            minio.ExternalModulesBucket, "mod/abc123/code.py", module_code
        )
        cjwstate.modules.init_module_system()
        fetch_time = timezone.now()

        # The fetch must emit at least one log record at INFO or above.
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                fetch.fetch(
                    workflow_id=workflow.id,
                    wf_module_id=step.id,
                    now=fetch_time,
                )
            )

        # Look up the stored object that matches the step's current version.
        step.refresh_from_db()
        stored = step.stored_objects.get(stored_at=step.stored_data_version)
        with minio.temporarily_download(stored.bucket, stored.key) as local_path:
            fetched = pyarrow.parquet.read_table(
                str(local_path), use_threads=False
            )
            assert_arrow_table_equals(fetched, {"A": [1]})

        # A render must be queued for the latest delta, and clients notified.
        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
Esempio n. 4
0
 def test_fetch_integration_tempfiles_are_on_disk(self, create_result):
     # Large fetch outputs belong on disk-backed /var/tmp, not on /tmp
     # (which is RAM). Run a fetch and check where the result file lives.
     workflow = Workflow.create_and_init()

     # Minimal module: fetch() yields a one-cell table, render() echoes it.
     module_code = "import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
     create_module_zipfile("mod", python_code=module_code)

     first_tab = workflow.tabs.first()
     step = first_tab.wf_modules.create(
         order=0,
         slug="step-1",
         module_id_name="mod",
     )

     # Module-system init happens inside the log assertion as well, so
     # records from either call satisfy it.
     with self.assertLogs(level=logging.INFO):
         cjwstate.modules.init_module_system()
         self.run_with_async_db(
             fetch.fetch(workflow_id=workflow.id, wf_module_id=step.id)
         )

     create_result.assert_called()
     # The fetch result is the third positional argument of the mocked call.
     positional_args = create_result.call_args[0]
     fetch_result: FetchResult = positional_args[2]
     self.assertRegex(str(fetch_result.path), r"/var/tmp/")
Esempio n. 5
0
 def test_fetch_integration_tempfiles_are_on_disk(self, create_result):
     # Large fetch outputs belong on disk-backed /var/tmp, not on /tmp
     # (which is RAM). Run a fetch and check where the result file lives.
     workflow = Workflow.create_and_init()

     module_spec = {
         "id_name": "mod",
         "name": "Mod",
         "category": "Clean",
         "parameters": [],
     }
     ModuleVersion.create_or_replace_from_spec(
         module_spec, source_version_hash="abc123"
     )

     first_tab = workflow.tabs.first()
     step = first_tab.wf_modules.create(
         order=0,
         slug="step-1",
         module_id_name="mod",
     )

     # Upload the module source under the key matching the spec's id_name
     # and version hash. fetch() yields a one-cell table; render() echoes.
     module_code = b"import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
     minio.put_bytes(
         minio.ExternalModulesBucket, "mod/abc123/code.py", module_code
     )

     # Module-system init happens inside the log assertion as well, so
     # records from either call satisfy it.
     with self.assertLogs(level=logging.INFO):
         cjwstate.modules.init_module_system()
         self.run_with_async_db(
             fetch.fetch(workflow_id=workflow.id, wf_module_id=step.id)
         )

     create_result.assert_called()
     # The fetch result is the third positional argument of the mocked call.
     positional_args = create_result.call_args[0]
     fetch_result: FetchResult = positional_args[2]
     self.assertRegex(str(fetch_result.path), r"/var/tmp/")