Example #1
@contextlib.contextmanager
def downloaded_parquet_file(crr: CachedRenderResult, dir=None) -> ContextManager[Path]:
    """Context manager to download and yield `path`, a hopefully-Parquet file.

    This is cheaper than open_cached_render_result() because it does not parse
    the file. Use this function when you suspect you won't need the table data.

    Raise CorruptCacheError if the cached data is missing.

    Usage:

        try:
            with rendercache.downloaded_parquet_file(crr) as path:
                # do something with `path`, a `pathlib.Path`
        except rendercache.CorruptCacheError:
            # file does not exist....
    """
    with contextlib.ExitStack() as ctx:
        try:
            path = ctx.enter_context(
                s3.temporarily_download(BUCKET, crr_parquet_key(crr), dir=dir)
            )
        except FileNotFoundError:
            raise CorruptCacheError

        yield path
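
A caller can pair this with pyarrow to inspect the cached file without materializing the table, which is exactly the case the docstring describes. A minimal sketch, assuming the `rendercache` module and `crr` (a CachedRenderResult) from the example above; `count_cached_rows` is a hypothetical helper:

import pyarrow.parquet


def count_cached_rows(crr) -> int:
    """Return the row count recorded in the cached Parquet footer.

    Only file metadata is read, so the table itself is never parsed.
    """
    try:
        with rendercache.downloaded_parquet_file(crr) as path:
            return pyarrow.parquet.ParquetFile(str(path)).metadata.num_rows
    except rendercache.CorruptCacheError:
        return 0  # cached data is missing; treat as empty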
Example #2
    def test_fetch_integration(self, send_update, queue_render):
        queue_render.side_effect = async_value(None)
        send_update.side_effect = async_value(None)
        workflow = Workflow.create_and_init()
        create_module_zipfile(
            "mod",
            python_code=(
                "import pandas as pd\n"
                "def fetch(params): return pd.DataFrame({'A': [1]})\n"
                "def render(table, params): return table"
            ),
        )
        step = workflow.tabs.first().steps.create(
            order=0, slug="step-1", module_id_name="mod"
        )
        cjwstate.modules.init_module_system()
        now = datetime.datetime.now()
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                fetch.fetch(workflow_id=workflow.id, step_id=step.id, now=now)
            )
        step.refresh_from_db()
        so = step.stored_objects.get(stored_at=step.stored_data_version)
        with s3.temporarily_download(s3.StoredObjectsBucket, so.key) as parquet_path:
            table = pyarrow.parquet.read_table(str(parquet_path), use_threads=False)
            assert_arrow_table_equals(table, {"A": [1]})

        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
Example #3
    def test_fetch_integration(self, send_update, queue_render):
        queue_render.side_effect = async_value(None)
        send_update.side_effect = async_value(None)
        workflow = Workflow.create_and_init()
        create_module_zipfile(
            "mod",
            python_code=(
                "import pandas as pd\n"
                "def fetch(params): return pd.DataFrame({'A': [1]})\n"
                "def render(table, params): return table"
            ),
        )
        step = workflow.tabs.first().steps.create(order=0,
                                                  slug="step-1",
                                                  module_id_name="mod")
        cjwstate.modules.init_module_system()
        now = datetime.datetime.now()
        with self.assertLogs(level=logging.INFO):
            self.run_with_async_db(
                fetch.fetch(workflow_id=workflow.id, step_id=step.id, now=now))
        step.refresh_from_db()
        so = step.stored_objects.get(stored_at=step.stored_data_version)
        with s3.temporarily_download(s3.StoredObjectsBucket,
                                     so.key) as parquet_path:
            # Fetch results are stored without a schema. Hard-code one here
            # so we can test that the table data is the same.
            table = read_parquet_as_arrow(parquet_path,
                                          [Column("A", ColumnType.Number())])
            assert_arrow_table_equals(table, make_table(make_column("A", [1])))

        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
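
Both versions of the test stub `queue_render` and `send_update` with `async_value(None)` so that the awaited calls inside `fetch.fetch` resolve immediately. The helper itself is not shown in these examples; a plausible sketch, assuming `side_effect` must be a callable whose return value can be awaited:

def async_value(value):
    """Return a callable usable as Mock.side_effect for awaited calls.

    Calling the mock invokes this callable; the coroutine it returns
    resolves to `value` when awaited.
    """
    async def _coro(*args, **kwargs):
        return value

    return _coro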
Example #4
def downloaded_file(stored_object: StoredObject,
                    dir=None) -> ContextManager[Path]:
    """Context manager to download and yield `path`, the StoredObject's file.

    Raise FileNotFoundError if the object is missing.

    Usage:

        try:
            with storedobjects.downloaded_file(stored_object) as path:
                # do something with `path`, a `pathlib.Path`
        except FileNotFoundError:
            # file does not exist....
    """
    if stored_object.size == 0:
        # Some stored objects with size=0 do not have a key. These are valid:
        # they represent empty files.
        return tempfile_context(prefix="storedobjects-empty-file", dir=dir)
    else:
        # raises FileNotFoundError
        return s3.temporarily_download(s3.StoredObjectsBucket,
                                       stored_object.key,
                                       dir=dir)
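
Because the size == 0 branch yields an empty temporary file, callers can treat empty and non-empty objects the same way and only handle the missing-object case. A minimal usage sketch, assuming the `storedobjects` module named in the docstring and a `stored_object` as above; `read_stored_bytes` is a hypothetical helper:

def read_stored_bytes(stored_object) -> bytes:
    """Return the StoredObject's contents, or b"" if the object is missing."""
    try:
        with storedobjects.downloaded_file(stored_object) as path:
            return path.read_bytes()  # size == 0 objects yield an empty temp file
    except FileNotFoundError:
        return b""  # the object was deleted from S3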
Example #5
    def test_file_not_found(self):
        with self.assertRaises(FileNotFoundError):
            with s3.temporarily_download(Bucket, Key) as _:
                pass
Example #6
    def test_allows_reading_file(self):
        _put(b"1234")
        with s3.temporarily_download(Bucket, Key) as path:
            self.assertEqual(path.read_bytes(), b"1234")
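
These tests only exercise download and read. The name `temporarily_download` and the context-manager interface suggest the local copy is deleted once the block exits; the sketch below tests that assumption, which the examples above do not confirm:

    def test_file_removed_after_context_exits(self):
        _put(b"1234")
        with s3.temporarily_download(Bucket, Key) as path:
            self.assertTrue(path.exists())
        # Assumption: the temporary local copy is removed on exit.
        self.assertFalse(path.exists())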