Example #1
0
@contextlib.contextmanager
def downloaded_parquet_file(crr: CachedRenderResult,
                            dir=None) -> ContextManager[Path]:
    """
    Context manager to download and yield `path`, a hopefully-Parquet file.

    This is cheaper than open_cached_render_result() because it does not parse
    the file. Use this function when you suspect you won't need the table data.

    The file is fetched with `minio.temporarily_download()` from `BUCKET`,
    using the key `crr_parquet_key(crr)`. `dir` is forwarded unchanged to
    `minio.temporarily_download()`.

    Raise CorruptCacheError if the cached data is missing.

    Usage:

        try:
            with rendercache.downloaded_parquet_file(crr) as path:
                # do something with `path`, a `pathlib.Path`
        except rendercache.CorruptCacheError:
            # file does not exist....
    """
    with contextlib.ExitStack() as ctx:
        # Only _entering_ the download is guarded: a FileNotFoundError raised
        # by the caller's `with` body (at the `yield` below) must not be
        # mistaken for a corrupt cache.
        try:
            path = ctx.enter_context(
                minio.temporarily_download(BUCKET,
                                           crr_parquet_key(crr),
                                           dir=dir))
        except FileNotFoundError as err:
            raise CorruptCacheError from err

        # The ExitStack exits the download context once the caller is done.
        yield path
Example #2
0
    def test_fetch_integration(self, send_update, queue_render):
        # End-to-end fetch: install a module whose fetch() returns a one-cell
        # DataFrame, run fetch.fetch() on a step that uses it, then check the
        # stored object and the render/update notifications.
        queue_render.side_effect = async_value(None)
        send_update.side_effect = async_value(None)

        module_code = (
            "import pandas as pd\n"
            "def fetch(params): return pd.DataFrame({'A': [1]})\n"
            "def render(table, params): return table"
        )

        workflow = Workflow.create_and_init()
        create_module_zipfile("mod", python_code=module_code)
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            module_id_name="mod",
        )
        cjwstate.modules.init_module_system()

        fetch_time = timezone.now()
        # The fetch is expected to log at INFO level or above.
        with self.assertLogs(level=logging.INFO):
            fetch_coroutine = fetch.fetch(
                workflow_id=workflow.id,
                wf_module_id=step.id,
                now=fetch_time,
            )
            self.run_with_async_db(fetch_coroutine)

        # The step should now point at a stored object holding the fetched
        # table as a Parquet file.
        step.refresh_from_db()
        stored_object = step.stored_objects.get(
            stored_at=step.stored_data_version
        )
        with minio.temporarily_download(
            minio.StoredObjectsBucket, stored_object.key
        ) as parquet_path:
            fetched_table = pyarrow.parquet.read_table(
                str(parquet_path), use_threads=False
            )
            assert_arrow_table_equals(fetched_table, {"A": [1]})

        # A render must have been queued for the workflow's latest delta, and
        # an update must have been sent.
        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
Example #3
0
    def test_fetch_integration(self, send_update, queue_render):
        # End-to-end fetch: register a module version, upload its code to the
        # external-modules bucket, run fetch.fetch() on a step that uses it,
        # then check the stored object and the render/update notifications.
        queue_render.side_effect = async_value(None)
        send_update.side_effect = async_value(None)

        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": [],
        }
        module_code = (
            b"import pandas as pd\n"
            b"def fetch(params): return pd.DataFrame({'A': [1]})\n"
            b"def render(table, params): return table"
        )

        workflow = Workflow.create_and_init()
        ModuleVersion.create_or_replace_from_spec(
            module_spec, source_version_hash="abc123"
        )
        first_tab = workflow.tabs.first()
        step = first_tab.wf_modules.create(
            order=0, slug="step-1", module_id_name="mod"
        )
        # The key is "<id_name>/<source_version_hash>/code.py".
        minio.put_bytes(
            minio.ExternalModulesBucket, "mod/abc123/code.py", module_code
        )
        cjwstate.modules.init_module_system()

        fetch_time = timezone.now()
        # The fetch is expected to log at INFO level or above.
        with self.assertLogs(level=logging.INFO):
            fetch_coroutine = fetch.fetch(
                workflow_id=workflow.id,
                wf_module_id=step.id,
                now=fetch_time,
            )
            self.run_with_async_db(fetch_coroutine)

        # The step should now point at a stored object holding the fetched
        # table as a Parquet file.
        step.refresh_from_db()
        stored_object = step.stored_objects.get(
            stored_at=step.stored_data_version
        )
        with minio.temporarily_download(
            stored_object.bucket, stored_object.key
        ) as parquet_path:
            fetched_table = pyarrow.parquet.read_table(
                str(parquet_path), use_threads=False
            )
            assert_arrow_table_equals(fetched_table, {"A": [1]})

        # A render must have been queued for the workflow's latest delta, and
        # an update must have been sent.
        workflow.refresh_from_db()
        queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
        send_update.assert_called()
Example #4
0
def _download_module_zipfile_deprecated(
    output_path: Path,  # tempdir/module.abcd12.zip
    module_id: ModuleId,
    version: ModuleVersion,
    spec: Dict[str, Any],
    js_module: str,
) -> None:
    """
    Build a module zipfile at `output_path` from files stored in minio.

    Files are looked up in `minio.ExternalModulesBucket` under the prefix
    "{module_id}/{version}/". The zipfile receives:

    * "{module_id}.yaml": `spec`, JSON-encoded as UTF-8.
    * "{module_id}.js": `js_module` as UTF-8, only if `js_module` is truthy.
    * "{module_id}.py": the first listed key whose basename satisfies
      `_is_basename_python_code()`.
    * "{module_id}.html": the first listed key ending in ".html", if any.

    Raise FileNotFoundError if no Python code key is listed under the prefix,
    or if a download fails because the file is missing.
    """
    prefix = "%s/%s/" % (module_id, version)
    # Materialize the listing: it is scanned twice below, and a one-shot
    # iterator would be exhausted by the first scan.
    all_keys = list(minio.list_file_keys(minio.ExternalModulesBucket, prefix))

    python_code_key = next(
        (k for k in all_keys if _is_basename_python_code(k[len(prefix):])),
        None,
    )
    if python_code_key is None:
        raise FileNotFoundError("No Python code file found under %r" % prefix)

    # None means there is no HTML file
    html_key = next((k for k in all_keys if k.endswith(".html")), None)

    # NOTE(review): this function writes straight to `output_path`. The
    # signature comment suggests that is a path inside a tempdir, so presumably
    # the caller moves the finished file into place afterwards. That would make
    # it safe to read a zipfile from one thread while downloading it from
    # another (".develop" zipfiles are mutable). Confirm against the caller.
    with zipfile.ZipFile(output_path, mode="w") as zf:
        # Write .yaml spec
        zf.writestr("%s.yaml" % module_id, json.dumps(spec).encode("utf-8"))

        # Write .js js_module
        if js_module:
            zf.writestr("%s.js" % module_id, js_module.encode("utf-8"))

        # Write .py module code
        # (temporarily_download() raises FileNotFoundError if it is missing)
        with minio.temporarily_download(minio.ExternalModulesBucket,
                                        python_code_key) as py_path:
            zf.write(py_path, "%s.py" % module_id)

        # Write .html file
        if html_key:
            with minio.temporarily_download(minio.ExternalModulesBucket,
                                            html_key) as html_path:
                zf.write(html_path, "%s.html" % module_id)
def _load_external_module_uncached(module_id_name: str,
                                   version_sha1: str) -> CompiledModule:
    """
    Compile the Python code stored for a module name and version.

    The code file is searched for in `minio.ExternalModulesBucket` under the
    prefix "{module_id_name}/{version_sha1}/". The first listed key whose
    basename satisfies `_is_basename_python_code()` is downloaded temporarily
    and passed to `cjwstate.modules.kernel.compile()` together with the name
    "{module_id_name}.{version_sha1}".

    NOTE(review): if no listed key matches, the bare `next()` below lets
    StopIteration propagate to the caller -- confirm that is intended.
    """
    prefix = f"{module_id_name}/{version_sha1}/"
    prefix_length = len(prefix)
    listed_keys = minio.list_file_keys(minio.ExternalModulesBucket, prefix)
    python_code_key = next(
        filter(
            lambda key: _is_basename_python_code(key[prefix_length:]),
            listed_keys,
        )
    )

    # With the key in hand, download the code and compile it.
    name = f"{module_id_name}.{version_sha1}"
    with minio.temporarily_download(
        minio.ExternalModulesBucket, python_code_key
    ) as path:
        logger.info(f"Loading {name} from {path}")
        compiled_module = cjwstate.modules.kernel.compile(path, name)
        return compiled_module
Example #6
0
def downloaded_file(stored_object: StoredObject,
                    dir=None) -> ContextManager[Path]:
    """
    Return a context manager that yields `path`, the StoredObject's file.

    A StoredObject with `size == 0` is served from `tempfile_context()`
    instead of minio; every other StoredObject is fetched with
    `minio.temporarily_download()` using its `bucket` and `key`. `dir` is
    forwarded unchanged in both cases.

    Raise FileNotFoundError if the object is missing.

    Usage:

        try:
            with storedobjects.downloaded_file(stored_object) as path:
                # do something with `path`, a `pathlib.Path`
        except FileNotFoundError:
            # file does not exist....
    """
    if stored_object.size == 0:
        # Empty files are valid, and some size=0 stored objects have no
        # bucket/key at all -- so hand out an empty temporary file rather
        # than asking minio for one.
        return tempfile_context(prefix="storedobjects-empty-file", dir=dir)

    # raises FileNotFoundError
    return minio.temporarily_download(
        stored_object.bucket,
        stored_object.key,
        dir=dir,
    )
Example #7
0
 def test_file_not_found(self):
     # Nothing is uploaded first, so entering the download context is
     # expected to raise FileNotFoundError.
     with self.assertRaises(FileNotFoundError), \
             minio.temporarily_download(Bucket, Key):
         pass
Example #8
0
 def test_allows_reading_file(self):
     # Upload known bytes, then check the downloaded path yields the same
     # bytes.
     _put(b"1234")
     download = minio.temporarily_download(Bucket, Key)
     with download as path:
         downloaded_bytes = path.read_bytes()
         self.assertEqual(downloaded_bytes, b"1234")