def downloaded_parquet_file(crr: CachedRenderResult, dir=None) -> ContextManager[Path]:
    """
    Download the cached render result's file and yield its `pathlib.Path`.

    The file is hopefully Parquet: it is downloaded but never parsed, which
    makes this cheaper than open_cached_render_result(). Prefer it when you
    suspect you won't need the table data.

    `dir` is passed straight through to minio.temporarily_download().

    Raise CorruptCacheError if the cached data is missing.

    Usage:

        try:
            with rendercache.downloaded_parquet_file(crr) as path:
                # do something with `path`, a `pathlib.Path`
        except rendercache.CorruptCacheError:
            # file does not exist....
    """
    with contextlib.ExitStack() as stack:
        try:
            parquet_path = stack.enter_context(
                minio.temporarily_download(BUCKET, crr_parquet_key(crr), dir=dir)
            )
        except FileNotFoundError:
            # A missing object means the cache entry cannot be trusted.
            raise CorruptCacheError
        else:
            # Yield outside the `try` body: errors raised by the consumer
            # must not be mistaken for a missing download.
            yield parquet_path
def test_fetch_integration(self, send_update, queue_render):
    """
    Run fetch.fetch() for a one-step workflow and inspect what it stored.

    The module's fetch() returns a single-column table {"A": [1]}; the test
    downloads the step's stored object and checks it holds that table, then
    checks that `queue_render` and `send_update` were called.
    """
    # Both injected doubles are given async_value(None) as their side effect.
    send_update.side_effect = async_value(None)
    queue_render.side_effect = async_value(None)

    workflow = Workflow.create_and_init()
    module_source = (
        "import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
    )
    create_module_zipfile("mod", python_code=module_source)
    first_tab = workflow.tabs.first()
    step = first_tab.wf_modules.create(order=0, slug="step-1", module_id_name="mod")
    cjwstate.modules.init_module_system()
    fetched_at = timezone.now()

    # The fetch is expected to log at INFO level or above.
    with self.assertLogs(level=logging.INFO):
        pending_fetch = fetch.fetch(
            workflow_id=workflow.id, wf_module_id=step.id, now=fetched_at
        )
        self.run_with_async_db(pending_fetch)

    # Reload the step so stored_data_version reflects the fetch.
    step.refresh_from_db()
    stored_object = step.stored_objects.get(stored_at=step.stored_data_version)
    with minio.temporarily_download(
        minio.StoredObjectsBucket, stored_object.key
    ) as parquet_path:
        assert_arrow_table_equals(
            pyarrow.parquet.read_table(str(parquet_path), use_threads=False),
            {"A": [1]},
        )

    workflow.refresh_from_db()
    queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
    send_update.assert_called()
def test_fetch_integration(self, send_update, queue_render):
    """
    Run fetch.fetch() for a one-step workflow and inspect what it stored.

    The module is registered through ModuleVersion.create_or_replace_from_spec()
    and its code is uploaded to minio.ExternalModulesBucket. Its fetch()
    returns {"A": [1]}; the test downloads the step's stored object and checks
    it holds that table, then checks `queue_render` and `send_update` ran.
    """
    # Both injected doubles are given async_value(None) as their side effect.
    send_update.side_effect = async_value(None)
    queue_render.side_effect = async_value(None)

    workflow = Workflow.create_and_init()
    module_spec = {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
    ModuleVersion.create_or_replace_from_spec(module_spec, source_version_hash="abc123")
    first_tab = workflow.tabs.first()
    step = first_tab.wf_modules.create(order=0, slug="step-1", module_id_name="mod")

    # The key combines the spec's id_name with the source_version_hash above.
    module_source = b"import pandas as pd\ndef fetch(params): return pd.DataFrame({'A': [1]})\ndef render(table, params): return table"
    minio.put_bytes(minio.ExternalModulesBucket, "mod/abc123/code.py", module_source)
    cjwstate.modules.init_module_system()
    fetched_at = timezone.now()

    # The fetch is expected to log at INFO level or above.
    with self.assertLogs(level=logging.INFO):
        pending_fetch = fetch.fetch(
            workflow_id=workflow.id, wf_module_id=step.id, now=fetched_at
        )
        self.run_with_async_db(pending_fetch)

    # Reload the step so stored_data_version reflects the fetch.
    step.refresh_from_db()
    stored_object = step.stored_objects.get(stored_at=step.stored_data_version)
    with minio.temporarily_download(
        stored_object.bucket, stored_object.key
    ) as parquet_path:
        assert_arrow_table_equals(
            pyarrow.parquet.read_table(str(parquet_path), use_threads=False),
            {"A": [1]},
        )

    workflow.refresh_from_db()
    queue_render.assert_called_with(workflow.id, workflow.last_delta_id)
    send_update.assert_called()
def _download_module_zipfile_deprecated(
    output_path: Path,  # tempdir/module.abcd12.zip
    module_id: ModuleId,
    version: ModuleVersion,
    spec: Dict[str, Any],
    js_module: str,
) -> None:
    """
    Build a module zipfile at `output_path` from files in ExternalModulesBucket.

    Keys are listed under the prefix "<module_id>/<version>/". The archive
    receives, in this order:

    * "<module_id>.yaml": `spec`, serialized with json.dumps().
    * "<module_id>.js": `js_module`, only when it is truthy.
    * "<module_id>.py": the first listed key whose name (relative to the
      prefix) satisfies _is_basename_python_code().
    * "<module_id>.html": the first listed key ending in ".html", if any.

    Raise FileNotFoundError if no Python code key is listed.
    """

    def member_name(extension: str) -> str:
        # Every archive member is named after the module id.
        return "%s.%s" % (module_id, extension)

    prefix = "%s/%s/" % (module_id, version)
    all_keys = minio.list_file_keys(minio.ExternalModulesBucket, prefix)

    python_candidates = (
        key for key in all_keys if _is_basename_python_code(key[len(prefix):])
    )
    try:
        python_code_key = next(python_candidates)
    except StopIteration:
        raise FileNotFoundError

    # None means there is no HTML file.
    html_key = next((key for key in all_keys if key.endswith(".html")), None)

    # Intended flow: write to a temporary file and then move it, so one thread
    # can safely read a zipfile while another downloads it (".develop"
    # zipfiles are mutable).
    # NOTE(review): the move is not performed here -- presumably the caller
    # passes a temporary `output_path` and moves it afterwards; confirm.
    with zipfile.ZipFile(output_path, mode="w") as archive:
        # .yaml spec
        archive.writestr(member_name("yaml"), json.dumps(spec).encode("utf-8"))

        # .js js_module
        if js_module:
            archive.writestr(member_name("js"), js_module.encode("utf-8"))

        # .py module code (the download may raise FileNotFoundError)
        with minio.temporarily_download(
            minio.ExternalModulesBucket, python_code_key
        ) as py_path:
            archive.write(py_path, member_name("py"))

        # .html file
        if html_key:
            with minio.temporarily_download(
                minio.ExternalModulesBucket, html_key
            ) as html_path:
                archive.write(html_path, member_name("html"))
def _load_external_module_uncached(module_id_name: str, version_sha1: str) -> CompiledModule:
    """
    Load a Python Module given a name and version.

    Keys are listed under "<module_id_name>/<version_sha1>/" in
    minio.ExternalModulesBucket. The first key whose name (relative to that
    prefix) satisfies _is_basename_python_code() is downloaded and passed to
    cjwstate.modules.kernel.compile() under the name
    "<module_id_name>.<version_sha1>".

    NOTE(review): when no key matches, the bare next() lets StopIteration
    propagate to the caller -- confirm callers expect that.
    """
    prefix = f"{module_id_name}/{version_sha1}/"
    listed_keys = minio.list_file_keys(minio.ExternalModulesBucket, prefix)
    code_keys = (
        key for key in listed_keys if _is_basename_python_code(key[len(prefix):])
    )
    python_code_key = next(code_keys)

    # Now we can load the code into memory.
    name = f"{module_id_name}.{version_sha1}"
    with minio.temporarily_download(
        minio.ExternalModulesBucket, python_code_key
    ) as path:
        logger.info(f"Loading {name} from {path}")
        compiled = cjwstate.modules.kernel.compile(path, name)
        return compiled
def downloaded_file(stored_object: StoredObject, dir=None) -> ContextManager[Path]:
    """
    Return a context manager yielding `path`, the StoredObject's file.

    A StoredObject whose size is 0 is served from tempfile_context() instead
    of being downloaded. `dir` is forwarded to whichever helper is used.

    Raise FileNotFoundError if the object is missing.

    Usage:

        try:
            with storedobjects.downloaded_file(stored_object) as path:
                # do something with `path`, a `pathlib.Path`
        except FileNotFoundError:
            # file does not exist....
    """
    if stored_object.size == 0:
        # Some stored objects with size=0 do not have bucket/key. These are
        # valid -- they represent empty files -- so there is nothing to
        # download.
        return tempfile_context(prefix="storedobjects-empty-file", dir=dir)

    # Entering this context raises FileNotFoundError when the object is gone.
    bucket, key = stored_object.bucket, stored_object.key
    return minio.temporarily_download(bucket, key, dir=dir)
def test_file_not_found(self):
    """
    Entering temporarily_download() must raise FileNotFoundError here.

    Nothing is uploaded first in this test, so Bucket/Key is expected to be
    absent.
    """
    with self.assertRaises(FileNotFoundError), minio.temporarily_download(Bucket, Key):
        pass
def test_allows_reading_file(self):
    """
    The path yielded by temporarily_download() holds the uploaded bytes.
    """
    expected = b"1234"
    _put(expected)
    with minio.temporarily_download(Bucket, Key) as downloaded_path:
        actual = downloaded_path.read_bytes()
        self.assertEqual(actual, expected)