def assign_wf_module(wf_module: 'WfModule', delta_id: int,
                     result: ProcessResult) -> 'CachedRenderResult':
    """
    Write `result` to `wf_module`'s cached-render fields and to disk.

    `delta_id` must match `wf_module.last_relevant_delta_id` and `result`
    must not be None; both are asserted.
    """
    assert delta_id == wf_module.last_relevant_delta_id
    assert result is not None

    # Copy the render result into the wf_module's cache columns.
    wf_module.cached_render_result_delta_id = delta_id
    wf_module.cached_render_result_error = result.error
    wf_module.cached_render_result_status = result.status
    wf_module.cached_render_result_json = \
        json.dumps(result.json).encode('utf-8')
    wf_module.cached_render_result_quick_fixes = [
        fix.to_dict() for fix in result.quick_fixes
    ]
    wf_module.cached_render_result_columns = result.columns
    wf_module.cached_render_result_nrows = len(result.dataframe)

    # Drop any stale cache files before writing the fresh one.
    CachedRenderResult.delete_parquet_files_for_wf_module(wf_module)

    cached = CachedRenderResult.from_wf_module(wf_module)
    cached._result = result  # skip a needless round-trip read from disk
    parquet.write(minio.CachedRenderResultsBucket, cached.parquet_key,
                  result.dataframe)

    wf_module.save(update_fields=WfModuleFields)
    return cached
def assign_wf_module(
        wf_module: 'WfModule', delta_id: Optional[int],
        result: Optional[ProcessResult]) -> Optional['CachedRenderResult']:
    """
    Write `result` to `wf_module`'s cached-render fields and to disk.

    If either `delta_id` or `result` is None, clear the fields instead and
    return whatever `CachedRenderResult._clear_wf_module` returns.

    Raises ValueError if `wf_module` has no workflow_id (orphan WfModule).
    """
    if delta_id is None or result is None:
        return CachedRenderResult._clear_wf_module(wf_module)

    if wf_module.workflow_id is None:
        raise ValueError('Cannot cache render result on orphan WfModule')

    # `result` is guaranteed non-None past the guard above, but it may
    # still be falsy (e.g. an empty ProcessResult); cache empty values then.
    if result:
        error = result.error
        status = result.status
        json_dict = result.json
        json_bytes = json.dumps(result.json).encode('utf-8')
        quick_fixes = result.quick_fixes
    else:
        error = ''
        status = None
        json_dict = None
        # Bug fix: this field otherwise always holds bytes (see the
        # .encode('utf-8') above); the original assigned a str ''.
        json_bytes = b''
        quick_fixes = []

    wf_module.cached_render_result_workflow_id = wf_module.workflow_id
    wf_module.cached_render_result_delta_id = delta_id
    wf_module.cached_render_result_error = error
    wf_module.cached_render_result_status = status
    wf_module.cached_render_result_json = json_bytes
    wf_module.cached_render_result_quick_fixes = [
        qf.to_dict() for qf in quick_fixes
    ]

    parquet_path = _parquet_path(wf_module.workflow_id, wf_module.id)
    os.makedirs(os.path.dirname(parquet_path), exist_ok=True)

    # The original had an unreachable `if result is None:` delete-file
    # branch here; the guard at the top already returned in that case,
    # so we always write the dataframe.
    parquet.write(parquet_path, result.dataframe)

    ret = CachedRenderResult(workflow_id=wf_module.workflow_id,
                             wf_module_id=wf_module.id,
                             delta_id=delta_id,
                             status=status,
                             error=error,
                             json=json_dict,
                             quick_fixes=quick_fixes)
    ret._result = result  # no need to read from disk
    return ret
def test_empty_categorical_has_object_dtype(self):
    """A zero-row text Categorical keeps object-dtype categories through a write/read round trip."""
    original = pd.DataFrame({'A': []}, dtype='str').astype('category')
    assert original['A'].cat.categories.dtype == object
    try:
        parquet.write(bucket, key, original)
        roundtripped = parquet.read(bucket, key)
    finally:
        # Always clean up the object, even if write/read raised.
        minio.remove(bucket, key)
    assert_frame_equal(roundtripped, original)
def __create_table_internal(wf_module, table, metadata, hash):
    """Write `table` to the module's storage file and record it as a StoredObject."""
    path = StoredObject._storage_filename(wf_module.id)
    parquet.write(path, table)
    file_size = os.stat(path).st_size
    return StoredObject.objects.create(
        wf_module=wf_module,
        metadata=metadata,
        file=path,
        size=file_size,
        stored_at=timezone.now(),
        hash=hash,
    )
def test_na_only_categorical_has_object_dtype(self):
    """An all-NA text Categorical keeps object-dtype categories through a write/read round trip."""
    # Start with a Categorical with no values. (In Workbench, all
    # Categoricals are text.)
    original = pd.DataFrame({'A': [np.nan]}, dtype=str).astype('category')
    assert original['A'].cat.categories.dtype == object
    try:
        parquet.write(bucket, key, original)
        roundtripped = parquet.read(bucket, key)
    finally:
        # Always clean up the object, even if write/read raised.
        minio.remove(bucket, key)
    assert_frame_equal(roundtripped, original)
def __create_table_internal(cls, wf_module, table, hash):
    """Write `table` to minio and record it as a StoredObject row."""
    # Key is derived from workflow+module so objects don't collide.
    key = _build_key(wf_module.workflow_id, wf_module.id)
    n_bytes = parquet.write(minio.StoredObjectsBucket, key, table)
    # Record the bucket/key so the table can be located later.
    return wf_module.stored_objects.create(
        bucket=minio.StoredObjectsBucket,
        key=key,
        size=n_bytes,
        hash=hash,
    )