Example 1
    def assign_wf_module(wf_module: 'WfModule', delta_id: int,
                         result: ProcessResult) -> 'CachedRenderResult':
        """
        Store `result` on `wf_module`'s cache fields and persist it to disk.
        """
        assert delta_id == wf_module.last_relevant_delta_id
        assert result is not None

        # Cache the render metadata on the WfModule row itself.
        wf_module.cached_render_result_delta_id = delta_id
        wf_module.cached_render_result_error = result.error
        wf_module.cached_render_result_status = result.status
        wf_module.cached_render_result_json = \
            json.dumps(result.json).encode('utf-8')
        wf_module.cached_render_result_quick_fixes = \
            [quick_fix.to_dict() for quick_fix in result.quick_fixes]
        wf_module.cached_render_result_columns = result.columns
        wf_module.cached_render_result_nrows = len(result.dataframe)

        # Drop any previously-cached parquet files before writing the new one.
        CachedRenderResult.delete_parquet_files_for_wf_module(wf_module)

        cached = CachedRenderResult.from_wf_module(wf_module)
        cached._result = result  # no need to read from disk
        parquet.write(minio.CachedRenderResultsBucket, cached.parquet_key,
                      result.dataframe)

        wf_module.save(update_fields=WfModuleFields)

        return cached
Example 2
    def assign_wf_module(
            wf_module: 'WfModule', delta_id: Optional[int],
            result: Optional[ProcessResult]) -> Optional['CachedRenderResult']:
        """
        Write `result` to `wf_module`'s fields and to disk.

        If either argument is None, clear the fields and return None.

        Raise ValueError if `wf_module` does not belong to a workflow.
        """
        if delta_id is None or result is None:
            return CachedRenderResult._clear_wf_module(wf_module)

        if wf_module.workflow_id is None:
            raise ValueError('Cannot cache render result on orphan WfModule')

        if result:
            error = result.error
            status = result.status
            json_dict = result.json
            json_bytes = json.dumps(result.json).encode('utf-8')
            quick_fixes = result.quick_fixes
        else:
            # Falsy (e.g. empty) result: cache empty values. Use b'' so the
            # field holds bytes, matching the encoded JSON stored in the
            # other branch (the original stored a str '' here).
            error = ''
            status = None
            json_dict = None
            json_bytes = b''
            quick_fixes = []

        wf_module.cached_render_result_workflow_id = wf_module.workflow_id
        wf_module.cached_render_result_delta_id = delta_id
        wf_module.cached_render_result_error = error
        wf_module.cached_render_result_status = status
        wf_module.cached_render_result_json = json_bytes
        wf_module.cached_render_result_quick_fixes = [
            qf.to_dict() for qf in quick_fixes
        ]

        parquet_path = _parquet_path(wf_module.workflow_id, wf_module.id)
        os.makedirs(os.path.dirname(parquet_path), exist_ok=True)

        # `result` cannot be None past the guard at the top of this function,
        # so the original `if result is None: os.remove(...)` branch here was
        # unreachable dead code and has been removed.
        parquet.write(parquet_path, result.dataframe)

        ret = CachedRenderResult(workflow_id=wf_module.workflow_id,
                                 wf_module_id=wf_module.id,
                                 delta_id=delta_id,
                                 status=status,
                                 error=error,
                                 json=json_dict,
                                 quick_fixes=quick_fixes)
        ret._result = result  # no need to read from disk
        return ret
 def test_empty_categorical_has_object_dtype(self):
     """Round-trip an empty str Categorical through parquet unchanged."""
     original = pd.DataFrame({'A': []}, dtype='str').astype('category')
     assert original['A'].cat.categories.dtype == object
     try:
         parquet.write(bucket, key, original)
         roundtripped = parquet.read(bucket, key)
     finally:
         minio.remove(bucket, key)
     assert_frame_equal(roundtripped, original)
Example 4
 def __create_table_internal(wf_module, table, metadata, hash):
     """Write `table` to storage and create the StoredObject that records it."""
     storage_path = StoredObject._storage_filename(wf_module.id)
     parquet.write(storage_path, table)
     file_size = os.stat(storage_path).st_size
     return StoredObject.objects.create(
         wf_module=wf_module,
         metadata=metadata,
         file=storage_path,
         size=file_size,
         stored_at=timezone.now(),
         hash=hash,
     )
 def test_na_only_categorical_has_object_dtype(self):
     """Round-trip a Categorical holding only NA through parquet."""
     # Start with a Categorical with no values. (In Workbench, all
     # Categoricals are text.)
     original = pd.DataFrame({'A': [np.nan]}, dtype=str).astype('category')
     assert original['A'].cat.categories.dtype == object
     try:
         parquet.write(bucket, key, original)
         roundtripped = parquet.read(bucket, key)
     finally:
         minio.remove(bucket, key)
     assert_frame_equal(roundtripped, original)
    def __create_table_internal(cls, wf_module, table, hash):
        """Write `table` to minio and create the StoredObject pointing at it."""
        # Persist the table under a key derived from workflow and wf_module.
        object_key = _build_key(wf_module.workflow_id, wf_module.id)
        n_bytes = parquet.write(minio.StoredObjectsBucket, object_key, table)

        # Record the bucket/key (and the written size) in the database.
        return wf_module.stored_objects.create(
            bucket=minio.StoredObjectsBucket,
            key=object_key,
            size=n_bytes,
            hash=hash,
        )