def duplicate(self, to_wf_module):
    key = _build_key(to_wf_module.workflow_id, to_wf_module.id)
    minio.copy(self.bucket, key, f'{self.bucket}/{self.key}')
    return to_wf_module.stored_objects.create(
        stored_at=self.stored_at,
        hash=self.hash,
        bucket=self.bucket,
        key=key,
        size=self.size,
    )
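# `_build_key` is defined elsewhere in this module. A minimal sketch of what
# it might look like, assuming keys are namespaced by workflow and WfModule
# and made unique with a UUID -- hypothetical; the real helper may differ:

import uuid

def _build_key(workflow_id, wf_module_id):
    """Build a key namespaced to the WfModule, unique per stored object."""
    return f'{workflow_id}/{wf_module_id}/{uuid.uuid4()}'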
def convert_to_uploaded_file(self, filename):
    """
    Generate an UploadedFile and delete this InProgressUpload.

    Raise FileNotFoundError if the user never finished uploading. That's
    right: we raise an exception if the _end user_ doesn't do what we want.
    The user is meant to upload the file (putObject or multipart) and _then_
    convert it. Callers should handle the case in which the end user asks to
    convert the file before the upload is complete.
    """
    assert not self.is_completed  # this InProgressUpload should not be visible
    key = self.get_upload_key()
    suffix = PurePath(filename).suffix
    final_key = self.wf_module.uploaded_file_prefix + str(self.id) + suffix
    try:
        minio.copy(
            minio.UserFilesBucket,
            final_key,
            f"{self.Bucket}/{key}",
            ACL="private",
            MetadataDirective="REPLACE",
            ContentDisposition=minio.encode_content_disposition(filename),
            ContentType="application/octet-stream",
        )
    except minio.error.NoSuchKey:
        raise FileNotFoundError
    # Potential tricky leak here: if there's an exception, then final_key
    # is in S3 but nothing in the database refers to it. Careful coding of
    # delete_s3_data() solves this.
    size = minio.stat(minio.UserFilesBucket, final_key).size
    uploaded_file = self.wf_module.uploaded_files.create(
        name=filename,
        size=size,
        uuid=str(self.id),
        bucket=minio.UserFilesBucket,
        key=final_key,
    )
    self.is_completed = True
    self.save(update_fields=["is_completed"])
    self.delete_s3_data()
    return uploaded_file
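# A minimal usage sketch of the caller-side handling the docstring demands,
# assuming a hypothetical request handler. `in_progress_upload` and
# `ErrorResponse` are illustrative names, not part of the real API:

def finish_upload(in_progress_upload, filename):
    try:
        uploaded_file = in_progress_upload.convert_to_uploaded_file(filename)
    except FileNotFoundError:
        # The end user asked to convert before the upload completed (or the
        # upload never happened). Report the error; don't crash.
        return ErrorResponse('file-not-uploaded')
    return uploaded_file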
def move_uploaded_file(workflow, wf_module, uploaded_file):
    """
    Move files from /uuid.ext to /wf-1/wfm-2/uuid.ext.

    This helps delete leaked files and find problem files.
    """
    from server import minio

    bucket = uploaded_file.bucket
    old_key = uploaded_file.key
    if '/' in old_key:
        return

    new_key = f'wf-{workflow.id}/wfm-{wf_module.id}/{old_key}'
    logger.info('Move %s/%s to %s/%s', bucket, old_key, bucket, new_key)
    try:
        minio.copy(bucket, new_key, f'{bucket}/{old_key}')
        minio.remove(bucket, old_key)
    except minio.error.NoSuchKey:
        # old_key is missing. Two possibilities:
        #
        # 1. We're re-running this script after it failed once with
        #    atomic=True (which used to be set, by accident); the move already
        #    succeeded but the DB doesn't know it. In that case, continue
        #    because this error actually means, "all is well."
        # 2. The file didn't exist to begin with. In that case, write a blank
        #    file in its stead. That way the user will remark, "hey, Workbench
        #    ate my file!" instead of undefined behavior (which is worse).
        #    https://www.pivotaltracker.com/story/show/163336822
        if minio.exists(bucket, new_key):
            pass  # "all is well"
        else:
            # write an empty file
            minio.put_bytes(bucket, new_key, b'')
            uploaded_file.size = 0
            uploaded_file.save(update_fields=['size'])
    uploaded_file.key = new_key
    uploaded_file.save(update_fields=['key'])
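# A minimal sketch of how a data migration might drive move_uploaded_file(),
# assuming a Django RunPython-style forward function. The app label, the
# `wf_module` foreign key, and the `wf_module.workflow` traversal are
# assumptions; the real migration may load historical models differently:

def forward(apps, schema_editor):
    UploadedFile = apps.get_model('server', 'UploadedFile')
    for uploaded_file in UploadedFile.objects.select_related('wf_module'):
        wf_module = uploaded_file.wf_module
        workflow = wf_module.workflow  # assumed accessor; may be via a tab
        # Deliberately not atomic: if we crash partway, re-running is safe
        # because move_uploaded_file() tolerates an already-moved file.
        move_uploaded_file(workflow, wf_module, uploaded_file)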
def duplicate(self, to_tab):
    to_workflow = to_tab.workflow

    # Initialize but don't save
    new_wfm = WfModule(
        tab=to_tab,
        module_id_name=self.module_id_name,
        fetch_error=self.fetch_error,
        stored_data_version=self.stored_data_version,
        order=self.order,
        notes=self.notes,
        is_collapsed=self.is_collapsed,
        auto_update_data=False,
        next_update=self.next_update,
        update_interval=self.update_interval,
        last_update_check=self.last_update_check,
        # to_workflow has exactly one delta, and that's the version of all
        # its modules. This is so we can cache render results. (Cached
        # render results require a delta ID.)
        last_relevant_delta_id=to_workflow.last_delta_id,
        params=self.params,
        secrets={}  # DO NOT COPY SECRETS
    )

    # Copy cached render result, if there is one.
    #
    # If we duplicate a Workflow mid-render, the cached render result might
    # not have any useful data. But that's okay: just kick off a new
    # render. The common case (all-rendered Workflow) will produce a
    # fully-rendered duplicate Workflow.
    cached_result = self.cached_render_result
    if cached_result is not None:
        # Assuming file-copy succeeds, copy cached results.
        # Not using `new_wfm.cache_render_result(cached_result.result)`
        # because that would involve reading the whole thing.
        new_wfm.cached_render_result_delta_id = new_wfm.last_relevant_delta_id
        for attr in ('status', 'error', 'json', 'quick_fixes', 'columns',
                     'nrows'):
            full_attr = f'cached_render_result_{attr}'
            setattr(new_wfm, full_attr, getattr(self, full_attr))

        new_wfm.save()  # so there is a new_wfm.id for parquet_key

        # Now new_wfm.cached_render_result will return a CachedRenderResult,
        # because all the DB values are set. It'll have a .parquet_key ...
        # but there won't be a file there (because we never wrote it).
        parquet_key = new_wfm.cached_render_result.parquet_key
        try:
            minio.copy(
                minio.CachedRenderResultsBucket,
                parquet_key,
                '%(Bucket)s/%(Key)s' % {
                    'Bucket': minio.CachedRenderResultsBucket,
                    'Key': cached_result.parquet_key,
                }
            )
        except minio.error.NoSuchKey:
            # DB and filesystem are out of sync. CachedRenderResult handles
            # such cases gracefully. So `new_result` will behave exactly
            # like `cached_result`.
            pass
    else:
        new_wfm.save()

    # Duplicate the current stored data only, not the history
    if self.stored_data_version is not None:
        self.stored_objects.get(stored_at=self.stored_data_version) \
            .duplicate(new_wfm)

    # Duplicate the "selected" file, if there is one; otherwise, duplicate
    # the most-recently-uploaded file.
    #
    # We special-case the 'upload' module because it's the only one that
    # has 'file' params right now. (If that ever changes, we'll want to
    # change a few things: upload paths should include param name, and this
    # test will need to check module_version to find the param name of the
    # file.)
    if self.module_id_name == 'upload':
        uuid = self.params['file']
        uploaded_file = self.uploaded_files.filter(uuid=uuid).first()
        if uploaded_file is not None:
            new_key = uploaded_file.key.replace(
                self.uploaded_file_prefix,
                new_wfm.uploaded_file_prefix,
            )
            assert new_key != uploaded_file.key
            # TODO handle file does not exist
            minio.copy(
                minio.UserFilesBucket,
                new_key,
                f'{uploaded_file.bucket}/{uploaded_file.key}'
            )
            new_wfm.uploaded_files.create(
                created_at=uploaded_file.created_at,
                name=uploaded_file.name,
                size=uploaded_file.size,
                uuid=uploaded_file.uuid,
                bucket=minio.UserFilesBucket,
                key=new_key,
            )

    return new_wfm
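# A minimal usage sketch, assuming this method is driven by a hypothetical
# tab-duplication routine. `live_wf_modules` is assumed to be a queryset of
# the tab's non-deleted modules, in order:

def duplicate_tab_contents(from_tab, to_tab):
    for wf_module in from_tab.live_wf_modules.all():
        wf_module.duplicate(to_tab)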