def upload_unlock(upload_id: int) -> Response:
    """
    Unlock upload workspace.

    Parameters
    ----------
    upload_id : int
        The unique identifier for upload workspace.

    Returns
    -------
    Standard Response tuple containing response content, HTTP status, and
    HTTP headers.
    """
    logger.info("%s: Unlock upload workspace.", upload_id)

    try:
        # We can't do anything without a database record for this workspace.
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Unknown workspace identifier.
            raise NotFound(UPLOAD_NOT_FOUND)

        # Only touch the database when the lock state actually changes.
        if upload_db_data.lock == Upload.UNLOCKED:
            logger.info("%s: Unlock: Workspace is already unlocked.",
                        upload_id)
        else:
            upload_db_data.lock = Upload.UNLOCKED
            # Persist the new lock state.
            uploads.update(upload_db_data)

        response_data = {'reason': UPLOAD_UNLOCKED_WORKSPACE}
        status_code = status.OK
    except IOError:
        logger.error("%s: Unlock workspace request failed ", upload_id)
        raise InternalServerError(CANT_DELETE_FILE)
    except NotFound as not_found:
        logger.info("%s: Unlock workspace: %s", upload_id, not_found)
        raise
    except Exception as unexpected:
        logger.info("Unknown error in unlock workspace. "
                    " Add except clauses for '%s'. DO IT NOW!", unexpected)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR)

    headers = {'ARXIV-OWNER': upload_db_data.owner_user_id}
    return response_data, status_code, headers
def sanitize_upload(upload_id: int, file: FileStorage, with_sleep: int = 15) -> Dict[str, Any]:
    """
    Perform some expen$ive mutations on a :class:`.Thing`.

    Parameters
    ----------
    upload_id : int
    file : FileStorage
        Upload file/archive to be processed.

    Returns
    -------
    Still TBD
    """
    print(f'Task: Upload task for {upload_id}')

    # Bail out early if there is no workspace record to attach results to.
    record: Optional[Upload] = uploads.retrieve(upload_id)
    if record is None:
        # Revisit how to handle error
        raise RuntimeError('No such thing! %s' % upload_id)

    started_at = datetime.now(UTC)

    # uploadObj = filemanager.process.Upload.process_upload(upload)
    processor = filemanager.process.upload.Upload(upload_id)

    # TODO: Remember to get rid of this sleep statement
    time.sleep(with_sleep)

    # Process upload
    processor.process_upload(file)

    finished_at = datetime.now(UTC)

    # Collect information we want to retain.
    record.lastupload_logs = str(processor.get_warnings())
    record.lastupload_start_datetime = started_at
    record.lastupload_completion_datetime = finished_at
    # Don't forget about storing file list
    record.state = 'Active'

    # Save to DB
    uploads.update(record)

    print(f'Task: Completed upload task for {upload_id}')
    return {'upload_id': upload_id, 'result': len(record.name)}
def upload_unrelease(upload_id: int) -> Response:
    """
    Return a released workspace to the active state.

    Reverses a previous request to release the workspace. Note that an
    unrelease request does NOT restore a workspace that has already been
    removed from the filesystem.

    Parameters
    ----------
    upload_id : int
        The unique identifier for upload workspace.

    Returns
    -------
    dict
        Detailed information about the upload_db_data.
        logs - Errors and Warnings
        files - list of file details
    int
        An HTTP status code.
    dict
        Some extra headers to add to the response.
    """
    # Authentication, authorization, and existence of the workspace are
    # verified at the route level. We expect the workspace to be in the
    # RELEASED state.
    logger.info("%s: Unrelease upload workspace.", upload_id)

    try:
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Unknown workspace identifier.
            raise NotFound(UPLOAD_NOT_FOUND)

        if upload_db_data.state == Upload.DELETED:
            # A deleted workspace can no longer be unreleased.
            raise NotFound(UPLOAD_WORKSPACE_ALREADY_DELETED)

        if upload_db_data.state == Upload.ACTIVE:
            # Nothing to do; report success anyway. Should this be an error?
            logger.info("%s: Unrelease: Workspace is already active.",
                        upload_id)
            response_data = {'reason': UPLOAD_UNRELEASED_WORKSPACE}
            status_code = status.OK
        elif upload_db_data.state == Upload.RELEASED:
            logger.info("%s: Unrelease upload workspace.", upload_id)
            upload_db_data.state = Upload.ACTIVE
            # Persist the state change.
            uploads.update(upload_db_data)
            response_data = {'reason': UPLOAD_UNRELEASED_WORKSPACE}
            status_code = status.OK
        # NOTE(review): a state other than ACTIVE/RELEASED/DELETED would
        # leave response_data unset — confirm Upload has no other states.
    except IOError:
        logger.error("%s: Unrelease workspace request failed.", upload_id)
        raise InternalServerError(CANT_DELETE_FILE)
    except NotFound as not_found:
        logger.info("%s: Unrelease workspace: '%s'", upload_id, not_found)
        raise
    except Exception as unexpected:
        logger.info("Unknown error in unrelease workspace. "
                    " Add except clauses for '%s'. DO IT NOW!", unexpected)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR)

    headers = {'ARXIV-OWNER': upload_db_data.owner_user_id}
    return response_data, status_code, headers
def upload_lock(upload_id: int) -> Response:
    """
    Lock upload workspace.

    Prohibit all client operations on upload workspace.

    Lock may indicate a process is using workspace content that otherwise
    might produce unknown results if the workspace is updated during that
    process. Compile and publish are examples. Admins will be able to unlock
    the upload workspace.

    Parameters
    ----------
    upload_id : int
        The unique identifier for upload workspace.

    Returns
    -------
    Standard Response tuple containing response content, HTTP status, and
    HTTP headers.
    """
    logger.info("%s: Lock upload workspace.", upload_id)

    try:
        # We can't do anything without a database record for this workspace.
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Unknown workspace identifier.
            raise NotFound(UPLOAD_NOT_FOUND)

        # Only touch the database when the lock state actually changes.
        if upload_db_data.lock == Upload.LOCKED:
            logger.info("%s: Lock: Workspace is already locked.", upload_id)
        else:
            upload_db_data.lock = Upload.LOCKED
            # Persist the new lock state.
            uploads.update(upload_db_data)

        response_data = {'reason': UPLOAD_LOCKED_WORKSPACE}
        status_code = status.OK
    except IOError:
        logger.error("%s: Lock workspace request failed ", upload_id)
        raise InternalServerError(CANT_DELETE_FILE)
    except NotFound as not_found:
        logger.info("%s: Lock: %s", upload_id, not_found)
        raise
    except Exception as unexpected:
        logger.info("Unknown error lock workspace. "
                    " Add except clauses for '%s'. DO IT NOW!", unexpected)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR)

    headers = {'ARXIV-OWNER': upload_db_data.owner_user_id}
    return response_data, status_code, headers
def upload(upload_id: Optional[int], file: FileStorage, archive: str,
           user: auth_domain.User, ancillary: bool = False) -> Response:
    """
    Upload individual files or compressed archive into specified workspace.

    Unpack, sanitize, and add files to upload workspace.

    Parameters
    ----------
    upload_id : int
        The unique identifier for the upload_db_data in question. ``None``
        indicates a new workspace must be created first.
    file : :class:`FileStorage`
        File archive to be processed.
    archive : str
        Archive submission is targeting. Oversize thresholds are currently
        specified at the archive level.
    user : :class:`auth_domain.User`
        The requesting user; recorded as owner for newly created workspaces.
    ancillary : bool
        If ``True``, the file is to be treated as an ancillary file. This
        means (presently) that the file is stored in a special subdirectory
        within the source package.

    Returns
    -------
    dict
        Complete summary of upload processing.
    int
        An HTTP status code.
    dict
        Some extra headers to add to the response.

    Raises
    ------
    BadRequest
        If the file payload or its filename is missing.
    NotFound
        If no workspace exists for ``upload_id``.
    Forbidden
        If the workspace is not active, or is locked.
    InternalServerError
        On I/O failures, database failures, or any unclassified error.
    """
    # TODO: Hook up async processing (celery/redis) - doesn't work now
    # TODO: Will likely delete this code if processing time is reasonable

    # Check arguments for basic qualities like existing and such.
    # File argument is required to exist and have a name associated with it.
    # It is standard practice that if user fails to select file the filename
    # is null.
    logger.debug('Handling upload request for %s', upload_id)
    if file is None:
        # Crash and burn...not quite...do we need info about client?
        logger.error('Upload request is missing file/archive payload.')
        raise BadRequest(UPLOAD_MISSING_FILE)

    if file.filename == '':
        # Client needs to select file, or provide name to upload payload
        logger.error('Upload file is missing filename. File to upload may '
                     'not be selected.')
        raise BadRequest(UPLOAD_MISSING_FILENAME)

    # What about archive argument.
    if archive is None:
        # Most submissions can get by with default size limitations so we
        # only log here; the user-visible warning is generated in
        # process/upload.py and not here.
        logger.error("Upload 'archive' not specified. Oversize calculation "
                     "will use default values.")

    # If this is a new upload then we need to create a workspace and add to
    # database.
    if upload_id is None:
        logger.debug('This is a new upload workspace.')
        try:
            logger.info("Create new workspace: Upload request: "
                        "file='%s' archive='%s'", file.filename, archive)
            user_id = str(user.user_id)

            # Oversize calculation needs an archive value; fall back to an
            # empty string when none was supplied.
            arch = '' if archive is None else archive

            current_time = datetime.now(UTC)
            new_upload = Upload(owner_user_id=user_id, archive=arch,
                                created_datetime=current_time,
                                modified_datetime=current_time,
                                state=Upload.ACTIVE)
            # Store in DB
            uploads.store(new_upload)

            upload_id = new_upload.upload_id
        except IOError as e:
            logger.info("Error creating new workspace: %s", e)
            raise InternalServerError(f'{UPLOAD_IO_ERROR}: {e}') from e
        except (TypeError, ValueError) as dbe:
            logger.info("Error adding new workspace to database: '%s'.", dbe)
            raise InternalServerError(UPLOAD_DB_ERROR) from dbe
        except Exception as ue:
            logger.info("Unknown error in upload for new workspace. "
                        " Add except clauses for '%s'. DO IT NOW!", ue)
            raise InternalServerError(UPLOAD_UNKNOWN_ERROR)

    # At this point we expect upload to exist in system
    try:
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Invalid workspace identifier
            raise NotFound(UPLOAD_NOT_FOUND)
        if upload_db_data.state != Upload.ACTIVE:
            # Do we log anything for these requests
            logger.debug('Forbidden, workspace not active')
            raise Forbidden(UPLOAD_NOT_ACTIVE)
        if upload_db_data.lock == Upload.LOCKED:
            logger.debug('Forbidden, workspace locked')
            raise Forbidden(UPLOAD_WORKSPACE_LOCKED)

        # Now handle upload package - process file or gzipped tar archive
        # NOTE: This will need to be migrated to task.py using Celery at
        #       some point in future. Depends in time it takes to process
        #       uploads.retrieve
        logger.info("%s: Upload files to existing "
                    "workspace: file='%s'",
                    upload_db_data.upload_id, file.filename)

        # Keep track of how long processing upload_db_data takes
        start_datetime = datetime.now(UTC)

        # Create Upload object
        upload_workspace = UploadWorkspace(upload_id)

        # Process upload_db_data
        upload_workspace.process_upload(file, ancillary=ancillary)

        completion_datetime = datetime.now(UTC)

        # Keep track of files processed (this included deleted files)
        file_list = upload_workspace.create_file_upload_summary()

        # Determine readiness state of upload content
        upload_status = Upload.READY
        if upload_workspace.has_errors():
            upload_status = Upload.ERRORS
        elif upload_workspace.has_warnings():
            upload_status = Upload.READY_WITH_WARNINGS

        # Create combined list of errors and warnings
        # TODO: Should I do this in Upload package?? Likely...
        all_errors_and_warnings = []

        for warn in upload_workspace.get_warnings():
            public_filepath, warning_message = warn
            all_errors_and_warnings.append(
                ['warn', public_filepath, warning_message])

        for error in upload_workspace.get_errors():
            # BUGFIX: use a distinct name here; the original reused
            # ``warning_message`` from the warnings loop above.
            public_filepath, error_message = error
            # TODO: errors renamed fatal. Need to review 'errors' as to
            # whether they are 'fatal'.
            all_errors_and_warnings.append(
                ['fatal', public_filepath, error_message])

        # Prepare upload_db_data details (DB). I'm assuming that in memory
        # Redis is not sufficient for results that may be needed in the
        # distant future.
        errors_and_warnings = all_errors_and_warnings
        upload_db_data.lastupload_logs = json.dumps(errors_and_warnings)
        upload_db_data.lastupload_start_datetime = start_datetime
        upload_db_data.lastupload_completion_datetime = completion_datetime
        upload_db_data.lastupload_file_summary = json.dumps(file_list)
        upload_db_data.lastupload_upload_status = upload_status
        upload_db_data.state = Upload.ACTIVE

        # Store in DB
        uploads.update(upload_db_data)

        logger.info("%s: Processed upload. "
                    "Saved to DB. Preparing upload summary.",
                    upload_db_data.upload_id)

        # Upload action itself has very simple response
        headers = {'Location': url_for('upload_api.upload_files',
                                       upload_id=upload_db_data.upload_id)}
        status_code = status.CREATED

        response_data = _status_data(upload_db_data, upload_workspace)
        logger.info("%s: Generating upload summary.",
                    upload_db_data.upload_id)
        logger.debug('Response data: %s', response_data)
        headers.update({'ARXIV-OWNER': upload_db_data.owner_user_id})
        return response_data, status_code, headers

    except IOError as e:
        logger.error("%s: File upload_db_data request failed "
                     "for file='%s'", upload_id, file.filename)
        raise InternalServerError(f'{UPLOAD_IO_ERROR}: {e}') from e
    except (TypeError, ValueError) as dbe:
        logger.info("Error updating database: '%s'", dbe)
        raise InternalServerError(UPLOAD_DB_ERROR) from dbe
    except BadRequest as breq:
        logger.info("%s: '%s'.", upload_id, breq)
        raise
    except NotFound as nfdb:
        # BUGFIX: original format string contained a literal '{nfdb}'
        # placeholder; pass the exception as a lazy %-arg so it is logged.
        logger.info("%s: Upload: '%s'.", upload_id, nfdb)
        raise nfdb
    except Forbidden as forb:
        # BUGFIX: same literal-'{forb}'-placeholder problem as above.
        logger.info("%s: Upload failed: '%s'.", upload_id, forb)
        raise forb
    except Exception as ue:
        logger.info("Unknown error with existing workspace."
                    " Add except clauses for '%s'. DO IT NOW!", ue)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR)
    # NOTE: the original ended with an unreachable ``return None`` (the try
    # block returns and every except clause raises); it has been removed.
def delete_workspace(upload_id: int) -> Response:
    """
    Delete workspace.

    Parameters
    ----------
    upload_id : int
        The unique identifier for the upload workspace.

    Returns
    -------
    dict
        Complete summary of upload processing.
    int
        An HTTP status code.
    dict
        Some extra headers to add to the response.
    """
    logger.info('%s: Deleting upload workspace.', upload_id)

    # Authorization to delete is checked at the routes level; here we only
    # validate the existence and state of the workspace.
    #
    # Open questions:
    #   - Do we care if workspace is in ACTIVE state and not released? NO,
    #     but log it.
    #   - Do we want to stash source.log somewhere?
    #   - Do we care if workspace was modified recently? NO. Log it.
    try:
        upload_db_data: Optional[Upload] = uploads.retrieve(upload_id)

        if upload_db_data is None:
            # Invalid workspace identifier.
            # Note: DB entry will exist for workspace that has already been
            # deleted
            raise NotFound(UPLOAD_NOT_FOUND)

        if upload_db_data.state == Upload.DELETED:
            logger.info("%s: Workspace has already been deleted:"
                        "current state is '%s'", upload_id,
                        upload_db_data.state)
            raise NotFound(UPLOAD_WORKSPACE_NOT_FOUND)

        # Remove the entire workspace directory structure. Log everything to
        # the global log, since the source log is being removed!
        workspace = UploadWorkspace(upload_id)
        workspace.remove_workspace()

        # Keep the database row for historical reference (it does not
        # consume much space); just flag the workspace as deleted.
        if upload_db_data.state != Upload.RELEASED:
            logger.info("%s: Workspace currently in '%s' state.", upload_id,
                        upload_db_data.state)

        upload_db_data.state = Upload.DELETED
        uploads.update(upload_db_data)
    except IOError:
        logger.error("%s: Delete workspace request failed ", upload_id)
        raise InternalServerError(CANT_DELETE_FILE)
    except NotFound as not_found:
        logger.info("%s: Delete Workspace: '%s'", upload_id, not_found)
        raise
    except Exception as unexpected:
        logger.info("Unknown error in delete workspace. "
                    " Add except clauses for '%s'. DO IT NOW!", unexpected)
        raise InternalServerError(UPLOAD_UNKNOWN_ERROR)

    # API doesn't provide for returning errors resulting from delete.
    # 401-unautorized and 403-forbidden are handled at routes level.
    # Add 400 response to openapi.yaml
    response_data = {'reason': UPLOAD_DELETED_WORKSPACE}
    status_code = status.OK
    return response_data, status_code, {}