def _transfer_resource(self): """ Transfer resources from the source target to the destination target. """ # Write the process id to the process_info file self.process_info_obj[ 'function_process_id'] = self.function_process.pid write_file(self.process_info_path, self.process_info_obj, True) ####### DOWNLOAD THE RESOURCES ####### download_status = self._download_resource() # If download failed then don't continue if not download_status: return ####### PREPARE UPLOAD FROM DOWNLOAD BAG ####### # Validate the 'bag' and check for checksum mismatches self.bag = bagit.Bag(self.resource_main_dir) try: validate_bag(self.bag) except PresQTValidationError as e: return Response(data={'error': e.data}, status=e.status_code) # Create a hash dictionary to compare with the hashes returned from the target after upload # If the destination target supports a hash provided by the self. then use those hashes, # otherwise create new hashes with a target supported hash. self.file_hashes, self.hash_algorithm = get_or_create_hashes_from_bag( self) ####### UPLOAD THE RESOURCES ####### upload_status = self._upload_resource() # If upload failed then don't continue if not upload_status: return ####### TRANSFER COMPLETE ####### # Transfer was a success so update the server metadata file. self.process_info_obj['status_code'] = '200' self.process_info_obj['status'] = 'finished' self.process_info_obj['failed_fixity'] = list( set(self.download_failed_fixity + self.upload_failed_fixity)) transfer_fixity = False if not self.download_fixity or not self.upload_fixity else True self.process_info_obj['message'] = get_action_message( 'Transfer', transfer_fixity, self.metadata_validation, self.action_metadata) write_file(self.process_info_path, self.process_info_obj, True) return
def test_action_message_with_fixity_and_metadata_errors(self):
    """
    Verify that get_action_message reports both fixity and metadata
    failures in its message so the user is made aware of them.
    """
    action_metadata = {
        "sourceTargetName": "egg",
        "destinationTargetName": "egg2"
    }
    message = get_action_message('Download', False, False, action_metadata)
    expected = 'Download successful but with fixity and metadata errors.'
    self.assertEqual(message, expected)
def test_action_message_with_fixity_and_metadata_errors(self):
    """
    Verify that get_action_message reports both metadata and fixity
    failures in its message so the user is made aware of them.
    """
    self.keyword_enhancement_successful = True
    action_metadata = {
        "sourceTargetName": "egg",
        "destinationTargetName": "egg2",
        "files": {
            "created": [],
            "updated": [],
            "ignored": []
        }
    }
    message = get_action_message(self, 'Download', False, False,
                                 action_metadata)
    expected = 'Download successful but with metadata, fixity errors.'
    self.assertEqual(message, expected)
def _transfer_resource(self):
    """
    Transfer resources from the source target to the destination target.

    Orchestrates the full transfer pipeline: download from the source,
    BagIt validation, hash preparation, upload to the destination, keyword
    bookkeeping, an optional FAIRshare evaluation, and the final
    process_info status update (plus an optional completion email).

    Returns
    -------
    None on normal completion or early exit (the sub-steps record their own
    failures in process_info); a rest_framework ``Response`` carrying the
    error payload if bag validation fails.
    """
    # Write the process id to the process_info file
    self.process_info_obj[
        'function_process_id'] = self.function_process.pid
    update_or_create_process_info(self.process_info_obj, self.action,
                                  self.ticket_number)

    ####### DOWNLOAD THE RESOURCES #######
    download_status = self._download_resource()

    # If download failed then don't continue
    # (_download_resource has already written the failure into process_info)
    if not download_status:
        return

    ####### PREPARE UPLOAD FROM DOWNLOAD BAG #######
    # Validate the 'bag' and check for checksum mismatches
    self.bag = bagit.Bag(self.resource_main_dir)
    try:
        validate_bag(self.bag)
    except PresQTValidationError as e:
        return Response(data={'error': e.data}, status=e.status_code)

    # Create a hash dictionary to compare with the hashes returned from the target after upload
    # If the destination target supports a hash provided by the self. then use those hashes,
    # otherwise create new hashes with a target supported hash.
    self.file_hashes, self.hash_algorithm = get_or_create_hashes_from_bag(
        self)

    ####### UPLOAD THE RESOURCES #######
    upload_status = self._upload_resource()

    # If upload failed then don't continue
    if not upload_status:
        return

    ####### TRANSFER COMPLETE #######
    # Transfer was a success so update the server metadata file.
    self.process_info_obj['status_code'] = '200'
    # De-duplicate paths that failed fixity in either direction.
    self.process_info_obj['failed_fixity'] = list(
        set(self.download_failed_fixity + self.upload_failed_fixity))
    self.process_info_obj['source_resource_id'] = self.source_resource_id
    self.process_info_obj[
        'destination_resource_id'] = self.destination_resource_id

    # Record which keywords were added, keyed off the enhancement mode.
    if self.keyword_action == 'automatic':
        self.process_info_obj[
            'enhanced_keywords'] = self.enhanced_keywords + self.keywords
        self.process_info_obj['initial_keywords'] = self.initial_keywords
    elif self.keyword_action == 'manual':
        self.process_info_obj['enhanced_keywords'] = self.keywords
        self.process_info_obj['initial_keywords'] = self.initial_keywords
    else:
        # no enhancement
        self.process_info_obj['enhanced_keywords'] = []
        self.process_info_obj['initial_keywords'] = []

    # Overall fixity is good only if BOTH download and upload fixity passed.
    transfer_fixity = False if not self.download_fixity or not self.upload_fixity else True

    # Surface an intermediate status message before the (potentially slow)
    # final steps run.
    if self.fairshare_evaluator_action:
        self.process_info_obj[
            'message'] = "Running FAIRshare Evaluator Service, this may take several minutes..."
    else:
        self.process_info_obj[
            'message'] = "Making final metadata updates..."
    update_or_create_process_info(self.process_info_obj, self.action,
                                  self.ticket_number)

    if self.fairshare_evaluator_action:
        # Do the evaluation on the newly created project's url
        data = {
            'resource': self.func_dict["project_link"],
            'executor': "PresQT",
            'title': "PresQT Fair Evaluation"
        }
        # 16 is the id of the PresQT test collection
        response = requests.post(
            'https://w3id.org/FAIR_Evaluator/collections/16/evaluate',
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json"
            },
            data=json.dumps(data))

        if response.status_code != 200:
            results = [{
                "error":
                "FAIRshare returned an error trying to process your request."
            }]
        else:
            # NOTE(review): the integer list appears to select specific
            # FAIRshare metric ids — confirm against fairshare_results().
            results = fairshare_results(
                response.json(), [1, 2, 4, 5, 6, 7, 8, 10, 13, 17, 19, 22])
    else:
        results = []

    self.process_info_obj['message'] = get_action_message(
        self, 'Transfer', transfer_fixity, self.metadata_validation,
        self.action_metadata)
    self.process_info_obj['status'] = 'finished'
    self.process_info_obj['fairshare_evaluation_results'] = results
    self.process_info_obj['link_to_resource'] = self.func_dict[
        "project_link"]
    update_or_create_process_info(self.process_info_obj, self.action,
                                  self.ticket_number)

    # Optionally notify the user that the transfer finished.
    if self.email:
        context = {
            "transfer_url": self.func_dict["project_link"],
            "transfer_message": self.process_info_obj['message'],
            "enhanced_keywords": self.process_info_obj['enhanced_keywords'],
            "failed_fixity": self.process_info_obj['failed_fixity'],
            "fairshare_results_list": results
        }
        email_blaster(self.email, "PresQT Transfer Complete", context,
                      "emails/transfer_email.html")
    return
def _upload_resource(self):
    """
    Upload resources to the target and perform a fixity check on the resulting hashes.

    When called for a plain upload (``self.action == 'resource_upload'``)
    this also builds the initial FTS metadata from the provided bag; when
    called as part of a transfer it additionally handles keyword updates on
    the destination.

    Returns
    -------
    bool
        True on success; False if the upload (or the finite-depth
        preparation step) raised a PresQTResponseException, in which case
        the failure has been recorded in process_info.
    """
    action = 'resource_upload'

    # This doesn't happen during an upload, so it won't be an error. If there is an error during
    # transfer this will be overwritten.
    self.keyword_enhancement_successful = True

    # Write the process id to the process_info file
    self.process_info_obj[
        'function_process_id'] = self.function_process.pid
    update_or_create_process_info(self.process_info_obj, self.action,
                                  self.ticket_number)

    # Data directory in the bag
    self.data_directory = '{}/data'.format(self.resource_main_dir)

    # If we are uploading (not transferring) then create the initial metadata based on the
    # zipped bag provided.
    if self.action == 'resource_upload':
        update_process_info_message(self.process_info_path, self.action,
                                    "Creating PRESQT_FTS_METADATA...")
        # Build one FTS file-metadata entry per file found in the bag's
        # data directory; paths are made relative to the data directory.
        self.new_fts_metadata_files = []
        for path, subdirs, files in os.walk(self.data_directory):
            for name in files:
                self.new_fts_metadata_files.append({
                    'destinationHashes': {},
                    'destinationPath':
                    os.path.join(path, name)[len(self.data_directory):],
                    'failedFixityInfo': [],
                    'title': name,
                    'sourceHashes': {
                        self.hash_algorithm:
                        self.file_hashes[os.path.join(path, name)]
                    },
                    'sourcePath':
                    os.path.join(path, name)[len(self.data_directory):],
                    'extra': {}
                })

        destination_target_data = get_target_data(
            self.destination_target_name)
        self.details = "PresQT Upload to {}".format(
            destination_target_data['readable_name'])
        self.action_metadata = {
            'id': str(uuid4()),
            'details': self.details,
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': 'Local Machine',
            'sourceUsername': None,
            'destinationTargetName': self.destination_target_name,
            'destinationUsername': None,
            'keywords': {},
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

    # If the target destination's storage hierarchy has a finite depth then zip the resources
    # to be uploaded along with their metadata.
    # Also, create metadata files for the new zip file to be uploaded.
    if self.infinite_depth is False:
        try:
            structure_validation(self)
            finite_depth_upload_helper(self)
        except PresQTResponseException as e:
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['upload_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Update the expiration from 5 hours to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)
            return False

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.destination_target_name, action)

    # Upload the resources. func_dict has the following format:
    #   {
    #        'resources_ignored': resources_ignored,
    #        'resources_updated': resources_updated,
    #        'action_metadata': action_metadata,
    #        'file_metadata_list': file_metadata_list,
    #        'project_id': title
    #    }
    try:
        structure_validation(self)
        self.func_dict = func(self.destination_token,
                              self.destination_resource_id,
                              self.data_directory, self.hash_algorithm,
                              self.file_duplicate_action,
                              self.process_info_path, self.action)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['upload_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 hours to 1 hour from now. We can delete this faster
        # because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() +
                                                  relativedelta(hours=1))
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)
        return False

    # Re-read process_info since the target function may have updated it.
    self.process_info_obj = read_file(self.process_info_path,
                                      True)[self.action]

    # Check if fixity has failed on any files during a transfer. If so, update the
    # process_info_data file.
    self.upload_fixity = True
    self.upload_failed_fixity = []
    for resource in self.func_dict['file_metadata_list']:
        resource['failed_fixity_info'] = []
        # Ignored resources are skipped: their hashes were never uploaded.
        if resource['destinationHash'] != self.file_hashes[resource['actionRootPath']] \
                and resource['actionRootPath'] not in self.func_dict['resources_ignored']:
            self.upload_fixity = False
            self.upload_failed_fixity.append(
                resource['actionRootPath'][len(self.data_directory):])
            resource['failed_fixity_info'].append({
                'NewGeneratedHash':
                self.file_hashes[resource['actionRootPath']],
                'algorithmUsed': self.hash_algorithm,
                'reasonFixityFailed': "Either the destination did not provide a hash "
                                      "or fixity failed during upload."
            })

    # Strip the server created directory prefix of the file paths for ignored and updated files
    resources_ignored = [
        file[len(self.data_directory):]
        for file in self.func_dict['resources_ignored']
    ]
    self.process_info_obj['resources_ignored'] = resources_ignored
    resources_updated = [
        file[len(self.data_directory):]
        for file in self.func_dict['resources_updated']
    ]
    self.process_info_obj['resources_updated'] = resources_updated

    if self.action == 'resource_transfer_in':
        self.keyword_enhancement_successful = True
        # A brand-new project was created; capture its id as the
        # destination resource id.
        if not self.destination_resource_id:
            self.destination_resource_id = self.func_dict['project_id']
        if self.supports_keywords:
            self.keyword_enhancement_successful, self.destination_initial_keywords = update_targets_keywords(
                self, self.func_dict['project_id'])

            # Add the destination initial keywords to all keywords for accurate metadata list
            self.all_keywords = self.all_keywords + self.destination_initial_keywords

    self.metadata_validation = create_upload_metadata(
        self, self.func_dict['file_metadata_list'],
        self.func_dict['action_metadata'], self.func_dict['project_id'],
        resources_ignored, resources_updated)

    # Increment process_info one last time
    increment_process_info(self.process_info_path, self.action, 'upload')

    # Validate the final metadata
    upload_message = get_action_message(self, 'Upload', self.upload_fixity,
                                        self.metadata_validation,
                                        self.action_metadata)
    self.process_info_obj['message'] = upload_message

    if self.action == 'resource_upload':
        # Update server process file
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['hash_algorithm'] = self.hash_algorithm
        self.process_info_obj['failed_fixity'] = self.upload_failed_fixity
        self.process_info_obj['upload_status'] = upload_message
        self.process_info_obj['link_to_resource'] = self.func_dict[
            "project_link"]
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)

        # Optionally notify the user that the upload finished.
        if self.email:
            context = {
                "upload_url": self.func_dict["project_link"],
                "upload_message": upload_message,
                "failed_fixity": self.upload_failed_fixity
            }
            email_blaster(self.email, "PresQT Upload Complete", context,
                          "emails/upload_email.html")

    return True
def _download_resource(self):
    """
    Downloads the resources from the target, performs a fixity check,
    zips them up in BagIt format.

    When called as part of a transfer (``self.action ==
    'resource_transfer_in'``) the bag is left on disk for
    ``_upload_resource`` and keyword enhancement is performed; for a plain
    download the bag is zipped and process_info is finalized (with an
    optional completion email).

    Returns
    -------
    bool
        True on success; False if the target fetch raised a
        PresQTResponseException, in which case the failure has been
        recorded in process_info.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj[
        'function_process_id'] = self.function_process.pid
    update_or_create_process_info(self.process_info_obj, self.action,
                                  self.ticket_number)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #   {
    #        'resources': files,
    #        'empty_containers': empty_containers,
    #        'action_metadata': action_metadata
    #    }
    try:
        func_dict = func(self.source_token, self.source_resource_id,
                         self.process_info_path, self.action)
        # If the resource is being transferred, has only one file, and that file is the
        # PresQT metadata then raise an error.
        if self.action == 'resource_transfer_in' and \
                len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            # NOTE(review): message contains a double negative
            # ("cannot not") — confirm intended wording.
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # TODO: Functionalize this error section
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 hours to 1 hour from now. We can delete this faster because
        # it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() +
                                                  relativedelta(hours=1))
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)
        return False

    # Get the latest contents of the job's process_info.json file
    self.process_info_obj = read_file(self.process_info_path,
                                      True)[self.action]

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path,
                                          self.base_directory_name)

    update_process_info_message(
        self.process_info_path, self.action,
        'Performing fixity checks and gathering metadata...')

    self.extra_metadata = func_dict['extra_metadata']

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.download_failed_fixity = []
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.all_keywords = []
    self.initial_keywords = []
    self.manual_keywords = []
    self.enhanced_keywords = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        # Note: This method of calling the function needs to stay this way for test Mock
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)

        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource or validate the metadata file
        if resource['title'] == 'PRESQT_FTS_METADATA.json':
            is_valid = validate_metadata(self, resource)
            if not is_valid:
                # Keep the invalid metadata file but rename it so it does
                # not masquerade as a valid FTS metadata file.
                resource['path'] = resource['path'].replace(
                    'PRESQT_FTS_METADATA.json',
                    'INVALID_PRESQT_FTS_METADATA.json')
                create_download_metadata(self, resource, fixity_obj)
                write_file(
                    '{}{}'.format(self.resource_main_dir, resource['path']),
                    resource['file'])
        else:
            create_download_metadata(self, resource, fixity_obj)
            write_file(
                '{}{}'.format(self.resource_main_dir, resource['path']),
                resource['file'])

    # Enhance the source keywords
    self.keyword_dict = {}
    if self.action == 'resource_transfer_in':
        if self.supports_keywords:
            if self.keyword_action == 'automatic':
                self.keyword_dict = automatic_keywords(self)
            elif self.keyword_action == 'manual':
                self.keyword_dict = manual_keywords(self)
    self.keyword_enhancement_successful = True

    # Create PresQT action metadata
    update_process_info_message(self.process_info_path, self.action,
                                "Creating PRESQT_FTS_METADATA...")
    self.source_username = func_dict['action_metadata']['sourceUsername']
    if self.action == 'resource_transfer_in':
        source_target_data = get_target_data(self.source_target_name)
        destination_target_data = get_target_data(
            self.destination_target_name)
        self.details = "PresQT Transfer from {} to {}".format(
            source_target_data['readable_name'],
            destination_target_data['readable_name'])
    else:
        source_target_data = get_target_data(self.source_target_name)
        self.details = "PresQT Download from {}".format(
            source_target_data['readable_name'])

    self.action_metadata = {
        'id': str(uuid4()),
        'details': self.details,
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': self.source_username,
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'keywords': self.keyword_dict,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # TODO: Move this up to make it occur after we loop through func_dict['resources'] and write
    # resources
    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(
            os.path.dirname('{}{}'.format(self.resource_main_dir,
                                          container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload method
    if self.action == 'resource_transfer_in':
        self.action_metadata[
            'destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            self, 'Download', self.download_fixity, True,
            self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create Metadata file
        final_fts_metadata_data = create_fts_metadata(
            self.all_keywords, self.action_metadata,
            self.source_fts_metadata_actions, self.extra_metadata)

        # Validate the final metadata
        metadata_validation = schema_validator(
            'presqt/json_schemas/metadata_schema.json',
            final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            self, 'Download', self.download_fixity, metadata_validation,
            self.action_metadata)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Write metadata file.
        write_file(
            os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
            final_fts_metadata_data, True)

        # Add the fixity file to the disk directory
        write_file(
            os.path.join(self.resource_main_dir, 'fixity_info.json'),
            fixity_info, True)

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir,
                      "{}.zip".format(self.resource_main_dir),
                      self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(
            self.base_directory_name)
        self.process_info_obj[
            'failed_fixity'] = self.download_failed_fixity
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)

        # Optionally email the user a link to retrieve the zipped download.
        if self.email:
            # Build link to retrieve the download
            download_reverse = reverse('job_status',
                                       kwargs={
                                           "action": "download",
                                           "response_format": "zip"
                                       })
            download_url = self.request.build_absolute_uri(download_reverse)
            final_download_url = "{}?ticket_number={}".format(
                download_url, self.ticket_number)
            context = {
                "download_url": final_download_url,
                "download_message": self.process_info_obj['message'],
                "failed_fixity": self.process_info_obj['failed_fixity']
            }
            email_blaster(self.email, "PresQT Download Complete", context,
                          "emails/download_email.html")
        return True
def _upload_resource(self):
    """
    Upload resources to the target and perform a fixity check on the resulting hashes.

    When called for a plain upload (``self.action == 'resource_upload'``)
    this also builds the initial FTS metadata from the provided bag.

    Returns
    -------
    bool
        True on success; False if the upload (or the finite-depth
        preparation step) raised a PresQTResponseException, in which case
        the failure has been recorded in process_info.
    """
    action = 'resource_upload'

    # Write the process id to the process_info file
    self.process_info_obj[
        'function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    # Data directory in the bag
    self.data_directory = '{}/data'.format(self.resource_main_dir)

    # If we are uploading (not transferring) then create the initial metadata based on the
    # zipped bag provided.
    if self.action == 'resource_upload':
        # Build one FTS file-metadata entry per file found in the bag's
        # data directory; paths are made relative to the data directory.
        self.new_fts_metadata_files = []
        for path, subdirs, files in os.walk(self.data_directory):
            for name in files:
                self.new_fts_metadata_files.append({
                    'destinationHashes': {},
                    'destinationPath':
                    os.path.join(path, name)[len(self.data_directory):],
                    'failedFixityInfo': [],
                    'title': name,
                    'sourceHashes': {
                        self.hash_algorithm:
                        self.file_hashes[os.path.join(path, name)]
                    },
                    'sourcePath':
                    os.path.join(path, name)[len(self.data_directory):],
                    'extra': {}
                })
        self.action_metadata = {
            'id': str(uuid4()),
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': 'Local Machine',
            'sourceUsername': None,
            'destinationTargetName': self.destination_target_name,
            'destinationUsername': None,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

    # If the target destination's storage hierarchy has a finite depth then zip the resources
    # to be uploaded along with their metadata.
    # Also, create metadata files for the new zip file to be uploaded.
    if self.infinite_depth is False:
        try:
            structure_validation(self)
            finite_depth_upload_helper(self)
        except PresQTResponseException as e:
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['upload_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Update the expiration from 5 days to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            write_file(self.process_info_path, self.process_info_obj, True)
            return False

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.destination_target_name, action)

    # Upload the resources. func_dict has the following format:
    #   {
    #        'resources_ignored': resources_ignored,
    #        'resources_updated': resources_updated,
    #        'action_metadata': action_metadata,
    #        'file_metadata_list': file_metadata_list,
    #        'project_id': title
    #    }
    try:
        structure_validation(self)
        func_dict = func(self.destination_token,
                         self.destination_resource_id, self.data_directory,
                         self.hash_algorithm, self.file_duplicate_action)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['upload_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 days to 1 hour from now. We can delete this faster
        # because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() +
                                                  relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return False

    # Check if fixity has failed on any files during a transfer. If so, update the
    # process_info_data file.
    self.upload_fixity = True
    self.upload_failed_fixity = []
    for resource in func_dict['file_metadata_list']:
        resource['failed_fixity_info'] = []
        # Ignored resources are skipped: their hashes were never uploaded.
        if resource['destinationHash'] != self.file_hashes[resource['actionRootPath']] \
                and resource['actionRootPath'] not in func_dict['resources_ignored']:
            self.upload_fixity = False
            self.upload_failed_fixity.append(
                resource['actionRootPath'][len(self.data_directory):])
            resource['failed_fixity_info'].append({
                'NewGeneratedHash':
                self.file_hashes[resource['actionRootPath']],
                'algorithmUsed': self.hash_algorithm,
                'reasonFixityFailed': "Either the destination did not provide a hash "
                                      "or fixity failed during upload."
            })

    # Strip the server created directory prefix of the file paths for ignored and updated files
    resources_ignored = [
        file[len(self.data_directory):]
        for file in func_dict['resources_ignored']
    ]
    self.process_info_obj['resources_ignored'] = resources_ignored
    resources_updated = [
        file[len(self.data_directory):]
        for file in func_dict['resources_updated']
    ]
    self.process_info_obj['resources_updated'] = resources_updated

    self.metadata_validation = create_upload_metadata(
        self, func_dict['file_metadata_list'], func_dict['action_metadata'],
        func_dict['project_id'], resources_ignored, resources_updated)

    # Validate the final metadata
    upload_message = get_action_message('Upload', self.upload_fixity,
                                        self.metadata_validation,
                                        self.action_metadata)
    self.process_info_obj['message'] = upload_message

    if self.action == 'resource_upload':
        # Update server process file
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['hash_algorithm'] = self.hash_algorithm
        self.process_info_obj['failed_fixity'] = self.upload_failed_fixity
        write_file(self.process_info_path, self.process_info_obj, True)
    else:
        # During a transfer the caller finalizes process_info; only record
        # the per-step upload status here.
        self.process_info_obj['upload_status'] = upload_message

    return True
def _download_resource(self):
    """
    Downloads the resources from the target, performs a fixity check,
    zips them up in BagIt format.

    When called as part of a transfer (``self.action ==
    'resource_transfer_in'``) the bag is left on disk for
    ``_upload_resource``; for a plain download the bag is zipped and
    process_info is finalized.

    Returns
    -------
    bool
        True on success; False if the target fetch raised a
        PresQTResponseException, in which case the failure has been
        recorded in process_info.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj[
        'function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #   {
    #        'resources': files,
    #        'empty_containers': empty_containers,
    #        'action_metadata': action_metadata
    #    }
    try:
        func_dict = func(self.source_token, self.source_resource_id)
        # If the resource is being transferred, has only one file, and that file is PresQT
        # metadata then raise an error.
        if self.action == 'resource_transfer_in' \
                and len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            # NOTE(review): message contains a double negative
            # ("cannot not") — confirm intended wording.
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 days to 1 hour from now. We can delete this faster because
        # it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() +
                                                  relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return False

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path,
                                          self.base_directory_name)

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.download_failed_fixity = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)

        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource. Return True if a valid FTS metadata file is found.
        if create_download_metadata(self, resource, fixity_obj):
            # Don't write valid FTS metadata file.
            continue

        # Save the file to the disk.
        write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                   resource['file'])

    # Create PresQT action metadata
    self.action_metadata = {
        'id': str(uuid4()),
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': func_dict['action_metadata']['sourceUsername'],
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(
            os.path.dirname('{}{}'.format(self.resource_main_dir,
                                          container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload method
    if self.action == 'resource_transfer_in':
        self.action_metadata[
            'destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create and write metadata file.
        final_fts_metadata_data = create_fts_metadata(
            self.action_metadata, self.source_fts_metadata_actions)
        write_file(
            os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
            final_fts_metadata_data, True)

        # Validate the final metadata
        metadata_validation = schema_validator(
            'presqt/json_schemas/metadata_schema.json',
            final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            'Download', self.download_fixity, metadata_validation,
            self.action_metadata)

        # Add the fixity file to the disk directory
        write_file(
            os.path.join(self.resource_main_dir, 'fixity_info.json'),
            fixity_info, True)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir,
                      "{}.zip".format(self.resource_main_dir),
                      self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(
            self.base_directory_name)
        self.process_info_obj[
            'failed_fixity'] = self.download_failed_fixity
        write_file(self.process_info_path, self.process_info_obj, True)
        return True