def write_and_validate_metadata(instance, project_id, fts_metadata_data):
    """
    Write FTS metadata to the correct place in the target's project and
    validate it against the FTS metadata JSON schema.

    Parameters
    ----------
    instance: BaseResource Class Instance
        Class instance for the action
    project_id: str
        ID of the project the resource metadata should be uploaded to
    fts_metadata_data: dict
        Full FTS metadata to be written.

    Returns
    -------
    The schema-validation result for the final FTS metadata (True when valid,
    an error otherwise), or the raised PresQTError when the upload itself fails.
    """
    from presqt.api_v1.utilities import FunctionRouter

    # Look up the destination target's metadata-upload function.
    upload_metadata = FunctionRouter.get_function(
        instance.destination_target_name, 'metadata_upload')

    try:
        upload_metadata(instance.destination_token, project_id, fts_metadata_data)
    except PresQTError as error:
        # The upload failed -- surface the error object as the result.
        return error

    # The upload succeeded -- report the metadata's schema-validation result.
    return schema_validator('presqt/json_schemas/metadata_schema.json',
                            fts_metadata_data)
def get_upload_source_metadata(instance, bag):
    """
    Collect every FTS metadata file found in the bag. Valid metadata files
    have their actions recorded and are removed from the bag; invalid ones
    are renamed in place so their contents are never merged.

    Parameters
    ----------
    instance: BaseResource class instance
        Class we want to add the attributes to
    bag: Bag Class instance
        The bag we want to traverse and update.
    """
    instance.source_fts_metadata_actions = []

    for bag_file in bag.payload_files():
        # Only PRESQT_FTS_METADATA.json payload files are of interest.
        if os.path.split(bag_file)[-1] != 'PRESQT_FTS_METADATA.json':
            continue

        metadata_path = os.path.join(instance.resource_main_dir, bag_file)
        source_metadata_content = read_file(metadata_path, True)

        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            source_metadata_content) is True:
            # Valid: keep its actions and drop the file from the bag.
            instance.source_fts_metadata_actions = (
                instance.source_fts_metadata_actions
                + source_metadata_content['actions'])
            os.remove(metadata_path)
        else:
            # Invalid: keep the file but flag it with a new name.
            invalid_metadata_path = os.path.join(
                os.path.split(metadata_path)[0],
                'INVALID_PRESQT_FTS_METADATA.json')
            os.rename(metadata_path, invalid_metadata_path)

        # Re-save so the bag's manifests reflect the removal/rename.
        bag.save(manifests=True)
def test_invalid_json(self):
    """
    Verify schema_validator does not return True when an invalid JSON
    file and JSONSchema are provided.
    """
    invalid_json = [{'name': 3}]
    invalid_path = 'presqt/json_schemas/tests/test_json_file.json'

    # Create the test JSON file
    with open(invalid_path, 'w') as json_file:
        json.dump(invalid_json, json_file)

    try:
        # BUG FIX: the original called self.assertRaises(ValidationError)
        # with no callable or context block, which asserted nothing.
        # schema_validator returns its result rather than raising, so
        # capture the return value and assert on it.
        validation = schema_validator(self.schema_path, invalid_path)
        self.assertIsNot(validation, True)
    finally:
        # Delete the test JSON file even if the assertion fails.
        os.remove(invalid_path)
def create_download_metadata(instance, resource, fixity_obj):
    """
    Add metadata for a given resource to the list of file metadata. If the
    resource is a valid FTS metadata file, grab its contents and don't write
    metadata for it.

    Parameters
    ----------
    instance: BaseResource Class Instance
        Class instance we save metadata to.
    resource: Dict
        Resource dictionary we want metadata for.
    fixity_obj: Dict
        Dictionary of fixity information for this resource.

    Returns
    -------
    True if the resource is a valid FTS metadata file.
    False if the resource is not a valid FTS metadata file.
    """
    if resource['title'] == 'PRESQT_FTS_METADATA.json':
        # This is the PresQT FTS metadata file -- parse its contents.
        source_fts_metadata_content = json.loads(resource['file'].decode())

        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            source_fts_metadata_content) is True:
            # Valid metadata: record its actions; the caller skips writing it.
            instance.source_fts_metadata_actions = (
                instance.source_fts_metadata_actions
                + source_fts_metadata_content['actions'])
            return True

        # Invalid metadata: rename it so its contents are never treated as
        # valid FTS metadata, then fall through and record file metadata.
        resource['path'] = resource['path'].replace(
            'PRESQT_FTS_METADATA.json', 'INVALID_PRESQT_FTS_METADATA.json')

    file_metadata = {
        'destinationPath': resource['path'],
        'destinationHashes': {},
        'failedFixityInfo': [],
        'title': resource['title'],
        'sourceHashes': resource['hashes'],
        'sourcePath': resource['source_path'],
        'extra': resource['extra_metadata']
    }

    # Record fixity-failure details when the hash check did not pass.
    if not fixity_obj['fixity']:
        file_metadata['failedFixityInfo'].append({
            'NewGeneratedHash': fixity_obj['presqt_hash'],
            'algorithmUsed': fixity_obj['hash_algorithm'],
            'reasonFixityFailed': fixity_obj['fixity_details']
        })

    # Append file metadata to fts metadata list
    instance.new_fts_metadata_files.append(file_metadata)
    return False
def validate_metadata(instance, resource):
    """
    Validate a downloaded PRESQT_FTS_METADATA.json resource against the FTS
    metadata schema and, when valid, merge its actions and keywords into the
    instance.

    Parameters
    ----------
    instance: BaseResource class instance
        Instance whose `source_fts_metadata_actions` and `all_keywords`
        attributes are extended on success.
    resource: dict
        Resource dictionary whose 'file' bytes hold the metadata JSON.

    Returns
    -------
    True when the metadata is schema-valid, False otherwise (the caller
    renames and writes invalid metadata instead of merging it).
    """
    source_fts_metadata_content = json.loads(resource['file'].decode())

    if schema_validator('presqt/json_schemas/metadata_schema.json',
                        source_fts_metadata_content) is not True:
        # Invalid metadata: don't merge its contents.
        return False

    # Valid metadata: record its actions and keywords.
    instance.source_fts_metadata_actions = (
        instance.source_fts_metadata_actions
        + source_fts_metadata_content['actions'])
    instance.all_keywords = (
        instance.all_keywords + source_fts_metadata_content['allKeywords'])
    return True
def handle(self, *args, **options):
    """
    Verify that the Target JSON file is valid against our JSON Schema.

    Exits with a non-zero status code on any validation failure so callers
    (CI, scripts) can detect it:
        1 -- schema validation failed, or a duplicate target name was found
        2 -- a supported action has no corresponding FunctionRouter function
    """
    keys_to_validate = [
        'resource_collection', 'resource_detail', 'resource_download',
        'resource_upload', 'keywords', 'keywords_upload'
    ]

    validation = schema_validator('presqt/json_schemas/target_schema.json',
                                  'presqt/specs/targets.json')

    failure_string = "Target JSON Schema Validation Failed!\n" \
                     "You've modified the targets.json in such a way that it is incorrectly " \
                     "formatted.\nPlease refer to the project docs."

    # If JSON Schema validation has failed
    if validation is not True:
        print(validation)
        print(failure_string)
        # BUG FIX: previously exited with status 0 here, which signals
        # success to the caller even though validation failed.
        exit(1)
    else:
        # Further validation
        json_data = read_file('presqt/specs/targets.json', True)
        name_list = []
        for data in json_data:
            # Verify that there are no duplicate name values
            if data['name'] in name_list:
                print(failure_string)
                # (Unreachable 'break' after exit(1) removed.)
                exit(1)

            # Verify that all actions for this target which are 'true' have a
            # corresponding function in FunctionRouter for it.
            for key, value in data['supported_actions'].items():
                if key in keys_to_validate and value is True:
                    try:
                        getattr(FunctionRouter, f"{data['name']}_{key}")
                    except AttributeError:
                        print(
                            f"{data['name']} does not have a corresponding function in FunctionRouter for "
                            f"the attribute {key}")
                        exit(2)
            name_list.append(data['name'])

        # Validation has passed!
        print("Target JSON Schema Validation Passed")
def get_upload_source_metadata(instance, bag):
    """
    Get all FTS metadata files in the bag. If they are valid then get their
    contents, otherwise rename the invalid metadata file.

    Parameters
    ----------
    instance: BaseResource class instance
        Class we want to add the attributes to
    bag: Bag Class instance
        The bag we want to traverse and update.

    Raises
    ------
    PresQTValidationError
        If a PRESQT_FTS_METADATA.json file in the bag is not parseable JSON.
    """
    instance.source_fts_metadata_actions = []
    instance.all_keywords = []
    instance.extra_metadata = {}

    for bag_file in bag.payload_files():
        if os.path.split(bag_file)[-1] == 'PRESQT_FTS_METADATA.json':
            metadata_path = os.path.join(instance.resource_main_dir, bag_file)
            try:
                source_metadata_content = read_file(metadata_path, True)
            except JSONDecodeError:
                # BUG FIX: removed leftover debug statement print('error!!!')
                # that was emitted before raising.
                raise PresQTValidationError(
                    "PRESQT_FTS_METADATA.json is not valid JSON",
                    status.HTTP_400_BAD_REQUEST)

            # If the FTS metadata is valid then remove it from the bag and save the actions.
            if schema_validator('presqt/json_schemas/metadata_schema.json',
                                source_metadata_content) is True:
                instance.source_fts_metadata_actions = \
                    instance.source_fts_metadata_actions + source_metadata_content['actions']
                instance.all_keywords = \
                    instance.all_keywords + source_metadata_content['allKeywords']
                if 'extra_metadata' in source_metadata_content.keys():
                    instance.extra_metadata = source_metadata_content['extra_metadata']
                os.remove(os.path.join(instance.resource_main_dir, bag_file))
                bag.save(manifests=True)
            # If the FTS metadata is invalid then rename the file in the bag.
            else:
                invalid_metadata_path = os.path.join(
                    os.path.split(metadata_path)[0], 'INVALID_PRESQT_FTS_METADATA.json')
                os.rename(metadata_path, invalid_metadata_path)
                bag.save(manifests=True)
def _download_resource(self):
    """
    Download the resources from the target, perform a fixity check on each
    file, gather FTS metadata, and zip everything up in BagIt format.

    Returns
    -------
    False if the target fetch raised a PresQTResponseException, True
    otherwise. For 'resource_transfer_in' the bagged directory is left on
    disk for the upload step; for a plain download it is zipped and the
    process_info file is marked finished.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #   {
    #       'resources': files,
    #       'empty_containers': empty_containers,
    #       'action_metadata': action_metadata
    #   }
    try:
        func_dict = func(self.source_token, self.source_resource_id,
                         self.process_info_path, self.action)

        # If the resource is being transferred, has only one file, and that file is the
        # PresQT metadata then raise an error.
        if self.action == 'resource_transfer_in' and \
                len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # TODO: Functionalize this error section
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 hours to 1 hour from now. We can delete this faster
        # because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)
        return False

    # Get the latest contents of the job's process_info.json file
    self.process_info_obj = read_file(self.process_info_path, True)[self.action]

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)
    update_process_info_message(self.process_info_path, self.action,
                                'Performing fixity checks and gathering metadata...')
    self.extra_metadata = func_dict['extra_metadata']

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.download_failed_fixity = []
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.all_keywords = []
    self.initial_keywords = []
    self.manual_keywords = []
    self.enhanced_keywords = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        # Note: This method of calling the function needs to stay this way for test Mock
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)
        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource or validate the metadata file
        if resource['title'] == 'PRESQT_FTS_METADATA.json':
            is_valid = validate_metadata(self, resource)
            if not is_valid:
                # Invalid metadata: rename it and write it out so the
                # contents are kept but never merged as valid FTS metadata.
                resource['path'] = resource['path'].replace(
                    'PRESQT_FTS_METADATA.json', 'INVALID_PRESQT_FTS_METADATA.json')
                create_download_metadata(self, resource, fixity_obj)
                write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                           resource['file'])
            # Valid metadata files are intentionally not written to disk.
        else:
            create_download_metadata(self, resource, fixity_obj)
            write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                       resource['file'])

    # Enhance the source keywords
    self.keyword_dict = {}
    if self.action == 'resource_transfer_in':
        if self.supports_keywords:
            if self.keyword_action == 'automatic':
                self.keyword_dict = automatic_keywords(self)
            elif self.keyword_action == 'manual':
                self.keyword_dict = manual_keywords(self)
        # NOTE(review): flattened source makes the indent level of this flag
        # ambiguous -- placed at the transfer level; confirm against history.
        self.keyword_enhancement_successful = True

    # Create PresQT action metadata
    update_process_info_message(self.process_info_path, self.action,
                                "Creating PRESQT_FTS_METADATA...")
    self.source_username = func_dict['action_metadata']['sourceUsername']
    if self.action == 'resource_transfer_in':
        source_target_data = get_target_data(self.source_target_name)
        destination_target_data = get_target_data(self.destination_target_name)
        self.details = "PresQT Transfer from {} to {}".format(
            source_target_data['readable_name'], destination_target_data['readable_name'])
    else:
        source_target_data = get_target_data(self.source_target_name)
        self.details = "PresQT Download from {}".format(
            source_target_data['readable_name'])
    self.action_metadata = {
        'id': str(uuid4()),
        'details': self.details,
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': self.source_username,
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'keywords': self.keyword_dict,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # TODO: Move this up to make it occur after we loop through func_dict['resources'] and
    # write resources
    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(os.path.dirname('{}{}'.format(self.resource_main_dir, container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload
    # method
    if self.action == 'resource_transfer_in':
        self.action_metadata['destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            self, 'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create Metadata file
        final_fts_metadata_data = create_fts_metadata(self.all_keywords,
                                                      self.action_metadata,
                                                      self.source_fts_metadata_actions,
                                                      self.extra_metadata)

        # Validate the final metadata
        metadata_validation = schema_validator('presqt/json_schemas/metadata_schema.json',
                                               final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            self, 'Download', self.download_fixity, metadata_validation, self.action_metadata)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Write metadata file. (After bagging, so it sits outside the payload manifest.)
        write_file(os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
                   final_fts_metadata_data, True)

        # Add the fixity file to the disk directory
        write_file(os.path.join(self.resource_main_dir, 'fixity_info.json'),
                   fixity_info, True)

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir, "{}.zip".format(self.resource_main_dir),
                      self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(self.base_directory_name)
        self.process_info_obj['failed_fixity'] = self.download_failed_fixity
        update_or_create_process_info(self.process_info_obj, self.action, self.ticket_number)

        if self.email:
            # Build link to retrieve the download
            download_reverse = reverse('job_status', kwargs={"action": "download",
                                                             "response_format": "zip"})
            download_url = self.request.build_absolute_uri(download_reverse)
            final_download_url = "{}?ticket_number={}".format(download_url,
                                                              self.ticket_number)
            context = {
                "download_url": final_download_url,
                "download_message": self.process_info_obj['message'],
                "failed_fixity": self.process_info_obj['failed_fixity']
            }
            email_blaster(self.email, "PresQT Download Complete", context,
                          "emails/download_email.html")
        return True
def gitlab_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the repo.

    Parameters
    ----------
    token : str
        The user's GitLab token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If any GitLab API request returns an unexpected status code.
    """
    headers, user_id = validation_check(token)

    # Check if metadata exists
    base_post_url = "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA.json?ref=master".format(
        project_id)
    metadata_file_response = requests.get(base_post_url, headers=headers)
    metadata_file_data = metadata_file_response.json()
    request_type = requests.post

    # If a metadata file already exists then grab its contents
    if metadata_file_response.status_code == 200:
        base64_metadata = base64.b64decode(metadata_file_data['content'])
        updated_metadata = json.loads(base64_metadata)

        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            updated_metadata) is not True:
            # We need to change the file name, this metadata is improperly formatted and
            # therefore invalid.
            invalid_base64_metadata = base64.b64encode(base64_metadata)
            data = {"branch": "master",
                    "commit_message": "PresQT Invalid Metadata Upload",
                    "encoding": "base64",
                    "content": invalid_base64_metadata}
            invalid_metadata_response = requests.post(
                'https://gitlab.com/api/v4/projects/{}/repository/files/INVALID_PRESQT_FTS_METADATA%2Ejson'.format(
                    project_id),
                headers=headers, data=data)
            if invalid_metadata_response.status_code != 201:
                raise PresQTError(
                    "The request to rename the invalid metadata file has returned a {} error code from Gitlab.".format(
                        invalid_metadata_response.status_code))
            # The original (invalid) PRESQT_FTS_METADATA.json still exists, so
            # the new metadata must be written with PUT (update) not POST (create).
            request_type = requests.put
        else:
            # Loop through each 'action' in both metadata files and make a new list of them.
            joined_actions = [entry for entry in itertools.chain(
                metadata_dict['actions'], updated_metadata['actions'])]
            joined_keywords = [entry for entry in itertools.chain(
                metadata_dict['allKeywords'], updated_metadata['allKeywords'])]
            updated_metadata['actions'] = joined_actions
            updated_metadata['allKeywords'] = list(set(joined_keywords))

            updated_metadata_bytes = json.dumps(updated_metadata, indent=4).encode('utf-8')
            updated_base64_metadata = base64.b64encode(updated_metadata_bytes)

            data = {"branch": "master",
                    "commit_message": "Updated PresQT Metadata Upload",
                    "encoding": "base64",
                    "content": updated_base64_metadata}

            metadata_response = requests.put(
                "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA%2Ejson".format(
                    project_id),
                headers=headers, data=data)
            if metadata_response.status_code != 200:
                raise PresQTError(
                    "The request to update the metadata file has returned a {} error code from Gitlab.".format(
                        metadata_response.status_code))
            return

    metadata_bytes = json.dumps(metadata_dict, indent=4).encode('utf-8')
    base64_metadata = base64.b64encode(metadata_bytes)
    post_url = "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA%2Ejson".format(
        project_id)
    data = {"branch": "master",
            "commit_message": "PresQT Metadata Upload",
            "encoding": "base64",
            "content": base64_metadata}
    response = request_type(post_url, headers=headers, data=data)

    # BUG FIX: GitLab returns 201 when a file is created (POST) but 200 when an
    # existing file is updated (PUT -- used when an invalid metadata file was
    # found above). The original only accepted 201, so the successful PUT path
    # raised a PresQTError.
    if response.status_code not in (200, 201):
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from GitLab.".format(
                response.status_code))

    # Add extra metadata to the top level resource
    if 'extra_metadata' in metadata_dict.keys() and metadata_dict['extra_metadata']:
        attribute_url = "https://gitlab.com/api/v4/projects/{}".format(project_id)
        upload_extra_metadata(metadata_dict['extra_metadata'], headers, attribute_url)
def osf_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the repo.

    Parameters
    ----------
    token : str
        The user's OSF token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the project

    Raises
    ------
    PresQTError
        If any OSF API request returns an unexpected status code.
    """
    osf_instance = OSF(token)
    header = {'Authorization': 'Bearer {}'.format(token)}
    file_name = 'PRESQT_FTS_METADATA.json'
    encoded_metadata = json.dumps(metadata_dict, indent=4).encode('utf-8')
    put_url = "https://files.osf.io/v1/resources/{}/providers/osfstorage/"

    # We need to find out if this project already has metadata associated with it.
    project_data = osf_instance._get_all_paginated_data(
        'https://api.osf.io/v2/nodes/{}/files/osfstorage'.format(project_id))

    for data in project_data:
        if data['attributes']['name'] == file_name:
            old_metadata_file = requests.get(data['links']['move'],
                                             headers=header).content
            # Update the existing metadata
            updated_metadata = json.loads(old_metadata_file)

            if schema_validator('presqt/json_schemas/metadata_schema.json',
                                updated_metadata) is not True:
                # We need to change the file name, this metadata is improperly formatted
                # and therefore invalid.
                rename_payload = {
                    "action": "rename",
                    "rename": "INVALID_PRESQT_FTS_METADATA.json"
                }
                response = requests.post(
                    data['links']['move'], headers=header,
                    data=json.dumps(rename_payload).encode('utf-8'))
                if response.status_code != 201:
                    raise PresQTError(
                        "The request to rename the invalid metadata file has returned a {} error code from OSF."
                        .format(response.status_code))
                # The invalid file has been renamed; leave the loop so a fresh
                # metadata file is created below.
                break

            # Loop through each 'action' in both metadata files and make a new list of them.
            joined_actions = [
                entry for entry in itertools.chain(metadata_dict['actions'],
                                                   updated_metadata['actions'])
            ]
            joined_keywords = [
                entry for entry in itertools.chain(metadata_dict['allKeywords'],
                                                   updated_metadata['allKeywords'])
            ]
            updated_metadata['actions'] = joined_actions
            # De-duplicate the merged keywords.
            updated_metadata['allKeywords'] = list(set(joined_keywords))
            encoded_metadata = json.dumps(updated_metadata, indent=4).encode('utf-8')

            # Now we need to update the metadata file with this updated metadata
            response = requests.put(data['links']['upload'], headers=header,
                                    params={'kind': 'file'}, data=encoded_metadata)
            # When updating an existing metadata file, OSF returns a 200 status
            if response.status_code != 200:
                raise PresQTError(
                    "The request to update the metadata file has returned a {} error code from OSF."
                    .format(response.status_code))
            return

    # If there is no existing metadata file, then create a new one.
    response = requests.put(put_url.format(project_id), headers=header,
                            params={"name": file_name}, data=encoded_metadata)
    if response.status_code != 201:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from OSF"
            .format(response.status_code))

    # Add extra metadata to the top level resource
    if 'extra_metadata' in metadata_dict.keys() and metadata_dict['extra_metadata']:
        attribute_url = "https://api.osf.io/v2/nodes/{}/".format(project_id)
        upload_extra_metadata(metadata_dict['extra_metadata'], header,
                              attribute_url, project_id)
def test_valid_json(self):
    """
    schema_validator returns True when given a valid JSON file and JSONSchema.
    """
    self.assertEqual(
        schema_validator(self.schema_path, self.target_json_path), True)
def zenodo_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the project.

    Parameters
    ----------
    token : str
        The user's Zenodo token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If any Zenodo API request returns an unexpected status code.
    """
    auth_parameter = zenodo_validation_check(token)
    post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(project_id)
    file_name = 'PRESQT_FTS_METADATA.json'

    project_files = requests.get(post_url, params=auth_parameter).json()

    for file in project_files:
        if file['filename'] == file_name:
            # Download the metadata
            old_metadata_file = requests.get(file['links']['download'],
                                             params=auth_parameter).content
            # Load the existing metadata to be updated.
            updated_metadata = json.loads(old_metadata_file)

            if schema_validator('presqt/json_schemas/metadata_schema.json',
                                updated_metadata) is not True:
                # We need to change the file name, this metadata is improperly formatted
                # and therefore invalid. Zenodo is having issues with their put method
                # atm.......
                # Need to delete the old metadata file.
                requests.delete(file['links']['self'], params=auth_parameter)
                response_status = metadata_post_request(
                    'INVALID_PRESQT_FTS_METADATA.json', updated_metadata,
                    auth_parameter, post_url)
                if response_status != 201:
                    raise PresQTError(
                        "The request to rename the invalid metadata file has returned a {} error code from Zenodo."
                        .format(response_status))
                # Leave the loop; a fresh metadata file is created below.
                break

            # Need to delete the old metadata file.
            requests.delete(file['links']['self'], params=auth_parameter)

            # Loop through each 'action' in both metadata files and make a new list of them.
            joined_actions = [
                entry for entry in itertools.chain(metadata_dict['actions'],
                                                   updated_metadata['actions'])
            ]
            joined_keywords = [
                entry for entry in itertools.chain(metadata_dict['allKeywords'],
                                                   updated_metadata['allKeywords'])
            ]
            updated_metadata['actions'] = joined_actions
            # De-duplicate the merged keywords.
            updated_metadata['allKeywords'] = list(set(joined_keywords))

            response_status = metadata_post_request(file_name, updated_metadata,
                                                    auth_parameter, post_url)
            # When updating an existing metadata file, Zenodo returns a 201 status
            if response_status != 201:
                raise PresQTError(
                    "The request to update the metadata file has returned a {} error code from Zenodo."
                    .format(response_status))
            return

    # No existing metadata file was found (or the invalid one was renamed):
    # create a new one from metadata_dict.
    response_status = metadata_post_request(file_name, metadata_dict,
                                            auth_parameter, post_url)
    if response_status != 201:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from Zenodo."
            .format(response_status))

    # Add extra metadata to the top level resource
    if 'extra_metadata' in metadata_dict.keys() and metadata_dict['extra_metadata']:
        attribute_url = "https://zenodo.org/api/deposit/depositions/{}".format(project_id)
        upload_extra_metadata(metadata_dict['extra_metadata'], auth_parameter,
                              attribute_url)
def github_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the repo.

    Parameters
    ----------
    token : str
        The user's GitHub token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If any GitHub API request returns an unexpected status code.
    """
    header, username = validation_check(token)

    # Resolve the repository's name from its numeric id.
    project_data = requests.get(
        "https://api.github.com/repositories/{}".format(project_id), headers=header)
    if project_data.status_code == 200:
        project_name = project_data.json()['name']
    else:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from GitHub."
            .format(project_data.status_code))

    base_put_url = "https://api.github.com/repos/{}/{}/contents/".format(
        username, project_name)

    metadata_file_data = requests.get(
        '{}PRESQT_FTS_METADATA.json'.format(base_put_url), headers=header).json()
    # The 'sha' key is only present when the metadata file already exists.
    try:
        sha = metadata_file_data['sha']
    except KeyError:
        sha = None

    # If a metadata file already exists then grab its contents
    if sha:
        base64_metadata = base64.b64decode(metadata_file_data['content'])
        updated_metadata = json.loads(base64_metadata)
        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            updated_metadata) is not True:
            # We need to change the file name, this metadata is improperly formatted and
            # therefore invalid.
            invalid_base64_metadata = base64.b64encode(base64_metadata).decode('utf-8')
            rename_payload = {
                "message": "PresQT Invalid Upload",
                "committer": {
                    "name": "PresQT",
                    "email": "N/A"
                },
                "content": invalid_base64_metadata
            }
            response = requests.put(
                '{}{}'.format(base_put_url, 'INVALID_PRESQT_FTS_METADATA.json'),
                headers=header, data=json.dumps(rename_payload))
            if response.status_code != 201:
                raise PresQTError(
                    "The request to rename the invalid metadata file has returned a {} error code from Github."
                    .format(response.status_code))
            # Fall through: the new metadata is written below, overwriting the
            # still-present invalid PRESQT_FTS_METADATA.json via its sha.
        else:
            # Loop through each 'action' in both metadata files and make a new list of them.
            joined_actions = [
                entry for entry in itertools.chain(metadata_dict['actions'],
                                                   updated_metadata['actions'])
            ]
            joined_keywords = [
                entry for entry in itertools.chain(metadata_dict['allKeywords'],
                                                   updated_metadata['allKeywords'])
            ]
            updated_metadata['actions'] = joined_actions
            # De-duplicate the merged keywords.
            updated_metadata['allKeywords'] = list(set(joined_keywords))

            updated_metadata_bytes = json.dumps(updated_metadata, indent=4).encode('utf-8')
            updated_base64_metadata = base64.b64encode(
                updated_metadata_bytes).decode('utf-8')
            update_payload = {
                "message": "PresQT Update",
                "committer": {
                    "name": "PresQT",
                    "email": "N/A"
                },
                "branch": "master",
                "content": updated_base64_metadata,
                "sha": sha
            }
            # Now we need to update the metadata file with this updated metadata
            response = requests.put(
                '{}{}'.format(base_put_url, 'PRESQT_FTS_METADATA.json'),
                headers=header, data=json.dumps(update_payload))
            if response.status_code != 200:
                raise PresQTError(
                    "The request to create a metadata file has resulted in a {} error code from GitHub."
                    .format(response.status_code))
            return

    metadata_bytes = json.dumps(metadata_dict, indent=4).encode('utf-8')
    base64_metadata = base64.b64encode(metadata_bytes).decode('utf-8')
    payload = {
        "message": "PresQT Upload",
        "sha": sha,
        "committer": {
            "name": "PresQT",
            "email": "N/A"
        },
        "content": base64_metadata
    }
    response = requests.put('{}{}'.format(base_put_url, 'PRESQT_FTS_METADATA.json'),
                            headers=header, data=json.dumps(payload))
    # 201 = created a new file; 200 = updated the existing (invalid) file via its sha.
    if response.status_code != 201 and response.status_code != 200:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from GitHub."
            .format(response.status_code))

    # Add extra metadata to the top level resource
    if 'extra_metadata' in metadata_dict.keys() and metadata_dict['extra_metadata']:
        attribute_url = "https://api.github.com/repos/{}/{}".format(username, project_name)
        upload_extra_metadata(metadata_dict['extra_metadata'], header, attribute_url)
def _download_resource(self):
    """
    Downloads the resources from the target, performs a fixity check,
    zips them up in BagIt format.

    Returns
    -------
    False if the target fetch raised a PresQTResponseException, True
    otherwise. For 'resource_transfer_in' the bagged directory is left on
    disk for the upload step; for a plain download it is zipped and the
    process_info file is marked finished.
    """
    action = 'resource_download'

    # Write the process id to the process_info file
    self.process_info_obj['function_process_id'] = self.function_process.pid
    write_file(self.process_info_path, self.process_info_obj, True)

    # Fetch the proper function to call
    func = FunctionRouter.get_function(self.source_target_name, action)

    # Fetch the resources. func_dict is in the format:
    #   {
    #       'resources': files,
    #       'empty_containers': empty_containers,
    #       'action_metadata': action_metadata
    #   }
    try:
        func_dict = func(self.source_token, self.source_resource_id)

        # If the resource is being transferred, has only one file, and that file is PresQT
        # metadata then raise an error.
        if self.action == 'resource_transfer_in' \
                and len(func_dict['resources']) == 1 \
                and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
            raise PresQTResponseException(
                'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                status.HTTP_400_BAD_REQUEST)
    except PresQTResponseException as e:
        # Catch any errors that happen within the target fetch.
        # Update the server process_info file appropriately.
        self.process_info_obj['status_code'] = e.status_code
        self.process_info_obj['status'] = 'failed'
        if self.action == 'resource_transfer_in':
            self.process_info_obj['download_status'] = 'failed'
        self.process_info_obj['message'] = e.data
        # Update the expiration from 5 days to 1 hour from now. We can delete this faster
        # because it's an incomplete/failed directory.
        self.process_info_obj['expiration'] = str(timezone.now() + relativedelta(hours=1))
        write_file(self.process_info_path, self.process_info_obj, True)
        return False

    # The directory all files should be saved in.
    self.resource_main_dir = os.path.join(self.ticket_path, self.base_directory_name)

    # For each resource, perform fixity check, gather metadata, and save it to disk.
    fixity_info = []
    self.download_fixity = True
    self.source_fts_metadata_actions = []
    self.new_fts_metadata_files = []
    self.download_failed_fixity = []
    for resource in func_dict['resources']:
        # Perform the fixity check and add extra info to the returned fixity object.
        fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
            resource)
        fixity_info.append(fixity_obj)
        if not fixity_obj['fixity']:
            self.download_failed_fixity.append(resource['path'])

        # Create metadata for this resource. Return True if a valid FTS metadata file is
        # found.
        if create_download_metadata(self, resource, fixity_obj):
            # Don't write valid FTS metadata file.
            continue

        # Save the file to the disk.
        write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                   resource['file'])

    # Create PresQT action metadata
    self.action_metadata = {
        'id': str(uuid4()),
        'actionDateTime': str(timezone.now()),
        'actionType': self.action,
        'sourceTargetName': self.source_target_name,
        'sourceUsername': func_dict['action_metadata']['sourceUsername'],
        'destinationTargetName': 'Local Machine',
        'destinationUsername': None,
        'files': {
            'created': self.new_fts_metadata_files,
            'updated': [],
            'ignored': []
        }
    }

    # Write empty containers to disk
    for container_path in func_dict['empty_containers']:
        # Make sure the container_path has a '/' and the beginning and end
        if container_path[-1] != '/':
            container_path += '/'
        if container_path[0] != '/':
            container_path = '/' + container_path
        os.makedirs(os.path.dirname('{}{}'.format(self.resource_main_dir, container_path)))

    # If we are transferring the downloaded resource then bag it for the resource_upload
    # method
    if self.action == 'resource_transfer_in':
        self.action_metadata['destinationTargetName'] = self.destination_target_name

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])
        self.process_info_obj['download_status'] = get_action_message(
            'Download', self.download_fixity, True, self.action_metadata)
        return True
    # If we are only downloading the resource then create metadata, bag, zip,
    # and update the server process file.
    else:
        # Create and write metadata file.
        final_fts_metadata_data = create_fts_metadata(self.action_metadata,
                                                      self.source_fts_metadata_actions)
        write_file(os.path.join(self.resource_main_dir, 'PRESQT_FTS_METADATA.json'),
                   final_fts_metadata_data, True)

        # Validate the final metadata
        metadata_validation = schema_validator('presqt/json_schemas/metadata_schema.json',
                                               final_fts_metadata_data)
        self.process_info_obj['message'] = get_action_message(
            'Download', self.download_fixity, metadata_validation, self.action_metadata)

        # Add the fixity file to the disk directory
        write_file(os.path.join(self.resource_main_dir, 'fixity_info.json'),
                   fixity_info, True)

        # Make a BagIt 'bag' of the resources.
        bagit.make_bag(self.resource_main_dir,
                       checksums=['md5', 'sha1', 'sha256', 'sha512'])

        # Zip the BagIt 'bag' to send forward.
        zip_directory(self.resource_main_dir, "{}.zip".format(self.resource_main_dir),
                      self.ticket_path)

        # Everything was a success so update the server metadata file.
        self.process_info_obj['status_code'] = '200'
        self.process_info_obj['status'] = 'finished'
        self.process_info_obj['zip_name'] = '{}.zip'.format(self.base_directory_name)
        self.process_info_obj['failed_fixity'] = self.download_failed_fixity
        write_file(self.process_info_path, self.process_info_obj, True)
        return True