def transfer_post_body_validation(request):
    """
    Extract target_name and resource_id from the request body.

    Parameters
    ----------
    request: HTTP Request Object

    Returns
    -------
    source_target: Name of the target that owns the resource to be transferred.
    source_resource_id: ID of the resource to transfer.
    """
    request_data = request.data

    try:
        source_target = request_data['source_target_name']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: source_target_name was not found in the request body.",
            status.HTTP_400_BAD_REQUEST)

    try:
        source_resource_id = request_data['source_resource_id']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: source_resource_id was not found in the request body.",
            status.HTTP_400_BAD_REQUEST)

    if source_resource_id is None or source_resource_id == "":
        raise PresQTValidationError(
            "PresQT Error: source_resource_id can't be None or blank.",
            status.HTTP_400_BAD_REQUEST)

    return source_target, source_resource_id
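# Illustrative usage sketch (not part of the original module). It assumes the module's
# PresQTValidationError import is in scope; the SimpleNamespace stand-in for a DRF
# request and the 'osf'/'abc123' values are purely hypothetical.
def _example_transfer_post_body_validation():
    from types import SimpleNamespace

    fake_request = SimpleNamespace(data={
        'source_target_name': 'osf',
        'source_resource_id': 'abc123'
    })
    source_target, source_resource_id = transfer_post_body_validation(fake_request)
    assert (source_target, source_resource_id) == ('osf', 'abc123')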
def fairshare_request_validator(request):
    """
    Validate the request made by the user.

    Parameters
    ----------
    request: HTTP request object
        The request made by the user.

    Returns
    -------
    The resource_id and list of tests.
    """
    try:
        resource_id = request.data['resource_id']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'resource_id' missing in the request body.",
            status.HTTP_400_BAD_REQUEST)

    try:
        tests = request.data['tests']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'tests' missing in the request body.",
            status.HTTP_400_BAD_REQUEST)

    if type(tests) is not list:
        raise PresQTValidationError(
            "PresQT Error: 'tests' must be in list format.",
            status.HTTP_400_BAD_REQUEST)

    return resource_id, tests
def keyword_post_validation(request):
    """
    Validate that the correct keyword lists are in the POST body.

    Parameters
    ----------
    request: POST request

    Returns
    -------
    The list of keywords.
    """
    try:
        keywords = request.data['keywords']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'keywords' is missing from the request body.",
            status.HTTP_400_BAD_REQUEST)

    if type(keywords) is not list:
        raise PresQTValidationError(
            "PresQT Error: 'keywords' must be in list format.",
            status.HTTP_400_BAD_REQUEST)

    return keywords
def fairshare_evaluator_validation(request):
    """
    Perform fairshare evaluator validation for the presqt-fairshare-evaluator-opt-in header.

    Parameters
    ----------
    request : HTTP request object

    Returns
    -------
    Returns whether the user wants to run fairshare tests during transfer.
    """
    try:
        choice = request.META['HTTP_PRESQT_FAIRSHARE_EVALUATOR_OPT_IN']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-fairshare-evaluator-opt-in' missing in the request headers.",
            status.HTTP_400_BAD_REQUEST)

    if choice not in ['yes', 'no']:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-fairshare-evaluator-opt-in' must be 'yes' or 'no'.",
            status.HTTP_400_BAD_REQUEST)

    if choice == 'yes':
        return True
    else:
        return False
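# Illustrative sketch (not part of the original module): Django/DRF exposes the
# 'presqt-fairshare-evaluator-opt-in' header through request.META with an HTTP_ prefix,
# dashes converted to underscores, and the name upper-cased, which is why the lookup
# above uses HTTP_PRESQT_FAIRSHARE_EVALUATOR_OPT_IN. The SimpleNamespace request
# stand-in below is purely hypothetical.
def _example_fairshare_evaluator_validation():
    from types import SimpleNamespace

    fake_request = SimpleNamespace(
        META={'HTTP_PRESQT_FAIRSHARE_EVALUATOR_OPT_IN': 'yes'})
    assert fairshare_evaluator_validation(fake_request) is True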
def file_validation(request):
    """
    Verify that the file 'presqt-file' exists in the body of the request.

    Parameters
    ----------
    request : HTTP request object

    Returns
    -------
    Returns the file provided in the body named 'presqt-file'.
    """
    try:
        file = request.FILES['presqt-file']
    except MultiValueDictKeyError:
        raise PresQTValidationError(
            "PresQT Error: The file, 'presqt-file', is not found in the body of the request.",
            status.HTTP_400_BAD_REQUEST)

    # Check if the file provided is a zip file
    if not zipfile.is_zipfile(file):
        raise PresQTValidationError(
            "PresQT Error: The file provided, 'presqt-file', is not a zip file.",
            status.HTTP_400_BAD_REQUEST)

    return file
def target_validation(target_name, action):
    """
    Given a Target name and an action, determine if the target_name is a valid target in
    targets.json and if the target supports the action.

    Parameters
    ----------
    target_name : str
        Name of the Target.
    action : str
        Type of action the API is looking to perform on the Target.

    Returns
    -------
    A tuple of True and the target's 'infinite_depth' value if the validation passes.
    Raises a custom ValidationException error if validation fails.
    """
    json_data = read_file('presqt/specs/targets.json', True)

    for data in json_data:
        if data['name'] == target_name:
            if data["supported_actions"][action] is False:
                raise PresQTValidationError(
                    "PresQT Error: '{}' does not support the action '{}'.".format(
                        target_name, action),
                    status.HTTP_400_BAD_REQUEST)
            return True, data['infinite_depth']
    else:
        raise PresQTValidationError(
            "PresQT Error: '{}' is not a valid Target name.".format(target_name),
            status.HTTP_404_NOT_FOUND)
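# Illustrative sketch (not part of the original module) of the minimal shape this function
# assumes for each entry in presqt/specs/targets.json, inferred only from the keys accessed
# above and in transfer_target_validation below. The target name, action names, and partner
# lists shown here are hypothetical; real entries may contain more fields.
_EXAMPLE_TARGETS_JSON_ENTRY = {
    "name": "osf",
    "supported_actions": {
        "resource_download": True,
        "resource_upload": True,
        "resource_transfer_in": True,
        "resource_transfer_out": True
    },
    "supported_transfer_partners": {
        "transfer_in": ["github", "zenodo"],
        "transfer_out": ["github", "zenodo"]
    },
    "infinite_depth": True
}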
def fairshare_test_validator(test_list, valid_tests):
    """
    Validate the list of tests passed by the user.

    Parameters
    ----------
    test_list: list
        List of tests the user wants to check
    valid_tests: dict
        The tests that PresQT has identified for this project.

    Returns
    -------
    The user's list of tests.
    """
    list_of_valid_tests = [
        int(key.rpartition('/')[2]) for key, value in valid_tests.items()
    ]

    # Check if empty list
    if len(test_list) == 0:
        raise PresQTValidationError(
            "PresQT Error: At least one test is required. Options are: {}".format(
                list_of_valid_tests),
            status.HTTP_400_BAD_REQUEST)

    # Ensure all tests in the passed-in list are valid
    for test in test_list:
        if test not in list_of_valid_tests:
            raise PresQTValidationError(
                "PresQT Error: '{}' not a valid test id. Options are: {}".format(
                    test, list_of_valid_tests),
                status.HTTP_400_BAD_REQUEST)

    return test_list
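# Illustrative sketch (not part of the original module): the keys of valid_tests are assumed
# to be URLs whose final path segment is the numeric test id, which is what the
# rpartition('/') call above extracts. The URLs and descriptions below are hypothetical.
def _example_fairshare_test_validator():
    valid_tests = {
        "https://example.org/fair_evaluator/metrics/1": "First test description",
        "https://example.org/fair_evaluator/metrics/2": "Second test description"
    }
    assert fairshare_test_validator([1, 2], valid_tests) == [1, 2]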
def fairshake_assessment_validator(request, rubric_id):
    """
    Perform fairshake validation for required fields.

    Parameters
    ----------
    request : HTTP request object
    rubric_id: str
        The ID of the rubric the requesting user would like to use

    Returns
    -------
    Returns the answers to the rubric.
    """
    rubric_answers = None
    if rubric_id != '96':
        try:
            rubric_answers = request.data['rubric_answers']
        except KeyError:
            raise PresQTValidationError(
                "PresQT Error: 'rubric_answers' missing in POST body.",
                status.HTTP_400_BAD_REQUEST)

        # Validate that rubric answers is a dict...
        if type(rubric_answers) is not dict:
            raise PresQTValidationError(
                "PresQT Error: 'rubric_answers' must be an object with the metric id's as the keys and answer values as the values.",
                status.HTTP_400_BAD_REQUEST)

        test_translator = read_file(
            'presqt/specs/services/fairshake/fairshake_test_fetch.json', True)[rubric_id]
        score_translator = read_file(
            'presqt/specs/services/fairshake/fairshake_score_translator.json', True)

        for key, value in test_translator.items():
            if key not in rubric_answers.keys():
                raise PresQTValidationError(
                    f"Missing response for metric '{key}'. Required metrics are: {list(test_translator.keys())}",
                    status.HTTP_400_BAD_REQUEST)

        for key, value in rubric_answers.items():
            if value not in score_translator.keys():
                raise PresQTValidationError(
                    f"'{value}' is not a valid answer. Options are: {list(score_translator.keys())}",
                    status.HTTP_400_BAD_REQUEST)
            if key not in test_translator.keys():
                raise PresQTValidationError(
                    f"'{key}' is not a valid metric. Required metrics are: {list(test_translator.keys())}",
                    status.HTTP_400_BAD_REQUEST)

    return rubric_answers
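# Illustrative sketch (not part of the original module) of the rubric_answers payload shape
# this validator expects: metric ids (keys of fairshake_test_fetch.json for the chosen
# rubric) mapped to answer strings (keys of fairshake_score_translator.json). The metric
# ids and answer strings below are hypothetical.
_EXAMPLE_RUBRIC_ANSWERS = {
    "30": "yes",
    "31": "no",
    "32": "could not tell"
}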
def keyword_action_validation(request):
    """
    Validate the 'presqt-keyword-action' header in the request.

    Parameters
    ----------
    request : HTTP request object

    Returns
    -------
    The keyword action: 'automatic', 'manual', or 'none'.
    """
    try:
        keyword_action = request.META['HTTP_PRESQT_KEYWORD_ACTION']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-keyword-action' missing in the request headers.",
            status.HTTP_400_BAD_REQUEST)

    if keyword_action not in ['automatic', 'manual', 'none']:
        raise PresQTValidationError(
            "PresQT Error: '{}' is not a valid keyword_action. "
            "The options are 'automatic', 'manual', or 'none'.".format(keyword_action),
            status.HTTP_400_BAD_REQUEST)

    return keyword_action
async def async_get(self_instance, url, session):
    """
    Coroutine that uses aiohttp to make a GET request. This is the method that will be called
    asynchronously with other GETs.

    Parameters
    ----------
    self_instance: Target Class Instance
        Instance of the Target class we are using for async calls.
    url: str
        URL to call
    session: ClientSession object
        aiohttp ClientSession Object

    Returns
    -------
    Response JSON
    """
    async with session.get(url, headers=self_instance.session.headers) as response:
        try:
            assert response.status == 200
            return await response.json()
        except AssertionError:
            raise PresQTValidationError(
                "The source target API returned an error. Please try again.",
                status.HTTP_500_INTERNAL_SERVER_ERROR)
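# Illustrative sketch (not part of the original module) of how a coroutine like async_get
# is typically fanned out over many URLs with a shared aiohttp session and asyncio.gather.
# The target_instance argument and the list of URLs are hypothetical.
async def _example_async_main(target_instance, urls):
    # Imported here only to keep the sketch self-contained.
    import asyncio
    import aiohttp

    async with aiohttp.ClientSession() as session:
        # Fire all GETs concurrently and wait for every response body.
        return await asyncio.gather(
            *[async_get(target_instance, url, session) for url in urls])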
def file_duplicate_action_validation(request):
    """
    Validate the 'presqt-file-duplicate-action' header in the request.

    Parameters
    ----------
    request : HTTP request object

    Returns
    -------
    The file duplicate action: 'ignore' or 'update'.
    """
    try:
        file_duplicate_action = request.META['HTTP_PRESQT_FILE_DUPLICATE_ACTION']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-file-duplicate-action' missing in the request headers.",
            status.HTTP_400_BAD_REQUEST)

    if file_duplicate_action not in ['ignore', 'update']:
        raise PresQTValidationError(
            "PresQT Error: '{}' is not a valid file_duplicate_action. "
            "The options are 'ignore' or 'update'.".format(file_duplicate_action),
            status.HTTP_400_BAD_REQUEST)

    return file_duplicate_action
def zenodo_validation_check(token):
    """
    Ensure a proper Zenodo API token has been provided.

    Parameters
    ----------
    token : str
        User's Zenodo token

    Returns
    -------
    Properly formatted Zenodo Auth parameter.
    """
    auth_parameter = {'access_token': token}

    # Hit the depositions endpoint on the Zenodo server to verify the token.
    validator = requests.get("https://zenodo.org/api/deposit/depositions",
                             params=auth_parameter).status_code

    if validator != 200:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    return auth_parameter
async def async_get(url, session, headers):
    """
    Coroutine that uses aiohttp to make a GET request. This is the method that will be called
    asynchronously with other GETs.

    Parameters
    ----------
    url: str
        URL to call
    session: ClientSession object
        aiohttp ClientSession Object
    headers: dict
        Necessary header for OSF calls

    Returns
    -------
    Response JSON
    """
    async with session.get(url, headers=headers) as response:
        try:
            assert response.status == 200
            return await response.json()
        except AssertionError:
            if response.status == 403:
                # TODO: Passing here to avoid private file errors; look into this.
                pass
            else:
                raise PresQTValidationError(
                    "The source target API returned an error. Please try again.",
                    status.HTTP_500_INTERNAL_SERVER_ERROR)
def fairshake_request_validator(request, rubric_id):
    """
    Perform fairshake validation for required fields.

    Parameters
    ----------
    request : HTTP request object
    rubric_id: str
        The ID of the rubric the requesting user would like to use

    Returns
    -------
    Returns the rubric id, the digital object type, project url and project title.
    """
    supported_rubrics = {
        "93": "repo",
        "94": "data",
        "95": "tool",
        "96": "repo"
    }

    if rubric_id not in supported_rubrics.keys():
        raise PresQTValidationError(
            f"PresQT Error: '{rubric_id}' is not a valid rubric id. Options are: ['93', '94', '95', '96']",
            status.HTTP_400_BAD_REQUEST)

    request_data = request.data

    try:
        project_url = request_data['project_url']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'project_url' missing in POST body.",
            status.HTTP_400_BAD_REQUEST)

    try:
        project_title = request_data['project_title']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'project_title' missing in POST body.",
            status.HTTP_400_BAD_REQUEST)

    return rubric_id, supported_rubrics[rubric_id], project_url, project_title
def transfer_target_validation(source_target, destination_target):
    """
    Validation check for pending transfer partners.

    Parameters
    ----------
    source_target : str
        The source target (where the transfer is coming from)
    destination_target : str
        The destination target (where the transfer will end up)

    Raises
    ------
    PresQTValidationError if the targets don't allow transfers to or from each other.

    Returns
    -------
    True if the targets allow transfer with each other.
    """
    json_data = read_file('presqt/specs/targets.json', True)

    for data in json_data:
        if data['name'] == source_target:
            if destination_target not in data['supported_transfer_partners']['transfer_out']:
                raise PresQTValidationError(
                    "PresQT Error: '{}' does not allow transfer to '{}'.".format(
                        source_target, destination_target),
                    status.HTTP_400_BAD_REQUEST)
        elif data['name'] == destination_target:
            if source_target not in data['supported_transfer_partners']['transfer_in']:
                raise PresQTValidationError(
                    "PresQT Error: '{}' does not allow transfer from '{}'.".format(
                        destination_target, source_target),
                    status.HTTP_400_BAD_REQUEST)

    return True
def validate_bag(bag):
    """
    Validate that a bag is in the correct format, all checksums match, and that there are no
    unexpected or missing files.

    Parameters
    ----------
    bag : bagit.Bag
        The BagIt class we want to validate.
    """
    # Verify that checksums still match and that there are no unexpected or missing files
    try:
        bag.validate()
    except bagit.BagValidationError as e:
        if e.details:
            if isinstance(e.details[0], bagit.ChecksumMismatch):
                raise PresQTValidationError(
                    "Checksums failed to validate.",
                    status.HTTP_400_BAD_REQUEST)
            else:
                raise PresQTValidationError(
                    str(e.details[0]),
                    status.HTTP_400_BAD_REQUEST)
        else:
            raise PresQTValidationError(
                str(e),
                status.HTTP_400_BAD_REQUEST)
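# Illustrative sketch (not part of the original module) of producing a bagit.Bag to feed
# into validate_bag: bagit.make_bag converts a plain directory into BagIt layout in place
# and returns the Bag object. The directory path below is hypothetical.
def _example_validate_bag():
    import bagit

    bag = bagit.make_bag('mediafiles/example_upload', checksums=['md5'])
    validate_bag(bag)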
def zenodo_fetch_resources(token, search_parameter):
    """
    Fetch all of the user's repos from Zenodo.

    Parameters
    ----------
    token : str
        User's Zenodo token
    search_parameter : dict
        The search parameter passed to the API View.
        Gets passed formatted as {'title': 'search_info'}

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format:
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Let's build the resources
    if search_parameter:
        if 'title' in search_parameter:
            search_parameters = search_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)
            zenodo_projects = requests.get(base_url, params=auth_parameter).json()['hits']['hits']
            is_record = True
        elif 'id' in search_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                search_parameter['id'])
            zenodo_projects = requests.get(base_url, params=auth_parameter).json()['hits']['hits']
            is_record = True
    else:
        base_url = "https://zenodo.org/api/deposit/depositions"
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter, is_record)

    return resources
def get_upload_source_metadata(instance, bag):
    """
    Get all FTS metadata files in the bag. If they are valid then get their contents,
    otherwise rename the invalid metadata file.

    Parameters
    ----------
    instance: BaseResource class instance
        Class we want to add the attributes to
    bag: Bag Class instance
        The bag we want to traverse and update.
    """
    instance.source_fts_metadata_actions = []
    instance.all_keywords = []
    instance.extra_metadata = {}

    for bag_file in bag.payload_files():
        if os.path.split(bag_file)[-1] == 'PRESQT_FTS_METADATA.json':
            metadata_path = os.path.join(instance.resource_main_dir, bag_file)

            try:
                source_metadata_content = read_file(metadata_path, True)
            except JSONDecodeError:
                raise PresQTValidationError(
                    "PRESQT_FTS_METADATA.json is not valid JSON",
                    status.HTTP_400_BAD_REQUEST)

            # If the FTS metadata is valid then remove it from the bag and save the actions.
            if schema_validator('presqt/json_schemas/metadata_schema.json',
                                source_metadata_content) is True:
                instance.source_fts_metadata_actions = (
                    instance.source_fts_metadata_actions + source_metadata_content['actions'])
                instance.all_keywords = instance.all_keywords + source_metadata_content['allKeywords']
                if 'extra_metadata' in source_metadata_content.keys():
                    instance.extra_metadata = source_metadata_content['extra_metadata']
                os.remove(os.path.join(instance.resource_main_dir, bag_file))
                bag.save(manifests=True)
            # If the FTS metadata is invalid then rename the file in the bag.
            else:
                invalid_metadata_path = os.path.join(
                    os.path.split(metadata_path)[0], 'INVALID_PRESQT_FTS_METADATA.json')
                os.rename(metadata_path, invalid_metadata_path)
                bag.save(manifests=True)
def get_process_info_action(process_info_data, action):
    """
    Get an action's data in the process_info dict.

    Parameters
    ----------
    process_info_data: dict
        Dict gathered from the process_info.json file
    action: str
        Action data we want from process_info_data

    Returns
    -------
    Dict of the action data
    """
    try:
        return process_info_data[action]
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: A {} does not exist for this user on the server.".format(action),
            status.HTTP_404_NOT_FOUND)
def get_process_info_data(action, ticket_number):
    """
    Get the JSON from process_info.json in the requested ticket number directory.

    Parameters
    ----------
    action : str
        The action directory we should look in for the ticket_number directory
    ticket_number : str
        Requested ticket_number directory the JSON should live in

    Returns
    -------
    JSON dictionary representing the process_info.json data.
    """
    try:
        return read_file('mediafiles/{}/{}/process_info.json'.format(action, ticket_number), True)
    except FileNotFoundError:
        raise PresQTValidationError(
            "PresQT Error: Invalid ticket number, '{}'.".format(ticket_number),
            status.HTTP_404_NOT_FOUND)
def get_process_info_data(ticket_number):
    """
    Get the JSON from process_info.json in the requested ticket number directory.

    Parameters
    ----------
    ticket_number : str
        Requested ticket_number directory the JSON should live in

    Returns
    -------
    JSON dictionary representing the process_info.json data.
    """
    while True:
        try:
            return read_file('mediafiles/jobs/{}/process_info.json'.format(ticket_number), True)
        except json.decoder.JSONDecodeError:
            # The file may be mid-write by another process, so the JSON can be momentarily
            # malformed. Keep retrying until it parses.
            pass
        except FileNotFoundError:
            raise PresQTValidationError(
                "PresQT Error: Invalid ticket number, '{}'.".format(ticket_number),
                status.HTTP_404_NOT_FOUND)
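# Illustrative sketch (not part of the original module) of chaining the helpers above:
# load a job's process_info.json by ticket number with the single-argument
# get_process_info_data defined directly above, then pull one action's data out of it with
# get_process_info_action. The ticket number and action name below are hypothetical.
def _example_process_info_lookup():
    process_info_data = get_process_info_data('1234567890')
    return get_process_info_action(process_info_data, 'resource_download')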
def zenodo_fetch_resource(token, resource_id):
    """
    Fetch the Zenodo resource matching the resource_id given.

    Parameters
    ----------
    token : str
        User's Zenodo token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the Zenodo resource.
    Dictionary must be in the following format:
        {
            "kind": "container",
            "kind_name": "repo",
            "id": "12345",
            "title": "23296359282_934200ec59_o.jpg",
            "date_created": "2019-05-13T14:54:17.129170Z",
            "date_modified": "2019-05-13T14:54:17.129170Z",
            "hashes": {
                "md5": "aaca7ef067dcab7cb8d79c36243823e4",
            },
            "extra": {
                "any": extra,
                "values": here
            }
        }
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Let's first try to get the record with this id.
    if len(str(resource_id)) <= 7:
        base_url = "https://zenodo.org/api/records/{}".format(resource_id)
        zenodo_project = requests.get(base_url, params=auth_parameter)
        if zenodo_project.status_code == 200:
            # We found the record, pass the project to our function.
            resource = zenodo_fetch_resource_helper(zenodo_project.json(), resource_id, True)
        else:
            # We need to get the resource from the depositions
            base_url = "https://zenodo.org/api/deposit/depositions/{}".format(resource_id)
            zenodo_project = requests.get(base_url, params=auth_parameter)
            if zenodo_project.status_code != 200:
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)
            else:
                resource = zenodo_fetch_resource_helper(
                    zenodo_project.json(), resource_id, False, False)
    else:
        # The id belongs to a file.
        base_url = "https://zenodo.org/api/files/{}".format(resource_id)
        zenodo_project = requests.get(base_url, params=auth_parameter)
        if zenodo_project.status_code == 200:
            # Contents returns a list of the single file
            resource = zenodo_fetch_resource_helper(
                zenodo_project.json()['contents'][0], resource_id, True, True)
        else:
            # We need to loop through the user's depositions and see if the file is there.
            base_url = 'https://zenodo.org/api/deposit/depositions'
            zenodo_projects = requests.get(base_url, params=auth_parameter).json()
            for entry in zenodo_projects:
                project_files = requests.get(entry['links']['self'], params=auth_parameter).json()
                for file in project_files['files']:
                    if file['id'] == resource_id:
                        resource = {
                            "container": entry['id'],
                            "kind": "item",
                            "kind_name": "file",
                            "id": resource_id,
                            "identifier": None,
                            "title": file['filename'],
                            "date_created": None,
                            "date_modified": None,
                            "hashes": {
                                "md5": file['checksum']
                            },
                            "extra": {},
                            "children": []
                        }
                        # We found the file, break out of the file loop
                        break
                # If the file wasn't found, we want to continue looping through the other projects.
                else:
                    continue
                # File has been found, break out of the project loop
                break
            # File not found, raise exception
            else:
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)

    return resource
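# Illustrative sketch (not part of the original module) of the nested for/else with
# continue/break pattern used above: the inner else runs only when the inner loop finishes
# without a break (so the outer loop continues), and the outer else runs only when no
# project matched at all. The data shape and exception below are hypothetical.
def _example_nested_for_else(projects, wanted_id):
    for project in projects:
        for file in project['files']:
            if file['id'] == wanted_id:
                found = file
                break
        else:
            continue
        break
    else:
        raise LookupError("No file with id {} in any project.".format(wanted_id))
    return found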
def zenodo_fetch_resources(token, query_parameter):
    """
    Fetch all of the user's repos from Zenodo.

    Parameters
    ----------
    token : str
        User's Zenodo token
    query_parameter : dict
        The search parameter passed to the API View.
        Gets passed formatted as {'title': 'search_info'}

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format:
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }
    We are also returning a dictionary of pagination information.
    Dictionary must be in the following format:
        {
            "first_page": '1',
            "previous_page": None,
            "next_page": None,
            "last_page": '1',
            "total_pages": '1',
            "per_page": 10
        }
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    pages = {
        "first_page": '1',
        "previous_page": None,
        "next_page": None,
        "last_page": '1',
        "total_pages": '1',
        "per_page": 10
    }

    # Let's build the resources
    if query_parameter and 'page' not in query_parameter:
        if 'title' in query_parameter:
            search_parameters = query_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)
        elif 'id' in query_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                query_parameter['id'])
        elif 'general' in query_parameter:
            search_parameters = query_parameter['general'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q={}'.format(search_parameters)
        elif 'keywords' in query_parameter:
            search_parameters = query_parameter['keywords'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=keywords:{}'.format(search_parameters)

        zenodo_projects = requests.get(base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True
    else:
        if query_parameter and 'page' in query_parameter:
            base_url = "https://zenodo.org/api/deposit/depositions?page={}".format(
                query_parameter['page'])
        else:
            base_url = "https://zenodo.org/api/deposit/depositions?page=1"
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter, is_record)

    return resources, pages
def zenodo_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                           file_duplicate_action):
    """
    Upload the files found in the resource_main_dir to the target.

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found.

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
                              uploading the resource. Path should have the same base as
                              resource_main_dir.
                              Example: ['path/to/ignored/file.pg', 'another/ignored/file.jpg']
        'resources_updated' : Array of string file paths of files that were updated when
                              uploading the resource. Path should have the same base as
                              resource_main_dir.
                              Example: ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                           {
                               'destinationUsername': '******'
                           }
        'file_metadata_list': List of dictionaries for each file that contains metadata and
                              hash info. Must be in the following format:
                              {
                                  "actionRootPath": '/path/on/disk',
                                  "destinationPath": '/path/on/target/destination',
                                  "title": 'file_title',
                                  "destinationHash": {'hash_algorithm': 'the_hash'}
                              }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    os_path = next(os.walk(resource_main_dir))

    # Since Zenodo is a finite depth target, the checks for path validity have already been done.
    if resource_id:
        name_helper = requests.get(
            "https://zenodo.org/api/deposit/depositions/{}".format(resource_id),
            params=auth_parameter).json()
        try:
            project_title = name_helper['title']
        except KeyError:
            raise PresQTResponseException(
                "Can't find the resource with id {}, on Zenodo".format(resource_id),
                status.HTTP_404_NOT_FOUND)
        action_metadata = {"destinationUsername": None}
        post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(resource_id)
        upload_dict = zenodo_upload_loop(action_metadata, resource_id, resource_main_dir,
                                         post_url, auth_parameter, project_title)
    else:
        action_metadata = {"destinationUsername": None}
        project_title = os_path[1][0]
        name_helper = requests.get("https://zenodo.org/api/deposit/depositions",
                                   params=auth_parameter).json()
        titles = [project['title'] for project in name_helper]
        new_title = get_duplicate_title(project_title, titles, ' (PresQT*)')
        resource_id = zenodo_upload_helper(auth_parameter, new_title)

        post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(resource_id)
        upload_dict = zenodo_upload_loop(action_metadata, resource_id, resource_main_dir,
                                         post_url, auth_parameter, new_title)

    return upload_dict
def curate_nd_download_resource(token, resource_id):
    """
    Fetch the requested resource from CurateND along with its hash information.

    Parameters
    ----------
    token : str
        User's CurateND token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                             'file': binary_file,
                             'hashes': {'hash_algorithm': 'the_hash'},
                             'title': 'file.jpg',
                             'path': '/path/to/file',
                             'source_path': '/full/path/to/file',
                             'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must
                            be written.
                            Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                           {
                               'sourceUsername': '******',
                           }
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get the resource
    resource = get_curate_nd_resource(resource_id, curate_instance)
    action_metadata = {"sourceUsername": resource.extra['depositor']}

    # Get all the files for the provided resources.
    files = []
    empty_containers = []
    if resource.kind_name == 'file':
        # Get the title of the Project to add to sourcePath
        project_title = requests.get(
            resource.extra['isPartOf'],
            headers={'X-Api-Token': '{}'.format(token)}).json()['title']
        # This is so we aren't missing the few extra keys that are pulled out for the PresQT payload
        resource.extra.update({
            "id": resource.id,
            "date_submitted": resource.date_submitted
        })
        binary_file, curate_hash = resource.download()

        files.append({
            'file': binary_file,
            'hashes': {'md5': curate_hash},
            'title': resource.title,
            # If the file is the only resource we are downloading then we don't need its full path.
            'path': '/{}'.format(resource.title),
            'source_path': '/{}/{}'.format(project_title, resource.title),
            'extra_metadata': resource.extra
        })
    else:
        if not resource.extra['containedFiles']:
            empty_containers.append('{}'.format(resource.title))
        else:
            title_helper = {}
            hash_helper = {}
            file_urls = []
            project_title = resource.title
            file_metadata = []

            for file in resource.extra['containedFiles']:
                download_url = file['downloadUrl']
                contained_file = get_curate_nd_resource(file['id'], curate_instance)
                file_metadata.append({
                    "title": contained_file.title,
                    "extra": contained_file.extra
                })
                title_helper[download_url] = contained_file.title
                hash_helper[download_url] = contained_file.md5
                file_urls.append(download_url)

            loop = asyncio.new_event_loop()
            download_data = loop.run_until_complete(async_main(file_urls, token))

            for file in download_data:
                title = title_helper[file['url']]
                hash = hash_helper[file['url']]

                files.append({
                    'file': file['binary_content'],
                    'hashes': {'md5': hash},
                    'title': title,
                    'source_path': '/{}/{}'.format(project_title, title),
                    'path': '/{}/{}'.format(resource.title, title),
                    'extra_metadata': get_dictionary_from_list(file_metadata, 'title', title)['extra']
                })

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }