def zenodo_fetch_resources(token, search_parameter):
    """
    Fetch all users repos from Zenodo.

    Without a search parameter the user's own depositions are listed. With a
    'title' or 'id' search parameter the Zenodo records endpoint is queried instead.

    Parameters
    ----------
    token : str
        User's Zenodo token
    search_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'} or {'id': 'search_info'}

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation, or with a 400 status if
        search_parameter is not empty but holds neither a 'title' nor an 'id' key.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError("Token is invalid. Response returned a 401 status code.",
                                    status.HTTP_401_UNAUTHORIZED)

    # Let's build them resources
    if search_parameter:
        if 'title' in search_parameter:
            search_parameters = search_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)
        elif 'id' in search_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                search_parameter['id'])
        else:
            # Previously this case fell through with no URL or result bound and
            # crashed with an UnboundLocalError; reject it explicitly instead.
            raise PresQTValidationError(
                "Zenodo search only supports the 'title' and 'id' search parameters.",
                status.HTTP_400_BAD_REQUEST)
        # Search results come back wrapped in a 'hits' envelope.
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True
    else:
        base_url = "https://zenodo.org/api/deposit/depositions"
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter, is_record)
    return resources
def zenodo_fetch_resource(token, resource_id):
    """
    Fetch the Zenodo resource matching the resource_id given.

    An id of at most 7 characters is looked up as a record first and as one of the
    user's depositions second. A longer id is treated as a file id: the files
    endpoint is tried first, then every deposition of the user is scanned for it.

    Parameters
    ----------
    token : str
        User's Zenodo token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the Zenodo resource.
    Dictionary must be in the following format:
        {
            "kind": "container",
            "kind_name": "repo",
            "id": "12345",
            "title": "23296359282_934200ec59_o.jpg",
            "date_created": "2019-05-13T14:54:17.129170Z",
            "date_modified": "2019-05-13T14:54:17.129170Z",
            "hashes": {
                "md5": "aaca7ef067dcab7cb8d79c36243823e4",
            },
            "extra": {
                "any": extra,
                "values": here
            }
        }

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation.
    PresQTResponseException
        With a 404 status if no record, deposition or file matches resource_id.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    not_found_message = "The resource could not be found by the requesting user."

    if len(str(resource_id)) > 7:
        # Long ids are file ids. Try the files endpoint first.
        file_response = requests.get(
            "https://zenodo.org/api/files/{}".format(resource_id), params=auth_parameter)
        if file_response.status_code == 200:
            # 'contents' holds a list with the single file
            return zenodo_fetch_resource_helper(
                file_response.json()['contents'][0], resource_id, True, True)

        # Otherwise walk through the user's depositions looking for the file.
        depositions = requests.get(
            'https://zenodo.org/api/deposit/depositions', params=auth_parameter).json()
        for deposition in depositions:
            deposition_detail = requests.get(
                deposition['links']['self'], params=auth_parameter).json()
            for deposition_file in deposition_detail['files']:
                if deposition_file['id'] == resource_id:
                    return {
                        "container": deposition['id'],
                        "kind": "item",
                        "kind_name": "file",
                        "id": resource_id,
                        "identifier": None,
                        "title": deposition_file['filename'],
                        "date_created": None,
                        "date_modified": None,
                        "hashes": {
                            "md5": deposition_file['checksum']
                        },
                        "extra": {},
                        "children": []
                    }

        # Every deposition was checked without a match.
        raise PresQTResponseException(not_found_message, status.HTTP_404_NOT_FOUND)

    # Short ids: first try to get the record with this id.
    record_response = requests.get(
        "https://zenodo.org/api/records/{}".format(resource_id), params=auth_parameter)
    if record_response.status_code == 200:
        return zenodo_fetch_resource_helper(record_response.json(), resource_id, True)

    # Not a record, so it has to be one of the depositions.
    deposition_response = requests.get(
        "https://zenodo.org/api/deposit/depositions/{}".format(resource_id),
        params=auth_parameter)
    if deposition_response.status_code != 200:
        raise PresQTResponseException(not_found_message, status.HTTP_404_NOT_FOUND)

    return zenodo_fetch_resource_helper(
        deposition_response.json(), resource_id, False, False)
def zenodo_fetch_resources(token, query_parameter):
    """
    Fetch all users repos from Zenodo.

    A query parameter holding 'title', 'id', 'general' or 'keywords' (and no 'page')
    searches the Zenodo records endpoint. Otherwise the user's own depositions are
    listed, using the requested 'page' when one is given and page 1 when not.

    Parameters
    ----------
    token : str
        User's Zenodo token
    query_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'}

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }
    We are also returning a dictionary of pagination information.
    Dictionary must be in the following format:
        {
            "first_page": '1',
            "previous_page": None,
            "next_page": None,
            "last_page": '1',
            "total_pages": '1',
            "per_page": 10
        }
    The pagination dictionary returned here always holds exactly these fixed values.

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation, or with a 400 status if
        query_parameter is not empty and holds none of the supported keys.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    pages = {
        "first_page": '1',
        "previous_page": None,
        "next_page": None,
        "last_page": '1',
        "total_pages": '1',
        "per_page": 10
    }

    # Let's build them resources
    if query_parameter and 'page' not in query_parameter:
        if 'title' in query_parameter:
            search_parameters = query_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)
        elif 'id' in query_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                query_parameter['id'])
        elif 'general' in query_parameter:
            search_parameters = query_parameter['general'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q={}'.format(
                search_parameters)
        elif 'keywords' in query_parameter:
            search_parameters = query_parameter['keywords'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=keywords:{}'.format(
                search_parameters)
        else:
            # Previously this case left base_url unbound and crashed with an
            # UnboundLocalError on the request below; reject it explicitly instead.
            raise PresQTValidationError(
                "Zenodo search only supports the 'title', 'id', 'general' and 'keywords' "
                "query parameters.",
                status.HTTP_400_BAD_REQUEST)
        # Search results come back wrapped in a 'hits' envelope.
        zenodo_projects = requests.get(
            base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True
    else:
        if query_parameter and 'page' in query_parameter:
            base_url = "https://zenodo.org/api/deposit/depositions?page={}".format(
                query_parameter['page'])
        else:
            base_url = "https://zenodo.org/api/deposit/depositions?page=1"
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter, is_record)
    return resources, pages
def zenodo_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                           file_duplicate_action):
    """
    Upload the files found in the resource_main_dir to the target.

    With a resource_id the files go to that existing deposition. Without one a new
    deposition is created, titled after the first directory found directly inside
    resource_main_dir (de-duplicated against the user's existing deposition titles).

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
        Not referenced in the body of this function.
    file_duplicate_action : str
        The action to take when a duplicate file is found.
        Not referenced in the body of this function.

    Returns
    -------
    The value returned by zenodo_upload_loop.
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/ignored/file.pg', 'another/ignored/file.jpg']

        'resources_updated' : Array of string file paths of files that were updated when
         uploading the resource. Path should have the same base as resource_main_dir.
                                 Example:
                                    ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                            {
                                'destinationUsername': '******'
                            }
        'file_metadata_list': List of dictionaries for each file that contains metadata
                              and hash info. Must be in the following format:
                                {
                                    "actionRootPath": '/path/on/disk',
                                    "destinationPath": '/path/on/target/destination',
                                    "title": 'file_title',
                                    "destinationHash": {'hash_algorithm': 'the_hash'}}
                                }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation.
    PresQTResponseException
        With a 404 status if the deposition fetched for resource_id has no 'title'.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Only the top level listing of the upload directory is needed.
    # Since Zenodo is a finite depth target, the checks for path validity have already been done.
    _, top_level_dirs, _ = next(os.walk(resource_main_dir))
    action_metadata = {"destinationUsername": None}

    if resource_id:
        # Uploading into an existing deposition: look up its title.
        deposition = requests.get(
            "https://zenodo.org/api/deposit/depositions/{}".format(resource_id),
            params=auth_parameter).json()
        try:
            upload_title = deposition['title']
        except KeyError:
            raise PresQTResponseException(
                "Can't find the resource with id {}, on Zenodo".format(resource_id),
                status.HTTP_404_NOT_FOUND)
    else:
        # Creating a new deposition named after the first top level directory.
        local_title = top_level_dirs[0]
        existing_depositions = requests.get(
            "https://zenodo.org/api/deposit/depositions",
            params=auth_parameter).json()
        existing_titles = [deposition['title'] for deposition in existing_depositions]
        upload_title = get_duplicate_title(local_title, existing_titles, ' (PresQT*)')
        resource_id = zenodo_upload_helper(auth_parameter, upload_title)

    post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(resource_id)
    return zenodo_upload_loop(action_metadata, resource_id, resource_main_dir, post_url,
                              auth_parameter, upload_title)
def zenodo_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from Zenodo along with its hash information.

    A resource_id longer than 7 characters is treated as a single file, anything
    shorter as a full project (a record or one of the user's depositions).

    Parameters
    ----------
    token : str
        User's Zenodo token
    resource_id : str
        ID of the resource requested
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'metadata': {
                                'sourcePath': '/full/path/at/source.jpg',
                                'title': 'file_title',
                                'sourceHashes': {'hash_algorithm': 'the_hash'},
                                'extra': {'any': 'extra'}
                             }
                         }
        'empty_containers': List of string paths representing empty containers that must be
                            written. This function always returns an empty list here.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                                'sourceUsername': '******',
                              }
        'extra_metadata': Value returned by extra_metadata_helper for a project download,
                          an empty dictionary for a single file download.

    Raises
    ------
    PresQTResponseException
        With a 404 status if the file or project cannot be found for this user.
    """
    # NOTE(review): the other Zenodo functions in this file wrap this call in
    # `except PresQTValidationError`. Confirm which exception zenodo_validation_check
    # raises; if it is PresQTValidationError this handler never fires. Left unchanged
    # so the exception type seen by callers stays the same.
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            'Token is invalid. Response returned a 401 status code.',
            status.HTTP_401_UNAUTHORIZED)

    files = []
    empty_containers = []
    extra_metadata = {}
    base_url = None

    # If the resource_id is longer than 7 characters, the resource is an individual file.
    # str() keeps this check consistent with zenodo_fetch_resource for non-string ids.
    if len(str(resource_id)) > 7:
        # First we need to check if the file id given belongs to a public published record.
        zenodo_file = requests.get(
            'https://zenodo.org/api/files/{}'.format(resource_id),
            params=auth_parameter)
        if zenodo_file.status_code != 200:
            # If not, we need to loop through their depositions to look for the file.
            zenodo_projects = requests.get(
                'https://zenodo.org/api/deposit/depositions',
                params=auth_parameter).json()
            for entry in zenodo_projects:
                project_files = requests.get(entry['links']['self'],
                                             params=auth_parameter).json()
                for project_file in project_files['files']:
                    if project_file['id'] == resource_id:
                        base_url = entry['links']['self']
                        file_url = project_file['links']['self']
                        is_record = False
                        break
                else:
                    # If the file wasn't found we want to continue the loop.
                    continue
                # The file was found, stop looking through the depositions.
                break
        else:
            is_record = True
            base_url = 'https://zenodo.org/api/files/{}'.format(resource_id)
            file_url = 'https://zenodo.org/api/files/{}'.format(resource_id)

        # base_url is still None when no deposition held the file.
        if base_url is None:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".format(resource_id),
                status.HTTP_404_NOT_FOUND)

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        files, action_metadata = zenodo_download_helper(
            is_record, base_url, auth_parameter, files, file_url)

        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')

    # Otherwise, it's a full project
    else:
        base_url = 'https://zenodo.org/api/records/{}'.format(resource_id)
        zenodo_record = requests.get(base_url, params=auth_parameter)
        is_record = True
        if zenodo_record.status_code != 200:
            # Not a record, fall back to the user's deposition with this id.
            base_url = 'https://zenodo.org/api/deposit/depositions/{}'.format(resource_id)
            is_record = False
        try:
            files, action_metadata = zenodo_download_helper(
                is_record, base_url, auth_parameter, files)
        except PresQTResponseException:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".format(resource_id),
                status.HTTP_404_NOT_FOUND)

        extra_metadata = extra_metadata_helper(base_url, is_record, auth_parameter)
        file_urls = [file_info['file'] for file_info in files]

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action, 'download')

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            download_data = loop.run_until_complete(
                async_main(file_urls, auth_parameter, process_info_path, action))
        finally:
            # A fresh loop is created on every call, so close it to release its
            # resources even when the downloads raise.
            loop.close()

        # Go through the file dictionaries and replace the file path with the binary_content
        for file_info in files:
            file_info['file'] = get_dictionary_from_list(
                download_data, 'url', file_info['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
def zenodo_fetch_keywords(token, resource_id):
    """
    Fetch the keywords of a given resource id.

    For a container resource, the 'allKeywords' of its PRESQT_FTS_METADATA.json file
    (when that file is found among the deposition's files) are merged with the
    keywords stored on the resource itself. Duplicates are removed.

    Parameters
    ----------
    token: str
        User's Zenodo token
    resource_id: str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the Zenodo resource keywords.
    Dictionary must be in the following format:
        {
            "zenodo_keywords": [
                "eggs",
                "ham",
                "bacon"
            ],
            "keywords": [
                "eggs",
                "ham",
                "bacon"
            ]
        }

    Raises
    ------
    PresQTResponseException
        With a 400 status if the resource's 'extra' data has no 'keywords' entry.
    """
    auth_parameter = zenodo_validation_check(token)

    from presqt.targets.zenodo.functions.fetch import zenodo_fetch_resource
    resource = zenodo_fetch_resource(token, resource_id)

    # Look for the PresQT metadata file among the files of a container.
    presqt_metadata = None
    if resource['kind'] == 'container':
        files_response = requests.get(
            "https://zenodo.org/api/deposit/depositions/{}/files".format(resource_id),
            params=auth_parameter)
        if files_response.status_code == 200:
            for project_file in files_response.json():
                if project_file['filename'] == 'PRESQT_FTS_METADATA.json':
                    # Download and parse the metadata
                    raw_metadata = requests.get(project_file['links']['download'],
                                                params=auth_parameter).content
                    presqt_metadata = json.loads(raw_metadata)

    if 'keywords' not in resource['extra']:
        raise PresQTResponseException(
            "The requested Zenodo resource does not have keywords.",
            status.HTTP_400_BAD_REQUEST)

    # Start from the resource's own keywords and add the metadata file's when present.
    combined_keywords = resource['extra']['keywords']
    if presqt_metadata:
        try:
            combined_keywords = combined_keywords + presqt_metadata['allKeywords']
        except KeyError:
            # The metadata file carries no 'allKeywords', keep the resource's own.
            pass
    keywords = list(set(combined_keywords))

    return {'zenodo_keywords': keywords, 'keywords': keywords}
def zenodo_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the project.

    If the project already holds a valid PRESQT_FTS_METADATA.json, its actions and
    keywords are merged with the new ones, the file is re-posted and the function
    returns. If the existing file fails schema validation it is re-posted as
    INVALID_PRESQT_FTS_METADATA.json and a fresh metadata file is created. With no
    existing file, a fresh metadata file is created. Extra metadata is only pushed
    to the project when a fresh metadata file is created.

    Parameters
    ----------
    token : str
        The user's Zenodo token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If any of the metadata file posts does not come back with a 201 status.
    """
    auth_parameter = zenodo_validation_check(token)
    file_name = 'PRESQT_FTS_METADATA.json'
    post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(project_id)

    existing_files = requests.get(post_url, params=auth_parameter).json()
    for existing_file in existing_files:
        if existing_file['filename'] != file_name:
            continue

        # Download the existing metadata and load it so it can be updated.
        old_metadata_file = requests.get(existing_file['links']['download'],
                                         params=auth_parameter).content
        updated_metadata = json.loads(old_metadata_file)
        metadata_is_valid = schema_validator(
            'presqt/json_schemas/metadata_schema.json', updated_metadata) is True

        # Valid or not, the old file is deleted and a replacement posted
        # (the original author notes Zenodo's put method was having issues).
        requests.delete(existing_file['links']['self'], params=auth_parameter)

        if not metadata_is_valid:
            # Improperly formatted metadata is kept under a different file name.
            response_status = metadata_post_request(
                'INVALID_PRESQT_FTS_METADATA.json', updated_metadata, auth_parameter, post_url)
            if response_status != 201:
                raise PresQTError(
                    "The request to rename the invalid metadata file has returned a {} error code from Zenodo."
                    .format(response_status))
            # Leave the loop so a fresh metadata file gets created below.
            break

        # New entries first, followed by the ones already stored in the old file.
        joined_actions = list(
            itertools.chain(metadata_dict['actions'], updated_metadata['actions']))
        joined_keywords = list(
            itertools.chain(metadata_dict['allKeywords'], updated_metadata['allKeywords']))
        updated_metadata['actions'] = joined_actions
        updated_metadata['allKeywords'] = list(set(joined_keywords))

        response_status = metadata_post_request(file_name, updated_metadata,
                                                auth_parameter, post_url)
        # When updating an existing metadata file, Zenodo returns a 201 status
        if response_status != 201:
            raise PresQTError(
                "The request to update the metadata file has returned a {} error code from Zenodo."
                .format(response_status))
        return

    response_status = metadata_post_request(file_name, metadata_dict, auth_parameter, post_url)
    if response_status != 201:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from Zenodo."
            .format(response_status))

    # Add extra metadata to the top level resource
    extra_metadata = metadata_dict.get('extra_metadata')
    if extra_metadata:
        attribute_url = "https://zenodo.org/api/deposit/depositions/{}".format(project_id)
        upload_extra_metadata(extra_metadata, auth_parameter, attribute_url)