예제 #1
0
def zenodo_fetch_resources(token, search_parameter):
    """
    Fetch the user's Zenodo depositions, or search Zenodo records.

    Parameters
    ----------
    token : str
        User's Zenodo token
    search_parameter : dict
        The search parameter passed to the API View.
        Supported formats are {'title': 'search_info'} and {'id': 'concept_record_id'}.
        When it is empty or None, the depositions endpoint is listed instead.

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation, or with a 400 status if a
        non-empty search parameter contains neither a 'title' nor an 'id' key.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError("Token is invalid. Response returned a 401 status code.",
                                    status.HTTP_401_UNAUTHORIZED)

    # Let's build them resources
    if search_parameter:
        if 'title' in search_parameter:
            # Spaces are turned into '+' so the title can be embedded in the query string.
            search_parameters = search_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)
        elif 'id' in search_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(search_parameter['id'])
        else:
            # Without this guard the code below would fail with an UnboundLocalError
            # because no search url was ever built.
            raise PresQTValidationError(
                "The search parameter must contain either a 'title' or an 'id' key.",
                status.HTTP_400_BAD_REQUEST)

        # Search results are wrapped in a 'hits' envelope.
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True
    else:
        base_url = "https://zenodo.org/api/deposit/depositions"
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter, is_record)

    return resources
예제 #2
0
def zenodo_fetch_resource(token, resource_id):
    """
    Look up a single Zenodo resource (record, deposition or file) by its id.

    IDs of seven characters or fewer are looked up as a record first and then as
    one of the user's depositions. Longer IDs are treated as file IDs: the files
    endpoint is tried first, then every deposition of the user is scanned.

    Parameters
    ----------
    token : str
        User's Zenodo token

    resource_id : str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the Zenodo resource.
    Dictionary must be in the following format:
    {
        "kind": "container",
        "kind_name": "repo",
        "id": "12345",
        "title": "23296359282_934200ec59_o.jpg",
        "date_created": "2019-05-13T14:54:17.129170Z",
        "date_modified": "2019-05-13T14:54:17.129170Z",
        "hashes": {
            "md5": "aaca7ef067dcab7cb8d79c36243823e4",
        },
        "extra": {
            "any": extra,
            "values": here
        }
    }

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation.
    PresQTResponseException
        With a 404 status if no matching resource is found.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    if len(str(resource_id)) > 7:
        # Long ids are file ids. Try the files endpoint first.
        file_response = requests.get(
            "https://zenodo.org/api/files/{}".format(resource_id),
            params=auth_parameter)
        if file_response.status_code == 200:
            # 'contents' is a list holding the single file.
            return zenodo_fetch_resource_helper(
                file_response.json()['contents'][0], resource_id, True, True)

        # Otherwise scan each of the user's depositions for the file.
        depositions = requests.get(
            'https://zenodo.org/api/deposit/depositions',
            params=auth_parameter).json()
        for deposition in depositions:
            deposition_detail = requests.get(
                deposition['links']['self'], params=auth_parameter).json()
            for deposition_file in deposition_detail['files']:
                if deposition_file['id'] != resource_id:
                    continue
                # First match wins; no further depositions are requested.
                return {
                    "container": deposition['id'],
                    "kind": "item",
                    "kind_name": "file",
                    "id": resource_id,
                    "identifier": None,
                    "title": deposition_file['filename'],
                    "date_created": None,
                    "date_modified": None,
                    "hashes": {
                        "md5": deposition_file['checksum']
                    },
                    "extra": {},
                    "children": []
                }

        # Every deposition was checked without finding the file.
        raise PresQTResponseException(
            "The resource could not be found by the requesting user.",
            status.HTTP_404_NOT_FOUND)

    # Short ids: try the records endpoint before the depositions endpoint.
    record_response = requests.get(
        "https://zenodo.org/api/records/{}".format(resource_id),
        params=auth_parameter)
    if record_response.status_code == 200:
        return zenodo_fetch_resource_helper(record_response.json(),
                                            resource_id, True)

    deposition_response = requests.get(
        "https://zenodo.org/api/deposit/depositions/{}".format(resource_id),
        params=auth_parameter)
    if deposition_response.status_code != 200:
        raise PresQTResponseException(
            "The resource could not be found by the requesting user.",
            status.HTTP_404_NOT_FOUND)

    return zenodo_fetch_resource_helper(
        deposition_response.json(), resource_id, False, False)
예제 #3
0
def zenodo_fetch_resources(token, query_parameter):
    """
    Fetch the user's Zenodo depositions, or search Zenodo records.

    Parameters
    ----------
    token : str
        User's Zenodo token
    query_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'}
        Supported search keys are 'title', 'id', 'general' and 'keywords'.
        If a 'page' key is present, or the dictionary is empty or None, the
        depositions endpoint is listed instead of running a search.

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }
    We are also returning a dictionary of pagination information.
    Dictionary must be in the following format:
        {
            "first_page": '1',
            "previous_page": None,
            "next_page": None,
            "last_page": '1',
            "total_pages": '1',
            "per_page": 10
        }

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation, or with a 400 status if a
        non-empty query parameter contains none of the supported keys.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # NOTE: these pagination values are fixed; they are not derived from the response.
    pages = {
        "first_page": '1',
        "previous_page": None,
        "next_page": None,
        "last_page": '1',
        "total_pages": '1',
        "per_page": 10
    }

    # Let's build them resources
    if query_parameter and 'page' not in query_parameter:
        if 'title' in query_parameter:
            search_parameters = query_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)

        elif 'id' in query_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                query_parameter['id'])

        elif 'general' in query_parameter:
            search_parameters = query_parameter['general'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q={}'.format(
                search_parameters)

        elif 'keywords' in query_parameter:
            search_parameters = query_parameter['keywords'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=keywords:{}'.format(
                search_parameters)

        else:
            # Without this guard the request below would fail with an
            # UnboundLocalError because no search url was ever built.
            raise PresQTValidationError(
                "The search parameter must contain one of the following keys: "
                "'title', 'id', 'general', 'keywords' or 'page'.",
                status.HTTP_400_BAD_REQUEST)

        # Search results are wrapped in a 'hits' envelope.
        zenodo_projects = requests.get(
            base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True

    else:
        if query_parameter and 'page' in query_parameter:
            base_url = "https://zenodo.org/api/deposit/depositions?page={}".format(
                query_parameter['page'])

        else:
            base_url = "https://zenodo.org/api/deposit/depositions?page=1"

        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter,
                                              is_record)

    return resources, pages
예제 #4
0
def zenodo_upload_resource(token, resource_id, resource_main_dir,
                           hash_algorithm, file_duplicate_action):
    """
    Upload the contents of resource_main_dir to a Zenodo deposition.

    When resource_id is given the files go to that existing deposition.
    Otherwise a new deposition is created, titled after the first sub-directory
    of resource_main_dir (de-duplicated against the user's existing titles).

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
        (Not referenced in this function's body.)
    file_duplicate_action : str
        The action to take when a duplicate file is found
        (Not referenced in this function's body.)

    Returns
    -------
    The value returned by zenodo_upload_loop, expected to be a
    dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/ignored/file.pg', 'another/ignored/file.jpg']

        'resources_updated' : Array of string file paths of files that were updated when
         uploading the resource. Path should have the same base as resource_main_dir.
                                 Example:
                                    ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                            {
                                'destinationUsername': '******'
                            }
        'file_metadata_list': List of dictionaries for each file that contains metadata
                              and hash info. Must be in the following format:
                                {
                                    "actionRootPath": '/path/on/disk',
                                    "destinationPath": '/path/on/target/destination',
                                    "title": 'file_title',
                                    "destinationHash": {'hash_algorithm': 'the_hash'}}
                                }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.

    Raises
    ------
    PresQTValidationError
        With a 401 status if the token fails validation.
    PresQTResponseException
        With a 404 status if the deposition for resource_id has no 'title' in its response.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # First entry of the walk: (dirpath, dirnames, filenames) of the main directory.
    top_level = next(os.walk(resource_main_dir))
    action_metadata = {"destinationUsername": None}
    depositions_url = "https://zenodo.org/api/deposit/depositions"

    # Since Zenodo is a finite depth target, the checks for path validity have already been done.
    if resource_id:
        deposition = requests.get(
            "{}/{}".format(depositions_url, resource_id),
            params=auth_parameter).json()

        # A response without a 'title' is taken to mean the deposition doesn't exist.
        try:
            upload_title = deposition['title']
        except KeyError:
            raise PresQTResponseException(
                "Can't find the resource with id {}, on Zenodo".format(
                    resource_id), status.HTTP_404_NOT_FOUND)

    else:
        # Name the new deposition after the first sub-directory of the main dir.
        folder_title = top_level[1][0]
        user_depositions = requests.get(depositions_url,
                                        params=auth_parameter).json()
        existing_titles = [
            deposition['title'] for deposition in user_depositions
        ]
        upload_title = get_duplicate_title(folder_title, existing_titles,
                                           ' (PresQT*)')
        resource_id = zenodo_upload_helper(auth_parameter, upload_title)

    post_url = "{}/{}/files".format(depositions_url, resource_id)

    return zenodo_upload_loop(action_metadata, resource_id, resource_main_dir,
                              post_url, auth_parameter, upload_title)
예제 #5
0
def zenodo_download_resource(token, resource_id, process_info_path, action):
    """
    Fetch the requested resource from Zenodo along with its hash information.

    IDs longer than seven characters are treated as a single file; anything
    shorter is treated as a full project (record or deposition).

    Parameters
    ----------
    token : str
        User's Zenodo token
    resource_id : str
        ID of the resource requested
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'metadata': {
                                'sourcePath': '/full/path/at/source.jpg',
                                'title': 'file_title',
                                'sourceHashes': {'hash_algorithm': 'the_hash'},
                                'extra': {'any': 'extra'}
                             }
                         }
        'empty_containers': List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/']
                              (Always an empty list in this function.)
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }
        'extra_metadata': Value returned by extra_metadata_helper for a project download;
                          an empty dictionary for a single file download.

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token fails validation, or with a 404 status if
        the resource can't be found for this user.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            'Token is invalid. Response returned a 401 status code.',
            status.HTTP_401_UNAUTHORIZED)
    files = []
    empty_containers = []
    extra_metadata = {}
    base_url = None

    # If the resource_id is longer than 7 characters, the resource is an individual file.
    # str() keeps this check working for non-string ids, as zenodo_fetch_resource does.
    if len(str(resource_id)) > 7:
        # First we need to check if the file id given belongs to a public published record.
        zenodo_file = requests.get(
            'https://zenodo.org/api/files/{}'.format(resource_id),
            params=auth_parameter)
        if zenodo_file.status_code != 200:
            # If not, we need to loop through their depositions to look for the file.
            zenodo_projects = requests.get(
                'https://zenodo.org/api/deposit/depositions',
                params=auth_parameter).json()
            for entry in zenodo_projects:
                project_files = requests.get(entry['links']['self'],
                                             params=auth_parameter).json()
                for file in project_files['files']:
                    if file['id'] == resource_id:
                        base_url = entry['links']['self']
                        file_url = file['links']['self']
                        is_record = False
                        break
                else:
                    # If the file wasn't found we want to continue the loop.
                    continue
                break
        else:
            is_record = True
            base_url = 'https://zenodo.org/api/files/{}'.format(resource_id)
            file_url = 'https://zenodo.org/api/files/{}'.format(resource_id)

        # base_url is only set once the file has been located.
        if base_url is None:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".
                format(resource_id), status.HTTP_404_NOT_FOUND)

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, 1, action, 'download')

        files, action_metadata = zenodo_download_helper(
            is_record, base_url, auth_parameter, files, file_url)

        # Increment the number of files done in the process info file.
        increment_process_info(process_info_path, action, 'download')

    # Otherwise, it's a full project
    else:
        base_url = 'https://zenodo.org/api/records/{}'.format(resource_id)
        zenodo_record = requests.get(base_url, params=auth_parameter)
        is_record = True
        if zenodo_record.status_code != 200:
            # Not a record, fall back to the depositions endpoint.
            base_url = 'https://zenodo.org/api/deposit/depositions/{}'.format(
                resource_id)
            is_record = False
        try:
            files, action_metadata = zenodo_download_helper(
                is_record, base_url, auth_parameter, files)
        except PresQTResponseException:
            raise PresQTResponseException(
                "The resource with id, {}, does not exist for this user.".
                format(resource_id), status.HTTP_404_NOT_FOUND)

        extra_metadata = extra_metadata_helper(base_url, is_record,
                                               auth_parameter)
        # At this point each file's 'file' entry holds the url to download it from.
        file_urls = [file['file'] for file in files]

        update_process_info_message(process_info_path, action,
                                    'Downloading files from Zenodo...')
        # Add the total number of projects to the process info file.
        # This is necessary to keep track of the progress of the request.
        update_process_info(process_info_path, len(file_urls), action,
                            'download')

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            download_data = loop.run_until_complete(
                async_main(file_urls, auth_parameter, process_info_path,
                           action))
        finally:
            # A fresh loop is created on every call, so detach and close it here;
            # otherwise each download leaks the loop's underlying resources.
            asyncio.set_event_loop(None)
            loop.close()

        # Go through the file dictionaries and replace the file path with the binary_content
        for file in files:
            file['file'] = get_dictionary_from_list(
                download_data, 'url', file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata,
        'extra_metadata': extra_metadata
    }
예제 #6
0
def zenodo_fetch_keywords(token, resource_id):
    """
    Collect the keywords attached to a Zenodo resource.

    The resource's own 'keywords' (from its 'extra' data) are merged with the
    'allKeywords' entry of the PRESQT_FTS_METADATA.json file when the resource
    is a container that holds such a file. Duplicates are removed.

    Parameters
    ----------
    token: str
        User's Zenodo token
    resource_id: str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the Zenodo resource keywords.
    Dictionary must be in the following format:
        {
            "zenodo_keywords": [
                "eggs",
                "ham",
                "bacon"
            ],
            "keywords": [
                "eggs",
                "ham",
                "bacon"
            ]
        }

    Raises
    ------
    PresQTResponseException
        With a 400 status if the resource's 'extra' data has no 'keywords' entry.
    """
    auth_parameter = zenodo_validation_check(token)

    # Imported at call time rather than at module level.
    from presqt.targets.zenodo.functions.fetch import zenodo_fetch_resource
    resource = zenodo_fetch_resource(token, resource_id)

    # Look for the PresQT metadata file among the container's files.
    metadata = None
    if resource['kind'] == 'container':
        files_response = requests.get(
            "https://zenodo.org/api/deposit/depositions/{}/files".format(
                resource_id),
            params=auth_parameter)

        if files_response.status_code == 200:
            for project_file in files_response.json():
                if project_file['filename'] != 'PRESQT_FTS_METADATA.json':
                    continue
                # Download and parse the metadata
                raw_metadata = requests.get(project_file['links']['download'],
                                            params=auth_parameter).content
                metadata = json.loads(raw_metadata)

    if 'keywords' not in resource['extra'].keys():
        raise PresQTResponseException(
            "The requested Zenodo resource does not have keywords.",
            status.HTTP_400_BAD_REQUEST)

    all_keywords = resource['extra']['keywords']
    if metadata:
        try:
            # Builds a new list; the resource's own list is left untouched.
            all_keywords = all_keywords + metadata['allKeywords']
        except KeyError:
            # Metadata file without 'allKeywords': keep the resource keywords only.
            pass

    keywords = list(set(all_keywords))

    return {'zenodo_keywords': keywords, 'keywords': keywords}
예제 #7
0
def zenodo_upload_metadata(token, project_id, metadata_dict):
    """
    Write the PresQT metadata file for this action to the given Zenodo deposition.

    If a valid PRESQT_FTS_METADATA.json already exists, it is replaced by a copy
    whose 'actions' and 'allKeywords' are merged with those of metadata_dict, and
    the function returns. If the existing file fails schema validation it is
    re-uploaded as INVALID_PRESQT_FTS_METADATA.json and a fresh metadata file is
    written. When a fresh file is written, any non-empty 'extra_metadata' in
    metadata_dict is also passed to upload_extra_metadata for the deposition.

    Parameters
    ----------
    token : str
        The user's Zenodo token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If any of the metadata file uploads returns a status other than 201.
    """
    auth_parameter = zenodo_validation_check(token)
    metadata_file_name = 'PRESQT_FTS_METADATA.json'
    files_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(
        project_id)

    existing_files = requests.get(files_url, params=auth_parameter).json()

    for existing_file in existing_files:
        if existing_file['filename'] != metadata_file_name:
            continue

        # Download and parse the metadata file that is already on Zenodo.
        raw_metadata = requests.get(existing_file['links']['download'],
                                    params=auth_parameter).content
        current_metadata = json.loads(raw_metadata)

        is_valid = schema_validator(
            'presqt/json_schemas/metadata_schema.json',
            current_metadata) is True

        # The old file is always deleted and re-posted (the original author notes
        # that Zenodo's put method was having issues).
        requests.delete(existing_file['links']['self'], params=auth_parameter)

        if not is_valid:
            # Improperly formatted metadata is kept under a different file name;
            # a fresh metadata file is then written below the loop.
            status_code = metadata_post_request(
                'INVALID_PRESQT_FTS_METADATA.json', current_metadata,
                auth_parameter, files_url)
            if status_code != 201:
                raise PresQTError(
                    "The request to rename the invalid metadata file has returned a {} error code from Zenodo."
                    .format(status_code))
            break

        # New entries come first, followed by the ones already stored.
        merged_actions = list(
            itertools.chain(metadata_dict['actions'],
                            current_metadata['actions']))
        merged_keywords = list(
            itertools.chain(metadata_dict['allKeywords'],
                            current_metadata['allKeywords']))
        current_metadata['actions'] = merged_actions
        current_metadata['allKeywords'] = list(set(merged_keywords))

        status_code = metadata_post_request(metadata_file_name,
                                            current_metadata, auth_parameter,
                                            files_url)
        # When updating an existing metadata file, Zenodo returns a 201 status
        if status_code != 201:
            raise PresQTError(
                "The request to update the metadata file has returned a {} error code from Zenodo."
                .format(status_code))
        return

    # No usable metadata file exists, so write a brand new one.
    status_code = metadata_post_request(metadata_file_name, metadata_dict,
                                        auth_parameter, files_url)
    if status_code != 201:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from Zenodo."
            .format(status_code))

    # Add extra metadata to the top level resource
    extra_metadata = metadata_dict.get('extra_metadata')
    if extra_metadata:
        deposition_url = "https://zenodo.org/api/deposit/depositions/{}".format(
            project_id)
        upload_extra_metadata(extra_metadata, auth_parameter, deposition_url)