Example #1
0
def transfer_post_body_validation(request):
    """
    Pull the source target name and source resource id out of a transfer request body.

    Parameters
    ----------
    request: HTTP Request Object
        Its ``data`` mapping is expected to hold the keys 'source_target_name'
        and 'source_resource_id'.

    Returns
    -------
    source_target:
        Value stored under 'source_target_name'.
    source_resource_id:
        Value stored under 'source_resource_id'.

    Raises
    ------
    PresQTValidationError
        400 if either key is absent, or if 'source_resource_id' is None or an empty string.
    """
    body = request.data

    # Each required key is looked up separately so that each one gets its own error message.
    try:
        target_name = body['source_target_name']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: source_target_name was not found in the request body.",
            status.HTTP_400_BAD_REQUEST)

    try:
        resource_id = body['source_resource_id']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: source_resource_id was not found in the request body.",
            status.HTTP_400_BAD_REQUEST)

    # Only the resource id is checked for emptiness; the target name is returned as given.
    id_is_blank = resource_id is None or resource_id == ""
    if id_is_blank:
        raise PresQTValidationError(
            "PresQT Error: source_resource_id can't be None or blank.",
            status.HTTP_400_BAD_REQUEST)

    return target_name, resource_id
Example #2
0
def fairshare_request_validator(request):
    """
    Validate the request made by the user.

    Parameters
    ----------
    request: HTTP request object
        Its ``data`` mapping must contain 'resource_id' and 'tests'.

    Returns
    -------
        The resource_id and list of tests.

    Raises
    ------
    PresQTValidationError
        400 if 'resource_id' or 'tests' is missing, or if 'tests' is not a list.
    """
    try:
        resource_id = request.data['resource_id']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'resource_id' missing in the request body.",
            status.HTTP_400_BAD_REQUEST)

    try:
        tests = request.data['tests']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'tests' missing in the request body.",
            status.HTTP_400_BAD_REQUEST)

    # isinstance (rather than an exact type comparison) also accepts list subclasses.
    if not isinstance(tests, list):
        raise PresQTValidationError(
            "PresQT Error: 'tests' must be in list format.",
            status.HTTP_400_BAD_REQUEST)

    return resource_id, tests
Example #3
0
def keyword_post_validation(request):
    """
    Validate that the correct keyword lists are in the POST body.

    Parameters
    ----------
    request: POST request
        Its ``data`` mapping must contain 'keywords'.

    Returns
    -------
    The list of keywords.

    Raises
    ------
    PresQTValidationError
        400 if 'keywords' is missing from the body or is not a list.
    """
    try:
        keywords = request.data['keywords']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'keywords' is missing from the request body.",
            status.HTTP_400_BAD_REQUEST)

    # isinstance (rather than an exact type comparison) also accepts list subclasses.
    if not isinstance(keywords, list):
        raise PresQTValidationError(
            "PresQT Error: 'keywords' must be in list format.",
            status.HTTP_400_BAD_REQUEST)

    return keywords
Example #4
0
def fairshare_evaluator_validation(request):
    """
    Read and check the 'presqt-fairshare-evaluator-opt-in' request header.

    Parameters
    ----------
    request : HTTP request object
        The header is read from ``request.META['HTTP_PRESQT_FAIRSHARE_EVALUATOR_OPT_IN']``.

    Returns
    -------
    True when the header value is 'yes', False when it is 'no'.

    Raises
    ------
    PresQTValidationError
        400 if the header is absent or holds anything other than 'yes' or 'no'.
    """
    try:
        opt_in = request.META['HTTP_PRESQT_FAIRSHARE_EVALUATOR_OPT_IN']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-fairshare-evaluator-opt-in' missing in the request headers.",
            status.HTTP_400_BAD_REQUEST)

    # Only the two literal answers are accepted; the comparison is case sensitive.
    if opt_in in ('yes', 'no'):
        return opt_in == 'yes'

    raise PresQTValidationError(
        "PresQT Error: 'presqt-fairshare-evaluator-opt-in' must be 'yes' or 'no'.",
        status.HTTP_400_BAD_REQUEST)
Example #5
0
def file_validation(request):
    """
    Fetch the uploaded 'presqt-file' from the request and confirm it is a zip archive.

    Parameters
    ----------
    request : HTTP request object
        The upload is read from ``request.FILES['presqt-file']``.

    Returns
    -------
    The file object stored under 'presqt-file'.

    Raises
    ------
    PresQTValidationError
        400 if 'presqt-file' is not among the uploaded files, or if
        ``zipfile.is_zipfile`` does not recognise it as a zip file.
    """
    try:
        uploaded = request.FILES['presqt-file']
    except MultiValueDictKeyError:
        raise PresQTValidationError(
            "PresQT Error: The file, 'presqt-file', is not found in the body of the request.",
            status.HTTP_400_BAD_REQUEST)

    # Accept the upload only when it is a zip archive.
    looks_like_zip = zipfile.is_zipfile(uploaded)
    if looks_like_zip:
        return uploaded

    raise PresQTValidationError(
        "PresQT Error: The file provided, 'presqt-file', is not a zip file.",
        status.HTTP_400_BAD_REQUEST)
Example #6
0
def target_validation(target_name, action):
    """
    Look a target up in presqt/specs/targets.json and check that it supports an action.

    Parameters
    ----------
    target_name : str
        Name of the Target.
    action : str
        Key into the target's "supported_actions" mapping.

    Returns
    -------
    A two-item tuple: True, followed by the target's 'infinite_depth' value.

    Raises
    ------
    PresQTValidationError
        400 if the target's entry for the action is False,
        404 if no target in the file has the given name.
    """
    targets = read_file('presqt/specs/targets.json', True)

    for target in targets:
        if target['name'] != target_name:
            continue

        # Only an explicit False blocks the action.
        if target["supported_actions"][action] is False:
            raise PresQTValidationError(
                "PresQT Error: '{}' does not support the action '{}'.".
                format(target_name, action), status.HTTP_400_BAD_REQUEST)
        return True, target['infinite_depth']

    # Every entry was inspected and none carried the requested name.
    raise PresQTValidationError(
        "PresQT Error: '{}' is not a valid Target name.".format(
            target_name), status.HTTP_404_NOT_FOUND)
Example #7
0
def fairshare_test_validator(test_list, valid_tests):
    """
    Check the user's requested test ids against the tests PresQT knows about.

    Parameters
    ----------
    test_list: list
        Test ids requested by the user.
    valid_tests: dict
        Known tests. The integer after the last '/' of each key is taken as that test's id.

    Returns
    -------
        The same test_list that was passed in.

    Raises
    ------
    PresQTValidationError
        400 if test_list is empty or contains an id that is not a known test id.
    """
    known_ids = [int(test_key.rsplit('/', 1)[-1]) for test_key in valid_tests]

    # An empty request is rejected outright.
    if not len(test_list):
        raise PresQTValidationError(
            "PresQT Error: At least one test is required. Options are: {}".
            format(known_ids), status.HTTP_400_BAD_REQUEST)

    # The first unknown id encountered is the one reported.
    for requested in test_list:
        if requested in known_ids:
            continue
        raise PresQTValidationError(
            "PresQT Error: '{}' not a valid test id. Options are: {}".
            format(requested, known_ids), status.HTTP_400_BAD_REQUEST)

    return test_list
def fairshake_assessment_validator(request, rubric_id):
    """
    Validate the 'rubric_answers' supplied for a fairshake assessment.

    Parameters
    ----------
    request : HTTP request object
        Answers are read from ``request.data['rubric_answers']``.
    rubric_id: str
        The ID of the rubric the requesting user would like to use

    Returns
    -------
    The validated rubric answers, or None when rubric_id is '96'.

    Raises
    ------
    PresQTValidationError
        400 if 'rubric_answers' is missing, is not a dict, lacks a metric the rubric
        requires, contains an answer that is not a known score, or contains a metric
        the rubric does not define.
    """
    # For rubric '96' nothing is read from the request and None is returned.
    if rubric_id == '96':
        return None

    try:
        answers = request.data['rubric_answers']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'rubric_answers' missing in POST body.",
            status.HTTP_400_BAD_REQUEST
        )

    # The answers must be a plain dict.
    if type(answers) is not dict:
        raise PresQTValidationError(
            "PresQT Error: 'rubric_answers' must be an object with the metric id's as the keys and answer values as the values.",
            status.HTTP_400_BAD_REQUEST
        )

    metric_lookup = read_file(
        'presqt/specs/services/fairshake/fairshake_test_fetch.json', True)[rubric_id]
    score_lookup = read_file(
        'presqt/specs/services/fairshake/fairshake_score_translator.json', True)
    required_metrics = list(metric_lookup.keys())

    # Every metric of the rubric needs an answer.
    for metric in metric_lookup:
        if metric not in answers:
            raise PresQTValidationError(
                f"Missing response for metric '{metric}'. Required metrics are: {required_metrics}",
                status.HTTP_400_BAD_REQUEST)

    # For each supplied answer, the value is checked before the metric id.
    for metric, answer in answers.items():
        if answer not in score_lookup:
            raise PresQTValidationError(
                f"'{answer}' is not a valid answer. Options are: {list(score_lookup.keys())}",
                status.HTTP_400_BAD_REQUEST)
        if metric not in metric_lookup:
            raise PresQTValidationError(
                f"'{metric}' is not a valid metric. Required metrics are: {required_metrics}",
                status.HTTP_400_BAD_REQUEST)

    return answers
Example #9
0
def keyword_action_validation(request):
    """
    Read and check the 'presqt-keyword-action' request header.

    Parameters
    ----------
    request : HTTP request object
        The header is read from ``request.META['HTTP_PRESQT_KEYWORD_ACTION']``.

    Returns
    -------
    The header value, which is one of 'automatic', 'manual' or 'none'.

    Raises
    ------
    PresQTValidationError
        400 if the header is absent or is not one of the accepted values.
    """
    try:
        requested_action = request.META['HTTP_PRESQT_KEYWORD_ACTION']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-keyword-action' missing in the request headers.",
            status.HTTP_400_BAD_REQUEST)

    if requested_action in ('automatic', 'manual', "none"):
        return requested_action

    raise PresQTValidationError(
        "PresQT Error: '{}' is not a valid keyword_action. "
        "The options are 'automatic', 'manual', or 'none'.".format(
            requested_action), status.HTTP_400_BAD_REQUEST)
Example #10
0
async def async_get(self_instance, url, session):
    """
    Coroutine that uses aiohttp to make a GET request. This is the method that will be called
    asynchronously with other GETs.

    Parameters
    ----------
    self_instance: Target Class Instance
        Instance of the Target class we are using for async calls. Its
        ``session.headers`` are sent with the request.

    url: str
        URL to call

    session: ClientSession object
        aiohttp ClientSession Object

    Returns
    -------
    Response JSON

    Raises
    ------
    PresQTValidationError
        500 if the response status is anything other than 200.
    """
    async with session.get(url,
                           headers=self_instance.session.headers) as response:
        # Checked explicitly instead of with `assert`: assertions are removed when Python
        # runs with -O, which would let error responses be parsed as if they had succeeded.
        if response.status != 200:
            raise PresQTValidationError(
                "The source target API returned an error. Please try again.",
                status.HTTP_500_INTERNAL_SERVER_ERROR)
        return await response.json()
def file_duplicate_action_validation(request):
    """
    Read and check the 'presqt-file-duplicate-action' request header.

    Parameters
    ----------
    request : HTTP request object
        The header is read from ``request.META['HTTP_PRESQT_FILE_DUPLICATE_ACTION']``.

    Returns
    -------
    The header value, which is either 'ignore' or 'update'.

    Raises
    ------
    PresQTValidationError
        400 if the header is absent or is not one of the accepted values.
    """
    try:
        duplicate_action = request.META['HTTP_PRESQT_FILE_DUPLICATE_ACTION']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'presqt-file-duplicate-action' missing in the request headers.",
            status.HTTP_400_BAD_REQUEST)

    if duplicate_action in ('ignore', 'update'):
        return duplicate_action

    raise PresQTValidationError(
        "PresQT Error: '{}' is not a valid file_duplicate_action. "
        "The options are 'ignore' or 'update'.".format(
            duplicate_action), status.HTTP_400_BAD_REQUEST)
Example #12
0
def zenodo_validation_check(token):
    """
    Check a Zenodo API token by requesting the depositions endpoint with it.

    Parameters
    ----------
    token : str
        User's Zenodo token

    Returns
    -------
    Dict of the form {'access_token': token}, for use as request query parameters.

    Raises
    ------
    PresQTValidationError
        401 whenever the depositions request answers with any status other than 200.
    """
    auth_parameter = {'access_token': token}

    # The token counts as valid only if the depositions endpoint answers 200.
    response = requests.get("https://zenodo.org/api/deposit/depositions",
                            params=auth_parameter)
    if response.status_code == 200:
        return auth_parameter

    raise PresQTValidationError(
        "Token is invalid. Response returned a 401 status code.",
        status.HTTP_401_UNAUTHORIZED)
Example #13
0
async def async_get(url, session, headers):
    """
    Coroutine that uses aiohttp to make a GET request. This is the method that will be called
    asynchronously with other GETs.

    Parameters
    ----------
    url: str
        URL to call

    session: ClientSession object
        aiohttp ClientSession Object

    headers: dict
        Necessary header for OSF calls

    Returns
    -------
    Response JSON for a 200 response, or None for a 403 response.

    Raises
    ------
    PresQTValidationError
        500 if the response status is neither 200 nor 403.
    """
    async with session.get(url, headers=headers) as response:
        # Statuses are checked explicitly instead of with `assert`: assertions are removed
        # when Python runs with -O, which would let error responses be parsed as successes.
        if response.status == 200:
            return await response.json()

        if response.status == 403:
            # TODO: doing this to avoid private file errors look into it
            return None

        raise PresQTValidationError("The source target API returned an error. Please try again.",
                                    status.HTTP_500_INTERNAL_SERVER_ERROR)
def fairshake_request_validator(request, rubric_id):
    """
    Check the rubric id and the required POST body fields of a fairshake request.

    Parameters
    ----------
    request : HTTP request object
        'project_url' and 'project_title' are read from ``request.data``.
    rubric_id: str
        The ID of the rubric the requesting user would like to use

    Returns
    -------
    Tuple of the rubric id, the digital object type mapped to that rubric,
    the project url and the project title.

    Raises
    ------
    PresQTValidationError
        400 if the rubric id is not supported or a required body field is missing.
    """
    # Rubric id -> digital object type.
    supported_rubrics = {
        "93": "repo",
        "94": "data",
        "95": "tool",
        "96": "repo"
    }
    if rubric_id not in supported_rubrics:
        raise PresQTValidationError(
            f"PresQT Error: '{rubric_id}' is not a valid rubric id. Options are: ['93', '94', '95', '96']",
            status.HTTP_400_BAD_REQUEST)

    body = request.data

    try:
        url = body['project_url']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'project_url' missing in POST body.",
            status.HTTP_400_BAD_REQUEST)

    try:
        title = body['project_title']
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: 'project_title' missing in POST body.",
            status.HTTP_400_BAD_REQUEST)

    digital_object_type = supported_rubrics[rubric_id]
    return rubric_id, digital_object_type, url, title
Example #15
0
def transfer_target_validation(source_target, destination_target):
    """
    Check in presqt/specs/targets.json that two targets may transfer to one another.

    Parameters
    ----------
    source_target : str
        The source target (where the transfer is coming from)
    destination_target : str
        The destination target (where the transfer will end up)

    Raises
    ------
    PresQTValidationError
        400 if the source's 'transfer_out' list lacks the destination, or the
        destination's 'transfer_in' list lacks the source.

    Returns
    -------
    True if no check failed.
    """
    for target in read_file('presqt/specs/targets.json', True):
        name = target['name']

        if name == source_target:
            # The source must list the destination as an outgoing partner.
            outgoing = target['supported_transfer_partners']['transfer_out']
            if destination_target not in outgoing:
                raise PresQTValidationError(
                    "PresQT Error: '{}' does not allow transfer to '{}'.".
                    format(source_target,
                           destination_target), status.HTTP_400_BAD_REQUEST)

        elif name == destination_target:
            # The destination must list the source as an incoming partner.
            incoming = target['supported_transfer_partners']['transfer_in']
            if source_target not in incoming:
                raise PresQTValidationError(
                    "PresQT Error: '{}' does not allow transfer from '{}'.".
                    format(destination_target,
                           source_target), status.HTTP_400_BAD_REQUEST)

    return True
Example #16
0
def validate_bag(bag):
    """
    Run the bag's own validation and translate any failure into a PresQT error.

    Parameters
    ----------
    bag : bagit.Bag
        The BagIt class we want to validate.

    Raises
    ------
    PresQTValidationError
        400 when ``bag.validate()`` raises ``bagit.BagValidationError``. The message is
        "Checksums failed to validate." if the first detail is a checksum mismatch,
        otherwise the text of the first detail, or of the error itself if it has no details.
    """
    try:
        bag.validate()
    except bagit.BagValidationError as error:
        # Without details, the error's own text is all there is to report.
        if not error.details:
            raise PresQTValidationError(str(error), status.HTTP_400_BAD_REQUEST)

        # Only the first detail decides the message.
        first_problem = error.details[0]
        if isinstance(first_problem, bagit.ChecksumMismatch):
            raise PresQTValidationError("Checksums failed to validate.",
                                        status.HTTP_400_BAD_REQUEST)
        raise PresQTValidationError(str(first_problem), status.HTTP_400_BAD_REQUEST)
Example #17
0
def zenodo_fetch_resources(token, search_parameter):
    """
    Fetch all users repos from Zenodo.

    Parameters
    ----------
    token : str
        User's Zenodo token
    search_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'}
        Supported keys are 'title' and 'id'. When empty or None, the user's
        depositions are fetched instead of searching records.

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }

    Raises
    ------
    PresQTValidationError
        401 if the token fails validation.
        400 if search_parameter is non-empty but contains neither 'title' nor 'id'.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError("Token is invalid. Response returned a 401 status code.",
                                    status.HTTP_401_UNAUTHORIZED)

    # Let's build them resources
    if search_parameter:
        if 'title' in search_parameter:
            search_parameters = search_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)
        elif 'id' in search_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                search_parameter['id'])
        else:
            # Previously this case fell through with zenodo_projects/is_record unbound and
            # crashed with UnboundLocalError; report it as a client error instead.
            raise PresQTValidationError(
                "PresQT Error: Zenodo search only supports the 'title' and 'id' parameters.",
                status.HTTP_400_BAD_REQUEST)

        # Record searches wrap their results in {'hits': {'hits': [...]}}.
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True
    else:
        base_url = "https://zenodo.org/api/deposit/depositions"
        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter, is_record)

    return resources
Example #18
0
def get_upload_source_metadata(instance, bag):
    """
    Get all FTS metadata files in the bag. If they are valid then get their contents, otherwise
    rename the invalid metadata file.

    Sets ``source_fts_metadata_actions``, ``all_keywords`` and ``extra_metadata`` on the
    instance. Valid metadata files are deleted from disk after their contents are collected;
    invalid ones are renamed to INVALID_PRESQT_FTS_METADATA.json. The bag manifests are
    re-saved after either change.

    Parameters
    ----------
    instance: BaseResource class instance
        Class we want to add the attributes to. Its ``resource_main_dir`` is the base
        directory the bag's payload paths are joined onto.
    bag: Bag Class instance
        The bag we want to traverse and update.

    Raises
    ------
    PresQTValidationError
        400 if a PRESQT_FTS_METADATA.json file cannot be decoded as JSON.
    """
    instance.source_fts_metadata_actions = []
    instance.all_keywords = []
    instance.extra_metadata = {}

    for bag_file in bag.payload_files():
        if os.path.split(bag_file)[-1] != 'PRESQT_FTS_METADATA.json':
            continue

        metadata_path = os.path.join(instance.resource_main_dir, bag_file)
        try:
            source_metadata_content = read_file(metadata_path, True)
        except JSONDecodeError:
            raise PresQTValidationError(
                "PRESQT_FTS_METADATA.json is not valid JSON",
                status.HTTP_400_BAD_REQUEST)

        # If the FTS metadata is valid then remove it from the bag and save the actions.
        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            source_metadata_content) is True:
            instance.source_fts_metadata_actions = (
                instance.source_fts_metadata_actions + source_metadata_content['actions'])
            instance.all_keywords = (
                instance.all_keywords + source_metadata_content['allKeywords'])
            # With several metadata files, the last one carrying extra_metadata wins.
            if 'extra_metadata' in source_metadata_content:
                instance.extra_metadata = source_metadata_content['extra_metadata']
            os.remove(metadata_path)
        # If the FTS metadata is invalid then rename the file in the bag.
        else:
            invalid_metadata_path = os.path.join(
                os.path.dirname(metadata_path),
                'INVALID_PRESQT_FTS_METADATA.json')
            os.rename(metadata_path, invalid_metadata_path)

        # The payload changed either way, so the manifests are regenerated.
        bag.save(manifests=True)
Example #19
0
def get_process_info_action(process_info_data, action):
    """
    Return the entry stored for one action inside the process_info dict.

    Parameters
    ----------
    process_info_data: dict
        Dict gathered from the process_info.json file
    action: str
        Key of the action whose data is wanted

    Returns
    -------
    The value stored under ``action``.

    Raises
    ------
    PresQTValidationError
        404 if process_info_data has no entry for the action.
    """
    try:
        action_data = process_info_data[action]
    except KeyError:
        raise PresQTValidationError(
            "PresQT Error: A {} does not exist for this user on the server.".
            format(action), status.HTTP_404_NOT_FOUND)

    return action_data
Example #20
0
def get_process_info_data(action, ticket_number):
    """
    Load process_info.json from mediafiles/<action>/<ticket_number>/.

    Parameters
    ----------
    action : str
        The action directory we should look in for the ticket_number directory
    ticket_number : str
        Requested ticket_number directory the JSON should live in

    Returns
    -------
    Whatever ``read_file`` returns for the process_info.json file.

    Raises
    ------
    PresQTValidationError
        404 if the file does not exist.
    """
    process_info_path = 'mediafiles/{}/{}/process_info.json'.format(action, ticket_number)

    try:
        process_info = read_file(process_info_path, True)
    except FileNotFoundError:
        # A missing file is reported as an unknown ticket number.
        raise PresQTValidationError(
            "PresQT Error: Invalid ticket number, '{}'.".format(ticket_number),
            status.HTTP_404_NOT_FOUND)

    return process_info
Example #21
0
def get_process_info_data(ticket_number):
    """
    Get the JSON from process_info.json in the requested ticket number directory.

    A file that cannot be decoded as JSON is re-read a bounded number of times, with a
    short pause between attempts, instead of being retried forever in a tight loop.

    Parameters
    ----------
    ticket_number : str
        Requested ticket_number directory the JSON should live in

    Returns
    -------
    JSON dictionary representing the process_info.json data.

    Raises
    ------
    PresQTValidationError
        404 if the process_info.json file does not exist.
    json.decoder.JSONDecodeError
        If the file is still not valid JSON after the final attempt.
    """
    import time  # local import: only this function needs it

    process_info_path = 'mediafiles/jobs/{}/process_info.json'.format(ticket_number)
    max_attempts = 50
    retry_delay_seconds = 0.1

    for attempt in range(1, max_attempts + 1):
        try:
            return read_file(process_info_path, True)
        except json.decoder.JSONDecodeError:
            # NOTE(review): the retry presumably covers reading the file while another
            # process is rewriting it -- confirm. A file that never becomes valid must
            # not hang the caller, so the last failure is propagated.
            if attempt == max_attempts:
                raise
            time.sleep(retry_delay_seconds)
        except FileNotFoundError:
            raise PresQTValidationError(
                "PresQT Error: Invalid ticket number, '{}'.".format(
                    ticket_number), status.HTTP_404_NOT_FOUND)
Example #22
0
def zenodo_fetch_resource(token, resource_id):
    """
    Fetch the Zenodo resource matching the resource_id given.

    Ids whose string form is at most 7 characters long are looked up as a record and,
    failing that, as one of the user's depositions. Longer ids are looked up as a file,
    first through the files endpoint and then by scanning the files of every deposition
    the user has.

    Parameters
    ----------
    token : str
        User's Zenodo token

    resource_id : str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the Zenodo resource.
    Dictionary must be in the following format:
    {
        "kind": "container",
        "kind_name": "repo",
        "id": "12345",
        "title": "23296359282_934200ec59_o.jpg",
        "date_created": "2019-05-13T14:54:17.129170Z",
        "date_modified": "2019-05-13T14:54:17.129170Z",
        "hashes": {
            "md5": "aaca7ef067dcab7cb8d79c36243823e4",
        },
        "extra": {
            "any": extra,
            "values": here
        }
    }

    Raises
    ------
    PresQTValidationError
        401 if the token fails validation.
    PresQTResponseException
        404 if the resource is not found by any of the lookups.
    """
    # auth_parameter is sent as the query parameters of every request below.
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Let's first try to get the record with this id.
    # The length of the id decides whether it is treated as a record/deposition id or a file id.
    if len(str(resource_id)) <= 7:
        base_url = "https://zenodo.org/api/records/{}".format(resource_id)
        zenodo_project = requests.get(base_url, params=auth_parameter)
        if zenodo_project.status_code == 200:
            # We found the record, pass the project to our function.
            resource = zenodo_fetch_resource_helper(zenodo_project.json(),
                                                    resource_id, True)
        else:
            # We need to get the resource from the depositions
            base_url = "https://zenodo.org/api/deposit/depositions/{}".format(
                resource_id)
            zenodo_project = requests.get(base_url, params=auth_parameter)
            if zenodo_project.status_code != 200:
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)
            else:
                resource = zenodo_fetch_resource_helper(
                    zenodo_project.json(), resource_id, False, False)

    else:
        # We got ourselves a file.
        base_url = "https://zenodo.org/api/files/{}".format(resource_id)
        zenodo_project = requests.get(base_url, params=auth_parameter)
        if zenodo_project.status_code == 200:
            # Contents returns a list of the single file
            resource = zenodo_fetch_resource_helper(
                zenodo_project.json()['contents'][0], resource_id, True, True)
        else:
            # We need to loop through the users depositions and see if the file is there.
            # NOTE(review): the status code of this listing request is not checked before
            # its JSON is iterated.
            base_url = 'https://zenodo.org/api/deposit/depositions'
            zenodo_projects = requests.get(base_url,
                                           params=auth_parameter).json()
            for entry in zenodo_projects:
                # One extra request per deposition to get its file list.
                project_files = requests.get(entry['links']['self'],
                                             params=auth_parameter).json()
                for file in project_files['files']:
                    if file['id'] == resource_id:
                        # Built by hand here; dates and identifier are set to None.
                        resource = {
                            "container": entry['id'],
                            "kind": "item",
                            "kind_name": "file",
                            "id": resource_id,
                            "identifier": None,
                            "title": file['filename'],
                            "date_created": None,
                            "date_modified": None,
                            "hashes": {
                                "md5": file['checksum']
                            },
                            "extra": {},
                            "children": []
                        }
                        # We found the file, break out of file loop
                        break
                # If the file wasn't found, we want to continue looping through the other projects.
                else:
                    continue
                # File has been found, break out of project loop
                break

            # File not found, raise exception
            # (this else belongs to the outer for loop: it runs only if no break happened)
            else:
                raise PresQTResponseException(
                    "The resource could not be found by the requesting user.",
                    status.HTTP_404_NOT_FOUND)

    return resource
Example #23
0
def zenodo_fetch_resources(token, query_parameter):
    """
    Fetch all users repos from Zenodo.

    Parameters
    ----------
    token : str
        User's Zenodo token
    query_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'}
        Supported search keys are 'title', 'id', 'general' and 'keywords'. When the dict
        is empty/None or contains 'page', the user's depositions are fetched instead.

    Returns
    -------
    List of dictionary objects that represent Zenodo resources.
    Dictionary must be in the following format
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name"
        }
    We are also returning a dictionary of pagination information.
    Dictionary must be in the following format:
        {
            "first_page": '1',
            "previous_page": None,
            "next_page": None,
            "last_page": '1',
            "total_pages": '1',
            "per_page": 10
        }

    Raises
    ------
    PresQTValidationError
        401 if the token fails validation.
        400 if a search is requested with none of the supported search keys.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # The pagination information returned is fixed; it is not derived from the response.
    pages = {
        "first_page": '1',
        "previous_page": None,
        "next_page": None,
        "last_page": '1',
        "total_pages": '1',
        "per_page": 10
    }

    # Let's build them resources
    if query_parameter and 'page' not in query_parameter:
        if 'title' in query_parameter:
            search_parameters = query_parameter['title'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=title:"{}"&sort=most_recent'.format(
                search_parameters)

        elif 'id' in query_parameter:
            base_url = 'https://zenodo.org/api/records?q=conceptrecid:{}'.format(
                query_parameter['id'])

        elif 'general' in query_parameter:
            search_parameters = query_parameter['general'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q={}'.format(
                search_parameters)

        elif 'keywords' in query_parameter:
            search_parameters = query_parameter['keywords'].replace(' ', '+')
            base_url = 'https://zenodo.org/api/records?q=keywords:{}'.format(
                search_parameters)

        else:
            # Previously this case fell through with base_url unbound and crashed with
            # UnboundLocalError; report it as a client error instead.
            raise PresQTValidationError(
                "PresQT Error: Zenodo search only supports the 'title', 'id', 'general' "
                "and 'keywords' parameters.",
                status.HTTP_400_BAD_REQUEST)

        # Record searches wrap their results in {'hits': {'hits': [...]}}.
        zenodo_projects = requests.get(
            base_url, params=auth_parameter).json()['hits']['hits']
        is_record = True

    else:
        if query_parameter and 'page' in query_parameter:
            base_url = "https://zenodo.org/api/deposit/depositions?page={}".format(
                query_parameter['page'])

        else:
            base_url = "https://zenodo.org/api/deposit/depositions?page=1"

        zenodo_projects = requests.get(base_url, params=auth_parameter).json()
        is_record = False

    resources = zenodo_fetch_resources_helper(zenodo_projects, auth_parameter,
                                              is_record)

    return resources, pages
Example #24
0
def zenodo_upload_resource(token, resource_id, resource_main_dir,
                           hash_algorithm, file_duplicate_action):
    """
    Push the contents of resource_main_dir to Zenodo.

    When resource_id is given, files are added to that existing deposition.
    Otherwise a new deposition is created, named after the first directory
    found directly under resource_main_dir (de-duplicated against the titles
    of the user's existing depositions).

    Parameters
    ----------
    token : str
        User's Zenodo token.
    resource_id : str
        ID of the deposition to upload into. Falsy to create a new deposition.
    resource_main_dir : str
        Path to the main directory holding the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm used for fixity checks. Not read by this function body.
    file_duplicate_action : str
        Action to take when a duplicate file is found. Not read by this function body.

    Returns
    -------
    The value returned by zenodo_upload_loop. Per the target function contract this is a
    dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/ignored/file.pg', 'another/ignored/file.jpg']

        'resources_updated' : Array of string file paths of files that were updated when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                            {
                                'destinationUsername': '******'
                            }
        'file_metadata_list': List of dictionaries for each file that contains metadata
                              and hash info. Must be in the following format:
                                {
                                    "actionRootPath": '/path/on/disk',
                                    "destinationPath": '/path/on/target/destination',
                                    "title": 'file_title',
                                    "destinationHash": {'hash_algorithm': 'the_hash'}
                                }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.

    Raises
    ------
    PresQTValidationError
        If the token fails zenodo_validation_check.
    PresQTResponseException
        If resource_id is given but the deposition response has no 'title' key.
    """
    try:
        auth_parameter = zenodo_validation_check(token)
    except PresQTValidationError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Only the top level of the upload directory is inspected here.
    _, top_level_dirs, _ = next(os.walk(resource_main_dir))

    # Since Zenodo is a finite depth target, the checks for path validity have already been done.
    if resource_id:
        # Uploading into an existing deposition: reuse its current title.
        deposition = requests.get(
            "https://zenodo.org/api/deposit/depositions/{}".format(
                resource_id),
            params=auth_parameter).json()

        try:
            upload_title = deposition['title']
        except KeyError:
            raise PresQTResponseException(
                "Can't find the resource with id {}, on Zenodo".format(
                    resource_id), status.HTTP_404_NOT_FOUND)

    else:
        # Creating a new deposition: pick a title that does not clash with existing ones.
        local_title = top_level_dirs[0]
        existing_depositions = requests.get(
            "https://zenodo.org/api/deposit/depositions",
            params=auth_parameter).json()
        existing_titles = [
            deposition['title'] for deposition in existing_depositions
        ]
        upload_title = get_duplicate_title(local_title, existing_titles,
                                           ' (PresQT*)')
        resource_id = zenodo_upload_helper(auth_parameter, upload_title)

    # Both paths finish the same way: send the files to the deposition's files endpoint.
    files_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(
        resource_id)

    return zenodo_upload_loop({"destinationUsername": None}, resource_id,
                              resource_main_dir, files_url, auth_parameter,
                              upload_title)
Example #25
0
def curate_nd_download_resource(token, resource_id):
    """
    Fetch the requested resource from CurateND along with its hash information.

    A resource whose kind_name is 'file' is downloaded directly. Any other resource is
    treated as a container: each entry of its 'containedFiles' is looked up and all
    of them are downloaded through async_main. A container without contained
    files is reported as an empty container.

    Parameters
    ----------
    token : str
        User's CurateND token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path': '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }

    Raises
    ------
    PresQTValidationError
        If CurateND rejects the token (PresQTInvalidTokenError is translated to a 401).
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTValidationError(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get the resource
    resource = get_curate_nd_resource(resource_id, curate_instance)
    action_metadata = {"sourceUsername": resource.extra['depositor']}

    # Get all the files for the provided resources.
    files = []
    empty_containers = []
    if resource.kind_name == 'file':
        # Get the title of the Project to add to sourcePath
        project_title = requests.get(resource.extra['isPartOf'],
                                     headers={
                                         'X-Api-Token': '{}'.format(token)
                                     }).json()['title']

        # This is so we aren't missing the few extra keys that are pulled out for the PresQT payload
        resource.extra.update({
            "id": resource.id,
            "date_submitted": resource.date_submitted
        })

        binary_file, curate_hash = resource.download()

        files.append({
            'file': binary_file,
            'hashes': {
                'md5': curate_hash
            },
            'title': resource.title,
            # If the file is the only resource we are downloading then we don't need its full path.
            'path': '/{}'.format(resource.title),
            'source_path': '/{}/{}'.format(project_title, resource.title),
            'extra_metadata': resource.extra
        })

    elif not resource.extra['containedFiles']:
        empty_containers.append('{}'.format(resource.title))

    else:
        project_title = resource.title

        # Per-file details keyed by download URL, so each downloaded payload can be
        # matched back to its title, hash and metadata without scanning a list.
        title_helper = {}
        hash_helper = {}
        extra_helper = {}
        file_urls = []
        for contained in resource.extra['containedFiles']:
            download_url = contained['downloadUrl']
            contained_file = get_curate_nd_resource(contained['id'],
                                                    curate_instance)

            title_helper[download_url] = contained_file.title
            hash_helper[download_url] = contained_file.md5
            extra_helper[download_url] = contained_file.extra
            # Queue each URL exactly once; queueing it twice downloads the file twice
            # and duplicates it in the returned 'resources' list.
            file_urls.append(download_url)

        # Run the downloads on a dedicated loop and always release it afterwards.
        loop = asyncio.new_event_loop()
        try:
            download_data = loop.run_until_complete(
                async_main(file_urls, token))
        finally:
            loop.close()

        for downloaded in download_data:
            url = downloaded['url']
            title = title_helper[url]
            files.append({
                'file': downloaded['binary_content'],
                'hashes': {
                    'md5': hash_helper[url]
                },
                'title': title,
                'source_path': '/{}/{}'.format(project_title, title),
                'path': '/{}/{}'.format(resource.title, title),
                'extra_metadata': extra_helper[url]
            })

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }