コード例 #1
0
def search_validator(search_parameter):
    """
    Ensure the query parameter passed into the API view is valid.

    A valid search has exactly one key, that key is either 'id' or 'title', and a
    'title' value contains none of the disallowed special characters.

    Parameters
    ----------
    search_parameter : dict
        The query parameter passed to the view.

    Raises
    ------
    PresQTResponseException
        With a 400 status if the search is empty, has more than one key, uses an
        unsupported key, or has a title containing a special character.
    """
    error_message = 'PresQT Error: The search query is not formatted correctly.'

    # Exactly one key is required. Rejecting an empty dict here also guarantees the
    # key lookup below can never fail with an IndexError.
    if len(search_parameter) != 1:
        raise PresQTResponseException(error_message, status.HTTP_400_BAD_REQUEST)

    # Check that the query parameter is in the list of accepted searches.
    search_key = next(iter(search_parameter))
    if search_key not in ('id', 'title'):
        raise PresQTResponseException(error_message, status.HTTP_400_BAD_REQUEST)

    # Ensure that there are no special characters in a title search. The pattern is a
    # raw string so the backslash reaches the regex engine instead of being treated
    # as an (invalid) string escape.
    special_characters = re.compile(r'[@_!#$%^&*()<>?/\|}{~:]')
    if search_key == 'title' and special_characters.search(
            search_parameter['title']) is not None:
        raise PresQTResponseException(error_message, status.HTTP_400_BAD_REQUEST)
コード例 #2
0
    def create_file(self, file_name, file_to_write, file_duplicate_action):
        """
        Upload a file to a container.

        Parameters
        ----------
        file_name : str
            Name of the file to create.
        file_to_write : bytes
            File to create.
        file_duplicate_action : str
            Flag for how to handle the case of the file already existing.
            'ignore' and 'update' are the handled values.

        Returns
        -------
        A tuple of (action, file) where action is 'created', 'updated' or 'ignored'
        and file is the value returned by get_file_by_name for file_name.

        Raises
        ------
        PresQTResponseException
            If the upload or update returns an unexpected status code, or if the file
            already exists and file_duplicate_action is not a handled value.
        """
        # When uploading a large file (>a few MB) that already exists
        # we sometimes get a ConnectionError instead of a status == 409.
        connection_error = False
        try:
            response = self.put(self._new_file_url,
                                params={'name': file_name},
                                data=file_to_write)
        except ConnectionError:
            connection_error = True

        is_duplicate = connection_error or response.status_code == 409
        if not is_duplicate:
            # File uploaded successfully
            if response.status_code == 201:
                return 'created', self.get_file_by_name(file_name)
            raise PresQTResponseException(
                "Response has status code {} while creating file {}".format(
                    response.status_code, file_name),
                status.HTTP_400_BAD_REQUEST)

        # The file is a duplicate, so either ignore or update it.
        original_file = self.get_file_by_name(file_name)

        if file_duplicate_action == 'ignore':
            return 'ignored', original_file

        if file_duplicate_action == 'update':
            # Only attempt to update the file if the new file is different than the original
            if hash_generator(file_to_write, 'md5') == original_file.hashes['md5']:
                return 'ignored', original_file

            # Reuse the file object fetched above rather than looking it up again.
            update_response = original_file.update(file_to_write)
            if update_response.status_code == 200:
                return 'updated', self.get_file_by_name(file_name)
            raise PresQTResponseException(
                "Response has status code {} while updating file {}".format(
                    update_response.status_code, file_name),
                status.HTTP_400_BAD_REQUEST)

        # Fail loudly instead of implicitly returning None, which callers
        # expecting an (action, file) tuple cannot unpack.
        raise PresQTResponseException(
            "File {} already exists and '{}' is not a valid duplicate action.".format(
                file_name, file_duplicate_action),
            status.HTTP_400_BAD_REQUEST)
コード例 #3
0
ファイル: structure_validation.py プロジェクト: ndlib/presqt
def structure_validation(instance):
    """
    Validate the top-level layout of the files or project about to be uploaded.

    Parameters
    ----------
    instance: BaseResource class instance
        Object providing data_directory, destination_resource_id and action.

    Raises
    ------
    PresQTResponseException
        With a 400 status if more than one directory exists at the top level, or if
        files exist at the top level while no destination resource was given and the
        action is 'resource_upload' or 'resource_transfer_in'.
    """
    # Only the first entry yielded by os.walk (the top level of the directory) matters.
    _, top_level_folders, top_level_files = next(os.walk(instance.data_directory))

    if len(top_level_folders) > 1:
        raise PresQTResponseException(
            "PresQT Error: Repository is not formatted correctly. Multiple directories exist at the top level.",
            status.HTTP_400_BAD_REQUEST)

    # Top-level files are only rejected when there is no destination resource.
    if not top_level_files or instance.destination_resource_id is not None:
        return

    if instance.action == 'resource_upload':
        raise PresQTResponseException(
            "PresQT Error: Repository is not formatted correctly. Files exist at the top level.",
            status.HTTP_400_BAD_REQUEST)

    if instance.action == 'resource_transfer_in':
        raise PresQTResponseException(
            "PresQT Error: You need to select a resource to transfer into, as a single file can not be uploaded as a new project.",
            status.HTTP_400_BAD_REQUEST)
コード例 #4
0
ファイル: keywords.py プロジェクト: ndlib/presqt
def figshare_upload_keywords(token, resource_id, keywords):
    """
    Upload the keywords to a given resource id.

    The resource id is split on ':'. An id with three parts is rejected, an id with a
    single part is returned as-is without contacting FigShare, and otherwise the
    second part is used as the article id whose tags are replaced.

    Parameters
    ----------
    token: str
        User's FigShare token
    resource_id: str
        ID of the resource requested
    keywords: list
        List of new keywords to upload.

    Returns
    -------
    A dictionary object that represents the updated FigShare resource keywords.
    Dictionary is in the following format:
        {
            "updated_keywords": [
                'eggs',
                'EGG',
                'Breakfast'
            ],
            "project_id": "<the resource_id that was passed in>"
        }
    """
    id_parts = resource_id.split(":")
    part_count = len(id_parts)
    result = {'updated_keywords': keywords, 'project_id': resource_id}

    if part_count == 3:
        raise PresQTResponseException(
            "FigShare projects/files do no have keywords.",
            status.HTTP_400_BAD_REQUEST)
    if part_count == 1:
        return result

    # Imported here rather than at module level, as in the rest of this code base.
    from presqt.targets.figshare.functions.fetch import figshare_fetch_resource
    # This will raise an error if the id is invalid
    figshare_fetch_resource(token, resource_id)

    article_url = "https://api.figshare.com/v2/account/articles/{}".format(id_parts[1])
    auth_headers = {"Authorization": "token {}".format(token)}
    payload = json.dumps({"tags": keywords})

    response = requests.put(article_url, headers=auth_headers, data=payload)

    # Anything other than a 205 is treated as a failure.
    if response.status_code != 205:
        raise PresQTResponseException(
            "FigShare returned a {} error trying to update keywords.".format(
                response.status_code), status.HTTP_400_BAD_REQUEST)

    return result
コード例 #5
0
def upload_parts(headers, upload_url, parts, file):
    """
    Upload the parts of the file to FigShare. File offsets are determined by the initial
    FigShare POST upload.

    Parameters
    ----------
    headers: dict
        The user's FigShare Auth headers. This dictionary is not modified.
    upload_url: str
        The url to upload the file
    parts: list
        List of parts to be uploaded. Each part is a dict with 'partNo',
        'startOffset' and 'endOffset' keys.
    file: file-like object
        The file itself; it must support seek() and read().

    Raises
    ------
    PresQTResponseException
        With a 400 status if any part upload does not return a 200.
    """
    # Work on a copy so the caller's auth headers do not silently gain a binary
    # Content-Type that would then leak into its later requests.
    part_headers = dict(headers)
    part_headers["Content-Type"] = "application/binary"

    for part in parts:
        start_offset = part['startOffset']
        file.seek(start_offset)
        # endOffset is inclusive, hence the + 1.
        data = file.read(part['endOffset'] - start_offset + 1)
        upload_status = requests.put(
            "{}/{}".format(upload_url, part['partNo']), headers=part_headers, data=data)
        if upload_status.status_code != 200:
            raise PresQTResponseException(
                "FigShare returned an error trying to upload. Some items may still have been created on FigShare.", status.HTTP_400_BAD_REQUEST)
コード例 #6
0
def validation_check(token):
    """
    Ensure a proper FigShare API token has been provided.

    Parameters
    ----------
    token : str
        User's FigShare token

    Returns
    -------
    A tuple of the properly formatted FigShare Auth header and the 'email' value
    from the account response.

    Raises
    ------
    PresQTResponseException
        With a 401 status if FigShare answers with a 403.
    """
    headers = {"Authorization": "token {}".format(token)}
    # Use HTTPS, like every other FigShare call, so the token is never sent in clear text.
    request = requests.get("https://api.figshare.com/v2/account",
                           headers=headers)

    if request.status_code == 403:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 403 status code.",
            status.HTTP_401_UNAUTHORIZED)
    username = request.json()['email']

    return headers, username
コード例 #7
0
def keyword_enhancer(keywords):
    """
    Send a list of keywords to SciGraph to be enhanced.

    Parameters
    ----------
    keywords: list
        The list of keywords to be enhanced.

    Returns
    -------
    A tuple of two lists, both lower-cased and de-duplicated (order is not
    guaranteed): the newly suggested keywords, and the original keywords combined
    with the suggestions.

    Raises
    ------
    PresQTResponseException
        With a 400 status if no keywords were given.
    """
    from urllib.parse import quote

    if not keywords:
        raise PresQTResponseException(
            'There are no keywords to enhance for this resource.',
            status.HTTP_400_BAD_REQUEST)

    lowered_keywords = [keyword.lower() for keyword in keywords]
    # Set membership keeps the per-label check cheap.
    known_keywords = set(lowered_keywords)
    suggested_keywords = set()

    # Get the new keyword suggestions from Sci-Graph
    for keyword in lowered_keywords:
        # Escape the keyword so characters such as '/', '?' or '#' cannot change
        # the path or query of the request URL.
        response = requests.get(
            'http://ec-scigraph.sdsc.edu:9000/scigraph/vocabulary/term/{}?limit=20'
            .format(quote(keyword, safe='')))
        if response.status_code != 200:
            continue

        terms = response.json()
        # Guard against an empty result instead of failing on terms[0].
        if not terms:
            continue

        # Only the labels of the first returned term are used.
        for label in terms[0]['labels']:
            lowered_label = label.lower()
            if lowered_label not in known_keywords:
                suggested_keywords.add(lowered_label)

    return list(suggested_keywords), list(known_keywords | suggested_keywords)
コード例 #8
0
ファイル: download_helper.py プロジェクト: ndlib/presqt
def zenodo_download_helper(is_record,
                           base_url,
                           auth_parameter,
                           files,
                           file_url=None):
    """
    This is used in Zenodo's download function.

    Parameters
    ----------
    is_record : boolean
        Flag for if the download is a public record
    base_url : str
        The url of the Zenodo project.
    auth_parameter : dict
        The Authentication parameter expected by Zenodo. It is sent as the query
        parameters of each request.
    files : list
        The list of files to append to.
    file_url : str
        If the download is a single file, we also pass the link to the file.

    Returns
    -------
    The list of file dictionaries and action_metadata.

    Raises
    ------
    PresQTResponseException
        With a 404 status if the project request does not return a 200.
    """
    project_info = requests.get(base_url, params=auth_parameter)
    # Any non-200 answer is reported to the caller as a 404.
    if project_info.status_code != 200:
        raise PresQTResponseException(
            'The response returned a 404 not found status code.',
            status.HTTP_404_NOT_FOUND)
    project_helper = project_info.json()

    if is_record is True:
        # Record endpoints are inconsistent, so there are a few checks that need to happen.
        try:
            username = project_helper['owners'][0]
        except (KeyError, IndexError):
            # 'owners' may be missing entirely or present but empty.
            username = None
        try:
            project_name = project_helper['metadata']['title']
        except KeyError:
            project_name = None
    else:
        # The deposition endpoints are consistent
        username = project_helper['owner']
        project_name = project_helper['title']

    action_metadata = {"sourceUsername": username}

    if file_url:
        # Single file download: fetch that file's metadata and hand it to the file helper.
        metadata_helper = requests.get(file_url, params=auth_parameter).json()
        files = zenodo_file_download_helper(auth_parameter, is_record,
                                            project_name, metadata_helper,
                                            files)
    else:
        files = zenodo_project_download_helper(is_record, project_name,
                                               project_helper, files)

    return files, action_metadata
コード例 #9
0
ファイル: base.py プロジェクト: craig-willis/presqt
    def get(self, url, *args, **kwargs):
        """
        Make a GET request through the session and translate known error statuses
        into exceptions.

        Parameters
        ----------
        url: str
            URL to make the GET request to.
        *args, **kwargs
            Forwarded unchanged to the session's get().

        Returns
        -------
        HTTP Response object when the status code is 200. For any status code other
        than 200, 410, 404 and 403 nothing is raised and None is returned.

        Raises
        ------
        PresQTResponseException
            On a 410 status.
        OSFNotFoundError
            On a 404 status.
        OSFForbiddenError
            On a 403 status.
        """
        response = self.session.get(url, *args, **kwargs)
        code = response.status_code

        if code == 200:
            return response
        if code == 410:
            raise PresQTResponseException(
                "The requested resource is no longer available.",
                status.HTTP_410_GONE)
        if code == 404:
            raise OSFNotFoundError("Resource not found.",
                                   status.HTTP_404_NOT_FOUND)
        if code == 403:
            raise OSFForbiddenError(
                "User does not have access to this resource with the token provided.",
                status.HTTP_403_FORBIDDEN)
        # Every other status code falls through without raising.
        return None
コード例 #10
0
def create_repository(title, token):
    """
    Create a GitHub repository.

    If GitHub answers with a 422, the titles returned by github_paginated_data are
    used to derive a de-duplicated title and the creation is retried with it.

    Parameters
    ----------
    title : str
        The title of the repo being created
    token : str
        The users GitHub API token.

    Returns
    -------
    The title the repository was actually created with.

    Raises
    ------
    PresQTResponseException
        With a 400 status if the repository could not be created.
    """
    header = {"Authorization": "token {}".format(token)}
    repository_payload = {"name": title}
    response = requests.post('https://api.github.com/user/repos',
                             headers=header,
                             data=json.dumps(repository_payload))

    if response.status_code == 201:
        return title

    if response.status_code == 422:
        # This is a little gross, but there isn't a better way to do it that I'm aware of.
        from presqt.targets.github.utilities import github_paginated_data

        titles = [data['name'] for data in github_paginated_data(token)]
        new_title = get_duplicate_title(title, titles, '-PresQT*-')

        # Only retry when de-duplication actually produced a different name. If the
        # title is unchanged, the retry would send the same request and get the same
        # 422, recursing without end.
        if new_title != title:
            return create_repository(new_title, token)

    raise PresQTResponseException(
        "Response has status code {} while creating repository {}".format(
            response.status_code, title), status.HTTP_400_BAD_REQUEST)
コード例 #11
0
    def create_folder(self, folder_name):
        """
        Create a new sub-folder for this container.

        Parameters
        ----------
        folder_name : str
            Name of the folder to create.

        Returns
        -------
        The value of get_folder_by_name(folder_name), looked up after a 201 or a 409
        response to the creation request.

        Raises
        ------
        PresQTResponseException
            With a 400 status for any other response status code.
        """
        response = self.put(self._new_folder_url, params={'name': folder_name})

        # Both a 201 and a 409 are answered by looking the folder up by name.
        if response.status_code not in (409, 201):
            raise PresQTResponseException(
                "Response has status code {} while creating folder {}".format(
                    response.status_code, folder_name),
                status.HTTP_400_BAD_REQUEST)

        return self.get_folder_by_name(folder_name)
コード例 #12
0
ファイル: main.py プロジェクト: ndlib/presqt
    def resource(self, resource_id):
        """
        Get an item or file with the given resource_id.

        Parameters
        ----------
        resource_id : str
            id of the resource we want to fetch.

        Returns
        -------
        Instance of the desired resource: an Item when the payload has a
        'containedFiles' key, otherwise a File.

        Raises
        ------
        PresQTResponseException
            With a 404 status if the payload contains an 'error' key.
        """
        url = self.session.build_url(resource_id)
        # Parse the body once and reuse it everywhere below.
        response_json = self.get(url).json()

        # If the id given can't be found or is of type person, we want to raise an exception.
        # Errors are only present in the payload if an error occurred.
        if 'error' in response_json:
            raise PresQTResponseException(
                'The resource, {}, could not be found on CurateND.'.format(
                    resource_id), status.HTTP_404_NOT_FOUND)

        # If the containedFiles key is not in the payload, we are creating a file.
        if 'containedFiles' in response_json:
            return Item(response_json, self.session)
        return File(response_json, self.session)
コード例 #13
0
def create_article(article_title, headers, project_id):
    """
    Create a FigShare article inside a project.

    Parameters
    ----------
    article_title : str
        The title of the article being created
    headers : dict
        The users FigShare Auth header
    project_id : str
        Id of the project the article is created in.

    Returns
    -------
    The 'id' value of the newly created article.

    Raises
    ------
    PresQTResponseException
        With a 400 status if the creation request does not return a 201.
    """
    create_url = "https://api.figshare.com/v2/account/projects/{}/articles".format(project_id)
    payload = json.dumps({"title": article_title})
    creation = requests.post(create_url, headers=headers, data=payload)

    if creation.status_code != 201:
        raise PresQTResponseException(
            "Response has status code {} while creating article {}".format(
                creation.status_code, article_title),
            status.HTTP_400_BAD_REQUEST)

    # The id is read from the resource behind the 'location' URL of the creation response.
    article_location = creation.json()['location']
    article = requests.get(article_location, headers=headers).json()

    return article['id']
コード例 #14
0
ファイル: main.py プロジェクト: ndlib/presqt
    def create_project(self, title):
        """
        Create a project for this user.

        The requested title is first passed through get_duplicate_title together with
        the titles of the user's existing projects.

        Parameters
        ----------
        title : str
            Requested title of the new project.

        Returns
        -------
        The result of self.project() for the id found in the creation response.

        Raises
        ------
        PresQTResponseException
            With a 400 status if the creation request does not return a 201.
        """
        existing_titles = [existing.title for existing in self.projects()[1]]
        title = get_duplicate_title(title, existing_titles, ' (PresQT*)')

        attributes = {"title": title, "category": "project"}
        payload = {"data": {"type": "nodes", "attributes": attributes}}

        response = self.post(self.session.build_url('nodes'),
                             data=json.dumps(payload),
                             headers={'content-type': 'application/json'})

        if response.status_code != 201:
            raise PresQTResponseException(
                "Response has status code {} while creating project {}".format(
                    response.status_code, title), status.HTTP_400_BAD_REQUEST)

        return self.project(response.json()['data']['id'])
コード例 #15
0
ファイル: keywords.py プロジェクト: ndlib/presqt
def gitlab_fetch_keywords(token, resource_id):
    """
    Fetch the keywords of a given resource id.

    The keywords are the resource's tag list combined with the
    'allEnhancedKeywords' of the project's PRESQT_FTS_METADATA.json file, when that
    file can be fetched and has that key.

    Parameters
    ----------
    token: str
        User's GitLab token
    resource_id: str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the GitLab resource keywords.
    Dictionary is in the following format:
        {
            "tag_list": [
                "eggs",
                "ham",
                "bacon"
            ],
            "keywords": [
                "eggs",
                "ham",
                "bacon"
            ]
        }

    Raises
    ------
    PresQTResponseException
        With a 400 status if the resource is a file or a directory.
    """
    # Only the auth headers are needed here; the second returned value is unused.
    headers, _ = validation_check(token)

    from presqt.targets.gitlab.functions.fetch import gitlab_fetch_resource

    resource = gitlab_fetch_resource(token, resource_id)
    if resource['kind_name'] in ['dir', 'file']:
        raise PresQTResponseException(
            "On GitLab only projects have keywords, not files or directories, therefore PresQT keyword features are not supported at GitLab's file or directory levels.",
            status.HTTP_400_BAD_REQUEST)

    # Look into the project for the PresQT metadata file.
    metadata_url = "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA.json?ref=master".format(
        resource_id)
    metadata_response = requests.get(metadata_url, headers=headers)

    metadata = None
    if metadata_response.status_code == 200:
        # The file content arrives base64 encoded.
        decoded_content = base64.b64decode(metadata_response.json()['content'])
        metadata = json.loads(decoded_content)

    tags = resource['extra']['tag_list']
    enhanced_keywords = []
    if metadata:
        try:
            enhanced_keywords = metadata['allEnhancedKeywords']
        except KeyError:
            # Metadata without enhanced keywords contributes nothing.
            pass

    keywords = list(set(tags + enhanced_keywords))

    return {'tag_list': keywords, 'keywords': keywords}
コード例 #16
0
ファイル: keywords.py プロジェクト: ndlib/presqt
def curate_nd_fetch_keywords(token, resource_id):
    """
    Fetch the keywords of a given resource id.

    Parameters
    ----------
    token: str
        User's CurateND token
    resource_id: str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the CurateND resource keywords. Both keys
    carry the resource's 'subject' value.
    Dictionary is in the following format:
        {
            "subject": [
                "eggs",
                "ham",
                "bacon"
            ],
            "keywords": [
                "eggs",
                "ham",
                "bacon"
            ]
        }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is invalid, or a 400 status if the resource
        has no 'subject' entry in its extra data.
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    extra = get_curate_nd_resource(resource_id, curate_instance).extra

    # Without a 'subject' entry there is nothing to report as keywords.
    if 'subject' not in extra.keys():
        raise PresQTResponseException(
            "The given resource id does not support keywords.",
            status.HTTP_400_BAD_REQUEST)

    return {'subject': extra['subject'], 'keywords': extra['subject']}
コード例 #17
0
ファイル: fetch.py プロジェクト: craig-willis/presqt
def curate_nd_fetch_resource(token, resource_id):
    """
    Fetch the CurateND resource matching the resource_id given.

    Parameters
    ----------
    token : str
        User's CurateND token

    resource_id : str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the CurateND resource.
    Dictionary is in the following format:
    {
        "kind": "item",
        "kind_name": "file",
        "id": "12345",
        "title": "23296359282_934200ec59_o.jpg",
        "date_created": "2019-05-13T14:54:17.129170Z",
        "date_modified": "2019-05-13T14:54:17.129170Z",
        "hashes": {
            "md5": "aaca7ef067dcab7cb8d79c36243823e4",
        },
        "extra": {
            "any": extra,
            "values": here
        }
    }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is invalid.
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED,
        )

    found = get_curate_nd_resource(resource_id, curate_instance)

    # Map the resource's attributes onto the dictionary layout documented above.
    return {
        "kind": found.kind,
        "kind_name": found.kind_name,
        "id": found.id,
        "title": found.title,
        "date_created": found.date_submitted,
        "date_modified": found.modified,
        "hashes": {"md5": found.md5},
        "extra": found.extra,
    }
コード例 #18
0
ファイル: keywords.py プロジェクト: ndlib/presqt
def gitlab_upload_keywords(token, resource_id, keywords):
    """
    Upload the keywords to a given resource id.

    The keywords are lower-cased and de-duplicated before being sent. When the
    resource is a file or a directory, the keywords are applied to the project
    taken from the part of the resource's id before the first ':'.

    Parameters
    ----------
    token: str
        User's GitLab token
    resource_id: str
        ID of the resource requested
    keywords: list
        List of new keywords to upload.

    Returns
    -------
    A dictionary object that represents the updated GitLab resource keywords.
    Dictionary is in the following format:
        {
            "updated_keywords": [
                'eggs',
                'egg',
                'breakfast'
            ],
            "project_id": "<id of the project that was updated>"
        }

    Raises
    ------
    PresQTResponseException
        With a 400 status if GitLab does not answer the update with a 200.
    """
    from presqt.targets.gitlab.functions.fetch import gitlab_fetch_resource

    resource = gitlab_fetch_resource(token, resource_id)

    project_id = resource_id
    if resource['kind_name'] in ['file', 'dir']:
        project_id = resource['id'].partition(':')[0]

    headers = {"Private-Token": "{}".format(token)}
    put_url = 'https://gitlab.com/api/v4/projects/{}'.format(project_id)

    new_keywords_string = ','.join({keyword.lower() for keyword in keywords})

    # Pass the tag list through `params` so it is URL-encoded; keywords containing
    # '&', '#', '+' or spaces would otherwise corrupt the query string.
    response = requests.put(put_url,
                            params={'tag_list': new_keywords_string},
                            headers=headers)

    if response.status_code != 200:
        raise PresQTResponseException(
            "GitLab returned a {} error trying to update keywords.".format(
                response.status_code), status.HTTP_400_BAD_REQUEST)

    return {
        'updated_keywords': response.json()['tag_list'],
        'project_id': project_id
    }
コード例 #19
0
def zenodo_upload_helper(auth_parameter, project_title=None):
    """
    Initialize a new project on Zenodo.

    An empty deposition is created first, then its metadata (title, upload type,
    description and creator) is filled in with a second request.

    Parameters
    ----------
    auth_parameter : dict
        The Authentication parameter expected by Zenodo. It is sent as the query
        parameters of each request.
    project_title : str
        Title to give the new project.

    Returns
    -------
    The new Project ID.

    Raises
    ------
    PresQTResponseException
        With a 400 status if creating the deposition does not return a 201.
    """
    json_headers = {"Content-Type": "application/json"}
    depositions_url = 'https://zenodo.org/api/deposit/depositions'

    creation = requests.post(depositions_url,
                             params=auth_parameter,
                             json={},
                             headers=json_headers)

    if creation.status_code != 201:
        raise PresQTResponseException(
            "Zenodo returned a {} status code while trying to create the project."
            .format(creation.status_code), status.HTTP_400_BAD_REQUEST)

    deposition = creation.json()
    project_id = deposition['id']
    project_owner = deposition['owner']

    # Now we need to add some info to the project.
    metadata = {
        'title': project_title,
        'upload_type': 'other',
        'description': 'PresQT Upload',
        'creators': [{'name': str(project_owner)}],
    }

    # NOTE(review): the response of this metadata update is not inspected, so a
    # failed update goes unnoticed here.
    requests.put('{}/{}'.format(depositions_url, project_id),
                 params=auth_parameter,
                 data=json.dumps({'metadata': metadata}),
                 headers=json_headers)

    return project_id
コード例 #20
0
def get_all_paginated_data(url, token):
    """
    Get all data for the requesting user.

    Parameters
    ----------
    url : str
        URL to the current data to get

    token: str
        User's OSF token

    Returns
    -------
    List of the data points gathered from the first page and, unless the url
    already contains '?filter' or '?page', from every following page.

    Raises
    ------
    PresQTResponseException
        On a 410 status, or with a 400 status on any other unexpected status code.
    OSFNotFoundError
        On a 404 status.
    OSFForbiddenError
        On a 403 status.
    """
    headers = {'Authorization': 'Bearer {}'.format(token)}
    # Get initial data
    response = requests.get(url, headers=headers)

    if response.status_code == 410:
        raise PresQTResponseException("The requested resource is no longer available.", status.HTTP_410_GONE)
    elif response.status_code == 404:
        raise OSFNotFoundError("Resource not found.", status.HTTP_404_NOT_FOUND)
    elif response.status_code == 403:
        raise OSFForbiddenError(
            "User does not have access to this resource with the token provided.", status.HTTP_403_FORBIDDEN)
    elif response.status_code != 200:
        # Any other status used to fall through and crash with an UnboundLocalError
        # below; report it explicitly instead.
        raise PresQTResponseException(
            "Response has status code {} while getting paginated data.".format(
                response.status_code), status.HTTP_400_BAD_REQUEST)

    response_json = response.json()
    data = response_json['data']
    meta = response_json['links']['meta']

    # Calculate pagination pages
    if '?filter' in url or '?page' in url:
        # We already have all the data we need for this request
        return data

    page_total = get_page_total(meta['total'], meta['per_page'])
    # Page 1 was fetched above, so the remaining pages start at 2.
    url_list = ['{}?page={}'.format(url, number) for number in range(2, page_total + 1)]

    # Call all pagination pages asynchronously
    from presqt.targets.osf.utilities.utils.async_functions import run_urls_async
    children_data = run_urls_async(url_list, headers)
    for child in children_data:
        data.extend(child['data'])

    return data
コード例 #21
0
ファイル: fetch.py プロジェクト: craig-willis/presqt
def osf_fetch_resources(token, search_parameter):
    """
    Fetch all OSF resources for the user connected to the given token.

    Parameters
    ----------
    token : str
        User's OSF token
    search_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'} or {'id': 'search_info'}.
        An empty value means no search filter is applied.

    Returns
    -------
    List of dictionary objects that represent OSF resources.
    Dictionary must be in the following format:
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name",
        }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is invalid, or a 400 status if the search
        parameter has neither a 'title' nor an 'id' key.
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # No search parameter means no filter url is passed on.
    url = None
    if search_parameter:
        if 'title' in search_parameter:
            # Format the search that is coming in to be passed to the OSF API
            search_parameters = search_parameter['title'].replace(' ', '+')
            url = 'https://api.osf.io/v2/nodes/?filter[title]={}'.format(
                search_parameters)
        elif 'id' in search_parameter:
            url = 'https://api.osf.io/v2/nodes/?filter[id]={}'.format(
                search_parameter['id'])
        else:
            # An unsupported key used to leave `url` unbound and crash below.
            raise PresQTResponseException(
                'PresQT Error: The search query is not formatted correctly.',
                status.HTTP_400_BAD_REQUEST)

    # Errors raised by get_resources propagate to the caller unchanged.
    return osf_instance.get_resources(url)
コード例 #22
0
ファイル: fetch.py プロジェクト: craig-willis/presqt
def curate_nd_fetch_resources(token, search_parameter):
    """
    Fetch all CurateND resources for the user connected to the given token.

    Parameters
    ----------
    token : str
        User's CurateND token
    search_parameter : dict
        The search parameter passed to the API View
        Gets passed formatted as {'title': 'search_info'} or {'id': 'search_info'}.
        An empty value means all of the user's resources are fetched.

    Returns
    -------
    List of dictionary objects that represent CurateND resources.
    Dictionary must be in the following format:
        {
            "kind": "container",
            "kind_name": "folder",
            "id": "12345",
            "container": "None",
            "title": "Folder Name",
        }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is invalid, or a 400 status if the search
        parameter has neither a 'title' nor an 'id' key.
    """
    try:
        curate_instance = CurateND(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    if not search_parameter:
        return curate_instance.get_resources()

    if 'title' in search_parameter:
        # Format the search that is coming in to be passed to the Curate API
        search_parameters = search_parameter['title'].replace(' ', '+')
        search_url = 'https://curate.nd.edu/api/items?q={}'.format(
            search_parameters)
        # Errors raised by get_resources propagate to the caller unchanged.
        return curate_instance.get_resources(search_url)

    if 'id' in search_parameter:
        return get_curate_nd_resources_by_id(token, search_parameter['id'])

    # An unsupported key used to leave `resources` unbound and crash on return.
    raise PresQTResponseException(
        'PresQT Error: The search query is not formatted correctly.',
        status.HTTP_400_BAD_REQUEST)
コード例 #23
0
ファイル: get_osf_resource.py プロジェクト: ndlib/presqt
def get_osf_resource(resource_id, osf_instance):
    """
    Get an OSF resource based on a given id.

    The id is tried, in order, as a '<project>:<storage>' pair, as a file or
    folder, and finally as a project.

    Parameters
    ----------
    resource_id : str
        Resource ID to retrieve.

    osf_instance : OSF class object
        Instance of the OSF class we want to use to get the resource from.

    Returns
    -------
    The class object for the resource requested.

    Raises
    ------
    PresQTResponseException
        If none of the lookups finds the resource; it carries the status code of
        the last OSFNotFoundError.
    """
    # Since we don't know the file type, try and get the resource as a storage provider first.
    id_parts = resource_id.split(':')
    try:
        return osf_instance.project(id_parts[0]).storage(id_parts[1])
    except (OSFNotFoundError, IndexError):
        # An IndexError means the id had no ':' and so cannot name a storage provider.
        pass

    # If it's not a storage provider then check if it's a file or folder.
    try:
        return osf_instance.resource(resource_id)
    except OSFNotFoundError:
        pass

    # If it's not a folder/file then it's a project or it doesn't exist.
    try:
        return osf_instance.project(resource_id)
    except OSFNotFoundError as e:
        raise PresQTResponseException(
            "Resource with id '{}' not found for this user.".format(resource_id), e.status_code)
コード例 #24
0
ファイル: validation_check.py プロジェクト: ndlib/presqt
def validation_check(token):
    """
    Verify the supplied GitLab API token by requesting the current user from GitLab.

    Parameters
    ----------
    token : str
        User's GitLab token

    Returns
    -------
    Tuple of (headers, user_id): the 'Private-Token' auth header dictionary built from the
    token, and the 'id' field of the user object GitLab returned.

    Raises
    ------
    PresQTResponseException
        If GitLab answers the request with a 401 status code.
    """
    auth_headers = {"Private-Token": "{}".format(token)}
    user_response = requests.get("https://gitlab.com/api/v4/user", headers=auth_headers)

    # Guard clause: a 401 means GitLab rejected the token.
    if user_response.status_code == 401:
        raise PresQTResponseException("Token is invalid. Response returned a 401 status code.",
                                      status.HTTP_401_UNAUTHORIZED)

    return auth_headers, user_response.json()['id']
コード例 #25
0
ファイル: validation_check.py プロジェクト: ndlib/presqt
def validation_check(token):
    """
    Ensure a proper GitHub API token has been provided.

    The token is checked by requesting the authenticated user from GitHub and reading the
    'login' field of the JSON response.

    Parameters
    ----------
    token : str
        User's GitHub token

    Returns
    -------
    Tuple of (header, username): the GitHub auth header dictionary and the 'login' value
    of the requesting user.

    Raises
    ------
    PresQTResponseException
        If the response body does not contain a 'login' entry.
    """
    header = {
        "Authorization": "token {}".format(token),
        "Accept": "application/vnd.github.mercy-preview+json",
    }
    validation = requests.get("https://api.github.com/user", headers=header).json()
    try:
        username = validation['login']
    except (KeyError, TypeError):
        # Only a missing key or a non-mapping body means "no user". A bare except here
        # would also swallow KeyboardInterrupt and SystemExit.
        raise PresQTResponseException("Token is invalid. Response returned a 401 status code.",
                                      status.HTTP_401_UNAUTHORIZED)

    return header, username
コード例 #26
0
def create_project(project_title, headers, token):
    """
    Create a new FigShare project through the FigShare API.

    Parameters
    ----------
    project_title : str
        The title of the project being created
    headers : dict
        The users FigShare Auth header
    token : str
        The users Auth token

    Returns
    -------
    Tuple of (project_title, project_id): the title exactly as it was passed in, and the
    last path segment of the 'location' value in FigShare's response.

    Raises
    ------
    PresQTResponseException
        If FigShare does not answer the create request with a 201 status code.
    """
    from presqt.targets.figshare.functions.fetch import figshare_fetch_resources

    # Gather the titles of every resource FigShare reports as a project.
    figshare_resources, pages = figshare_fetch_resources(token, None)
    existing_titles = []
    for entry in figshare_resources:
        if entry['kind_name'] == 'project':
            existing_titles.append(entry['title'])

    # presumably yields a title that does not clash with existing_titles; verify against
    # get_duplicate_title
    unique_title = get_duplicate_title(project_title, existing_titles, '(PresQT*)')

    response = requests.post("https://api.figshare.com/v2/account/projects",
                             headers=headers,
                             data=json.dumps({"title": unique_title}))

    if response.status_code != 201:
        raise PresQTResponseException(
            "Response has status code {} while creating project {}".format(
                response.status_code, project_title),
            status.HTTP_400_BAD_REQUEST)

    # The project id is whatever follows the final '/' of the returned location.
    new_project_id = response.json()['location'].rpartition('/')[2]
    # NOTE(review): the title handed back is the requested one, not the title that was
    # actually sent to FigShare — confirm callers expect that.
    return project_title, new_project_id
コード例 #27
0
def get_curate_nd_resource(resource_id, curate_nd_instance):
    """
    Fetch a single CurateND resource by its id.

    Parameters
    ----------
    resource_id : str
        Resource ID to retrieve

    curate_nd_instance : CurateND class object
        Instance of the CurateND class we want to use to get the resource from.

    Returns
    -------
    The class object for the resource requested.

    Raises
    ------
    PresQTResponseException
        If the lookup raises CurateNDForbiddenError; the status code of that error is
        carried over.
    """
    try:
        return curate_nd_instance.resource(resource_id)
    except CurateNDForbiddenError as e:
        raise PresQTResponseException(
            "User does not have access to this resource with the token provided.",
            e.status_code)
コード例 #28
0
ファイル: upload.py プロジェクト: ndlib/presqt
def figshare_upload_resource(token, resource_id, resource_main_dir,
                             hash_algorithm, file_duplicate_action,
                             process_info_path, action):
    """
    Upload the files found in the resource_main_dir to the target.

    Parameters
    ----------
    token : str
        User's token.
    resource_id : str
        ID of the resource requested. A falsy value means a new project is created;
        otherwise it is read as 'project_id' or 'project_id:article_id'.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity (not used in this function's body).
    file_duplicate_action : str
        The action to take when a duplicate file is found (not used in this function's body).
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/ignored/file.pg', 'another/ignored/file.jpg']

        'resources_updated' : Array of string file paths of files that were updated when
         uploading the resource. Path should have the same base as resource_main_dir.
                                 Example:
                                    ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                            {
                                'destinationUsername': '******'
                            }
        'file_metadata_list': List of dictionaries for each file that contains metadata
                              and hash info. Must be in the following format:
                                {
                                    "actionRootPath": '/path/on/disk',
                                    "destinationPath": '/path/on/target/destination',
                                    "title": 'file_title',
                                    "destinationHash": {'hash_algorithm': 'the_hash'}
                                }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user if not available through API

    Raises
    ------
    PresQTResponseException
        If the token fails validation, the project or article cannot be found, or the
        resource_id has more than two ':' separated parts (i.e. it points at a file).

    FigShare's Upload Process
        1. Initiate new file upload (POST) within the article. Send file size, md5, and name but no file contents yet.
        2. Send a GET request to the 'Uploader Service' to determine that the status is "Pending" and how many parts to split the upload into.
        3. Split the file into the correct number of parts and upload each using a PUT request.
        4. Send a POST request to complete the upload.
    """
    try:
        headers, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # First entry yielded by os.walk: (dirpath, dirnames, filenames) of resource_main_dir itself.
    os_path = next(os.walk(resource_main_dir))
    total_files = upload_total_files(resource_main_dir)
    # Update process info file
    update_process_info(process_info_path, total_files, action, 'upload')
    update_process_info_message(process_info_path, action,
                                "Uploading files to FigShare...")

    resources_ignored = []
    resources_updated = []
    file_metadata_list = []
    action_metadata = {'destinationUsername': username}

    # Upload a new project
    if not resource_id:
        project_title = os_path[1][0]
        # Create a new project with the name being the top level directory's name.
        _, project_id = create_project(project_title, headers, token)
        # Create article, for now we'll name it the same as the project
        article_id = create_article(project_title, headers, project_id)
    else:
        # Upload to an existing project
        split_id = str(resource_id).split(":")
        project_id = split_id[0]

        try:
            project_title = requests.get(
                "https://api.figshare.com/v2/account/projects/{}".format(
                    project_id),
                headers=headers).json()['title']
        except KeyError:
            raise PresQTResponseException(
                "Project with id, {}, could not be found by the requesting user."
                .format(project_id), status.HTTP_400_BAD_REQUEST)

        if len(split_id) == 1:
            # We only have a project and we need to make a new article id
            # Check to see if an article with this name already exists
            articles = requests.get(
                "https://api.figshare.com/v2/account/projects/{}/articles".
                format(project_id),
                headers=headers).json()
            article_titles = [article['title'] for article in articles]
            new_title = get_duplicate_title(project_title, article_titles,
                                            "(PresQT*)")
            article_id = create_article(new_title, headers, resource_id)
        elif len(split_id) == 2:
            article_id = split_id[1]
        else:
            # Can't upload to file
            raise PresQTResponseException(
                "Can not upload into an existing file.",
                status.HTTP_400_BAD_REQUEST)

    # Get the article title
    try:
        article_title = requests.get(
            "https://api.figshare.com/v2/account/articles/{}".format(
                article_id),
            headers=headers).json()['title']
    except KeyError:
        raise PresQTResponseException(
            "Article with id, {}, could not be found by the requesting user.".
            format(article_id), status.HTTP_400_BAD_REQUEST)

    # Get md5, size and name of zip file to be uploaded
    for path, _subdirs, files in os.walk(resource_main_dir):
        for name in files:
            file_path = os.path.join(path, name)
            # The context manager closes the handle once the upload call returns (or
            # raises), so a handle is no longer leaked for every uploaded file.
            with open(file_path, 'rb') as file_info:
                zip_hash = hash_generator(file_info.read(), 'md5')

                figshare_file_upload_process(file_info,
                                             headers,
                                             name,
                                             article_id,
                                             file_type='zip',
                                             path=path)

            file_metadata_list.append({
                'actionRootPath': file_path,
                'destinationPath':
                '/{}/{}/{}'.format(project_title, article_title, name),
                'title': name,
                'destinationHash': zip_hash
            })
            increment_process_info(process_info_path, action, 'upload')

    return {
        "resources_ignored": resources_ignored,
        "resources_updated": resources_updated,
        "action_metadata": action_metadata,
        "file_metadata_list": file_metadata_list,
        "project_id": "{}:{}".format(project_id, article_id),
        "project_link": "https://figshare.com/account/home#/projects"
    }
コード例 #29
0
ファイル: download.py プロジェクト: craig-willis/presqt
def github_download_resource(token, resource_id):
    """
    Fetch the requested resource from GitHub along with its hash information.

    Parameters
    ----------
    token : str
        User's GitHub token
    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path: '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers: List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/]
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                              'sourceUsername': '******',
                              }

    Raises
    ------
    PresQTResponseException
        If the token fails validation (401), or if GitHub does not answer the repository
        request with a 200 status code (404).
    """
    try:
        header, username = validation_check(token)
    except PresQTResponseException:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    project_url = 'https://api.github.com/repositories/{}'.format(resource_id)

    response = requests.get(project_url, headers=header)

    if response.status_code != 200:
        raise PresQTResponseException(
            'The resource with id, {}, does not exist for this user.'.format(
                resource_id), status.HTTP_404_NOT_FOUND)
    data = response.json()

    repo_name = data['name']
    # Strip off the unnecessary {+path} that's included in the url
    # Example: https://api.github.com/repos/eggyboi/djangoblog/contents/{+path} becomes
    # https://api.github.com/repos/eggyboi/djangoblog/contents
    contents_url = data['contents_url'].partition('/{+path}')[0]

    files, empty_containers, action_metadata = download_content(
        username, contents_url, header, repo_name, [])
    file_urls = [file['file'] for file in files]

    loop = asyncio.new_event_loop()
    try:
        download_data = loop.run_until_complete(async_main(file_urls, header))
    finally:
        # A loop created with new_event_loop() is never closed implicitly; close it so
        # its resources are released on both the success and the failure path.
        loop.close()

    # Go through the file dictionaries and replace the file path with the binary_content
    for file in files:
        file['file'] = get_dictionary_from_list(download_data, 'url',
                                                file['file'])['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }
コード例 #30
0
    def _download_resource(self):
        """
        Downloads the resources from the target, performs a fixity check,
        zips them up in BagIt format.

        The steps, in order:
            1. Record the worker process id in the process_info file.
            2. Fetch the source target's 'resource_download' function and call it.
            3. For every returned resource run the fixity check, create or validate
               metadata, and write the file under self.resource_main_dir.
            4. Build the keyword dictionary (transfers only) and the action metadata.
            5. Write the empty containers to disk.
            6. For a transfer ('resource_transfer_in'): bag the directory and stop.
               Otherwise: build and validate the FTS metadata, bag, write the metadata
               and fixity files, zip the directory, update process_info and, if
               self.email is set, send the completion email.

        Returns
        -------
        bool
            False if the target function (or the metadata-only transfer check) raised a
            PresQTResponseException, True otherwise.
        """
        # Key used to look up the target function. It is always 'resource_download',
        # while self.action may instead be 'resource_transfer_in'.
        action = 'resource_download'

        # Write the process id to the process_info file
        self.process_info_obj[
            'function_process_id'] = self.function_process.pid
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)

        # Fetch the proper function to call
        func = FunctionRouter.get_function(self.source_target_name, action)

        # Fetch the resources. func_dict is in the format:
        #   {
        #       'resources': files,
        #       'empty_containers': empty_containers,
        #       'action_metadata': action_metadata,
        #       'extra_metadata': extra_metadata
        #   }
        try:
            func_dict = func(self.source_token, self.source_resource_id,
                             self.process_info_path, self.action)
            # If the resource is being transferred, has only one file, and that file is the
            # PresQT metadata then raise an error.
            # NOTE(review): the message below reads 'cannot not' — likely a typo; it is
            # runtime text, so it is left untouched here.
            if self.action == 'resource_transfer_in' and \
                    len(func_dict['resources']) == 1 \
                    and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
                raise PresQTResponseException(
                    'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                    status.HTTP_400_BAD_REQUEST)
        except PresQTResponseException as e:
            # TODO: Functionalize this error section
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['download_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Update the expiration from 5 hours to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)

            return False

        # Get the latest contents of the job's process_info.json file
        self.process_info_obj = read_file(self.process_info_path,
                                          True)[self.action]

        # The directory all files should be saved in.
        self.resource_main_dir = os.path.join(self.ticket_path,
                                              self.base_directory_name)
        update_process_info_message(
            self.process_info_path, self.action,
            'Performing fixity checks and gathering metadata...')

        self.extra_metadata = func_dict['extra_metadata']
        # For each resource, perform fixity check, gather metadata, and save it to disk.
        # The attributes below are reset here before the per-resource helpers are called
        # with this instance.
        fixity_info = []
        self.download_fixity = True
        self.download_failed_fixity = []
        self.source_fts_metadata_actions = []
        self.new_fts_metadata_files = []
        self.all_keywords = []
        self.initial_keywords = []
        self.manual_keywords = []
        self.enhanced_keywords = []
        for resource in func_dict['resources']:
            # Perform the fixity check and add extra info to the returned fixity object.
            # Note: This method of calling the function needs to stay this way for test Mock
            # NOTE(review): self.download_fixity is reassigned on every iteration, so after
            # the loop it holds the value returned for the last resource only — confirm
            # that is intended.
            fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
                resource)
            fixity_info.append(fixity_obj)

            if not fixity_obj['fixity']:
                self.download_failed_fixity.append(resource['path'])

            # Create metadata for this resource or validate the metadata file
            if resource['title'] == 'PRESQT_FTS_METADATA.json':
                is_valid = validate_metadata(self, resource)
                # An invalid metadata file is renamed and written like any other file.
                # A valid one is not written to disk by this branch.
                if not is_valid:
                    resource['path'] = resource['path'].replace(
                        'PRESQT_FTS_METADATA.json',
                        'INVALID_PRESQT_FTS_METADATA.json')
                    create_download_metadata(self, resource, fixity_obj)
                    write_file(
                        '{}{}'.format(self.resource_main_dir,
                                      resource['path']), resource['file'])
            else:
                create_download_metadata(self, resource, fixity_obj)
                write_file(
                    '{}{}'.format(self.resource_main_dir, resource['path']),
                    resource['file'])

        # Enhance the source keywords
        # The dictionary stays empty unless this is a transfer, keywords are supported,
        # and the keyword action is 'automatic' or 'manual'.
        self.keyword_dict = {}
        if self.action == 'resource_transfer_in':
            if self.supports_keywords:
                if self.keyword_action == 'automatic':
                    self.keyword_dict = automatic_keywords(self)
                elif self.keyword_action == 'manual':
                    self.keyword_dict = manual_keywords(self)
        self.keyword_enhancement_successful = True

        # Create PresQT action metadata
        update_process_info_message(self.process_info_path, self.action,
                                    "Creating PRESQT_FTS_METADATA...")
        self.source_username = func_dict['action_metadata']['sourceUsername']
        if self.action == 'resource_transfer_in':
            source_target_data = get_target_data(self.source_target_name)
            destination_target_data = get_target_data(
                self.destination_target_name)
            self.details = "PresQT Transfer from {} to {}".format(
                source_target_data['readable_name'],
                destination_target_data['readable_name'])
        else:
            source_target_data = get_target_data(self.source_target_name)
            self.details = "PresQT Download from {}".format(
                source_target_data['readable_name'])

        # 'destinationTargetName' defaults to 'Local Machine' and is overwritten further
        # down when the action is a transfer.
        self.action_metadata = {
            'id': str(uuid4()),
            'details': self.details,
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': self.source_target_name,
            'sourceUsername': self.source_username,
            'destinationTargetName': 'Local Machine',
            'destinationUsername': None,
            'keywords': self.keyword_dict,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

        # TODO: Move this up to make it occur after we loop through func_dict['resources'] and write
        # resources
        # Write empty containers to disk
        for container_path in func_dict['empty_containers']:
            # Make sure the container_path has a '/' at the beginning and end
            if container_path[-1] != '/':
                container_path += '/'
            if container_path[0] != '/':
                container_path = '/' + container_path
            os.makedirs(
                os.path.dirname('{}{}'.format(self.resource_main_dir,
                                              container_path)))

        # If we are transferring the downloaded resource then bag it for the resource_upload method
        if self.action == 'resource_transfer_in':
            self.action_metadata[
                'destinationTargetName'] = self.destination_target_name

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])
            self.process_info_obj['download_status'] = get_action_message(
                self, 'Download', self.download_fixity, True,
                self.action_metadata)
            return True
        # If we are only downloading the resource then create metadata, bag, zip,
        # and update the server process file.
        else:
            # Create Metadata file
            final_fts_metadata_data = create_fts_metadata(
                self.all_keywords, self.action_metadata,
                self.source_fts_metadata_actions, self.extra_metadata)

            # Validate the final metadata
            metadata_validation = schema_validator(
                'presqt/json_schemas/metadata_schema.json',
                final_fts_metadata_data)
            self.process_info_obj['message'] = get_action_message(
                self, 'Download', self.download_fixity, metadata_validation,
                self.action_metadata)

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])

            # Write metadata file.
            # Both this file and the fixity file are written after the bag has been made.
            write_file(
                os.path.join(self.resource_main_dir,
                             'PRESQT_FTS_METADATA.json'),
                final_fts_metadata_data, True)

            # Add the fixity file to the disk directory
            write_file(
                os.path.join(self.resource_main_dir, 'fixity_info.json'),
                fixity_info, True)

            # Zip the BagIt 'bag' to send forward.
            zip_directory(self.resource_main_dir,
                          "{}.zip".format(self.resource_main_dir),
                          self.ticket_path)

            # Everything was a success so update the server metadata file.
            self.process_info_obj['status_code'] = '200'
            self.process_info_obj['status'] = 'finished'
            self.process_info_obj['zip_name'] = '{}.zip'.format(
                self.base_directory_name)
            self.process_info_obj[
                'failed_fixity'] = self.download_failed_fixity
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)
            if self.email:
                # Build link to retrieve the download
                download_reverse = reverse('job_status',
                                           kwargs={
                                               "action": "download",
                                               "response_format": "zip"
                                           })
                download_url = self.request.build_absolute_uri(
                    download_reverse)
                final_download_url = "{}?ticket_number={}".format(
                    download_url, self.ticket_number)
                context = {
                    "download_url": final_download_url,
                    "download_message": self.process_info_obj['message'],
                    "failed_fixity": self.process_info_obj['failed_fixity']
                }
                email_blaster(self.email, "PresQT Download Complete", context,
                              "emails/download_email.html")

        return True