Beispiel #1
0
def osf_upload_keywords(token, resource_id, keywords):
    """
    Upload the keywords to a given resource id.

    The keywords are sent to OSF as the resource's ``tags`` attribute through
    a single PATCH request.

    Parameters
    ----------
    token: str
        User's OSF token
    resource_id: str
        ID of the resource requested
    keywords: list
        List of new keywords to upload

    Returns
    -------
    A dictionary object that represents the updated OSF resource keywords.
    Dictionary must be in the following format:
        {
            "updated_keywords": [
                'eggs',
                'EGG',
                'Breakfast'
            ],
            "project_id": "id_of_the_parent_project"
        }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is rejected, and with a 400 status if
        the resource kind cannot carry keywords or if OSF answers the PATCH
        request with anything other than a 200.
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)
    headers = {
        'Authorization': 'Bearer {}'.format(token),
        'Content-Type': 'application/json'
    }

    resource = get_osf_resource(resource_id, osf_instance)

    # Maps the resource kind to the OSF collection name, which is used both as
    # the URL segment and as the "type" of the request payload.
    # NOTE(review): folders are sent to the nodes endpoint, same as projects --
    # confirm this is intended rather than the files endpoint.
    collection_by_kind = {
        'project': 'nodes',
        'file': 'files',
        'folder': 'nodes',
    }

    # Any other kind (e.g. a storage) used to fall through every branch and
    # crash on an unbound variable; reject it explicitly instead.
    if resource.kind_name not in collection_by_kind:
        raise PresQTResponseException(
            "On OSF only projects, folders and files have keywords, not storages, therefore PresQT keyword features are not supported at OSF's storage level.",
            status.HTTP_400_BAD_REQUEST)

    collection = collection_by_kind[resource.kind_name]
    patch_url = 'https://api.osf.io/v2/{}/{}/'.format(collection, resource_id)
    data = {
        "data": {
            "type": collection,
            "id": resource_id,
            "attributes": {
                "tags": keywords
            }
        }
    }

    response = requests.patch(patch_url,
                              headers=headers,
                              data=json.dumps(data))
    if response.status_code != 200:
        raise PresQTResponseException(
            "OSF returned a {} error trying to update keywords.".format(
                response.status_code), status.HTTP_400_BAD_REQUEST)

    return {
        "updated_keywords": response.json()['data']['attributes']['tags'],
        "project_id": get_project_id(resource)
    }
Beispiel #2
0
def osf_fetch_keywords(token, resource_id):
    """
    Collect the keywords attached to an OSF resource.

    The resource's own tags are merged with the ``allKeywords`` entry of the
    parent project's ``PRESQT_FTS_METADATA.json`` file, when that file exists
    in the project's osfstorage listing and contains that entry.

    Parameters
    ----------
    token: str
        User's OSF token
    resource_id: str
        ID of the resource requested

    Returns
    -------
    A dictionary holding the de-duplicated keywords under both keys.
    Dictionary must be in the following format:
        {
            "tags": [
                "eggs",
                "ham",
                "bacon"
            ],
            "keywords": [
                "eggs",
                "ham",
                "bacon"
            ]
        }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is rejected, and with a 400 status if
        the resource is a storage.
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)
    auth_header = {'Authorization': 'Bearer {}'.format(token)}
    resource = get_osf_resource(resource_id, osf_instance)

    if resource.kind_name == 'storage':
        raise PresQTResponseException(
            "On OSF only projects, folders and files have keywords, not storages, therefore PresQT keyword features are not supported at OSF's storage level.",
            status.HTTP_400_BAD_REQUEST)

    # List the parent project's osfstorage entries to look for the metadata file
    project_id = get_project_id(resource)
    storage_url = 'https://api.osf.io/v2/nodes/{}/files/osfstorage'.format(
        project_id)
    storage_entries = osf_instance._get_all_paginated_data(storage_url)

    metadata = None
    for entry in storage_entries:
        if entry['attributes']['name'] != "PRESQT_FTS_METADATA.json":
            continue
        # Fetch the metadata file and parse it as JSON
        raw_metadata = requests.get(entry['links']['move'],
                                    headers=auth_header).content
        metadata = json.loads(raw_metadata)

    # Start from the resource's own tags; extend them with the metadata
    # keywords only when the metadata exists and has an 'allKeywords' entry.
    combined = resource.tags
    if metadata:
        try:
            combined = resource.tags + metadata['allKeywords']
        except KeyError:
            pass
    keywords = list(set(combined))

    return {'tags': keywords, 'keywords': keywords}
Beispiel #3
0
def osf_download_resource(token, resource_id):
    """
    Fetch the requested resource from OSF along with its hash information.

    A single file is downloaded directly. For a project, storage or folder
    the contained files are gathered with ``get_all_files`` and then
    downloaded together through ``async_main`` on a temporary event loop.

    Parameters
    ----------
    token : str
        User's OSF token

    resource_id : str
        ID of the resource requested

    Returns
    -------
    Dictionary with the following keys: values
        'resources': List of dictionary objects that each hold a file and its information.
                     Dictionary must be in the following format:
                         {
                            'file': binary_file,
                            'hashes': {'hash_algorithm': 'the_hash'},
                            'title': 'file.jpg',
                            'path': '/path/to/file',
                            'source_path': '/full/path/to/file',
                            'extra_metadata': {'any': 'extra'}
                         }
        'empty_containers': List of string paths representing empty containers that must be written.
                              Example: ['empty/folder/to/write/', 'another/empty/folder/']
        'action_metadata': Dictionary containing action metadata. Must be in the following format:
                              {
                                'sourceUsername': '******',
                              }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is rejected.
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Get contributor name
    contributor_name = requests.get(
        'https://api.osf.io/v2/users/me/',
        headers={
            'Authorization': 'Bearer {}'.format(token)
        }).json()['data']['attributes']['full_name']
    action_metadata = {"sourceUsername": contributor_name}
    # Get the resource
    resource = get_osf_resource(resource_id, osf_instance)

    # Get all files for the provided resources.
    # The 'path' value will be the path that the file is eventually saved in. The root of the
    # path should be the resource.
    files = []
    empty_containers = []
    if resource.kind_name == 'file':
        project = osf_instance.project(resource.parent_project_id)
        files.append({
            "file":
            resource.download(),
            "hashes":
            resource.hashes,
            "title":
            resource.title,
            # If the file is the only resource we are downloading then we don't need its full path
            "path":
            '/{}'.format(resource.title),
            "source_path":
            '/{}/{}{}'.format(project.title, resource.provider,
                              resource.materialized_path),
            "extra_metadata":
            osf_download_metadata(resource)
        })
    else:
        if resource.kind_name == 'project':
            resource.get_all_files('', files, empty_containers)
            project = resource
        elif resource.kind_name == 'storage':
            resource.get_all_files('/{}'.format(resource.title), files,
                                   empty_containers)
            project = osf_instance.project(resource.node)
        else:
            resource.get_all_files('', files, empty_containers)
            project = osf_instance.project(resource.parent_project_id)
            # File Path needs to start at the folder and strip everything before it.
            # Example: If the resource is 'Docs2' and the starting path is
            # '/Project/Storage/Docs1/Docs2/file.jpeg' then the final path
            # needs to be '/Docs2/file.jpeg'
            # The prefix depends only on the folder itself, so its length is
            # computed once instead of once per file.
            # Assumes the folder's materialized_path ends with '/<title>/'
            # (hence the + 2) -- TODO confirm.
            path_to_strip = resource.materialized_path[:-(
                len(resource.title) + 2)]
            strip_length = len(path_to_strip)
            for file in files:
                file['path'] = file['file'].materialized_path[strip_length:]

        # Asynchronously make all download requests
        file_urls = [file['file'].download_url for file in files]
        loop = asyncio.new_event_loop()
        try:
            download_data = loop.run_until_complete(
                async_main(file_urls, token))
        finally:
            # The loop is private to this call; close it so it is not leaked,
            # even when a download raises.
            loop.close()

        # Go through the file dictionaries and replace the file class with the binary_content
        for file in files:
            file['source_path'] = '/{}/{}{}'.format(
                project.title, file['file'].provider,
                file['file'].materialized_path)
            file['file'] = get_dictionary_from_list(
                download_data, 'url',
                file['file'].download_url)['binary_content']

    return {
        'resources': files,
        'empty_containers': empty_containers,
        'action_metadata': action_metadata
    }
Beispiel #4
0
def osf_upload_resource(token, resource_id, resource_main_dir, hash_algorithm,
                        file_duplicate_action, process_info_path, action):
    """
    Push the contents of resource_main_dir to OSF.

    With a resource_id the files go into that existing project, storage or
    folder. Without one, a new project named after the first top level
    directory of resource_main_dir is created and that directory is uploaded
    into the new project's 'osfstorage'.

    Parameters
    ----------
    token : str
        User's OSF token.
    resource_id : str
        ID of the container to upload into. Falsy to create a new project.
    resource_main_dir : str
        Path to the main directory for the resources to be uploaded.
    hash_algorithm : str
        Hash algorithm we are using to check for fixity.
    file_duplicate_action : str
        The action to take when a duplicate file is found
    process_info_path: str
        Path to the process info file that keeps track of the action's progress
    action: str
        The action being performed

    Returns
    -------
    Dictionary with the following keys: values
        'resources_ignored' : Array of string file paths of files that were ignored when
        uploading the resource. Path should have the same base as resource_main_dir.
                                Example:
                                    ['path/to/ignored/file.pg', 'another/ignored/file.jpg']

        'resources_updated' : Array of string file paths of files that were updated when
         uploading the resource. Path should have the same base as resource_main_dir.
                                 Example:
                                    ['path/to/updated/file.jpg']
        'action_metadata': Dictionary containing FTS action metadata. Must be in the following format:
                            {
                                'destinationUsername': '******'
                            }
        'file_metadata_list': List of dictionaries for each file that contains FTS metadata
                              and hash info. Must be in the following format:
                                {
                                    "actionRootPath": '/path/on/disk',
                                    "destinationPath": '/path/on/target/destination',
                                    "title": 'file_title',
                                    "destinationHash": {'hash_algorithm': 'the_hash'}
                                }
        'project_id': ID of the parent project for this upload. Needed for metadata upload.
        'project_link': The link to either the resource or the home page of the user if not available through API

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is rejected, and with a 400 status if
        resource_id points at a file.
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Look up the name of the user the token belongs to
    auth_header = {'Authorization': 'Bearer {}'.format(token)}
    user_response = requests.get('https://api.osf.io/v2/users/me/',
                                 headers=auth_header)
    contributor_name = user_response.json()['data']['attributes']['full_name']
    action_metadata = {"destinationUsername": contributor_name}

    # Collections passed to create_directory; read back below for the result
    hashes = {}
    resources_ignored = []
    resources_updated = []
    file_metadata_list = []

    # Record the total number of files so progress can be tracked
    total_files = upload_total_files(resource_main_dir)
    update_process_info(process_info_path, total_files, action, 'upload')
    update_process_info_message(process_info_path, action,
                                "Uploading files to OSF...")

    # Every create_directory call takes the same trailing positional arguments
    directory_args = (file_duplicate_action, hashes, resources_ignored,
                      resources_updated, file_metadata_list,
                      process_info_path, action)

    if not resource_id:
        # No target given: create a new project
        root_path, subdirectories, _ = next(os.walk(resource_main_dir))
        top_level_name = subdirectories[0]

        # The actual data to upload lives inside the top level directory
        data_to_upload_path = '{}/{}'.format(root_path, top_level_name)

        # The new project is named after the top level directory
        project = osf_instance.create_project(top_level_name)
        project_id = project.id

        # Upload resources into OSFStorage for the new project.
        project.storage('osfstorage').create_directory(data_to_upload_path,
                                                       *directory_args)
    else:
        # Uploading into an existing container
        resource = get_osf_resource(resource_id, osf_instance)
        kind_name = resource.kind_name

        # A file cannot receive uploads
        if kind_name == 'file':
            raise PresQTResponseException(
                "The Resource provided, {}, is not a container".format(
                    resource_id), status.HTTP_400_BAD_REQUEST)

        if kind_name == 'project':
            project = resource
            project_id = project.id
            resource.storage('osfstorage').create_directory(
                resource_main_dir, *directory_args)
        else:
            # Folder or Storage
            resource.create_directory(resource_main_dir, *directory_args)
            # Fetch the owning project for the metadata work below
            if kind_name == 'storage':
                project_id = resource.node
            else:
                project_id = resource.parent_project_id
            project = osf_instance.project(project_id)

    for entry in file_metadata_list:
        # Keep only the hash for the requested hash_algorithm
        entry['destinationHash'] = entry['destinationHash'][hash_algorithm]
        # Prefix the destination path with the project title
        entry['destinationPath'] = '/{}/{}'.format(project.title,
                                                   entry['destinationPath'])

    return {
        'resources_ignored': resources_ignored,
        'resources_updated': resources_updated,
        'action_metadata': action_metadata,
        'file_metadata_list': file_metadata_list,
        'project_id': project_id,
        "project_link": "https://osf.io/{}".format(project_id)
    }
Beispiel #5
0
def osf_fetch_resource(token, resource_id):
    """
    Look up a single OSF resource and describe it as a plain dictionary.

    Parameters
    ----------
    token : str
        User's OSF token

    resource_id : str
        ID of the resource requested

    Returns
    -------
    A dictionary object that represents the OSF resource. The 'extra' entry
    is filled for files, folders and projects and is empty for any other kind.
    Dictionary must be in the following format:
    {
        "kind": "item",
        "kind_name": "file",
        "id": "12345",
        "title": "23296359282_934200ec59_o.jpg",
        "date_created": "2019-05-13T14:54:17.129170Z",
        "date_modified": "2019-05-13T14:54:17.129170Z",
        "hashes": {
            "md5": "aaca7ef067dcab7cb8d79c36243823e4",
            "sha256": "ea94ce54261720c16abb508c6dcd1fd481c30c09b7f2f5ab0b79e3199b7e2b55"
        },
        "extra": {
            "any": "extra",
            "values": "here"
        }
    }

    Raises
    ------
    PresQTResponseException
        With a 401 status if the token is rejected.
    """
    try:
        osf_instance = OSF(token)
    except PresQTInvalidTokenError:
        raise PresQTResponseException(
            "Token is invalid. Response returned a 401 status code.",
            status.HTTP_401_UNAUTHORIZED)

    # Attribute names copied verbatim into 'extra', in output order
    item_extra_fields = (
        'last_touched',
        'materialized_path',
        'current_version',
        'provider',
        'path',
        'current_user_can_comment',
        'guid',
        'checkout',
        'tags',
        'size',
    )
    project_extra_fields = (
        'category',
        'fork',
        'current_user_is_contributor',
        'preprint',
        'current_user_permissions',
        'custom_citation',
        'collection',
        'public',
        'subjects',
        'registration',
        'current_user_can_comment',
        'wiki_enabled',
        'node_license',
        'tags',
        'size',
    )

    # Get the resource
    resource = get_osf_resource(resource_id, osf_instance)

    # Fields common to every kind of resource
    payload = {
        'kind': resource.kind,
        'kind_name': resource.kind_name,
        'id': resource.id,
        'title': resource.title,
        'date_created': resource.date_created,
        'date_modified': resource.date_modified,
        'hashes': {
            'md5': resource.md5,
            'sha256': resource.sha256
        },
        'extra': {}
    }

    # Pick the kind specific attributes; other kinds keep an empty 'extra'
    kind_name = resource.kind_name
    if kind_name in ('folder', 'file'):
        extra_fields = item_extra_fields
    elif kind_name == 'project':
        extra_fields = project_extra_fields
    else:
        extra_fields = ()

    payload['extra'] = {
        field: getattr(resource, field) for field in extra_fields
    }
    return payload