Example #1
0
def write_and_validate_metadata(instance, project_id, fts_metadata_data):
    """
    Upload the FTS metadata to the destination target's project, then validate it.

    Parameters
    ----------
    instance: BaseResource Class Instance
        Class instance for the action. Its destination_target_name and destination_token
        attributes are read.
    project_id: str
        ID of the project the resource metadata should be uploaded to
    fts_metadata_data: dict
        Full FTS metadata to be written.

    Returns
    -------
    The PresQTError raised by the target's upload function if the upload failed.
    Otherwise the result of validating fts_metadata_data against the metadata schema
    (True when the metadata is valid).
    """
    from presqt.api_v1.utilities import FunctionRouter

    # Look up the destination target's metadata upload function
    upload_metadata = FunctionRouter.get_function(
        instance.destination_target_name, 'metadata_upload')

    try:
        upload_metadata(instance.destination_token, project_id, fts_metadata_data)
    except PresQTError as upload_error:
        # A failed upload is reported to the caller instead of a validation result
        return upload_error

    # The upload went through, so report how the metadata validates
    return schema_validator('presqt/json_schemas/metadata_schema.json', fts_metadata_data)
Example #2
0
def get_upload_source_metadata(instance, bag):
    """
    Collect the actions of every FTS metadata file found in the bag's payload.

    A valid metadata file has its actions appended to instance.source_fts_metadata_actions and is
    then deleted from disk. An invalid one is renamed to INVALID_PRESQT_FTS_METADATA.json in the
    same directory. The bag's manifests are re-saved after either change.

    Parameters
    ----------
    instance: BaseResource class instance
        Class we want to add the attributes to
    bag: Bag Class instance
        The bag we want to traverse and update.
    """
    fts_file_name = 'PRESQT_FTS_METADATA.json'
    instance.source_fts_metadata_actions = []

    for payload_file in bag.payload_files():
        # Only FTS metadata files are of interest
        if os.path.basename(payload_file) != fts_file_name:
            continue

        metadata_path = os.path.join(instance.resource_main_dir, payload_file)
        contents = read_file(metadata_path, True)
        verdict = schema_validator('presqt/json_schemas/metadata_schema.json', contents)

        if verdict is True:
            # Valid metadata: keep its actions and drop the file from the bag
            instance.source_fts_metadata_actions = (
                instance.source_fts_metadata_actions + contents['actions'])
            os.remove(os.path.join(instance.resource_main_dir, payload_file))
        else:
            # Invalid metadata: keep the file but flag it through its name
            os.rename(
                metadata_path,
                os.path.join(os.path.dirname(metadata_path),
                             'INVALID_PRESQT_FTS_METADATA.json'))

        bag.save(manifests=True)
Example #3
0
    def test_invalid_json(self):
        """
        Verify that schema_validator does not report success when the JSON file violates
        the JSONSchema.
        """
        invalid_json = [{'name': 3}]
        invalid_path = 'presqt/json_schemas/tests/test_json_file.json'

        # Create the test JSON file
        with open(invalid_path, 'w') as json_file:
            json.dump(invalid_json, json_file)

        try:
            validation = schema_validator(self.schema_path, invalid_path)
            # A bare self.assertRaises(ValidationError) checks nothing, so assert on the
            # returned value instead: anything other than True means validation failed.
            self.assertIsNot(validation, True)
        finally:
            # Delete the test JSON file even when the assertion above fails
            os.remove(invalid_path)
Example #4
0
def create_download_metadata(instance, resource, fixity_obj):
    """
    Record download metadata for one resource on the instance.

    A resource titled PRESQT_FTS_METADATA.json is parsed and validated first. When it is valid,
    its actions are merged into instance.source_fts_metadata_actions and no file metadata is
    recorded for it. When it is invalid, its path is renamed to INVALID_PRESQT_FTS_METADATA.json
    and it is recorded like any other resource.

    Parameters
    ----------
    instance: BaseResource Class Instance
        Class instance we save metadata to.
    resource: Dict
        Resource dictionary we want metadata for.
    fixity_obj: Dict
        Dictionary of fixity information for this resource.

    Returns
    -------
    True if the resource is a valid FTS metadata file.
    False if the resource is not a valid FTS metadata file.
    """
    fts_file_name = 'PRESQT_FTS_METADATA.json'

    if resource['title'] == fts_file_name:
        parsed_metadata = json.loads(resource['file'].decode())
        verdict = schema_validator('presqt/json_schemas/metadata_schema.json',
                                   parsed_metadata)
        if verdict is True:
            # Valid FTS metadata: keep its actions, nothing is written for the file itself
            instance.source_fts_metadata_actions = (
                instance.source_fts_metadata_actions + parsed_metadata['actions'])
            return True
        # Invalid FTS metadata: record it under a name that marks it as invalid
        resource['path'] = resource['path'].replace(
            fts_file_name, 'INVALID_PRESQT_FTS_METADATA.json')

    file_entry = dict(
        destinationPath=resource['path'],
        destinationHashes={},
        failedFixityInfo=[],
        title=resource['title'],
        sourceHashes=resource['hashes'],
        sourcePath=resource['source_path'],
        extra=resource['extra_metadata'],
    )

    # Attach the details of a failed fixity check
    if not fixity_obj['fixity']:
        failure = {
            'NewGeneratedHash': fixity_obj['presqt_hash'],
            'algorithmUsed': fixity_obj['hash_algorithm'],
            'reasonFixityFailed': fixity_obj['fixity_details'],
        }
        file_entry['failedFixityInfo'].append(failure)

    # Add this file's metadata to the running list
    instance.new_fts_metadata_files.append(file_entry)
    return False
Example #5
0
def validate_metadata(instance, resource):
    """
    Parse a downloaded FTS metadata file and, when it passes schema validation, merge its
    actions and keywords into the instance.

    Parameters
    ----------
    instance: Class instance
        Its source_fts_metadata_actions and all_keywords attributes are extended.
    resource: Dict
        Resource dictionary whose 'file' entry holds the encoded JSON contents.

    Returns
    -------
    True if the metadata is valid, False otherwise (the instance is then left untouched).
    """
    parsed_metadata = json.loads(resource['file'].decode())
    verdict = schema_validator('presqt/json_schemas/metadata_schema.json',
                               parsed_metadata)

    # Invalid metadata contributes nothing; the caller decides what to do with the file
    if verdict is not True:
        return False

    # Valid metadata: keep its contents
    instance.source_fts_metadata_actions = (
        instance.source_fts_metadata_actions + parsed_metadata['actions'])
    instance.all_keywords = instance.all_keywords + parsed_metadata['allKeywords']
    return True
Example #6
0
    def handle(self, *args, **options):
        """
        Verify that the Target JSON file is valid against our JSON Schema.

        Beyond the schema check, also verify that no two targets share a name and that every
        validated supported action set to true has a matching "<name>_<action>" attribute on
        FunctionRouter.

        Exits with status 1 if schema validation fails or a target name is duplicated, and with
        status 2 if a FunctionRouter function is missing. Returns normally when everything passes.
        """
        keys_to_validate = [
            'resource_collection', 'resource_detail', 'resource_download',
            'resource_upload', 'keywords', 'keywords_upload'
        ]

        validation = schema_validator('presqt/json_schemas/target_schema.json',
                                      'presqt/specs/targets.json')

        failure_string = "Target JSON Schema Validation Failed!\n" \
                         "You've modified the targets.json in such a way that it is incorrectly " \
                         "formatted.\nPlease refer to the project docs."

        # If JSON Schema validation has failed
        if validation is not True:
            print(validation)
            print(failure_string)
            # Exit non-zero: a status of 0 would report this failure as a success
            exit(1)
        else:
            # Further validation
            json_data = read_file('presqt/specs/targets.json', True)

            name_list = []
            for data in json_data:
                # Verify that there are no duplicate name values
                if data['name'] in name_list:
                    print(failure_string)
                    exit(1)
                # Verify that all actions for this target which are 'true' have a corresponding
                # function in FunctionRouter for it.
                for key, value in data['supported_actions'].items():
                    if key in keys_to_validate and value is True:
                        try:
                            getattr(FunctionRouter, f"{data['name']}_{key}")
                        except AttributeError:
                            print(
                                f"{data['name']} does not have a corresponding function in FunctionRouter for "
                                f"the attribute {key}")
                            exit(2)
                # Remember this name for the duplicate check on later targets
                name_list.append(data['name'])

            # Validation has passed!
            print("Target JSON Schema Validation Passed")
Example #7
0
def get_upload_source_metadata(instance, bag):
    """
    Get all FTS metadata files in the bag. If they are valid then get their contents, otherwise
    rename the invalid metadata file.

    Valid files contribute their actions and keywords to the instance (and replace its extra
    metadata when they carry any) and are then removed from disk. Invalid files are renamed to
    INVALID_PRESQT_FTS_METADATA.json. The bag's manifests are re-saved after either change.

    Parameters
    ----------
    instance: BaseResource class instance
        Class we want to add the attributes to
    bag: Bag Class instance
        The bag we want to traverse and update.

    Raises
    ------
    PresQTValidationError
        If a PRESQT_FTS_METADATA.json file cannot be parsed as JSON.
    """
    instance.source_fts_metadata_actions = []
    instance.all_keywords = []
    instance.extra_metadata = {}
    for bag_file in bag.payload_files():
        # Only FTS metadata files are of interest
        if os.path.split(bag_file)[-1] != 'PRESQT_FTS_METADATA.json':
            continue

        metadata_path = os.path.join(instance.resource_main_dir, bag_file)
        try:
            source_metadata_content = read_file(metadata_path, True)
        except JSONDecodeError as error:
            # Keep the decoding error attached as the cause for easier debugging
            raise PresQTValidationError(
                "PRESQT_FTS_METADATA.json is not valid JSON",
                status.HTTP_400_BAD_REQUEST) from error

        # If the FTS metadata is valid then remove it from the bag and save the actions.
        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            source_metadata_content) is True:
            instance.source_fts_metadata_actions = instance.source_fts_metadata_actions + \
                                                   source_metadata_content['actions']
            instance.all_keywords = instance.all_keywords + \
                                    source_metadata_content['allKeywords']
            if 'extra_metadata' in source_metadata_content:
                instance.extra_metadata = source_metadata_content[
                    'extra_metadata']
            os.remove(os.path.join(instance.resource_main_dir, bag_file))
            bag.save(manifests=True)
        # If the FTS metadata is invalid then rename the file in the bag.
        else:
            invalid_metadata_path = os.path.join(
                os.path.split(metadata_path)[0],
                'INVALID_PRESQT_FTS_METADATA.json')
            os.rename(metadata_path, invalid_metadata_path)
            bag.save(manifests=True)
Example #8
0
    def _download_resource(self):
        """
        Downloads the resources from the target, performs a fixity check,
        zips them up in BagIt format.

        The target's 'resource_download' function is looked up through FunctionRouter. Each
        returned resource is fixity checked and written under self.resource_main_dir, except for
        a valid PRESQT_FTS_METADATA.json, whose contents are merged into this instance instead.

        When self.action is 'resource_transfer_in' the directory is only bagged so that the upload
        step can take over. Otherwise the final FTS metadata and fixity_info.json are written, the
        directory is zipped, the process info is marked as finished and, if self.email is set, a
        notification email is sent.

        Returns
        -------
        False if the target fetch raised a PresQTResponseException (the process info is updated
        with the failure first).
        True otherwise.
        """
        action = 'resource_download'

        # Write the process id to the process_info file
        self.process_info_obj[
            'function_process_id'] = self.function_process.pid
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)

        # Fetch the proper function to call
        func = FunctionRouter.get_function(self.source_target_name, action)

        # Fetch the resources. func_dict is in the format:
        #   {
        #       'resources': files,
        #       'empty_containers': empty_containers,
        #       'action_metadata': action_metadata
        #   }
        # NOTE(review): an 'extra_metadata' key is also read from func_dict further down.
        try:
            func_dict = func(self.source_token, self.source_resource_id,
                             self.process_info_path, self.action)
            # If the resource is being transferred, has only one file, and that file is the
            # PresQT metadata then raise an error.
            # NOTE(review): the message below reads "cannot not be transferred" - likely a typo.
            if self.action == 'resource_transfer_in' and \
                    len(func_dict['resources']) == 1 \
                    and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
                raise PresQTResponseException(
                    'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                    status.HTTP_400_BAD_REQUEST)
        except PresQTResponseException as e:
            # TODO: Functionalize this error section
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['download_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Update the expiration from 5 hours to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)

            return False

        # Get the latest contents of the job's process_info.json file
        self.process_info_obj = read_file(self.process_info_path,
                                          True)[self.action]

        # The directory all files should be saved in.
        self.resource_main_dir = os.path.join(self.ticket_path,
                                              self.base_directory_name)
        update_process_info_message(
            self.process_info_path, self.action,
            'Performing fixity checks and gathering metadata...')

        self.extra_metadata = func_dict['extra_metadata']
        # For each resource, perform fixity check, gather metadata, and save it to disk.
        fixity_info = []
        self.download_fixity = True
        self.download_failed_fixity = []
        self.source_fts_metadata_actions = []
        self.new_fts_metadata_files = []
        self.all_keywords = []
        self.initial_keywords = []
        self.manual_keywords = []
        self.enhanced_keywords = []
        for resource in func_dict['resources']:
            # Perform the fixity check and add extra info to the returned fixity object.
            # Note: This method of calling the function needs to stay this way for test Mock
            # NOTE(review): self.download_fixity is reassigned on every iteration, so after the
            # loop it holds the value returned for the last resource only - confirm intended.
            fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
                resource)
            fixity_info.append(fixity_obj)

            if not fixity_obj['fixity']:
                self.download_failed_fixity.append(resource['path'])

            # Create metadata for this resource or validate the metadata file
            if resource['title'] == 'PRESQT_FTS_METADATA.json':
                is_valid = validate_metadata(self, resource)
                # A valid metadata file is not written to disk; an invalid one is renamed,
                # recorded and written like any other resource.
                if not is_valid:
                    resource['path'] = resource['path'].replace(
                        'PRESQT_FTS_METADATA.json',
                        'INVALID_PRESQT_FTS_METADATA.json')
                    create_download_metadata(self, resource, fixity_obj)
                    write_file(
                        '{}{}'.format(self.resource_main_dir,
                                      resource['path']), resource['file'])
            else:
                create_download_metadata(self, resource, fixity_obj)
                write_file(
                    '{}{}'.format(self.resource_main_dir, resource['path']),
                    resource['file'])

        # Enhance the source keywords
        # Only done for transfers whose target supports keywords; otherwise keyword_dict stays empty.
        self.keyword_dict = {}
        if self.action == 'resource_transfer_in':
            if self.supports_keywords:
                if self.keyword_action == 'automatic':
                    self.keyword_dict = automatic_keywords(self)
                elif self.keyword_action == 'manual':
                    self.keyword_dict = manual_keywords(self)
        # NOTE(review): set to True unconditionally, whether or not any enhancement ran.
        self.keyword_enhancement_successful = True

        # Create PresQT action metadata
        update_process_info_message(self.process_info_path, self.action,
                                    "Creating PRESQT_FTS_METADATA...")
        self.source_username = func_dict['action_metadata']['sourceUsername']
        if self.action == 'resource_transfer_in':
            source_target_data = get_target_data(self.source_target_name)
            destination_target_data = get_target_data(
                self.destination_target_name)
            self.details = "PresQT Transfer from {} to {}".format(
                source_target_data['readable_name'],
                destination_target_data['readable_name'])
        else:
            source_target_data = get_target_data(self.source_target_name)
            self.details = "PresQT Download from {}".format(
                source_target_data['readable_name'])

        # The destination defaults to the local machine; the transfer branch below overrides
        # 'destinationTargetName'.
        self.action_metadata = {
            'id': str(uuid4()),
            'details': self.details,
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': self.source_target_name,
            'sourceUsername': self.source_username,
            'destinationTargetName': 'Local Machine',
            'destinationUsername': None,
            'keywords': self.keyword_dict,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

        # TODO: Move this up to make it occur after we loop through func_dict['resources'] and write
        # resources
        # Write empty containers to disk
        for container_path in func_dict['empty_containers']:
            # Make sure the container_path has a '/' and the beginning and end
            if container_path[-1] != '/':
                container_path += '/'
            if container_path[0] != '/':
                container_path = '/' + container_path
            # NOTE(review): os.makedirs is called without exist_ok, so it raises if the
            # directory already exists - confirm that cannot happen here.
            os.makedirs(
                os.path.dirname('{}{}'.format(self.resource_main_dir,
                                              container_path)))

        # If we are transferring the downloaded resource then bag it for the resource_upload method
        if self.action == 'resource_transfer_in':
            self.action_metadata[
                'destinationTargetName'] = self.destination_target_name

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])
            self.process_info_obj['download_status'] = get_action_message(
                self, 'Download', self.download_fixity, True,
                self.action_metadata)
            return True
        # If we are only downloading the resource then create metadata, bag, zip,
        # and update the server process file.
        else:
            # Create Metadata file
            final_fts_metadata_data = create_fts_metadata(
                self.all_keywords, self.action_metadata,
                self.source_fts_metadata_actions, self.extra_metadata)

            # Validate the final metadata
            metadata_validation = schema_validator(
                'presqt/json_schemas/metadata_schema.json',
                final_fts_metadata_data)
            self.process_info_obj['message'] = get_action_message(
                self, 'Download', self.download_fixity, metadata_validation,
                self.action_metadata)

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])

            # Write metadata file.
            # This and fixity_info.json are written after make_bag has run.
            write_file(
                os.path.join(self.resource_main_dir,
                             'PRESQT_FTS_METADATA.json'),
                final_fts_metadata_data, True)

            # Add the fixity file to the disk directory
            write_file(
                os.path.join(self.resource_main_dir, 'fixity_info.json'),
                fixity_info, True)

            # Zip the BagIt 'bag' to send forward.
            zip_directory(self.resource_main_dir,
                          "{}.zip".format(self.resource_main_dir),
                          self.ticket_path)

            # Everything was a success so update the server metadata file.
            self.process_info_obj['status_code'] = '200'
            self.process_info_obj['status'] = 'finished'
            self.process_info_obj['zip_name'] = '{}.zip'.format(
                self.base_directory_name)
            self.process_info_obj[
                'failed_fixity'] = self.download_failed_fixity
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)
            if self.email:
                # Build link to retrieve the download
                download_reverse = reverse('job_status',
                                           kwargs={
                                               "action": "download",
                                               "response_format": "zip"
                                           })
                download_url = self.request.build_absolute_uri(
                    download_reverse)
                final_download_url = "{}?ticket_number={}".format(
                    download_url, self.ticket_number)
                context = {
                    "download_url": final_download_url,
                    "download_message": self.process_info_obj['message'],
                    "failed_fixity": self.process_info_obj['failed_fixity']
                }
                email_blaster(self.email, "PresQT Download Complete", context,
                              "emails/download_email.html")

        return True
Example #9
0
def gitlab_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the repo.

    If the repo already holds a valid PRESQT_FTS_METADATA.json, the new actions and keywords are
    merged into it and the function returns right after the update. If the existing file is
    invalid, its contents are copied to INVALID_PRESQT_FTS_METADATA.json and the original file is
    overwritten with the new metadata. If there is no existing file, one is created.

    Parameters
    ----------
    token : str
        The user's GitLab token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If GitLab answers any of the create/update requests with an unexpected status code.
    """
    headers, _ = validation_check(token)

    # Check if metadata exists
    base_post_url = "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA.json?ref=master".format(
        project_id)

    metadata_file_response = requests.get(base_post_url, headers=headers)

    # By default the metadata file is created (POST), which GitLab answers with a 201
    request_type = requests.post
    expected_status = 201

    # If a metadata file already exists then grab its contents
    if metadata_file_response.status_code == 200:
        metadata_file_data = metadata_file_response.json()
        base64_metadata = base64.b64decode(metadata_file_data['content'])
        updated_metadata = json.loads(base64_metadata)
        if schema_validator('presqt/json_schemas/metadata_schema.json', updated_metadata) is not True:
            # We need to change the file name, this metadata is improperly formatted and
            # therefore invalid.
            invalid_base64_metadata = base64.b64encode(base64_metadata)
            data = {"branch": "master",
                    "commit_message": "PresQT Invalid Metadata Upload",
                    "encoding": "base64",
                    "content": invalid_base64_metadata}

            invalid_metadata_response = requests.post(
                'https://gitlab.com/api/v4/projects/{}/repository/files/INVALID_PRESQT_FTS_METADATA%2Ejson'.format(
                    project_id),
                headers=headers,
                data=data)
            if invalid_metadata_response.status_code != 201:
                raise PresQTError(
                    "The request to rename the invalid metadata file has returned a {} error code from Gitlab.".format(
                        invalid_metadata_response.status_code))
            # The original file still exists, so it has to be overwritten (PUT). An update is
            # answered with a 200, not the 201 a creation returns.
            request_type = requests.put
            expected_status = 200
        else:
            # Merge the 'actions' and keywords of both metadata files.
            joined_actions = list(itertools.chain(metadata_dict['actions'],
                                                  updated_metadata['actions']))
            joined_keywords = list(itertools.chain(metadata_dict['allKeywords'],
                                                   updated_metadata['allKeywords']))

            updated_metadata['actions'] = joined_actions
            updated_metadata['allKeywords'] = list(set(joined_keywords))

            updated_metadata_bytes = json.dumps(updated_metadata, indent=4).encode('utf-8')
            updated_base64_metadata = base64.b64encode(updated_metadata_bytes)

            data = {"branch": "master",
                    "commit_message": "Updated PresQT Metadata Upload",
                    "encoding": "base64",
                    "content": updated_base64_metadata}

            metadata_response = requests.put(
                "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA%2Ejson".format(
                    project_id),
                headers=headers,
                data=data)

            if metadata_response.status_code != 200:
                raise PresQTError(
                    "The request to update the metadata file has returned a {} error code from Gitlab.".format(
                        metadata_response.status_code))
            return

    metadata_bytes = json.dumps(metadata_dict, indent=4).encode('utf-8')
    base64_metadata = base64.b64encode(metadata_bytes)

    post_url = "https://gitlab.com/api/v4/projects/{}/repository/files/PRESQT_FTS_METADATA%2Ejson".format(
        project_id)

    data = {"branch": "master",
            "commit_message": "PresQT Metadata Upload",
            "encoding": "base64",
            "content": base64_metadata}

    response = request_type(post_url, headers=headers, data=data)

    if response.status_code != expected_status:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from GitLab.".format(
                response.status_code))

    # Add extra metadata to the top level resource
    if 'extra_metadata' in metadata_dict and metadata_dict['extra_metadata']:
        attribute_url = "https://gitlab.com/api/v4/projects/{}".format(project_id)
        upload_extra_metadata(metadata_dict['extra_metadata'], headers, attribute_url)
Example #10
0
def osf_upload_metadata(token, project_id, metadata_dict):
    """
    Write this PresQT action's metadata to PRESQT_FTS_METADATA.json in the project's osfstorage.

    An existing valid metadata file is merged with the new actions and keywords, updated in place,
    and the function returns. An existing invalid file is renamed to
    INVALID_PRESQT_FTS_METADATA.json and a fresh metadata file is created, as is done when no
    metadata file exists at all.

    Parameters
    ----------
    token : str
        The user's OSF token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the project

    Raises
    ------
    PresQTError
        If OSF answers the rename, update or create request with an unexpected status code.
    """
    osf_client = OSF(token)
    auth_header = {'Authorization': 'Bearer {}'.format(token)}
    metadata_file_name = 'PRESQT_FTS_METADATA.json'
    new_file_body = json.dumps(metadata_dict, indent=4).encode('utf-8')

    # Look through the project's storage for an existing metadata file
    storage_entries = osf_client._get_all_paginated_data(
        'https://api.osf.io/v2/nodes/{}/files/osfstorage'.format(project_id))

    for entry in storage_entries:
        if entry['attributes']['name'] != metadata_file_name:
            continue

        existing_raw = requests.get(entry['links']['move'],
                                    headers=auth_header).content
        existing_metadata = json.loads(existing_raw)

        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            existing_metadata) is not True:
            # The existing file is improperly formatted: rename it out of the way and fall
            # through to creating a new metadata file.
            rename_body = json.dumps({
                "action": "rename",
                "rename": "INVALID_PRESQT_FTS_METADATA.json"
            }).encode('utf-8')
            rename_response = requests.post(entry['links']['move'],
                                            headers=auth_header,
                                            data=rename_body)
            if rename_response.status_code != 201:
                raise PresQTError(
                    "The request to rename the invalid metadata file has returned a {} error code from OSF."
                    .format(rename_response.status_code))
            break

        # Combine the actions and keywords of the new and the existing metadata
        merged_actions = list(
            itertools.chain(metadata_dict['actions'],
                            existing_metadata['actions']))
        merged_keywords = list(
            itertools.chain(metadata_dict['allKeywords'],
                            existing_metadata['allKeywords']))

        existing_metadata['actions'] = merged_actions
        existing_metadata['allKeywords'] = list(set(merged_keywords))

        # Overwrite the existing metadata file with the merged contents
        update_response = requests.put(
            entry['links']['upload'],
            headers=auth_header,
            params={'kind': 'file'},
            data=json.dumps(existing_metadata, indent=4).encode('utf-8'))

        # When updating an existing metadata file, OSF returns a 200 status
        if update_response.status_code != 200:
            raise PresQTError(
                "The request to update the metadata file has returned a {} error code from OSF."
                .format(update_response.status_code))
        return

    # No usable metadata file exists, so create a new one.
    create_response = requests.put(
        "https://files.osf.io/v1/resources/{}/providers/osfstorage/".format(project_id),
        headers=auth_header,
        params={"name": metadata_file_name},
        data=new_file_body)

    if create_response.status_code != 201:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from OSF"
            .format(create_response.status_code))

    # Add extra metadata to the top level resource
    if 'extra_metadata' in metadata_dict.keys() and metadata_dict['extra_metadata']:
        upload_extra_metadata(metadata_dict['extra_metadata'], auth_header,
                              "https://api.osf.io/v2/nodes/{}/".format(project_id),
                              project_id)
Example #11
0
 def test_valid_json(self):
     """
     schema_validator should return True when given a valid JSON file and JSONSchema.
     """
     self.assertEqual(
         schema_validator(self.schema_path, self.target_json_path), True)
Example #12
0
def zenodo_upload_metadata(token, project_id, metadata_dict):
    """
    Write this PresQT action's FTS metadata file to the top level of a Zenodo project.

    If the project already holds a valid metadata file, it is replaced by a copy that
    also contains the new actions and keywords. If the existing file fails schema
    validation, its contents are re-posted as 'INVALID_PRESQT_FTS_METADATA.json' and a
    fresh metadata file is created from ``metadata_dict``.

    Parameters
    ----------
    token : str
        The user's Zenodo token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If posting a metadata file does not report a 201 status.
    """
    auth_parameter = zenodo_validation_check(token)
    file_name = 'PRESQT_FTS_METADATA.json'
    post_url = "https://zenodo.org/api/deposit/depositions/{}/files".format(
        project_id)

    project_files = requests.get(post_url, params=auth_parameter).json()

    # Only the first file carrying the metadata file name is ever considered.
    existing_file = next(
        (entry for entry in project_files if entry['filename'] == file_name),
        None)

    if existing_file is not None:
        # Download and parse the metadata that is already in the project.
        old_metadata_file = requests.get(existing_file['links']['download'],
                                         params=auth_parameter).content
        updated_metadata = json.loads(old_metadata_file)

        existing_is_valid = schema_validator(
            'presqt/json_schemas/metadata_schema.json',
            updated_metadata) is True

        # The old file is always deleted and re-posted rather than updated in place
        # (the original author noted problems with Zenodo's put method).
        requests.delete(existing_file['links']['self'], params=auth_parameter)

        if existing_is_valid:
            # Combine the actions and keywords of both metadata files, new entries first.
            joined_actions = list(
                itertools.chain(metadata_dict['actions'],
                                updated_metadata['actions']))
            joined_keywords = list(
                itertools.chain(metadata_dict['allKeywords'],
                                updated_metadata['allKeywords']))
            updated_metadata['actions'] = joined_actions
            updated_metadata['allKeywords'] = list(set(joined_keywords))

            response_status = metadata_post_request(file_name,
                                                    updated_metadata,
                                                    auth_parameter, post_url)
            # Re-posting the merged metadata file is expected to yield a 201 status.
            if response_status != 201:
                raise PresQTError(
                    "The request to update the metadata file has returned a {} error code from Zenodo."
                    .format(response_status))
            return

        # The existing metadata is improperly formatted: keep its contents under a
        # different file name, then fall through and create a fresh metadata file.
        response_status = metadata_post_request(
            'INVALID_PRESQT_FTS_METADATA.json', updated_metadata,
            auth_parameter, post_url)
        if response_status != 201:
            raise PresQTError(
                "The request to rename the invalid metadata file has returned a {} error code from Zenodo."
                .format(response_status))

    # No usable metadata file exists at this point, so create a new one.
    response_status = metadata_post_request(file_name, metadata_dict,
                                            auth_parameter, post_url)
    if response_status != 201:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from Zenodo."
            .format(response_status))

    # Add extra metadata to the top level resource
    extra_metadata = metadata_dict.get('extra_metadata')
    if extra_metadata:
        attribute_url = "https://zenodo.org/api/deposit/depositions/{}".format(
            project_id)
        upload_extra_metadata(extra_metadata, auth_parameter, attribute_url)
Example #13
0
def github_upload_metadata(token, project_id, metadata_dict):
    """
    Upload the metadata of this PresQT action at the top level of the repo.

    If a PRESQT_FTS_METADATA.json file already exists and passes schema validation, the
    new actions and keywords are merged into it and the file is updated. If it exists
    but fails validation, its contents are copied to INVALID_PRESQT_FTS_METADATA.json
    and the metadata file is overwritten with ``metadata_dict``. If it does not exist,
    it is created from ``metadata_dict``.

    Parameters
    ----------
    token : str
        The user's GitHub token
    project_id : str
        The id of the top level project that the upload took place on
    metadata_dict : dict
        The metadata to be written to the repo

    Raises
    ------
    PresQTError
        If the repository lookup or any metadata file write returns an unexpected
        status code.
    """
    metadata_file_name = 'PRESQT_FTS_METADATA.json'
    committer = {"name": "PresQT", "email": "N/A"}

    header, username = validation_check(token)
    project_data = requests.get(
        "https://api.github.com/repositories/{}".format(project_id),
        headers=header)

    if project_data.status_code != 200:
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from GitHub."
            .format(project_data.status_code))
    project_name = project_data.json()['name']

    base_put_url = "https://api.github.com/repos/{}/{}/contents/".format(
        username, project_name)
    metadata_url = '{}{}'.format(base_put_url, metadata_file_name)
    metadata_file_data = requests.get(metadata_url, headers=header).json()

    # A 'sha' is only present in the response when the metadata file already exists.
    sha = metadata_file_data.get('sha')

    # If a metadata file already exists then grab its contents
    if sha:
        existing_metadata_bytes = base64.b64decode(
            metadata_file_data['content'])
        updated_metadata = json.loads(existing_metadata_bytes)

        if schema_validator('presqt/json_schemas/metadata_schema.json',
                            updated_metadata) is True:
            # Combine the actions and keywords of both metadata files, new entries first.
            joined_actions = list(
                itertools.chain(metadata_dict['actions'],
                                updated_metadata['actions']))
            joined_keywords = list(
                itertools.chain(metadata_dict['allKeywords'],
                                updated_metadata['allKeywords']))
            updated_metadata['actions'] = joined_actions
            updated_metadata['allKeywords'] = list(set(joined_keywords))

            updated_metadata_bytes = json.dumps(updated_metadata,
                                                indent=4).encode('utf-8')
            updated_base64_metadata = base64.b64encode(
                updated_metadata_bytes).decode('utf-8')

            # No 'branch' is given, so the commit lands on the repository's default
            # branch. That is the same branch the existing file and its sha were read
            # from above, and the same branch the other writes in this function target.
            update_payload = {
                "message": "PresQT Update",
                "committer": committer,
                "content": updated_base64_metadata,
                "sha": sha
            }

            # Now we need to update the metadata file with this updated metadata
            response = requests.put(metadata_url,
                                    headers=header,
                                    data=json.dumps(update_payload))
            if response.status_code != 200:
                raise PresQTError(
                    "The request to create a metadata file has resulted in a {} error code from GitHub."
                    .format(response.status_code))
            return

        # The existing metadata is improperly formatted and therefore invalid. Preserve
        # its contents under a different file name; the metadata file itself is then
        # overwritten below (the payload there carries the existing sha).
        invalid_base64_metadata = base64.b64encode(
            existing_metadata_bytes).decode('utf-8')
        rename_payload = {
            "message": "PresQT Invalid Upload",
            "committer": committer,
            "content": invalid_base64_metadata
        }

        response = requests.put('{}{}'.format(
            base_put_url, 'INVALID_PRESQT_FTS_METADATA.json'),
                                headers=header,
                                data=json.dumps(rename_payload))
        if response.status_code != 201:
            raise PresQTError(
                "The request to rename the invalid metadata file has returned a {} error code from Github."
                .format(response.status_code))

    metadata_bytes = json.dumps(metadata_dict, indent=4).encode('utf-8')
    base64_metadata = base64.b64encode(metadata_bytes).decode('utf-8')

    # 'sha' is None when the file is being created and the existing file's sha when an
    # invalid metadata file is being overwritten.
    payload = {
        "message": "PresQT Upload",
        "sha": sha,
        "committer": committer,
        "content": base64_metadata
    }
    response = requests.put(metadata_url,
                            headers=header,
                            data=json.dumps(payload))

    # 201 is accepted for a newly created file, 200 for an overwritten one.
    if response.status_code not in (200, 201):
        raise PresQTError(
            "The request to create a metadata file has resulted in a {} error code from GitHub."
            .format(response.status_code))

    # Add extra metadata to the top level resource
    extra_metadata = metadata_dict.get('extra_metadata')
    if extra_metadata:
        attribute_url = "https://api.github.com/repos/{}/{}".format(
            username, project_name)
        upload_extra_metadata(extra_metadata, header, attribute_url)
Example #14
0
    def _download_resource(self):
        """
        Downloads the resources from the target, performs a fixity check,
        zips them up in BagIt format.

        The source target's 'resource_download' function is looked up through
        FunctionRouter and called with the source token and resource id. Each returned
        resource is fixity checked and written under ``self.resource_main_dir``. For a
        'resource_transfer_in' action the directory is only bagged; otherwise the FTS
        metadata and fixity files are written, the directory is bagged and zipped, and
        the process_info file is marked as finished.

        Returns
        -------
        False if fetching the resources raised a PresQTResponseException (the
        process_info file is updated with the failure), otherwise True.
        """
        action = 'resource_download'

        # Write the process id to the process_info file
        self.process_info_obj[
            'function_process_id'] = self.function_process.pid
        write_file(self.process_info_path, self.process_info_obj, True)

        # Fetch the proper function to call
        func = FunctionRouter.get_function(self.source_target_name, action)

        # Fetch the resources. func_dict is in the format:
        #   {
        #       'resources': files,
        #       'empty_containers': empty_containers,
        #       'action_metadata': action_metadata
        #   }
        try:
            func_dict = func(self.source_token, self.source_resource_id)
            # If the resource is being transferred, has only one file, and that file is PresQT
            # metadata then raise an error.
            if self.action == 'resource_transfer_in' \
                    and len(func_dict['resources']) == 1 \
                    and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
                raise PresQTResponseException(
                    'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                    status.HTTP_400_BAD_REQUEST)
        except PresQTResponseException as e:
            # Catch any errors that happen within the target fetch (including the
            # metadata-only transfer error raised just above).
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['download_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Update the expiration from 5 days to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            write_file(self.process_info_path, self.process_info_obj, True)
            return False

        # The directory all files should be saved in.
        self.resource_main_dir = os.path.join(self.ticket_path,
                                              self.base_directory_name)

        # For each resource, perform fixity check, gather metadata, and save it to disk.
        fixity_info = []
        self.download_fixity = True
        self.source_fts_metadata_actions = []
        self.new_fts_metadata_files = []
        self.download_failed_fixity = []
        for resource in func_dict['resources']:
            # Perform the fixity check and add extra info to the returned fixity object.
            # NOTE(review): self.download_fixity is reassigned on every iteration, so after
            # the loop it holds only the value returned for the last resource - confirm
            # whether an earlier failure is meant to persist.
            fixity_obj, self.download_fixity = download_fixity_checker.download_fixity_checker(
                resource)
            fixity_info.append(fixity_obj)

            # Keep track of the path of every resource whose fixity check did not pass.
            if not fixity_obj['fixity']:
                self.download_failed_fixity.append(resource['path'])

            # Create metadata for this resource. Return True if a valid FTS metadata file is found.
            if create_download_metadata(self, resource, fixity_obj):
                # Don't write valid FTS metadata file.
                continue

            # Save the file to the disk.
            write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                       resource['file'])

        # Create PresQT action metadata
        self.action_metadata = {
            'id': str(uuid4()),
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': self.source_target_name,
            'sourceUsername': func_dict['action_metadata']['sourceUsername'],
            'destinationTargetName': 'Local Machine',
            'destinationUsername': None,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

        # Write empty containers to disk
        for container_path in func_dict['empty_containers']:
            # Make sure the container_path has a '/' and the beginning and end
            if container_path[-1] != '/':
                container_path += '/'
            if container_path[0] != '/':
                container_path = '/' + container_path
            os.makedirs(
                os.path.dirname('{}{}'.format(self.resource_main_dir,
                                              container_path)))

        # If we are transferring the downloaded resource then bag it for the resource_upload method
        if self.action == 'resource_transfer_in':
            self.action_metadata[
                'destinationTargetName'] = self.destination_target_name

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])
            # Only the in-memory process_info_obj is updated here; it is not written to
            # disk in this branch.
            self.process_info_obj['download_status'] = get_action_message(
                'Download', self.download_fixity, True, self.action_metadata)
            return True
        # If we are only downloading the resource then create metadata, bag, zip,
        # and update the server process file.
        else:
            # Create and write metadata file.
            final_fts_metadata_data = create_fts_metadata(
                self.action_metadata, self.source_fts_metadata_actions)
            write_file(
                os.path.join(self.resource_main_dir,
                             'PRESQT_FTS_METADATA.json'),
                final_fts_metadata_data, True)

            # Validate the final metadata
            metadata_validation = schema_validator(
                'presqt/json_schemas/metadata_schema.json',
                final_fts_metadata_data)
            self.process_info_obj['message'] = get_action_message(
                'Download', self.download_fixity, metadata_validation,
                self.action_metadata)

            # Add the fixity file to the disk directory
            write_file(
                os.path.join(self.resource_main_dir, 'fixity_info.json'),
                fixity_info, True)

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])

            # Zip the BagIt 'bag' to send forward.
            zip_directory(self.resource_main_dir,
                          "{}.zip".format(self.resource_main_dir),
                          self.ticket_path)

            # Everything was a success so update the server metadata file.
            self.process_info_obj['status_code'] = '200'
            self.process_info_obj['status'] = 'finished'
            self.process_info_obj['zip_name'] = '{}.zip'.format(
                self.base_directory_name)
            self.process_info_obj[
                'failed_fixity'] = self.download_failed_fixity

            write_file(self.process_info_path, self.process_info_obj, True)
            return True