예제 #1
0
    def test_success_item_multiple_files(self):
        """
        Return a 200 along with a zip file of the item and associated files requested.
        """
        resource_id = 'dj52w379504'
        shared_call_get_resource_zip(self, resource_id)
        # Registered as a cleanup so the download folder is removed even when an
        # assertion below fails (a trailing rmtree would be skipped in that case).
        self.addCleanup(shutil.rmtree, 'mediafiles/downloads/{}'.format(self.ticket_number))

        url = reverse('download_job', kwargs={'ticket_number': self.ticket_number})
        response = self.client.get(url, **self.header)
        # Verify the status code
        self.assertEqual(response.status_code, 200)

        # Name of the top-level folder inside the zip (also the zip's base name)
        zip_root = '{}_download_{}'.format(self.target_name, resource_id)
        zip_file = zipfile.ZipFile(io.BytesIO(response.content))
        # Verify the name of the zip file. Headers are read through the public
        # response[...] lookup rather than the private _headers mapping.
        self.assertEqual(
            response['Content-Disposition'],
            'attachment; filename={}.zip'.format(zip_root))
        # Verify content type
        self.assertEqual(response['Content-Type'], 'application/zip')
        # Verify the number of resources in the zip is correct
        self.assertEqual(len(zip_file.namelist()), 14)

        # Verify the custom hash_file information is correct
        with zip_file.open('{}/data/fixity_info.json'.format(zip_root)) as fixityfile:
            zip_json = json.load(fixityfile)
        for file_fixity in zip_json:
            self.assertEqual(file_fixity['fixity'], True)
            self.assertEqual(file_fixity['fixity_details'],
                             'Source Hash and PresQT Calculated hash matched.')
            self.assertEqual(file_fixity['hash_algorithm'], 'md5')
            self.assertEqual(file_fixity['presqt_hash'], file_fixity['source_hash'])

        # Run the first two files through the fixity checker again to make sure
        # they downloaded correctly
        for index in (0, 1):
            entry = zip_json[index]
            member_path = '{}/data{}'.format(zip_root, entry['path'])
            with zip_file.open(member_path) as myfile:
                resource_dict = {
                    "file": myfile.read(),
                    "hashes": {'md5': entry['presqt_hash']},
                    "title": 'f',
                    "path": member_path,
                    "metadata": {}
                }
            fixity, _ = download_fixity_checker(resource_dict)
            self.assertEqual(fixity['fixity'], True)
예제 #2
0
    def test_success_200_zip(self):
        """
        Return a 200 along with a zip file of the resource requested.
        """
        shared_call_get_resource_zip(self, self.resource_id)
        # Registered as a cleanup so the job folder is removed even when an
        # assertion below fails (a trailing rmtree would be skipped in that case).
        self.addCleanup(shutil.rmtree, 'mediafiles/jobs/{}'.format(self.ticket_number))

        url = reverse('job_status', kwargs={'action': 'download', 'response_format': 'zip'})
        response = self.client.get(url, **self.header)
        # Verify the status code
        self.assertEqual(response.status_code, 200)

        # Name of the top-level folder inside the zip (also the zip's base name)
        zip_root = 'osf_download_{}'.format(self.resource_id)
        zip_file = zipfile.ZipFile(io.BytesIO(response.content))
        # Verify the name of the zip file. Headers are read through the public
        # response[...] lookup rather than the private _headers mapping.
        self.assertEqual(
            response['Content-Disposition'],
            'attachment; filename={}.zip'.format(zip_root))
        # Verify content type
        self.assertEqual(response['Content-Type'], 'application/zip')
        # Verify the number of resources in the zip is correct
        self.assertEqual(len(zip_file.namelist()), 13)

        # Verify the custom hash_file information is correct (first entry only)
        with zip_file.open('{}/fixity_info.json'.format(zip_root)) as fixityfile:
            zip_json = json.load(fixityfile)[0]
        self.assertEqual(zip_json['fixity'], True)
        self.assertEqual(zip_json['fixity_details'],
                         'Source Hash and PresQT Calculated hash matched.')
        self.assertIn(zip_json['hash_algorithm'], ['sha256', 'md5'])
        self.assertEqual(zip_json['presqt_hash'], self.hashes[zip_json['hash_algorithm']])

        # Run the file through the fixity checker again to make sure it downloaded correctly
        image_path = '{}/data/22776439564_7edbed7e10_o.jpg'.format(zip_root)
        with zip_file.open(image_path) as myfile:
            resource_dict = {
                "file": myfile.read(),
                "hashes": self.hashes,
                "title": '22776439564_7edbed7e10_o.jpg',
                "path": image_path,
                "metadata": {}
            }
        fixity, _ = download_fixity_checker(resource_dict)
        self.assertEqual(fixity['fixity'], True)

        # Ensure no email was sent for this request as no email was provided.
        self.assertEqual(len(mail.outbox), 0)
예제 #3
0
    def _download_resource(self):
        """
        Download the resources from the source target, run a fixity check on each
        one, and write them to disk as a BagIt bag.

        For the 'resource_transfer_in' action the bag is left on disk and
        'download_status' is set on self.process_info_obj. For any other action the
        FTS metadata and fixity files are written next to the bag, the directory is
        zipped, the process_info file is marked as finished, and an email is sent
        when self.email is set.

        Returns
        -------
        bool
            False if fetching from the target raised a PresQTResponseException
            (the process_info file is updated with the failure), True otherwise.
        """
        action = 'resource_download'

        # Write the process id to the process_info file
        self.process_info_obj[
            'function_process_id'] = self.function_process.pid
        update_or_create_process_info(self.process_info_obj, self.action,
                                      self.ticket_number)

        # Fetch the proper function to call
        func = FunctionRouter.get_function(self.source_target_name, action)

        # Fetch the resources. The keys of func_dict read by this method are:
        #   {
        #       'resources': files,
        #       'empty_containers': empty_containers,
        #       'action_metadata': action_metadata,
        #       'extra_metadata': extra_metadata
        #   }
        try:
            func_dict = func(self.source_token, self.source_resource_id,
                             self.process_info_path, self.action)
            # If the resource is being transferred, has only one file, and that file is the
            # PresQT metadata then raise an error.
            # NOTE(review): the message below reads "cannot not"; it is left unchanged
            # because callers/tests may match on the exact text.
            if self.action == 'resource_transfer_in' and \
                    len(func_dict['resources']) == 1 \
                    and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
                raise PresQTResponseException(
                    'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                    status.HTTP_400_BAD_REQUEST)
        except PresQTResponseException as e:
            # TODO: Functionalize this error section
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['download_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Shorten the expiration to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)

            return False

        # Get the latest contents of the job's process_info.json file
        self.process_info_obj = read_file(self.process_info_path,
                                          True)[self.action]

        # The directory all files should be saved in.
        self.resource_main_dir = os.path.join(self.ticket_path,
                                              self.base_directory_name)
        update_process_info_message(
            self.process_info_path, self.action,
            'Performing fixity checks and gathering metadata...')

        self.extra_metadata = func_dict['extra_metadata']
        # For each resource, perform fixity check, gather metadata, and save it to disk.
        fixity_info = []
        self.download_fixity = True
        self.download_failed_fixity = []
        self.source_fts_metadata_actions = []
        self.new_fts_metadata_files = []
        self.all_keywords = []
        self.initial_keywords = []
        self.manual_keywords = []
        self.enhanced_keywords = []
        for resource in func_dict['resources']:
            # Perform the fixity check and add extra info to the returned fixity object.
            # Note: This method of calling the function needs to stay this way for test Mock
            fixity_obj, fixity_match = download_fixity_checker.download_fixity_checker(
                resource)
            fixity_info.append(fixity_obj)

            # One failing resource marks the whole download as failed; assigning the
            # per-resource result directly would let a later passing file hide an
            # earlier mismatch.
            # NOTE(review): assumes the second return value is a per-resource
            # truthy/falsy match flag - confirm against download_fixity_checker.
            if not fixity_match:
                self.download_fixity = False

            if not fixity_obj['fixity']:
                self.download_failed_fixity.append(resource['path'])

            # Create metadata for this resource or validate the metadata file
            if resource['title'] == 'PRESQT_FTS_METADATA.json':
                is_valid = validate_metadata(self, resource)
                if not is_valid:
                    # Keep the invalid metadata file, but under a different name
                    resource['path'] = resource['path'].replace(
                        'PRESQT_FTS_METADATA.json',
                        'INVALID_PRESQT_FTS_METADATA.json')
                    create_download_metadata(self, resource, fixity_obj)
                    write_file(
                        '{}{}'.format(self.resource_main_dir,
                                      resource['path']), resource['file'])
            else:
                create_download_metadata(self, resource, fixity_obj)
                write_file(
                    '{}{}'.format(self.resource_main_dir, resource['path']),
                    resource['file'])

        # Enhance the source keywords
        self.keyword_dict = {}
        if self.action == 'resource_transfer_in':
            if self.supports_keywords:
                if self.keyword_action == 'automatic':
                    self.keyword_dict = automatic_keywords(self)
                elif self.keyword_action == 'manual':
                    self.keyword_dict = manual_keywords(self)
        self.keyword_enhancement_successful = True

        # Create PresQT action metadata
        update_process_info_message(self.process_info_path, self.action,
                                    "Creating PRESQT_FTS_METADATA...")
        self.source_username = func_dict['action_metadata']['sourceUsername']
        source_target_data = get_target_data(self.source_target_name)
        if self.action == 'resource_transfer_in':
            destination_target_data = get_target_data(
                self.destination_target_name)
            self.details = "PresQT Transfer from {} to {}".format(
                source_target_data['readable_name'],
                destination_target_data['readable_name'])
        else:
            self.details = "PresQT Download from {}".format(
                source_target_data['readable_name'])

        self.action_metadata = {
            'id': str(uuid4()),
            'details': self.details,
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': self.source_target_name,
            'sourceUsername': self.source_username,
            'destinationTargetName': 'Local Machine',
            'destinationUsername': None,
            'keywords': self.keyword_dict,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

        # TODO: Move this up to make it occur after we loop through func_dict['resources'] and write
        # resources
        # Write empty containers to disk
        for container_path in func_dict['empty_containers']:
            # Make sure the container_path has a '/' at the beginning and end.
            # startswith/endswith also cope with an empty path.
            if not container_path.endswith('/'):
                container_path += '/'
            if not container_path.startswith('/'):
                container_path = '/' + container_path
            # exist_ok: the directory may already exist (e.g. a duplicate entry).
            os.makedirs(
                os.path.dirname('{}{}'.format(self.resource_main_dir,
                                              container_path)),
                exist_ok=True)

        # If we are transferring the downloaded resource then bag it for the resource_upload method
        if self.action == 'resource_transfer_in':
            self.action_metadata[
                'destinationTargetName'] = self.destination_target_name

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])
            self.process_info_obj['download_status'] = get_action_message(
                self, 'Download', self.download_fixity, True,
                self.action_metadata)
            return True
        # If we are only downloading the resource then create metadata, bag, zip,
        # and update the server process file.
        else:
            # Create Metadata file
            final_fts_metadata_data = create_fts_metadata(
                self.all_keywords, self.action_metadata,
                self.source_fts_metadata_actions, self.extra_metadata)

            # Validate the final metadata
            metadata_validation = schema_validator(
                'presqt/json_schemas/metadata_schema.json',
                final_fts_metadata_data)
            self.process_info_obj['message'] = get_action_message(
                self, 'Download', self.download_fixity, metadata_validation,
                self.action_metadata)

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])

            # Write metadata file (written after bagging, directly under resource_main_dir).
            write_file(
                os.path.join(self.resource_main_dir,
                             'PRESQT_FTS_METADATA.json'),
                final_fts_metadata_data, True)

            # Add the fixity file to the disk directory
            write_file(
                os.path.join(self.resource_main_dir, 'fixity_info.json'),
                fixity_info, True)

            # Zip the BagIt 'bag' to send forward.
            zip_directory(self.resource_main_dir,
                          "{}.zip".format(self.resource_main_dir),
                          self.ticket_path)

            # Everything was a success so update the server metadata file.
            self.process_info_obj['status_code'] = '200'
            self.process_info_obj['status'] = 'finished'
            self.process_info_obj['zip_name'] = '{}.zip'.format(
                self.base_directory_name)
            self.process_info_obj[
                'failed_fixity'] = self.download_failed_fixity
            update_or_create_process_info(self.process_info_obj, self.action,
                                          self.ticket_number)
            if self.email:
                # Build link to retrieve the download
                download_reverse = reverse('job_status',
                                           kwargs={
                                               "action": "download",
                                               "response_format": "zip"
                                           })
                download_url = self.request.build_absolute_uri(
                    download_reverse)
                final_download_url = "{}?ticket_number={}".format(
                    download_url, self.ticket_number)
                context = {
                    "download_url": final_download_url,
                    "download_message": self.process_info_obj['message'],
                    "failed_fixity": self.process_info_obj['failed_fixity']
                }
                email_blaster(self.email, "PresQT Download Complete", context,
                              "emails/download_email.html")

        return True
예제 #4
0
    def _download_resource(self):
        """
        Download the resources from the source target, run a fixity check on each
        one, and write them to disk as a BagIt bag.

        For the 'resource_transfer_in' action the bag is left on disk and
        'download_status' is set on self.process_info_obj. For any other action the
        FTS metadata and fixity files are written, the directory is bagged and
        zipped, and the process_info file is marked as finished.

        Returns
        -------
        bool
            False if fetching from the target raised a PresQTResponseException
            (the process_info file is updated with the failure), True otherwise.
        """
        action = 'resource_download'

        # Write the process id to the process_info file
        self.process_info_obj[
            'function_process_id'] = self.function_process.pid
        write_file(self.process_info_path, self.process_info_obj, True)

        # Fetch the proper function to call
        func = FunctionRouter.get_function(self.source_target_name, action)

        # Fetch the resources. func_dict is in the format:
        #   {
        #       'resources': files,
        #       'empty_containers': empty_containers,
        #       'action_metadata': action_metadata
        #   }
        try:
            func_dict = func(self.source_token, self.source_resource_id)
            # If the resource is being transferred, has only one file, and that file is PresQT
            # metadata then raise an error.
            # NOTE(review): the message below reads "cannot not"; it is left unchanged
            # because callers/tests may match on the exact text.
            if self.action == 'resource_transfer_in' \
                    and len(func_dict['resources']) == 1 \
                    and func_dict['resources'][0]['title'] == 'PRESQT_FTS_METADATA.json':
                raise PresQTResponseException(
                    'PresQT Error: PresQT FTS metadata cannot not be transferred by itself.',
                    status.HTTP_400_BAD_REQUEST)
        except PresQTResponseException as e:
            # Catch any errors that happen within the target fetch.
            # Update the server process_info file appropriately.
            self.process_info_obj['status_code'] = e.status_code
            self.process_info_obj['status'] = 'failed'
            if self.action == 'resource_transfer_in':
                self.process_info_obj['download_status'] = 'failed'
            self.process_info_obj['message'] = e.data
            # Shorten the expiration to 1 hour from now. We can delete this faster because
            # it's an incomplete/failed directory.
            self.process_info_obj['expiration'] = str(timezone.now() +
                                                      relativedelta(hours=1))
            write_file(self.process_info_path, self.process_info_obj, True)
            return False

        # The directory all files should be saved in.
        self.resource_main_dir = os.path.join(self.ticket_path,
                                              self.base_directory_name)

        # For each resource, perform fixity check, gather metadata, and save it to disk.
        fixity_info = []
        self.download_fixity = True
        self.source_fts_metadata_actions = []
        self.new_fts_metadata_files = []
        self.download_failed_fixity = []
        for resource in func_dict['resources']:
            # Perform the fixity check and add extra info to the returned fixity object.
            fixity_obj, fixity_match = download_fixity_checker.download_fixity_checker(
                resource)
            fixity_info.append(fixity_obj)

            # One failing resource marks the whole download as failed; assigning the
            # per-resource result directly would let a later passing file hide an
            # earlier mismatch.
            # NOTE(review): assumes the second return value is a per-resource
            # truthy/falsy match flag - confirm against download_fixity_checker.
            if not fixity_match:
                self.download_fixity = False

            if not fixity_obj['fixity']:
                self.download_failed_fixity.append(resource['path'])

            # Create metadata for this resource. Return True if a valid FTS metadata file is found.
            if create_download_metadata(self, resource, fixity_obj):
                # Don't write valid FTS metadata file.
                continue

            # Save the file to the disk.
            write_file('{}{}'.format(self.resource_main_dir, resource['path']),
                       resource['file'])

        # Create PresQT action metadata
        self.action_metadata = {
            'id': str(uuid4()),
            'actionDateTime': str(timezone.now()),
            'actionType': self.action,
            'sourceTargetName': self.source_target_name,
            'sourceUsername': func_dict['action_metadata']['sourceUsername'],
            'destinationTargetName': 'Local Machine',
            'destinationUsername': None,
            'files': {
                'created': self.new_fts_metadata_files,
                'updated': [],
                'ignored': []
            }
        }

        # Write empty containers to disk
        for container_path in func_dict['empty_containers']:
            # Make sure the container_path has a '/' at the beginning and end.
            # startswith/endswith also cope with an empty path.
            if not container_path.endswith('/'):
                container_path += '/'
            if not container_path.startswith('/'):
                container_path = '/' + container_path
            # exist_ok: the directory may already exist (e.g. a duplicate entry).
            os.makedirs(
                os.path.dirname('{}{}'.format(self.resource_main_dir,
                                              container_path)),
                exist_ok=True)

        # If we are transferring the downloaded resource then bag it for the resource_upload method
        if self.action == 'resource_transfer_in':
            self.action_metadata[
                'destinationTargetName'] = self.destination_target_name

            # Make a BagIt 'bag' of the resources.
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])
            self.process_info_obj['download_status'] = get_action_message(
                'Download', self.download_fixity, True, self.action_metadata)
            return True
        # If we are only downloading the resource then create metadata, bag, zip,
        # and update the server process file.
        else:
            # Create and write metadata file.
            final_fts_metadata_data = create_fts_metadata(
                self.action_metadata, self.source_fts_metadata_actions)
            write_file(
                os.path.join(self.resource_main_dir,
                             'PRESQT_FTS_METADATA.json'),
                final_fts_metadata_data, True)

            # Validate the final metadata
            metadata_validation = schema_validator(
                'presqt/json_schemas/metadata_schema.json',
                final_fts_metadata_data)
            self.process_info_obj['message'] = get_action_message(
                'Download', self.download_fixity, metadata_validation,
                self.action_metadata)

            # Add the fixity file to the disk directory
            write_file(
                os.path.join(self.resource_main_dir, 'fixity_info.json'),
                fixity_info, True)

            # Make a BagIt 'bag' of the resources (the metadata and fixity files
            # written above are part of the directory being bagged).
            bagit.make_bag(self.resource_main_dir,
                           checksums=['md5', 'sha1', 'sha256', 'sha512'])

            # Zip the BagIt 'bag' to send forward.
            zip_directory(self.resource_main_dir,
                          "{}.zip".format(self.resource_main_dir),
                          self.ticket_path)

            # Everything was a success so update the server metadata file.
            self.process_info_obj['status_code'] = '200'
            self.process_info_obj['status'] = 'finished'
            self.process_info_obj['zip_name'] = '{}.zip'.format(
                self.base_directory_name)
            self.process_info_obj[
                'failed_fixity'] = self.download_failed_fixity

            write_file(self.process_info_path, self.process_info_obj, True)
            return True