def CopyFile(self, src, dst, remote_credentials=None, block_size=DEFAULT_BLOCK_SIZE):
    """
    Copies the given file to the given destination

    Args:
        src: The file to copy
        dst: Where the file should be copied to
        remote_credentials: Credentials for remote server
        block_size: Size of each block to copy
    Returns:
        None
    """
    src, dst = self.parse_params(src, dst)

    # Only build an authenticated HTTP session when the copy targets
    # a remote server; local copies pass session=None through.
    session = None
    if remote_credentials:
        username, password = decrypt_remote_credentials(remote_credentials)
        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (username, password)

    copy_file(src, dst, requests_session=session, block_size=block_size)
    self.create_success_event("Copied %s to %s" % (src, dst))
def run(self, delete_source=False, update_path=True):
    """
    Submit the IP container and its METS XML, either to a remote
    reception endpoint (configured in the transfer_project profile)
    or to the local ingest reception directory.

    Args:
        delete_source: Remove the source container and XML afterwards
        update_path: Persist the new paths on the IP (local only)
    """
    ip = InformationPackage.objects.get(pk=self.ip)
    reception = Path.objects.get(entity="ingest_reception").value
    container_format = ip.get_container_format()
    src = ip.object_path

    try:
        remote = ip.get_profile_data('transfer_project').get(
            'preservation_organization_receiver_url')
    except AttributeError:
        remote = None

    session = None
    if not remote:
        dst = os.path.join(
            reception, ip.object_identifier_value + ".%s" % container_format)
    else:
        if update_path:
            raise ValueError(
                'Cannot update path when submitting to remote host')
        url, user, passw = remote.split(',')
        dst = urljoin(url, 'api/ip-reception/upload/')
        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (user, passw)

    block_size = 8 * 1000000  # 8MB
    copy_file(src, dst, requests_session=session, block_size=block_size)

    # The package METS XML lives next to the container on disk; remotely
    # it is posted to the same upload endpoint.
    src_xml = os.path.join(os.path.dirname(src),
                           ip.object_identifier_value + ".xml")
    dst_xml = dst if remote else os.path.join(
        reception, ip.object_identifier_value + ".xml")
    copy_file(src_xml, dst_xml, requests_session=session, block_size=block_size)

    if update_path:
        ip.object_path = dst
        ip.package_mets_path = dst_xml
        ip.save()

    if delete_source:
        delete_path(src)
        delete_path(src_xml)

    self.set_progress(100, total=100)
def test_copy_file_locally(self):
    """A local copy keeps the source and produces a byte-identical destination."""
    src = os.path.join(self.datadir, 'foo.txt')
    dst = os.path.join(self.datadir, 'bar.txt')
    with open(src, 'w') as fp:
        fp.write('test')

    copy_file(src, dst)

    for path in (src, dst):
        self.assertTrue(os.path.isfile(path))
    self.assertTrue(cmp(src, dst, shallow=False))
def test_copy_file(self, mock_local, mock_remote):
    """copy_file dispatches locally by default and remotely when given a session."""
    src, dst = 'foo', 'bar'

    copy_file(src, dst)
    mock_local.assert_called_once_with(src, dst)

    session = requests.Session()
    copy_file(src, dst, requests_session=session)
    mock_remote.assert_called_once_with(src, dst, session, block_size=mock.ANY)
def run(self, src, dst, requests_session=None, block_size=65536):
    """
    Copies the given file to the given destination

    Args:
        src: The file to copy
        dst: Where the file should be copied to
        requests_session: The request session to be used
        block_size: Size of each block to copy
    Returns:
        None
    """
    # Thin task wrapper: forward everything to the shared copy helper.
    copy_options = {
        'requests_session': requests_session,
        'block_size': block_size,
    }
    copy_file(src, dst, **copy_options)
def test_copy_with_not_enough_space_at_dst(self):
    """copy_file raises NoSpaceLeftError when dst free space < required size."""
    src = os.path.join(self.datadir, 'foo.txt')
    with open(src, 'w') as fp:
        fp.write('test')
    dst = os.path.join(self.datadir, 'bar.txt')

    # Pretend the tree needs 10 bytes while the destination disk has 5 free.
    usage = namedtuple('usage', 'free')
    patched_size = mock.patch(
        'ESSArch_Core.storage.copy.get_tree_size_and_count',
        return_value=(10, 1))
    patched_free = mock.patch(
        'ESSArch_Core.storage.copy.shutil.disk_usage',
        return_value=usage(free=5))

    with patched_size, patched_free, self.assertRaises(NoSpaceLeftError):
        copy_file(src, dst)
def run(self):
    """
    Receive the SIP: copy the container and its METS XML from the
    preingest reception to the ingest reception, or to a configured
    remote receiver endpoint.
    """
    # Local import in case the module-level import is absent; harmless if
    # `settings` is already imported at the top of this file.
    from django.conf import settings

    ip = InformationPackage.objects.get(pk=self.ip)
    srcdir = Path.objects.get(entity="path_preingest_reception").value
    reception = Path.objects.get(entity="path_ingest_reception").value
    container_format = ip.get_container_format()
    src = os.path.join(srcdir,
                       ip.object_identifier_value + ".%s" % container_format)

    try:
        remote = ip.get_profile('transfer_project').specification_data.get(
            'preservation_organization_receiver_url'
        )
    except AttributeError:
        remote = None

    session = None
    if remote:
        try:
            dst, remote_user, remote_pass = remote.split(',')
            dst = urljoin(dst, 'api/ip-reception/upload/')
            session = requests.Session()
            # BUGFIX: was hard-coded to False, silently disabling TLS
            # certificate verification; use the project-wide setting like
            # every other task in this codebase.
            session.verify = settings.REQUESTS_VERIFY
            session.auth = (remote_user, remote_pass)
        except ValueError:
            # Malformed "url,user,pass" string: fall back to local copy.
            remote = None
            session = None

    if not remote:
        # BUGFIX: this was an `else` on `if remote:`, so a malformed remote
        # string (ValueError above) fell through with `dst` undefined,
        # raising NameError at copy_file below.
        dst = os.path.join(reception,
                           ip.object_identifier_value + ".%s" % container_format)

    block_size = 8 * 1000000  # 8MB
    copy_file(src, dst, requests_session=session, block_size=block_size)

    src = os.path.join(srcdir, ip.object_identifier_value + ".xml")
    if not remote:
        dst = os.path.join(reception, ip.object_identifier_value + ".xml")
    # When remote, the XML is posted to the same upload endpoint (dst).
    copy_file(src, dst, requests_session=session, block_size=block_size)

    self.set_progress(100, total=100)
def test_copy_file_remotely(self, mock_copy, _mock_req):
    """Remote copy uploads block_size-sized chunks: the first call carries no
    upload_id, subsequent calls reuse the id returned by the server."""
    src = os.path.join(self.datadir, 'foo.txt')
    with open(src, 'w') as fp:
        fp.write('test')
    dst = 'bar'
    session = requests.Session()

    copy_file(src, dst, requests_session=session, block_size=1)

    expected = [
        mock.call(src, dst, 0, block_size=1, file_size=4,
                  requests_session=session),
    ]
    expected.extend(
        mock.call(src, dst, offset, block_size=1, file_size=4,
                  requests_session=session, upload_id='test_upload_id')
        for offset in range(1, 5)
    )
    mock_copy.assert_has_calls(expected)
def run(self):
    """
    Copy the IP container, its events XML (if present) and its package
    XML to the gate reception directory, or to a configured remote EPP
    receiver endpoint.

    Returns:
        The destination the container was copied to
    """
    ip = InformationPackage.objects.get(pk=self.ip)
    src = ip.object_path
    srcdir, srcfile = os.path.split(src)
    dst = Path.objects.get(entity="gate_reception").value

    try:
        remote = ip.get_profile_data('transfer_project').get(
            'preservation_organization_receiver_url_epp'
        )
    except AttributeError:
        remote = None

    session = None
    if remote:
        try:
            dst, remote_user, remote_pass = remote.split(',')
            session = requests.Session()
            session.verify = settings.REQUESTS_VERIFY
            session.auth = (remote_user, remote_pass)
        except ValueError:
            # Malformed "url,user,pass" string: fall back to local copy.
            remote = None

    if not remote:
        # BUGFIX: this was an `else` on `if remote:`, so a malformed remote
        # string (ValueError above) skipped it and left dst pointing at the
        # bare reception directory instead of the destination file.
        dst = os.path.join(dst, srcfile)

    block_size = 8 * 1000000  # 8MB
    copy_file(src, dst, requests_session=session, block_size=block_size)
    self.set_progress(50, total=100)

    objid = ip.object_identifier_value
    src = ip.get_events_file_path()
    if os.path.isfile(src):
        if not remote:
            xml_dst = os.path.join(os.path.dirname(dst),
                                   "%s_ipevents.xml" % objid)
        else:
            xml_dst = dst
        copy_file(src, xml_dst, requests_session=session, block_size=block_size)
        self.set_progress(75, total=100)

    src = os.path.join(srcdir, "%s.xml" % objid)
    if not remote:
        # BUGFIX: dst is the container *file* path here, so joining onto it
        # produced "<container>/<objid>.xml"; place the XML next to the
        # container, matching the events-XML copy above.
        xml_dst = os.path.join(os.path.dirname(dst), "%s.xml" % objid)
    else:
        # BUGFIX: when remote and no events file existed, xml_dst was left
        # undefined here (NameError); always send to the upload endpoint.
        xml_dst = dst
    copy_file(src, xml_dst, requests_session=session, block_size=block_size)

    self.set_progress(100, total=100)
    return dst
def TransferIP(self):
    """
    Transfer the IP container, its events XML (if present) and its
    package XML to the ingest transfer directory, or to a remote
    transfer destination from the transfer_project profile.

    Returns:
        The destination the container was copied to
    """
    ip = InformationPackage.objects.get(pk=self.ip)
    src = ip.object_path
    srcdir, srcfile = os.path.split(src)

    remote = ip.get_profile_data('transfer_project').get(
        'transfer_destination_url')

    session = None
    if not remote:
        dst = Path.objects.get(entity="ingest_transfer").value
    else:
        dst, remote_user, remote_pass = remote.split(',')
        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (remote_user, remote_pass)

    block_size = 8 * 1000000  # 8MB
    copy_file(src, dst, requests_session=session, block_size=block_size)
    self.set_progress(50, total=100)

    objid = ip.object_identifier_value
    events_src = ip.get_events_file_path()
    if os.path.isfile(events_src):
        if remote:
            events_dst = dst
        else:
            events_dst = os.path.join(os.path.dirname(dst),
                                      "%s_ipevents.xml" % objid)
        copy_file(events_src, events_dst,
                  requests_session=session, block_size=block_size)
        self.set_progress(75, total=100)

    mets_src = os.path.join(srcdir, "%s.xml" % objid)
    mets_dst = dst if remote else os.path.join(dst, "%s.xml" % objid)
    copy_file(mets_src, mets_dst,
              requests_session=session, block_size=block_size)

    self.set_progress(100, total=100)
    self.create_success_event("Transferred IP")
    return dst
def test_copy_file(self, mock_copy_file_locally):
    """Without a requests session, copy_file delegates to the local helper."""
    copy_file(self.src, self.dst)
    mock_copy_file_locally.assert_called_once_with(
        self.src, self.dst, block_size=mock.ANY)
def StorageMigration(self, storage_method, temp_path):
    """
    Migrate this information package to the given storage method.

    Reads the IP from an existing storage object (unless running on the
    remote side of a master/remote setup), converts between container
    and directory form as needed, pushes the files to a remote host when
    the target has one, and finally preserves to the target.

    Args:
        storage_method: pk of the StorageMethod to migrate to
        temp_path: scratch directory used for staging files
    Returns:
        The id returned by ip.preserve()
    Raises:
        ValueError: no enabled target, or an unsupported container format
    """
    ip = self.get_information_package()
    container_format = ip.get_container_format()
    # NOTE: rebinds the pk parameter to the model instance.
    storage_method = StorageMethod.objects.get(pk=storage_method)

    try:
        storage_target = storage_method.enabled_target
    except StorageTarget.DoesNotExist:
        raise ValueError('No writeable target available for {}'.format(storage_method))

    # Staging paths under temp_path for the extracted dir, the container,
    # and the AIP/AIC METS documents.
    dir_path = os.path.join(temp_path, ip.object_identifier_value)
    container_path = os.path.join(temp_path, ip.object_identifier_value + '.{}'.format(container_format))
    aip_xml_path = os.path.join(temp_path, ip.object_identifier_value + '.xml')
    aic_xml_path = os.path.join(temp_path, ip.aic.object_identifier_value + '.xml')

    if storage_target.master_server and not storage_target.remote_server:
        # we are on remote host
        # assumes master already placed a container at container_path — TODO confirm
        src_container = True
    else:
        # we are not on master, access from existing storage object
        storage_object = ip.get_fastest_readable_storage_object()
        if storage_object.container:
            storage_object.read(container_path, self.get_processtask())
        else:
            storage_object.read(dir_path, self.get_processtask())

        src_container = storage_object.container
    dst_container = storage_method.containers

    # If storage_object is "long term" and storage_method is not (or vice versa),
    # then we have to do some "conversion" before we go any further
    if src_container and not dst_container:
        # extract container
        if container_format == 'tar':
            with tarfile.open(container_path) as tar:
                tar.extractall(temp_path)
        elif container_format == 'zip':
            with zipfile.ZipFile(container_path) as zipf:
                zipf.extractall(temp_path)
        else:
            raise ValueError('Invalid container format: {}'.format(container_format))

    elif not src_container and dst_container:
        # create container, aip xml and aic xml
        if container_format == 'tar':
            with tarfile.open(container_path, 'w') as new_tar:
                new_tar.format = settings.TARFILE_FORMAT
                new_tar.add(dir_path)
        elif container_format == 'zip':
            zip_directory(dirname=dir_path, zipname=container_path, compress=False)
        else:
            raise ValueError('Invalid container format: {}'.format(container_format))

        generate_package_mets(ip, container_path, aip_xml_path)
        generate_aic_mets(ip, aic_xml_path)

    # Containerized (or remote) targets take the container plus both METS
    # files; otherwise only the extracted directory is preserved.
    if dst_container or storage_target.remote_server:
        src = [
            container_path,
            aip_xml_path,
            aic_xml_path,
        ]
    else:
        src = [dir_path]

    if storage_target.remote_server:
        # we are on master, copy files to remote
        host, user, passw = storage_target.remote_server.split(',')
        dst = urljoin(host, reverse('informationpackage-add-file-from-master'))

        requests_session = requests.Session()
        requests_session.verify = settings.REQUESTS_VERIFY
        requests_session.auth = (user, passw)

        for s in src:
            copy_file(s, dst, requests_session=requests_session)

    obj_id = ip.preserve(src, storage_target, dst_container, self.get_processtask())

    Notification.objects.create(
        message="Migrated {} to {}".format(ip.object_identifier_value, storage_method.name),
        level=logging.INFO,
        user_id=self.responsible,
        refresh=True,
    )
    return obj_id
def read(self, dst, task, extract=False):
    """
    Read this storage object to *dst*.

    Three cases:
      * the storage target lives on a remote server: delegate the copy to
        that server via the task's remote-copy API and poll until done;
      * we are a remote host asked by master to read: read into a local
        temp dir, (re)pack into a tar when needed, and POST the result
        back to master's add-file endpoint;
      * plain local read straight into *dst* via the storage backend.

    Args:
        dst: destination path (or, remotely, overridden by master's URL)
        task: the process task used for/driving remote copies
        extract: passed through to the storage backend read
    """
    ip = self.ip
    is_cached_storage_object = self.is_cache_for_ip(ip)
    storage_medium = self.storage_medium
    storage_target = storage_medium.storage_target

    if storage_target.remote_server:
        # remote_server is a "host,user,password" string — parse credentials.
        host, user, passw = storage_target.remote_server.split(',')
        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (user, passw)

        # if the remote server already has completed
        # then we only want to get the result from it,
        # not run it again. If it has failed then
        # we want to retry it
        r = task.get_remote_copy(session, host)
        if r.status_code == 404:
            # the task does not exist
            task.create_remote_copy(session, host)
            task.run_remote_copy(session, host)
        else:
            # Mirror the remote task's state onto the local task record.
            remote_data = r.json()
            task.status = remote_data['status']
            task.progress = remote_data['progress']
            task.result = remote_data['result']
            task.traceback = remote_data['traceback']
            task.exception = remote_data['exception']
            task.save()

            if task.status in celery_states.EXCEPTION_STATES:
                task.retry_remote_copy(session, host)

        # Poll the remote task every 5 seconds until it reaches a ready
        # (success/failure) state, keeping the local record in sync.
        while task.status not in celery_states.READY_STATES:
            r = task.get_remote_copy(session, host)
            remote_data = r.json()
            task.status = remote_data['status']
            task.progress = remote_data['progress']
            task.result = remote_data['result']
            task.traceback = remote_data['traceback']
            task.exception = remote_data['exception']
            task.save()
            sleep(5)

        if task.status in celery_states.EXCEPTION_STATES:
            task.reraise()
    else:
        storage_backend = self.get_storage_backend()
        storage_medium.prepare_for_read()

        if storage_target.master_server:
            # we are on a remote host that has been requested
            # by master to write to its temp directory
            temp_dir = Path.objects.get(entity='temp').value
            # NOTE: master_server field order is "user,password,host",
            # unlike remote_server above ("host,user,password").
            user, passw, host = storage_target.master_server.split(',')
            session = requests.Session()
            session.verify = settings.REQUESTS_VERIFY
            session.auth = (user, passw)
            # The caller-supplied dst is forwarded as a query param so the
            # master knows where to place the uploaded files.
            session.params = {'dst': dst}

            temp_object_path = ip.get_temp_object_path()
            temp_container_path = ip.get_temp_container_path()
            temp_mets_path = ip.get_temp_container_xml_path()
            temp_aic_mets_path = ip.get_temp_container_aic_xml_path()
            # dst is rebound: uploads go to master's add-file endpoint.
            dst = urljoin(
                host, reverse('informationpackage-add-file-from-master'))

            storage_backend.read(self, temp_dir, extract=extract)

            if is_cached_storage_object or not self.container:
                # No ready-made container: pack the read directory into a
                # fresh tar before uploading.
                with tarfile.open(temp_container_path, 'w') as new_tar:
                    new_tar.format = settings.TARFILE_FORMAT
                    new_tar.add(temp_object_path)
                copy_file(temp_container_path, dst, requests_session=session)
            else:
                # Containerized object: ship container plus both METS files.
                copy_file(temp_container_path, dst, requests_session=session)
                copy_file(temp_mets_path, dst, requests_session=session)
                copy_file(temp_aic_mets_path, dst, requests_session=session)
        else:
            storage_backend.read(self, dst, extract=extract)