def run(self, delete_source=False, update_path=True):
    """Transfer the package container and its XML file to the reception.

    The destination is either the local ``ingest_reception`` directory or,
    when the ``transfer_project`` profile data carries a
    ``preservation_organization_receiver_url`` value, a remote host. That
    value is split on commas into ``<url>,<user>,<password>``.

    Args:
        delete_source: When true, delete the source container and the
            source XML file after both have been transferred.
        update_path: When true, store the new container and XML locations
            on the information package. Not allowed for remote transfers.

    Raises:
        ValueError: If a remote receiver is configured while
            ``update_path`` is true.
    """
    package = InformationPackage.objects.get(pk=self.ip)
    reception_dir = Path.objects.get(entity="ingest_reception").value
    extension = package.get_container_format()
    container_src = package.object_path

    try:
        receiver = package.get_profile_data('transfer_project').get(
            'preservation_organization_receiver_url')
    except AttributeError:
        # No profile data object to query: treat as a local transfer.
        receiver = None

    if receiver:
        if update_path:
            raise ValueError(
                'Cannot update path when submitting to remote host')

        base_url, username, password = receiver.split(',')
        container_dst = urljoin(base_url, 'api/ip-reception/upload/')

        session = requests.Session()
        session.verify = settings.REQUESTS_VERIFY
        session.auth = (username, password)

        # Remote transfers send both files to the same endpoint.
        xml_dst = container_dst
    else:
        session = None
        container_dst = os.path.join(
            reception_dir,
            package.object_identifier_value + ".%s" % extension)
        xml_dst = os.path.join(
            reception_dir, package.object_identifier_value + ".xml")

    transfer_options = {
        'requests_session': session,
        'block_size': 8 * 1000000,  # 8MB
    }

    copy_file(container_src, container_dst, **transfer_options)

    # The XML file lives next to the source container.
    xml_src = os.path.join(
        os.path.dirname(container_src),
        package.object_identifier_value + ".xml")
    copy_file(xml_src, xml_dst, **transfer_options)

    if update_path:
        package.object_path = container_dst
        package.package_mets_path = xml_dst
        package.save()

    if delete_source:
        for leftover in (container_src, xml_src):
            delete_path(leftover)

    self.set_progress(100, total=100)
def PostPreservationCleanup(self):
    """Delete what is left of the information package in the work areas.

    For every configured ``preingest_reception``, ``preingest`` and
    ``ingest_reception`` path, removes the entry named after the package's
    object identifier together with its container (``.tar`` or ``.zip``)
    and its ``.xml`` sibling.

    ``.zip`` is included because containers are written to the reception
    using the package's own container format, which is not always tar.
    """
    ip = self.get_information_package()
    paths = Path.objects.filter(entity__in=[
        'preingest_reception',
        'preingest',
        'ingest_reception',
    ]).values_list('value', flat=True)

    # '' targets the bare entry itself (e.g. an unpacked directory).
    suffixes = ('', '.tar', '.zip', '.xml')

    for p in paths:
        base = os.path.join(p, ip.object_identifier_value)
        for suffix in suffixes:
            # Most of these will not exist in any given area; delete_path is
            # expected to ignore missing paths.
            delete_path(base + suffix)
def test_delete_non_existing_path(self):
    """Deleting a path that was never created must not raise."""
    missing = os.path.join(self.datadir, 'foo.txt')

    delete_path(missing)

    still_there = os.path.exists(missing)
    self.assertFalse(still_there)
def test_delete_file(self):
    """An existing regular file is gone after delete_path."""
    target = os.path.join(self.datadir, 'foo.txt')

    # Create an empty file to delete.
    handle = open(target, 'a')
    handle.close()

    delete_path(target)

    still_there = os.path.exists(target)
    self.assertFalse(still_there)
def test_delete_directory(self):
    """An existing directory is gone after delete_path."""
    target_dir = os.path.join(self.datadir, 'foo')
    os.mkdir(target_dir)

    delete_path(target_dir)

    still_there = os.path.exists(target_dir)
    self.assertFalse(still_there)
def run(self, purpose=None, delete_sip=False):
    """Receive a SIP container into a newly prepared AIP directory.

    Steps, in order:

    1. Record METS file metadata (creation date, size, digest algorithm and
       digest) on the information package and reset its generation to 0.
    2. Create an AIC information package and attach it to the AIP.
    3. Point the AIP at ``<policy ingest path>/<container base name>``,
       create that directory and save.
    4. Resolve the SIP destination from the AIP profile structure ('sip',
       falling back to 'content'), then either extract the container there
       or copy it there, depending on ``policy.receive_extract_sip``.
    5. Parse the SIP METS file and merge its values, with keys upper-cased
       and prefixed ``SIP_``, into the AIP profile relation data.

    Args:
        purpose: Not referenced in this body.
        delete_sip: When true, delete the original container and the file
            at the same path with an ``.xml`` suffix once received.

    Raises:
        ValueError: If extraction is enabled and the container is neither
            ``.tar`` nor ``.zip``.
        OSError: If a required directory cannot be created for a reason
            other than it already existing.
    """
    self.logger.debug('Receiving SIP')
    aip = InformationPackage.objects.get(pk=self.ip)
    algorithm = aip.get_checksum_algorithm()
    container = aip.object_path
    # The container's base name (without extension) names the AIP directory.
    objid, container_type = os.path.splitext(os.path.basename(container))
    container_type = container_type.lower()
    xml = aip.package_mets_path
    aip.package_mets_create_date = timestamp_to_datetime(
        creation_date(xml)).isoformat()
    aip.package_mets_size = os.path.getsize(xml)
    aip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
        algorithm.upper()]
    aip.package_mets_digest = calculate_checksum(xml, algorithm=algorithm)
    aip.generation = 0
    # The new AIC copies the descriptive fields of the AIP.
    aic = InformationPackage.objects.create(
        package_type=InformationPackage.AIC,
        responsible=aip.responsible,
        label=aip.label,
        start_date=aip.start_date,
        end_date=aip.end_date)
    # Remember the original container location; object_path is about to be
    # repointed and the old value is needed for the optional deletion below.
    old_sip_path = aip.object_path
    aip.aic = aic
    aip_dir = os.path.join(aip.policy.ingest_path.value, objid)
    aip.object_path = aip_dir
    try:
        os.makedirs(aip_dir)
    except OSError as e:
        # An already-existing directory is acceptable; anything else is not.
        if e.errno != errno.EEXIST:
            raise
    aip.save()
    dst_path, dst_name = find_destination('sip', aip.get_profile('aip').structure, aip.object_path)
    if dst_path is None:
        # No 'sip' entry in the structure: fall back to the 'content' entry.
        dst_path, dst_name = find_destination(
            'content', aip.get_profile('aip').structure, aip.object_path)
    # parse_params must yield exactly one value here (single-element unpack).
    dst_name, = self.parse_params(dst_name)
    dst = os.path.join(dst_path, dst_name)
    sip_profile = aip.submission_agreement.profile_sip
    # Clear out any directory already present at the destination.
    try:
        shutil.rmtree(dst)
    except FileNotFoundError:
        pass
    if aip.policy.receive_extract_sip:
        temp = Path.objects.cached('entity', 'temp', 'value')
        # Extract into a scratch directory first, then move into place.
        with tempfile.TemporaryDirectory(dir=temp) as tmpdir:
            self.logger.debug('Extracting {} to {}'.format(
                container, tmpdir))
            # NOTE(review): extractall is called without member filtering;
            # confirm containers are trusted or validated before this point.
            # NOTE(review): an archive with no members raises IndexError on
            # the [0] lookups below.
            if container_type == '.tar':
                with tarfile.open(container) as tar:
                    root_member_name = tar.getnames()[0]
                    tar.extractall(tmpdir)
            elif container_type == '.zip':
                with zipfile.ZipFile(container) as zipf:
                    root_member_name = zipf.namelist()[0]
                    zipf.extractall(tmpdir)
            else:
                raise ValueError(
                    'Invalid container type: {}'.format(container))
            # Joining with '' appends a trailing separator to dst.
            dst = os.path.join(dst, '')
            try:
                os.makedirs(dst)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
            tmpsrc = tmpdir
            # If the archive unpacked to a single top-level directory, move
            # that directory's contents rather than the directory itself.
            if len(os.listdir(tmpdir)) == 1 and os.listdir(
                    tmpdir)[0] == root_member_name:
                new_tmpsrc = os.path.join(tmpdir, root_member_name)
                if os.path.isdir(new_tmpsrc):
                    tmpsrc = new_tmpsrc
            self.logger.debug('Moving content of {} to {}'.format(
                tmpsrc, dst))
            for f in os.listdir(tmpsrc):
                shutil.move(os.path.join(tmpsrc, f), dst)
            # The scratch directory is removed when the with-block exits.
            self.logger.debug('Deleting {}'.format(tmpdir))
        # sip_path is stored relative to the AIP directory.
        aip.sip_path = os.path.relpath(dst, aip.object_path)
    else:
        self.logger.debug('Copying {} to {}'.format(container, dst))
        shutil.copy2(container, dst)
        # NOTE(review): this path assumes dst is a directory the container
        # was copied into; confirm dst exists as a directory at this point.
        aip.sip_path = os.path.relpath(
            os.path.join(dst, os.path.basename(container)), aip.object_path)
    sip_mets_dir, sip_mets_file = find_destination('mets_file', sip_profile.structure, aip.sip_path)
    # NOTE(review): aip.sip_path is relative to aip.object_path, so this
    # isfile check resolves against the current working directory; confirm
    # that is intended.
    if os.path.isfile(aip.sip_path):
        # SIP kept as a container file: read the METS from inside it.
        sip_mets_data = parse_mets(
            open_file(
                os.path.join(aip.object_path, sip_mets_dir, sip_mets_file),
                container=aip.sip_path,
                container_prefix=aip.object_identifier_value,
            ))
    else:
        sip_mets_data = parse_mets(
            open_file(
                os.path.join(aip.object_path, sip_mets_dir, sip_mets_file)))
    # prefix all SIP data
    sip_mets_data = {
        f'SIP_{k.upper()}': v for k, v in sip_mets_data.items()
    }
    aip_profile_rel_data = aip.get_profile_rel('aip').data
    aip_profile_rel_data.data.update(sip_mets_data)
    aip_profile_rel_data.save()
    if delete_sip:
        delete_path(old_sip_path)
        # Also remove the file sharing the container's path with '.xml'.
        delete_path(pathlib.Path(old_sip_path).with_suffix('.xml'))
    self.logger.debug('sip_path set to {}'.format(aip.sip_path))
    aip.save()
def ReceiveSIP(self, purpose=None, delete_sip=False):
    """Receive the SIP container into the package's ingest directory.

    Records METS file metadata on the information package, repoints the
    package at ``<policy ingest path>/<object identifier>``, and then either
    extracts the container (``.tar`` or ``.zip``) into the SIP destination
    or copies it there, depending on ``policy.receive_extract_sip``. The
    destination is stored on the package relative to its object path.

    Args:
        purpose: Not referenced in this body.
        delete_sip: Not referenced in this body.

    Returns:
        The SIP destination path (with a trailing separator when the
        container was extracted).

    Raises:
        ValueError: If extraction is enabled and the container is neither
            ``.tar`` nor ``.zip``.
    """
    log = logging.getLogger('essarch.workflow.tasks.ReceiveSIP')
    log.debug('Receiving SIP')

    package = self.get_information_package()
    digest_algorithm = package.get_checksum_algorithm()

    container = package.object_path
    extension = os.path.splitext(os.path.basename(container))[1].lower()

    # Describe the METS file on the package before moving anything.
    mets_path = package.package_mets_path
    mets_created = timestamp_to_datetime(creation_date(mets_path))
    package.package_mets_create_date = mets_created.isoformat()
    package.package_mets_size = os.path.getsize(mets_path)
    package.package_mets_digest_algorithm = (
        MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[digest_algorithm.upper()]
    )
    package.package_mets_digest = calculate_checksum(
        mets_path, algorithm=digest_algorithm)

    package.object_path = os.path.join(
        package.policy.ingest_path.value, package.object_identifier_value)
    package.save()

    # Prefer the 'sip' entry of the structure, fall back to 'content'.
    target_dir, target_name = find_destination(
        'sip', package.get_structure(), package.object_path)
    if target_dir is None:
        target_dir, target_name = find_destination(
            'content', package.get_structure(), package.object_path)
    [target_name] = self.parse_params(target_name)
    destination = os.path.join(target_dir, target_name)

    if not package.policy.receive_extract_sip:
        log.debug('Copying {} to {}'.format(container, destination))
        shutil.copy2(container, destination)
    else:
        # remove any existing directory from previous attempts
        delete_path(destination)

        scratch_parent = Path.objects.get(entity='temp').value
        with tempfile.TemporaryDirectory(dir=scratch_parent) as scratch:
            log.debug('Extracting {} to {}'.format(container, scratch))

            if extension not in ('.tar', '.zip'):
                raise ValueError(
                    'Invalid container type: {}'.format(container))

            if extension == '.zip':
                with zipfile.ZipFile(container) as archive:
                    first_member = archive.namelist()[0]
                    archive.extractall(scratch)
            else:
                with tarfile.open(container) as archive:
                    first_member = archive.getnames()[0]
                    archive.extractall(scratch)

            # Joining with '' appends a trailing separator.
            destination = os.path.join(destination, '')
            os.makedirs(destination)

            # When the archive unpacked to one top-level directory, move
            # that directory's contents instead of the directory itself.
            move_root = scratch
            single_root = os.path.join(scratch, first_member)
            if os.listdir(scratch) == [first_member] and os.path.isdir(
                    single_root):
                move_root = single_root

            log.debug('Moving content of {} to {}'.format(
                move_root, destination))
            for entry in os.listdir(move_root):
                shutil.move(os.path.join(move_root, entry), destination)

            # The scratch directory is removed when the with-block exits.
            log.debug('Deleting {}'.format(scratch))

    package.sip_path = os.path.relpath(destination, package.object_path)
    package.save()

    self.create_success_event("Received SIP")
    return destination
def DeleteFiles(self, path):
    """Delete ``path`` and register a success event naming what was deleted.

    Args:
        path: Path to delete. It is passed through ``self.parse_params``
            first, which must yield exactly one value.
    """
    [target] = self.parse_params(path)
    delete_path(target)
    self.create_success_event("Deleted %s" % target)
def run(self, path):
    """Delete ``path`` after passing it through ``self.parse_params``.

    Args:
        path: Path to delete; ``parse_params`` must yield exactly one value.
    """
    [target] = self.parse_params(path)
    delete_path(target)