コード例 #1
0
    def run(self, delete_source=False, update_path=True):
        """Copy the package container and its XML to local or remote reception.

        When the transfer project profile data provides a
        ``preservation_organization_receiver_url`` (a comma separated
        ``url,user,password`` triple) both files are sent to that host's
        upload endpoint through an authenticated ``requests`` session.
        Otherwise they are copied into the ``ingest_reception`` directory.

        Args:
            delete_source: If true, delete the source container and its XML
                after they have been copied.
            update_path: If true, point the package's ``object_path`` and
                ``package_mets_path`` at the copied files and save it.

        Raises:
            ValueError: If a remote receiver is configured while
                ``update_path`` is true.
        """
        ip = InformationPackage.objects.get(pk=self.ip)

        reception_dir = Path.objects.get(entity="ingest_reception").value
        extension = ip.get_container_format()
        container_src = ip.object_path

        # An AttributeError during the lookup is treated as "no remote host"
        try:
            transfer_data = ip.get_profile_data('transfer_project')
            remote = transfer_data.get(
                'preservation_organization_receiver_url')
        except AttributeError:
            remote = None

        if remote and update_path:
            raise ValueError(
                'Cannot update path when submitting to remote host')

        if remote:
            host, remote_user, remote_pass = remote.split(',')
            container_dst = urljoin(host, 'api/ip-reception/upload/')

            session = requests.Session()
            session.verify = settings.REQUESTS_VERIFY
            session.auth = (remote_user, remote_pass)
        else:
            session = None
            container_name = ip.object_identifier_value + (".%s" % extension)
            container_dst = os.path.join(reception_dir, container_name)

        chunk = 8 * 1000000  # 8MB
        copy_file(container_src,
                  container_dst,
                  requests_session=session,
                  block_size=chunk)

        xml_name = ip.object_identifier_value + ".xml"
        xml_src = os.path.join(os.path.dirname(container_src), xml_name)
        # Remote uploads send the XML to the same endpoint as the container
        xml_dst = (container_dst if remote
                   else os.path.join(reception_dir, xml_name))
        copy_file(xml_src,
                  xml_dst,
                  requests_session=session,
                  block_size=chunk)

        if update_path:
            ip.object_path = container_dst
            ip.package_mets_path = xml_dst
            ip.save()

        if delete_source:
            for copied in (container_src, xml_src):
                delete_path(copied)

        self.set_progress(100, total=100)
コード例 #2
0
def PostPreservationCleanup(self):
    """Delete the package's leftovers from the pre-ingest and reception paths.

    For each configured ``preingest_reception``, ``preingest`` and
    ``ingest_reception`` path, the entry named after the package's object
    identifier is deleted, followed by its ``.tar`` and ``.xml`` siblings.
    """
    ip = self.get_information_package()

    cleanup_entities = [
        'preingest_reception',
        'preingest',
        'ingest_reception',
    ]
    roots = Path.objects.filter(
        entity__in=cleanup_entities).values_list('value', flat=True)

    for root in roots:
        base = os.path.join(root, ip.object_identifier_value)
        # Bare name first, then the container and metadata variants
        for suffix in ('', '.tar', '.xml'):
            delete_path(base + suffix)
コード例 #3
0
 def test_delete_non_existing_path(self):
     # The path is never created; after delete_path it must still not exist.
     missing = os.path.join(self.datadir, 'foo.txt')
     delete_path(missing)
     still_present = os.path.exists(missing)
     self.assertFalse(still_present)
コード例 #4
0
    def test_delete_file(self):
        # Create an empty file, delete it, and check that it is gone.
        target = os.path.join(self.datadir, 'foo.txt')
        with open(target, 'a'):
            pass

        delete_path(target)
        still_present = os.path.exists(target)
        self.assertFalse(still_present)
コード例 #5
0
    def test_delete_directory(self):
        # Create a directory, delete it, and check that it is gone.
        target = os.path.join(self.datadir, 'foo')
        os.mkdir(target)

        delete_path(target)
        still_present = os.path.exists(target)
        self.assertFalse(still_present)
コード例 #6
0
ファイル: tasks.py プロジェクト: OskarPersson/ESSArch
    def run(self, purpose=None, delete_sip=False):
        """Receive a SIP container into a new AIP directory.

        Records size, creation date and checksum of the package METS file on
        the information package, creates an AIC for it, points
        ``object_path`` at a directory below the policy's ingest path, then
        either extracts the container there or copies it as-is, depending on
        ``policy.receive_extract_sip``. Finally the SIP METS file is parsed
        and its values are merged, prefixed with ``SIP_``, into the AIP
        profile data.

        Args:
            purpose: Not used in this method body.
            delete_sip: If true, delete the original container and the
                sibling path with the same name and an ``.xml`` suffix.

        Raises:
            ValueError: If extraction is enabled and the container extension
                is neither ``.tar`` nor ``.zip``.
            OSError: If a directory cannot be created for a reason other
                than it already existing.
        """
        self.logger.debug('Receiving SIP')
        aip = InformationPackage.objects.get(pk=self.ip)
        algorithm = aip.get_checksum_algorithm()
        container = aip.object_path
        # objid is the container file name without its extension
        objid, container_type = os.path.splitext(os.path.basename(container))
        container_type = container_type.lower()
        xml = aip.package_mets_path
        aip.package_mets_create_date = timestamp_to_datetime(
            creation_date(xml)).isoformat()
        aip.package_mets_size = os.path.getsize(xml)
        aip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
            algorithm.upper()]
        aip.package_mets_digest = calculate_checksum(xml, algorithm=algorithm)
        aip.generation = 0
        # Create the AIC that this AIP will be attached to
        aic = InformationPackage.objects.create(
            package_type=InformationPackage.AIC,
            responsible=aip.responsible,
            label=aip.label,
            start_date=aip.start_date,
            end_date=aip.end_date)
        # Remember the container location before object_path is redirected
        old_sip_path = aip.object_path
        aip.aic = aic
        aip_dir = os.path.join(aip.policy.ingest_path.value, objid)
        aip.object_path = aip_dir
        # Create the AIP directory, tolerating that it already exists
        try:
            os.makedirs(aip_dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

        aip.save()

        # Look up the 'sip' destination in the AIP profile structure and
        # fall back to 'content' when no path is returned for it
        dst_path, dst_name = find_destination('sip',
                                              aip.get_profile('aip').structure,
                                              aip.object_path)
        if dst_path is None:
            dst_path, dst_name = find_destination(
                'content',
                aip.get_profile('aip').structure, aip.object_path)

        # parse_params yields one value per argument; unpack the single one
        dst_name, = self.parse_params(dst_name)
        dst = os.path.join(dst_path, dst_name)

        sip_profile = aip.submission_agreement.profile_sip

        # Clear any existing destination tree (presumably left over from an
        # earlier attempt); a missing destination is fine
        try:
            shutil.rmtree(dst)
        except FileNotFoundError:
            pass

        if aip.policy.receive_extract_sip:
            temp = Path.objects.cached('entity', 'temp', 'value')
            # Extract into a scratch directory that is removed when the
            # with-block exits
            with tempfile.TemporaryDirectory(dir=temp) as tmpdir:
                self.logger.debug('Extracting {} to {}'.format(
                    container, tmpdir))
                # NOTE(review): extractall is called without filtering the
                # archive members - confirm that containers are trusted
                if container_type == '.tar':
                    with tarfile.open(container) as tar:
                        root_member_name = tar.getnames()[0]
                        tar.extractall(tmpdir)
                elif container_type == '.zip':
                    with zipfile.ZipFile(container) as zipf:
                        root_member_name = zipf.namelist()[0]
                        zipf.extractall(tmpdir)
                else:
                    raise ValueError(
                        'Invalid container type: {}'.format(container))

                # Joining with '' appends a trailing separator to dst
                dst = os.path.join(dst, '')
                try:
                    os.makedirs(dst)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise

                # If the archive unpacked to a single top-level directory
                # named like its first member, move that directory's
                # content instead of the directory itself
                tmpsrc = tmpdir
                if len(os.listdir(tmpdir)) == 1 and os.listdir(
                        tmpdir)[0] == root_member_name:
                    new_tmpsrc = os.path.join(tmpdir, root_member_name)
                    if os.path.isdir(new_tmpsrc):
                        tmpsrc = new_tmpsrc

                self.logger.debug('Moving content of {} to {}'.format(
                    tmpsrc, dst))

                for f in os.listdir(tmpsrc):
                    shutil.move(os.path.join(tmpsrc, f), dst)

                self.logger.debug('Deleting {}'.format(tmpdir))

            aip.sip_path = os.path.relpath(dst, aip.object_path)
        else:
            self.logger.debug('Copying {} to {}'.format(container, dst))
            # NOTE(review): dst was removed above, so copy2 presumably
            # writes a file at dst itself, while sip_path below points at
            # dst/<container name> - confirm this is intended
            shutil.copy2(container, dst)
            aip.sip_path = os.path.relpath(
                os.path.join(dst, os.path.basename(container)),
                aip.object_path)

        sip_mets_dir, sip_mets_file = find_destination('mets_file',
                                                       sip_profile.structure,
                                                       aip.sip_path)
        # NOTE(review): sip_path is relative to aip.object_path, so this
        # isfile check resolves against the current working directory -
        # confirm
        if os.path.isfile(aip.sip_path):
            # The SIP is still a container file: read the METS from inside it
            sip_mets_data = parse_mets(
                open_file(
                    os.path.join(aip.object_path, sip_mets_dir, sip_mets_file),
                    container=aip.sip_path,
                    container_prefix=aip.object_identifier_value,
                ))
        else:
            sip_mets_data = parse_mets(
                open_file(
                    os.path.join(aip.object_path, sip_mets_dir,
                                 sip_mets_file)))

        # prefix all SIP data
        sip_mets_data = {
            f'SIP_{k.upper()}': v
            for k, v in sip_mets_data.items()
        }

        # Merge the prefixed SIP values into the AIP profile data and save
        aip_profile_rel_data = aip.get_profile_rel('aip').data
        aip_profile_rel_data.data.update(sip_mets_data)
        aip_profile_rel_data.save()

        if delete_sip:
            delete_path(old_sip_path)
            # Also delete the sibling path with an .xml suffix
            delete_path(pathlib.Path(old_sip_path).with_suffix('.xml'))

        self.logger.debug('sip_path set to {}'.format(aip.sip_path))
        aip.save()
コード例 #7
0
def ReceiveSIP(self, purpose=None, delete_sip=False):
    """Receive the SIP container of the task's information package.

    Records creation date, size and checksum of the package METS file,
    redirects ``object_path`` to a directory below the policy's ingest
    path and then either extracts the container (``.tar`` or ``.zip``)
    into the SIP destination or copies it there unchanged, depending on
    ``policy.receive_extract_sip``.

    Args:
        purpose: Not used in this function body.
        delete_sip: Not used in this function body.

    Returns:
        The SIP destination path; it ends with a path separator when the
        container was extracted.

    Raises:
        ValueError: If extraction is enabled and the container extension is
            neither ``.tar`` nor ``.zip``.
    """
    logger = logging.getLogger('essarch.workflow.tasks.ReceiveSIP')
    logger.debug('Receiving SIP')
    ip = self.get_information_package()
    algorithm = ip.get_checksum_algorithm()
    container = ip.object_path
    extension = os.path.splitext(os.path.basename(container))[1].lower()

    mets_path = ip.package_mets_path
    mets_created = timestamp_to_datetime(creation_date(mets_path))
    ip.package_mets_create_date = mets_created.isoformat()
    ip.package_mets_size = os.path.getsize(mets_path)
    digest_name = algorithm.upper()
    ip.package_mets_digest_algorithm = (
        MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[digest_name])
    ip.package_mets_digest = calculate_checksum(
        mets_path, algorithm=algorithm)

    ip.object_path = os.path.join(
        ip.policy.ingest_path.value, ip.object_identifier_value)
    ip.save()

    # Prefer the 'sip' destination, falling back to 'content'
    dst_dir, dst_name = find_destination(
        'sip', ip.get_structure(), ip.object_path)
    if dst_dir is None:
        dst_dir, dst_name = find_destination(
            'content', ip.get_structure(), ip.object_path)

    (dst_name,) = self.parse_params(dst_name)
    sip_dst = os.path.join(dst_dir, dst_name)

    if not ip.policy.receive_extract_sip:
        logger.debug('Copying {} to {}'.format(container, sip_dst))
        shutil.copy2(container, sip_dst)
    else:
        # remove any existing directory from previous attempts
        delete_path(sip_dst)

        scratch_root = Path.objects.get(entity='temp').value
        with tempfile.TemporaryDirectory(dir=scratch_root) as tmpdir:
            logger.debug('Extracting {} to {}'.format(container, tmpdir))
            if extension == '.tar':
                with tarfile.open(container) as archive:
                    first_member = archive.getnames()[0]
                    archive.extractall(tmpdir)
            elif extension == '.zip':
                with zipfile.ZipFile(container) as archive:
                    first_member = archive.namelist()[0]
                    archive.extractall(tmpdir)
            else:
                raise ValueError(
                    'Invalid container type: {}'.format(container))

            # Joining with '' appends a trailing separator
            sip_dst = os.path.join(sip_dst, '')
            os.makedirs(sip_dst)

            # When the archive unpacked to one top-level directory named
            # like its first member, move that directory's content instead
            move_from = tmpdir
            if os.listdir(tmpdir) == [first_member]:
                candidate = os.path.join(tmpdir, first_member)
                if os.path.isdir(candidate):
                    move_from = candidate

            logger.debug(
                'Moving content of {} to {}'.format(move_from, sip_dst))

            for entry in os.listdir(move_from):
                shutil.move(os.path.join(move_from, entry), sip_dst)

            logger.debug('Deleting {}'.format(tmpdir))

    ip.sip_path = os.path.relpath(sip_dst, ip.object_path)
    ip.save()
    self.create_success_event("Received SIP")
    return sip_dst
コード例 #8
0
def DeleteFiles(self, path):
    """Delete the given path and record a success event.

    Args:
        path: Path to delete; it is passed through ``self.parse_params``
            before use.
    """
    (resolved,) = self.parse_params(path)
    delete_path(resolved)

    self.create_success_event("Deleted %s" % resolved)
コード例 #9
0
ファイル: tasks.py プロジェクト: OskarPersson/ESSArch
 def run(self, path):
     """Delete ``path`` after passing it through ``self.parse_params``."""
     (resolved,) = self.parse_params(path)
     delete_path(resolved)