Exemple #1
0
    def in_valid_paths(self, root, path, valid_paths):
        """Check that ``path`` matches one of ``valid_paths``.

        ``valid_paths`` mixes two kinds of entries:

        * a plain string glob pattern: ``path`` is valid if it matches, or
        * a list of related glob patterns: ``path`` must match one of them,
          and every sibling pattern (with its first ``'*'`` substituted by
          the matched wildcard value) must also exist on disk.

        Args:
            root: Directory the validated paths are relative to (only used
                to build readable error messages).
            path: Normalized path of the file being validated.
            valid_paths: List of glob patterns and/or lists of related
                glob patterns.

        Returns:
            True if ``path`` is allowed.

        Raises:
            ValidationError: If ``path`` is not allowed, or a related file
                required by a nested pattern group is missing.
        """
        # Simple case: entries that are single string patterns.
        for valid_path in [p for p in valid_paths if isinstance(p, str)]:
            if path in list(map(normalize_path, glob(valid_path))):
                return True

        # Grouped case: each non-string entry is a list of related patterns.
        for valid_path in [p for p in valid_paths if not isinstance(p, str)]:
            for nested_valid_path in valid_path:
                # iglob(..., with_matches=True) yields (filename, wildcard
                # matches) pairs -- presumably the glob2 API, not stdlib glob.
                for found_nested_path, matches in iglob(nested_valid_path,
                                                        with_matches=True):
                    found_nested_path = normalize_path(found_nested_path)
                    if found_nested_path == path:
                        # check matches
                        matches = map(normalize_path, matches)
                        for match in matches:
                            for related_path in valid_path:
                                if related_path != found_nested_path:
                                    # Substitute the matched wildcard value
                                    # into the sibling pattern and require
                                    # that file to exist on disk.
                                    related_path = related_path.replace(
                                        '*', match, 1)

                                    if not os.path.isfile(related_path):
                                        rel_path = normalize_path(
                                            os.path.relpath(path, root))
                                        rel_related_path = normalize_path(
                                            os.path.relpath(
                                                related_path, root))
                                        raise ValidationError(
                                            '{file} missing related file {related}'
                                            .format(file=rel_path,
                                                    related=rel_related_path))

                        return True

        raise ValidationError('{file} is not allowed'.format(file=path))
Exemple #2
0
    def validate_folder(self, path, node):
        """Validate the file tree under ``path`` against a folder ``node`` spec.

        ``node`` may contain:

        * ``valid_paths``: glob patterns (or lists of related patterns)
          that every found file must satisfy,
        * ``allow_empty``: whether the folder may contain no files at all,
        * ``required_files``: file-name templates (``str.format``-expanded
          with ``self.data``) that must all be present somewhere in the tree.

        Raises:
            ValidationError: On a disallowed file, a disallowed empty
                folder, or missing required files.
        """
        valid_paths = node.get('valid_paths', [])
        allow_empty = node.get('allow_empty', True)
        required_files = list(
            map(normalize_path, [
                req.format(**self.data)
                for req in node.get('required_files', [])
            ]))
        file_count = 0

        # Expand patterns to absolute, normalized paths.
        # NOTE(review): this mutates the lists stored inside ``node`` in
        # place -- presumably ``node`` is not reused afterwards; verify
        # against callers.
        for idx, valid in enumerate(valid_paths):
            if isinstance(valid, str):
                valid_paths[idx] = normalize_path(
                    os.path.join(path, valid).format(**self.data))
            else:
                for nested_idx, nested_valid in enumerate(valid):
                    valid[nested_idx] = normalize_path(
                        os.path.join(path, nested_valid).format(**self.data))

        for root, dirs, files in walk(path):
            for f in files:
                file_count += 1
                if len(valid_paths):
                    try:
                        self.in_valid_paths(
                            path, normalize_path(os.path.join(root, f)),
                            valid_paths)
                    except ValidationError as validation_exc:
                        # A file outside valid_paths is still accepted if it
                        # satisfies one of the required files.
                        try:
                            self.update_required_files(
                                os.path.relpath(root, path), f, required_files)
                        except ValueError:
                            raise validation_exc

                if len(required_files):
                    # Tick off required files as they are found; ValueError
                    # just means this file was not one of them.
                    try:
                        self.update_required_files(os.path.relpath(root, path),
                                                   f, required_files)
                    except ValueError:
                        pass

        if not allow_empty and file_count == 0:
            raise ValidationError(
                '{path} is not allowed to be empty'.format(path=path))

        # Anything left in required_files was never found in the tree.
        if len(required_files):
            raise ValidationError('Missing {files} in {path}'.format(
                files=','.join(required_files), path=path))
Exemple #3
0
def generate_package_mets(ip):
    """Generate the package METS XML for ``ip`` and record its metadata.

    Supported for SIP and AIP packages only; the METS file is written next
    to the IP's object path with an ``.xml`` extension, and its path, date,
    size, digest and digest algorithm are saved on the IP.

    Raises:
        ValueError: If the IP has any other package type.
    """
    # Map package type to the profile describing it.
    profile_type_by_package = {
        InformationPackage.SIP: 'submit_description',
        InformationPackage.AIP: 'aip_description',
    }
    try:
        profile_type = profile_type_by_package[ip.package_type]
    except KeyError:
        raise ValueError(
            'Cannot create package mets for IP of type {package_type}'.format(
                package_type=ip.package_type
            )
        )

    sa = ip.submission_agreement
    profile_rel = ip.get_profile_rel(profile_type)
    profile_data = ip.get_profile_data(profile_type)
    xmlpath = os.path.splitext(ip.object_path)[0] + '.xml'

    data = fill_specification_data(profile_data, ip=ip, sa=sa)
    data["_IP_CREATEDATE"] = timestamp_to_datetime(
        creation_date(ip.object_path)).isoformat()

    algorithm = ip.get_checksum_algorithm()
    generator = XMLGenerator()
    generator.generate(
        {
            xmlpath: {
                'spec': profile_rel.profile.specification,
                'data': data,
            }
        },
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )

    ip.package_mets_path = normalize_path(xmlpath)
    ip.package_mets_create_date = timestamp_to_datetime(
        creation_date(xmlpath)).isoformat()
    ip.package_mets_size = os.path.getsize(xmlpath)
    ip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
        algorithm.upper()]
    ip.package_mets_digest = calculate_checksum(xmlpath, algorithm=algorithm)
    ip.save()
Exemple #4
0
    def run(self):
        """Package the IP into a container in the pre-ingest reception area.

        Creates ``<reception>/<object_identifier_value>.<format>`` (zip, or
        tar optionally gzip-compressed), points the IP's object_path at the
        new container, then deletes the source directory.

        Returns:
            The normalized path of the created container.
        """
        ip = self.get_information_package()
        container_format = ip.get_container_format().lower()
        tpp = ip.get_profile_rel('transfer_project').profile
        compress = tpp.specification_data.get('container_format_compression',
                                              False)

        src = ip.object_path
        dst_dir = Path.objects.cached('entity', 'path_preingest_reception',
                                      'value')
        dst_filename = ip.object_identifier_value + '.' + container_format
        dst = normalize_path(os.path.join(dst_dir, dst_filename))

        if container_format == 'zip':
            # Event types distinguish zip (50410) from tar (50400) packaging.
            self.event_type = 50410
            zip_directory(dirname=src, zipname=dst, compress=compress)
        else:
            self.event_type = 50400
            compression = ':gz' if compress else ''
            # Store entries under the IP directory's own name inside the tar.
            base_dir = os.path.basename(os.path.normpath(ip.object_path))
            with tarfile.open(dst, 'w%s' % compression) as new_tar:
                new_tar.add(src, base_dir)

        ip.object_path = dst
        ip.save()
        # Destructive: the unpackaged source tree is removed only after the
        # container has been written and the IP record saved.
        shutil.rmtree(src)
        return dst
Exemple #5
0
    def test_create_tar(self):
        """A tar container should contain the root dir, subdirs and files."""
        src_root = tempfile.mkdtemp(dir=self.datadir)
        for subdir in ('foo', 'bar'):
            os.makedirs(os.path.join(src_root, subdir))
        open(os.path.join(src_root, 'foo', '1.txt'), 'a').close()

        container = os.path.join(self.datadir, 'container.tar')

        self.create_profile({'container_format': 'tar'}, self.ip)

        task = ProcessTask.objects.create(
            name='ESSArch_Core.ip.tasks.CreateContainer',
            information_package=self.ip,
            responsible=self.user,
            args=[src_root, container],
        )
        task.run().get()

        base = os.path.basename(src_root)
        expected = [normalize_path(base)] + [
            normalize_path(os.path.join(base, rel))
            for rel in ('foo', 'foo/1.txt', 'bar')
        ]
        with tarfile.open(container) as tar:
            self.assertCountEqual(expected, tar.getnames())
Exemple #6
0
    def setUp(self):
        """Create a temp data dir, dummy parameters, an IP and disk storage fixtures."""
        self.datadir = normalize_path(tempfile.mkdtemp())
        self.subdir = os.path.join(self.datadir, "subdir")
        self.addCleanup(shutil.rmtree, self.datadir)

        # exist_ok replaces the old "catch OSError and ignore errno 17
        # (EEXIST)" dance; the file already requires Python 3 (f-strings).
        os.makedirs(self.subdir, exist_ok=True)

        self.medium_location_param = Parameter.objects.create(
            entity=str(uuid.uuid4()), value="dummy_medium_location")
        self.agent_id_param = Parameter.objects.create(
            entity=str(uuid.uuid4()), value="dummy_agent_id")

        self.ip = InformationPackage.objects.create()
        self.storage_target = StorageTarget.objects.create(
            type=DISK,
            name=f'dummy_st_name_{uuid.uuid4()}',
            target="my_storage_target",
        )
        self.storage_medium = StorageMedium.objects.create(
            medium_id=f"some_name_{uuid.uuid4()}",
            storage_target=self.storage_target,
            status=20,
            location_status=50,
            location=self.medium_location_param.value,
            block_size=self.storage_target.default_block_size,
            format=self.storage_target.default_format,
            agent=self.agent_id_param.value,
        )
Exemple #7
0
def index_document(tag_version, filepath):
    """Index ``filepath`` as a File document for ``tag_version``.

    Reads the file, base64-encodes it, and sends it through elasticsearch's
    ``ingest_attachment`` pipeline so the content gets extracted and indexed.

    Returns:
        Tuple of (saved File document, tag_version).

    Raises:
        ElasticsearchException: Re-raised (after logging) if indexing fails.
    """
    with open(filepath, 'rb') as f:
        content = f.read()

    ip = tag_version.tag.information_package
    encoded_content = base64.b64encode(content).decode("ascii")
    extension = os.path.splitext(tag_version.name)[1][1:]
    dirname = os.path.dirname(filepath)
    # href is the file's directory relative to the IP root; '' at the root.
    href = normalize_path(os.path.relpath(dirname, ip.object_path))
    href = '' if href == '.' else href
    size, _ = get_tree_size_and_count(filepath)
    modified = timestamp_to_datetime(os.stat(filepath).st_mtime)

    tag_version.custom_fields = {
        'extension': extension,
        'dirname': dirname,
        'href': href,
        'filename': tag_version.name,
        'size': size,
        'modified': modified,
    }

    doc = File.from_obj(tag_version)
    doc.data = encoded_content

    try:
        doc.save(pipeline='ingest_attachment')
    except ElasticsearchException:
        logger.exception('Failed to index {}'.format(filepath))
        raise
    return doc, tag_version
Exemple #8
0
    def path(self, path):
        """Resolve and store the normalized path for this element.

        When ``path`` is None it is resolved from the element via the
        ``path`` prop(s); the longest matching ``pathprefix`` is stripped,
        the leading root segment is dropped if ``path_includes_root`` is
        set, and leading slashes/spaces are removed.
        """
        if path is None:
            self.paths = self.props.get('path', [''])

            if isinstance(self.paths, str):
                self.paths = [self.paths]

            # Use the first prop that resolves to a value.
            # NOTE(review): the loop variable shadows the ``path`` parameter;
            # if no prop resolves, ``path`` holds the last (possibly None)
            # lookup result and ``path.lstrip`` below would fail --
            # presumably at least one prop always resolves; verify callers.
            for path in self.paths:
                path = get_value_from_path(self.el, path)

                if path is not None:
                    break

            # Strip the longest configured prefix that actually matches.
            self.path_prefix = self.props.get('pathprefix', [])
            for prefix in sorted(self.path_prefix, key=len, reverse=True):
                no_prefix = remove_prefix(path, prefix)

                if no_prefix != path:
                    path = no_prefix
                    break

            if self.props.get('path_includes_root', False):
                # Drop the leading root segment ("root/a/b" -> "a/b").
                path = path.split('/', 1)[-1]

            path = path.lstrip('/ ')

        self._path = normalize_path(path)
Exemple #9
0
def index_document(ip, filepath, id):
    """Index a single on-disk file as a File document in elasticsearch."""
    with open(filepath, 'rb') as f:
        encoded_content = base64.b64encode(f.read()).decode("ascii")

    filename = os.path.basename(filepath)
    extension = os.path.splitext(filename)[1][1:]
    # href is the file's directory relative to the IP root; '' at the root.
    href = normalize_path(
        os.path.relpath(os.path.dirname(filepath), ip.object_path))
    if href == '.':
        href = ''
    size, _ = get_tree_size_and_count(filepath)
    modified = timestamp_to_datetime(os.stat(filepath).st_mtime)

    doc = File(
        _id=id,
        name=filename,
        type="document",
        filename=filename,
        extension=extension,
        href=href,
        ip=str(ip.pk),
        data=encoded_content,
        size=size,
        modified=modified,
        current_version=True,
    )
    doc.save(pipeline='ingest_attachment')
    return doc
Exemple #10
0
    def run(self, src, dst):
        """Package ``src`` into a container file at ``dst``.

        The container format (zip or tar, optionally gzip-compressed for
        tar) comes from the IP's transfer_project profile. When ``dst`` is
        an existing directory the container is placed inside it, named
        after the IP's object identifier.

        Returns:
            The path of the created container.
        """
        src, dst = self.parse_params(src, dst)

        ip = self.get_information_package()
        container_format = ip.get_container_format().lower()
        tpp = ip.get_profile_rel('transfer_project').profile
        compress = tpp.specification_data.get('container_format_compression',
                                              False)

        dst = normalize_path(dst)

        if os.path.isdir(dst):
            # Reuse container_format instead of querying the IP a second
            # time (the original re-called ip.get_container_format()).
            dst_filename = ip.object_identifier_value + '.' + container_format
            dst = os.path.join(dst, dst_filename)

        if container_format == 'zip':
            # Event types distinguish zip (50410) from tar (50400) packaging.
            self.event_type = 50410
            zip_directory(dirname=src, zipname=dst, compress=compress)
        else:
            self.event_type = 50400
            compression = ':gz' if compress else ''
            # Store entries under the source directory's own name.
            base_dir = os.path.basename(os.path.normpath(src))
            with tarfile.open(dst, 'w%s' % compression) as new_tar:
                new_tar.add(src, base_dir)

        return dst
Exemple #11
0
    def open_file(self, path='', *args, **kwargs):
        """Open ``path`` within this IP and return a file-like object.

        Handles four cases: archived IPs (read via the fastest readable
        storage), the container file itself, the package METS file next to
        the container, and files inside a tar/zip container. Falls back to
        opening ``path`` relative to object_path on disk.

        Raises:
            ValueError: If the IP is archived but no readable storage exists.
            OSError: ENOENT if ``path`` is missing from a tar container.
        """
        if self.archived:
            storage_obj = self.storage.readable().fastest().first()
            if storage_obj is None:
                raise ValueError("No readable storage configured for IP")
            return storage_obj.open(path, *args, **kwargs)
        if os.path.isfile(self.object_path) and path:
            if path == self.object_path:
                return open(path, *args, **kwargs)

            # The package METS file lives next to the container on disk.
            xmlfile = self.package_mets_path
            if not xmlfile:
                xmlfile = os.path.join(
                    os.path.dirname(self.object_path),
                    u'{}.xml'.format(self.object_identifier_value))
            if os.path.join(os.path.dirname(self.object_path),
                            path) == xmlfile:
                # NOTE(review): kwargs are not forwarded here, unlike the
                # other open() calls -- confirm whether that is intentional.
                return open(xmlfile, *args)

            try:
                with tarfile.open(self.object_path) as tar:
                    try:
                        f = tar.extractfile(path)
                    except KeyError:
                        # Retry with the object identifier as the top-level
                        # directory prefix inside the archive.
                        full_path = normalize_path(
                            os.path.join(self.object_identifier_value, path))
                        f = tar.extractfile(full_path)
                    return six.BytesIO(f.read())
            except tarfile.ReadError:
                logger.debug('Invalid tar file, trying zipfile instead')
                try:
                    with zipfile.ZipFile(self.object_path) as zipf:
                        try:
                            f = zipf.open(path)
                        except KeyError:
                            full_path = normalize_path(
                                os.path.join(self.object_identifier_value,
                                             path))
                            f = zipf.open(full_path)
                        return six.BytesIO(f.read())
                except zipfile.BadZipfile:
                    logger.debug('Invalid zip file')
            except KeyError:
                # Not found in the tar, even with the prefixed path.
                raise OSError(errno.ENOENT, os.strerror(errno.ENOENT),
                              os.path.join(self.object_path, path))

        return open(os.path.join(self.object_path, path), *args, **kwargs)
Exemple #12
0
    def setUp(self):
        """Create a temp-file backed IP and an API request factory."""
        tmp = tempfile.NamedTemporaryFile(delete=False)
        tmp.close()
        self.file = normalize_path(tmp.name)
        self.addCleanup(os.remove, self.file)

        self.ip = InformationPackage.objects.create(object_path=self.file)
        self.request = APIRequestFactory()
Exemple #13
0
    def delete_file(self, old_ip, filepath, relpath, new_ip):
        """Delete ``filepath`` from disk, recording an appraisal entry and event.

        Returns the created AppraisalJobEntry.
        """
        filepath = normalize_path(filepath)
        relpath = normalize_path(relpath)

        entry = AppraisalJobEntry.objects.create(
            job=self,
            start_date=timezone.now(),
            ip=old_ip,
            document=relpath,
        )
        os.remove(filepath)
        entry.end_date = timezone.now()
        entry.save()

        EventIP.objects.create(
            eventType=self.delete_event_type,
            eventOutcome=EventIP.SUCCESS,
            eventOutcomeDetailNote='Deleted {}'.format(relpath),
            linkingObjectIdentifierValue=new_ip.object_identifier_value,
        )
        return entry
Exemple #14
0
    def _run_python(self,
                    filepath,
                    rootdir,
                    options,
                    t=None,
                    ip=None,
                    context=None):
        """Run this external Python tool against ``filepath``.

        Imports the class pointed to by ``self.path``, instantiates it with
        the given task/IP/context and calls the method matching the tool
        type (convert/collect/transform/validate), passing the prepared
        command as kwargs (dict) or positional args (tuple). Runs with the
        CWD set to ``rootdir`` and restores it afterwards.

        Raises:
            ConversionError/CollectionError/TransformationError/
            ValidationError: Wrapping any failure, depending on tool type.
            ValueError: If the prepared command is neither dict nor tuple,
                or the tool type is unknown.
        """
        from ESSArch_Core.util import normalize_path

        # Tool type -> (method to invoke, error class to wrap failures in).
        # Replaces the original eight-branch if/elif chain.
        dispatch = {
            ExternalTool.Type.CONVERSION_TOOL: ('convert', ConversionError),
            ExternalTool.Type.COLLECTION_TOOL: ('collect', CollectionError),
            ExternalTool.Type.TRANSFORMATION_TOOL: ('transform',
                                                    TransformationError),
            ExternalTool.Type.VALIDATION_TOOL: ('validate', ValidationError),
        }

        old_cwd = os.getcwd()
        try:
            os.chdir(rootdir)
            filepath = normalize_path(filepath)
            # SECURITY: eval() of the prepared command string -- only safe
            # as long as tool definitions come from trusted administrators.
            cmd = eval(self.prepare_cmd(filepath, options))

            try:
                method_name, error_class = dispatch[self.type]
            except KeyError:
                # Unknown tool type: the original raised inside the inner
                # try and then silently swallowed the error; fail loudly.
                raise ValueError(cmd)

            try:
                module, task = self.path.rsplit('.', 1)
                p = getattr(importlib.import_module(module),
                            task)(task=t, ip=ip, context=context)
                method = getattr(p, method_name)
                if isinstance(cmd, dict):
                    method(**cmd)
                elif isinstance(cmd, tuple):
                    method(*cmd)
                else:
                    raise ValueError(cmd)
            except Exception as err:
                message = 'Module "{module}" command "{cmd}" exited with error message "{err}"'.format(
                    module=self.path, cmd=cmd, err=err)
                raise error_class(message)
        finally:
            os.chdir(old_cwd)
Exemple #15
0
def index_directory(ip, dirpath, id):
    """Index ``dirpath`` as a Directory document in elasticsearch."""
    # href is the parent directory relative to the IP root; '' at the root.
    parent_dir = os.path.dirname(dirpath)
    href = normalize_path(os.path.relpath(parent_dir, ip.object_path))
    if href == '.':
        href = ''

    doc = Directory(
        _id=id,
        name=os.path.basename(dirpath),
        href=href,
        ip=str(ip.pk),
        current_version=True,
    )
    doc.save()
    return doc
Exemple #16
0
    def _validate(self, filepath):
        """Validate one file against the recorded checksums and sizes.

        Updates the ``present``/``deleted`` bookkeeping dicts and counts
        the file as confirmed or changed.

        Returns:
            None when the file is unknown or matches the records;
            otherwise the result of ``_create_obj`` describing a checksum
            or size mismatch.
        """
        relpath = normalize_path(
            os.path.relpath(self._get_filepath(filepath), self.rootdir))

        newhash = self._get_checksum(filepath, relpath=relpath)
        newsize = self._get_size(filepath)

        # Track every path seen for a given checksum.
        if newhash not in self.present:
            self.present[newhash] = [relpath]
        elif relpath not in self.present[newhash]:
            self.present[newhash].append(relpath)

        # Unknown file: nothing recorded to compare against.
        if relpath not in self.checksums:
            return

        oldhash = self.checksums[relpath]
        oldsize = self.sizes[relpath]

        # Hash unchanged (or no checksum was recorded nor expected).
        if (oldhash is None and self.checksum_algorithms[relpath] is None
            ) or oldhash == newhash:
            if (oldsize is None or newsize is None
                    or (oldsize is not None and newsize is not None
                        and oldsize == newsize)):
                if oldhash is None:
                    self._pop_checksum_dict(self.deleted, None, relpath)
                else:
                    self._pop_checksum_dict(self.deleted, oldhash, relpath)
            if oldhash is None:
                self._pop_checksum_dict(self.present, None, relpath)
            else:
                self._pop_checksum_dict(self.present, oldhash, relpath)
            if oldhash != newhash:
                self._pop_checksum_dict(self.present, newhash, relpath)
        elif (oldhash is None
              and self.checksum_algorithms[relpath]) or oldhash != newhash:
            # Checksum mismatch: record the change and report it.
            self._pop_checksum_dict(self.deleted, oldhash, relpath)
            self.changed += 1
            msg = '{f} checksum has been changed: {old} != {new}'.format(
                f=relpath, old=oldhash, new=newhash)
            logger.error(msg)
            self._pop_checksum_dict(self.present, oldhash, relpath)
            self._pop_checksum_dict(self.present, newhash, relpath)
            return self._create_obj(relpath, False, msg)

        # Same hash but a different recorded size.
        if oldsize is not None and newsize is not None and oldsize != newsize:
            self._pop_checksum_dict(self.deleted, oldhash, relpath)
            self.changed += 1
            msg = '{f} size has been changed: {old} != {new}'.format(
                f=relpath, old=oldsize, new=newsize)
            logger.error(msg)
            return self._create_obj(relpath, False, msg)

        self.confirmed += 1
        msg = '{f} confirmed in xml'.format(f=relpath)
        logger.debug(msg)
        return self._create_obj(relpath, True, msg)
Exemple #17
0
    def get_content_mets_file_path(self):
        """Return the absolute, normalized path of the IP's content METS file."""
        mets_dir, mets_name = find_destination("mets_file",
                                               self.get_structure())
        if mets_dir is None:
            # No destination in the structure: fall back to the default name.
            path = 'mets.xml'
        else:
            path = parseContent(os.path.join(mets_dir, mets_name),
                                fill_specification_data(ip=self))

        return normalize_path(os.path.join(self.object_path, path))
Exemple #18
0
    def get_premis_file_path(self):
        """Return the absolute, normalized path of the IP's PREMIS file."""
        premis_dir, premis_name = find_destination(
            "preservation_description_file", self.get_structure())
        if premis_dir is None:
            # No destination in the structure: use the default location.
            path = 'metadata/premis.xml'
        else:
            path = parseContent(os.path.join(premis_dir, premis_name),
                                fill_specification_data(ip=self))

        return normalize_path(os.path.join(self.object_path, path))
Exemple #19
0
    def _validate(self, filepath):
        """Validate one file against the recorded checksums and sizes.

        Maintains the ``present``/``deleted`` bookkeeping dicts and counts
        the file as confirmed or changed.

        Returns:
            None when the file is unknown, a duplicate, or matches;
            otherwise the result of ``_create_obj`` describing a checksum
            or size mismatch.
        """
        relpath = normalize_path(
            os.path.relpath(self._get_filepath(filepath), self.rootdir))

        newhash = self._get_checksum(filepath, relpath=relpath)
        newsize = self._get_size(filepath)

        # The file exists, so it is no longer a deletion candidate.
        try:
            self._pop_checksum_dict(self.deleted, newhash, relpath)
        except (KeyError, ValueError):
            pass

        if newhash in self.present:
            try:
                self._pop_checksum_dict(self.present, newhash, relpath)
            except ValueError:
                # relpath was not recorded for this hash yet: remember it
                # and stop here.
                self.present[newhash].append(relpath)
                return
        else:
            self.present[newhash] = [relpath]

        # Unknown file: nothing recorded to compare against.
        if relpath not in self.checksums:
            return

        oldhash = self.checksums[relpath]

        if oldhash is None:
            # No checksum recorded for this file; just clear bookkeeping.
            # NOTE(review): pops with a None key -- assumes None entries can
            # exist in these dicts; verify against _pop_checksum_dict.
            self._pop_checksum_dict(self.deleted, oldhash, relpath)
            self._pop_checksum_dict(self.present, oldhash, relpath)
            self._pop_checksum_dict(self.present, newhash, relpath)
        elif oldhash != newhash:
            # Checksum mismatch: record the change and report it.
            self.deleted.pop(oldhash, None)
            self.changed += 1
            msg = '{f} checksum has been changed: {old} != {new}'.format(
                f=relpath, old=oldhash, new=newhash)
            logger.error(msg)
            self._pop_checksum_dict(self.present, oldhash, relpath)
            self._pop_checksum_dict(self.present, newhash, relpath)
            return self._create_obj(relpath, False, msg)

        oldsize = self.sizes[relpath]
        # Same hash but a different recorded size.
        if oldsize is not None and newsize is not None and oldsize != newsize:
            self.deleted.pop(oldhash, None)
            self.changed += 1
            msg = '{f} size has been changed: {old} != {new}'.format(
                f=relpath, old=oldsize, new=newsize)
            logger.error(msg)
            return self._create_obj(relpath, False, msg)

        self.confirmed += 1
        msg = '{f} confirmed in xml'.format(f=relpath)
        logger.debug(msg)
        return self._create_obj(relpath, True, msg)
Exemple #20
0
    def test_list_files_path_to_file_in_zip(self, generate_file_response):
        """Listing a path that points into a zip should stream that file."""
        archive = self.create_archive_file('zip')
        inner_name = '0.txt'
        target = normalize_path(os.path.join(archive, inner_name))

        list_files(target)

        generate_file_response.assert_called_once_with(
            mock.ANY, 'text/plain', False, name=inner_name)
Exemple #21
0
    def test_list_files_path_to_file_in_tar(self, generate_file_response):
        """Listing a path that points into a tar should stream that file."""
        archive = self.create_archive_file('tar')
        inner_name = './0.txt'  # TODO: bug in shutil for tar is adding an extra './'
        target = normalize_path(os.path.join(archive, inner_name))

        list_files(target)

        generate_file_response.assert_called_once_with(
            mock.ANY, 'text/plain', False, name=inner_name)
Exemple #22
0
def index_directory(tag_version, dirpath):
    """Index ``dirpath`` as a Directory document for ``tag_version``."""
    ip = tag_version.tag.information_package
    # href is the parent directory relative to the IP root; '' at the root.
    href = normalize_path(
        os.path.relpath(os.path.dirname(dirpath), ip.object_path))
    if href == '.':
        href = ''

    tag_version.custom_fields = {'href': href}

    doc = Directory.from_obj(tag_version)
    doc.save()
    return doc, tag_version
Exemple #23
0
    def get_events_file_path(self, from_container=False):
        """Return the path where the IP's events XML is (to be) stored."""
        # A packaged IP keeps its events file next to the container.
        if not from_container and os.path.isfile(self.object_path):
            return os.path.splitext(self.object_path)[0] + '_ipevents.xml'

        ip_profile = self.get_profile(self.get_package_type_display().lower())
        events_dir, events_file = find_destination('events_file',
                                                   ip_profile.structure)
        if events_dir is None:
            # No destination in the structure: use the default name.
            return 'ipevents.xml'

        return normalize_path(
            parseContent(os.path.join(events_dir, events_file),
                         fill_specification_data(ip=self)))
Exemple #24
0
    def list_files(self, storage_object, pattern, case_sensitive=True):
        """Yield normalized file paths relative to the storage object's data dir.

        With no ``pattern``, walks the whole data dir; otherwise expands the
        glob ``pattern`` and walks any matched directories. Containers are
        not supported.
        """
        if storage_object.container:
            raise NotImplementedError

        datadir = storage_object.get_full_path()

        def walk_relative(top):
            # Yield every file under ``top``, relative to datadir.
            for root, _dirs, files in walk(top):
                rel = os.path.relpath(root, datadir)
                for f in files:
                    yield normalize_path(os.path.join(rel, f))

        if pattern is None:
            yield from walk_relative(datadir)
            return

        for match in iglob(datadir + '/' + pattern,
                           case_sensitive=case_sensitive):
            if os.path.isdir(match):
                yield from walk_relative(match)
            else:
                yield normalize_path(os.path.relpath(match, datadir))
Exemple #25
0
    def get_path_response(self,
                          path,
                          request,
                          force_download=False,
                          paginator=None):
        """Return a response for ``path``: a file download or a dir listing.

        Tries to open ``path`` as a file first; when that fails with an
        "is a directory"-style error, falls back to listing the directory
        (paginated when ``paginator`` is supplied).

        Raises:
            exceptions.NotFound: If the path does not exist.
        """
        self.validate_path(path)
        try:
            if not path:
                # Empty path means the IP root: force the directory branch.
                raise OSError(errno.EISDIR, os.strerror(errno.EISDIR), path)

            if os.path.isfile(self.object_path):
                # For packaged IPs, strip the leading container segment so
                # the remainder resolves inside the container.
                container_path = os.path.join(
                    os.path.dirname(self.object_path),
                    path.split('/', 1)[0])
                container_path = normalize_path(container_path)
                if container_path == self.object_path:
                    path = path.split('/', 1)[1]

            fid = FormatIdentifier(allow_unknown_file_types=True)
            content_type = fid.get_mimetype(path)
            return generate_file_response(self.open_file(path, 'rb'),
                                          content_type,
                                          force_download=force_download,
                                          name=path)
        except (IOError, OSError) as e:
            if e.errno == errno.ENOENT:
                raise exceptions.NotFound

            # Windows raises PermissionDenied (errno.EACCES) when trying to use
            # open() on a directory
            if os.name == 'nt':
                if e.errno not in (errno.EACCES, errno.EISDIR):
                    raise
            elif e.errno != errno.EISDIR:
                raise
        except IndexError:
            # path had no remainder after the container segment; serve the
            # container itself when a download was explicitly requested.
            if force_download:
                fid = FormatIdentifier(allow_unknown_file_types=True)
                content_type = fid.get_mimetype(path)
                return generate_file_response(self.open_file(
                    self.object_path, 'rb'),
                                              content_type,
                                              force_download=force_download,
                                              name=path)

        entries = self.list_files(path)
        if paginator is not None:
            paginated = paginator.paginate_queryset(entries, request)
            return paginator.get_paginated_response(paginated)
        return Response(entries)
def forward(apps, schema_editor):
    """Data migration: populate sip_objid/sip_path on existing AIPs.

    For every AIP without a sip_objid, reuse its object identifier value;
    where the SIP sits depends on how far the IP has come in the workflow.
    """
    InformationPackage = apps.get_model("ip", "InformationPackage")
    db_alias = schema_editor.connection.alias

    for ip in InformationPackage.objects.using(db_alias).filter(
            package_type=IP.AIP, sip_objid='').iterator():
        ip.sip_objid = ip.object_identifier_value
        if ip.state in ('Prepared', 'Receiving'):
            # Not yet ingested: the SIP still sits at the top level.
            ip.sip_path = ip.sip_objid
        else:
            # Ingested: the SIP lives under the profile's content directory.
            structure = get_structure(ip)
            content_dir, content_name = find_destination('content', structure)
            content_path = os.path.join(content_dir, content_name)
            ip.sip_path = normalize_path(
                os.path.join(content_path, ip.sip_objid))
        ip.save()
Exemple #27
0
def index_document(tag_version, filepath):
    """Index the file behind *tag_version* in Elasticsearch.

    Populates ``tag_version.custom_fields`` with file metadata, builds a
    ``File`` document from it and saves the document. File content is
    attached (base64, via the ``ingest_attachment`` pipeline) unless the
    identified format is listed in
    ``settings.EXCLUDE_FILE_FORMAT_FROM_INDEXING_CONTENT``.

    Returns a ``(doc, tag_version)`` tuple. Re-raises any
    ``ElasticsearchException`` after logging it.
    """
    excluded_formats = settings.EXCLUDE_FILE_FORMAT_FROM_INDEXING_CONTENT

    fid = FormatIdentifier()
    format_name, format_version, format_registry_key = \
        fid.identify_file_format(filepath)
    index_file_content = format_registry_key not in excluded_formats

    ip = tag_version.tag.information_package
    dirname = os.path.dirname(filepath)
    relative_dir = normalize_path(os.path.relpath(dirname, ip.object_path))
    size, _ = get_tree_size_and_count(filepath)

    tag_version.custom_fields = {
        'extension': os.path.splitext(tag_version.name)[1][1:],
        'dirname': dirname,
        # '.' means the file lives directly in the IP root — store ''.
        'href': '' if relative_dir == '.' else relative_dir,
        'filename': tag_version.name,
        'size': size,
        'modified': timestamp_to_datetime(os.stat(filepath).st_mtime),
        'formatname': format_name,
        'formatversion': format_version,
        'formatkey': format_registry_key,
    }

    doc = File.from_obj(tag_version)

    try:
        if not index_file_content:
            logger.debug('Skip to index file content for {}'.format(filepath))
            doc.save()
        else:
            with open(filepath, 'rb') as f:
                raw = f.read()
            doc.data = base64.b64encode(raw).decode("ascii")
            doc.save(pipeline='ingest_attachment')
    except ElasticsearchException:
        logger.exception('Failed to index {}'.format(filepath))
        raise
    return doc, tag_version
Exemple #28
0
    def validate(self, filepath, expected=None):
        """Validate the directory structure of *filepath*.

        Walks the ``tree`` option and delegates each ``root``/``folder``
        node to :meth:`validate_folder`. A ``Validation`` row records the
        outcome either way: on failure the traceback is stored and the
        exception re-raised; on success the row holds a success message,
        which is also returned.
        """
        tree = self.options.get('tree', [])
        filepath = normalize_path(filepath)
        logger.debug("Validating structure of %s" % filepath)

        val_obj = Validation.objects.create(
            filename=filepath,
            time_started=timezone.now(),
            validator=self.__class__.__name__,
            required=self.required,
            task=self.task,
            information_package=self.ip,
            responsible=self.responsible,
            specification={
                'context': self.context,
                'options': self.options,
            },
        )

        passed = False
        try:
            for node in tree:
                node_type = node['type']
                if node_type == 'root':
                    self.validate_folder(filepath, node)
                elif node_type == 'folder':
                    self.validate_folder(
                        os.path.join(filepath, node['name']), node)
            passed = True
        except Exception:
            logger.exception(
                "Structure validation of {} failed".format(filepath))
            val_obj.message = traceback.format_exc()
            raise
        else:
            message = u"Successful structure validation of %s" % filepath
            val_obj.message = message
            logger.info(message)
        finally:
            # Persist outcome whether we raised or not.
            val_obj.time_done = timezone.now()
            val_obj.passed = passed
            val_obj.save(update_fields=['time_done', 'passed', 'message'])

        return message
Exemple #29
0
    def validate(self, path, expected=None):
        """Diff-check *path* against the XML manifest in ``self.context``.

        Every file under *path* (or *path* itself when it is a file) is
        compared against the manifest; per-file ``Validation`` objects are
        bulk-created. Raises ``ValidationError`` if any file was added,
        changed, renamed or deleted relative to the manifest.
        """
        xmlfile = self.context
        validations = []
        self._reset_dicts()
        self._reset_counters()
        logger.debug('Validating {path} against {xml}'.format(path=path,
                                                              xml=xmlfile))

        if not os.path.isdir(path):
            validations.append(self._validate(path))
        else:
            for root, _dirs, files in walk(path):
                for name in files:
                    fpath = normalize_path(os.path.join(root, name))
                    # Skip the manifest itself and explicitly excluded files.
                    if fpath == xmlfile or fpath in self.exclude:
                        continue
                    validations.append(self._validate(fpath))

        delete_count = self._validate_deleted_files(validations)
        self._validate_present_files(validations)

        Validation.objects.bulk_create(
            [v for v in validations if v is not None], batch_size=100)

        total_diffs = delete_count + self.added + self.changed + self.renamed
        if total_diffs > 0:
            msg = (
                'Diff-check validation of {path} against {xml} failed: '
                '{cfmd} confirmed, {a} added, {c} changed, {r} renamed, {d} deleted'
            ).format(path=path,
                     xml=self.context,
                     cfmd=self.confirmed,
                     a=self.added,
                     c=self.changed,
                     r=self.renamed,
                     d=delete_count)
            logger.warning(msg)
            raise ValidationError(msg)

        logger.info(
            "Successful diff-check validation of {path} against {xml}".format(
                path=path, xml=self.context))
    def test_application(self, mock_popen):
        """A CLI conversion tool builds the expected ffmpeg command line."""
        process = mock.MagicMock()
        process.returncode = 0
        process.communicate.return_value = ('output', 'error')
        mock_popen.return_value = process

        tool = ActionTool.objects.create(
            name='ffmpeg',
            enabled=True,
            type=ActionTool.Type.CONVERSION_TOOL,
            environment=ActionTool.EnvironmentType.CLI_ENV,
            path='ffmpeg',
            cmd='-i {input} {input_name}.{output}',
        )
        input_file = os.path.join(self.datadir, 'foo.mkv')
        tool.run(input_file, self.datadir, {'output': 'mp4'})

        expected_cmd = ['ffmpeg', '-i', normalize_path(input_file), "foo.mp4"]
        mock_popen.assert_called_once_with(
            expected_cmd,
            stdout=PIPE,
            stderr=PIPE,
        )