Esempio n. 1
0
    def test_pointer(self):
        # A root document that references two external documents through
        # <mptr> elements: both the pointer targets and the files listed in
        # the pointed-to documents are expected in the result.
        xmlfile = os.path.join(self.datadir, "test.xml")
        documents = [
            (xmlfile, '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink">
                <mptr xlink:href="ext1.xml"/>
                <mptr xlink:href="ext2.xml"/>
            </root>
            '''),
            (os.path.join(self.datadir, "ext1.xml"), '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink">
                <file><FLocat href="1.txt"/></file>
                <file><FLocat href="2.txt"/></file>
            </root>
            '''),
            (os.path.join(self.datadir, "ext2.xml"), '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink">
                <file><FLocat href="1.pdf"/></file>
                <file><FLocat href="2.pdf"/></file>
            </root>
            '''),
        ]

        # Write every fixture document to disk before running the lookup.
        for target, content in documents:
            with open(target, 'w') as handle:
                handle.write(content)

        found = find_files(xmlfile, rootdir=self.datadir)
        expected = ['ext1.xml', 'ext2.xml', '1.txt', '1.pdf', '2.txt', '2.pdf']
        self.assertEqual(len(found), len(expected))
        self.assertCountEqual(found, expected)
Esempio n. 2
0
    def test_files_object_element(self):
        # Two <object> elements, each carrying a file:/// URL in its
        # contentLocationValue. Only the number of found files is asserted.
        document = '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink">
                <object>
                    <storage>
                        <contentLocation>
                            <contentLocationValue>file:///1.txt</contentLocationValue>
                        </contentLocation>
                    </storage>
                </object>
                <object>
                    <storage>
                        <contentLocation>
                            <contentLocationValue>file:///2.txt</contentLocationValue>
                        </contentLocation>
                    </storage>
                </object>
            </root>
            '''
        xmlfile = os.path.join(self.datadir, "test.xml")
        with open(xmlfile, 'w') as handle:
            handle.write(document)

        found = find_files(xmlfile, rootdir=self.datadir)
        expected = ['1.txt', '2.txt']
        self.assertEqual(len(found), len(expected))
Esempio n. 3
0
    def validate(self, path, expected=None):
        """Compare the files referenced by ``path`` against the context XML.

        The internal dictionaries and counters are reset, every file returned
        by ``find_files`` that is not in ``self.exclude`` is run through
        ``self._validate``, and the resulting objects are stored with
        ``Validation.objects.bulk_create``.

        Args:
            path: Passed to ``find_pointers`` and ``find_files`` as the
                document to inspect; also used as key into ``self.checksums``.
            expected: Unused by this method.

        Raises:
            ValidationError: If the sum of deleted, added, changed and renamed
                files is greater than zero.
        """
        context_xml = self.context
        self._reset_dicts()
        self._reset_counters()
        logger.debug(
            u'Validating {path} against {xml}'.format(path=path, xml=context_xml)
        )
        own_checksum = self.checksums.get(path)

        # While validating, take ``path`` itself out of the checksum lookups;
        # it is registered again further down.
        if own_checksum:
            try:
                self._pop_checksum_dict(self.deleted, own_checksum, path)
                self._pop_checksum_dict(self.present, own_checksum, path)
            except (KeyError, ValueError):
                pass

        # The context file and any pointer targets are not validated as files.
        ignored = [os.path.relpath(context_xml, self.rootdir)]
        ignored += [pointer.path for pointer in find_pointers(path)]
        ignored = [normalize_path(entry) for entry in ignored]

        results = []
        for logical_file in find_files(path, rootdir=self.rootdir, skip_files=ignored):
            if logical_file not in self.exclude:
                results.append(self._validate(logical_file))

        delete_count = self._validate_deleted_files(results)
        self._validate_present_files(results)

        if own_checksum:
            for registry in (self.deleted, self.present):
                try:
                    registry[own_checksum].append(path)
                except KeyError:
                    registry[own_checksum] = [path]

        # Entries that came back as None are dropped before persisting.
        results = [result for result in results if result is not None]
        Validation.objects.bulk_create(results, batch_size=100)

        mismatches = delete_count + self.added + self.changed + self.renamed
        if mismatches > 0:
            template = (
                'Comparison of {path} against {xml} failed: '
                '{cfmd} confirmed, {a} added, {c} changed, {r} renamed, {d} deleted'
            )
            msg = template.format(
                path=path,
                xml=self.context,
                cfmd=self.confirmed,
                a=self.added,
                c=self.changed,
                r=self.renamed,
                d=delete_count,
            )
            logger.warning(msg)
            raise ValidationError(msg)

        logger.info(
            u"Successful comparison of {path} against {xml}".format(
                path=path, xml=self.context
            )
        )
Esempio n. 4
0
 def _get_files(self):
     """Store the files referenced by the context in ``self.logical_files``.

     Paths of the pointers found in the context are passed to ``find_files``
     as ``skip_files``.
     """
     pointer_paths = [pointer.path for pointer in find_pointers(self.context)]
     self.logical_files = find_files(
         self.context, rootdir=self.rootdir, skip_files=pointer_paths, recursive=self.recursive
     )
Esempio n. 5
0
    def test_empty(self):
        # A document with an empty root element should yield no files.
        document = '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink"></root>
            '''
        xmlfile = os.path.join(self.datadir, "test.xml")
        with open(xmlfile, 'w') as handle:
            handle.write(document)

        found = find_files(xmlfile, rootdir=self.datadir)
        expected = []
        self.assertEqual(len(found), len(expected))
Esempio n. 6
0
    def test_files_mdRef_element(self):
        # Two <mdRef> elements, one with a file:/// URL and one with a plain
        # relative href. Only the number of found files is asserted.
        document = '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink">
                <mdRef href="file:///1.txt"/>
                <mdRef href="2.txt"/>
            </root>
            '''
        xmlfile = os.path.join(self.datadir, "test.xml")
        with open(xmlfile, 'w') as handle:
            handle.write(document)

        found = find_files(xmlfile, rootdir=self.datadir)
        expected = ['1.txt', '2.txt']
        self.assertEqual(len(found), len(expected))
Esempio n. 7
0
    def run(self,
            ip=None,
            xmlfile=None,
            validate_fileformat=True,
            validate_integrity=True,
            rootdir=None):
        """Create and run a "Validate Files" step for the files in ``xmlfile``.

        For each file returned by ``find_files`` a format task and/or a
        checksum task is queued, depending on the flags and on which
        attributes the file entry carries. All tasks are stored with a single
        ``bulk_create`` before the step is run.

        Args:
            ip: Primary key of the information package.
            xmlfile: Document handed to ``find_files``.
            validate_fileformat: Queue a format task for entries with a format.
            validate_integrity: Queue a checksum task for entries that have
                both a checksum and a checksum type.
            rootdir: Directory the file paths are joined onto; when None the
                package's ``object_path`` is looked up.

        Returns:
            The result of running the created step.
        """
        step = ProcessStep.objects.create(
            name="Validate Files",
            parallel=True,
            parent_step_id=self.step,
        )

        if validate_fileformat or validate_integrity:
            if rootdir is None:
                object_paths = InformationPackage.objects.values_list(
                    'object_path', flat=True)
                rootdir = object_paths.get(pk=ip)

            queued = []

            for entry in find_files(xmlfile, rootdir):
                if validate_fileformat and entry.format is not None:
                    format_params = {
                        "filename": os.path.join(rootdir, entry.path),
                        "format_name": entry.format,
                    }
                    queued.append(ProcessTask(
                        name=self.fileformat_task,
                        params=format_params,
                        information_package_id=ip,
                        responsible_id=self.responsible,
                        processstep=step,
                    ))

                if (validate_integrity
                        and entry.checksum is not None
                        and entry.checksum_type is not None):
                    checksum_params = {
                        "filename": os.path.join(rootdir, entry.path),
                        "checksum": entry.checksum,
                        "algorithm": entry.checksum_type,
                    }
                    queued.append(ProcessTask(
                        name=self.checksum_task,
                        params=checksum_params,
                        information_package_id=ip,
                        responsible_id=self.responsible,
                        processstep=step,
                    ))

            ProcessTask.objects.bulk_create(queued)

        with allow_join_result():
            outcome = step.run().get()
        return outcome
Esempio n. 8
0
    def test_files_file_element(self):
        # Three <file>/<FLocat> entries using the "file:///", "file:" and bare
        # href forms; the paths of the found entries must match the plain
        # file names regardless of order.
        xmlfile = os.path.join(self.datadir, "test.xml")

        with open(xmlfile, 'w') as xml:
            xml.write('''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink">
                <file><FLocat href="file:///1.txt"/></file>
                <file><FLocat href="file:2.txt"/></file>
                <file><FLocat href="3.txt"/></file>
            </root>
            ''')

        expected = ['1.txt', '2.txt', '3.txt']
        found = find_files(xmlfile, rootdir=self.datadir)
        # assertItemsEqual only exists on Python 2; assertCountEqual is the
        # Python 3 name and is what the other tests here use.
        self.assertCountEqual([x.path for x in found], expected)
Esempio n. 9
0
    def test_files_object_element(self):
        # Two PREMIS file objects, each with a contentLocationValue and a
        # format name of "TaR".
        # NOTE(review): both 1.tar and 2.tar are declared but only '2.tar' is
        # expected below - confirm this is intentional.
        document = '''<?xml version="1.0" encoding="UTF-8" ?>
            <root xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
                <object xsi:type="premis:file">
                    <storage>
                        <contentLocation>
                            <contentLocationValue>file:///1.tar</contentLocationValue>
                        </contentLocation>
                    </storage>
                    <objectCharacteristics>
                        <compositionLevel>0</compositionLevel>
                        <format>
                            <formatDesignation>
                                <formatName>TaR</formatName>
                            </formatDesignation>
                        </format>
                    </objectCharacteristics>
                </object>
                <object xsi:type="premis:file">
                    <storage>
                        <contentLocation>
                            <contentLocationValue>file:///2.tar</contentLocationValue>
                        </contentLocation>
                    </storage>
                    <objectCharacteristics>
                        <compositionLevel>0</compositionLevel>
                        <format>
                            <formatDesignation>
                                <formatName>TaR</formatName>
                            </formatDesignation>
                        </format>
                    </objectCharacteristics>
                </object>
            </root>
            '''
        xmlfile = os.path.join(self.datadir, "test.xml")
        with open(xmlfile, 'w') as handle:
            handle.write(document)

        found = find_files(xmlfile, rootdir=self.datadir)
        expected = ['2.tar']
        self.assertCountEqual(found, expected)
Esempio n. 10
0
    def run(self,
            ip=None,
            xmlfile=None,
            validate_fileformat=True,
            validate_integrity=True,
            rootdir=None):
        """Validate format and/or checksum of every file found in ``xmlfile``.

        Args:
            ip: Primary key used to look up ``object_path`` when ``rootdir``
                is not given.
            xmlfile: Document handed to ``find_files``.
            validate_fileformat: Run the format validator on entries that
                have a format.
            validate_integrity: Run a checksum validator on entries that have
                both a checksum and a checksum type.
            rootdir: Directory the file paths are joined onto.

        Raises:
            Exception: Whatever the checksum validator raised is re-raised
                after the rejection mail (if any) has been sent.
        """
        # Nothing to do when both kinds of validation are switched off.
        if not (validate_fileformat or validate_integrity):
            return

        if rootdir is None:
            object_paths = InformationPackage.objects.values_list(
                'object_path', flat=True)
            rootdir = object_paths.get(pk=ip)

        format_validator = FormatValidator()

        for entry in find_files(xmlfile, rootdir):
            filename = os.path.join(rootdir, entry.path)

            if validate_fileformat and entry.format is not None:
                format_validator.validate(filename, (entry.format, None, None))

            if not validate_integrity or entry.checksum is None or entry.checksum_type is None:
                continue

            checksum_validator = ChecksumValidator(
                context='checksum_str',
                options={
                    'expected': entry.checksum,
                    'algorithm': entry.checksum_type
                },
            )
            try:
                checksum_validator.validate(filename)
            except Exception as exc:
                # Notify the responsible user before propagating the failure.
                recipient = User.objects.get(pk=self.responsible).email
                if recipient and self.ip:
                    package = InformationPackage.objects.get(pk=self.ip)
                    subject = 'Rejected "%s"' % package.object_identifier_value
                    body = '"%s" was rejected:\n%s' % (
                        package.object_identifier_value, str(exc))
                    send_mail(subject, body, None, [recipient], fail_silently=False)

                raise
Esempio n. 11
0
    def run(self,
            dirname=None,
            files=None,
            files_reldir=None,
            xmlfile=None,
            rootdir=""):
        """Check that the files listed in ``xmlfile`` match the files on disk.

        The "physical" set is built from every file found by walking
        ``dirname`` (except the XML file itself) plus the entries of
        ``files``, and is compared with the result of ``find_files``.

        Args:
            dirname: Directory to walk; skipped when falsy.
            files: Additional file paths to treat as physically present.
                Defaults to no extra files.
            files_reldir: When set, entries of ``files`` are made relative to
                it; an entry equal to it is added by its base name.
            xmlfile: Document handed to ``find_files``.
            rootdir: Passed through to ``find_files``.

        Returns:
            The string "Success" when both representations are equal.

        Raises:
            AssertionError: If the logical and physical representations differ.
        """
        # Avoid a shared mutable default; None means "no extra files".
        if files is None:
            files = []

        if dirname:
            xmlrelpath = os.path.relpath(xmlfile, dirname)
            xmlrelpath = remove_prefix(xmlrelpath, "./")
        else:
            xmlrelpath = xmlfile

        logical_files = find_files(xmlfile, rootdir)
        physical_files = set()

        if dirname:
            for root, dirs, filenames in walk(dirname):
                # The relative directory is the same for every file in it.
                reldir = os.path.relpath(root, dirname)
                for f in filenames:
                    relfile = os.path.join(reldir, f)
                    relfile = win_to_posix(relfile)
                    relfile = remove_prefix(relfile, "./")

                    if relfile != xmlrelpath:
                        physical_files.add(relfile)

        for f in files:
            if files_reldir:
                if f == files_reldir:
                    physical_files.add(os.path.basename(f))
                    continue

                f = os.path.relpath(f, files_reldir)
            physical_files.add(f)

        # Raised explicitly so the check is not stripped when Python runs
        # with -O; the exception type is the same as the former assert.
        if not (logical_files == physical_files):
            raise AssertionError("the logical representation differs from the physical")
        return "Success"
Esempio n. 12
0
 def _get_files(self):
     """Store the result of ``find_files`` for the context in ``self.logical_files``."""
     lookup_options = {'rootdir': self.rootdir, 'recursive': self.recursive}
     self.logical_files = find_files(self.context, **lookup_options)
Esempio n. 13
0
 def _get_files(self):
     """Store the result of ``find_files`` for the context in ``self.logical_files``."""
     found = find_files(self.context, rootdir=self.rootdir)
     self.logical_files = found