def test_pointer(self):
    """Check that find_files reports pointer targets and the files they list.

    A main document with two ``mptr`` references is written next to the two
    referenced documents, each of which lists two files.  The result is
    expected to contain both referenced documents and all four listed files.
    """
    main_path = os.path.join(self.datadir, "test.xml")
    first_ext = os.path.join(self.datadir, "ext1.xml")
    second_ext = os.path.join(self.datadir, "ext2.xml")

    # (destination, content) pairs, written in this order.
    documents = [
        (main_path, '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"> <mptr xlink:href="ext1.xml"/> <mptr xlink:href="ext2.xml"/> </root> '''),
        (first_ext, '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"> <file><FLocat href="1.txt"/></file> <file><FLocat href="2.txt"/></file> </root> '''),
        (second_ext, '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"> <file><FLocat href="1.pdf"/></file> <file><FLocat href="2.pdf"/></file> </root> '''),
    ]
    for destination, content in documents:
        with open(destination, 'w') as handle:
            handle.write(content)

    expected = ['ext1.xml', 'ext2.xml', '1.txt', '1.pdf', '2.txt', '2.pdf']
    found = find_files(main_path, rootdir=self.datadir)

    self.assertEqual(len(found), len(expected))
    self.assertCountEqual(found, expected)
def test_files_object_element(self):
    """Check the number of files found via ``object`` elements.

    Two ``object`` elements, each carrying a ``contentLocationValue``, are
    written to the document; only the size of the result is asserted.
    """
    document = '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"> <object> <storage> <contentLocation> <contentLocationValue>file:///1.txt</contentLocationValue> </contentLocation> </storage> </object> <object> <storage> <contentLocation> <contentLocationValue>file:///2.txt</contentLocationValue> </contentLocation> </storage> </object> </root> '''
    xml_path = os.path.join(self.datadir, "test.xml")
    with open(xml_path, 'w') as handle:
        handle.write(document)

    expected = ['1.txt', '2.txt']
    found = find_files(xml_path, rootdir=self.datadir)

    self.assertEqual(len(found), len(expected))
def validate(self, path, expected=None):
    """Compare the files referenced by ``path`` against the context XML.

    Every file found through ``find_files`` (minus excluded and skipped
    entries) is passed to ``self._validate``; deleted and present files are
    then checked, the resulting objects are bulk-created and a
    ``ValidationError`` is raised when anything was added, changed, renamed
    or deleted.

    :param path: file to scan for file references
    :param expected: not used by this method
    :raises ValidationError: when the comparison reports any difference
    """
    context_xml = self.context
    self._reset_dicts()
    self._reset_counters()
    logger.debug(u'Validating {path} against {xml}'.format(path=path, xml=context_xml))

    known_checksum = self.checksums.get(path)

    # Take ``path`` itself out of the bookkeeping while its content is
    # being compared; it is put back further down.
    if known_checksum:
        try:
            self._pop_checksum_dict(self.deleted, known_checksum, path)
            self._pop_checksum_dict(self.present, known_checksum, path)
        except (KeyError, ValueError):
            pass

    # The context file and any pointer targets are not validated as files.
    ignored = [os.path.relpath(context_xml, self.rootdir)]
    ignored += [pointer.path for pointer in find_pointers(path)]
    ignored = [normalize_path(entry) for entry in ignored]

    results = [
        self._validate(entry)
        for entry in find_files(path, rootdir=self.rootdir, skip_files=ignored)
        if entry not in self.exclude
    ]

    delete_count = self._validate_deleted_files(results)
    self._validate_present_files(results)

    if known_checksum:
        for registry in (self.deleted, self.present):
            registry.setdefault(known_checksum, []).append(path)

    results = [result for result in results if result is not None]
    Validation.objects.bulk_create(results, batch_size=100)

    difference_count = delete_count + self.added + self.changed + self.renamed
    if difference_count > 0:
        msg = (
            'Comparison of {path} against {xml} failed: '
            '{cfmd} confirmed, {a} added, {c} changed, {r} renamed, {d} deleted'
        ).format(
            path=path,
            xml=self.context,
            cfmd=self.confirmed,
            a=self.added,
            c=self.changed,
            r=self.renamed,
            d=delete_count,
        )
        logger.warning(msg)
        raise ValidationError(msg)

    logger.info(u"Successful comparison of {path} against {xml}".format(
        path=path, xml=self.context))
def _get_files(self):
    """Store the files referenced by the context in ``self.logical_files``.

    Paths of pointers found in the context are handed to ``find_files`` as
    ``skip_files``.
    """
    pointer_paths = []
    for pointer in find_pointers(self.context):
        pointer_paths.append(pointer.path)

    self.logical_files = find_files(
        self.context,
        rootdir=self.rootdir,
        skip_files=pointer_paths,
        recursive=self.recursive,
    )
def test_empty(self):
    """Check that a document with an empty root yields no files."""
    document = '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"></root> '''
    xml_path = os.path.join(self.datadir, "test.xml")
    with open(xml_path, 'w') as handle:
        handle.write(document)

    expected = []
    found = find_files(xml_path, rootdir=self.datadir)

    self.assertEqual(len(found), len(expected))
def test_files_mdRef_element(self):
    """Check the number of files found via ``mdRef`` elements.

    One reference uses a ``file:///`` prefix and one is a bare name; only
    the size of the result is asserted.
    """
    document = '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"> <mdRef href="file:///1.txt"/> <mdRef href="2.txt"/> </root> '''
    xml_path = os.path.join(self.datadir, "test.xml")
    with open(xml_path, 'w') as handle:
        handle.write(document)

    expected = ['1.txt', '2.txt']
    found = find_files(xml_path, rootdir=self.datadir)

    self.assertEqual(len(found), len(expected))
def run(self, ip=None, xmlfile=None, validate_fileformat=True, validate_integrity=True, rootdir=None):
    """Schedule per-file validation tasks under a new step and run it.

    A "Validate Files" step is created as a child of ``self.step``.  For
    each file found in ``xmlfile`` a format task and/or a checksum task is
    queued, depending on the flags and on which attributes the file entry
    carries.  The step is then run and its result returned.

    :param ip: primary key of the information package
    :param xmlfile: XML document listing the files
    :param validate_fileformat: queue a format task for files with a format
    :param validate_integrity: queue a checksum task for files that have
        both a checksum and a checksum type
    :param rootdir: base directory of the files; looked up from the
        information package's ``object_path`` when ``None``
    :return: the result of running the created step
    """
    step = ProcessStep.objects.create(
        name="Validate Files",
        parallel=True,
        parent_step_id=self.step,
    )

    def make_task(task_name, task_params):
        # All queued tasks share the package, responsible user and step.
        return ProcessTask(
            name=task_name,
            params=task_params,
            information_package_id=ip,
            responsible_id=self.responsible,
            processstep=step,
        )

    if validate_fileformat or validate_integrity:
        if rootdir is None:
            rootdir = InformationPackage.objects.values_list(
                'object_path', flat=True).get(pk=ip)

        pending = []
        for entry in find_files(xmlfile, rootdir):
            wants_format = validate_fileformat and entry.format is not None
            if wants_format:
                pending.append(make_task(self.fileformat_task, {
                    "filename": os.path.join(rootdir, entry.path),
                    "format_name": entry.format,
                }))

            wants_checksum = (
                validate_integrity
                and entry.checksum is not None
                and entry.checksum_type is not None
            )
            if wants_checksum:
                pending.append(make_task(self.checksum_task, {
                    "filename": os.path.join(rootdir, entry.path),
                    "checksum": entry.checksum,
                    "algorithm": entry.checksum_type,
                }))

        ProcessTask.objects.bulk_create(pending)

    with allow_join_result():
        return step.run().get()
def test_files_file_element(self):
    """Check the paths found via ``file``/``FLocat`` elements.

    The three references use a ``file:///`` prefix, a ``file:`` prefix and
    a bare name respectively; all are expected to come back as plain
    relative paths.
    """
    xmlfile = os.path.join(self.datadir, "test.xml")
    with open(xmlfile, 'w') as xml:
        xml.write('''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink"> <file><FLocat href="file:///1.txt"/></file> <file><FLocat href="file:2.txt"/></file> <file><FLocat href="3.txt"/></file> </root> ''')

    expected = ['1.txt', '2.txt', '3.txt']
    found = find_files(xmlfile, rootdir=self.datadir)

    # assertCountEqual is the Python 3 name of the order-insensitive
    # comparison; assertItemsEqual only exists on Python 2.
    self.assertCountEqual([x.path for x in found], expected)
def test_files_object_element(self):
    """Check the result for two ``premis:file`` objects with format ``TaR``.

    Both objects reference a tar file, yet only ``2.tar`` is expected in
    the result.
    """
    # NOTE(review): presumably find_files drops the leading object because
    # it describes the container itself - confirm against find_files.
    document = '''<?xml version="1.0" encoding="UTF-8" ?> <root xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <object xsi:type="premis:file"> <storage> <contentLocation> <contentLocationValue>file:///1.tar</contentLocationValue> </contentLocation> </storage> <objectCharacteristics> <compositionLevel>0</compositionLevel> <format> <formatDesignation> <formatName>TaR</formatName> </formatDesignation> </format> </objectCharacteristics> </object> <object xsi:type="premis:file"> <storage> <contentLocation> <contentLocationValue>file:///2.tar</contentLocationValue> </contentLocation> </storage> <objectCharacteristics> <compositionLevel>0</compositionLevel> <format> <formatDesignation> <formatName>TaR</formatName> </formatDesignation> </format> </objectCharacteristics> </object> </root> '''
    xml_path = os.path.join(self.datadir, "test.xml")
    with open(xml_path, 'w') as handle:
        handle.write(document)

    expected = ['2.tar']
    found = find_files(xml_path, rootdir=self.datadir)

    self.assertCountEqual(found, expected)
def run(self, ip=None, xmlfile=None, validate_fileformat=True, validate_integrity=True, rootdir=None):
    """Validate format and/or checksum of every file listed in ``xmlfile``.

    When a checksum validation fails, a rejection mail is sent to the
    responsible user (if that user has an e-mail address and ``self.ip``
    is set) and the original exception is re-raised.

    :param ip: primary key of the information package, used to look up
        ``rootdir`` when it is not given
    :param xmlfile: XML document listing the files
    :param validate_fileformat: validate files that carry a format
    :param validate_integrity: validate files that carry both a checksum
        and a checksum type
    :param rootdir: base directory of the files
    """
    if not (validate_fileformat or validate_integrity):
        return

    if rootdir is None:
        rootdir = InformationPackage.objects.values_list(
            'object_path', flat=True).get(pk=ip)

    format_validator = FormatValidator()

    for entry in find_files(xmlfile, rootdir):
        filename = os.path.join(rootdir, entry.path)

        if validate_fileformat and entry.format is not None:
            format_validator.validate(filename, (entry.format, None, None))

        has_checksum = entry.checksum is not None and entry.checksum_type is not None
        if not (validate_integrity and has_checksum):
            continue

        checksum_validator = ChecksumValidator(
            context='checksum_str',
            options={
                'expected': entry.checksum,
                'algorithm': entry.checksum_type,
            },
        )
        try:
            checksum_validator.validate(filename)
        except Exception as e:
            # Notify the responsible user before propagating the failure.
            recipient = User.objects.get(pk=self.responsible).email
            if recipient and self.ip:
                package = InformationPackage.objects.get(pk=self.ip)
                identifier = package.object_identifier_value
                subject = 'Rejected "%s"' % identifier
                body = '"%s" was rejected:\n%s' % (identifier, str(e))
                send_mail(subject, body, None, [recipient], fail_silently=False)
            raise
def run(self, dirname=None, files=None, files_reldir=None, xmlfile=None, rootdir=""):
    """Compare the files listed in ``xmlfile`` with the files on disk.

    The physical set is built from every file below ``dirname`` (except
    the XML file itself) plus the explicitly given ``files``.

    :param dirname: directory to walk for physical files; skipped when
        falsy
    :param files: additional physical files; ``None`` means no extra files
    :param files_reldir: when set, ``files`` are made relative to it; an
        entry equal to ``files_reldir`` is recorded by its base name
    :param xmlfile: XML document listing the logical files
    :param rootdir: passed through to ``find_files``
    :return: the string ``"Success"`` when both representations match
    :raises AssertionError: when the logical and physical sets differ
    """
    if dirname:
        xmlrelpath = remove_prefix(os.path.relpath(xmlfile, dirname), "./")
    else:
        xmlrelpath = xmlfile

    logical_files = find_files(xmlfile, rootdir)
    physical_files = set()

    if dirname:
        for root, _dirs, filenames in walk(dirname):
            # The relative directory is the same for every file in ``root``.
            reldir = os.path.relpath(root, dirname)
            for name in filenames:
                relfile = win_to_posix(os.path.join(reldir, name))
                relfile = remove_prefix(relfile, "./")
                if relfile != xmlrelpath:
                    physical_files.add(relfile)

    # ``None`` replaces the former mutable ``[]`` default.
    for f in files or ():
        if files_reldir:
            if f == files_reldir:
                physical_files.add(os.path.basename(f))
                continue
            f = os.path.relpath(f, files_reldir)
        physical_files.add(f)

    # Raised explicitly instead of via ``assert`` so the check is not
    # stripped when Python runs with -O.
    if not (logical_files == physical_files):
        raise AssertionError("the logical representation differs from the physical")

    return "Success"
def _get_files(self):
    """Store the files referenced by the context in ``self.logical_files``.

    The lookup is rooted at ``self.rootdir`` and passes ``self.recursive``
    on to ``find_files``.
    """
    referenced = find_files(
        self.context,
        rootdir=self.rootdir,
        recursive=self.recursive,
    )
    self.logical_files = referenced
def _get_files(self):
    """Store the files referenced by the context in ``self.logical_files``.

    The lookup is rooted at ``self.rootdir``.
    """
    referenced = find_files(self.context, rootdir=self.rootdir)
    self.logical_files = referenced