def test_validation_with_unchanged_files(self):
    """Validate PREMIS against METS when both describe the same, untouched files.

    The test passes when ``validate`` returns without raising.
    """
    # Only the files on disk matter here; the returned paths are not needed.
    self.create_files()
    self.generate_mets_xml()
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    self.validator.validate(self.premis)
def CompareRepresentationXMLFiles(self):
    """Compare the METS and PREMIS files of every representation in the package.

    Earlier ``Validation`` rows stored for this task are deleted first. For
    each pointer found in the package's content METS, an
    ``XMLComparisonValidator`` is built with the representation's PREMIS file
    as context and run against the representation's METS file. A success
    event is created once the loop has finished without an exception.

    Returns:
        None. Returns early, before any validation or event creation, when
        ``find_destination`` yields no path for "representations".
    """
    Validation.objects.filter(task=self.get_processtask()).delete()
    ip = InformationPackage.objects.get(pk=self.ip)

    reps_path, reps_dir = find_destination("representations", ip.get_structure(), ip.object_path)
    if reps_path is None:
        return None

    representations_dir = os.path.join(reps_path, reps_dir)
    content_mets = os.path.join(ip.object_path, ip.content_mets_path)

    for pointer in find_pointers(content_mets):
        rep_mets = os.path.join(ip.object_path, pointer.path)

        # The first path component below the representations directory is
        # used as the representation's name.
        relative_mets = os.path.relpath(rep_mets, representations_dir)
        rep_name = PurePath(relative_mets).parts[0]
        rep_root = os.path.join(representations_dir, rep_name)

        premis_ref = get_premis_ref(etree.parse(rep_mets))
        rep_premis = os.path.join(rep_root, premis_ref.path)

        validator = XMLComparisonValidator(
            context=rep_premis,
            options={
                'rootdir': rep_root,
                'representation': rep_name,
                'recursive': False,
            },
            task=self.get_processtask(),
            ip=self.ip,
            responsible=ip.responsible,
        )
        validator.validate(rep_mets)

    self.create_success_event("All XML files in the representations have the same set of files")
def run(self):
    """Compare the METS and PREMIS files of every representation in the package.

    Earlier ``Validation`` rows stored for this task are deleted first. For
    each pointer found in the package's content METS, an
    ``XMLComparisonValidator`` is built with the representation's PREMIS file
    as context and run against the representation's METS file.

    Returns:
        None. Returns early, before any validation, when
        ``find_destination`` yields no path for "representations".
    """
    Validation.objects.filter(task=self.get_processtask()).delete()
    ip = InformationPackage.objects.get(pk=self.ip)

    reps_path, reps_dir = find_destination("representations", ip.get_structure(), ip.object_path)
    if reps_path is None:
        return None

    representations_dir = os.path.join(reps_path, reps_dir)

    # Anchor the content METS path to the package root, like every other path
    # in this method, so the lookup does not depend on the working directory.
    # os.path.join returns an absolute second argument unchanged, so an
    # already-absolute content_mets_path keeps working.
    content_mets = os.path.join(ip.object_path, ip.content_mets_path)

    for p in find_pointers(content_mets):
        rep_mets_path = os.path.join(ip.object_path, p.path)

        # The first path component below the representations directory is
        # used as the representation's name.
        rep_path = os.path.relpath(rep_mets_path, representations_dir)
        rep_path = PurePath(rep_path).parts[0]

        rep_premis_path = get_premis_ref(etree.parse(rep_mets_path)).path
        rep_premis_path = os.path.join(representations_dir, rep_path, rep_premis_path)

        validator = XMLComparisonValidator(
            context=rep_premis_path,
            options={
                'rootdir': os.path.join(representations_dir, rep_path),
                'representation': rep_path,
            },
            task=self.get_processtask(),
            ip=self.ip,
            responsible=ip.responsible,
        )
        validator.validate(rep_mets_path)
def test_validation_three_identical_files_two_renamed_one_added(self):
    """Three files with identical content: rename two, add a fourth identical one.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '1 confirmed, 1 added, 0 changed, 2 renamed, 0 deleted'.
    """
    files = []
    for i in range(3):
        fname = os.path.join(self.datadir, '%s.txt' % i)
        with open(fname, 'w') as f:
            f.write('foo')
        files.append(fname)
    self.generate_mets_xml()

    # Alter the directory between the METS and PREMIS generation.
    os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
    os.rename(files[1], os.path.join(self.datadir, 'newer.txt'))
    with open(os.path.join(self.datadir, 'added.txt'), 'w') as f:
        f.write('foo')
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '1 confirmed, 1 added, 0 changed, 2 renamed, 0 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def test_validation_without_files(self):
    """Validate two XML documents that contain only an empty ``<root>`` element.

    The test passes when ``validate`` returns without raising.
    """
    empty_root = etree.fromstring('<root></root>')
    document = etree.tostring(empty_root, pretty_print=True, xml_declaration=True, encoding='UTF-8')

    # Both files receive the same serialized, file-less document.
    for xml_path in (self.mets, self.premis):
        with open(xml_path, 'wb') as xml_file:
            xml_file.write(document)

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    self.validator.validate(self.premis)
def test_validation_with_deleted_file(self):
    """Delete one file between generating METS and PREMIS.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '2 confirmed, 0 added, 0 changed, 0 renamed, 1 deleted'.
    """
    files = self.create_files()
    self.generate_mets_xml()

    os.remove(files[0])
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '2 confirmed, 0 added, 0 changed, 0 renamed, 1 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def test_validation_with_size_attribute_missing(self):
    """Remove the SIZE attribute from the second PREMIS ``file`` element.

    Unlike the missing-CHECKSUM case, no error is expected here: the test
    passes when ``validate`` returns without raising.
    """
    # Only the files on disk matter here; the returned paths are not needed.
    self.create_files()
    self.generate_mets_xml()
    self.generate_premis_xml()

    # Rewrite the PREMIS file with SIZE stripped from its second file element.
    tree = etree.parse(self.premis)
    file_el = tree.xpath('*[local-name()="file"]')[1]
    file_el.attrib.pop('SIZE')
    tree.write(self.premis, xml_declaration=True, encoding='UTF-8')

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    self.validator.validate(self.premis)
def test_validation_with_changed_file(self):
    """Append to one file between generating METS and PREMIS.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted'.
    """
    files = self.create_files()
    self.generate_mets_xml()

    with open(files[0], 'a') as f:
        f.write('changed')
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def run(self, first, second, rootdir=None):
    """Validate the XML file ``second`` against the XML file ``first``.

    Earlier ``Validation`` rows stored for this task are deleted before the
    comparison runs.

    Args:
        first: Passed through ``self.parse_params`` and used as the
            validator's context.
        second: Passed through ``self.parse_params`` and handed to
            ``validate``.
        rootdir: Root directory option for the validator. Defaults to the
            information package's ``object_path``; an explicit value is
            passed through ``self.parse_params`` first.
    """
    Validation.objects.filter(task=self.task_id).delete()
    first, second = self.parse_params(first, second)
    ip = InformationPackage.objects.get(pk=self.ip)

    if rootdir is not None:
        # parse_params yields one value per argument; unpack the single result.
        (rootdir,) = self.parse_params(rootdir)
    else:
        rootdir = ip.object_path

    comparison = XMLComparisonValidator(
        context=first,
        options={'rootdir': rootdir},
        task=self.task_id,
        ip=self.ip,
        responsible=ip.responsible,
    )
    comparison.validate(second)
def test_validation_with_unchanged_files_with_same_content(self):
    """Validate two untouched files that share the same content.

    The test passes when ``validate`` returns without raising.
    """
    for name in ('first.txt', 'second.txt'):
        target = os.path.join(self.datadir, name)
        with open(target, 'w') as fp:
            fp.write('foo')

    self.generate_mets_xml()
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    self.validator.validate(self.premis)
def test_validation_with_added_file(self):
    """Add one new file between generating METS and PREMIS.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '3 confirmed, 1 added, 0 changed, 0 renamed, 0 deleted'.
    """
    # Only the files on disk matter here; the returned paths are not needed.
    self.create_files()
    self.generate_mets_xml()

    added = os.path.join(self.datadir, 'added.txt')
    with open(added, 'w') as f:
        f.write('added')
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '3 confirmed, 1 added, 0 changed, 0 renamed, 0 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def test_validation_with_renamed_file(self):
    """Rename one file between generating METS and PREMIS.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '2 confirmed, 0 added, 0 changed, 1 renamed, 0 deleted'.
    """
    files = self.create_files()
    self.generate_mets_xml()

    os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '2 confirmed, 0 added, 0 changed, 1 renamed, 0 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def test_validation_with_checksum_attribute_missing(self):
    """Remove the CHECKSUM attribute from the second PREMIS ``file`` element.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted'.
    """
    # Only the files on disk matter here; the returned paths are not needed.
    self.create_files()
    self.generate_mets_xml()
    self.generate_premis_xml()

    # Rewrite the PREMIS file with CHECKSUM stripped from its second file element.
    tree = etree.parse(self.premis)
    file_el = tree.xpath('*[local-name()="file"]')[1]
    file_el.attrib.pop('CHECKSUM')
    tree.write(self.premis, xml_declaration=True, encoding='UTF-8')

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def test_validation_with_incorrect_size(self):
    """Double the SIZE attribute of the second PREMIS ``file`` element.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted'.
    """
    files = self.create_files()
    self.generate_mets_xml()
    self.generate_premis_xml()

    # Rewrite the PREMIS file so the recorded size is twice the real size of files[1].
    tree = etree.parse(self.premis)
    file_el = tree.xpath('*[local-name()="file"]')[1]
    file_el.attrib['SIZE'] = str(os.path.getsize(files[1]) * 2)
    tree.write(self.premis, xml_declaration=True, encoding='UTF-8')

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def test_validation_two_identical_files_one_missing(self):
    """Two files with identical content: delete one before generating PREMIS.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '1 confirmed, 0 added, 0 changed, 0 renamed, 1 deleted'.
    """
    files = []
    for i in range(2):
        fname = os.path.join(self.datadir, '%s.txt' % i)
        with open(fname, 'w') as f:
            f.write('foo')
        files.append(fname)
    self.generate_mets_xml()

    os.remove(files[0])
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '1 confirmed, 0 added, 0 changed, 0 renamed, 1 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def run(self, first, second, rootdir=None, recursive=True):
    """Validate the XML file ``second`` against the XML file ``first``.

    Earlier ``Validation`` rows stored for this task are deleted before the
    comparison runs.

    Args:
        first: Passed through ``self.parse_params`` and used as the
            validator's context.
        second: Passed through ``self.parse_params`` and handed to
            ``validate``.
        rootdir: Root directory option for the validator. Defaults to the
            information package's ``object_path``; an explicit value is
            passed through ``self.parse_params`` first.
        recursive: Forwarded unchanged as the validator's ``recursive``
            option.
    """
    Validation.objects.filter(task=self.get_processtask()).delete()
    first, second = self.parse_params(first, second)
    ip = InformationPackage.objects.get(pk=self.ip)

    if rootdir is not None:
        # parse_params yields one value per argument; unpack the single result.
        (rootdir,) = self.parse_params(rootdir)
    else:
        rootdir = ip.object_path

    comparison = XMLComparisonValidator(
        context=first,
        options={'rootdir': rootdir, 'recursive': recursive},
        task=self.get_processtask(),
        ip=self.ip,
        responsible=ip.responsible,
    )
    comparison.validate(second)
def test_validation_with_all_alterations(self):
    """Change, delete, rename and add files between generating METS and PREMIS.

    Validation is expected to raise ``ValidationError`` with a message
    ending in '0 confirmed, 1 added, 1 changed, 1 renamed, 1 deleted'.
    """
    files = self.create_files()
    self.generate_mets_xml()

    # One alteration of each kind, applied to a different file each time.
    with open(files[0], 'a') as f:
        f.write('changed')
    os.remove(files[1])
    os.rename(files[2], os.path.join(self.datadir, 'new.txt'))
    added = os.path.join(self.datadir, 'added.txt')
    with open(added, 'w') as f:
        f.write('added')
    self.generate_premis_xml()

    self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
    msg = '0 confirmed, 1 added, 1 changed, 1 renamed, 1 deleted$'
    # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
    with self.assertRaisesRegex(ValidationError, msg):
        self.validator.validate(self.premis)
def CompareXMLFiles(self, first, second, rootdir=None, recursive=True):
    """Validate the XML file ``second`` against ``first`` and record success.

    Earlier ``Validation`` rows stored for this task are deleted before the
    comparison runs. A success event naming both files is created when
    ``validate`` returns without raising.

    Args:
        first: Passed through ``self.parse_params`` and used as the
            validator's context.
        second: Passed through ``self.parse_params`` and handed to
            ``validate``.
        rootdir: Root directory option for the validator. Defaults to the
            information package's ``object_path``; an explicit value is
            passed through ``self.parse_params`` first.
        recursive: Forwarded unchanged as the validator's ``recursive``
            option.
    """
    Validation.objects.filter(task=self.get_processtask()).delete()
    first, second = self.parse_params(first, second)
    ip = InformationPackage.objects.get(pk=self.ip)

    if rootdir is not None:
        # parse_params yields one value per argument; unpack the single result.
        (rootdir,) = self.parse_params(rootdir)
    else:
        rootdir = ip.object_path

    comparison = XMLComparisonValidator(
        context=first,
        options={'rootdir': rootdir, 'recursive': recursive},
        task=self.get_processtask(),
        ip=self.ip,
        responsible=ip.responsible,
    )
    comparison.validate(second)

    self.create_success_event("%s and %s has the same set of files" % (first, second))
class XMLComparisonValidatorTests(TestCase):
    """Tests for ``XMLComparisonValidator`` comparing a PREMIS file with a METS file.

    Each test populates a scratch ``datadir`` next to this module, generates
    ``mets.xml`` from it, optionally alters the files or the XML, generates
    ``premis.xml`` and finally validates the PREMIS file with the METS file
    as the validator's context. The scratch directory is removed after every
    test.
    """

    @classmethod
    def setUpClass(cls):
        # Let the base class perform its own class-wide initialisation first.
        super().setUpClass()
        cls.generator = XMLGenerator()

    @classmethod
    def tearDownClass(cls):
        Path.objects.all().delete()
        super().tearDownClass()

    @staticmethod
    def _file_listing_spec():
        """Return a fresh XML spec: a ``root`` element listing one ``file`` per file."""
        return {
            '-name': 'root',
            '-children': [
                {
                    "-name": "file",
                    "-containsFiles": True,
                    "-attr": [
                        {
                            "-name": "MIMETYPE",
                            "#content": "{{FMimetype}}",
                        },
                        {
                            "-name": "CHECKSUM",
                            "#content": "{{FChecksum}}"
                        },
                        {
                            "-name": "CHECKSUMTYPE",
                            "#content": "{{FChecksumType}}"
                        },
                        {
                            "-name": "SIZE",
                            "#content": "{{FSize}}"
                        }
                    ],
                    "-children": [
                        {
                            "-name": "FLocat",
                            "-attr": [
                                {
                                    "-name": "href",
                                    "-namespace": "xlink",
                                    "#content": "file:///{{href}}"
                                },
                            ]
                        }
                    ]
                }
            ]
        }

    def setUp(self):
        self.root = os.path.dirname(os.path.realpath(__file__))
        self.datadir = os.path.join(self.root, "datadir")
        self.xmldir = os.path.join(self.datadir, "metadata")
        self.mets = os.path.join(self.xmldir, 'mets.xml')
        self.premis = os.path.join(self.xmldir, 'premis.xml')
        self.options = {'rootdir': self.datadir}

        # Both documents use the same layout; each gets its own spec object.
        self.mets_spec = {
            self.mets: {'data': {}, 'spec': self._file_listing_spec()},
        }
        self.premis_spec = {
            self.premis: {'data': {}, 'spec': self._file_listing_spec()},
        }

        # xmldir lives inside datadir, so this creates both and tolerates
        # directories left behind by an earlier run.
        os.makedirs(self.xmldir, exist_ok=True)

    def tearDown(self):
        shutil.rmtree(self.datadir)

    def create_files(self):
        """Create ``0.txt``, ``1.txt`` and ``2.txt``, each containing its own index.

        Returns:
            The list of created file paths, in index order.
        """
        files = []
        for i in range(3):
            fname = os.path.join(self.datadir, '%s.txt' % i)
            with open(fname, 'w') as f:
                f.write('%s' % i)
            files.append(fname)
        return files

    def _create_identical_files(self, count):
        """Create ``count`` files named ``<index>.txt`` that all contain 'foo'.

        Returns:
            The list of created file paths, in index order.
        """
        files = []
        for i in range(count):
            fname = os.path.join(self.datadir, '%s.txt' % i)
            with open(fname, 'w') as f:
                f.write('foo')
            files.append(fname)
        return files

    def generate_mets_xml(self):
        self.generator.generate(self.mets_spec, folderToParse=self.datadir)

    def generate_premis_xml(self):
        self.generator.generate(self.premis_spec, folderToParse=self.datadir)

    def _validate(self):
        """Validate the PREMIS file with the METS file as context."""
        self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
        self.validator.validate(self.premis)

    def _assert_validation_fails(self, msg):
        """Assert that validation raises ``ValidationError`` matching ``msg``."""
        self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
        # assertRaisesRegexp is a deprecated alias that was removed in Python 3.12.
        with self.assertRaisesRegex(ValidationError, msg):
            self.validator.validate(self.premis)

    def test_validation_without_files(self):
        root = etree.fromstring('<root></root>')
        document = etree.tostring(root, pretty_print=True, xml_declaration=True, encoding='UTF-8')

        # Both files receive the same serialized, file-less document.
        for xml_path in (self.mets, self.premis):
            with open(xml_path, 'wb') as f:
                f.write(document)

        self._validate()

    def test_validation_with_unchanged_files(self):
        self.create_files()
        self.generate_mets_xml()
        self.generate_premis_xml()

        self._validate()

    def test_validation_with_unchanged_files_multiple_times(self):
        self.create_files()
        self.generate_mets_xml()
        self.generate_premis_xml()

        # The same validator instance is run twice.
        self.validator = XMLComparisonValidator(context=self.mets, options=self.options)
        self.validator.validate(self.premis)
        self.validator.validate(self.premis)

    def test_validation_with_unchanged_files_with_same_content(self):
        for name in ('first.txt', 'second.txt'):
            with open(os.path.join(self.datadir, name), 'w') as fp:
                fp.write('foo')

        self.generate_mets_xml()
        self.generate_premis_xml()

        self._validate()

    def test_validation_with_deleted_file(self):
        files = self.create_files()
        self.generate_mets_xml()

        os.remove(files[0])
        self.generate_premis_xml()

        self._assert_validation_fails('2 confirmed, 0 added, 0 changed, 0 renamed, 1 deleted$')

    def test_validation_with_added_file(self):
        self.create_files()
        self.generate_mets_xml()

        added = os.path.join(self.datadir, 'added.txt')
        with open(added, 'w') as f:
            f.write('added')
        self.generate_premis_xml()

        self._assert_validation_fails('3 confirmed, 1 added, 0 changed, 0 renamed, 0 deleted$')

    def test_validation_with_renamed_file(self):
        files = self.create_files()
        self.generate_mets_xml()

        os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
        self.generate_premis_xml()

        self._assert_validation_fails('2 confirmed, 0 added, 0 changed, 1 renamed, 0 deleted$')

    def test_validation_with_changed_file(self):
        files = self.create_files()
        self.generate_mets_xml()

        with open(files[0], 'a') as f:
            f.write('changed')
        self.generate_premis_xml()

        self._assert_validation_fails('2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$')

    def test_validation_with_checksum_attribute_missing(self):
        self.create_files()
        self.generate_mets_xml()
        self.generate_premis_xml()

        # Rewrite the PREMIS file with CHECKSUM stripped from its second file element.
        tree = etree.parse(self.premis)
        file_el = tree.xpath('*[local-name()="file"]')[1]
        file_el.attrib.pop('CHECKSUM')
        tree.write(self.premis, xml_declaration=True, encoding='UTF-8')

        self._assert_validation_fails('2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$')

    def test_validation_with_incorrect_size(self):
        files = self.create_files()
        self.generate_mets_xml()
        self.generate_premis_xml()

        # Rewrite the PREMIS file so the recorded size is twice the real size of files[1].
        tree = etree.parse(self.premis)
        file_el = tree.xpath('*[local-name()="file"]')[1]
        file_el.attrib['SIZE'] = str(os.path.getsize(files[1]) * 2)
        tree.write(self.premis, xml_declaration=True, encoding='UTF-8')

        self._assert_validation_fails('2 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$')

    def test_validation_with_size_attribute_missing(self):
        self.create_files()
        self.generate_mets_xml()
        self.generate_premis_xml()

        # Rewrite the PREMIS file with SIZE stripped from its second file element.
        tree = etree.parse(self.premis)
        file_el = tree.xpath('*[local-name()="file"]')[1]
        file_el.attrib.pop('SIZE')
        tree.write(self.premis, xml_declaration=True, encoding='UTF-8')

        # Unlike the missing-CHECKSUM case, no error is expected here.
        self._validate()

    def test_validation_two_identical_files_one_missing(self):
        files = self._create_identical_files(2)
        self.generate_mets_xml()

        os.remove(files[0])
        self.generate_premis_xml()

        self._assert_validation_fails('1 confirmed, 0 added, 0 changed, 0 renamed, 1 deleted$')

    def test_validation_two_identical_files_one_renamed(self):
        files = self._create_identical_files(2)
        self.generate_mets_xml()

        os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
        self.generate_premis_xml()

        self._assert_validation_fails('1 confirmed, 0 added, 0 changed, 1 renamed, 0 deleted$')

    def test_validation_two_identical_files_one_renamed_one_deleted(self):
        files = self._create_identical_files(2)
        self.generate_mets_xml()

        os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
        os.remove(files[1])
        self.generate_premis_xml()

        self._assert_validation_fails('0 confirmed, 0 added, 0 changed, 1 renamed, 1 deleted$')

    def test_validation_three_identical_files_two_renamed_one_deleted(self):
        files = self._create_identical_files(3)
        self.generate_mets_xml()

        os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
        os.rename(files[1], os.path.join(self.datadir, 'newer.txt'))
        os.remove(files[2])
        self.generate_premis_xml()

        self._assert_validation_fails('0 confirmed, 0 added, 0 changed, 2 renamed, 1 deleted$')

    def test_validation_three_identical_files_two_renamed_one_added(self):
        files = self._create_identical_files(3)
        self.generate_mets_xml()

        os.rename(files[0], os.path.join(self.datadir, 'new.txt'))
        os.rename(files[1], os.path.join(self.datadir, 'newer.txt'))
        added = os.path.join(self.datadir, 'added.txt')
        with open(added, 'w') as f:
            f.write('foo')
        self.generate_premis_xml()

        self._assert_validation_fails('1 confirmed, 1 added, 0 changed, 2 renamed, 0 deleted$')

    def test_validation_two_identical_files_one_changed(self):
        files = self._create_identical_files(2)
        self.generate_mets_xml()

        with open(files[0], 'a') as f:
            f.write('changed')
        self.generate_premis_xml()

        self._assert_validation_fails('1 confirmed, 0 added, 1 changed, 0 renamed, 0 deleted$')

    def test_validation_with_added_identical_file(self):
        files = self.create_files()
        self.generate_mets_xml()

        # The added file is a byte-for-byte copy of an existing one.
        added = os.path.join(self.datadir, 'added.txt')
        with open(added, 'w') as f, open(files[1]) as f1:
            f.write(f1.read())
        self.generate_premis_xml()

        self._assert_validation_fails('3 confirmed, 1 added, 0 changed, 0 renamed, 0 deleted$')

    def test_validation_with_all_alterations(self):
        files = self.create_files()
        self.generate_mets_xml()

        # One alteration of each kind, applied to a different file each time.
        with open(files[0], 'a') as f:
            f.write('changed')
        os.remove(files[1])
        os.rename(files[2], os.path.join(self.datadir, 'new.txt'))
        added = os.path.join(self.datadir, 'added.txt')
        with open(added, 'w') as f:
            f.write('added')
        self.generate_premis_xml()

        self._assert_validation_fails('0 confirmed, 1 added, 1 changed, 1 renamed, 1 deleted$')