def generate_content_mets(ip):
    """Generate the content METS file of ``ip`` and record its file metadata.

    The specification is taken from the profile whose type is the lower-cased
    display name of the IP's package type. Once the XML has been generated,
    the relative path, creation date, size, digest algorithm and digest of
    the file are written to the ``content_mets_*`` fields and ``ip`` is saved.
    """
    relative_path = ip.get_content_mets_file_path()
    absolute_path = os.path.join(ip.object_path, relative_path)

    kind = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(kind)
    raw_data = ip.get_profile_data(kind)

    targets = {
        absolute_path: {
            'spec': rel.profile.specification,
            'data': fill_specification_data(raw_data, ip=ip),
        },
    }

    algorithm = ip.get_checksum_algorithm()
    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(targets, folderToParse=ip.object_path, algorithm=algorithm)

    # The stored path stays relative; every file system lookup uses the
    # absolute path inside the IP's object directory.
    created = timestamp_to_datetime(creation_date(absolute_path))
    ip.content_mets_path = relative_path
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(absolute_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.content_mets_digest = calculate_checksum(absolute_path, algorithm=algorithm)
    ip.save()
def run(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None,
        parsed_files=None, algorithm='SHA-256'):
    """
    Fills in the specification data of every requested file and generates
    the XML files from the specified folder

    Omitted ``filesToCreate``, ``extra_paths_to_parse`` and ``parsed_files``
    are replaced by fresh empty containers. The information package is
    looked up from ``self.ip``; when none is found the data is filled in
    without an IP or submission agreement.
    """
    filesToCreate = {} if filesToCreate is None else filesToCreate
    extra_paths_to_parse = [] if extra_paths_to_parse is None else extra_paths_to_parse
    parsed_files = [] if parsed_files is None else parsed_files

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = ip.submission_agreement if ip is not None else None

    # Each entry's data is replaced in place with its filled-in version.
    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    XMLGenerator().generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )
def test_generate_empty_element_with_multiple_attribute(self):
    # An element without content but with two attributes is expected to be
    # written as a single self-closing tag carrying both attributes.
    specification = {
        '-name': "foo",
        "-attr": [
            {
                "-name": "attr1",
                "#content": [{"text": "bar"}],
            },
            {
                "-name": "attr2",
                "#content": [{"text": "baz"}],
            },
        ],
    }

    generator = XMLGenerator({self.fname: specification})
    generator.generate()

    tree = etree.parse(self.fname)
    # Serialise to text explicitly: without an encoding argument
    # etree.tostring() returns bytes on Python 3, which never compares equal
    # to the str literal.
    self.assertEqual(
        '<foo attr1="bar" attr2="baz"/>',
        etree.tostring(tree.getroot(), encoding='unicode'),
    )
def test_position_non_alphabetically(self):
    # Children must appear in specification order ('b' before 'a'), not in
    # alphabetical order.
    child_b = {'-name': 'b', '-allowEmpty': True}
    child_a = {'-name': 'a', '-allowEmpty': True}
    specification = {'-name': 'foo', '-children': [child_b, child_a]}

    XMLGenerator({self.fname: specification}).generate()

    root = etree.parse(self.fname).getroot()
    element_a = root.find('.//a')
    element_b = root.find('.//b')

    self.assertLess(root.index(element_b), root.index(element_a))
def run(self):
    """
    Generates the content METS file of the task's information package and
    stores the path, creation date, size, digest algorithm and digest of the
    file on the package before saving it
    """
    ip = self.get_information_package()
    mets_path = ip.get_content_mets_file_path()

    kind = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(kind)
    raw_data = ip.get_profile_data(kind)

    targets = {
        mets_path: {
            'spec': rel.profile.specification,
            'data': fill_specification_data(raw_data, ip=ip),
        },
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(targets, folderToParse=ip.object_path, algorithm=algorithm)

    created = timestamp_to_datetime(creation_date(mets_path))
    ip.content_mets_path = mets_path
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(mets_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.content_mets_digest = calculate_checksum(mets_path, algorithm=algorithm)
    ip.save()
def generate_package_mets(ip):
    """Generate the package METS file next to ``ip.object_path``.

    SIPs use the ``submit_description`` profile and AIPs the
    ``aip_description`` profile. The XML is written to the object path with
    its extension replaced by ``.xml``, and the ``package_mets_*`` fields of
    ``ip`` are updated and saved afterwards.

    Raises:
        ValueError: if the package type of ``ip`` is neither SIP nor AIP.
    """
    sa = ip.submission_agreement

    profile_by_package_type = (
        (InformationPackage.SIP, 'submit_description'),
        (InformationPackage.AIP, 'aip_description'),
    )
    for package_type, candidate in profile_by_package_type:
        if ip.package_type == package_type:
            profile_type = candidate
            break
    else:
        raise ValueError(
            'Cannot create package mets for IP of type {package_type}'.format(
                package_type=ip.package_type
            )
        )

    profile_rel = ip.get_profile_rel(profile_type)
    profile_data = ip.get_profile_data(profile_type)

    base, _extension = os.path.splitext(ip.object_path)
    xmlpath = base + '.xml'

    data = fill_specification_data(profile_data, ip=ip, sa=sa)
    object_created = timestamp_to_datetime(creation_date(ip.object_path))
    data["_IP_CREATEDATE"] = object_created.isoformat()

    files_to_create = {
        xmlpath: {
            'spec': profile_rel.profile.specification,
            'data': data,
        },
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(files_to_create, folderToParse=ip.object_path, algorithm=algorithm)

    xml_created = timestamp_to_datetime(creation_date(xmlpath))
    ip.package_mets_path = normalize_path(xmlpath)
    ip.package_mets_create_date = xml_created.isoformat()
    ip.package_mets_size = os.path.getsize(xmlpath)
    ip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.package_mets_digest = calculate_checksum(xmlpath, algorithm=algorithm)
    ip.save()
def test_element_with_containsFiles_without_files(self):
    # A '-containsFiles' child combined with no folder to parse must still
    # leave an output file behind.
    file_element = {
        '-name': 'bar',
        '-containsFiles': True,
        '-attr': [{
            '-name': 'name',
            '#content': [{'var': 'FName'}],
        }],
    }
    specification = {
        '-name': 'foo',
        '-allowEmpty': True,
        '-children': [file_element],
    }

    XMLGenerator({self.fname: specification}).generate()

    self.assertTrue(os.path.exists(self.fname))
def test_insert_element_at_index(self):
    # Inserting at index 0 must place the new element before <foo>;
    # inserting at index 1 must place it after <foo>.
    specification = {
        '-name': 'root',
        '-children': [
            {'-name': 'foo', '-allowEmpty': "1"},
            {'-name': 'bar', '-allowEmpty': "1"},
        ],
    }

    # (index passed to insert, whether the appended element precedes <foo>)
    scenarios = [(0, True), (1, False)]

    for insert_index, appended_first in scenarios:
        # Regenerate the file for each scenario so it starts without any
        # appended element.
        generator = XMLGenerator({self.fname: specification})
        generator.generate()

        tree = etree.parse(self.fname)
        self.assertIsNone(tree.find('.//appended'))

        append_specification = {
            '-name': 'appended',
            '#content': [{'text': 'append text'}],
        }
        generator.insert(self.fname, 'root', append_specification, {}, index=insert_index)

        tree = etree.parse(self.fname)
        root = tree.getroot()
        foo = tree.find('.//foo')
        appended = tree.find('.//appended')

        if appended_first:
            self.assertLess(root.index(appended), root.index(foo))
        else:
            self.assertLess(root.index(foo), root.index(appended))
def test_generate_empty_element(self):
    # An element with neither content nor '-allowEmpty' must be rejected
    # with an AssertionError, and no file may be left on disk.
    empty_spec = {'-name': "foo"}

    with self.assertRaises(AssertionError):
        XMLGenerator({self.fname: empty_spec}, {}).generate()

    self.assertFalse(os.path.exists(self.fname))
def test_generate_element_with_content(self):
    # Literal text content must end up as the text of the element.
    specification = {'-name': "foo", "#content": [{"text": "bar"}]}

    generator = XMLGenerator({self.fname: specification})
    generator.generate()

    tree = etree.parse(self.fname)
    # Serialise to text explicitly: without an encoding argument
    # etree.tostring() returns bytes on Python 3, which never compares equal
    # to the str literal.
    self.assertEqual(
        "<foo>bar</foo>",
        etree.tostring(tree.getroot(), encoding='unicode'),
    )
def test_generate_element_with_content_using_var(self):
    # A 'var' content entry must be replaced by the matching value from the
    # info dict handed to the generator.
    specification = {'-name': "foo", "#content": [{"var": "bar"}]}

    generator = XMLGenerator({self.fname: specification}, {'bar': 'baz'})
    generator.generate()

    tree = etree.parse(self.fname)
    # Serialise to text explicitly: without an encoding argument
    # etree.tostring() returns bytes on Python 3, which never compares equal
    # to the str literal.
    self.assertEqual(
        '<foo>baz</foo>',
        etree.tostring(tree.getroot(), encoding='unicode'),
    )
def test_element_with_filtered_files(self):
    # With an href filter of "record1/*", only files whose relative path
    # starts with 'record1' may produce a <bar>/<baz> pair.
    href_child = {
        '-name': 'baz',
        '-attr': [{
            '-name': 'href',
            '#content': [{'var': 'href'}],
        }],
    }
    file_element = {
        '-name': 'bar',
        '-containsFiles': True,
        "-filters": {"href": "record1/*"},
        '-attr': [{
            '-name': 'name',
            '#content': [{'var': 'FName'}],
        }],
        '-children': [href_child],
    }
    specification = {'-name': 'foo', '-children': [file_element]}

    generator = XMLGenerator({self.fname: specification})
    generator.generate(folderToParse=self.datadir)

    tree = etree.parse(self.fname)

    expected_count = 0
    for dirpath, _dirnames, filenames in os.walk(self.datadir):
        for filename in filenames:
            relpath = os.path.relpath(os.path.join(dirpath, filename), self.datadir)
            element = tree.find(
                ".//bar[@name='%s']/baz[@href='%s']" % (filename, relpath))

            if not relpath.startswith('record1'):
                self.assertIsNone(element)
                continue

            self.assertIsNotNone(element)
            expected_count += 1

    # No <bar> elements beyond the matched files may exist.
    self.assertEqual(len(tree.findall('.//bar')), expected_count)
def test_generate_empty_element_with_allowEmpty(self):
    # With '-allowEmpty' set, an element without content must be written as
    # an empty self-closing tag.
    specification = {'-name': "foo", "-allowEmpty": 1}

    generator = XMLGenerator({self.fname: specification}, {})
    generator.generate()

    self.assertTrue(os.path.exists(self.fname))

    tree = etree.parse(self.fname)
    # Serialise to text explicitly: without an encoding argument
    # etree.tostring() returns bytes on Python 3, which never compares equal
    # to the str literal.
    self.assertEqual(
        etree.tostring(tree.getroot(), encoding='unicode'),
        "<foo/>",
    )
def generate_events_xml(ip):
    """Generate the events XML file of ``ip`` inside its object directory.

    The file is built from the specification returned by ``get_event_spec()``
    with data filled in for ``ip``, using the IP's checksum algorithm.
    """
    events_path = os.path.join(ip.object_path, ip.get_events_file_path())
    targets = {
        events_path: {
            'spec': get_event_spec(),
            'data': fill_specification_data(ip=ip),
        },
    }

    checksum_algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(targets, algorithm=checksum_algorithm)
def run(self):
    """
    Generates the events XML file of the task's information package inside
    the package's object directory
    """
    ip = self.get_information_package()

    events_path = os.path.join(ip.object_path, ip.get_events_file_path())
    targets = {
        events_path: {
            'spec': get_event_spec(),
            'data': fill_specification_data(ip=ip),
        },
    }

    checksum_algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(targets, algorithm=checksum_algorithm)
def generate_premis(ip):
    """Generate the PREMIS file of ``ip`` from its preservation metadata profile.

    The file is written to the path returned by ``ip.get_premis_file_path()``
    and is built by parsing ``ip.object_path`` with the IP's checksum
    algorithm.
    """
    target_path = ip.get_premis_file_path()
    rel = ip.get_profile_rel('preservation_metadata')
    raw_data = ip.get_profile_data('preservation_metadata')

    targets = {
        target_path: {
            'spec': rel.profile.specification,
            'data': fill_specification_data(raw_data, ip=ip),
        },
    }

    checksum_algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(targets, folderToParse=ip.object_path, algorithm=checksum_algorithm)
def test_insert_nested_elements_with_namespace(self):
    # A namespaced element with a namespaced child is inserted under <foo>
    # three times; the nested child must be found under the namespace
    # declared on the root, with its text intact.
    nsmap = {'premis': 'http://www.loc.gov/premis/v3'}
    specification = {
        '-name': 'root',
        '-nsmap': nsmap,
        '-children': [
            {'-name': 'foo', '-allowEmpty': "1"},
        ],
    }

    generator = XMLGenerator({self.fname: specification})
    generator.generate()

    self.assertIsNone(etree.parse(self.fname).find('.//appended'))

    nested_child = {
        '-name': 'bar',
        '-namespace': 'premis',
        '#content': [{'text': 'bar text'}],
    }
    append_specification = {
        '-name': 'appended',
        '-namespace': 'premis',
        '#content': [{'text': 'append text'}],
        '-children': [nested_child],
    }

    for _ in range(3):
        generator.insert(self.fname, 'foo', append_specification, {})

    tree = etree.parse(self.fname)
    bar = tree.find('.//{%s}bar' % nsmap.get('premis'))

    self.assertIsNotNone(bar)
    self.assertEqual(bar.text, 'bar text')
def generate_content_metadata(ip):
    """Generate the content METS of ``ip`` and, when locked, its PREMIS file.

    A PREMIS file is only added when the ``preservation_metadata`` profile of
    the IP is locked. Both files are produced by a single generator run over
    ``ip.object_path``. Afterwards the ``content_mets_*`` fields of ``ip``
    are updated from the generated METS file and the IP is saved.
    """
    files_to_create = {}

    if ip.profile_locked('preservation_metadata'):
        premis_type = 'preservation_metadata'
        premis_rel = ip.get_profile_rel(premis_type)
        premis_raw_data = ip.get_profile_data(premis_type)
        premis_data = fill_specification_data(premis_raw_data, ip=ip)

        # The configured PREMIS path is passed through parseContent together
        # with the filled-in data before it is joined to the object path.
        premis_relative = parseContent(ip.get_premis_file_path(), premis_data)
        premis_absolute = os.path.join(ip.object_path, premis_relative)
        files_to_create[premis_absolute] = {
            'spec': premis_rel.profile.specification,
            'data': premis_data,
        }

    mets_path = ip.get_content_mets_file_path()
    full_mets_path = os.path.join(ip.object_path, mets_path)

    profile_type = ip.get_package_type_display().lower()
    profile_rel = ip.get_profile_rel(profile_type)
    profile_data = ip.get_profile_data(profile_type)
    files_to_create[full_mets_path] = {
        'spec': profile_rel.profile.specification,
        'data': fill_specification_data(profile_data, ip=ip),
    }

    parsed_files = profile_rel.data.parsed_files
    extra_paths_to_parse = profile_rel.data.extra_paths_to_parse
    algorithm = ip.get_checksum_algorithm()

    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(
        files_to_create,
        folderToParse=ip.object_path,
        algorithm=algorithm,
        parsed_files=parsed_files,
        extra_paths_to_parse=extra_paths_to_parse,
    )

    mets_created = timestamp_to_datetime(creation_date(full_mets_path))
    ip.content_mets_path = mets_path
    ip.content_mets_create_date = mets_created.isoformat()
    ip.content_mets_size = os.path.getsize(full_mets_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.content_mets_digest = calculate_checksum(full_mets_path, algorithm=algorithm)
    ip.save()
def run(self, info=None, filesToCreate=None, folderToParse=None, algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    Args:
        info: Data handed to the generator; an empty dict when omitted
        filesToCreate: Files to create, handed to the generator; an empty
            dict when omitted
        folderToParse: Folder passed on to ``generate``
        algorithm: Checksum algorithm passed on to ``generate``
    """
    # Fresh dicts per call: the former ``{}`` defaults were single objects
    # shared by every invocation, so a mutation made during one call would
    # have leaked into the next.
    if info is None:
        info = {}
    if filesToCreate is None:
        filesToCreate = {}

    generator = XMLGenerator(filesToCreate, info, self.taskobj)
    generator.generate(
        folderToParse=folderToParse,
        algorithm=algorithm,
    )

    self.set_progress(100, total=100)
def GenerateXML(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None,
                parsed_files=None, algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    The information package is looked up from ``self.ip``. When it exists,
    its submission agreement and its settings for unknown file types and
    encrypted files are used; otherwise both settings are ``False`` and the
    data is filled in without an IP or submission agreement. A success event
    listing the generated files is created at the end.

    Args:
        filesToCreate: Mapping of file path to a dict whose ``'data'`` entry
            is replaced in place by its filled-in version; empty when omitted
        folderToParse: Folder passed on to the generator
        extra_paths_to_parse: Passed on to the generator; empty when omitted
        parsed_files: Passed on to the generator; empty when omitted
        algorithm: Checksum algorithm passed on to the generator
    """
    if filesToCreate is None:
        filesToCreate = {}
    if extra_paths_to_parse is None:
        extra_paths_to_parse = []
    if parsed_files is None:
        parsed_files = []

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = None
    allow_unknown_file_types = False
    allow_encrypted_files = False

    if ip is not None:
        sa = ip.submission_agreement
        allow_unknown_file_types = ip.get_allow_unknown_file_types()
        allow_encrypted_files = ip.get_allow_encrypted_files()

    # Only the values are needed; the file paths are left untouched.
    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    generator = XMLGenerator(
        allow_unknown_file_types=allow_unknown_file_types,
        allow_encrypted_files=allow_encrypted_files,
    )
    generator.generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )

    # filesToCreate is guaranteed to be a dict here (normalised at the top),
    # so no second None check is needed before building the message.
    msg = "Generated %s" % ", ".join(filesToCreate.keys())
    self.create_success_event(msg)
def test_generate_empty_element_with_empty_children(self):
    # A root whose only child is empty (and not allowed to be) must be
    # rejected with an AssertionError, and no file may be left on disk.
    empty_child = {'-name': 'bar'}
    specification = {'-name': 'foo', '-children': [empty_child]}

    with self.assertRaises(AssertionError):
        XMLGenerator({self.fname: specification}, {}).generate()

    self.assertFalse(os.path.exists(self.fname))
def test_generate_empty_element_with_empty_children_with_allow_empty(self):
    # An empty child flagged with '-allowEmpty' is enough for the file to be
    # created.
    allowed_empty_child = {'-name': 'bar', '-allowEmpty': True}
    specification = {'-name': 'foo', '-children': [allowed_empty_child]}

    XMLGenerator({self.fname: specification}, {}).generate()

    self.assertTrue(os.path.exists(self.fname))
def test_generate_empty_element_with_children(self):
    # A root without own content but with a child that has content must be
    # written to disk.
    child_with_text = {
        '-name': 'bar',
        '#content': [{'text': 'baz'}],
    }
    specification = {'-name': 'foo', '-children': [child_with_text]}

    XMLGenerator({self.fname: specification}, {}).generate()

    self.assertTrue(os.path.exists(self.fname))
def test_generate_element_with_children(self):
    # A child with text content must be nested inside the root element in
    # the serialised output.
    specification = {
        '-name': 'foo',
        '-children': [{
            '-name': 'bar',
            '#content': [{'text': 'baz'}],
        }],
    }

    generator = XMLGenerator({self.fname: specification}, {'bar': 'baz'})
    generator.generate()

    tree = etree.parse(self.fname)
    # Serialise to text explicitly: without an encoding argument
    # etree.tostring() returns bytes on Python 3, which never compares equal
    # to the str literal.
    self.assertEqual(
        '<foo>\n <bar>baz</bar>\n</foo>',
        etree.tostring(tree.getroot(), encoding='unicode'),
    )
def test_generate_empty_element_with_empty_attribute(self):
    # An attribute whose content is the empty string does not make the
    # element non-empty: generation must fail with an AssertionError and no
    # file may be left on disk.
    empty_attribute = {
        '-name': 'bar',
        '#content': [{'text': ''}],
    }
    specification = {'-name': 'foo', '-attr': [empty_attribute]}

    with self.assertRaises(AssertionError):
        XMLGenerator({self.fname: specification}, {}).generate()

    self.assertFalse(os.path.exists(self.fname))
def generate_premis(ip):
    """Generate the PREMIS file of ``ip`` from its preservation metadata profile.

    The configured PREMIS path is passed through ``parseContent`` together
    with the filled-in profile data to obtain the target path. The generator
    is configured with the IP's settings for unknown file types and encrypted
    files, and parses ``ip.object_path`` with the IP's checksum algorithm.
    """
    rel = ip.get_profile_rel('preservation_metadata')
    raw_data = ip.get_profile_data('preservation_metadata')
    data = fill_specification_data(raw_data, ip=ip)

    target_path = parseContent(ip.get_premis_file_path(), data)
    targets = {
        target_path: {
            'spec': rel.profile.specification,
            'data': data,
        },
    }

    checksum_algorithm = ip.get_checksum_algorithm()
    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(targets, folderToParse=ip.object_path, algorithm=checksum_algorithm)
def test_generate_multiple_element_same_name_same_level(self):
    # Children named 'bar' and 'bar#1' must both come out as <bar> elements
    # on the same level.
    siblings = [
        {'-name': 'bar', '-allowEmpty': True},
        {'-name': 'bar#1', '-allowEmpty': True},
    ]
    specification = {
        '-name': "foo",
        '-allowEmpty': True,
        '-children': siblings,
    }

    XMLGenerator({self.fname: specification}, {}).generate()

    bars = etree.parse(self.fname).findall('.//bar')
    self.assertEqual(len(bars), 2)
def test_generate_namespaces(self):
    # The namespace map of the specification must be declared on the root,
    # and a namespaced attribute must be stored under that namespace with
    # the value taken from the info dict.
    nsmap = {
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    }
    schema_location_attr = {
        "-name": "schemaLocation",
        "-namespace": "xsi",
        "-req": 1,
        "#content": [{"var": "xsi:schemaLocation"}],
    }
    specification = {
        "-name": "foo",
        '-nsmap': nsmap,
        "-attr": [schema_location_attr],
    }
    info = {
        "xsi:schemaLocation": "http://www.w3.org/1999/xlink schemas/xlink.xsd",
    }

    XMLGenerator({self.fname: specification}, info).generate()

    root = etree.parse(self.fname).getroot()
    xsi_ns = root.nsmap.get("xsi")

    self.assertEqual(xsi_ns, nsmap.get("xsi"))
    self.assertEqual(
        root.attrib.get("{%s}schemaLocation" % xsi_ns),
        info["xsi:schemaLocation"],
    )
def test_insert_element_with_attribute(self):
    # An element that carries only an attribute is inserted under <foo>; it
    # must be present afterwards with the attribute value intact.
    specification = {
        '-name': 'root',
        '-children': [
            {'-name': 'foo', '-allowEmpty': "1"},
        ],
    }

    generator = XMLGenerator({self.fname: specification})
    generator.generate()

    self.assertIsNone(etree.parse(self.fname).find('.//appended'))

    append_specification = {
        '-name': 'appended',
        '-attr': [{
            '-name': 'bar',
            '#content': [{'text': 'append text'}],
        }],
    }
    generator.insert(self.fname, 'foo', append_specification, {})

    appended = etree.parse(self.fname).find('.//appended')

    self.assertIsNotNone(appended)
    self.assertEqual(appended.get('bar'), 'append text')
def test_generate_element_with_content_and_attribute(self):
    # Text content and an attribute on the same element must both appear in
    # the serialised output.
    specification = {
        '-name': "foo",
        '#content': [{'text': 'bar'}],
        '-attr': [
            {
                "-name": "attr1",
                "#content": [{"text": "baz"}],
            },
        ],
    }

    generator = XMLGenerator({self.fname: specification})
    generator.generate()

    tree = etree.parse(self.fname)
    # Serialise to text explicitly: without an encoding argument
    # etree.tostring() returns bytes on Python 3, which never compares equal
    # to the str literal.
    self.assertEqual(
        '<foo attr1="baz">bar</foo>',
        etree.tostring(tree.getroot(), encoding='unicode'),
    )