def test_element_with_containsFiles_without_files(self):
    # A '-containsFiles' child with no folder to parse must not prevent the
    # '-allowEmpty' root from being written to disk.
    name_attribute = {
        '-name': 'name',
        '#content': [{'var': 'FName'}],
    }
    file_child = {
        '-name': 'bar',
        '-containsFiles': True,
        '-attr': [name_attribute],
    }
    specification = {
        '-name': 'foo',
        '-allowEmpty': True,
        '-children': [file_child],
    }

    XMLGenerator({self.fname: specification}).generate()

    self.assertTrue(os.path.exists(self.fname))
def generate_content_mets(ip):
    """Generate the content METS file for ``ip`` and store its metadata.

    The file is written below ``ip.object_path`` using the specification of
    the profile matching the package type. Afterwards the relative path,
    creation date, size, digest algorithm and digest of the generated file
    are stored on ``ip``, which is then saved.
    """
    relative_mets = ip.get_content_mets_file_path()
    absolute_mets = os.path.join(ip.object_path, relative_mets)

    profile_type = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)

    mets_entry = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }

    algorithm = ip.get_checksum_algorithm()
    generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    generator.generate(
        {absolute_mets: mets_entry},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )

    # Record facts about the file that was just written.
    ip.content_mets_path = relative_mets
    created = timestamp_to_datetime(creation_date(absolute_mets))
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(absolute_mets)
    digest_key = algorithm.upper()
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[digest_key]
    ip.content_mets_digest = calculate_checksum(absolute_mets, algorithm=algorithm)
    ip.save()
def generate_package_mets(ip):
    """Generate the package METS file next to ``ip.object_path``.

    The output path is ``ip.object_path`` with its extension replaced by
    ``.xml``. SIPs use the ``submit_description`` profile and AIPs the
    ``aip_description`` profile. After generation the path, creation date,
    size, digest algorithm and digest are stored on ``ip`` and it is saved.

    Raises:
        ValueError: if the package type is neither SIP nor AIP.
    """
    sa = ip.submission_agreement

    description_profiles = {
        InformationPackage.SIP: 'submit_description',
        InformationPackage.AIP: 'aip_description',
    }
    profile_type = description_profiles.get(ip.package_type)
    if profile_type is None:
        raise ValueError(
            'Cannot create package mets for IP of type {package_type}'.format(
                package_type=ip.package_type
            )
        )

    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    xmlpath = os.path.splitext(ip.object_path)[0] + '.xml'

    data = fill_specification_data(raw_data, ip=ip, sa=sa)
    package_created = timestamp_to_datetime(creation_date(ip.object_path))
    data["_IP_CREATEDATE"] = package_created.isoformat()

    mets_entry = {'spec': rel.profile.specification, 'data': data}

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(
        {xmlpath: mets_entry},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )

    # Record facts about the file that was just written.
    ip.package_mets_path = normalize_path(xmlpath)
    mets_created = timestamp_to_datetime(creation_date(xmlpath))
    ip.package_mets_create_date = mets_created.isoformat()
    ip.package_mets_size = os.path.getsize(xmlpath)
    ip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[algorithm.upper()]
    ip.package_mets_digest = calculate_checksum(xmlpath, algorithm=algorithm)
    ip.save()
def run(self):
    """Generate the content METS for this task's information package.

    The path returned by ``get_content_mets_file_path()`` is used as-is both
    as the output path and for the metadata stored on the package afterwards
    (path, creation date, size, digest algorithm and digest).
    """
    ip = self.get_information_package()
    mets_path = ip.get_content_mets_file_path()

    profile_type = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)

    mets_entry = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }

    algorithm = ip.get_checksum_algorithm()
    XMLGenerator().generate(
        {mets_path: mets_entry},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )

    ip.content_mets_path = mets_path
    created = timestamp_to_datetime(creation_date(mets_path))
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(mets_path)
    digest_key = algorithm.upper()
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[digest_key]
    ip.content_mets_digest = calculate_checksum(mets_path, algorithm=algorithm)
    ip.save()
def test_generate_empty_element_with_multiple_attribute(self):
    # Two text-valued attributes on an element without content should be
    # serialized on a self-closing tag, in specification order.
    def text_attribute(attr_name, value):
        return {"-name": attr_name, "#content": [{"text": value}]}

    specification = {
        '-name': "foo",
        "-attr": [
            text_attribute("attr1", "bar"),
            text_attribute("attr2", "baz"),
        ],
    }

    XMLGenerator({self.fname: specification}).generate()

    root = etree.parse(self.fname).getroot()
    self.assertEqual('<foo attr1="bar" attr2="baz"/>', etree.tostring(root))
def test_position_non_alphabetically(self):
    # Children must keep the order given in the specification ('b' before
    # 'a') rather than being sorted by name.
    children = [{'-name': tag, '-allowEmpty': True} for tag in ('b', 'a')]
    specification = {'-name': 'foo', '-children': children}

    XMLGenerator({self.fname: specification}).generate()

    root = etree.parse(self.fname).getroot()
    element_a = root.find('.//a')
    element_b = root.find('.//b')
    self.assertLess(root.index(element_b), root.index(element_a))
def run(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None, parsed_files=None, algorithm='SHA-256'):
    """
    Fills the data of every requested file from the task's information
    package (if one is found) and generates the XML files.

    Args:
        filesToCreate: mapping of output path to a dict with 'spec' and
            'data' entries; the 'data' entry is replaced in place
        folderToParse: forwarded to the generator
        extra_paths_to_parse: forwarded to the generator
        parsed_files: forwarded to the generator
        algorithm: forwarded to the generator
    """
    filesToCreate = {} if filesToCreate is None else filesToCreate
    extra_paths_to_parse = [] if extra_paths_to_parse is None else extra_paths_to_parse
    parsed_files = [] if parsed_files is None else parsed_files

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = ip.submission_agreement if ip is not None else None

    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    XMLGenerator().generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )
def test_generate_element_with_content(self):
    # A single text content entry becomes the text of the element.
    specification = {
        '-name': "foo",
        "#content": [{"text": "bar"}],
    }

    XMLGenerator({self.fname: specification}).generate()

    root = etree.parse(self.fname).getroot()
    self.assertEqual("<foo>bar</foo>", etree.tostring(root))
def test_generate_empty_element(self):
    # A root with neither content nor '-allowEmpty' must be rejected with an
    # AssertionError, and no file may be left behind.
    files = {self.fname: {'-name': "foo"}}

    with self.assertRaises(AssertionError):
        XMLGenerator(files, {}).generate()

    self.assertFalse(os.path.exists(self.fname))
def test_generate_element_with_content_using_var(self):
    # A 'var' content entry is resolved against the info dict.
    specification = {
        '-name': "foo",
        "#content": [{"var": "bar"}],
    }
    info = {'bar': 'baz'}

    XMLGenerator({self.fname: specification}, info).generate()

    root = etree.parse(self.fname).getroot()
    self.assertEqual('<foo>baz</foo>', etree.tostring(root))
def test_element_with_filtered_files(self):
    # Only files whose relative path starts with 'record1' may produce a
    # <bar> element when the "record1/*" href filter is set.
    href_attribute = {
        '-name': 'href',
        '#content': [{'var': 'href'}],
    }
    name_attribute = {
        '-name': 'name',
        '#content': [{'var': 'FName'}],
    }
    file_child = {
        '-name': 'bar',
        '-containsFiles': True,
        "-filters": {"href": "record1/*"},
        '-attr': [name_attribute],
        '-children': [{'-name': 'baz', '-attr': [href_attribute]}],
    }
    specification = {'-name': 'foo', '-children': [file_child]}

    generator = XMLGenerator({self.fname: specification})
    generator.generate(folderToParse=self.datadir)
    tree = etree.parse(self.fname)

    expected_count = 0
    for dirpath, _dirnames, filenames in os.walk(self.datadir):
        for filename in filenames:
            relpath = os.path.relpath(os.path.join(dirpath, filename), self.datadir)
            match = tree.find(
                ".//bar[@name='%s']/baz[@href='%s']" % (filename, relpath))

            if not relpath.startswith('record1'):
                self.assertIsNone(match)
                continue

            self.assertIsNotNone(match)
            expected_count += 1

    # No <bar> elements beyond the ones matched above.
    self.assertEqual(len(tree.findall('.//bar')), expected_count)
def generate_events_xml(ip):
    """Generate the events XML file of ``ip`` below its object path.

    Uses the spec returned by ``get_event_spec()``, data filled from ``ip``,
    and the package's checksum algorithm.
    """
    events_file = os.path.join(ip.object_path, ip.get_events_file_path())
    events_entry = {
        'spec': get_event_spec(),
        'data': fill_specification_data(ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate({events_file: events_entry}, algorithm=checksum_algorithm)
def test_generate_empty_element_with_allowEmpty(self):
    # With '-allowEmpty' set, an element without content is written as a
    # self-closing tag.
    files = {self.fname: {'-name': "foo", "-allowEmpty": 1}}

    XMLGenerator(files, {}).generate()

    self.assertTrue(os.path.exists(self.fname))
    root = etree.parse(self.fname).getroot()
    self.assertEqual(etree.tostring(root), "<foo/>")
def run(self):
    """Generate the events XML file for this task's information package.

    Uses the spec returned by ``get_event_spec()``, data filled from the
    package, and the package's checksum algorithm.
    """
    ip = self.get_information_package()
    events_file = os.path.join(ip.object_path, ip.get_events_file_path())
    events_entry = {
        'spec': get_event_spec(),
        'data': fill_specification_data(ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate({events_file: events_entry}, algorithm=checksum_algorithm)
def generate_premis(ip):
    """Generate the PREMIS file of ``ip`` from its preservation metadata profile.

    The output path is whatever ``ip.get_premis_file_path()`` returns; the
    package's object path is passed to the generator as the folder to parse.
    """
    premis_path = ip.get_premis_file_path()
    rel = ip.get_profile_rel('preservation_metadata')
    raw_data = ip.get_profile_data('preservation_metadata')

    premis_entry = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate(
        {premis_path: premis_entry},
        folderToParse=ip.object_path,
        algorithm=checksum_algorithm,
    )
def run(self, filename=None, elementToAppendTo=None, spec=None, info=None, index=None):
    """Forward an insert request to ``XMLGenerator.insert``.

    Args:
        filename: passed to the generator as the file to operate on
        elementToAppendTo: passed to the generator as the target element
        spec: specification dict for the new element; defaults to an
            empty dict
        info: data dict used with the specification; defaults to an
            empty dict
        index: passed through to the generator unchanged
    """
    # Fresh dicts per call: a literal ``{}`` default would be one shared
    # object for every invocation of the task.
    if spec is None:
        spec = {}
    if info is None:
        info = {}

    generator = XMLGenerator()
    generator.insert(filename, elementToAppendTo, spec, info=info, index=index)
def generate_content_metadata(ip):
    """Generate the content METS for ``ip``, and PREMIS when its profile is locked.

    Both files are produced by a single generator run over ``ip.object_path``.
    The PREMIS file is only included when the ``preservation_metadata``
    profile is locked. Afterwards the content METS path, creation date, size,
    digest algorithm and digest are stored on ``ip`` and it is saved.
    """
    files_to_create = {}

    if ip.profile_locked('preservation_metadata'):
        premis_type = 'preservation_metadata'
        premis_rel = ip.get_profile_rel(premis_type)
        premis_raw = ip.get_profile_data(premis_type)
        premis_data = fill_specification_data(premis_raw, ip=ip)
        premis_path = parseContent(ip.get_premis_file_path(), premis_data)
        absolute_premis = os.path.join(ip.object_path, premis_path)
        files_to_create[absolute_premis] = {
            'spec': premis_rel.profile.specification,
            'data': premis_data,
        }

    relative_mets = ip.get_content_mets_file_path()
    absolute_mets = os.path.join(ip.object_path, relative_mets)
    profile_type = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    files_to_create[absolute_mets] = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }

    parsed_files = rel.data.parsed_files
    extra_paths_to_parse = rel.data.extra_paths_to_parse
    algorithm = ip.get_checksum_algorithm()

    generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    generator.generate(
        files_to_create,
        folderToParse=ip.object_path,
        algorithm=algorithm,
        parsed_files=parsed_files,
        extra_paths_to_parse=extra_paths_to_parse,
    )

    # Record facts about the content METS that was just written.
    ip.content_mets_path = relative_mets
    created = timestamp_to_datetime(creation_date(absolute_mets))
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(absolute_mets)
    digest_key = algorithm.upper()
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[digest_key]
    ip.content_mets_digest = calculate_checksum(absolute_mets, algorithm=algorithm)
    ip.save()
def run(self, info=None, filesToCreate=None, folderToParse=None, algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    Args:
        info: data dict handed to the generator; defaults to an empty dict
        filesToCreate: files handed to the generator; defaults to an
            empty dict
        folderToParse: forwarded to ``generate``
        algorithm: forwarded to ``generate``
    """
    # Fresh dicts per call: a literal ``{}`` default would be one shared
    # object for every invocation of the task.
    if info is None:
        info = {}
    if filesToCreate is None:
        filesToCreate = {}

    generator = XMLGenerator(filesToCreate, info, self.taskobj)
    generator.generate(
        folderToParse=folderToParse,
        algorithm=algorithm,
    )

    self.set_progress(100, total=100)
def GenerateXML(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None, parsed_files=None, algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    The data of every requested file is first filled from the information
    package identified by ``self.ip`` (when one is found). The package also
    decides whether unknown file types and encrypted files are allowed;
    both default to False without a package. A success event naming the
    generated files is created at the end.

    Args:
        filesToCreate: mapping of output path to a dict with 'spec' and
            'data' entries; the 'data' entry is replaced in place
        folderToParse: forwarded to the generator
        extra_paths_to_parse: forwarded to the generator
        parsed_files: forwarded to the generator
        algorithm: forwarded to the generator
    """
    if filesToCreate is None:
        filesToCreate = {}
    if extra_paths_to_parse is None:
        extra_paths_to_parse = []
    if parsed_files is None:
        parsed_files = []

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = None
    allow_unknown_file_types = False
    allow_encrypted_files = False

    if ip is not None:
        sa = ip.submission_agreement
        allow_unknown_file_types = ip.get_allow_unknown_file_types()
        allow_encrypted_files = ip.get_allow_encrypted_files()

    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    generator = XMLGenerator(
        allow_unknown_file_types=allow_unknown_file_types,
        allow_encrypted_files=allow_encrypted_files,
    )
    generator.generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )

    # filesToCreate was normalized to a dict at the top, so it needs no
    # second None check before being joined here.
    msg = "Generated %s" % ", ".join(filesToCreate)
    self.create_success_event(msg)
def test_generate_empty_element_with_empty_children_with_allow_empty(self):
    # An empty child flagged '-allowEmpty' is enough for the file to be
    # written.
    empty_child = {'-name': 'bar', '-allowEmpty': True}
    specification = {'-name': 'foo', '-children': [empty_child]}

    XMLGenerator({self.fname: specification}, {}).generate()

    self.assertTrue(os.path.exists(self.fname))
def test_generate_empty_element_with_empty_children(self):
    # A root whose only child is empty (and not '-allowEmpty') must be
    # rejected with an AssertionError, and no file may be left behind.
    specification = {'-name': 'foo', '-children': [{'-name': 'bar'}]}
    files = {self.fname: specification}

    with self.assertRaises(AssertionError):
        XMLGenerator(files, {}).generate()

    self.assertFalse(os.path.exists(self.fname))
def test_generate_empty_element_with_children(self):
    # A root without own content is still written when a child has content.
    child_with_text = {
        '-name': 'bar',
        '#content': [{'text': 'baz'}],
    }
    specification = {'-name': 'foo', '-children': [child_with_text]}

    XMLGenerator({self.fname: specification}, {}).generate()

    self.assertTrue(os.path.exists(self.fname))
def test_generate_element_with_children(self):
    # A child with text content is nested inside the root in the
    # serialized output.
    child_with_text = {
        '-name': 'bar',
        '#content': [{'text': 'baz'}],
    }
    specification = {'-name': 'foo', '-children': [child_with_text]}
    info = {'bar': 'baz'}

    XMLGenerator({self.fname: specification}, info).generate()

    root = etree.parse(self.fname).getroot()
    self.assertEqual('<foo>\n <bar>baz</bar>\n</foo>', etree.tostring(root))
def test_generate_empty_element_with_empty_attribute(self):
    # An attribute whose text is the empty string does not make the element
    # non-empty: generation must fail and leave no file behind.
    blank_attribute = {
        '-name': 'bar',
        '#content': [{'text': ''}],
    }
    files = {self.fname: {'-name': 'foo', '-attr': [blank_attribute]}}

    with self.assertRaises(AssertionError):
        XMLGenerator(files, {}).generate()

    self.assertFalse(os.path.exists(self.fname))
def generate_premis(ip):
    """Generate the PREMIS file of ``ip`` from its preservation metadata profile.

    The output path is the result of ``parseContent`` applied to
    ``ip.get_premis_file_path()`` and the filled profile data. The
    generator is configured with the package's settings for unknown file
    types and encrypted files, and parses ``ip.object_path``.
    """
    rel = ip.get_profile_rel('preservation_metadata')
    raw_data = ip.get_profile_data('preservation_metadata')
    data = fill_specification_data(raw_data, ip=ip)
    premis_path = parseContent(ip.get_premis_file_path(), data)

    premis_entry = {
        'spec': rel.profile.specification,
        'data': data,
    }

    algorithm = ip.get_checksum_algorithm()
    generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    generator.generate(
        {premis_path: premis_entry},
        folderToParse=ip.object_path,
        algorithm=algorithm,
    )
def test_generate_multiple_element_same_name_same_level(self):
    # Children named 'bar' and 'bar#1' must both come out as <bar>.
    siblings = [
        {'-name': 'bar', '-allowEmpty': True},
        {'-name': 'bar#1', '-allowEmpty': True},
    ]
    specification = {
        '-name': "foo",
        '-allowEmpty': True,
        '-children': siblings,
    }

    XMLGenerator({self.fname: specification}, {}).generate()

    bars = etree.parse(self.fname).findall('.//bar')
    self.assertEqual(len(bars), 2)
def test_generate_namespaces(self):
    # The '-nsmap' of the spec must reach the root element, and an attribute
    # with '-namespace' must be written under that namespace.
    nsmap = {
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
    }
    schema_location_attr = {
        "-name": "schemaLocation",
        "-namespace": "xsi",
        "-req": 1,
        "#content": [{"var": "xsi:schemaLocation"}],
    }
    specification = {
        "-name": "foo",
        '-nsmap': nsmap,
        "-attr": [schema_location_attr],
    }
    info = {
        "xsi:schemaLocation": "http://www.w3.org/1999/xlink schemas/xlink.xsd",
    }

    XMLGenerator({self.fname: specification}, info).generate()

    root = etree.parse(self.fname).getroot()
    xsi_ns = root.nsmap.get("xsi")
    self.assertEqual(xsi_ns, nsmap.get("xsi"))

    qualified_name = "{%s}schemaLocation" % xsi_ns
    self.assertEqual(root.attrib.get(qualified_name), info["xsi:schemaLocation"])
def append_events(ip, events, filename):
    """Insert one event element per event into the 'premis' element of a file.

    Args:
        ip: primary key of the information package; used to look up the
            default filename and the default events
        events: events to append; when falsy, all ``EventIP`` objects whose
            ``linkingObjectIdentifierValue`` equals ``ip`` are used
        filename: XML file to update; when falsy, the events file of the
            package below its object path is used
    """
    if not filename:
        ip_obj = InformationPackage.objects.get(pk=ip)
        filename = os.path.join(ip_obj.object_path, ip_obj.get_events_file_path())

    generator = XMLGenerator(filepath=filename)
    template = get_event_element_spec()

    if not events:
        events = EventIP.objects.filter(linkingObjectIdentifierValue=ip)

    id_types = {
        id_type: Parameter.objects.cached('entity', '%s_identifier_type' % id_type, 'value')
        for id_type in ['event', 'linking_agent', 'linking_object']
    }

    target = generator.find_element('premis')

    for event in events.iterator():
        # Each event is resolved against its own linked package.
        event_ip = InformationPackage.objects.get(
            pk=event.linkingObjectIdentifierValue)
        objid = event_ip.object_identifier_value

        # Prefer the event type's code; fall back to its name when the code
        # is missing or empty.
        code = event.eventType.code
        if code is not None and code != '':
            event_type_label = str(code)
        else:
            event_type_label = str(event.eventType.eventType)

        data = {
            "eventIdentifierType": id_types['event'],
            "eventIdentifierValue": str(event.eventIdentifierValue),
            "eventType": event_type_label,
            "eventDateTime": str(event.eventDateTime),
            "eventDetail": event.eventType.eventDetail,
            "eventOutcome": str(event.eventOutcome),
            "eventOutcomeDetailNote": event.eventOutcomeDetailNote,
            "linkingAgentIdentifierType": id_types['linking_agent'],
            "linkingAgentIdentifierValue": event.linkingAgentIdentifierValue,
            "linkingAgentRole": event.linkingAgentRole,
            "linkingObjectIdentifierType": id_types['linking_object'],
            "linkingObjectIdentifierValue": objid,
        }
        generator.insert_from_specification(target, template, data)

    generator.write(filename)
def run(self, filename=None, elementToAppendTo=None, spec=None, info=None, index=None):
    """Insert an element built from ``spec`` into ``filename`` and write it back.

    Args:
        filename: file the generator is loaded from and written to
        elementToAppendTo: looked up with ``find_element`` to get the target
        spec: specification for the inserted element; None means ``{}``
        info: data passed along with the specification; None means ``{}``
        index: forwarded to ``insert_from_specification``
    """
    spec = {} if spec is None else spec
    info = {} if info is None else info

    generator = XMLGenerator(filepath=filename)
    parent = generator.find_element(elementToAppendTo)
    generator.insert_from_specification(parent, spec, data=info, index=index)
    generator.write(filename)
def create(self, template, destination, outcome, short_message, message, date=None, ip=None, task=None):
    """Create an XML receipt at ``destination`` from a JSON template.

    Args:
        template: name of the template whose source is parsed as the JSON
            specification
        destination: path of the XML file to generate
        outcome: stored under the 'outcome' data key
        short_message: not used by this method
        message: stored under the 'message' data key
        date: stored under the 'date' data key; the current time is used
            when falsy
        ip: optional information package; when given, the data is filled
            from it and its content type file is scanned for
            'ArkivobjektArende' elements
        task: optional task; when given, its validations (ordered by start
            time) are serialized into the data
    """
    logger.debug(u'Creating XML receipt: {}'.format(destination))
    spec = json.loads(get_template(template).template.source)

    data = {}
    if ip is not None:
        data = fill_specification_data(data=data, ip=ip)

    data['outcome'] = outcome
    data['message'] = message
    data['date'] = date or timezone.now()

    if task is not None:
        task_validations = Validation.objects.filter(
            task=task).order_by('time_started')
        data['validations'] = ValidationSerializer(task_validations, many=True).data

    data[u'ärenden'] = []

    content_type_file = ip.get_content_type_file() if ip is not None else None
    if content_type_file is not None:
        content_tree = etree.parse(ip.open_file(content_type_file))
        case_elements = content_tree.xpath(
            "//*[local-name()='ArkivobjektArende']")

        for case_element in case_elements:
            case_id = case_element.xpath(
                "*[local-name()='ArkivobjektID']")[0].text
            case_data = {'ArkivobjektID': case_id}

            search = Search(index=['component']).filter(
                'bool',
                must=[
                    Q('term', type=u"Ärende"),
                    Q('term', **{'reference_code.keyword': case_id}),
                    Q('term', ip=str(ip.pk)),
                ],
            )
            try:
                case_data['id'] = search.execute().hits[0].meta.id
            except IndexError:
                # No hit for this case: the entry is kept without an 'id'.
                pass

            data[u'ärenden'].append(case_data)

    XMLGenerator().generate({destination: {'spec': spec, 'data': data}})
    logger.info(u'XML receipt created: {}'.format(destination))