Exemple #1
0
    def test_element_with_containsFiles_without_files(self):
        # The child 'bar' is flagged with '-containsFiles', but generate()
        # is called without a folderToParse. The root sets '-allowEmpty'
        # and the test only checks that the output file gets written.
        fname_attr = {'-name': 'name', '#content': [{'var': 'FName'}]}
        file_element = {
            '-name': 'bar',
            '-containsFiles': True,
            '-attr': [fname_attr],
        }
        spec = {
            '-name': 'foo',
            '-allowEmpty': True,
            '-children': [file_element],
        }

        XMLGenerator({self.fname: spec}).generate()

        self.assertTrue(os.path.exists(self.fname))
Exemple #2
0
def generate_content_mets(ip):
    """Generate the content METS file for ``ip`` and record its metadata.

    The file path is the IP's content METS path joined onto
    ``ip.object_path``. The specification and data come from the profile
    whose type is the lower-cased package type display name. After
    generation, the relative path, creation date, size, digest algorithm
    and digest of the file are stored on ``ip``, which is then saved.
    """
    relative_path = ip.get_content_mets_file_path()
    absolute_path = os.path.join(ip.object_path, relative_path)

    profile_type = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    entry = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(
        {absolute_path: entry},
        folderToParse=ip.object_path,
        algorithm=checksum_algorithm,
    )

    # Persist facts about the file that was just generated.
    created = timestamp_to_datetime(creation_date(absolute_path))
    ip.content_mets_path = relative_path
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(absolute_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
        checksum_algorithm.upper()
    ]
    ip.content_mets_digest = calculate_checksum(
        absolute_path, algorithm=checksum_algorithm,
    )
    ip.save()
Exemple #3
0
def generate_package_mets(ip):
    """Generate the package METS file for ``ip`` and record its metadata.

    The XML file is placed next to the package: ``ip.object_path`` with
    its extension replaced by ``.xml``. SIPs use the
    ``submit_description`` profile and AIPs the ``aip_description``
    profile.

    Raises:
        ValueError: if the package type is neither SIP nor AIP.
    """
    sa = ip.submission_agreement

    profile_type = {
        InformationPackage.SIP: 'submit_description',
        InformationPackage.AIP: 'aip_description',
    }.get(ip.package_type)
    if profile_type is None:
        raise ValueError(
            'Cannot create package mets for IP of type {package_type}'.format(
                package_type=ip.package_type
            )
        )

    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    xmlpath = os.path.splitext(ip.object_path)[0] + '.xml'

    data = fill_specification_data(raw_data, ip=ip, sa=sa)
    package_created = timestamp_to_datetime(creation_date(ip.object_path))
    data["_IP_CREATEDATE"] = package_created.isoformat()

    entry = {'spec': rel.profile.specification, 'data': data}
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate(
        {xmlpath: entry},
        folderToParse=ip.object_path,
        algorithm=checksum_algorithm,
    )

    # Persist facts about the file that was just generated.
    xml_created = timestamp_to_datetime(creation_date(xmlpath))
    ip.package_mets_path = normalize_path(xmlpath)
    ip.package_mets_create_date = xml_created.isoformat()
    ip.package_mets_size = os.path.getsize(xmlpath)
    ip.package_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
        checksum_algorithm.upper()
    ]
    ip.package_mets_digest = calculate_checksum(xmlpath, algorithm=checksum_algorithm)
    ip.save()
Exemple #4
0
    def run(self):
        """Generate the content METS file for this task's information package.

        The path returned by ``get_content_mets_file_path()`` is used as-is
        both as the output location and as the value stored on the IP.
        After generation the creation date, size, digest algorithm and
        digest of the file are stored on the IP, which is then saved.
        """
        ip = self.get_information_package()
        mets_path = ip.get_content_mets_file_path()

        profile_type = ip.get_package_type_display().lower()
        rel = ip.get_profile_rel(profile_type)
        raw_data = ip.get_profile_data(profile_type)
        entry = {
            'spec': rel.profile.specification,
            'data': fill_specification_data(raw_data, ip=ip),
        }
        checksum_algorithm = ip.get_checksum_algorithm()

        XMLGenerator().generate(
            {mets_path: entry},
            folderToParse=ip.object_path,
            algorithm=checksum_algorithm,
        )

        # Persist facts about the file that was just generated.
        created = timestamp_to_datetime(creation_date(mets_path))
        ip.content_mets_path = mets_path
        ip.content_mets_create_date = created.isoformat()
        ip.content_mets_size = os.path.getsize(mets_path)
        ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
            checksum_algorithm.upper()
        ]
        ip.content_mets_digest = calculate_checksum(
            mets_path, algorithm=checksum_algorithm,
        )
        ip.save()
Exemple #5
0
    def test_generate_empty_element_with_multiple_attribute(self):
        # An otherwise empty root declares two attributes with literal text
        # content; the expected output is a self-closing tag carrying both.
        spec = {
            '-name': 'foo',
            '-attr': [
                {'-name': 'attr1', '#content': [{'text': 'bar'}]},
                {'-name': 'attr2', '#content': [{'text': 'baz'}]},
            ],
        }

        XMLGenerator({self.fname: spec}).generate()

        root = etree.parse(self.fname).getroot()
        # Serialise to text instead of bytes: on Python 3 the default
        # tostring() result is bytes and never equals a str literal.
        self.assertEqual('<foo attr1="bar" attr2="baz"/>',
                         etree.tostring(root, encoding='unicode'))
Exemple #6
0
    def test_position_non_alphabetically(self):
        # Children are declared as 'b' then 'a'; the generated document
        # must keep that declaration order rather than sorting by name.
        children = [
            {'-name': child_name, '-allowEmpty': True}
            for child_name in ('b', 'a')
        ]
        spec = {'-name': 'foo', '-children': children}

        XMLGenerator({self.fname: spec}).generate()

        root = etree.parse(self.fname).getroot()
        elem_a = root.find('.//a')
        elem_b = root.find('.//b')

        self.assertLess(root.index(elem_b), root.index(elem_a))
Exemple #7
0
    def run(self, filesToCreate=None, folderToParse=None, extra_paths_to_parse=None,
            parsed_files=None, algorithm='SHA-256'):
        """
        Generate XML files from the given specifications.

        Each value in ``filesToCreate`` has its ``'data'`` entry replaced by
        the result of ``fill_specification_data`` for the task's IP (looked
        up by ``self.ip``) and that IP's submission agreement. If no such
        IP exists, both are passed as ``None``. The remaining arguments are
        forwarded to ``XMLGenerator.generate``.
        """
        filesToCreate = {} if filesToCreate is None else filesToCreate
        extra_paths_to_parse = [] if extra_paths_to_parse is None else extra_paths_to_parse
        parsed_files = [] if parsed_files is None else parsed_files

        ip = InformationPackage.objects.filter(pk=self.ip).first()
        sa = ip.submission_agreement if ip is not None else None

        for entry in filesToCreate.values():
            entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

        XMLGenerator().generate(
            filesToCreate,
            folderToParse=folderToParse,
            extra_paths_to_parse=extra_paths_to_parse,
            parsed_files=parsed_files,
            algorithm=algorithm,
        )
Exemple #8
0
    def test_generate_element_with_content(self):
        # A root element with literal text content must be written out
        # with that text as its body.
        spec = {'-name': 'foo', '#content': [{'text': 'bar'}]}

        XMLGenerator({self.fname: spec}).generate()

        root = etree.parse(self.fname).getroot()
        # Serialise to text instead of bytes: on Python 3 the default
        # tostring() result is bytes and never equals a str literal.
        self.assertEqual('<foo>bar</foo>',
                         etree.tostring(root, encoding='unicode'))
Exemple #9
0
    def test_generate_empty_element(self):
        # A root with neither content nor '-allowEmpty' is expected to
        # raise AssertionError and leave no output file behind.
        spec = {'-name': 'foo'}

        with self.assertRaises(AssertionError):
            XMLGenerator({self.fname: spec}, {}).generate()

        self.assertFalse(os.path.exists(self.fname))
Exemple #10
0
    def test_generate_element_with_content_using_var(self):
        # The root's content references variable 'bar'; the info dict maps
        # it to 'baz', which must end up as the element text.
        spec = {'-name': 'foo', '#content': [{'var': 'bar'}]}
        info = {'bar': 'baz'}

        XMLGenerator({self.fname: spec}, info).generate()

        root = etree.parse(self.fname).getroot()
        # Serialise to text instead of bytes: on Python 3 the default
        # tostring() result is bytes and never equals a str literal.
        self.assertEqual('<foo>baz</foo>',
                         etree.tostring(root, encoding='unicode'))
Exemple #11
0
    def test_element_with_filtered_files(self):
        # The '-containsFiles' element 'bar' carries a filter on 'href'
        # ("record1/*"). Every file under self.datadir whose relative path
        # starts with 'record1' must have a matching bar/baz pair, every
        # other file must not, and no additional 'bar' elements may exist.
        name_attr = {'-name': 'name', '#content': [{'var': 'FName'}]}
        href_attr = {'-name': 'href', '#content': [{'var': 'href'}]}
        file_element = {
            '-name': 'bar',
            '-containsFiles': True,
            '-filters': {'href': 'record1/*'},
            '-attr': [name_attr],
            '-children': [{'-name': 'baz', '-attr': [href_attr]}],
        }
        spec = {'-name': 'foo', '-children': [file_element]}

        XMLGenerator({self.fname: spec}).generate(folderToParse=self.datadir)

        tree = etree.parse(self.fname)

        expected_count = 0
        for dirpath, _dirnames, filenames in os.walk(self.datadir):
            for filename in filenames:
                abs_path = os.path.join(dirpath, filename)
                relpath = os.path.relpath(abs_path, self.datadir)

                match = tree.find(
                    ".//bar[@name='%s']/baz[@href='%s']" % (filename, relpath))

                if not relpath.startswith('record1'):
                    self.assertIsNone(match)
                    continue

                self.assertIsNotNone(match)
                expected_count += 1

        self.assertEqual(len(tree.findall('.//bar')), expected_count)
Exemple #12
0
def generate_events_xml(ip):
    """Generate the events XML file for ``ip``.

    The file is written to the IP's events file path joined onto
    ``ip.object_path``, using the spec from ``get_event_spec()`` and the
    data from ``fill_specification_data(ip=ip)``.
    """
    destination = os.path.join(ip.object_path, ip.get_events_file_path())
    entry = {
        'spec': get_event_spec(),
        'data': fill_specification_data(ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate({destination: entry}, algorithm=checksum_algorithm)
Exemple #13
0
    def test_generate_empty_element_with_allowEmpty(self):
        # With '-allowEmpty' set, an element without content must still be
        # written, as a self-closing tag.
        spec = {'-name': 'foo', '-allowEmpty': 1}

        XMLGenerator({self.fname: spec}, {}).generate()

        self.assertTrue(os.path.exists(self.fname))

        root = etree.parse(self.fname).getroot()
        # Serialise to text instead of bytes: on Python 3 the default
        # tostring() result is bytes and never equals a str literal.
        self.assertEqual(etree.tostring(root, encoding='unicode'), '<foo/>')
Exemple #14
0
 def run(self):
     """Generate the events XML file for this task's information package.

     The file is written to the IP's events file path joined onto
     ``ip.object_path``, using the spec from ``get_event_spec()``.
     """
     ip = self.get_information_package()
     destination = os.path.join(ip.object_path, ip.get_events_file_path())
     entry = {
         'spec': get_event_spec(),
         'data': fill_specification_data(ip=ip),
     }
     checksum_algorithm = ip.get_checksum_algorithm()

     XMLGenerator().generate({destination: entry}, algorithm=checksum_algorithm)
Exemple #15
0
def generate_premis(ip):
    """Generate the PREMIS file for ``ip``.

    The output path is ``ip.get_premis_file_path()`` used as-is. The
    specification and data come from the IP's ``preservation_metadata``
    profile, and ``ip.object_path`` is passed as the folder to parse.
    """
    destination = ip.get_premis_file_path()

    profile_type = 'preservation_metadata'
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    entry = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }
    checksum_algorithm = ip.get_checksum_algorithm()

    XMLGenerator().generate(
        {destination: entry},
        folderToParse=ip.object_path,
        algorithm=checksum_algorithm,
    )
Exemple #16
0
    def run(self,
            filename=None,
            elementToAppendTo=None,
            spec=None,
            info=None,
            index=None):
        """Insert an element built from ``spec`` into an existing XML file.

        All arguments are forwarded to ``XMLGenerator.insert``.

        ``spec`` and ``info`` default to ``None`` instead of ``{}`` so that
        no dict is shared between calls; a fresh empty dict is substituted
        when either is omitted, which keeps existing callers working.
        """
        if spec is None:
            spec = {}

        if info is None:
            info = {}

        generator = XMLGenerator()

        generator.insert(filename,
                         elementToAppendTo,
                         spec,
                         info=info,
                         index=index)
Exemple #17
0
def generate_content_metadata(ip):
    """Generate the content METS (and, if applicable, PREMIS) for ``ip``.

    A PREMIS file is included only when the IP's
    ``preservation_metadata`` profile is locked. Both files are placed
    under ``ip.object_path`` and created in one ``XMLGenerator.generate``
    call. Afterwards the path, creation date, size, digest algorithm and
    digest of the content METS file are stored on ``ip``, which is then
    saved.
    """
    files_to_create = {}

    if ip.profile_locked('preservation_metadata'):
        premis_type = 'preservation_metadata'
        premis_rel = ip.get_profile_rel(premis_type)
        premis_data = fill_specification_data(
            ip.get_profile_data(premis_type), ip=ip,
        )
        # The PREMIS path is run through parseContent with the filled data.
        premis_path = parseContent(ip.get_premis_file_path(), premis_data)
        files_to_create[os.path.join(ip.object_path, premis_path)] = {
            'spec': premis_rel.profile.specification,
            'data': premis_data,
        }

    relative_mets_path = ip.get_content_mets_file_path()
    absolute_mets_path = os.path.join(ip.object_path, relative_mets_path)
    profile_type = ip.get_package_type_display().lower()
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    files_to_create[absolute_mets_path] = {
        'spec': rel.profile.specification,
        'data': fill_specification_data(raw_data, ip=ip),
    }

    parsed_files = rel.data.parsed_files
    extra_paths_to_parse = rel.data.extra_paths_to_parse
    checksum_algorithm = ip.get_checksum_algorithm()

    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(
        files_to_create,
        folderToParse=ip.object_path,
        algorithm=checksum_algorithm,
        parsed_files=parsed_files,
        extra_paths_to_parse=extra_paths_to_parse,
    )

    # Persist facts about the content METS file that was just generated.
    created = timestamp_to_datetime(creation_date(absolute_mets_path))
    ip.content_mets_path = relative_mets_path
    ip.content_mets_create_date = created.isoformat()
    ip.content_mets_size = os.path.getsize(absolute_mets_path)
    ip.content_mets_digest_algorithm = MESSAGE_DIGEST_ALGORITHM_CHOICES_DICT[
        checksum_algorithm.upper()
    ]
    ip.content_mets_digest = calculate_checksum(
        absolute_mets_path, algorithm=checksum_algorithm,
    )
    ip.save()
Exemple #18
0
    def run(self, info=None, filesToCreate=None, folderToParse=None, algorithm='SHA-256'):
        """
        Generates the XML using the specified data and folder, and adds the XML
        to the specified files

        ``info`` and ``filesToCreate`` default to ``None`` instead of ``{}``
        so that no dict is shared between calls; a fresh empty dict is
        substituted when either is omitted. Progress is set to 100 of 100
        once generation has finished.
        """
        if info is None:
            info = {}

        if filesToCreate is None:
            filesToCreate = {}

        generator = XMLGenerator(
            filesToCreate, info, self.taskobj
        )

        generator.generate(
            folderToParse=folderToParse, algorithm=algorithm,
        )

        self.set_progress(100, total=100)
Exemple #19
0
def GenerateXML(self,
                filesToCreate=None,
                folderToParse=None,
                extra_paths_to_parse=None,
                parsed_files=None,
                algorithm='SHA-256'):
    """
    Generates the XML using the specified data and folder, and adds the XML
    to the specified files

    Each value in ``filesToCreate`` has its ``'data'`` entry replaced by
    the result of ``fill_specification_data`` for the IP identified by
    ``self.ip`` and that IP's submission agreement. If no such IP exists,
    both are passed as ``None`` and unknown file types and encrypted files
    are disallowed. A success event listing the generated paths is created
    at the end.
    """

    if filesToCreate is None:
        filesToCreate = {}

    if extra_paths_to_parse is None:
        extra_paths_to_parse = []

    if parsed_files is None:
        parsed_files = []

    ip = InformationPackage.objects.filter(pk=self.ip).first()
    sa = None
    allow_unknown_file_types = False
    allow_encrypted_files = False
    if ip is not None:
        sa = ip.submission_agreement
        allow_unknown_file_types = ip.get_allow_unknown_file_types()
        allow_encrypted_files = ip.get_allow_encrypted_files()

    # Only the values are modified, so the keys need not be iterated.
    for entry in filesToCreate.values():
        entry['data'] = fill_specification_data(entry['data'], ip=ip, sa=sa)

    generator = XMLGenerator(
        allow_unknown_file_types=allow_unknown_file_types,
        allow_encrypted_files=allow_encrypted_files,
    )
    generator.generate(
        filesToCreate,
        folderToParse=folderToParse,
        extra_paths_to_parse=extra_paths_to_parse,
        parsed_files=parsed_files,
        algorithm=algorithm,
    )

    # filesToCreate was already normalised to a dict at the top, so no
    # second None check is needed before building the message.
    msg = "Generated %s" % ", ".join(filesToCreate)
    self.create_success_event(msg)
Exemple #20
0
    def test_generate_empty_element_with_empty_children_with_allow_empty(self):
        # The root has no content of its own, but its only child sets
        # '-allowEmpty'; the output file must be written.
        empty_child = {'-name': 'bar', '-allowEmpty': True}
        spec = {'-name': 'foo', '-children': [empty_child]}

        XMLGenerator({self.fname: spec}, {}).generate()

        self.assertTrue(os.path.exists(self.fname))
Exemple #21
0
    def test_generate_empty_element_with_empty_children(self):
        # Neither the root nor its only child has content or '-allowEmpty';
        # generation is expected to raise AssertionError and leave no file.
        spec = {'-name': 'foo', '-children': [{'-name': 'bar'}]}

        with self.assertRaises(AssertionError):
            XMLGenerator({self.fname: spec}, {}).generate()

        self.assertFalse(os.path.exists(self.fname))
Exemple #22
0
    def test_generate_empty_element_with_children(self):
        # The root has no content of its own, but its child has text
        # content; the output file must be written.
        child = {'-name': 'bar', '#content': [{'text': 'baz'}]}
        spec = {'-name': 'foo', '-children': [child]}

        XMLGenerator({self.fname: spec}, {}).generate()

        self.assertTrue(os.path.exists(self.fname))
Exemple #23
0
    def test_generate_element_with_children(self):
        # A root with one child holding literal text; the expected output
        # is the child nested inside the root with two-space indentation.
        child = {'-name': 'bar', '#content': [{'text': 'baz'}]}
        spec = {'-name': 'foo', '-children': [child]}

        XMLGenerator({self.fname: spec}, {'bar': 'baz'}).generate()

        root = etree.parse(self.fname).getroot()

        # Serialise to text instead of bytes: on Python 3 the default
        # tostring() result is bytes and never equals a str literal.
        self.assertEqual('<foo>\n  <bar>baz</bar>\n</foo>',
                         etree.tostring(root, encoding='unicode'))
Exemple #24
0
    def test_generate_empty_element_with_empty_attribute(self):
        # The root's only attribute has empty text content; generation is
        # expected to raise AssertionError and leave no output file.
        empty_attr = {'-name': 'bar', '#content': [{'text': ''}]}
        spec = {'-name': 'foo', '-attr': [empty_attr]}

        with self.assertRaises(AssertionError):
            XMLGenerator({self.fname: spec}, {}).generate()

        self.assertFalse(os.path.exists(self.fname))
Exemple #25
0
def generate_premis(ip):
    """Generate the PREMIS file for ``ip``.

    The specification and data come from the IP's
    ``preservation_metadata`` profile. The output path is the IP's PREMIS
    file path run through ``parseContent`` with the filled data, and
    ``ip.object_path`` is passed as the folder to parse.
    """
    profile_type = 'preservation_metadata'
    rel = ip.get_profile_rel(profile_type)
    raw_data = ip.get_profile_data(profile_type)
    data = fill_specification_data(raw_data, ip=ip)
    destination = parseContent(ip.get_premis_file_path(), data)

    entry = {'spec': rel.profile.specification, 'data': data}
    checksum_algorithm = ip.get_checksum_algorithm()

    xml_generator = XMLGenerator(
        allow_unknown_file_types=ip.get_allow_unknown_file_types(),
        allow_encrypted_files=ip.get_allow_encrypted_files(),
    )
    xml_generator.generate(
        {destination: entry},
        folderToParse=ip.object_path,
        algorithm=checksum_algorithm,
    )
Exemple #26
0
    def test_generate_multiple_element_same_name_same_level(self):
        # Two siblings are declared as 'bar' and 'bar#1'; the generated
        # document is expected to contain two 'bar' elements.
        siblings = [
            {'-name': sibling_name, '-allowEmpty': True}
            for sibling_name in ('bar', 'bar#1')
        ]
        spec = {
            '-name': 'foo',
            '-allowEmpty': True,
            '-children': siblings,
        }

        XMLGenerator({self.fname: spec}, {}).generate()

        tree = etree.parse(self.fname)
        self.assertEqual(len(tree.findall('.//bar')), 2)
Exemple #27
0
    def test_generate_namespaces(self):
        # The root declares an nsmap with the 'xsi' prefix and a required
        # 'schemaLocation' attribute in that namespace, filled from the
        # 'xsi:schemaLocation' variable. Both the namespace mapping and the
        # namespaced attribute value must appear on the generated root.
        xsi_uri = 'http://www.w3.org/2001/XMLSchema-instance'
        nsmap = {'xsi': xsi_uri}

        schema_location_attr = {
            '-name': 'schemaLocation',
            '-namespace': 'xsi',
            '-req': 1,
            '#content': [{'var': 'xsi:schemaLocation'}],
        }
        spec = {
            '-name': 'foo',
            '-nsmap': nsmap,
            '-attr': [schema_location_attr],
        }
        info = {
            'xsi:schemaLocation': 'http://www.w3.org/1999/xlink schemas/xlink.xsd',
        }

        XMLGenerator({self.fname: spec}, info).generate()

        root = etree.parse(self.fname).getroot()
        generated_xsi = root.nsmap.get('xsi')

        self.assertEqual(generated_xsi, nsmap.get('xsi'))
        self.assertEqual(root.attrib.get('{%s}schemaLocation' % generated_xsi),
                         info['xsi:schemaLocation'])
Exemple #28
0
def append_events(ip, events, filename):
    """Append one element per event to the 'premis' element of an XML file.

    Args:
        ip: primary key of an information package. It is used to look up
            the default file name and the default events.
        events: events to append. When falsy, all ``EventIP`` rows whose
            ``linkingObjectIdentifierValue`` equals ``ip`` are used.
            ``.iterator()`` is called on it, so a caller-supplied value
            must provide that method.
        filename: XML file to read and write back. When falsy, the events
            file path of the IP under its ``object_path`` is used.
    """
    if not filename:
        ip_obj = InformationPackage.objects.get(pk=ip)
        filename = os.path.join(ip_obj.object_path,
                                ip_obj.get_events_file_path())

    generator = XMLGenerator(filepath=filename)
    template = get_event_element_spec()

    if not events:
        events = EventIP.objects.filter(linkingObjectIdentifierValue=ip)

    # Identifier type values, looked up once through the cached parameters.
    id_types = {
        kind: Parameter.objects.cached(
            'entity', '%s_identifier_type' % kind, 'value')
        for kind in ['event', 'linking_agent', 'linking_object']
    }

    target = generator.find_element('premis')
    for event in events.iterator():
        # Each event is resolved against the IP it links to.
        linked_ip = InformationPackage.objects.get(
            pk=event.linkingObjectIdentifierValue)

        # Use the event type's code unless it is None or the empty string,
        # in which case the eventType value is used instead.
        event_type = event.eventType
        code = event_type.code
        if code is None or code == '':
            event_type_value = str(event_type.eventType)
        else:
            event_type_value = str(code)

        data = {
            "eventIdentifierType": id_types['event'],
            "eventIdentifierValue": str(event.eventIdentifierValue),
            "eventType": event_type_value,
            "eventDateTime": str(event.eventDateTime),
            "eventDetail": event_type.eventDetail,
            "eventOutcome": str(event.eventOutcome),
            "eventOutcomeDetailNote": event.eventOutcomeDetailNote,
            "linkingAgentIdentifierType": id_types['linking_agent'],
            "linkingAgentIdentifierValue": event.linkingAgentIdentifierValue,
            "linkingAgentRole": event.linkingAgentRole,
            "linkingObjectIdentifierType": id_types['linking_object'],
            "linkingObjectIdentifierValue": linked_ip.object_identifier_value,
        }

        generator.insert_from_specification(target, template, data)

    generator.write(filename)
Exemple #29
0
    def run(self, filename=None, elementToAppendTo=None, spec=None, info=None, index=None):
        """Insert an element built from ``spec`` into an existing XML file.

        The file is loaded from ``filename``, the element named by
        ``elementToAppendTo`` is located, a new element built from ``spec``
        and ``info`` is inserted into it at ``index``, and the result is
        written back to ``filename``. Omitted ``spec`` and ``info`` are
        treated as empty dicts.
        """
        spec = {} if spec is None else spec
        info = {} if info is None else info

        generator = XMLGenerator(filepath=filename)
        parent = generator.find_element(elementToAppendTo)
        generator.insert_from_specification(parent, spec, data=info, index=index)
        generator.write(filename)
Exemple #30
0
    def create(self, template, destination, outcome, short_message, message,
               date=None, ip=None, task=None):
        """Create an XML receipt at ``destination`` from a JSON template.

        The template source is parsed as JSON and used as the
        specification. The data passed to the generator holds the outcome,
        message and date (the current time when ``date`` is falsy). If
        ``ip`` is given, the output of ``fill_specification_data`` is
        included, and if ``task`` is given, its serialized validations
        ordered by start time. One entry per ``ArkivobjektArende`` element
        in the IP's content type file is added under the ``ärenden`` key.

        ``short_message`` is accepted but not used by this method.
        """
        logger.debug(u'Creating XML receipt: {}'.format(destination))
        spec = json.loads(get_template(template).template.source)

        if ip is not None:
            data = fill_specification_data(data={}, ip=ip)
        else:
            data = {}
        data['outcome'] = outcome
        data['message'] = message
        data['date'] = date or timezone.now()

        if task is not None:
            validations = Validation.objects.filter(task=task).order_by('time_started')
            data['validations'] = ValidationSerializer(validations, many=True).data

        data[u'ärenden'] = []
        cts = ip.get_content_type_file() if ip is not None else None
        if cts is not None:
            tree = etree.parse(ip.open_file(cts))
            for arende in tree.xpath("//*[local-name()='ArkivobjektArende']"):
                id_nodes = arende.xpath("*[local-name()='ArkivobjektID']")
                arende_id = id_nodes[0].text
                a_data = {'ArkivobjektID': arende_id}

                # Attach the id of the matching search hit when there is
                # one; an empty result leaves 'id' unset.
                try:
                    must = [
                        Q('term', type=u"Ärende"),
                        Q('term', **{'reference_code.keyword': arende_id}),
                        Q('term', ip=str(ip.pk)),
                    ]
                    search = Search(index=['component']).filter('bool', must=must)
                    a_data['id'] = search.execute().hits[0].meta.id
                except IndexError:
                    pass

                data[u'ärenden'].append(a_data)

        XMLGenerator().generate({destination: {'spec': spec, 'data': data}})
        logger.info(u'XML receipt created: {}'.format(destination))