Ejemplo n.º 1
0
    def test_raw_to_dict_throws_exception_when_invalid_xml(self):
        # Arrange
        raw_xml = "<root><test>Hello</test?</root>"

        # Act # Assert
        with self.assertRaises(exceptions.XMLError):
            raw_xml_to_dict(raw_xml)
    def test_raw_to_dict_with_bad_type_raises_error(self):
        # Arrange
        raw_xml = "<root><test>Hello</test?</root>"

        # Act # Assert
        with self.assertRaises(exceptions.CoreError):
            raw_xml_to_dict(raw_xml, postprocessor=1)
def transform_dict_identifier_to_oai_identifier(data):
    """Build an OaiIdentify object from a dict.

    Args:
        data: Mapping holding the identify values; every key listed below plus
            "raw" is read with ``data[key]``.

    Returns:
        OaiIdentify instance.

    """
    # (OaiIdentify keyword, key in ``data``) pairs, looked up in this order.
    field_to_key = (
        ("admin_email", "adminEmail"),
        ("base_url", "baseURL"),
        ("repository_name", "repositoryName"),
        ("deleted_record", "deletedRecord"),
        ("delimiter", "delimiter"),
        ("description", "description"),
        ("earliest_datestamp", "earliestDatestamp"),
        ("granularity", "granularity"),
        ("oai_identifier", "oai_identifier"),
        ("protocol_version", "protocolVersion"),
        ("repository_identifier", "repositoryIdentifier"),
        ("sample_identifier", "sampleIdentifier"),
        ("scheme", "scheme"),
    )
    identify_kwargs = {field: data[key] for field, key in field_to_key}
    # The raw XML is stored converted to a dict, not as a string.
    identify_kwargs["raw"] = raw_xml_to_dict(data["raw"])
    return OaiIdentify(**identify_kwargs)
Ejemplo n.º 4
0
    def test_raw_to_dict_valid(self):
        # Arrange
        raw_xml = '<root><test>Hello</test></root>'
        expected_dict = OrderedDict([(u'root', OrderedDict([(u'test', u'Hello')]))])

        # Act
        xml_dict = raw_xml_to_dict(raw_xml)

        # Assert
        self.assertEquals(expected_dict, xml_dict)
Ejemplo n.º 5
0
    def test_raw_to_dict_valid(self):
        # Arrange
        raw_xml = "<root><test>Hello</test></root>"
        expected_dict = OrderedDict([("root", OrderedDict([("test", "Hello")]))
                                     ])

        # Act
        xml_dict = raw_xml_to_dict(raw_xml)

        # Assert
        self.assertEquals(expected_dict, xml_dict)
    def test_raw_to_dict_with_numeric_post_processor(self):
        # Arrange
        raw_xml = "<root><test>Hello</test><test>1</test></root>"
        expected_dict = OrderedDict([("root",
                                      OrderedDict([("test", ["Hello", 1])]))])

        # Act
        xml_dict = raw_xml_to_dict(raw_xml, postprocessor="NUMERIC")

        # Assert
        self.assertEquals(expected_dict, xml_dict)
Ejemplo n.º 7
0
    def insert_oai_records(self):
        """Populate and save the mocked OAI records.

        Returns:
            List with the value returned by ``save()`` for each record, in the
            order yielded by ``OaiPmhMock.mock_oai_record(version=1)``.

        """

        def _fill_and_save(record):
            # The record identifier doubles as its title.
            record.title = record.identifier
            record.registry = self.registry
            # Every record is attached to the first known metadata format.
            record.harvester_metadata_format = self.oai_metadata_formats[0]
            record.dict_content = xml_utils.raw_xml_to_dict(
                record.xml_content, xml_utils.post_processor
            )
            return record.save()

        return [
            _fill_and_save(record)
            for record in OaiPmhMock.mock_oai_record(version=1)
        ]
Ejemplo n.º 8
0
def get_role(curate_data_structure):
    """Extract the role from the form string of a curate data structure.

    Args:
        curate_data_structure: Object exposing a ``form_string`` attribute
            holding XML.

    Returns:
        Whatever ``role_extraction`` returns for the converted form string.

    """
    # Convert the XML form string to a dict before handing it to the extractor.
    form_as_dict = xml_utils.raw_xml_to_dict(
        curate_data_structure.form_string, xml_utils.post_processor
    )
    return role_extraction(form_as_dict)
    def test_raw_to_dict_with_callable_post_processor(self):
        def test_processor(path, key, value):
            return key, "test"

        # Arrange
        raw_xml = "<root><test>Hello</test><test>1</test></root>"
        expected_dict = OrderedDict([("root", "test")])

        # Act
        xml_dict = raw_xml_to_dict(raw_xml, postprocessor=test_processor)

        # Assert
        self.assertEquals(expected_dict, xml_dict)
Ejemplo n.º 10
0
def transform_dict_set_to_oai_harvester_set(data):
    """Build one OaiHarvesterSet per entry of ``data``.

    Args:
        data: Iterable of mappings, each read through the keys 'setName',
            'setSpec' and 'raw'.

    Returns:
        List of OaiHarvesterSet instances, in input order.

    """
    harvester_sets = []
    for entry in data:
        harvester_sets.append(
            OaiHarvesterSet(
                set_name=entry['setName'],
                set_spec=entry['setSpec'],
                # The raw XML is stored converted to a dict.
                raw=raw_xml_to_dict(entry['raw']),
            )
        )
    return harvester_sets
Ejemplo n.º 11
0
    def convert_to_dict(self):
        """Convert ``self.xml_content`` to a dict and keep it in ``self.dict_content``.

        When SEARCHABLE_DATA_OCCURRENCES_LIMIT is not None, the converted dict
        is first passed to ``xml_utils.remove_lists_from_xml_dict`` together
        with that limit.

        Returns:

        """
        occurrences_limit = SEARCHABLE_DATA_OCCURRENCES_LIMIT
        converted = xml_utils.raw_xml_to_dict(
            self.xml_content, xml_utils.post_processor
        )
        if occurrences_limit is not None:
            # Drop the lists whose size exceeds the configured limit.
            xml_utils.remove_lists_from_xml_dict(converted, occurrences_limit)
        # Only store the dictionary once it has been fully processed.
        self.dict_content = converted
Ejemplo n.º 12
0
def upsert(oai_identify):
    """Create or update an OaiIdentify.

    A non-empty string in ``oai_identify.raw`` is converted to a dict before
    saving; if the conversion raises XMLError, ``raw`` is set to an empty dict.

    Args:
        oai_identify: OaiIdentify to create or update.

    Returns:
        Value returned by ``oai_identify.save()``.

    """
    current_raw = oai_identify.raw
    if current_raw and isinstance(current_raw, str):
        try:
            converted = raw_xml_to_dict(current_raw)
        except exceptions.XMLError:
            # Unparsable XML is stored as an empty dict instead of failing.
            converted = {}
        oai_identify.raw = converted

    return oai_identify.save()
Ejemplo n.º 13
0
def transform_dict_metadata_format_to_oai_harvester_metadata_format(data):
    """Build one OaiHarvesterMetadataFormat per entry of ``data``.

    Args:
        data: Iterable of mappings, each read through the keys
            'metadataPrefix', 'metadataNamespace', 'schema' and 'raw'.

    Returns:
        List of OaiHarvesterMetadataFormat instances, in input order.

    """
    metadata_formats = []
    for entry in data:
        metadata_formats.append(
            OaiHarvesterMetadataFormat(
                metadata_prefix=entry['metadataPrefix'],
                metadata_namespace=entry['metadataNamespace'],
                schema=entry['schema'],
                # The raw XML is stored converted to a dict.
                raw=raw_xml_to_dict(entry['raw']),
            )
        )
    return metadata_formats
Ejemplo n.º 14
0
    def transform(self, xml_inputs, session_key):
        """Transforms each XML input to a JSON content

        Args:
            xml_inputs: Iterable of items read through the keys "title" and
                "xml_content".
            session_key: Not used by this implementation.

        Returns:
            List with one TransformResult per input item, each holding a single
            TransformResultContent with the JSON text and ``self.extension``.

        """
        results_transform = []
        # loops on all xml input
        for xml_item in xml_inputs:
            # generate the title document with the sha
            document_name_with_sha = AbstractExporter.get_title_document(
                xml_item["title"], xml_item["xml_content"])
            transform_result = TransformResult()
            # set the document name to the collection
            transform_result.source_document_name = document_name_with_sha
            # for an JSON transformation there is a list of one element
            transform_result_content = TransformResultContent()
            transform_result_content.file_name = document_name_with_sha
            # Transform to JSON
            transformed_content = xml_utils.raw_xml_to_dict(
                xml_item["xml_content"],
                postprocessor=XML_POST_PROCESSOR,
                force_list=XML_FORCE_LIST,
            )
            # sets the content and extension
            try:
                # First attempt keeps non-ASCII characters as they are.
                transform_result_content.content_converted = json.dumps(
                    transformed_content, indent=4, ensure_ascii=False)
            except Exception:
                # Best-effort fallback to ASCII-escaped output. Exception (not
                # a bare except) is caught so that KeyboardInterrupt and
                # SystemExit are no longer swallowed; an error raised by the
                # fallback itself propagates to the caller.
                transform_result_content.content_converted = json.dumps(
                    transformed_content, indent=4)

            transform_result_content.content_extension = self.extension
            # add the content to the list of content
            transform_result.transform_result_content.append(
                transform_result_content)
            # add the result to the list of result
            results_transform.append(transform_result)
        return results_transform
Ejemplo n.º 15
0
def transform_dict_identifier_to_oai_identifier(data):
    """ Build an OaiIdentify object from a dict.

    Args:
        data: Mapping holding the identify values; every key listed below plus
            'raw' is read with ``data[key]``.

    Returns:
        OaiIdentify instance.

    """
    identify_kwargs = {}
    # (OaiIdentify keyword, key in ``data``) pairs, looked up in this order.
    for attribute, source_key in (
        ('admin_email', 'adminEmail'),
        ('base_url', 'baseURL'),
        ('repository_name', 'repositoryName'),
        ('deleted_record', 'deletedRecord'),
        ('delimiter', 'delimiter'),
        ('description', 'description'),
        ('earliest_datestamp', 'earliestDatestamp'),
        ('granularity', 'granularity'),
        ('oai_identifier', 'oai_identifier'),
        ('protocol_version', 'protocolVersion'),
        ('repository_identifier', 'repositoryIdentifier'),
        ('sample_identifier', 'sampleIdentifier'),
        ('scheme', 'scheme'),
    ):
        identify_kwargs[attribute] = data[source_key]
    # The raw XML is stored converted to a dict, not as a string.
    identify_kwargs['raw'] = raw_xml_to_dict(data['raw'])
    return OaiIdentify(**identify_kwargs)