Exemple #1
0
    def test_update_sentences(self):
        '''
        Assign freshly annotated sentences to a parsed document and check
        that they are read back with the assigned values and that the
        serialised document contains ``CDATA``.
        '''
        source_xml = self.xml_content1
        md5sums = ('7e985ffb692bb6f617f25619ecca39a9',
                   '7e985ffb692bb6f617f25619ecca3910')
        replacements = [Sentence(md5sum) for md5sum in md5sums]

        # give every replacement sentence the same annotation values
        for replacement in replacements:
            replacement.pos_tags = 'nn nn'
            replacement.significance = 3
            replacement.sem_orient = 1

        document = XMLContent(source_xml)

        # diagnostic output: the document and its sentences before the update
        print(document.get_xml_document())
        for original in document.sentences:
            print(original.md5sum, original.value, original.significance)

        document.sentences = replacements
        serialised = document.get_xml_document()

        # the values set above must be visible through the document
        for updated in document.sentences:
            assert updated.significance == 3
            assert updated.sem_orient == 1

        assert 'CDATA' in serialised
    def test_update_sentences(self):
        '''
        Replace the ``sentences`` attribute of an ``XMLContent`` built from
        ``self.xml_content1`` and verify the result.

        The test passes when every sentence exposed by the document
        afterwards has ``significance == 3`` and ``sem_orient == 1`` and
        the string returned by ``get_xml_document()`` contains ``CDATA``.
        '''
        raw_document = self.xml_content1

        first = Sentence('7e985ffb692bb6f617f25619ecca39a9')
        second = Sentence('7e985ffb692bb6f617f25619ecca3910')
        new_sentences = [first, second]
        for new_sentence in new_sentences:
            new_sentence.pos_tags = 'nn nn'
            new_sentence.significance = 3
            new_sentence.sem_orient = 1

        parsed = XMLContent(raw_document)

        # dump the initial state to stdout to ease debugging of failures
        print(parsed.get_xml_document())
        for current in parsed.sentences:
            print(current.md5sum, current.value, current.significance)

        parsed.sentences = new_sentences
        output_xml = parsed.get_xml_document()

        for current in parsed.sentences:
            assert current.significance == 3
            assert current.sem_orient == 1

        assert 'CDATA' in output_xml
    def from_api_dict(cls, api_dict):
        '''
        Builds an XMLContent object from a dict that follows the JSON
        document format of the API specification.

        ``cls._check_document_format`` is called in strict mode before
        anything else is done.

        Note that ``api_dict`` is modified in place when it has no
        ``title`` key but contains a sentence flagged as title: the value
        of that sentence is then stored under ``api_dict['title']``.

        :param api_dict: The document to parse.
        :type api_dict: dict
        :returns: The parsed document as XMLContent object.
        :rtype: :py:class:`weblyzard_api.model.xml_content.XMLContent`
        :raises MalformedJSONException: if ``api_dict`` has a ``title`` \
            that differs from the value of a sentence flagged as title.
        '''
        cls._check_document_format(api_dict, strict=True)

        # keys that are handled separately below instead of being copied
        # over as plain attributes
        special_keys = ('sentences', 'annotations', 'language_id',
                        'features', 'relations', 'content')

        # start from an empty document
        document = XMLContent(xml_content=None, remove_duplicates=True)

        plain_attributes = {}
        for name, item in api_dict.items():
            if name not in special_keys:
                plain_attributes[name] = item
        document.update_attributes(plain_attributes)

        # sentences
        parsed_sentences = []
        for sentence_dict in api_dict.get('sentences', []):
            parsed_sentences.append(
                JSON10ParserSentence.from_api_dict(sentence_dict))
        document.sentences = parsed_sentences

        # annotations
        parsed_annotations = []
        for annotation_dict in api_dict.get('annotations', []):
            parsed_annotations.append(
                JSON10ParserAnnotation.from_api_dict(annotation_dict))
        document.body_annotations = parsed_annotations

        # relations and features are taken over as they are
        document.relations = api_dict.get('relations', {})
        document.features = api_dict.get('features', {})

        # the API calls the language "language_id", XMLContent "lang"
        if 'language_id' in api_dict:
            document.attributes['lang'] = api_dict['language_id']

        if 'title' not in api_dict:
            # no explicit title: record the title sentence in the input dict
            for candidate in parsed_sentences:
                if candidate.is_title:
                    api_dict['title'] = candidate.value
            return document

        # an explicit title must agree with every sentence flagged as title
        expected_title = api_dict['title']
        for candidate in parsed_sentences:
            if not candidate.is_title:
                continue
            if candidate.value != expected_title:
                raise MalformedJSONException(
                    'The sentence marked with "is_title": "True" must '
                    'match the "title" attribute.')
        return document
Exemple #4
0
    def from_api_dict(cls, api_dict):
        '''
        Parses a dict with a structure analogous to the JSON format defined
        in the API specification.

        ``cls._check_document_format`` is called in strict mode before the
        document is parsed.

        Side effect: if ``api_dict`` has no ``title`` key but contains a
        sentence flagged as title, the value of that sentence is written
        to ``api_dict['title']``.

        :param api_dict: The document to parse.
        :type api_dict: dict
        :returns: The parsed document as XMLContent object.
        :rtype: :py:class:`weblyzard_api.model.xml_content.XMLContent`
        :raises MalformedJSONException: if ``api_dict`` contains a \
            ``title`` that differs from the value of a sentence flagged \
            as title.
        '''
        cls._check_document_format(api_dict, strict=True)
        # This basically creates an empty XMLContent object
        xml_content = XMLContent(xml_content=None, remove_duplicates=True)
        # add all items in api_dict unless they need special handling;
        # dict.items() is used because dict.iteritems() does not exist
        # on Python 3
        xml_content.update_attributes({
            key: value
            for key, value in api_dict.items()
            if key not in ('sentences', 'annotations', 'language_id',
                           'features', 'relations', 'content')
        })
        # parse sentences
        sentences = [
            JSON10ParserSentence.from_api_dict(sentence_dict)
            for sentence_dict in api_dict.get('sentences', [])
        ]
        xml_content.sentences = sentences

        # parse annotations
        annotations = [
            JSON10ParserAnnotation.from_api_dict(annotation_dict)
            for annotation_dict in api_dict.get('annotations', [])
        ]
        xml_content.body_annotations = annotations

        # add relations and features
        xml_content.relations = api_dict.get('relations', {})
        xml_content.features = api_dict.get('features', {})

        # map the language_id to XMLContent.lang
        if 'language_id' in api_dict:
            xml_content.attributes['lang'] = api_dict['language_id']

        if 'title' in api_dict:
            # an explicit title must match every sentence flagged as title
            for sentence in sentences:
                if sentence.is_title and sentence.value != api_dict['title']:
                    raise MalformedJSONException(
                        'The sentence marked with "is_title": "True" must ' +
                        'match the "title" attribute.')
        else:
            # no explicit title: store the title sentence in the input dict
            for sentence in sentences:
                if sentence.is_title:
                    api_dict['title'] = sentence.value
        return xml_content