def test_update_sentences(self):
    '''
    Replaces the sentences of a parsed document and verifies that the
    replacement sentences are the ones exposed and serialized afterwards.
    '''
    raw_xml = self.xml_content1
    md5sums = ('7e985ffb692bb6f617f25619ecca39a9',
               '7e985ffb692bb6f617f25619ecca3910')

    # replacement sentences, all carrying the same annotation values
    replacements = [Sentence(md5sum) for md5sum in md5sums]
    for replacement in replacements:
        replacement.pos_tags = 'nn nn'
        replacement.significance = 3
        replacement.sem_orient = 1

    document = XMLContent(raw_xml)

    # dump the document and its sentences as parsed, before the update
    print(document.get_xml_document())
    for sentence in document.sentences:
        print(sentence.md5sum, sentence.value, sentence.significance)

    document.sentences = replacements
    serialized = document.get_xml_document()

    # every sentence now exposed by the document carries the new values
    for sentence in document.sentences:
        assert sentence.significance == 3
        assert sentence.sem_orient == 1

    assert 'CDATA' in serialized
def test_update_sentences(self):
    '''
    Checks that assigning a new sentence list to an XMLContent object
    is reflected by its ``sentences`` and by the serialized document.
    '''
    source_xml = self.xml_content1
    new_sentences = [
        Sentence('7e985ffb692bb6f617f25619ecca39a9'),
        Sentence('7e985ffb692bb6f617f25619ecca3910'),
    ]

    # attribute values applied to each new sentence, in this order
    updates = (
        ('pos_tags', 'nn nn'),
        ('significance', 3),
        ('sem_orient', 1),
    )
    for new_sentence in new_sentences:
        for attribute, value in updates:
            setattr(new_sentence, attribute, value)

    content = XMLContent(source_xml)

    # show the state of the document before its sentences are replaced
    print(content.get_xml_document())
    for current in content.sentences:
        print(current.md5sum, current.value, current.significance)

    content.sentences = new_sentences
    xml_after_update = content.get_xml_document()

    for current in content.sentences:
        assert current.significance == 3
        assert current.sem_orient == 1

    assert 'CDATA' in xml_after_update
def from_api_dict(cls, api_dict):
    '''
    Builds an XMLContent object from a dict that mirrors the JSON
    document format defined in the API specification.

    :param api_dict: The document to parse. If it has no ``title``
        entry, the value of each sentence flagged ``is_title`` is
        written into it under ``title``.
    :type api_dict: dict
    :returns: The parsed document as XMLContent object.
    :rtype: :py:class:`weblyzard_api.model.xml_content.XMLContent`
    :raises MalformedJSONException: if ``api_dict`` has a ``title`` and
        a sentence flagged ``is_title`` carries a different value.
    '''
    cls._check_document_format(api_dict, strict=True)

    # created without any source XML; it is filled step by step below
    document = XMLContent(xml_content=None, remove_duplicates=True)

    # these entries are handled individually further down ('content'
    # is not used at all); everything else is stored as an attribute
    special_keys = ('sentences', 'annotations', 'language_id',
                    'features', 'relations', 'content')
    plain_attributes = {}
    for key, value in api_dict.items():
        if key not in special_keys:
            plain_attributes[key] = value
    document.update_attributes(plain_attributes)

    # sentences
    sentences = []
    for sentence_dict in api_dict.get('sentences', []):
        sentences.append(JSON10ParserSentence.from_api_dict(sentence_dict))
    document.sentences = sentences

    # annotations
    annotations = []
    for annotation_dict in api_dict.get('annotations', []):
        annotations.append(
            JSON10ParserAnnotation.from_api_dict(annotation_dict))
    document.body_annotations = annotations

    # relations and features default to an empty dict
    document.relations = api_dict.get('relations', {})
    document.features = api_dict.get('features', {})

    # the language_id is exposed as the 'lang' attribute
    if 'language_id' in api_dict:
        document.attributes['lang'] = api_dict['language_id']

    # A given title has already been stored through update_attributes,
    # so it only needs to agree with the title sentences. Without a
    # given title, the title sentence value is written to api_dict.
    title_given = 'title' in api_dict
    for sentence in sentences:
        if not sentence.is_title:
            continue
        if not title_given:
            api_dict['title'] = sentence.value
        elif sentence.value != api_dict['title']:
            raise MalformedJSONException(
                'The sentence marked with "is_title": "True" must ' +
                'match the "title" attribute.')

    return document
def from_api_dict(cls, api_dict):
    '''
    Parses a dict with a structure analogous to the JSON format defined
    in the API specification.

    :param api_dict: The document to parse.
    :type api_dict: dict
    :returns: The parsed document as XMLContent object.
    :rtype: :py:class:`weblyzard_api.model.xml_content.XMLContent`
    :raises MalformedJSONException: if ``api_dict`` contains a ``title``
        and a sentence flagged ``is_title`` has a different value.
    '''
    cls._check_document_format(api_dict, strict=True)
    # This basically creates an empty XMLContent object
    xml_content = XMLContent(xml_content=None, remove_duplicates=True)
    # add all items in api_dict unless they need special handling;
    # items() is used instead of iteritems() because the latter does
    # not exist on Python 3 dicts (items() works on Python 2 as well)
    xml_content.update_attributes({
        key: value for key, value in api_dict.items()
        if key not in ('sentences', 'annotations', 'language_id',
                       'features', 'relations', 'content')
    })

    # parse sentences
    sentences = [JSON10ParserSentence.from_api_dict(sentence_dict)
                 for sentence_dict in api_dict.get('sentences', [])]
    xml_content.sentences = sentences

    # parse annotations
    annotations = [JSON10ParserAnnotation.from_api_dict(annotation_dict)
                   for annotation_dict in api_dict.get('annotations', [])]
    xml_content.body_annotations = annotations

    # add relations and features
    xml_content.relations = api_dict.get('relations', {})
    xml_content.features = api_dict.get('features', {})

    # map the language_id to XMLContent.lang
    if 'language_id' in api_dict:
        xml_content.attributes['lang'] = api_dict['language_id']

    # A title present in api_dict has already been passed on through
    # update_attributes above; here it is only checked for consistency
    # with the sentences flagged as title.
    if 'title' in api_dict:
        for sentence in sentences:
            if sentence.is_title and sentence.value != api_dict['title']:
                raise MalformedJSONException(
                    'The sentence marked with "is_title": "True" must ' +
                    'match the "title" attribute.')
    else:
        # NOTE(review): this writes the title back into the caller's
        # api_dict; xml_content itself is not updated here - confirm
        # that this is the intended behaviour.
        for sentence in sentences:
            if sentence.is_title:
                api_dict['title'] = sentence.value
    return xml_content