def _paragraph2XML(self, paragraph, document_order=""):
    """Build the ``<paragraph>`` element for one paragraph.

    The element carries a ``documentOrder`` and an ``ID`` attribute (both
    passed through ``not_none_to_str``) and a single ``<sentences>`` child
    holding one serialized sentence per entry of ``paragraph.sentences``.

    :param paragraph: object exposing ``id`` and an iterable ``sentences``.
    :param document_order: position of the paragraph inside its document.
    :return: the populated ``<paragraph>`` element.
    """
    attributes = {
        'documentOrder': not_none_to_str(document_order),
        'ID': not_none_to_str(paragraph.id),
    }
    par_node = XMLTree.Element('paragraph', attrib=attributes)

    # All sentences live under one wrapper element, in their original order.
    sentences_node = XMLTree.Element('sentences')
    sentences_node.extend(
        [self._sentence2XML(sentence) for sentence in paragraph.sentences]
    )

    par_node.append(sentences_node)
    return par_node
def _anno2XML(self, anno):
    """Build the ``<label>`` element for a single annotation.

    The ``start``, ``end``, ``itype`` and ``name`` fields of ``anno`` are
    converted with ``not_none_to_str``; any attribute whose converted value
    is ``None`` is left out of the element.

    :param anno: object exposing ``start``, ``end``, ``itype`` and ``name``.
    :return: the ``<label>`` element.
    """
    candidates = {
        'start': not_none_to_str(anno.start),
        'end': not_none_to_str(anno.end),
        'itype': not_none_to_str(anno.itype),
        'name': not_none_to_str(anno.name),
    }
    xml_anno = XMLTree.Element('label')
    # Filter into a new dict rather than deleting keys while iterating:
    # on Python 3, removing entries during ``.items()`` iteration raises
    # "RuntimeError: dictionary changed size during iteration".
    xml_anno.attrib = {
        key: val for key, val in candidates.items() if val is not None
    }
    return xml_anno
def _anno_set2XML(self, annoset):
    """Build the ``<annotationSet>`` element for an annotation set.

    ``ID``, ``status``, ``frameName`` and ``luName`` are converted with
    ``not_none_to_str`` and only kept when the converted value is not
    ``None``. Iterating ``annoset`` yields its layers, each of which is
    serialized with ``_layer2XML`` and appended as a child.

    :param annoset: iterable of layers exposing ``id``, ``status``,
        ``frameName`` and ``luName``.
    :return: the ``<annotationSet>`` element.
    """
    fields = (
        ('ID', annoset.id),
        ('status', annoset.status),
        ('frameName', annoset.frameName),
        ('luName', annoset.luName),
    )

    attributes = {}
    for attr_name, raw_value in fields:
        text = not_none_to_str(raw_value)
        # Attributes without a value are omitted entirely.
        if text is not None:
            attributes[attr_name] = text

    set_node = XMLTree.Element('annotationSet', attrib=attributes)
    set_node.extend([self._layer2XML(layer) for layer in annoset])
    return set_node
def _doc2XML(self, doc):
    """Build the ``<corpus>`` tree that wraps a single document.

    Resulting structure::

        corpus > documents > document > paragraphs > paragraph*

    Paragraphs are taken from ``doc.elements`` and numbered from 1 through
    their ``documentOrder`` attribute.

    :param doc: object exposing ``corpus``, ``corpusID``, ``desc``,
        ``name``, ``id`` and an iterable ``elements``.
    :return: the root ``<corpus>`` element.
    """
    # NOTE(review): the corpus 'name' and 'ID' are stored as-is, whereas the
    # document attributes below go through not_none_to_str -- confirm that
    # doc.corpus and doc.corpusID are always strings.
    corpus_node = XMLTree.Element(
        'corpus',
        attrib={'description': '', 'name': doc.corpus, 'ID': doc.corpusID},
    )
    documents_node = XMLTree.Element('documents')
    corpus_node.append(documents_node)

    document_node = XMLTree.Element(
        'document',
        attrib={
            'description': not_none_to_str(doc.desc),
            'name': not_none_to_str(doc.name),
            'ID': not_none_to_str(doc.id),
        },
    )
    documents_node.append(document_node)

    paragraphs_node = XMLTree.Element('paragraphs')
    document_node.append(paragraphs_node)

    for position, paragraph in enumerate(doc.elements, 1):
        paragraphs_node.append(
            self._paragraph2XML(paragraph, document_order=str(position))
        )

    return corpus_node
def _sentence2XML(self, sent, **kwargs):
    """Build the ``<sentence>`` element for one sentence.

    The element gets an ``ID`` attribute (via ``not_none_to_str``) plus any
    extra attributes supplied as keyword arguments, a ``<text>`` child with
    the sentence text, and one serialized annotation set per entry of
    ``sent.annotation_sets`` appended directly under ``<sentence>``.

    :param sent: object exposing ``id``, ``text`` and ``annotation_sets``.
    :param kwargs: additional attributes merged into the element's
        attributes; a key equal to ``'ID'`` overrides the sentence id.
    :return: the ``<sentence>`` element.
    """
    xml_sent = XMLTree.Element('sentence')
    xml_sent.attrib = {'ID': not_none_to_str(sent.id)}
    xml_sent.attrib.update(kwargs)

    text = sent.text
    # Only byte strings need decoding. Text that is already unicode has no
    # usable ``decode`` on Python 3 (AttributeError), so it is used as-is.
    if isinstance(text, bytes):
        text = text.decode(self.encoding)
    xml_text = XMLTree.Element('text')
    xml_text.text = text
    xml_sent.append(xml_text)

    for anno_set in sent.annotation_sets:
        xml_sent.append(self._anno_set2XML(anno_set))
    return xml_sent
def _sentence2XML(self, sent, **kwargs):
    """Build the ``<sentence>`` element with text, POS and annotation sets.

    Children are appended in this order: ``<text>``, ``<parts-of-speech>``
    (one ``<pos>`` per entry of ``sent.parts_of_speech``) and
    ``<annotationSets>`` (one serialized set per entry of
    ``sent.annotation_sets``).

    :param sent: object exposing ``id``, ``text``, ``parts_of_speech`` and
        ``annotation_sets``. Each part-of-speech entry is indexed as
        ``[0]`` for the ``start`` attribute and ``[1]`` for ``end``.
    :param kwargs: additional attributes merged into the element's
        attributes; a key equal to ``'ID'`` overrides the sentence id.
    :return: the ``<sentence>`` element.
    """
    xml_sent = XMLTree.Element('sentence')
    # NOTE(review): the id is stored as-is here, while other serializers in
    # this file pass ids through not_none_to_str -- confirm sent.id is
    # always a string for this exporter.
    xml_sent.attrib = {'ID': sent.id}
    xml_sent.attrib.update(kwargs)

    text = sent.text
    # Only byte strings need decoding. Text that is already unicode has no
    # usable ``decode`` on Python 3 (AttributeError), so it is used as-is.
    if isinstance(text, bytes):
        text = text.decode(self.encoding)
    xml_text = XMLTree.Element('text')
    xml_text.text = text

    xml_pos = XMLTree.Element('parts-of-speech')
    for word in sent.parts_of_speech:
        attrib = {
            'start': not_none_to_str(word[0]),
            'end': not_none_to_str(word[1]),
        }
        xml_pos.append(XMLTree.Element('pos', attrib=attrib))

    xml_anno_sets = XMLTree.Element('annotationSets')
    for anno_set in sent.annotation_sets:
        xml_anno_sets.append(self._anno_set2XML(anno_set))

    xml_sent.append(xml_text)
    xml_sent.append(xml_pos)
    xml_sent.append(xml_anno_sets)
    return xml_sent