def _sentence_tagged_words(sent_elem):
    """Return ``(text, tag)`` pairs for the tokens of a sentence element.

    Tokens are looked up with the path ``*//token``. For every token the
    ``text`` attribute is converted with ``text_type`` and paired with a tag
    string: the grammemes of the *first* ``l`` element found under the token
    (path ``*//l``), as returned by ``_grammemes``, joined with commas.

    NOTE(review): a token without any ``l`` element makes ``find`` return
    ``None``, which is handed to ``_grammemes`` unchanged -- presumably the
    input always carries at least one ``l`` per token; confirm.
    """
    separator = text_type(',')
    return [
        (
            text_type(token.get('text')),
            separator.join(_grammemes(token.find('*//l'))),
        )
        for token in sent_elem.findall('*//token')
    ]
def _sentence_parsed_words(sent_elem):
    """Return ``(text, annotations)`` pairs for the tokens of a sentence.

    Tokens are looked up with the path ``*//token``. ``text`` is the token's
    ``text`` attribute converted with ``text_type``. ``annotations`` is a
    list with one ``(t, tag)`` tuple per ``l`` element found under the token
    (path ``*//l``): ``t`` is that element's ``t`` attribute (presumably the
    lemma -- confirm against the corpus schema) and ``tag`` is the
    comma-joined result of ``_grammemes`` for the same element.
    """
    separator = text_type(',')
    parsed = []
    for token in sent_elem.findall('*//token'):
        word = text_type(token.get('text'))
        annotations = []
        for l_elem in token.findall('*//l'):
            t_value = text_type(l_elem.get('t'))
            tag = separator.join(_grammemes(l_elem))
            annotations.append((t_value, tag))
        parsed.append((word, annotations))
    return parsed
def _grammemes(l_element):
    """Return the ``v`` attribute of every child of ``l_element`` as a list.

    Each value is converted with ``text_type``; children are visited in
    document order. An element without children yields an empty list.

    The children are obtained by iterating the element directly instead of
    calling ``getchildren()``: that method was removed from
    ``xml.etree.ElementTree`` in Python 3.9, while plain iteration yields the
    same children on every ElementTree-compatible implementation.
    """
    return [text_type(grammeme.get('v')) for grammeme in l_element]
def _sentence_words(sent_elem):
    """Return the ``text`` attribute of every token in a sentence element.

    Tokens are looked up with the path ``*//token`` (``token`` elements
    nested below a child of ``sent_elem``); each value is converted with
    ``text_type`` and the list keeps document order.
    """
    tokens = sent_elem.findall('*//token')
    return [text_type(token.get('text')) for token in tokens]
def _sentence_source(sent_elem):
    """Return the text of the sentence's direct ``source`` child.

    The text content of the first ``source`` child of ``sent_elem`` is
    converted with ``text_type`` and returned.
    """
    source_elem = sent_elem.find('source')
    return text_type(source_elem.text)
def categories(self):
    """Return the text of every ``tag`` element under ``tags`` in the root.

    Elements are looked up on ``self.root`` with the path ``tags//tag``;
    each element's text is converted with ``text_type``. The result is a
    list in document order, empty when nothing matches.
    """
    tag_elems = self.root.findall('tags//tag')
    return [text_type(tag_elem.text) for tag_elem in tag_elems]
def title(self):
    """Return the ``name`` attribute of ``self.root`` via ``text_type``."""
    name = self.root.get('name')
    return text_type(name)