def read(self, text: str):
    """Parse a three-field record into a ``Token``.

    ``text`` is split on ``self.separator``. The three resulting fields are
    passed to ``Token`` in order, with every underscore in the second field
    replaced by a space.

    Raises:
        ParsingException: if the split does not yield exactly three fields.
    """
    fields = text.split(self.separator)
    if len(fields) == 3:
        first, second, third = fields
        return Token(first, second.replace('_', ' '), third)
    # TODO: discard the sentence, report it, but keep going!
    raise ParsingException("Malformed input: '{}'".format(text))
def analysises(self, pos: int) -> set:
    """Collect the set of ``Token`` objects for the analyses stored at ``pos``.

    For every key of ``self.anals[pos]`` a ``Token`` is built from
    ``self.words[pos]`` and the values returned by ``self.anal2lemma`` and
    ``self.anal2tag`` for that key.
    """
    return {
        Token(self.words[pos], self.anal2lemma(key), self.anal2tag(key))
        for key in self.anals[pos].keys()
    }
def read(self, text: str):
    """Parse a two-field record into a ``Token``.

    ``text`` is split on ``self.separator``. The first field becomes the
    first ``Token`` argument, the second argument is ``None``, and the second
    field becomes the third argument.

    Raises:
        ParsingException: if the split does not yield exactly two fields.
    """
    fields = text.split(self.separator)
    if len(fields) == 2:
        first, last = fields
        return Token(first, None, last)
    raise ParsingException("Malformed input: '{}'".format(text))
def read(self, text: str):
    """Wrap ``text`` unchanged in a ``Token`` built from that single argument."""
    parsed = Token(text)
    return parsed
def add_sentence_markers(sentence: Sentence):
    """Insert a beginning-of-sentence token at index 0 of ``sentence``.

    The token is built from ``ModelData.BOS_TOKEN`` and ``ModelData.BOS_TAG``
    with ``None`` as its middle argument. ``sentence`` is modified in place.
    """
    # Resolve the insertion method before building the token, so the order of
    # evaluation matches a direct ``sentence.insert(0, Token(...))`` call.
    prepend = sentence.insert
    prepend(0, Token(ModelData.BOS_TOKEN, None, ModelData.BOS_TAG))
def convert(self, word: str, vocab: BaseVocabulary) -> Token:
    """Build a ``Token`` for ``word`` from the result of ``self.analyse``.

    Element 1 of the analysis is looked up through ``vocab.word`` to obtain
    the tag; element 0 is passed to ``Token`` as its second argument.
    """
    analysis = self.analyse(word)  # (str, int)
    tag_name = vocab.word(analysis[1])  # str
    lemma = analysis[0]
    return Token(word, lemma, tag_name)
def token(word: str, lemma: str, tag: str) -> Token:
    """Build a ``Token`` from ``word``, ``lemma`` and ``tag``, in that order."""
    built = Token(word, lemma, tag)
    return built