Example #1
 def read(self, text: str):
     w_parts = text.split(self.separator)
     if len(w_parts) != 3:
         raise ParsingException(
             "Malformed input: '{}'".format(text))  # todo dobja el a
         # mondatot, írja ki, de menjen tovább!
     return Token(w_parts[0], w_parts[1].replace('_', ' '), w_parts[2])
Example #2
 def analysises(self, pos: int) -> set:
     # Collect one Token per stored analysis of the word at this position.
     fanals = self.anals[pos].keys()
     ret = set()
     for fa in fanals:
         ret.add(
             Token(self.words[pos], self.anal2lemma(fa), self.anal2tag(fa)))
     return ret
Example #3
 def read(self, text: str):
     # Two-column variant: word and tag only; no lemma is given.
     w_parts = text.split(self.separator)
     if len(w_parts) != 2:
         raise ParsingException("Malformed input: '{}'".format(text))
     return Token(w_parts[0], None, w_parts[1])
Example #4
 def read(self, text: str):
     # The whole line is taken as the word form; the remaining Token fields keep their defaults.
     return Token(text)
Example #5
 def add_sentence_markers(sentence: Sentence):
     # Prepend a beginning-of-sentence marker token.
     sentence.insert(0, Token(ModelData.BOS_TOKEN, None, ModelData.BOS_TAG))
 def convert(self, word: str, vocab: BaseVocabulary) -> Token:
     anal = self.analyse(word)  # (str, int)
     tag = vocab.word(anal[1])  # str
     return Token(word, anal[0], tag)
 def token(word: str, lemma: str, tag: str) -> Token:
     return Token(word, lemma, tag)
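All of the snippets above build a Token from a word form, an optional lemma, and a tag. The sketch below shows how such a reader might be exercised end to end; the Token namedtuple, the ParsingException class, the TsvReader name, and the tab separator are assumptions made for illustration and do not come from the examples themselves.

 from collections import namedtuple

 # Stand-ins assumed for illustration only; the real Token, ParsingException
 # and reader classes are not shown in the examples above.
 # The defaults allow one-argument calls such as Token(text) in Example #4.
 Token = namedtuple('Token', ['word', 'lemma', 'tag'], defaults=(None, None))

 class ParsingException(Exception):
     pass

 class TsvReader:
     # Mirrors Example #1: word<TAB>lemma<TAB>tag, with underscores in the
     # lemma standing in for spaces.
     separator = '\t'

     def read(self, text: str) -> Token:
         w_parts = text.split(self.separator)
         if len(w_parts) != 3:
             raise ParsingException("Malformed input: '{}'".format(text))
         return Token(w_parts[0], w_parts[1].replace('_', ' '), w_parts[2])

 print(TsvReader().read('apples\tapple\tNOUN'))
 # Token(word='apples', lemma='apple', tag='NOUN')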