def from_db_format(key: str, val: list) -> 'MarkovNeighbor':
    """Build a ``MarkovNeighbor`` from a key and its stored field list.

    Args:
        key: Passed through unchanged as the neighbor's key.
        val: Sequence indexed by the ``.value`` of the ``NeighborIdx``
            members (TEXT, POS, COMPOUND, VALUE_MATRIX, DISTANCE_MATRIX).

    Returns:
        A ``MarkovNeighbor`` built from ``key`` and the entries of ``val``;
        the POS entry is wrapped in ``Pos`` before being handed over.
    """
    def field(index):
        # Each NeighborIdx member's value is the position of that field in val.
        return val[index.value]

    return MarkovNeighbor(
        key,
        field(NeighborIdx.TEXT),
        Pos(field(NeighborIdx.POS)),
        field(NeighborIdx.COMPOUND),
        field(NeighborIdx.VALUE_MATRIX),
        field(NeighborIdx.DISTANCE_MATRIX),
    )
def from_token(token: Token) -> 'MarkovWord':
    """Create a ``MarkovWord`` with no neighbors from a token.

    Args:
        token: Token supplying the word text and the input to
            ``Pos.from_token`` / ``CapitalizationMode.from_token``.

    Returns:
        A ``MarkovWord`` carrying ``token.text``, the token's ``Pos``, a
        compound flag, and an empty neighbor dict.
    """
    capitalization = CapitalizationMode.from_token(
        token, CAPITALIZATION_COMPOUND_RULES)
    # The word is flagged compound exactly when the compound rules yield COMPOUND.
    is_compound = capitalization == CapitalizationMode.COMPOUND
    return MarkovWord(
        token.text,
        Pos.from_token(token),
        compound=is_compound,
        neighbors={},
    )
def from_token(token: Token) -> 'MarkovNeighbor':
    """Create a zero-initialised ``MarkovNeighbor`` from a token.

    Args:
        token: Token supplying the text and the input to
            ``Pos.from_token`` / ``CapitalizationMode.from_token``.

    Returns:
        A ``MarkovNeighbor`` keyed by the lower-cased token text, with a
        two-element zero value list and a zero distance list of length
        ``MARKOV_WINDOW_SIZE * 2 + 1``.
    """
    lowered_key = token.text.lower()
    surface_text = token.text
    capitalization = CapitalizationMode.from_token(
        token, CAPITALIZATION_COMPOUND_RULES)
    is_compound = capitalization == CapitalizationMode.COMPOUND
    part_of_speech = Pos.from_token(token)

    # Fresh lists per neighbor so instances never share mutable state.
    zero_values = [0, 0]
    zero_distances = [0] * (MARKOV_WINDOW_SIZE * 2 + 1)

    return MarkovNeighbor(
        lowered_key,
        surface_text,
        part_of_speech,
        is_compound,
        zero_values,
        zero_distances,
    )
def from_db_format(row: dict) -> 'MarkovWord':
    """Build a ``MarkovWord`` from a stored row.

    Args:
        row: Mapping holding a word sub-mapping under
            ``MarkovTrieDb.WORD_KEY`` (with ``WordKey.TEXT``, ``WordKey.POS``
            and ``WordKey.COMPOUND`` entries) and the neighbors under
            ``MarkovTrieDb.NEIGHBORS_KEY``.

    Returns:
        A ``MarkovWord`` built from those entries; the POS entry is wrapped
        in ``Pos``.
    """
    word_fields = row[MarkovTrieDb.WORD_KEY]
    return MarkovWord(
        word_fields[WordKey.TEXT],
        Pos(word_fields[WordKey.POS]),
        word_fields[WordKey.COMPOUND],
        row[MarkovTrieDb.NEIGHBORS_KEY],
    )
def from_token(token: Token) -> 'MarkovWord':
    """Construct a neighbor-less ``MarkovWord`` for the given token.

    Args:
        token: Token whose ``text`` becomes the word text and which is
            passed to ``Pos.from_token`` and ``CapitalizationMode.from_token``.

    Returns:
        A ``MarkovWord`` with ``compound`` set to whether the token's
        capitalization mode (under ``CAPITALIZATION_COMPOUND_RULES``) equals
        ``CapitalizationMode.COMPOUND``, and ``neighbors`` set to ``{}``.
    """
    detected_mode = CapitalizationMode.from_token(
        token, CAPITALIZATION_COMPOUND_RULES)
    return MarkovWord(
        token.text,
        Pos.from_token(token),
        compound=(detected_mode == CapitalizationMode.COMPOUND),
        neighbors={},
    )
def analyze(token: Token, mode: CapitalizationMode):
    """Return the embedding for a token's part of speech combined with a mode.

    Args:
        token: Token passed to ``Pos.from_token``.
        mode: Capitalization mode to pair with the token's ``Pos``.

    Returns:
        Whatever ``PoSCapitalizationMode(pos, mode).to_embedding()`` returns.
    """
    part_of_speech = Pos.from_token(token)
    return PoSCapitalizationMode(part_of_speech, mode).to_embedding()
def from_embedding(embedding: int):
    """Split an embedding into a ``Pos`` and a ``CapitalizationMode``.

    The embedding is divided by the number of ``CapitalizationMode`` members:
    the truncated quotient selects the ``Pos`` and the remainder selects the
    ``CapitalizationMode``.

    Args:
        embedding: Combined numeric code; presumably one produced by
            ``PoSCapitalizationMode.to_embedding`` (verify against that
            method).

    Returns:
        A ``PoSCapitalizationMode`` built from the decoded parts.
    """
    mode_count = len(CapitalizationMode)
    # True division followed by int() truncates toward zero; kept as-is so
    # results match the existing behaviour for every input.
    pos_value = int(embedding / mode_count)
    mode_value = int(embedding % mode_count)
    return PoSCapitalizationMode(Pos(pos_value), CapitalizationMode(mode_value))