Example #1
0
def add_tokens_to_vocabulary(vocab: Vocabulary, tokens=None):
    """Merge the given tokens into ``vocab`` and return ``vocab``.

    The tokens are first reduced to a token -> count mapping via
    ``get_token_count_dict``. Words already stored for this vocabulary get
    their ``count`` increased; every remaining token with a non-zero count
    becomes a new ``Word`` that is attached to ``vocab``.

    Args:
        vocab: Vocabulary to update; its ``id`` selects the existing words.
        tokens: Tokens to merge. A falsy value (``None``, empty) is a no-op.

    Returns:
        The same ``vocab`` object that was passed in.

    Note:
        Nothing here commits or flushes the session explicitly.
        NOTE(review): new words are only linked through ``word.vocabulary``;
        presumably a relationship cascade adds them to the session — confirm.
    """
    if not tokens:
        return vocab

    pending_counts = get_token_count_dict(tokens)
    candidate_id = get_next_token_id(vocab)
    stored_words = db.session.query(Word).filter_by(vocabulary_id=vocab.id)

    # Pass 1: bump the counters of words the vocabulary already holds.
    for stored in stored_words:
        if stored.token not in pending_counts:
            continue
        stored.count += pending_counts[stored.token]
        # A zero count marks the token as handled so pass 2 skips it.
        pending_counts[stored.token] = 0

    # Pass 2: create a word for every token that has not been handled yet.
    for token, occurrences in pending_counts.items():
        if occurrences == 0:
            continue
        try:
            created = Word(candidate_id, token, occurrences)
        except ValueError:
            # Word refused these arguments: skip the token, keep the id free.
            continue
        else:
            created.vocabulary = vocab
            # An id is only consumed once a word was actually built with it.
            candidate_id += 1

    return vocab