def _insert(iterable):
    """Normalize the words in ``iterable`` and feed the kept ones to ``word_count``.

    Each item is passed through ``Vocabulary.normalize``. When ``valid_words``
    is truthy, a normalized word is kept only if it is a member of
    ``valid_words``; when it is falsy, every word is kept. The surviving
    words are handed to ``word_count.update`` in a single call.

    ``valid_words`` and ``word_count`` are not parameters; they are resolved
    from the surrounding scope.
    """
    # Lazy generator so each word is normalized right before it is filtered.
    normalized = (Vocabulary.normalize(raw) for raw in iterable)
    kept = [
        token for token in normalized
        if not valid_words or token in valid_words
    ]
    word_count.update(kept)
def index_embedding_words(embedding_file):
    """Build the set of words found in ``embedding_file``.

    The file is read line by line. Each line is right-stripped, the text
    before its first space is taken as the word, and that word is passed
    through ``Vocabulary.normalize`` before being stored. Iteration is
    wrapped in ``tqdm`` with the total taken from ``count_file_lines``.

    The special tokens BOS_WORD, EOS_WORD, PAD_WORD and UNK_WORD are always
    added to the result.

    Returns:
        A set holding the normalized words plus the four special tokens.
    """
    with open(embedding_file) as handle:
        rows = tqdm(handle, total=count_file_lines(embedding_file))
        vocab = {
            Vocabulary.normalize(row.rstrip().partition(' ')[0])
            for row in rows
        }

    vocab |= {BOS_WORD, EOS_WORD, PAD_WORD, UNK_WORD}
    return vocab