def create_one_hot_vecs_from_folder(V, dir_path):
    """Build one vector per "txt" file in a folder and stack them into an array.

    For every file returned by ``get_files_of_folder(dir_path, "txt")`` the
    file's words are read with ``get_words_from_file`` and passed, together
    with ``V``, to ``create_one_hot_vec``. A progress line is printed after
    each file.

    Args:
        V: Passed through unchanged to ``create_one_hot_vec``
            (presumably the vocabulary -- confirm against that helper).
        dir_path: Folder whose "txt" files are processed.

    Returns:
        ``np.array`` built from the list of per-file vectors, in the order
        the files were returned by ``get_files_of_folder``.
    """
    vectors = []
    for path in get_files_of_folder(dir_path, "txt"):
        vectors.append(create_one_hot_vec(V, get_words_from_file(path)))
        print("finished: " + path)
    return np.array(vectors)
def create_indexes_matrix_from_folder(V, dir_path):
    """Map each "txt" file in a folder to the result of ``get_index``.

    Every file returned by ``get_files_of_folder(dir_path, "txt")`` is read
    with ``get_words_from_file``; the words and ``V`` are handed to
    ``get_index``. A progress line is printed after each file.

    Args:
        V: Passed through unchanged to ``get_index``
            (presumably the vocabulary -- confirm against that helper).
        dir_path: Folder whose "txt" files are processed.

    Returns:
        dict keyed by a name derived from the file path: the text after the
        last "/" with every occurrence of ".txt" removed. Values are
        whatever ``get_index`` returns for that file's words.
    """
    indexes_by_name = {}
    for path in get_files_of_folder(dir_path, "txt"):
        indexes = get_index(V, get_words_from_file(path))
        # Keep only the last "/"-separated component, then drop ".txt".
        name = path.rsplit("/", 1)[-1].replace(".txt", "")
        indexes_by_name[name] = indexes
        print("finished: " + path)
    return indexes_by_name
def create_distinct_words_from_folder(train_dir_path, test_dir_path):
    """Collect the distinct words found in the "txt" files of two folders.

    Files from ``train_dir_path`` are read first, followed by files from
    ``test_dir_path``. Each word is kept once, in order of first appearance.

    Args:
        train_dir_path: First folder to scan for "txt" files.
        test_dir_path: Second folder to scan for "txt" files.

    Returns:
        ``np.array`` built from the list of distinct words.
    """
    # Copy into a fresh list so the list returned by the helper is never
    # mutated by the extend() below.
    files = list(get_files_of_folder(train_dir_path, "txt"))
    files.extend(get_files_of_folder(test_dir_path, "txt"))

    distinct = []
    # `seen` mirrors `distinct` to give O(1) membership checks; testing
    # membership against the growing list made the loop quadratic.
    # NOTE(review): assumes words are hashable (e.g. str) -- confirm against
    # get_words_from_file.
    seen = set()
    for f in files:
        for w in get_words_from_file(f):
            if w not in seen:
                seen.add(w)
                distinct.append(w)
    return np.array(distinct)
def create_indexes_matrix_from_file_multithread(V, file, output):
    """Compute the ``get_index`` result for one file and put it on ``output``.

    The file's words are read with ``get_words_from_file`` and passed, with
    ``V``, to ``get_index``. The result is delivered as a ``(key, indexes)``
    tuple via ``output.put`` and a progress line is printed afterwards.

    Args:
        V: Passed through unchanged to ``get_index``
            (presumably the vocabulary -- confirm against that helper).
        file: Path of the file to process.
        output: Object exposing ``put``; receives the ``(key, indexes)``
            tuple (presumably a queue shared with the caller -- confirm).

    Returns:
        None. The result is communicated only through ``output``.
    """
    indexes = get_index(V, get_words_from_file(file))
    # Key is the text after the last "/" with every ".txt" removed.
    name = file.rsplit("/", 1)[-1].replace(".txt", "")
    result = (name, indexes)
    output.put(result)
    print("finished: " + file)