def create_lang_vec(filename, lv, cluster_sizes, N=N, k=k):
    """Sum the random-index vectors of one text over several cluster sizes.

    For each entry of ``cluster_sizes`` this calls
    ``random_idx.generate_RI_text_fast`` on ``filename`` and adds the
    returned vector into a running total.

    Args:
        filename: Text file name, forwarded to
            ``random_idx.generate_RI_text_fast``.
        lv: Forwarded as the second argument of
            ``random_idx.generate_RI_text_fast`` (presumably the
            random-index letter vectors -- confirm against that helper).
        cluster_sizes: Iterable of cluster sizes; one vector is generated
            and accumulated per entry.
        N: Width of the accumulator, which is created with shape
            ``(1, N)``. Defaults to the module-level ``N`` as it was when
            this function was defined.
        k: Not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        A ``numpy`` array of shape ``(1, N)`` holding the sum of the
        generated vectors (all zeros when ``cluster_sizes`` is empty).

    Note:
        The module-level names ``ordered`` and ``alphabet`` are read at
        call time and forwarded to the generator.
    """
    total_lang = np.zeros((1, N))
    progress_label = "generating language vector of cluster size"
    for cz in cluster_sizes:
        # Single-argument, parenthesised print: emits the same text under
        # both Python 2 (print statement) and Python 3 (print function).
        print("%s %s" % (progress_label, cz))
        lang_vector = random_idx.generate_RI_text_fast(
            N, lv, cz, ordered, filename, alphabet
        )
        total_lang += lang_vector
    return total_lang


def log_generate_RI_text_fast(N, RI_letters, cluster_sz, ordered, text_name, alph=alphabet):
    """Return the element-wise base-2 logarithm of a random-index text vector.

    Thin wrapper: every argument is forwarded unchanged, in the same
    order, to ``random_idx.generate_RI_text_fast`` and ``np.log2`` is
    applied to whatever that call returns.

    Args:
        N: Forwarded as the first argument of the generator.
        RI_letters: Forwarded as the second argument of the generator.
        cluster_sz: Forwarded as the third argument of the generator.
        ordered: Forwarded as the fourth argument of the generator.
        text_name: Forwarded as the fifth argument of the generator.
        alph: Forwarded as the sixth argument of the generator. Defaults
            to the module-level ``alphabet`` as it was when this function
            was defined.

    Returns:
        ``np.log2`` of the vector produced by
        ``random_idx.generate_RI_text_fast``.
    """
    text_vector = random_idx.generate_RI_text_fast(
        N, RI_letters, cluster_sz, ordered, text_name, alph
    )
    # NOTE(review): np.log2 yields -inf for zero entries and NaN for
    # negative real entries -- confirm the generated vector cannot contain
    # such values, or that callers tolerate them.
    return np.log2(text_vector)