def create_english_vec(N=N,k=k, cluster_sz = [2]):
        print "generating english vector of cluster size", cluster_sz
        total_eng = np.zeros((1,N))
        # generate english vector
        for cz in cluster_sz:
                english_vector = random_idx.generate_RI_lang(N, RI_letters, cz, ordered, languages=['eng'])
        total_eng += english_vector


        normed_eng = total_eng/np.linalg.norm(total_eng)
        return total_eng, normed_eng
def create_english_vec(N=N, k=k, cluster_sz=[2]):
    print "generating english vector of cluster size", cluster_sz
    total_eng = np.zeros((1, N))
    # generate english vector
    for cz in cluster_sz:
        english_vector = random_idx.generate_RI_lang(N,
                                                     RI_letters,
                                                     cz,
                                                     ordered,
                                                     languages=['eng'])
    total_eng += english_vector

    normed_eng = total_eng / np.linalg.norm(total_eng)
    return total_eng, normed_eng
def generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=None):
    """Forward the call unchanged to ``random_idx.generate_RI_lang``.

    All five arguments are passed positionally, in the order received, and
    the delegate's return value is handed back as-is.
    """
    delegate = random_idx.generate_RI_lang
    return delegate(N, RI_letters, cluster_sz, ordered, languages)
				# NOTE(review): this is the body of an enclosing loop whose header is
				# not visible here; i, j, N, ks, cluster_sizes, ordered, languages and
				# V are expected to come from that outer scope.
				# Disabled alternative: derive k from a sparsity value instead of ks[j].
				#sparsity = sparsities[j]
				#k = int(N*sparsity/2)
				k = ks[j]
				# Skip this (N, k) combination when k is not strictly smaller than N.
				if k >= N:
						continue
				# Fresh letter id vectors for this (N, k) pair.
				RI_letters = random_idx.generate_letter_id_vectors(N,k)
				# Collects one generate_RI_lang result per cluster size.
				total_vec = []
				print N,k
				print '=========='

				# iterate over the cluster sizes, generating language vectors for each
				for cluster_sz in cluster_sizes:
						print "~~~~~~~~~~"
						print 'cz = ', cluster_sz
						# calculate language vectors
						lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages)
						total_vec.append(lang_vectors)
						# Label for the summary print below.
						# NOTE(review): depends only on `ordered`, yet is set on every
						# iteration and stays undefined if cluster_sizes is empty.
						if ordered == 0:
								ord_str = 'unordered!'
						else:
								ord_str = 'ordered!'

				# calculate total vector: add up the per-cluster-size results
				# (assumes they are numpy arrays so sum() adds element-wise -- TODO confirm)
				final_lang = sum(total_vec)

				# calculate variance of cos angle distribution
				cosangles = utils.cosangles(final_lang, languages)
				vary = utils.var_measure(cosangles)
				# Store the measure in the result grid at position (i, j).
				V[i,j] = vary
				print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sizes) + ', ' + ord_str + '\n'
def generate_RI_lang(N,RI_letters, cluster_sz, ordered, languages=None):
	"""Thin pass-through to ``random_idx.generate_RI_lang``.

	The five arguments are forwarded positionally in their original order
	and the result of the underlying call is returned untouched.
	"""
	forwarded = (N, RI_letters, cluster_sz, ordered, languages)
	return random_idx.generate_RI_lang(*forwarded)