def generate_words(cluster_sz, english_vector=english_vector, normed_eng=normed_eng): print "generating english vector of cluster size", cluster_sz # generate english vector #english_vector = random_idx.generate_RI_text_words(N, RI_letters, './lang_texts/texts_english/eng.txt') # generate new string of letters length = 30 alphy = utils.generate_ordered_clusters(alph, cluster_sz=cluster_sz) gstr = alph[np.random.randint(len(alph))] temp_str = gstr for i in xrange(length): max_idx = 0 maxabs = 0 for j in xrange(len(alphy)): temp_str = gstr + alphy[j] temp_id = random_idx.generate_RI_str(N, RI_letters, cluster_sz, ordered, temp_str) #temp_id += 1e1*np.random.randn(1,N) temp_id /= np.linalg.norm(temp_id) absy = np.abs(temp_id.dot(normed_eng.T)) #print temp_str, absy if absy > maxabs: max_idx = j maxabs = absy gstr += alphy[max_idx] print len(gstr), maxabs, gstr
def generate_words(cluster_sz, english_vector=english_vector, normed_eng=normed_eng): print "generating english vector of cluster size", cluster_sz # generate english vector #english_vector = random_idx.generate_RI_text_words(N, RI_letters, './lang_texts/texts_english/eng.txt') # generate new string of letters length = 30 alphy = utils.generate_ordered_clusters(alph, cluster_sz=cluster_sz) gstr = alph[np.random.randint(len(alph))] temp_str = gstr for i in xrange(length): max_idx = 0 maxabs = 0 for j in xrange(len(alphy)): temp_str = gstr + alphy[j] temp_id = random_idx.generate_RI_str(N,RI_letters,cluster_sz,ordered,temp_str) #temp_id += 1e1*np.random.randn(1,N) temp_id /= np.linalg.norm(temp_id) absy = np.abs(temp_id.dot(normed_eng.T)) #print temp_str, absy if absy > maxabs: max_idx = j maxabs = absy gstr += alphy[max_idx] print len(gstr), maxabs, gstr
def log_generate_RI_str(N, RI_letters, cluster_sz, ordered, string, alph=alphabet):
    """Return the element-wise base-2 logarithm of the RI vector for ``string``.

    All arguments are forwarded unchanged, in order, to
    ``random_idx.generate_RI_str``; its result is passed through ``np.log2``.

    Args:
        N, RI_letters, cluster_sz, ordered, string: forwarded as-is.
        alph: alphabet forwarded as the last positional argument; defaults to
            the module-level ``alphabet`` captured when the ``def`` runs.

    Returns:
        ``np.log2`` applied to the generated vector.  Per NumPy semantics,
        zero entries become ``-inf`` and negative real entries become ``nan``.
    """
    return np.log2(
        random_idx.generate_RI_str(N, RI_letters, cluster_sz, ordered, string, alph)
    )