print "~~~~~~~~~~" print 'cz = ', cluster_sz # calculate language vectors lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages) total_vec.append(lang_vectors) # print cosine angles if ordered == 0: ord_str = 'unordered!' else: ord_str = 'ordered!' # calculate total vector final_lang = sum(total_vec) # calculate variance of cos angle distribution cosangles = utils.cosangles(final_lang, languages) vary = utils.var_measure(cosangles) V[i,j] = vary print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sizes) + ', ' + ord_str + '\n' print "variance of cosine values: " + str(vary) print '==========' np.savez('./vars/vars_dump.npz',V=V, Ns=Ns, sparsities=sparsities,ks=ks) # plot results #CS = plt.contourf(sparsities,Ns,V, alpha=0.7, cmap=plt.cm.jet) CS = plt.contourf(np.log10(ks),Ns,V, alpha=0.7, cmap=plt.cm.jet) CB = plt.colorbar(CS, shrink=0.8, extend='both') plt.xlabel('log(k)') plt.ylabel('N') plt.title('Variance of Cosine Angles Between Vectors')
print "N =", N, ", k =", k, ", ", ord_str for cluster_sz in cluster_sizes: ##print "~~~~~~~~~~" total_vectors = [] # calculate language vectors lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages) #lang_vectors = random_idx.generate_RI_lang_words(N, RI_letters, languages=languages) total_vectors.append(lang_vectors) # print cosine angles #print '==========' # print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n' cosangles = utils.cosangles(lang_vectors, languages) variance = utils.var_measure(cosangles) #print "variance of language values: " + str(utils.var_measure(cosangles)) final_lang = sum(total_vectors) ############################### # iterate through test files and calculate correctness print "Now waiting for input" while True: sentence = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) unknown_tots = [] sentence = cleaner(sentence) unknown_vector = random_idx.generate_RI_sentence(N, RI_letters, cluster_sz, ordered,sentence) unknown_tots.append(unknown_vector) final_unknown = sum(unknown_tots) likely_lang = utils.find_language(sentence, final_unknown, final_lang, languages,display=0)
lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages) total_vectors.append(lang_vectors) # calculate unknown vector unknown_vector = random_idx.generate_RI_text(N, RI_letters, cluster_sz, ordered,unknown_txt) unknown_tots.append(unknown_vector) # print cosine angles print '==========' if ordered == 0: ord_str = 'unordered!' else: ord_str = 'ordered!' print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n' cosangles = utils.cosangles(lang_vectors, languages) variance = utils.var_measure(cosangles) varys.append(variance) print "variance of language values: " + str(utils.var_measure(cosangles)) ######################## ''' # history vectors lang_vectors = random_idx.generate_RI_lang_history(N, RI_letters, languages=languages) total_vectors.append(lang_vectors) unknown_vector = random_idx.generate_RI_text_history(N, RI_letters, unknown_txt) unknown_tots.append(unknown_vector) print "~~~~~~~~~~" print "history vector information" cosangles = utils.cosangles(lang_vectors, languages)
print "N =", N, ", k =", k, ", ", ord_str for cluster_sz in cluster_sizes: ##print "~~~~~~~~~~" total_vectors = [] # calculate language vectors lang_vectors = random_idx.generate_RI_lang(N, RI_letters, cluster_sz, ordered, languages=languages) #lang_vectors = random_idx.generate_RI_lang_words(N, RI_letters, languages=languages) total_vectors.append(lang_vectors) # print cosine angles #print '==========' # print 'N = ' + str(N) + '; k = ' + str(k) + '; letters clusters are ' + str(cluster_sz) + ', ' + ord_str + '\n' cosangles = utils.cosangles(lang_vectors, languages) variance = utils.var_measure(cosangles) #print "variance of language values: " + str(utils.var_measure(cosangles)) final_lang = sum(total_vectors) ############################### # iterate through test files and calculate correctness test_fn = glob.glob(main_base + test_dir + '/*txt') total = len(test_fn) correct = 0 for i in trange(total): testf = test_fn[i] actual_lang = re.findall('(\w+)_\d+_p.txt$', testf)[0] unknown_tots = [] #print len(testf),testf[91:93]