# --- wmean / google (disabled) ----------------------------------------------
# min, max = sp.calculate_min_max_from_table(tables)
# split = sp.calculate_split_from_table(tables, verbose=False, normalize=(min, max))
# sp.make_plot_from_table(tables, labels=labels, colors=colors, output='wmean-google-val.png', log=False, normalize=(min, max))
# tables = ['../data/google/pairs/sets/wmean_no_pairs_r-test.npy', '../data/google/pairs/sets/wmean_pairs_r-test.npy']
# sp.calculate_error_rate_from_table(tables, split, normalize=(min, max))
# sp.calculate_hellinger_distance_from_table(tables, normalize=(min, max))
# sp.make_plot_from_table(tables, labels=labels, colors=colors, output='wmean-google.png', log=False, normalize=(min, max))
# print ""
#

# --- nntopmedian / google ----------------------------------------------------
# Validation tables: used to obtain the (min, max) pair handed to every
# `normalize=` argument below and the `split` value handed to the error-rate
# call on the test tables.
tables = [
    '../data/google/pairs/sets/nntopmedian_no_pairs_r-validation.npy',
    '../data/google/pairs/sets/nntopmedian_pairs_r-validation.npy',
]
# NOTE(review): `min` / `max` shadow the builtins from here on. The names are
# kept because code outside this section may read them - consider renaming.
min, max = sp.calculate_min_max_from_table(tables)
split = sp.calculate_split_from_table(
    tables, verbose=True, normalize=(min, max))
# sp.make_plot_from_table(tables, labels=labels, colors=colors, output='nntop-google-val.png', log=False, normalize=(min, max))

# Test tables: evaluated with the split and normalisation computed above.
tables = [
    '../data/google/pairs/sets/nntopmedian_no_pairs_r-test.npy',
    '../data/google/pairs/sets/nntopmedian_pairs_r-test.npy',
]
sp.calculate_error_rate_from_table(
    tables, split, verbose=True, normalize=(min, max))
sp.calculate_JS_from_table(tables, verbose=True, normalize=(min, max))
# sp.make_plot_from_table(tables, labels=labels, colors=colors, output='nntop-google.png', log=False, normalize=(min, max))

# Blank separator line in the console output. The parenthesised form prints
# the same thing under Python 2 and is also valid Python 3.
print("")

# PROCESSING
# texts1 = ['../data/wiki/pairs/sets/enwiki_no_pairs_r-validation.txt', '../data/wiki/pairs/sets/enwiki_pairs_r-validation.txt']
# output1 = ['../data/google/pairs/sets/nntopmedian_no_pairs_r-validation.npy', '../data/google/pairs/sets/nntopmedian_pairs_r-validation.npy']
# p1 = Process(target=sp.process_to_file_with_filter, args=(metrics.NNVarMean(metrics.euclidean, 'VM'), texts1, output1, 2000000, w, docfreqs))
# p1.start()
#
# texts2 = ['../data/wiki/pairs/sets/enwiki_no_pairs_r-test.txt', '../data/wiki/pairs/sets/enwiki_pairs_r-test.txt']
# output2 = ['../data/google/pairs/sets/nntopmedian_no_pairs_r-test.npy', '../data/google/pairs/sets/nntopmedian_pairs_r-test.npy']
# p2 = Process(target=sp.process_to_file_with_filter, args=(metrics.NNVarMean(metrics.euclidean, 'VM'), texts2, output2, 1600000, w, docfreqs))
# p2.start()
#
# texts3 = ['../data/wiki/pairs/sets/enwiki_no_pairs_r-validation.txt', '../data/wiki/pairs/sets/enwiki_pairs_r-validation.txt']
'../data/google/pairs/sets/nntopmedian_pairs_r-validation.npy' ] min, max = sp.calculate_min_max_from_table(tables) split = sp.calculate_split_from_table(tables, verbose=True, normalize=(min, max)) # sp.make_plot_from_table(tables, labels=labels, colors=colors, output='nntop-google-val.png', log=False, normalize=(min, max)) tables = [ '../data/google/pairs/sets/nntopmedian_no_pairs_r-test.npy', '../data/google/pairs/sets/nntopmedian_pairs_r-test.npy' ] sp.calculate_error_rate_from_table(tables, split, verbose=True, normalize=(min, max)) sp.calculate_JS_from_table(tables, verbose=True, normalize=(min, max)) # sp.make_plot_from_table(tables, labels=labels, colors=colors, output='nntop-google.png', log=False, normalize=(min, max)) print "" # PROCESSING # texts1 = ['../data/wiki/pairs/sets/enwiki_no_pairs_r-validation.txt', '../data/wiki/pairs/sets/enwiki_pairs_r-validation.txt'] # output1 = ['../data/google/pairs/sets/nntopmedian_no_pairs_r-validation.npy', '../data/google/pairs/sets/nntopmedian_pairs_r-validation.npy'] # p1 = Process(target=sp.process_to_file_with_filter, args=(metrics.NNVarMean(metrics.euclidean, 'VM'), texts1, output1, 2000000, w, docfreqs)) # p1.start() # # texts2 = ['../data/wiki/pairs/sets/enwiki_no_pairs_r-test.txt', '../data/wiki/pairs/sets/enwiki_pairs_r-test.txt'] # output2 = ['../data/google/pairs/sets/nntopmedian_no_pairs_r-test.npy', '../data/google/pairs/sets/nntopmedian_pairs_r-test.npy'] # p2 = Process(target=sp.process_to_file_with_filter, args=(metrics.NNVarMean(metrics.euclidean, 'VM'), texts2, output2, 1600000, w, docfreqs)) # p2.start() # # texts3 = ['../data/wiki/pairs/sets/enwiki_no_pairs_r-validation.txt', '../data/wiki/pairs/sets/enwiki_pairs_r-validation.txt']
# print ""
#
# --- nntop / google (disabled) -----------------------------------------------
# tablesA = ['../data/google/pairs/sets/nntop_no_pairs_r-validation.npy', '../data/google/pairs/sets/nntop_pairs_r-validation.npy']
# minA, maxA = sp.calculate_min_max_from_table(tablesA)
# splitA = sp.calculate_split_from_table(tablesA, verbose=True, normalize=(minA, maxA))
# tablesA = ['../data/google/pairs/sets/nntop_no_pairs_r-test.npy', '../data/google/pairs/sets/nntop_pairs_r-test.npy']
# sp.calculate_error_rate_from_table(tablesA, splitA, normalize=(minA, maxA))
# sp.calculate_JS_from_table(tablesA, normalize=(minA, maxA), verbose=True)
# print ""

# --- nntopcontr / tweets -----------------------------------------------------
# Validation tables: used to obtain (minA, maxA) for every `normalize=`
# argument below and the `splitA` value handed to the error-rate call.
tablesA = [
    '../data/tweets/pairs/sets/nntopcontr_no_pairs-validation.npy',
    '../data/tweets/pairs/sets/nntopcontr_pairs-validation.npy',
]
minA, maxA = sp.calculate_min_max_from_table(tablesA)
splitA = sp.calculate_split_from_table(
    tablesA, verbose=True, normalize=(minA, maxA))

# Test tables: evaluated with the split and normalisation computed above.
tablesA = [
    '../data/tweets/pairs/sets/nntopcontr_no_pairs-test.npy',
    '../data/tweets/pairs/sets/nntopcontr_pairs-test.npy',
]
sp.calculate_error_rate_from_table(tablesA, splitA, normalize=(minA, maxA))
sp.calculate_JS_from_table(tablesA, normalize=(minA, maxA), verbose=True)

# Blank separator line in the console output. The parenthesised form prints
# the same thing under Python 2 and is also valid Python 3.
print("")

#sp.calculate_bootstrap_test_from_table(tablesB, tablesA, splitB, splitA, True, 5000, (minB, maxB), (minA, maxA))

# LDA-code
# dictionary = gensim.corpora.Dictionary.load('../data/model/wiki_wordids_filtered_2.dict')
# dictionary.num_docs = metrics.N_DOCUMENTS
#
# #pca_model = np.load('../data/model/pca.model.npz')['arr_0']
# import lda
# lda_model = lda.lda()
# print ""

# --- nntopcontr / tweets -----------------------------------------------------
# Validation tables: used to obtain (minA, maxA) for every `normalize=`
# argument below and the `splitA` value handed to the error-rate call.
tablesA = [
    '../data/tweets/pairs/sets/nntopcontr_no_pairs-validation.npy',
    '../data/tweets/pairs/sets/nntopcontr_pairs-validation.npy'
]
minA, maxA = sp.calculate_min_max_from_table(tablesA)
splitA = sp.calculate_split_from_table(
    tablesA, verbose=True, normalize=(minA, maxA))

# Test tables: evaluated with the split and normalisation computed above.
tablesA = [
    '../data/tweets/pairs/sets/nntopcontr_no_pairs-test.npy',
    '../data/tweets/pairs/sets/nntopcontr_pairs-test.npy'
]
sp.calculate_error_rate_from_table(tablesA, splitA, normalize=(minA, maxA))
sp.calculate_JS_from_table(tablesA, normalize=(minA, maxA), verbose=True)

# Blank separator line in the console output. The parenthesised form prints
# the same thing under Python 2 and is also valid Python 3.
print("")

#sp.calculate_bootstrap_test_from_table(tablesB, tablesA, splitB, splitA, True, 5000, (minB, maxB), (minA, maxA))

# LDA-code
# dictionary = gensim.corpora.Dictionary.load('../data/model/wiki_wordids_filtered_2.dict')
# dictionary.num_docs = metrics.N_DOCUMENTS
#
# #pca_model = np.load('../data/model/pca.model.npz')['arr_0']
# import lda
# lda_model = lda.lda()
# lda_model.load('../data/model/lda.model')
#
# texts = ['../data/pairs/enwiki_no_pairs_10.txt', '../data/pairs/enwiki_pairs_10.txt']
# labels = ['Pairs', 'No pairs']