def run_level1():
    """Configure and launch ``run_hefbib`` for the four level-1 topics.

    The topics are seeded from the phrase-distribution files
    ``1dm``, ``2ml``, ``3db`` and ``4ir`` (in that order).  A venue prior
    matrix is built with one extra leading row (row 0 is left at all ones);
    rows 1-4 get larger weights for the venues listed below.
    """
    # Problem sizes, named once so the dependent shapes stay in sync.
    num_topics = 4
    num_venues = 23
    num_authors = 38491

    seed_names = ('1dm-seed-ext', '2ml-seed-ext', '3db-seed-ext', '4ir-seed-ext')
    phrase_dist_files = [PHRASE_DIST_PATH + seed for seed in seed_names]

    # set venue prior
    # Only the name -> index mapping is needed; the reverse map is unused.
    venue_name_idx, _venue_idx_name = fio.read_dictionary_file(
        DATA_PATH + 'id_venue')

    # (prior row, ((venue name, weight), ...)); every other cell stays at 1.
    boosted_venues = (
        (1, (('KDD', 100), ('ICDE', 70), ('CIKM', 50))),    # dm
        (2, (('ICML', 100),)),                              # ml
        (3, (('VLDB', 95), ('SIGMOD', 70), ('ICDE', 50))),  # db
        (4, (('SIGIR', 95),)),                              # ir
    )

    # One row more than the number of topics (see ef_alpha below).
    venue_topical_prior = np.ones((num_topics + 1, num_venues))
    for topic_row, weights in boosted_venues:
        for venue, weight in weights:
            venue_topical_prior[topic_row][venue_name_idx[venue]] = weight

    # Parenthesised single string: same output under the Python 2 print
    # statement this file uses.
    print("Set priors complete.")

    # NOTE(review): run_level2_dm passes the same output_file path -- confirm
    # whether one run is expected to replace the other's output.
    run_hefbib(
        input_corpus=DATA_PATH + 'AMiner-Paper-after1996-23venues-authorid-validcites-reindex-phrases-index.txt',
        input_phrase_dists=phrase_dist_files,
        background_prob_lst=[0.3] * num_topics,
        tot_num_topics=num_topics,
        tot_num_phrases=5100,
        tot_num_authors=num_authors,
        tot_num_venues=num_venues,
        ef_alpha=np.ones(num_topics + 1),  # always be tot_num_topics + 1
        ef_beta=np.ones(num_authors),
        ef_gamma=venue_topical_prior,
        ef_omega=None,
        ef_iter=1000,
        br_iter=110,
        output_file=DATA_PATH + 'logs/ahaha')
def run_level2_dm():
    """Configure and launch ``run_hefbib`` for the three "dm" sub-topics.

    The sub-topics are seeded from the phrase-distribution files
    ``1dm-1fp``, ``1dm-2ds`` and ``1dm-3net`` (in that order).  The venue
    prior matrix has one extra leading row (row 0 is left at all ones);
    rows 1-3 all receive the same boosted weights for KDD, ICDE and CIKM.
    """
    # Problem sizes, named once so the dependent shapes stay in sync.
    num_topics = 3
    num_venues = 23
    num_authors = 38491

    seed_names = ('1dm-1fp-seed-ext', '1dm-2ds-seed-ext', '1dm-3net-seed-ext')
    phrase_dist_files = [PHRASE_DIST_PATH + seed for seed in seed_names]

    # set venue prior
    # Only the name -> index mapping is needed; the reverse map is unused.
    venue_name_idx, _venue_idx_name = fio.read_dictionary_file(
        DATA_PATH + 'id_venue')

    # Every sub-topic row gets the same venue weights:
    #   row 1: dm - frequent pattern
    #   row 2: dm - data stream
    #   row 3: dm - social network
    shared_weights = (('KDD', 100), ('ICDE', 70), ('CIKM', 50))

    # One row more than the number of topics (see ef_alpha below).
    venue_topical_prior = np.ones((num_topics + 1, num_venues))
    for topic_row in range(1, num_topics + 1):
        for venue, weight in shared_weights:
            venue_topical_prior[topic_row][venue_name_idx[venue]] = weight

    # Parenthesised single string: same output under the Python 2 print
    # statement this file uses.
    print("Set priors complete.")

    # NOTE(review): run_level1 passes the same output_file path -- confirm
    # whether one run is expected to replace the other's output.
    run_hefbib(
        input_corpus=DATA_PATH + 'AMiner-Paper-after1996-23venues-authorid-validcites-reindex-phrases-index.txt',
        input_phrase_dists=phrase_dist_files,
        background_prob_lst=[0.2] * num_topics,
        tot_num_topics=num_topics,
        tot_num_phrases=5100,
        tot_num_authors=num_authors,
        tot_num_venues=num_venues,
        ef_alpha=np.ones(num_topics + 1),  # always be tot_num_topics + 1
        ef_beta=np.ones(num_authors),
        ef_gamma=venue_topical_prior,
        ef_omega=None,
        ef_iter=1500,
        br_iter=110,
        output_file=DATA_PATH + 'logs/ahaha')