# Class-balance sweep: for every balance factor, fit the default and the
# thetaless optimizer on the same row-selected matrix and print diagnostics.
lda_rels = []
for balance in range(10, 201, 10):
    print(balance)

    # NOTE(review): presumably both index containers are Python lists, so the
    # topic-1 rows are repeated `balance` times -- confirm against the caller.
    selected_rows = topic_0_indices + topic_1_indices * balance
    n_dw_matrix = _n_dw_matrix[selected_rows, :]

    # The same regularizer instance is reused for all 100 iterations.
    regularization_list = [regularizers.Additive(-0.1, 0.)] * 100

    lda_optimizer = default.Optimizer(regularization_list, verbose=False)
    lda_phi, lda_theta = experiments.default_sample(
        n_dw_matrix, T=2, seed=42, optimizer=lda_optimizer)

    thetaless_optimizer = thetaless.Optimizer(
        regularization_list, verbose=False)
    thetaless_phi, thetaless_theta = experiments.default_sample(
        n_dw_matrix, T=2, seed=42, optimizer=thetaless_optimizer)

    # Disabled debug output, kept for reference:
    # print(np.argmax(thetaless_theta[:len(topic_0_indices), :2], axis=1).mean())
    # print(np.argmax(thetaless_theta[len(topic_0_indices):, :2], axis=1).mean())
    # print('!')
    # for topic_set in metrics.get_top_words(thetaless_phi, 10):
    #     print('\n\t'.join(map(num_2_token.get, topic_set)))
    #     print()
    # for topic_set in metrics.get_top_words(thetaless_phi, 5):
    #     print('\n\t'.join(map(num_2_token.get, topic_set)))
    #     print()

    print('lda')
    # #print(np.sum(lda_theta[:, 1]) / np.sum(lda_theta[:, 0]))
    # print(np.argmax(lda_theta, axis=1).mean())
    print(metrics.calc_avg_top_words_jaccards(lda_phi, 20))

    print('thetaless')
    # #print(np.sum(thetaless_theta[:, 1]) / np.sum(thetaless_theta[:, 0]))
'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space' ], train_proportion=0.8)[:2] args_list = list() for T in [10, 25]: for theta_alpha in [0.1, 0.01, 0.1]: regularization_list = [regularizers.Additive(0, theta_alpha) ] * ITERS_COUNT args_list.append( (train_n_dw_matrix, test_n_dw_matrix, default.Optimizer(regularization_list), T, SAMPLES, '20news_experiment/20news_{}t_default_{}_{}.pkl'.format( T, 0., theta_alpha))) args_list.append( (train_n_dw_matrix, test_n_dw_matrix, thetaless.Optimizer(regularization_list), T, SAMPLES, '20news_experiment/20news_{}t_thetaless_{}_{}.pkl'.format( T, 0., theta_alpha))) args_list.append( (train_n_dw_matrix, test_n_dw_matrix, transfer_thetaless.Optimizer(regularization_list), T, SAMPLES, '20news_experiment/20news_{}t_transfer_thetaless_{}_{}.pkl'. format(T, 0., theta_alpha))) #manager.perform_experiment(args_list[0]) #manager.perform_experiment(args_list[1]) manager.perform_experiment(args_list[2]) #Pool(processes=5).map(manager.perform_experiment, args_list)
def _make_accuracy_recorder(classifier, targets, sink):
    """Build an iteration callback that logs classifier accuracy on theta.

    The returned callable has the ``(it, phi, theta)`` signature used for
    ``iteration_callback``; on every call it appends
    ``accuracy_score(classifier.predict(theta), targets)`` to ``sink``.
    """
    def record(it, phi, theta):
        sink.append(accuracy_score(classifier.predict(theta), targets))

    return record


def perform_doc_experiment(args):
    """Run the document-classification experiment and save its results.

    For every seed in ``range(samples)`` the function:

    1. samples ``phi, theta`` on the training matrix with ``optimizer``;
    2. obtains SVM hyper-parameters and cross-validation scores for that
       ``theta`` from ``metrics.create_svm_score_function`` and fits an
       ``SVC`` on it;
    3. runs three optimizers (default with ``const_phi=False``, default with
       ``const_phi=True``, thetaless) on the test matrix starting from the
       sampled ``phi`` and a theta built from an all-ones counter matrix,
       recording after each iteration the accuracy of the fitted ``SVC`` on
       the current test theta.

    The collected per-seed accuracy traces and CV scores are written with
    ``callbacks.save_results``.

    Args:
        args: a single tuple
            ``(n_dw_matrix_doc_train, doc_targets_doc_train,
            n_dw_matrix_doc_test, doc_targets_doc_test, optimizer, T,
            samples, output_path)``. The function takes one tuple (rather
            than eight parameters) so it can be passed to single-argument
            mappers such as ``Pool.map``. Tuple parameters in the signature
            itself were removed in Python 3 (PEP 3113), hence the explicit
            unpacking below.
    """
    (n_dw_matrix_doc_train, doc_targets_doc_train, n_dw_matrix_doc_test,
     doc_targets_doc_test, optimizer, T, samples, output_path) = args

    # Number of rows of the test matrix; used to size the initial theta.
    D, _ = n_dw_matrix_doc_test.shape

    svm_train_score = metrics.create_svm_score_function(
        doc_targets_doc_train, verbose=False)

    # The test-side optimizers only use the first 10 regularizers of the
    # training optimizer.
    test_regularization_list = optimizer.regularization_list[:10]
    opt_plsa_not_const_phi = default.Optimizer(
        regularization_list=test_regularization_list, const_phi=False)
    opt_plsa_const_phi = default.Optimizer(
        regularization_list=test_regularization_list, const_phi=True)
    opt_artm_thetaless = thetaless.Optimizer(
        regularization_list=test_regularization_list)

    res_plsa_not_const_phi = []
    res_plsa_const_phi = []
    res_artm_thetaless = []
    cv_fold_scores = []
    cv_test_scores = []

    for seed in range(samples):
        print(seed)
        phi, theta = experiments.default_sample(
            n_dw_matrix_doc_train, T, seed, optimizer)

        best_C, best_gamma, cv_fold_score, cv_test_score = (
            svm_train_score(theta))
        cv_fold_scores.append(cv_fold_score)
        cv_test_scores.append(cv_test_score)
        print('Fold score: {}\tTest score: {}'.format(cv_fold_score,
                                                     cv_test_score))

        algo = SVC(C=best_C, gamma=best_gamma).fit(
            theta, doc_targets_doc_train)

        # One shared initial theta for all three test-side runs.
        init_theta = common.get_prob_matrix_by_counters(
            np.ones(shape=(D, T), dtype=np.float64))

        # Fresh accuracy traces for this seed, one per optimizer.
        plsa_not_const_phi = []
        plsa_const_phi = []
        artm_thetaless = []

        opt_plsa_not_const_phi.iteration_callback = _make_accuracy_recorder(
            algo, doc_targets_doc_test, plsa_not_const_phi)
        opt_plsa_const_phi.iteration_callback = _make_accuracy_recorder(
            algo, doc_targets_doc_test, plsa_const_phi)
        opt_artm_thetaless.iteration_callback = _make_accuracy_recorder(
            algo, doc_targets_doc_test, artm_thetaless)

        for opt in [
                opt_plsa_not_const_phi, opt_plsa_const_phi,
                opt_artm_thetaless
        ]:
            opt.run(n_dw_matrix_doc_test, phi, init_theta)

        res_plsa_not_const_phi.append(plsa_not_const_phi)
        res_plsa_const_phi.append(plsa_const_phi)
        res_artm_thetaless.append(artm_thetaless)

    # NOTE(review): the original layout was whitespace-collapsed; saving once
    # after all seeds is the reconstructed placement -- confirm it was not
    # meant to checkpoint inside the loop.
    callbacks.save_results(
        {
            'res_plsa_not_const_phi': res_plsa_not_const_phi,
            'res_plsa_const_phi': res_plsa_const_phi,
            'res_artm_thetaless': res_artm_thetaless,
            'cv_fold_scores': cv_fold_scores,
            'cv_test_scores': cv_test_scores
        }, output_path)
train_proportion=0.8)[:2] args_list = list() for T in [20, 50]: for phi_alpha in [-0.1, 0., 0.1]: for theta_alpha in [-0.1, 0., 0.1]: regularization_list = [ regularizers.Additive(phi_alpha, theta_alpha) ] * ITERS_COUNT args_list.append( (train_n_dw_matrix, test_n_dw_matrix, default.Optimizer(regularization_list), T, SAMPLES, 'nips_experiment/NIPS_{}t_base_{}_{}.pkl'.format( T, phi_alpha, theta_alpha))) args_list.append( (train_n_dw_matrix, test_n_dw_matrix, naive_thetaless.Optimizer(regularization_list), T, SAMPLES, 'nips_experiment/NIPS_{}t_naive_{}_{}.pkl'.format( T, phi_alpha, theta_alpha))) for use_B_cheat in [False, True]: args_list.append( (train_n_dw_matrix, test_n_dw_matrix, thetaless.Optimizer(regularization_list, use_B_cheat=use_B_cheat), T, SAMPLES, 'nips_experiment/NIPS_{}t_artm_{}_{}_{}.pkl'.format( T, phi_alpha, theta_alpha, use_B_cheat))) Pool(processes=5).map(manager.perform_experiment, args_list)
'results/{}'.format(name) ) if __name__ == '__main__': n_ww_matrix = main_cases.get_wntm_matrix() args_list = [ create_exp_args(n_ww_matrix, 'plsa', reg_funcs.plsa), create_exp_args(n_ww_matrix, 'plsa_honest', reg_funcs.plsa_honest), create_exp_args(n_ww_matrix, 'plsa_origin', reg_funcs.plsa_origin), create_exp_args( n_ww_matrix, 'plsa_semi_honest', reg_funcs.plsa_semi_honest ), create_exp_args( n_ww_matrix, 'tARTM', optimizer=thetaless.Optimizer( [reg_funcs.trivial] * ITERS_COUNT, use_B_cheat=False ), ), create_exp_args( n_ww_matrix, 'tARTM_cheat', optimizer=thetaless.Optimizer( [reg_funcs.trivial] * ITERS_COUNT, use_B_cheat=True ) ) ] Pool(processes=3).map(manager.perform_ww_experiment, args_list)