"""Run ``manager.perform_plots`` in parallel on a 20 Newsgroups subset.

One task is queued for each ``T`` in (10, 25). Every task is a tuple of the
training matrix, ``T`` and a ``.pkl`` path under ``20news_experiment/``.
NOTE(review): ``T`` is presumably the number of topics and the path is
presumably where results are written -- confirm against
``manager.perform_plots``.
"""
from multiprocessing import Pool

from pyartm_datasets import main_cases
from pyartm import regularizers
from pyartm.optimizations import obd
from pyartm.optimizations import default

import manager

# NOTE(review): neither constant is referenced in the visible part of this
# script -- confirm nothing else relies on them before removing.
ITERS_COUNT = 100
SAMPLES = 100

if __name__ == '__main__':
    # Only the first two values returned by the loader are needed here; the
    # second one (the test matrix) is unpacked but not used below.
    train_n_dw_matrix, test_n_dw_matrix = main_cases.get_20newsgroups(
        [
            'rec.autos',
            'rec.motorcycles',
            'rec.sport.baseball',
            'rec.sport.hockey',
            'sci.crypt',
            'sci.electronics',
            'sci.med',
            'sci.space',
        ],
        train_proportion=0.8)[:2]

    # One argument tuple per task handed to manager.perform_plots.
    args_list = [
        (
            train_n_dw_matrix,
            T,
            '20news_experiment/20news_{}t_plots.pkl'.format(T),
        )
        for T in [10, 25]
    ]

    # Release the worker processes explicitly instead of leaving the pool to
    # the garbage collector. try/finally is used rather than
    # ``with Pool(...)`` so the script also runs on interpreters where Pool
    # is not a context manager (Python < 3.3). ``map`` blocks until every
    # task has finished, so terminating afterwards cannot cut work short.
    pool = Pool(processes=5)
    try:
        pool.map(manager.perform_plots, args_list)
    finally:
        pool.terminate()
        pool.join()
# Runs manager.perform_experiment once on a 20 Newsgroups subset using the
# timed default optimizer with 100 zero-coefficient additive regularizers.
from pyartm import regularizers
from pyartm_datasets import main_cases
from pyartm.optimizations import timed_default

import manager

if __name__ == '__main__':
    newsgroup_names = [
        'rec.autos',
        'rec.motorcycles',
        'rec.sport.baseball',
        'rec.sport.hockey',
        'sci.crypt',
        'sci.electronics',
        'sci.med',
        'sci.space',
    ]
    # Only the first value returned by the loader is used.
    n_dw_matrix = main_cases.get_20newsgroups(newsgroup_names)[0]

    # The same Additive(0., 0.) instance is repeated 100 times.
    optimizer = timed_default.Optimizer(
        regularization_list=[regularizers.Additive(0., 0.)] * 100,
        return_counters=True,
    )

    # NOTE(review): the meaning of the trailing positional arguments
    # (10, 100) is defined by manager.perform_experiment -- confirm there.
    manager.perform_experiment(n_dw_matrix, optimizer, 10, 100)
# Two-class 20 Newsgroups experiment: documents of class 1 are repeated
# `balance` times relative to class 0 before the matrix is used further.
from pyartm_datasets import main_cases
from pyartm import regularizers
from pyartm.optimizations import default, thetaless
from pyartm.common import experiments
from pyartm.calculations import metrics


def print_matrix(arr):
    """Print each row of ``arr`` on its own line, items separated by spaces."""
    for row in arr:
        print(' '.join(str(item) for item in row))


if __name__ == '__main__':
    loaded = main_cases.get_20newsgroups([
        'rec.sport.hockey',
        'talk.politics.guns',
    ])
    # The loader result is unpacked into exactly four values; the second one
    # is discarded.
    _n_dw_matrix, _, num_2_token, doc_targets = loaded

    # Split document indices by target label (0 or 1); any other label is
    # ignored.
    topic_0_indices = []
    topic_1_indices = []
    for index, target in enumerate(doc_targets):
        if target == 0:
            topic_0_indices.append(index)
        elif target == 1:
            topic_1_indices.append(index)

    thetaless_rels = []
    lda_rels = []

    for balance in range(10, 201, 10):
        print(balance)
        # Row selection: every class-0 document once, followed by the
        # class-1 index list repeated `balance` times.
        selected_rows = topic_0_indices + topic_1_indices * balance
        n_dw_matrix = _n_dw_matrix[selected_rows, :]
        regularization_list = [regularizers.Additive(-0.1, 0.)] * 100
perplexities.append(calc_perplexity(phi, theta)) if not os.path.exists(os.path.dirname(output_path)): os.makedirs(os.path.dirname(output_path)) with open(output_path, 'w') as output_file: pickle.dump({ 'init_phi': init_phi, 'init_theta': init_theta, 'perplexities': perplexities, 'phis': phis }, output_file) if __name__ == '__main__': n_dw_matrix, _, _, _ = main_cases.get_20newsgroups([ 'sci.crypt', 'sci.electronics', 'sci.med', 'sci.space' ]) print('Original PLSA') perform_lda( n_dw_matrix, optimizer=get_optimizer(0., 100), T=10, samples=300, output_path='stability_exp/plsa.pkl' ) print('Full initialized PLSA') phi, theta = experiments.default_sample( n_dw_matrix, T=10, seed=42, optimizer=get_optimizer(-0.1, 100) ) init_phi, init_theta = experiments.default_sample( n_dw_matrix, T=10, seed=42, optimizer=get_optimizer(0., 100),
# coding: utf-8
"""Run ``manager.perform_iteration_dependency_experiment`` over a parameter grid.

The grid is ``T`` in 3..15 crossed with ``theta_alpha`` in (-0.1, 0, 0.1),
with ``phi_alpha`` fixed at -0.1. Each grid point becomes one task executed
in a process pool.
NOTE(review): ``T`` is presumably the number of topics, and the ``.pkl``
path is presumably the per-task output file -- confirm against ``manager``.
"""
from multiprocessing import Pool

from pyartm_datasets import main_cases
from pyartm import regularizers
from pyartm.optimizations import default

import manager

if __name__ == '__main__':
    # Only the first two values returned by the loader are used.
    train_n_dw_matrix, test_n_dw_matrix = main_cases.get_20newsgroups(
        ['sci.crypt', 'sci.electronics', 'sci.med', 'sci.space'],
        train_proportion=0.8)[:2]

    args_list = list()
    phi_alpha = -0.1
    for T in range(3, 16):
        for theta_alpha in [-0.1, 0., 0.1]:
            # The same Additive instance is repeated 100 times.
            regularization_list = [
                regularizers.Additive(phi_alpha, theta_alpha)
            ] * 100
            # NOTE(review): the meaning of the positional ``10`` is defined
            # by manager.perform_iteration_dependency_experiment -- confirm.
            args_list.append((
                train_n_dw_matrix,
                test_n_dw_matrix,
                default.Optimizer(regularization_list),
                T,
                10,
                'iter_exp/20news_{}t_{}_{}.pkl'.format(
                    T, phi_alpha, theta_alpha),
            ))

    # Release the worker processes explicitly instead of leaving the pool to
    # the garbage collector. try/finally is used rather than
    # ``with Pool(...)`` so the script also runs on interpreters where Pool
    # is not a context manager (Python < 3.3). ``map`` blocks until every
    # task has finished, so terminating afterwards cannot cut work short.
    pool = Pool(processes=5)
    try:
        pool.map(manager.perform_iteration_dependency_experiment, args_list)
    finally:
        pool.terminate()
        pool.join()
from pyartm.optimizations import default, balanced from pyartm.common import experiments from pyartm.calculations import metrics from pyartm_experiments.balanced import balanced_ptdw def print_matrix(arr): for row in arr: line = list(map(str, row)) print(' '.join(line)) if __name__ == '__main__': _n_dw_matrix, _, num_2_token, doc_targets = main_cases.get_20newsgroups([ 'comp.sys.mac.hardware', 'talk.politics.guns', ]) topic_0_indices, topic_1_indices = [], [] for index, target in enumerate(doc_targets): if target == 0: topic_0_indices.append(index) elif target == 1: topic_1_indices.append(index) for balance in [1, 2, 5, 10, 20, 50, 100, 200, 300, 500]: n_dw_matrix = _n_dw_matrix[topic_0_indices + topic_1_indices * balance, :] regularization_list = [regularizers.Additive(-0.1, 0.)] * 100 lda_phi, lda_theta = experiments.default_sample( n_dw_matrix, T=2,