コード例 #1
0
ファイル: manager.py プロジェクト: ilirhin/python_artm
def perform_experiment(args):
    """Run ``optimizer`` from a warm start for every seed and save the results.

    The arguments arrive packed in a single tuple.  They are unpacked in the
    body rather than in the signature because tuple parameters are a syntax
    error on Python 3 (PEP 3113); callers still pass exactly one tuple.

    Args:
        args: 7-tuple ``(train_n_dw_matrix, test_n_dw_matrix, optimizer, T,
            samples, init_iters, output_path)`` where

            * ``train_n_dw_matrix`` / ``test_n_dw_matrix`` are forwarded to
              ``experiments.default_callback`` (presumably document-word
              count matrices -- confirm against the callers),
            * ``optimizer`` is the optimizer run after the warm start,
            * ``T`` is forwarded to ``experiments.default_sample``
              (presumably the number of topics),
            * ``samples`` is the number of seeds; seeds are
              ``0 .. samples - 1``,
            * ``init_iters`` is how many ``regularizers.Trivial()``
              iterations the warm-start optimizer performs,
            * ``output_path`` is handed to the callback's ``save_results``.
    """
    (
        train_n_dw_matrix, test_n_dw_matrix, optimizer,
        T, samples, init_iters, output_path
    ) = args

    init_optimizer = default.Optimizer([regularizers.Trivial()] * init_iters)
    callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix
    )
    # Both optimizers report to the same callback object.
    init_optimizer.iteration_callback = callback
    optimizer.iteration_callback = callback
    for seed in range(samples):
        print(seed)
        # finish_launch=False: the launch is closed explicitly below, after
        # the main optimizer has also run.
        plsa_phi, plsa_theta = experiments.default_sample(
            train_n_dw_matrix=train_n_dw_matrix,
            T=T,
            seed=seed,
            optimizer=init_optimizer,
            finish_launch=False,
        )
        optimizer.run(train_n_dw_matrix, plsa_phi, plsa_theta)
        if optimizer.iteration_callback:
            optimizer.iteration_callback.finish_launch()

    optimizer.iteration_callback.save_results(output_path)
コード例 #2
0
def perform_experiment(n_dw_matrix, optimizer, T, samples):
    """Sample a model for each seed and print the accumulated timer totals.

    A callback built by ``experiments.default_callback`` (with time
    measurement switched on) is attached to ``optimizer``.  Then, for every
    seed in ``range(samples)``, the seed is printed,
    ``experiments.default_sample`` is called, and
    ``timed_default.SimpleTimer.total_times`` is printed.

    Args:
        n_dw_matrix: matrix passed both to the callback (as the training
            matrix) and to ``default_sample``.
        optimizer: optimizer that receives the callback and is passed to
            ``default_sample``.
        T: forwarded to ``default_sample`` (presumably the topic count).
        samples: number of seeds to run.
    """
    callback_options = dict(
        train_n_dw_matrix=n_dw_matrix,
        top_pmi_sizes=[5, 10, 20, 30],
        top_avg_jaccard_sizes=[10, 50, 100, 200],
        measure_time=True,
    )
    optimizer.iteration_callback = experiments.default_callback(
        **callback_options
    )

    for sample_seed in range(samples):
        print(sample_seed)
        experiments.default_sample(n_dw_matrix, T, sample_seed, optimizer)
        # Printed after every sample, not only once at the end.
        print(timed_default.SimpleTimer.total_times)
コード例 #3
0
ファイル: manager.py プロジェクト: ilirhin/python_artm
def perform_experiment(args):
    """Sample a model for every seed and save the collected callback results.

    The arguments arrive packed in a single tuple.  They are unpacked in the
    body rather than in the signature because tuple parameters are a syntax
    error on Python 3 (PEP 3113); callers still pass exactly one tuple.

    Args:
        args: 6-tuple ``(train_n_dw_matrix, test_n_dw_matrix, optimizer, T,
            samples, output_path)``.  The two matrices are forwarded to
            ``experiments.default_callback``; ``optimizer`` and ``T`` are
            forwarded to ``experiments.default_sample``; ``samples`` is the
            number of seeds (``0 .. samples - 1``); ``output_path`` is
            handed to the callback's ``save_results``.
    """
    (train_n_dw_matrix, test_n_dw_matrix, optimizer, T,
     samples, output_path) = args

    optimizer.iteration_callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
        top_pmi_sizes=[5, 10, 20, 30],
        top_avg_jaccard_sizes=[10, 50, 100, 200])
    for seed in range(samples):
        print(seed)
        experiments.default_sample(train_n_dw_matrix, T, seed, optimizer)
    optimizer.iteration_callback.save_results(output_path)
コード例 #4
0
def perform_iteration_dependency_experiment(args):
    """Sample a model for every seed with a callback using ``iter_eval_step=5``.

    The arguments arrive packed in a single tuple.  They are unpacked in the
    body rather than in the signature because tuple parameters are a syntax
    error on Python 3 (PEP 3113); callers still pass exactly one tuple.

    Args:
        args: 6-tuple ``(train_n_dw_matrix, test_n_dw_matrix, optimizer, T,
            samples, output_path)``.  The two matrices are forwarded to
            ``experiments.default_callback``; ``optimizer`` and ``T`` are
            forwarded to ``experiments.default_sample``; ``samples`` is the
            number of seeds (``0 .. samples - 1``); ``output_path`` is
            handed to the callback's ``save_results``.
    """
    (
        train_n_dw_matrix, test_n_dw_matrix, optimizer,
        T, samples, output_path
    ) = args

    optimizer.iteration_callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
        uniqueness_measures=True,
        iter_eval_step=5
    )
    for seed in range(samples):
        print(seed)
        experiments.default_sample(train_n_dw_matrix, T, seed, optimizer)
    optimizer.iteration_callback.save_results(output_path)
コード例 #5
0
def perform_alpha_dependency_experiment(args):
    """Evaluate the final model of every seed inside one callback launch.

    Unlike the per-iteration experiments, the callback is not attached to
    the optimizer here: it is invoked manually, once per seed, with
    iteration number 0 and whatever ``experiments.default_sample`` returns.

    The arguments arrive packed in a single tuple.  They are unpacked in the
    body rather than in the signature because tuple parameters are a syntax
    error on Python 3 (PEP 3113); callers still pass exactly one tuple.

    Args:
        args: 5-tuple ``(train_n_dw_matrix, optimizer, T, samples,
            output_path)``.  ``train_n_dw_matrix`` is forwarded to both the
            callback and ``default_sample``; ``optimizer`` and ``T`` are
            forwarded to ``default_sample``; ``samples`` is the number of
            seeds (``0 .. samples - 1``); ``output_path`` is handed to the
            callback's ``save_results``.
    """
    train_n_dw_matrix, optimizer, T, samples, output_path = args

    callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        uniqueness_measures=True
    )
    # A single launch spans all seeds.
    callback.start_launch()
    for seed in range(samples):
        print(seed)
        callback(0, *experiments.default_sample(
            train_n_dw_matrix, T, seed, optimizer
        ))
    callback.finish_launch()
    callback.save_results(output_path)
コード例 #6
0
def perform_experiment(train_n_dw_matrix, test_n_dw_matrix, optimizer, T,
                       samples, output_path, tau, path_phi_output):
    """Sample a model for every seed, save the metrics and pickle the last phi.

    Args:
        train_n_dw_matrix: matrix forwarded to the callback and to
            ``experiments.default_sample``.
        test_n_dw_matrix: matrix forwarded to the callback.
        optimizer: optimizer that receives the callback and is passed to
            ``default_sample``.
        T: forwarded to ``default_sample`` (presumably the topic count).
        samples: number of seeds to run (``0 .. samples - 1``); must be at
            least 1.
        output_path: handed to the callback's ``save_results``.
        tau: extra positional argument forwarded to ``default_sample``.
        path_phi_output: file that receives the pickled phi of the last
            seed.

    Returns:
        The ``(phi, theta)`` pair produced for the last seed.

    Raises:
        ValueError: if ``samples`` is smaller than 1.  Without this check the
            loop body would never run, leaving nothing to pickle or return,
            and the failure would surface as an ``UnboundLocalError`` only
            after the output file had already been opened for writing.
    """
    if samples < 1:
        raise ValueError(
            'samples must be at least 1, got {!r}'.format(samples))

    optimizer.iteration_callback = experiments.default_callback(
        train_n_dw_matrix=train_n_dw_matrix,
        test_n_dw_matrix=test_n_dw_matrix,
        top_pmi_sizes=[5, 10, 20, 30],
        top_avg_jaccard_sizes=[10, 50, 100, 200],
        measure_time=False)

    # Each iteration overwrites the previous result; only the last seed's
    # matrices survive the loop.
    for seed in range(samples):
        expphi, exptheta = experiments.default_sample(train_n_dw_matrix, T,
                                                      seed, optimizer, tau)
    optimizer.iteration_callback.save_results(output_path)
    with open(path_phi_output, 'wb') as resource_file:
        pickle.dump(expphi, resource_file)
    return (expphi, exptheta)
コード例 #7
0
ファイル: manager.py プロジェクト: ilirhin/python_artm
def perform_ww_experiment(args):
    """Run ``symmetric_sample`` for every seed and save one file per seed.

    For each seed the sampled matrices are evaluated once with a fresh
    ``experiments.default_callback`` and stored, together with the
    measured properties, as ``seed_<seed>.pkl`` inside ``output_dir``.

    The arguments arrive packed in a single tuple.  They are unpacked in the
    body rather than in the signature because tuple parameters are a syntax
    error on Python 3 (PEP 3113); callers still pass exactly one tuple.

    Args:
        args: 5-tuple ``(n_ww_matrix, optimizer, T, samples, output_dir)``.
            ``n_ww_matrix`` is forwarded to ``Callback``, to the per-seed
            callback and to ``symmetric_sample``; ``optimizer`` and ``T``
            are forwarded to ``symmetric_sample``; ``samples`` is the
            number of seeds (``0 .. samples - 1``); ``output_dir`` is the
            directory the per-seed result files are written to.
    """
    n_ww_matrix, optimizer, T, samples, output_dir = args

    optimizer.iteration_callback = Callback(n_ww_matrix)
    for seed in range(samples):
        print('Seed', seed)
        # A fresh evaluation callback per seed keeps the results separate.
        seed_callback = experiments.default_callback(
            train_n_dw_matrix=n_ww_matrix,
            top_avg_jaccard_sizes=[10, 50, 100, 200])
        phi, theta, n_tw, n_dt = symmetric_sample(n_ww_matrix, T, seed,
                                                  optimizer)
        seed_callback.start_launch()
        seed_callback(0, phi, theta)
        seed_callback.finish_launch()
        result = dict(phi=phi, theta=theta, n_tw=n_tw, n_dt=n_dt)
        # Keep only element [0][0] of every recorded entry (presumably the
        # single launch / single evaluation made above -- confirm against
        # the callback implementation).
        result['properties'] = {
            key: value[0][0]
            for key, value in seed_callback.result.items()
        }
        callbacks.save_results(
            result, os.path.join(output_dir, 'seed_{}.pkl'.format(seed)))
コード例 #8
0
def perform_experiment(train_n_dw_matrix, test_n_dw_matrix, T, num_2_token,
                       seeds=(42, 7, 777, 12)):
    """Compare an ``LdaModel`` fit with the project's own optimizer.

    For every seed an ``LdaModel`` is trained on the training corpus, the
    project's ``default.Optimizer`` is run from a seeded random start, and
    both resulting ``(phi, theta)`` pairs are passed to the same evaluation
    callback.  The measured values are printed under the headings
    ``artm`` and ``gensim``.

    Args:
        train_n_dw_matrix: training matrix; iterating it must yield rows
            exposing ``indices`` and ``data`` and it must have a 2-D
            ``shape`` (presumably a SciPy CSR matrix -- confirm against
            the callers).
        test_n_dw_matrix: matrix forwarded to the evaluation callback.
        T: number of topics (passed as ``num_topics`` and used as a
            dimension of the initial phi and theta matrices).
        num_2_token: mapping passed to ``LdaModel`` as ``id2word``.
        seeds: random seeds to run; defaults to the values that were
            previously hard-coded.
    """
    # Materialize every document as a list: the corpus is traversed several
    # times (model training, theta extraction, perplexity, and again for
    # each seed), and on Python 3 a bare ``zip`` object would be exhausted
    # after the first pass.
    train_corpus = [
        list(zip(row.indices, row.data)) for row in train_n_dw_matrix
    ]
    D, W = train_n_dw_matrix.shape

    for seed in seeds:
        model = LdaModel(train_corpus,
                         alpha='auto',
                         id2word=num_2_token,
                         num_topics=T,
                         iterations=500,
                         random_state=seed)
        gensim_phi = exp_common.get_phi(model)
        gensim_theta = exp_common.get_theta(train_corpus, model)
        print('gensim perplexity')
        print(np.exp(-model.log_perplexity(train_corpus)))

        # Seeded random start for phi, all-ones start for theta; both go
        # through common.get_prob_matrix_by_counters before optimization.
        random_gen = np.random.RandomState(seed)
        phi = common.get_prob_matrix_by_counters(
            random_gen.uniform(size=(T, W)).astype(np.float64))
        theta = common.get_prob_matrix_by_counters(
            np.ones(shape=(D, T)).astype(np.float64))
        phi, theta = default.Optimizer([regularizers.Additive(0.1, 0.)] * 100,
                                       verbose=False).run(
                                           train_n_dw_matrix, phi, theta)

        callback = experiments.default_callback(
            train_n_dw_matrix=train_n_dw_matrix,
            test_n_dw_matrix=test_n_dw_matrix,
            top_pmi_sizes=[5, 10, 20, 30],
            top_avg_jaccard_sizes=[10, 50, 100, 200],
            measure_time=True)
        callback.start_launch()
        # Evaluation 0 is the project's model, evaluation 1 is LdaModel's.
        callback(0, phi, theta)
        callback(1, gensim_phi, gensim_theta)

        for label, index in (('artm', 0), ('gensim', 1)):
            print(label)
            for name, values in callback.launch_result.items():
                print('\t{}: {}'.format(name, values[index]))