lda_rels = []
 for balance in range(10, 201, 10):
     print(balance)
     n_dw_matrix = _n_dw_matrix[topic_0_indices +
                                topic_1_indices * balance, :]
     regularization_list = [regularizers.Additive(-0.1, 0.)] * 100
     lda_phi, lda_theta = experiments.default_sample(
         n_dw_matrix,
         T=2,
         seed=42,
         optimizer=default.Optimizer(regularization_list, verbose=False))
     thetaless_phi, thetaless_theta = experiments.default_sample(
         n_dw_matrix,
         T=2,
         seed=42,
         optimizer=thetaless.Optimizer(regularization_list, verbose=False))
     # print(np.argmax(thetaless_theta[:len(topic_0_indices), :2], axis=1).mean())
     # print(np.argmax(thetaless_theta[len(topic_0_indices):, :2], axis=1).mean())
     # print('!')
     # for topic_set in metrics.get_top_words(thetaless_phi, 10):
     #     print('\n\t'.join(map(num_2_token.get, topic_set)))
     #     print()
     # for topic_set in metrics.get_top_words(thetaless_phi, 5):
     #     print('\n\t'.join(map(num_2_token.get, topic_set)))
     #     print()
     print('lda')
     # #print(np.sum(lda_theta[:, 1]) / np.sum(lda_theta[:, 0]))
     # print(np.argmax(lda_theta, axis=1).mean())
     print(metrics.calc_avg_top_words_jaccards(lda_phi, 20))
     print('thetaless')
     # #print(np.sum(thetaless_theta[:, 1]) / np.sum(thetaless_theta[:, 0]))
Example #2
0
            'rec.sport.hockey', 'sci.crypt', 'sci.electronics', 'sci.med',
            'sci.space'
        ],
        train_proportion=0.8)[:2]

    args_list = list()
    for T in [10, 25]:
        for theta_alpha in [0.1, 0.01, 0.1]:
            regularization_list = [regularizers.Additive(0, theta_alpha)
                                   ] * ITERS_COUNT
            args_list.append(
                (train_n_dw_matrix, test_n_dw_matrix,
                 default.Optimizer(regularization_list), T, SAMPLES,
                 '20news_experiment/20news_{}t_default_{}_{}.pkl'.format(
                     T, 0., theta_alpha)))
            args_list.append(
                (train_n_dw_matrix, test_n_dw_matrix,
                 thetaless.Optimizer(regularization_list), T, SAMPLES,
                 '20news_experiment/20news_{}t_thetaless_{}_{}.pkl'.format(
                     T, 0., theta_alpha)))
            args_list.append(
                (train_n_dw_matrix, test_n_dw_matrix,
                 transfer_thetaless.Optimizer(regularization_list), T, SAMPLES,
                 '20news_experiment/20news_{}t_transfer_thetaless_{}_{}.pkl'.
                 format(T, 0., theta_alpha)))

    #manager.perform_experiment(args_list[0])
    #manager.perform_experiment(args_list[1])
    manager.perform_experiment(args_list[2])
    #Pool(processes=5).map(manager.perform_experiment, args_list)
Example #3
0
def perform_doc_experiment(args):
    """Measure SVM accuracy on test-document thetas, iteration by iteration.

    The function takes a single tuple so it can be used directly as the
    target of a one-argument mapper. The tuple is unpacked in the body
    instead of in the signature: tuple parameters were removed in Python 3
    (PEP 3113), while body-level unpacking works on both Python 2 and 3.

    ``args`` must hold, in this order:
        n_dw_matrix_doc_train: matrix the topic model is fitted on.
        doc_targets_doc_train: class labels of the training documents.
        n_dw_matrix_doc_test: matrix of the held-out documents; only its
            first dimension (``D``) is read here, the rest is passed on to
            the optimizers.
        doc_targets_doc_test: class labels of the held-out documents.
        optimizer: optimizer used to fit the model on the training matrix;
            the first 10 entries of its ``regularization_list`` are reused
            for the three test-time optimizers.
        T: number of topics.
        samples: number of seeds to run (``range(samples)``).
        output_path: destination handed to ``callbacks.save_results``.

    For every seed the model is fitted on the training matrix, an SVC is
    trained on the resulting theta, and then three optimizers are run on the
    test matrix starting from ``init_theta``. After each of their iterations
    the accuracy of the SVC on the current theta is recorded. The per-seed
    accuracy traces and the cross-validation scores are saved at the end.
    """
    (n_dw_matrix_doc_train, doc_targets_doc_train, n_dw_matrix_doc_test,
     doc_targets_doc_test, optimizer, T, samples, output_path) = args

    D, _ = n_dw_matrix_doc_test.shape
    svm_train_score = metrics.create_svm_score_function(doc_targets_doc_train,
                                                        verbose=False)
    # Test-time optimizers only use the first 10 regularizers of the
    # training optimizer.
    opt_plsa_not_const_phi = default.Optimizer(
        regularization_list=optimizer.regularization_list[:10],
        const_phi=False)
    opt_plsa_const_phi = default.Optimizer(
        regularization_list=optimizer.regularization_list[:10], const_phi=True)
    opt_artm_thetaless = thetaless.Optimizer(
        regularization_list=optimizer.regularization_list[:10])

    def make_accuracy_recorder(classifier, history):
        """Return an iteration callback appending test accuracy to history."""

        def record(it, phi, theta):
            history.append(
                accuracy_score(classifier.predict(theta),
                               doc_targets_doc_test))

        return record

    res_plsa_not_const_phi = []
    res_plsa_const_phi = []
    res_artm_thetaless = []
    cv_fold_scores = []
    cv_test_scores = []

    for seed in range(samples):
        print(seed)
        phi, theta = experiments.default_sample(n_dw_matrix_doc_train, T, seed,
                                                optimizer)
        (best_C, best_gamma, cv_fold_score,
         cv_test_score) = svm_train_score(theta)
        cv_fold_scores.append(cv_fold_score)
        cv_test_scores.append(cv_test_score)

        print('Fold score: {}\tTest score: {}'.format(cv_fold_score,
                                                      cv_test_score))
        algo = SVC(C=best_C, gamma=best_gamma).fit(theta,
                                                   doc_targets_doc_train)
        # Every test-time optimizer starts from the same theta, built from
        # an all-ones (D, T) counter matrix.
        init_theta = common.get_prob_matrix_by_counters(
            np.ones(shape=(D, T), dtype=np.float64))

        # Fresh accuracy traces for this seed, one per optimizer.
        plsa_not_const_phi = []
        plsa_const_phi = []
        artm_thetaless = []

        # The factory binds the classifier and the trace list eagerly, so
        # each callback is tied to this seed's objects.
        opt_plsa_not_const_phi.iteration_callback = make_accuracy_recorder(
            algo, plsa_not_const_phi)
        opt_plsa_const_phi.iteration_callback = make_accuracy_recorder(
            algo, plsa_const_phi)
        opt_artm_thetaless.iteration_callback = make_accuracy_recorder(
            algo, artm_thetaless)

        for opt in [
                opt_plsa_not_const_phi, opt_plsa_const_phi, opt_artm_thetaless
        ]:
            opt.run(n_dw_matrix_doc_test, phi, init_theta)

        res_plsa_not_const_phi.append(plsa_not_const_phi)
        res_plsa_const_phi.append(plsa_const_phi)
        res_artm_thetaless.append(artm_thetaless)

    callbacks.save_results(
        {
            'res_plsa_not_const_phi': res_plsa_not_const_phi,
            'res_plsa_const_phi': res_plsa_const_phi,
            'res_artm_thetaless': res_artm_thetaless,
            'cv_fold_scores': cv_fold_scores,
            'cv_test_scores': cv_test_scores
        }, output_path)
Example #4
0
        train_proportion=0.8)[:2]

    args_list = list()
    for T in [20, 50]:
        for phi_alpha in [-0.1, 0., 0.1]:
            for theta_alpha in [-0.1, 0., 0.1]:
                regularization_list = [
                    regularizers.Additive(phi_alpha, theta_alpha)
                ] * ITERS_COUNT
                args_list.append(
                    (train_n_dw_matrix, test_n_dw_matrix,
                     default.Optimizer(regularization_list), T, SAMPLES,
                     'nips_experiment/NIPS_{}t_base_{}_{}.pkl'.format(
                         T, phi_alpha, theta_alpha)))
                args_list.append(
                    (train_n_dw_matrix, test_n_dw_matrix,
                     naive_thetaless.Optimizer(regularization_list), T,
                     SAMPLES,
                     'nips_experiment/NIPS_{}t_naive_{}_{}.pkl'.format(
                         T, phi_alpha, theta_alpha)))
                for use_B_cheat in [False, True]:
                    args_list.append(
                        (train_n_dw_matrix, test_n_dw_matrix,
                         thetaless.Optimizer(regularization_list,
                                             use_B_cheat=use_B_cheat), T,
                         SAMPLES,
                         'nips_experiment/NIPS_{}t_artm_{}_{}_{}.pkl'.format(
                             T, phi_alpha, theta_alpha, use_B_cheat)))

    Pool(processes=5).map(manager.perform_experiment, args_list)
Example #5
0
        'results/{}'.format(name)
    )


if __name__ == '__main__':
    # Script entry point: build one argument bundle per experiment variant
    # over the same matrix, then run them in parallel worker processes.
    n_ww_matrix = main_cases.get_wntm_matrix()
    args_list = [
        # Variants configured through a regularization function.
        create_exp_args(n_ww_matrix, 'plsa', reg_funcs.plsa),
        create_exp_args(n_ww_matrix, 'plsa_honest', reg_funcs.plsa_honest),
        create_exp_args(n_ww_matrix, 'plsa_origin', reg_funcs.plsa_origin),
        create_exp_args(
            n_ww_matrix, 'plsa_semi_honest', reg_funcs.plsa_semi_honest
        ),
        # Variants configured through an explicit thetaless optimizer,
        # without and with the use_B_cheat flag.
        create_exp_args(
            n_ww_matrix, 'tARTM',
            optimizer=thetaless.Optimizer(
                [reg_funcs.trivial] * ITERS_COUNT,
                use_B_cheat=False
            ),
        ),
        create_exp_args(
            n_ww_matrix, 'tARTM_cheat',
            optimizer=thetaless.Optimizer(
                [reg_funcs.trivial] * ITERS_COUNT,
                use_B_cheat=True
            )
        )
    ]

    # Keep a handle on the pool so its workers are shut down explicitly
    # instead of relying on garbage collection of a temporary object.
    # NOTE(review): assumes Pool is multiprocessing.Pool (close()/join()
    # API) - confirm against the file's imports.
    pool = Pool(processes=3)
    try:
        pool.map(manager.perform_ww_experiment, args_list)
    finally:
        pool.close()
        pool.join()