Esempio n. 1
0
def main():
    """Optionally run stopword extraction for one corpus, then print sizes.

    The local ``selected`` switch chooses which corpus ("aac" or "pmc") is
    passed to ``getStopwordsFromContexts``; with ``None`` that call is
    skipped. In every case the lengths of ``c6_stopwords_aac_40`` and
    ``c6_stopwords_pmc_40`` (names defined outside this function) are
    printed at the end.
    """
    # Per-corpus settings, in order: getRootDir key, experiment folder,
    # feature file, third positional argument of getStopwordsFromContexts
    # (presumably an index name -- confirm against that function), and the
    # stopword file name.
    corpus_settings = {
        "aac": (
            "aac",
            "aac_generate_kw_trace",
            "feature_data_ft_mms_min1.json.gz",
            "idx_az_ilc_az_annotated_aac_2010_1_paragraph",
            "stopwords_aac.txt",
        ),
        "pmc": (
            "pmc_coresc",
            "pmc_generate_kw_trace",
            # Alternative feature file: "feature_data_ft_mms_min1.json.gz"
            "feature_data_at_w_min1.json.gz",
            "idx_az_ilc_az_annotated_pmc_2013_1_paragraph",
            "stopwords_pmc.txt",
        ),
    }

    # Set to "aac" or "pmc" to run the extraction for that corpus.
    selected = None
    fraction = 0.4

    settings = corpus_settings.get(selected)
    if settings is not None:
        root_key, exp_name, feature_file, index_name, stopword_file = settings
        out_dir = os.path.join(getRootDir(root_key), "experiments", exp_name)
        reader = FeaturesReader(os.path.join(out_dir, feature_file))
        getStopwordsFromContexts(
            reader,
            "http://129.215.197.75:9200/",
            index_name,
            "_all_text",
            os.path.join(out_dir, "term_scores.json"),
            os.path.join(out_dir, stopword_file),
            percent=fraction,
        )

    for label, stopwords in (("AAC", c6_stopwords_aac_40),
                             ("PMC", c6_stopwords_pmc_40)):
        print(label, len(stopwords))
Esempio n. 2
0
def main():
    """Call testWeightDifference on AAC test features.

    Connects through ``ez_connect("AAC", "koko")``, reads the test feature
    file with ``FeaturesReader`` and hands the result of
    ``getMiniBatch(1000)`` to ``testWeightDifference``.
    """
    corpus_handle = ez_connect("AAC", "koko")
    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    # Alternative input file: "feature_data_test_mms.json.gz"
    reader = FeaturesReader(
        os.path.join(experiment_path, "feature_data_test_w.json.gz"))
    mini_batch = reader.getMiniBatch(1000)
    testWeightDifference(corpus_handle, mini_batch)
Esempio n. 3
0
def main(n_iters=250000, reset=False):
    """Create a TorchModel in the AAC keyword-trace experiment dir and run it.

    Args:
        n_iters: Value stored under the "n_iters" key of the params dict
            handed to ``TorchModel``.
        reset: Accepted for interface compatibility; not used in this
            function.
    """
    hyperparams = dict(
        n_iters=n_iters,
        print_every=100,
        learning_rate=0.01,
    )
    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    TorchModel(experiment_path, params=hyperparams).run()
Esempio n. 4
0
def main():
    """Run saveFixedContexts over each listed AAC feature file.

    For every file name in ``pending`` a ``FeaturesReader`` is opened on the
    file inside the experiment directory and passed to ``saveFixedContexts``
    together with the same path suffixed by ".fixed".
    """
    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    # Another candidate file (currently disabled): "feature_data_test_w.json.gz"
    pending = ("feature_data_w.json.gz",)
    for name in pending:
        source_path = os.path.join(experiment_path, name)
        reader = FeaturesReader(source_path)
        target_path = source_path + ".fixed"
        saveFixedContexts(target_path, reader)
Esempio n. 5
0
def main():
    """Create a KerasModel in the PMC keyword-trace experiment dir and run it.

    The model receives an empty params dict plus explicit train/test data
    file names.
    """
    # Disabled AAC variant: getRootDir("aac"), folder "aac_generate_kw_trace",
    # train file "feature_data.json.gz", test file "feature_data_test.json.gz".
    experiment_path = os.path.join(
        getRootDir("pmc_coresc"), "experiments", "pmc_generate_kw_trace")
    data_files = {
        "train_data_filename": "feature_data_at_w_min1.json.gz",
        "test_data_filename": "feature_data_test_at_w.json.gz",
    }
    keras_model = KerasModel(experiment_path, params={}, **data_files)
    keras_model.run()
Esempio n. 6
0
def main(num_epochs=10, reset=False):
    """Create a TorchModel in the AAC keyword-trace experiment dir and run it.

    Args:
        num_epochs: Value stored under the "num_epochs" key of the params
            dict handed to ``TorchModel``.
        reset: Accepted for interface compatibility; not used in this
            function.
    """
    hyperparams = {"num_epochs": num_epochs, "print_every": 100}
    # Previously tried learning rate: 0.003
    hyperparams["learning_rate"] = 0.01
    # Previously tried optimizer: "Adam"
    hyperparams["optimizer"] = "SGD"

    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    torch_model = TorchModel(experiment_path, params=hyperparams)
    torch_model.run()
Esempio n. 7
0
def main(num_epochs=1, reset=False):
    """Create a TorchModel on the "min2" AAC feature files and run it.

    Args:
        num_epochs: Value stored under the "num_epochs" key of the params
            dict handed to ``TorchModel``.
        reset: Accepted for interface compatibility; not used in this
            function.
    """
    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    hyperparams = dict(
        num_epochs=num_epochs,
        hidden_size=200,
        print_every=100,
        learning_rate=0.01,  # previously tried: 0.003
        optimizer="SGD",  # previously tried: "Adam"
    )
    data_files = {
        "train_data_filename": "feature_data_w_min2.json.gz",
        "test_data_filename": "feature_data_test_w_min2.json.gz",
    }
    TorchModel(experiment_path, params=hyperparams, **data_files).run()
Esempio n. 8
0
def main():
    """Create a BaselineModel1 on the AAC feature files and run it.

    The model is built with ``filter_stopwords`` set to False, has
    ``use_weights`` switched on afterwards, and is run with
    ``external_test=True``.
    """
    # Disabled PMC variant: getRootDir("pmc_coresc"), folder
    # "pmc_generate_kw_trace", same train/test file names and params.
    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    # Alternative test file: "feature_data_test_at_w_1u1d.json.gz"
    baseline = BaselineModel1(
        experiment_path,
        train_data_filename="feature_data_at_w_min1.json.gz",
        test_data_filename="feature_data_test_at_w.json.gz",
        params={"filter_stopwords": False},
    )
    baseline.use_weights = True
    baseline.run(external_test=True)
Esempio n. 9
0
def main():
    """Create an SKLearnModel with one of three parameter presets and run it.

    Three presets are defined (linear regression, a tree-ensemble regressor
    and an MLP regressor); the "random_forest" preset is the one passed to
    ``SKLearnModel``. The model is run with ``external_test=True``.
    """
    presets = {
        # Linear regression
        "lin_reg": {
            "augment_features": False,
            "regression": True,
            "model": "LinearRegression",
            "sklearn_params": {},
        },
        # Tree ensemble. Disabled alternatives: "regression": False;
        # "model": "RandomForestRegressor" or "LinearRegression";
        # "classifier": "RandomForestClassifier".
        "random_forest": {
            "augment_features": False,
            "regression": True,
            "model": "ExtraTreesRegressor",
            "sklearn_params": {
                "n_estimators": 10,
                # Disabled alternatives: "class_weight": "balanced_subsample"
                # or "class_weight": {True: 1000, False: 0.1}.
                "n_jobs": -1,
                "verbose": 3,
            },
        },
        # MLPRegressor. Disabled alternatives: "activation": "logistic";
        # "solver": "lbfgs".
        "mlp": {
            "augment_features": False,
            "regression": True,
            "model": "MLPRegressor",
            "sklearn_params": {
                "verbose": 3,
                "hidden_layer_sizes": (300, 200, 100),
                "activation": "relu",
                "learning_rate": "adaptive",
                "tol": 1e-5,
            },
        },
    }

    # Switch the key to "mlp" or "lin_reg" to try the other presets.
    chosen = presets["random_forest"]

    # Disabled PMC variant: getRootDir("pmc_coresc"), folder
    # "pmc_generate_kw_trace", same train/test file names.
    experiment_path = os.path.join(
        getRootDir("aac"), "experiments", "aac_generate_kw_trace")
    sk_model = SKLearnModel(
        experiment_path,
        params=chosen,
        train_data_filename="feature_data_at_w_min1.json.gz",
        test_data_filename="feature_data_test_at_w.json.gz",
    )
    sk_model.run(external_test=True)