Example #1
0
def run_test(folder,
             train_path,
             dev_plus_path,
             test_path,
             transformation_name,
             train_path_mod,
             dev_plus_mod,
             test_path_mod,
             search_path,
             rho,
             search_random_state,
             train_random_state,
             boot_random_state,
             dgp_random_state,
             number_of_simulations,
             output_raw_result,
             output_result,
             output_dir,
             n_cores,
             verbose,
             save_steps=8500,
             clean=True):
    """Run one train / evaluate / bootstrap-test experiment and save it.

    The function loads the train, dev-plus and test CSV files, drops rows
    with missing values, filters and pre-processes them, fits a
    ``BertWrapper`` on a training sample produced by ``DGP``, evaluates it
    on the original and transformed dev-plus and test sets, computes the
    paired t and Cochran statistics with bootstrap p-values, and writes
    the results to two CSV files.

    Args:
        folder: Data set name. It is stored in the hyper-parameters, used
            to build ``"data/<folder>/cached_"`` and passed to
            ``clean_folder`` when ``clean`` is truthy.
        train_path: CSV file with the training split.
        dev_plus_path: CSV file with the dev-plus split.
        test_path: CSV file with the test split.
        transformation_name: Label recorded in the hyper-parameters and in
            the summary row.
        train_path_mod: Second argument given to
            ``path_base_transformation`` for the training data
            (presumably the path of its transformed version -- confirm).
        dev_plus_mod: Same as ``train_path_mod``, for the dev-plus split.
        test_path_mod: Same as ``train_path_mod``, for the test split.
        search_path: CSV file whose first row (index label 0) provides
            ``num_train_epochs``, ``max_seq_length``, ``learning_rate``,
            ``weight_decay``, ``adam_epsilon`` and ``max_grad_norm``.
        rho: Forwarded to ``DGP`` and recorded in the summary row.
        search_random_state: Only recorded (hyper-parameters and summary).
        train_random_state: Stored in the hyper-parameters under both
            ``"train_random_state"`` and ``"random_state"``.
        boot_random_state: Passed to both bootstrap series.
        dgp_random_state: Passed to ``DGP.sample_transform``.
        number_of_simulations: Passed to both bootstrap series.
        output_raw_result: Destination CSV for the matched test results.
        output_result: Destination CSV for the one-row summary.
        output_dir: Stored in the hyper-parameters as ``"output_dir"`` and
            passed to ``clean_folder_log`` when ``clean`` is truthy.
        n_cores: Stored in the hyper-parameters.
        verbose: When truthy, print the two output paths at the end.
        save_steps: Stored in the hyper-parameters as ``"save_steps"``.
        clean: When truthy, call ``clean_folder_log(output_dir)`` and
            ``clean_folder(folder)`` after saving.

    Returns:
        None. The results are written to ``output_raw_result`` and
        ``output_result``.
    """
    started_at = time()

    # Load every split first, then filter, then pre-process.
    split_paths = (train_path, dev_plus_path, test_path)
    raw_splits = [pd.read_csv(path) for path in split_paths]
    train, dev_plus, test = [
        filter_df_by_label(raw.dropna()).reset_index(drop=True)
        for raw in raw_splits
    ]
    for split in (train, dev_plus, test):
        pre_process_nli_df(split)

    # Settings handed to BertWrapper.
    hyperparams = {
        "local_rank": -1,
        "overwrite_cache": False,
        "per_gpu_train_batch_size": 32,
        "per_gpu_eval_batch_size": 50,
        "gradient_accumulation_steps": 1,
        "max_steps": -1,
        "warmup_steps": 0,
        "save_steps": save_steps,
        "no_cuda": False,
        "n_gpu": 1,
        "data_set_name": folder,
        "transformation_name": transformation_name,
        "rho": rho,
        "model_name_or_path": "bert",
        "output_dir": output_dir,
        "fp16": False,
        # NOTE(review): this value is spelled with a zero; it looks like it
        # was meant to be "O1" (letter O) -- confirm before enabling fp16.
        "fp16_opt_level": "01",
        "device": "cpu",
        # NOTE(review): always True, independent of the `verbose` argument.
        "verbose": True,
        "model_type": "bert",
        "pad_on_left": False,
        "pad_token": 0,
        "n_cores": n_cores,
        "eval_sample_size": 200,
        "pad_token_segment_id": 0,
        "mask_padding_with_zero": True,
        "base_path": "data/{}/cached_".format(folder),
        "pretrained_weights": "bert-base-uncased",
        "number_of_simulations": number_of_simulations,
        "search_random_state": search_random_state,
        "dgp_random_state": dgp_random_state,
        "train_random_state": train_random_state,
        "random_state": train_random_state,
        "boot_random_state": boot_random_state,
        "output_raw_result": output_raw_result,
        "output_result": output_result,
    }

    # Tuned values come from the first row of the search results.
    tuned_keys = ("num_train_epochs", "max_seq_length",
                  "learning_rate", "weight_decay",
                  "adam_epsilon", "max_grad_norm")
    search_results = pd.read_csv(search_path)
    hyperparams.update(
        {key: search_results.loc[0, key] for key in tuned_keys})

    # Transformed versions of the evaluation sets.
    test_t = path_base_transformation(test, test_path_mod)
    dev_plus_t = path_base_transformation(dev_plus, dev_plus_mod)

    # Training sample: DGP receives the transformation as a callable.
    def train_trans(df):
        return path_base_transformation(df, train_path_mod)

    train.loc[:, "o_index"] = train.index.values
    dgp_train = DGP(data=train, transformation=train_trans, rho=rho)
    train_ = dgp_train.sample_transform(random_state=dgp_random_state)

    # Train.
    model = BertWrapper(hyperparams)
    _, _, train_time = model.fit(train_)

    # Evaluate: test sets first, then dev-plus sets.
    eval_sets = (("test", test),
                 ("test_t", test_t),
                 ("dev_plus", dev_plus),
                 ("dev_plus_t", dev_plus_t))
    predictions = {mode: model.get_results(frame, mode=mode)
                   for mode, frame in eval_sets}

    # Observed statistics on the test set.
    m_results = get_matched_results_transformers(
        predictions["test"], predictions["test_t"])
    test_acc = m_results.A.mean()
    transformed_test_acc = m_results.B.mean()
    paired_t_output = get_paired_t_statistic(m_results)
    t_obs, acc_diff, test_size, standart_error = paired_t_output
    cochran_obs = get_cochran_statistic(m_results)

    # Accuracies on the dev-plus set.
    dev_m_results = get_matched_results_transformers(
        predictions["dev_plus"], predictions["dev_plus_t"])
    dev_acc = dev_m_results.A.mean()
    dev_t_acc = dev_m_results.B.mean()
    dev_diff = np.abs(dev_acc - dev_t_acc)

    # Bootstrap series; both use the same random state.
    def get_paired_t(matched_results):
        return get_paired_t_statistic(matched_results)[0]

    paired_t_boots = get_boots_series_under_H0(
        m_results, get_paired_t, number_of_simulations, boot_random_state)
    cochran_boots = get_boots_series_under_H0(
        m_results, get_cochran_statistic, number_of_simulations,
        boot_random_state)

    paired_t_p_value = get_boot_paired_t_p_value(paired_t_boots, t_obs)
    cochran_p_value = get_boot_cochran_p_value(cochran_boots, cochran_obs)

    # Elapsed time since the start of the function (training included).
    htest_time = time() - started_at

    # One-row summary; both durations are divided by 3600 before saving.
    summary_row = {
        "data": hyperparams["data_set_name"],
        "model": hyperparams["model_name_or_path"],
        "transformation": hyperparams["transformation_name"],
        "rho": rho,
        "search_random_state": hyperparams["search_random_state"],
        "dgp_random_state": dgp_random_state,
        "train_random_state": hyperparams["train_random_state"],
        "boot_random_state": boot_random_state,
        "number_of_simulations": number_of_simulations,
        "test_accuracy": test_acc,
        "transformed_test_accuracy": transformed_test_acc,
        "accuracy_difference": acc_diff,
        "test_size": test_size,
        "standart_error": standart_error,
        "observable_paired_t_stats": t_obs,
        "paired_t_p_value": paired_t_p_value,
        "observable_cochran_stats": cochran_obs,
        "cochran_p_value": cochran_p_value,
        "dev_plus_accuracy": dev_acc,
        "transformed_dev_plus_accuracy": dev_t_acc,
        "dev_plus_accuracy_difference": dev_diff,
        "training_time": train_time / 3600,
        "test_time": htest_time / 3600,
    }
    summary = pd.DataFrame(
        {column: [value] for column, value in summary_row.items()})

    m_results.to_csv(output_raw_result, index=False)
    summary.to_csv(output_result, index=False)

    if verbose:
        for saved_path in (output_raw_result, output_result):
            print(saved_path)

    if clean:
        clean_folder_log(output_dir)
        clean_folder(folder)
Example #2
0
 def get_paired_t(matched_results):
     """Return the first value given by ``get_paired_t_statistic``.

     The helper yields four values; only the leading one (named ``t_obs``
     by callers) is kept, so this can be used where a function returning
     a single statistic is expected.
     """
     return get_paired_t_statistic(matched_results)[0]