# parts = line[:-1].split('\t')
# label = int(parts[0])
# string = parts[1]
# augmented_sentences = eda.get_delete_sentences(string, n_aug=n_aug, alpha_rd=alpha)
# cosine_sim_list = utils_bert.compute_sent_similarities(string, augmented_sentences)
# dataset_cosine_sim_list += cosine_sim_list
# return dataset_cosine_sim_list

if __name__ == "__main__":

    # Each (callable, label) pair computes per-sentence cosine similarities for one
    # augmentation strategy; only 'insert' is currently enabled.
    enabled_methods = [
        # (compute_swap_similarity, 'swap'),
        (compute_insert_similarity, 'insert'),
        # (compute_insertswap_similarity, 'insertswap'),
    ]

    for similarity_fn, method_name in enabled_methods:
        # Pool similarities across every dataset so the reported mean/stdev
        # summarize the augmentation method, not a single corpus.
        pooled_similarities = []
        for dataset_name in ['sst2', 'subj', 'trec']:
            data_folder = config.data_folders[dataset_name]
            # NOTE(review): the experiment folder is named 'delete' even though the
            # enabled method is 'insert' — looks intentional (shared input txt), but
            # confirm against utils_config.make_exp_folder usage.
            delete_folder = utils_config.make_exp_folder(data_folder, 'delete')
            delete_train_txt_path, _, _, _ = utils_config.get_txt_paths(delete_folder)
            pooled_similarities += similarity_fn(delete_train_txt_path)
        # presumably mean/stdev come from the statistics module — stdev requires
        # at least two data points or it raises StatisticsError.
        print(
            f"{method_name} has sim {mean(pooled_similarities):.3f} stdev {stdev(pooled_similarities):.3f}"
        )
import config, utils_config, utils_mlp_a1_grad_sim_classification
from pathlib import Path

# Experiment configuration for a single-dataset MLP run resumed from a
# pretrained checkpoint.
dataset_name = 'sst2'
data_folder = config.data_folders[dataset_name]
output_folder = Path("outputs")
exp_id = '9b'
num_classes = 2
resume_checkpoint_path = Path(
    "checkpoints/sst2_vanilla_mlp_50_seednum0/e10_va0.7460.pt")

if __name__ == "__main__":

    paths = utils_config.get_txt_paths(data_folder)
    train_txt_path, train_embedding_path, test_txt_path, test_embedding_path = paths

    # Sweep over training-subset sizes; only 500 is active right now.
    for train_subset in [500]:  # , 20, 50]:
        (mean_val_acc, stdev_acc, mean_conf_acc, stdev_conf_acc,
         mlp_val_acc, mlp_stdev_acc, mlp_conf_acc, mlp_stdev_conf_acc) = (
            utils_mlp_a1_grad_sim_classification.train_mlp_multiple(
                train_txt_path,
                train_embedding_path,
                test_txt_path,
                test_embedding_path,
                num_classes=num_classes,
                dataset_name=dataset_name,
                exp_id=exp_id,
                train_subset=train_subset,
                resume_checkpoint_path=resume_checkpoint_path,
                num_seeds=1,
            ))
import config, utils_bert, utils_config, utils_svm, utils_processing

data_folder = config.data_folders['sst2']

if __name__ == "__main__":

    # Resolve the canonical train/test text and embedding paths for sst2.
    train_txt_path, train_embedding_path, test_txt_path, test_embedding_path = (
        utils_config.get_txt_paths(data_folder))
    # utils_bert.get_embedding_dict(train_txt_path, train_embedding_path)
    # utils_bert.get_embedding_dict(test_txt_path, test_embedding_path)

    # Insert-augmented evaluation set.
    # Fix: dropped the extraneous f-prefix — "insert-eval" has no placeholders
    # (ruff F541); the runtime string is unchanged.
    insert_folder = utils_config.make_exp_folder(data_folder, "insert-eval")
    _, _, insert_test_txt_path, insert_test_embedding_path = (
        utils_config.get_txt_paths(insert_folder))
    # utils_processing.augment_insert(test_txt_path, insert_test_txt_path, n_aug=2, alpha=0.3)
    # utils_bert.get_embedding_dict(insert_test_txt_path, insert_test_embedding_path)

    # Swap-augmented evaluation set (same F541 fix as above).
    swap_folder = utils_config.make_exp_folder(data_folder, "swap-eval")
    _, _, swap_test_txt_path, swap_test_embedding_path = (
        utils_config.get_txt_paths(swap_folder))
    # utils_processing.augment_swap(test_txt_path, swap_test_txt_path, n_aug=2, alpha=0.2)
    # utils_bert.get_embedding_dict(swap_test_txt_path, swap_test_embedding_path)

    # utils_svm.evaluate_svm_baselines(
    #     train_txt_path,
    #     test_txt_path,
    #     train_embedding_path,
    #     test_embedding_path,
    #     insert_test_txt_path,
    #     insert_test_embedding_path,
    #     swap_test_txt_path,
    #     swap_test_embedding_path,
    # )