def analyze_nli_pair(data):
    # data: pre-extracted examples to visualize (passed in by the caller)
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_pair_analyze"
    e_config.num_epoch = 4
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense', 'aux_conflict']
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLIEx_T", "model-12097")
    e.nli_visualization_pairing(nli_setting, e_config, data_loader, load_id, data)

def interactive():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIInterative"
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_Only_B", 'model-0')
    e.nli_interactive_list(nli_setting, e_config, data_loader, load_id)

def train_nil_on_bert():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLI_Only_{}".format("C")
    e_config.num_epoch = 2
    e_config.save_interval = 30 * 60  # 30 minutes
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # load_id = None
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    # load_id = ("NLI_bert_w_explain", 'model-91531')
    # load_id = ("NLI_Only_A", "model-0")
    e.train_nli_ex_0(nli_setting, e_config, data_loader, load_id, False)

def attribution_explain():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLI_run_{}".format("nli_eval")
    e_config.num_epoch = 4
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # load_id = ("NLI_run_nli_warm", "model-97332")
    load_id = ("NLI_Only_A", 'model-0')
    e.nli_attribution_baselines(nli_setting, e_config, data_loader, load_id)

def predict_lime_snli_continue():
    hp = hyperparams.HPBert()
    hp.batch_size = 512 + 256
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "SNLI_LIME_{}".format("eval")
    e_config.load_names = ['bert', 'cls_dense']
    data_loader = nli.SNLIDataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("SNLI_Only_A", 'model-0')
    e.predict_lime_snli_continue(nli_setting, e_config, data_loader, load_id, "test")

def run_nli_w_path(run_name, step_name, model_path):
    disable_eager_execution()
    hp = HPBert()
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    data_loader = nli.DataLoader(hp.seq_max, "bert_voca.txt", True)
    data = get_nli_batches_from_data_loader(data_loader, hp.batch_size)
    run_name = "{}_{}_NLI".format(run_name, step_name)
    saved_model = train_nli(hp, nli_setting, run_name, 3, data, model_path)
    tf.reset_default_graph()
    avg_acc = test_nli(hp, nli_setting, run_name, data, saved_model)
    print("avg_acc: ", avg_acc)
    save_report("nli", run_name, step_name, avg_acc)

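# Usage sketch (illustrative, not from the original file): run_nli_w_path takes a base run
# name, a step label that is only used for naming and reporting, and a checkpoint path to
# initialize from, e.g.
#
#     run_nli_w_path("pretrain_B", "200000", "/path/to/model.ckpt-200000")   # hypothetical path
#
# It then trains with train_nli, resets the default graph, evaluates with test_nli, and
# records avg_acc through save_report.
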
def interactive_visual():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIInterative"
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense', 'aux_conflict']
    # load_id = ("NLIEx_U_mismatch", "model-10265")
    load_id = ("NLIEx_Y_conflict", 'model-12039')
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    e.nli_interactive_visual(nli_setting, e_config, data_loader, load_id)

def do_test_dev_acc():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_Test"
    e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # load_id = ("NLI_bare_A", 'model-195608')
    # load_id = ("NLIEx_S", 'model-4417')
    # load_id = ("NLIEx_Y_conflict", "model-9636")
    load_id = ("NLI_Only_C", 'model-0')
    e.test_acc(nli_setting, e_config, data_loader, load_id)

def train_mnli_any_way():
    hp = HP()
    hp.batch_size = 8
    hp.compare_deletion_num = 20
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_Any_512"
    e_config.ex_val = False
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
    e_config.v2_load = True
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("nli512", 'model.ckpt-65000')
    e.train_nli_any_way(nli_setting, e_config, data_loader, load_id)

def train_ukp_with_nli(load_id, exp_name):
    step_per_epoch = 24544 + 970
    hp = hyperparams.HPBert()
    e_config = ExperimentConfig()
    e_config.num_steps = step_per_epoch
    e_config.save_interval = 100 * 60  # 100 minutes
    e_config.voca_size = 30522
    e_config.num_dev_batches = 30
    e_config.load_names = ['bert']
    e_config.valid_freq = 500
    encode_opt = "is_good"
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    num_class_list = [3, 3]
    f1_list = []
    for topic in data_generator.argmining.ukp_header.all_topics:
        e = Experiment(hp)
        print(exp_name)
        e_config.name = "argmix_{}_{}_{}".format(exp_name, topic, encode_opt)
        arg_data_loader = BertDataLoader(topic, True, hp.seq_max, "bert_voca.txt", option=encode_opt)
        nli_data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
        shared_data_loader = SharedFeeder([arg_data_loader, nli_data_loader], [1, 5], ["Arg", "NLI"], hp.batch_size)
        save_path = e.train_shared(e_config, shared_data_loader, num_class_list, load_id)
        print(topic)
        f1_last = e.eval_ukp_on_shared(e_config, arg_data_loader, num_class_list, save_path)
        f1_list.append((topic, f1_last))
    print(exp_name)
    print(encode_opt)
    print(f1_list)
    for key, score in f1_list:
        print("{0}\t{1:.03f}".format(key, score))

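# Illustrative sketch only (this is NOT the repo's SharedFeeder): the [1, 5] ratio passed to
# SharedFeeder above can be read as "per cycle, draw 1 Arg batch for every 5 NLI batches".
# A weighted round-robin over per-task batch iterators is one minimal way to realize that
# schedule, assuming each loader can be wrapped as a plain batch iterator.
def mix_batches_sketch(batch_iters, ratios):
    # batch_iters: list of iterators, each yielding batches for one task
    # ratios: batches to draw from each iterator per cycle, e.g. [1, 5]
    while True:
        for it, ratio in zip(batch_iters, ratios):
            for _ in range(ratio):
                try:
                    yield next(it)
                except StopIteration:
                    return  # stop once any task runs out of batches
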
def train_pairing():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_{}".format("T")
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']  # 'aux_conflict'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # load_id = ("NLIEx_P_match", "model-1636")
    load_id = ("NLI_run_A", 'model-0')
    PAIRING_NLI = 6
    e.train_nli_smart(nli_setting, e_config, data_loader, load_id, 'match', PAIRING_NLI)

def visualize_senli_on_plain_text():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_lm_analyze"
    e_config.num_epoch = 4
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense', 'aux_conflict']
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLIEx_AnyA", "model-7255")
    p = "/mnt/scratch/youngwookim/Chair/data/tf/1"
    data = list(extract_stream(p))
    e.nli_visualization_lm(nli_setting, e_config, data_loader, load_id, data)

def baseline_explain():
    hp = hyperparams.HPBert()
    hp.batch_size *= 32
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLI_run_{}".format("nli_warm")
    e_config.num_epoch = 4
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']
    explain_tag = 'conflict'  # 'dontcare' 'match' 'mismatch'
    # explain_tag = 'match'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_A", 'model-0')
    e.nli_explain_baselines(nli_setting, e_config, data_loader, load_id, explain_tag)

def train_snli_on_bert():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "SNLI_Only_{}".format("1")
    e_config.num_epoch = 1
    e_config.save_interval = 3 * 60 * 60  # 3 hours
    data_loader = nli.SNLIDataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # load_id = None
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    e.train_nli_ex_0(nli_setting, e_config, data_loader, load_id, f_train_ex=False)

def attribution_predict():
    hp = hyperparams.HPBert()
    target_label = 'mismatch'
    e = Experiment(hp)
    hp.batch_size = 512
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLI_run_{}".format("nli_eval")
    e_config.load_names = ['bert', 'cls_dense']
    # data_id = "test_{}".format(target_label)
    data_id = "{}_1000".format(target_label)
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_Only_C", 'model-0')
    e.nli_attribution_predict(nli_setting, e_config, data_loader, load_id, target_label, data_id)

def test_snli():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "SNLIEx_Test"
    e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
    data_loader = nli.SNLIDataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    todo = []
    load_id = ("SNLI_Only_A", 'model-0')
    todo.append(load_id)
    todo.append(("SNLI_Only_1", 'model-0'))
    for load_id in todo:
        tf.reset_default_graph()
        e.test_acc(nli_setting, e_config, data_loader, load_id)

def train_nil_on_bert():
    print('train_nil_on_bert')
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLI_10k_bert_cold"
    e_config.num_epoch = 2
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert']  # , 'aux_conflict'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    dir_name = "bert_cold"
    model_step = 10 * 1000
    load_id = (dir_name, "model.ckpt-{}".format(model_step))
    print(load_id)
    saved = e.train_nli_ex_0(nli_setting, e_config, data_loader, load_id, False)
    e.test_acc2(nli_setting, e_config, data_loader, saved)

def train_nil_from_v2_checkpoint(run_name, model_path):
    hp = hyperparams.HPSENLI3()
    print(hp.batch_size)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    tokenizer = get_tokenizer()
    CLS_ID = tokenizer.convert_tokens_to_ids(["[CLS]"])[0]
    SEP_ID = tokenizer.convert_tokens_to_ids(["[SEP]"])[0]
    data_loader.CLS_ID = CLS_ID
    data_loader.SEP_ID = SEP_ID
    tf_logger.setLevel(logging.INFO)
    steps = 12271
    data = load_cache("nli_batch16")
    if data is None:
        tf_logger.info("Encoding data from csv")
        data = get_nli_batches_from_data_loader(data_loader, hp.batch_size)
        save_to_pickle(data, "nli_batch16")
    train_nli(hp, nli_setting, run_name, steps, data, model_path)

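# Minimal sketch of the caching pattern in train_nil_from_v2_checkpoint (an assumption:
# load_cache and save_to_pickle are treated here as simple name-keyed pickle helpers; the
# actual implementations live elsewhere in the repo). CACHE_DIR_SKETCH is a hypothetical path.
import os
import pickle

CACHE_DIR_SKETCH = "cache"

def load_cache_sketch(name):
    # Return the cached object for `name`, or None if nothing has been saved yet.
    path = os.path.join(CACHE_DIR_SKETCH, name + ".pickle")
    if not os.path.exists(path):
        return None
    with open(path, "rb") as f:
        return pickle.load(f)

def save_to_pickle_sketch(obj, name):
    # Persist `obj` so later runs can skip re-encoding the data.
    os.makedirs(CACHE_DIR_SKETCH, exist_ok=True)
    with open(os.path.join(CACHE_DIR_SKETCH, name + ".pickle"), "wb") as f:
        pickle.dump(obj, f)
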
def train_mnli_any_way():
    hp = hyperparams.HPBert()
    hp.compare_deletion_num = 20
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_AnyA"
    e_config.ex_val = False
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
    # explain_tag = 'match'  # 'dontcare' 'match' 'mismatch'
    # explain_tag = 'mismatch'
    # explain_tag = 'conflict'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_A", 'model-0')
    e.train_nli_any_way(nli_setting, e_config, data_loader, load_id)

def train_nli_with_premade():
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_{}".format("HB")
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert']  # , 'cls_dense', 'aux_conflict'
    explain_tag = 'conflict'  # 'dontcare' 'match' 'mismatch'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # load_id = ("NLI_run_nli_warm", "model-97332")
    # load_id = ("NLIEx_A", "model-16910")
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    e.train_nli_ex_with_premade_data(nli_setting, e_config, data_loader, load_id, explain_tag)

def predict_lime_snli():
    hp = hyperparams.HPBert()
    hp.batch_size = 1024 + 512 + 256
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "SNLI_LIME_{}".format("eval")
    e_config.load_names = ['bert', 'cls_dense']
    start = int(sys.argv[1])
    print("Begin", start)
    sub_range = (start, start + 100)
    data_loader = nli.SNLIDataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("SNLI_Only_A", 'model-0')
    e.predict_lime_snli(nli_setting, e_config, data_loader, load_id, "test", sub_range)

def train_ubuntu():
    hp = hyperparams.HPUbuntu()
    hp.batch_size = 16
    e = Experiment(hp)
    voca_setting = NLI()
    voca_setting.vocab_size = 30522
    voca_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "Ubuntu_{}".format("A")
    e_config.num_epoch = 1
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'reg_dense']  # , 'aux_conflict'
    data_loader = ubuntu.DataLoader(hp.seq_max, voca_setting.vocab_filename, voca_setting.vocab_size, True)
    # load_id = ("NLI_run_nli_warm", "model-97332")
    # load_id = ("NLIEx_A", "model-16910")
    # load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    load_id = ("Ubuntu_A", "model-5145")
    e.train_ubuntu(e_config, data_loader, load_id)

def baseline_predict():
    hp = hyperparams.HPBert()
    hp.batch_size = 512
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLI_run_{}".format("nli_eval")
    e_config.load_names = ['bert', 'cls_dense']
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_Only_C", 'model-0')
    target_label = "mismatch"
    data_id = "{}_1000".format(target_label)
    start = int(sys.argv[1])
    print(start)
    sub_range = (start, start + 100)
    # sub_range = None
    e.nli_baseline_predict(nli_setting, e_config, data_loader, load_id, target_label, data_id, sub_range)

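# Sharding note (illustrative): predict_lime_snli() and baseline_predict() both read the shard
# offset from sys.argv[1] and process a fixed window of 100 examples,
#
#     sub_range = (start, start + 100)
#
# so covering N examples means launching one process per offset in range(0, N, 100). The
# commented-out sub_range = None presumably processes the full data_id split in a single run.
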
def train_nli_smart_rf(explain_tag):
    hp = hyperparams.HPSENLI()
    hp.compare_deletion_num = 20
    e = Experiment(hp)
    e.log.setLevel(logging.WARNING)
    e.log2.setLevel(logging.WARNING)
    e.log.info("I don't want to see")
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_{}".format("CO_" + explain_tag)
    e_config.num_epoch = 1
    e_config.ex_val = False
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
    e_config.save_payload = True
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_A", 'model-0')
    e.train_nli_smart(nli_setting, e_config, data_loader, load_id, explain_tag, 5)

def get_eval_params(load_type, model_path, data_type):
    hp = hyperparams.HPSENLI3_eval()
    hp.batch_size = 128
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    if data_type == "mnli":
        data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    elif data_type == "snli":
        data_loader = nli.SNLIDataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    else:
        assert False
    dir_path, file_name = os.path.split(model_path)
    run_name = os.path.split(dir_path)[1] + "/" + file_name
    dev_batches = get_batches_ex(data_loader.get_dev_data(), hp.batch_size, 4)
    if load_type == "v2":
        load_fn = load_bert_v2
    else:
        load_fn = load_model
    return dev_batches, hp, load_fn, nli_setting, run_name

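# Consumption sketch (assumption, not shown in this file): a caller of get_eval_params would
# unpack the tuple and restore the checkpoint with the returned loader before evaluating, e.g.
#
#     dev_batches, hp, load_fn, nli_setting, run_name = get_eval_params("v2", model_path, "mnli")
#
# where load_fn is load_bert_v2 for "v2" checkpoints and load_model otherwise, and accuracy is
# then computed over dev_batches.
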
def train_nli_with_reinforce_old():
    hp = hyperparams.HPNLI2()
    e = Experiment(hp)
    nli_setting = NLI()
    e_config = ExperimentConfig()
    e_config.name = "NLI_run_{}".format("retest")
    e_config.num_epoch = 4
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'dense_cls']  # , 'aux_conflict'
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename)
    load_id = ("interval", "model-48040")
    e.train_nli_ex_0(nli_setting, e_config, data_loader, load_id, True)

def predict_rf_tune():
    hp = hyperparams.HPBert()
    hp.batch_size = 256
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    target_label = 'match'
    # data_id = 'test_conflict'
    data_id = "{}_1000".format(target_label)
    e_config = ExperimentConfig()
    # l = [(0.1, 12039), (0.2, 12245), (0.3, 12063), (0.4, 12250), (0.6, 12262), (0.7, 12253)]
    l = [(0.5, 12175), (0.8, 12269), (0.9, 12259)]
    for del_g, step in l:
        e_config.name = "X_match_del_{}".format(del_g)
        e_config.load_names = ['bert', 'cls_dense', 'aux_conflict']
        data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
        load_id = ("NLIEx_match_del_{}".format(del_g), "model-{}".format(step))
        e.clear_run()
        e.predict_rf(nli_setting, e_config, data_loader, load_id, data_id, 5)

def tuning_train_nli_rf():
    l = [0.9]
    for g_del in l:
        tf.reset_default_graph()
        hp = hyperparams.HPSENLI()
        hp.g_val = g_del
        hp.compare_deletion_num = 20
        e = Experiment(hp)
        nli_setting = NLI()
        nli_setting.vocab_size = 30522
        nli_setting.vocab_filename = "bert_voca.txt"
        e_config = ExperimentConfig()
        e_config.name = "NLIEx_{}".format("match_del_{}".format(g_del))
        e_config.num_epoch = 1
        e_config.ex_val = True
        e_config.save_interval = 30 * 60  # 30 minutes
        e_config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict'
        explain_tag = 'match'  # 'dontcare' 'match' 'mismatch'
        # explain_tag = 'conflict'
        # explain_tag = 'mismatch'
        data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
        # load_id = ("NLI_run_nli_warm", "model-97332")
        # load_id = ("NLIEx_A", "model-16910")
        # load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
        # load_id = ("NLIEx_D", "model-1964")
        # load_id = ("NLIEx_D", "model-1317")
        load_id = ("NLI_run_A", 'model-0')
        e.train_nli_smart(nli_setting, e_config, data_loader, load_id, explain_tag, 5)