def contrv_pred():
    """Run controversy language-model prediction with a BERT-initialized model."""
    hp = hyperparams.HPQL()
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Contrv_{}".format("B")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls']
    voca_size = 30522
    voca_file = "bert_voca.txt"
    loader = ws.DataLoader(hp.seq_max, voca_file, voca_size)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.controv_lm(config, loader, load_id)
def train_mscore_regression():
    """Train controversy classification on mscore data from a BERT checkpoint."""
    hp = hyperparams.HPMscore()
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Contrv_{}".format("C")
    config.num_epoch = 4
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert']
    voca_size = 30522
    voca_file = "bert_voca.txt"
    loader = mscore.DataLoader(hp.seq_max, voca_file, voca_size)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_controversy_classification(config, loader, load_id)
def wikicont_bert():
    """Train wiki-controversy model (wiki-only collection) from a BERT checkpoint."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "WikiContrv2009_only_wiki"
    config.num_epoch = 1
    config.save_interval = 60 * 60  # checkpoint every 1 hour
    config.load_names = ['bert']
    config.valid_freq = 100
    voca_size = 30522
    voca_file = "bert_voca.txt"
    loader = Ams18.DataLoader(hp.seq_max, voca_file, voca_size)
    # Restrict the source collection to wiki documents only.
    loader.source_collection.collection_type = 'wiki'
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_wiki_contrv(config, loader, load_id)
def train_nil_on_bert():
    """Train NLI classification from a pretrained BERT checkpoint (no explain training).

    NOTE(review): a second function with this same name is defined later in
    this module and shadows this one at import time.
    """
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLI_Only_{}".format("C")
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # Alternative checkpoints tried previously:
    #   load_id = None
    #   load_id = ("NLI_bert_w_explain", 'model-91531')
    #   load_id = ("NLI_Only_A", "model-0")
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_nli_ex_0(nli_setting, config, loader, load_id, False)
def test_nli():
    """Evaluate accuracy of a saved NLI model checkpoint."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLI_400k_tlm_simple_wo_hint"
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # Earlier candidate: "/mnt/scratch/youngwookim/Chair/output/model/runs/NLI_Cold/model-0"
    saved = "/mnt/scratch/youngwookim/Chair/output/model/runs/NLI_400k_tlm_wo_hint/model-0"
    # NOTE(review): the line below overrides the path above; the first
    # assignment is kept only as a record of the previous run.
    saved = '/mnt/scratch/youngwookim/Chair/output/model/runs/NLI_400k_tlm_simple_hint/model-0'
    print(saved)
    experiment.test_acc2(nli_setting, config, loader, saved)
def attribution_explain():
    """Run attribution-baseline explanation methods on an NLI model."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLI_run_{}".format("nli_eval")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_nli_warm", "model-97332")
    # NOTE(review): the assignment below overrides the one above.
    load_id = ("NLI_Only_A", 'model-0')
    experiment.nli_attribution_baselines(nli_setting, config, loader, load_id)
def run_adhoc_rank_on_robust():
    """Rank ad-hoc retrieval candidates on Robust using a trained regressor."""
    hp = hyperparams.HPAdhoc()
    hp.batch_size = 512
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Adhoc_{}_eval".format("F")
    config.num_epoch = 4
    config.load_names = ['bert', 'reg_dense']
    voca_size = 30522
    voca_file = "bert_voca.txt"
    loader = data_sampler.DataLoaderFromFile(hp.batch_size, voca_size)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    # NOTE(review): the assignment below overrides the BERT checkpoint above.
    load_id = ("Adhoc_E", 'model-58338')
    experiment.rank_adhoc(config, loader, load_id)
def interactive():
    """Run the interactive NLI session against a saved model."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIInterative"
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_Only_B", 'model-0')
    experiment.nli_interactive_list(nli_setting, config, loader, load_id)
def train_adhoc512():
    """Continue training the 512-token ad-hoc ranking model."""
    hp = hyperparams.HPFAD()
    hp.batch_size = 16
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Adhoc_J_{}".format("512")
    config.num_epoch = 4
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert', 'reg_dense']
    voca_size = 30522
    loader = data_sampler.DataLoaderFromFile(hp.batch_size, voca_size, 171)
    # Alternative: start cold from BERT:
    #   load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    load_id = ("Adhoc_J_512", 'model-6189')
    experiment.train_adhoc2(config, loader, load_id)
def analyze_nli_pair():
    """Visualize pairing explanations for an NLI-explain checkpoint."""
    hp = hyperparams.HPBert()
    e = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    e_config = ExperimentConfig()
    e_config.name = "NLIEx_pair_analyze"
    e_config.num_epoch = 4
    e_config.save_interval = 30 * 60  # 30 minutes
    e_config.load_names = ['bert', 'cls_dense', 'aux_conflict']
    data_loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLIEx_T", "model-12097")
    # BUG(review): `data` is never defined in this function, so this call
    # raises NameError at runtime. A sibling function builds it via
    # `data = list(extract_stream(p))` — presumably something similar is
    # needed here; confirm the intended data source before fixing.
    e.nli_visualization_pairing(nli_setting, e_config, data_loader, load_id, data)
def train_mnli_any_way():
    """Train NLI-explain "any way" from a 512-seq NLI checkpoint.

    NOTE(review): a second function with this same name is defined later in
    this module and shadows this one at import time.
    """
    hp = HP()
    hp.batch_size = 8
    hp.compare_deletion_num = 20
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIEx_Any_512"
    config.ex_val = False
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']
    config.v2_load = True
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("nli512", 'model.ckpt-65000')
    experiment.train_nli_any_way(nli_setting, config, loader, load_id)
def interactive_visual():
    """Run interactive NLI visualization against a saved explain model."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIInterative"
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense', 'aux_conflict']
    load_id = ("NLIEx_U_mismatch", "model-10265")
    # NOTE(review): the assignment below overrides the one above.
    load_id = ("NLIEx_Y_conflict", 'model-12039')
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    experiment.nli_interactive_visual(nli_setting, config, loader, load_id)
def run_adhoc_rank():
    """Rank ad-hoc retrieval candidates using a trained regression model."""
    hp = hyperparams.HPAdhoc()
    hp.batch_size = 512
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Adhoc_{}_eval2".format("E")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'reg_dense']
    voca_size = 30522
    voca_file = "bert_voca.txt"
    loader = ws.DataLoader(hp.seq_max, voca_file, voca_size)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    # NOTE(review): the assignment below overrides the BERT checkpoint above.
    load_id = ("Adhoc_E", 'model-58338')
    experiment.rank_adhoc(config, loader, load_id)
def predict_adhoc512():
    """Predict Robust relevance scores from a pre-encoded 512-token payload.

    The payload shard index is taken from sys.argv[2].
    """
    hp = hyperparams.HPFAD()
    hp.batch_size = 16
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Adhoc_J_{}".format("512")
    config.num_epoch = 4
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert', 'reg_dense']
    voca_size = 30522
    payload_path = os.path.join(cpath.data_path, "robust_payload", "enc_payload_512.pickle")
    task_idx = int(sys.argv[2])
    print(task_idx)
    load_id = ("Adhoc_J_512", 'model-6180')
    experiment.predict_robust(config, voca_size, load_id, payload_path, task_idx)
def train_agree():
    """Train the (dis)agreement classifier from a BERT checkpoint."""
    hp = hyperparams.HPBert()
    config = ExperimentConfig()
    config.num_epoch = 2
    config.save_interval = 100 * 60  # checkpoint every 100 minutes
    config.voca_size = 30522
    config.load_names = ['bert']
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    exp_purpose = "(dis)agree train"
    experiment = Experiment(hp)
    print(exp_purpose)
    config.name = "AgreeTrain"
    voca_file = "bert_voca.txt"
    loader = agree.DataLoader(hp.seq_max, voca_file)
    save_path = e.train_agree(config, loader, load_id)
    print(exp_purpose)
def mscore_eval():
    """Evaluate a controversy mscore checkpoint."""
    hp = hyperparams.HPMscore()
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "Contrv_{}".format("B_eval")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'reg_dense']
    voca_size = 30522
    voca_file = "bert_voca.txt"
    loader = mscore.DataLoader(hp.seq_max, voca_file, voca_size)
    # Alternative checkpoint: ("Contrv_B", 'model-6006')
    load_id = ("Contrv_B", 'model-3001')
    experiment.test_controversy_mscore(config, loader, load_id)
def visualize_senli_on_plain_text():
    """Visualize NLI-explain LM output on plain-text data read from disk."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIEx_lm_analyze"
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense', 'aux_conflict']
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLIEx_AnyA", "model-7255")
    source_path = "/mnt/scratch/youngwookim/Chair/data/tf/1"
    records = list(extract_stream(source_path))
    experiment.nli_visualization_lm(nli_setting, config, loader, load_id, records)
def train_pairing():
    """Train the 'match' pairing explain model (mode PAIRING_NLI)."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIEx_{}".format("T")
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # 'aux_conflict']
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # Alternative checkpoint: ("NLIEx_P_match", "model-1636")
    load_id = ("NLI_run_A", 'model-0')
    PAIRING_NLI = 6
    experiment.train_nli_smart(nli_setting, config, loader, load_id, 'match', PAIRING_NLI)
def cie():
    """Train the controversy CIE model, span variant when is_span is truthy."""
    hp = hyperparams.HPCIE()
    experiment = Experiment(hp)
    config = ExperimentConfig()
    config.name = "cie"
    config.num_epoch = 40
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert']
    voca_size = 30522
    voca_file = "bert_voca.txt"
    is_span = 1
    loader = title.DataLoader(hp.seq_max, voca_file, voca_size, is_span)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    if is_span:
        experiment.controversy_cie_span_train(config, loader, load_id)
    else:
        experiment.controversy_cie_train(config, loader, load_id)
def train_snli_on_bert():
    """Train SNLI classification from a pretrained BERT checkpoint."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "SNLI_Only_{}".format("1")
    config.num_epoch = 1
    config.save_interval = 3 * 60 * 60  # checkpoint every 3 hours
    loader = nli.SNLIDataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # Alternative: load_id = None (cold start)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_nli_ex_0(nli_setting, config, loader, load_id, f_train_ex=False)
def baseline_explain():
    """Run baseline explanation methods for the 'conflict' tag on an NLI model."""
    hp = hyperparams.HPBert()
    hp.batch_size *= 32
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLI_run_{}".format("nli_warm")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']
    # Other options: 'dontcare', 'match', 'mismatch'
    explain_tag = 'conflict'
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_A", 'model-0')
    experiment.nli_explain_baselines(nli_setting, config, loader, load_id, explain_tag)
def pair_feature():
    """Train pair features on tweets for a fixed topic, using a cached dataset when available."""
    hp = HPPairFeatureTweet()
    topic = "atheism"
    setting = shared_setting.TopicTweets2Stance(topic)
    use_cache = True
    run_id = "{}_{}".format(topic, hp.sent_max)
    if use_cache:
        print("using PairDataCache")
        pair_data = loader.PairDataLoader.load_from_pickle(run_id)
    else:
        # Build the dataset from scratch and cache it for later runs.
        tweet_group = tweet_reader.load_per_user(topic)
        pair_data = loader.PairDataLoader(hp.sent_max, setting, tweet_group)
        pair_data.index_data()
        pair_data.save_to_pickle(run_id)
    config = ExperimentConfig()
    config.name = "LM_pair_featuer_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 3 * 60  # checkpoint every 3 minutes
    experiment = Experiment(hp)
    experiment.train_pair_feature(config, pair_data)
def train_nil_on_bert():
    """Fine-tune NLI from a cold-pretrained BERT checkpoint, then evaluate accuracy.

    NOTE(review): this definition shares its name with an earlier function in
    this module and shadows it at import time — consider renaming one of them.
    """
    print('train_nil_on_bert')
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLI_10k_bert_cold"
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert']  # , 'aux_conflict']
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    dir_name = "bert_cold"
    model_step = 10 * 1000
    load_id = (dir_name, "model.ckpt-{}".format(model_step))
    print(load_id)
    saved = experiment.train_nli_ex_0(nli_setting, config, loader, load_id, False)
    experiment.test_acc2(nli_setting, config, loader, saved)
def train_nli_with_premade():
    """Train NLI-explain on premade data for the 'conflict' tag, starting from BERT."""
    hp = hyperparams.HPBert()
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIEx_{}".format("HB")
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert']  # , 'cls_dense']  # , 'aux_conflict']
    # Other options: 'dontcare', 'match', 'mismatch'
    explain_tag = 'conflict'
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    # Alternative checkpoints tried previously:
    #   load_id = ("NLI_run_nli_warm", "model-97332")
    #   load_id = ("NLIEx_A", "model-16910")
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_nli_ex_with_premade_data(nli_setting, config, loader, load_id, explain_tag)
def train_mnli_any_way():
    """Train NLI-explain "any way" from the NLI_run_A checkpoint.

    NOTE(review): this definition shares its name with an earlier function in
    this module and shadows it at import time — consider renaming one of them.
    """
    hp = hyperparams.HPBert()
    hp.compare_deletion_num = 20
    experiment = Experiment(hp)
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIEx_AnyA"
    config.ex_val = False
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']
    # Tags considered previously: 'match', 'mismatch', 'conflict', 'dontcare'
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_A", 'model-0')
    experiment.train_nli_any_way(nli_setting, config, loader, load_id)
def document_lm():
    """Train a document LM over per-author tweet groups for a fixed topic."""
    hp = HPDocLM()
    topic = "hillary"
    setting = shared_setting.TopicTweets2Stance(topic)
    use_cache = False
    run_id = "{}_{}".format(topic, hp.seq_max)
    if use_cache:
        doc_data = author_as_doc.AuthorAsDoc.load_from_pickle(run_id)
    else:
        # Build the dataset from scratch and cache it for later runs.
        tweet_group = tweet_reader.load_per_user(topic)
        doc_data = author_as_doc.AuthorAsDoc(hp.seq_max, setting, tweet_group)
        doc_data.index_data()
        doc_data.save_to_pickle(run_id)
    config = ExperimentConfig()
    config.name = "DLM_pair_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    experiment = Experiment(hp)
    experiment.train_doc_lm(config, doc_data)
def train_ubuntu():
    """Continue training the Ubuntu dialogue ranking model."""
    hp = hyperparams.HPUbuntu()
    hp.batch_size = 16
    experiment = Experiment(hp)
    voca_setting = NLI()
    voca_setting.vocab_size = 30522
    voca_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "Ubuntu_{}".format("A")
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'reg_dense']  # , 'aux_conflict']
    loader = ubuntu.DataLoader(hp.seq_max, voca_setting.vocab_filename, voca_setting.vocab_size, True)
    # Alternative checkpoints tried previously:
    #   load_id = ("NLI_run_nli_warm", "model-97332")
    #   load_id = ("NLIEx_A", "model-16910")
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    # NOTE(review): the assignment below overrides the BERT checkpoint above.
    load_id = ("Ubuntu_A", "model-5145")
    experiment.train_ubuntu(config, loader, load_id)
def train_nli_smart_rf(explain_tag):
    """Train the smart NLI-explain model (mode 5) for the given explain tag."""
    hp = hyperparams.HPSENLI()
    hp.compare_deletion_num = 20
    experiment = Experiment(hp)
    # Silence info-level chatter from both experiment loggers.
    experiment.log.setLevel(logging.WARNING)
    experiment.log2.setLevel(logging.WARNING)
    experiment.log.info("I don't want to see")
    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"
    config = ExperimentConfig()
    config.name = "NLIEx_{}".format("CO_" + explain_tag)
    config.num_epoch = 1
    config.ex_val = False
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']
    config.save_payload = True
    loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
    load_id = ("NLI_run_A", 'model-0')
    experiment.train_nli_smart(nli_setting, config, loader, load_id, explain_tag, 5)
def tuning_train_nli_rf():
    """Sweep deletion-gate values (g_val) for smart NLI-explain 'match' training."""
    gamma_values = [0.9]
    for g_del in gamma_values:
        # Fresh graph per sweep value so runs don't share state.
        tf.reset_default_graph()
        hp = hyperparams.HPSENLI()
        hp.g_val = g_del
        hp.compare_deletion_num = 20
        experiment = Experiment(hp)
        nli_setting = NLI()
        nli_setting.vocab_size = 30522
        nli_setting.vocab_filename = "bert_voca.txt"
        config = ExperimentConfig()
        config.name = "NLIEx_{}".format("match_del_{}".format(g_del))
        config.num_epoch = 1
        config.ex_val = True
        config.save_interval = 30 * 60  # checkpoint every 30 minutes
        config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']
        # Other options: 'dontcare', 'mismatch', 'conflict'
        explain_tag = 'match'
        loader = nli.DataLoader(hp.seq_max, nli_setting.vocab_filename, True)
        # Alternative checkpoints tried previously:
        #   ("NLI_run_nli_warm", "model-97332"), ("NLIEx_A", "model-16910"),
        #   ("uncased_L-12_H-768_A-12", 'bert_model.ckpt'),
        #   ("NLIEx_D", "model-1964"), ("NLIEx_D", "model-1317")
        load_id = ("NLI_run_A", 'model-0')
        experiment.train_nli_smart(nli_setting, config, loader, load_id, explain_tag, 5)