def lm_tweets_train():
    """Train the tweet language model on the 'atheism' topic."""
    hparams = HPTweets()
    data = tweets.TweetLoader("atheism", hparams.seq_max, shared_setting.Tweets2Stance)

    config = ExperimentConfig()
    config.name = "LM_tweets"
    config.num_epoch = 30
    config.save_interval = 30 * 60  # checkpoint every 30 minutes

    experiment = Experiment(hparams)
    experiment.train_lm_batch(config, data)
def stance_cold_start_simple():
    """Cold-start stance training on 'atheism' using the simple-tokenizer vocabulary."""
    hparams = HPColdStart()
    experiment = Experiment(hparams)

    topic = "atheism"
    setting = shared_setting.SimpleTokner(topic)
    stance_data = stance_detection.DataLoader(topic, hparams.seq_max, setting.vocab_filename)

    experiment.train_stance(setting.vocab_size, stance_data)
def feature_svm():
    """Run the SVM-on-features experiment for 'atheism', warm-started from a pair-LM checkpoint."""
    hparams = HPFineTunePair()
    topic = "atheism"
    experiment = Experiment(hparams)

    # Pretrained pair-LM checkpoint to extract features from.
    preload_id = ("LM_reserve/DLM_pair_tweets_atheism", 217246)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.FineLoader(
        topic, hparams.seq_max, setting.vocab_filename, hparams.sent_max)

    experiment.feature_svm(setting.vocab_size, stance_data, preload_id)
def stance_cold_start():
    """Cold-start stance training on 'hillary' with the topic-tweet vocabulary."""
    hparams = HPColdStart()
    experiment = Experiment(hparams)

    topic = "hillary"
    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(topic, hparams.seq_max, setting.vocab_filename)

    experiment.train_stance(setting.vocab_size, stance_data)
def lm_guardian_train():
    """Train a language model on Guardian articles for the 'atheism' topic."""
    hparams = Hyperparams()
    guardian_data = guardian.GuardianLoader(
        "atheism", hparams.seq_max, shared_setting.Guardian2Stance)

    config = ExperimentConfig()
    config.name = "LM_guardian"
    config.num_epoch = 30
    # NOTE(review): unlike lm_tweets_train, no save_interval is set here — confirm
    # the ExperimentConfig default is acceptable.

    experiment = Experiment(hparams)
    experiment.train_lm_batch(config, guardian_data)
def train_aux():
    """Train the auxiliary sentiment task for 'hillary', warm-started from a pair-LM checkpoint."""
    hparams = HPFineTunePair()
    aux_hparams = HPTiny()
    experiment = Experiment(hparams)

    topic = "hillary"
    preload_id = ("DLM_pair_tweets_hillary", 131200)

    setting = shared_setting.TopicTweets2Stance(topic)
    sentiment = stance_detection.SentimentLoader(topic, hparams.seq_max, setting.vocab_filename)

    experiment.train_aux1(aux_hparams, setting.vocab_size, sentiment, preload_id)
def pair_lm():
    """Train the pairwise tweet language model on the 'atheism' topic, grouped per user."""
    hparams = HPPairTweet()
    topic = "atheism"

    setting = shared_setting.TopicTweets2Stance(topic)
    tweet_group = tweet_reader.load_per_user(topic)
    data = loader.PairDataLoader(hparams.sent_max, setting, tweet_group)

    config = ExperimentConfig()
    config.name = "LM_pair_tweets_{}".format(topic)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes

    experiment = Experiment(hparams)
    experiment.train_pair_lm(config, data)
def test_ubuntu():
    """Evaluate on the Ubuntu dialogue validation set with BERT vocabulary."""
    hparams = hyperparams.HPUbuntu()
    hparams.batch_size = 16
    experiment = Experiment(hparams)

    # Reuse the NLI settings container just to carry vocab info.
    voca_setting = NLI()
    voca_setting.vocab_size = 30522
    voca_setting.vocab_filename = "bert_voca.txt"

    data_loader = ubuntu.DataLoader(
        hparams.seq_max, voca_setting.vocab_filename, voca_setting.vocab_size, True)
    experiment.test_valid_ubuntu(data_loader)
def gradient_rte_visulize():
    """Visualize gradients of an RTE model restored from the 'RTE_A' run.

    NOTE(review): the function name has a typo ('visulize'); kept as-is since
    callers may reference it.
    """
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)
    vocab_filename = "bert_voca.txt"

    load_id = loader.find_model_name("RTE_A")

    config = ExperimentConfig()
    config.name = "RTE_{}".format("visual")
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']

    data_loader = rte.DataLoader(hparams.seq_max, vocab_filename, True)
    experiment.rte_visualize(config, data_loader, load_id)
def train_aux_stance():
    """Train stance on 'hillary' warm-started from the 'after_aux' checkpoint."""
    hparams = HPColdStart()
    aux_hparams = HPTiny()

    topic = "hillary"
    preload_id = ("after_aux", 234)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(topic, hparams.seq_max, setting.vocab_filename)

    experiment = Experiment(hparams)
    experiment.train_aux_stance(aux_hparams, setting.vocab_size, stance_data, preload_id)
def train_nil():
    """Train NLI-only run 'B' with BERT hyperparameters.

    NOTE(review): a second `train_nil` with HP() appears later in this file and
    shadows this one at import time — confirm which definition is intended.
    """
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "NLI_only_{}".format("B")
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename, True)
    experiment.train_nli(nli_setting, config, data_loader)
def stance_with_consistency():
    """Train stance on 'atheism' with an auxiliary pairwise-consistency objective."""
    hparams = HPStanceConsistency()
    topic = "atheism"
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "stance_consistency_{}".format(topic)

    setting = shared_setting.TopicTweets2Stance(topic)
    stance_data = stance_detection.DataLoader(topic, hparams.seq_max, setting.vocab_filename)

    tweet_group = tweet_reader.load_per_user(topic)
    aux_data = AuxPairLoader(hparams.seq_max, setting, tweet_group)

    experiment.train_stance_consistency(setting.vocab_size, stance_data, aux_data)
def train_nli_with_reinforce_old():
    """Legacy NLI-explain training run, resumed from an interval checkpoint.

    NOTE(review): unlike sibling functions, `nli_setting.vocab_size` and
    `nli_setting.vocab_filename` are never assigned here before use — this
    relies on NLI() providing defaults; confirm.
    """
    hparams = hyperparams.HPNLI2()
    experiment = Experiment(hparams)
    nli_setting = NLI()

    config = ExperimentConfig()
    config.name = "NLI_run_{}".format("retest")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'dense_cls']  # , 'aux_conflict']

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename)
    load_id = ("interval", "model-48040")
    experiment.train_nli_ex_0(nli_setting, config, data_loader, load_id, True)
def protest_bert():
    """Fine-tune BERT on the protest classification task from the base checkpoint."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "protest"
    config.num_epoch = 1
    config.save_interval = 1 * 60  # checkpoint every minute
    config.load_names = ['bert']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = protest.DataLoader(hparams.seq_max, vocab_filename, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_protest(config, data_loader, load_id)
def crs_stance_baseline():
    """Train the CRS stance-classification baseline from the BERT base checkpoint."""
    hparams = hyperparams.HPCRS()
    hparams.batch_size = 16
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "CRS_{}".format("baseline")
    config.num_epoch = 4
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert']  # , 'reg_dense']
    config.voca_size = 30522

    data_loader = DataGenerator()
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_crs_classify(config, data_loader, load_id)
def pred_mnli_anyway():
    """Run rationale prediction on the MNLI 'mismatch' slice with the NLIEx_AnyA model."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "NLIEx_AnyA"
    config.load_names = ['bert', 'cls_dense', 'aux_conflict']

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename, True)

    target_label = 'mismatch'
    data_id = "{}_1000".format(target_label)
    load_id = ("NLIEx_AnyA", 'model-2785')
    experiment.predict_rf(nli_setting, config, data_loader, load_id, data_id)
def pred_snli_ex():
    """Run rationale prediction on the SNLI test split with the SNLIEx_B model."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "SNLIEx_B"
    config.load_names = ['bert', 'cls_dense', 'aux_conflict']

    data_loader = nli.SNLIDataLoader(hparams.seq_max, nli_setting.vocab_filename, True)
    load_id = ("SNLIEx_B", 'model-10275')
    experiment.predict_rf(nli_setting, config, data_loader, load_id, "test")
def train_nil():
    """Train NLI-only run '512' from the BERT base checkpoint.

    NOTE(review): this redefines `train_nil` from earlier in the file; at
    import time this later definition wins — confirm that is intended.
    """
    hparams = HP()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "NLI_only_{}".format("512")
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename, True)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_nli_ex_0(nli_setting, config, data_loader, load_id, False)
def train_adhoc_with_reinforce():
    """Train ad-hoc retrieval run 'E' from the BERT base checkpoint."""
    hparams = hyperparams.HPAdhoc()
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Adhoc_{}".format("E")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = ws.DataLoader(hparams.seq_max, vocab_filename, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_adhoc(config, data_loader, load_id)
def train_nil_cold():
    """Train NLI from a cold start (no pretrained weights), then report accuracy."""
    print('train_nil_cold')
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "NLI_Cold"
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename, True)
    # load_id=None → no warm start; evaluate the checkpoint the trainer returns.
    saved = experiment.train_nli_ex_0(nli_setting, config, data_loader, None, False)
    experiment.test_acc2(nli_setting, config, data_loader, saved)
def train_rte():
    """Fine-tune on RTE starting from the tlm_simple 15k-step checkpoint."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)
    vocab_filename = "bert_voca.txt"

    # load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    load_id = ("tlm_simple", "model.ckpt-15000")

    config = ExperimentConfig()
    config.name = "RTE_{}".format("tlm_simple_15000")
    config.num_epoch = 10
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert']

    data_loader = rte.DataLoader(hparams.seq_max, vocab_filename, True)
    experiment.train_rte(config, data_loader, load_id)
def train_nli_with_premade(explain_tag):
    """Train NLI-explain with pre-made explanation data for `explain_tag`."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "NLIEx_{}".format("Premade_" + explain_tag)
    config.num_epoch = 1
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert']  # , 'cls_dense'] #, 'aux_conflict']

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename, True)
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_nli_ex_with_premade_data(
        nli_setting, config, data_loader, load_id, explain_tag)
def wikicont_bert():
    """Train the WikiContrv2009 controversy task restricted to the wiki collection."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "WikiContrv2009_only_wiki"
    config.num_epoch = 1
    config.save_interval = 60 * 60  # checkpoint every hour
    config.load_names = ['bert']
    config.valid_freq = 100

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = Ams18.DataLoader(hparams.seq_max, vocab_filename, vocab_size)
    # Restrict the source collection to wiki documents only.
    data_loader.source_collection.collection_type = 'wiki'

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_wiki_contrv(config, data_loader, load_id)
def bert_lm_test():
    """Run BERT LM positive/negative scoring for controversy run 'B'."""
    hparams = hyperparams.HPQL()
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Contrv_{}".format("B")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = ws.DataLoader(hparams.seq_max, vocab_filename, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.bert_lm_pos_neg(config, data_loader, load_id)
def train_adhoc_fad():
    """Train ad-hoc retrieval run 'FAD' on pre-sampled data from file."""
    hparams = hyperparams.HPFAD()
    hparams.batch_size = 16
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Adhoc_{}".format("FAD")
    config.num_epoch = 4
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert']  # , 'reg_dense']

    vocab_size = 30522
    data_loader = data_sampler.DataLoaderFromFile(hparams.batch_size, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    # load_id = ("Adhoc_I2", 'model-290')
    experiment.train_adhoc2(config, data_loader, load_id)
def test_ql():
    """Evaluate query-likelihood scoring for ad-hoc run 'C' at large batch size."""
    hparams = hyperparams.HPAdhoc()
    hparams.batch_size = 512
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Adhoc_{}".format("C")
    config.num_epoch = 4
    config.load_names = ['bert', 'cls']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = ws.DataLoader(hparams.seq_max, vocab_filename, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.test_ql(config, data_loader, load_id)
def run_ql_rank():
    """Rank documents by query likelihood for ad-hoc run 'D'."""
    hparams = hyperparams.HPQL()
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Adhoc_{}".format("D")
    config.num_epoch = 4
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = ws.DataLoader(hparams.seq_max, vocab_filename, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.rank_ql(config, data_loader, load_id)
def train_mscore_regression():
    """Train controversy classification on mscore data (run 'C')."""
    hparams = hyperparams.HPMscore()
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Contrv_{}".format("C")
    config.num_epoch = 4
    config.save_interval = 10 * 60  # checkpoint every 10 minutes
    config.load_names = ['bert']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = mscore.DataLoader(hparams.seq_max, vocab_filename, vocab_size)

    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    experiment.train_controversy_classification(config, data_loader, load_id)
def test_nli():
    """Report NLI accuracy for a specific saved checkpoint (hard-coded path)."""
    hparams = hyperparams.HPBert()
    experiment = Experiment(hparams)

    nli_setting = NLI()
    nli_setting.vocab_size = 30522
    nli_setting.vocab_filename = "bert_voca.txt"

    config = ExperimentConfig()
    config.name = "NLI_400k_tlm_simple_wo_hint"
    config.num_epoch = 2
    config.save_interval = 30 * 60  # checkpoint every 30 minutes
    config.load_names = ['bert', 'cls_dense']  # , 'aux_conflict']

    data_loader = nli.DataLoader(hparams.seq_max, nli_setting.vocab_filename, True)

    # Candidate checkpoints; only the last assignment takes effect.
    # saved = "/mnt/scratch/youngwookim/Chair/output/model/runs/NLI_Cold/model-0"
    saved = "/mnt/scratch/youngwookim/Chair/output/model/runs/NLI_400k_tlm_wo_hint/model-0"
    saved = '/mnt/scratch/youngwookim/Chair/output/model/runs/NLI_400k_tlm_simple_hint/model-0'
    print(saved)
    experiment.test_acc2(nli_setting, config, data_loader, saved)
def run_adhoc_rank_on_robust():
    """Rank Robust-track documents with the Adhoc_E model at large batch size."""
    hparams = hyperparams.HPAdhoc()
    hparams.batch_size = 512
    experiment = Experiment(hparams)

    config = ExperimentConfig()
    config.name = "Adhoc_{}_eval".format("F")
    config.num_epoch = 4
    config.load_names = ['bert', 'reg_dense']

    vocab_size = 30522
    vocab_filename = "bert_voca.txt"
    data_loader = data_sampler.DataLoaderFromFile(hparams.batch_size, vocab_size)

    # Candidate checkpoints; only the last assignment takes effect.
    load_id = ("uncased_L-12_H-768_A-12", 'bert_model.ckpt')
    load_id = ("Adhoc_E", 'model-58338')
    experiment.rank_adhoc(config, data_loader, load_id)