def run(lr=0.001,
        batsize=20,
        epochs=60,
        embdim=128,
        encdim=256,
        numlayers=1,
        beamsize=5,
        dropout=.25,
        wreg=1e-10,
        cuda=False,
        gpu=0,
        minfreq=2,
        gradnorm=3.,
        smoothing=0.1,
        cosine_restarts=1.,
        seed=123456,
        numcvfolds=6,
        testfold=-1,    # if non-default, must be within number of splits, the chosen value is used for validation
        reorder_random=False,
        ):
    localargs = locals().copy()
    print(locals())
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    tt = q.ticktock("script")
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt.tick("loading data")
    cvfolds = None if testfold == -1 else numcvfolds
    testfold = None if testfold == -1 else testfold
    ds = GeoDataset(sentence_encoder=SequenceEncoder(tokenizer=split_tokenizer),
                    min_freq=minfreq, cvfolds=cvfolds, testfold=testfold,
                    reorder_random=reorder_random)
    print(f"max lens: {ds.maxlen_input} (input) and {ds.maxlen_output} (output)")
    tt.tock("data loaded")

    do_rare_stats(ds)
    # batch = next(iter(train_dl))
    # print(batch)
    # print("input graph")
    # print(batch.batched_states)

    model = BasicGenModel(embdim=embdim, hdim=encdim, dropout=dropout, numlayers=numlayers,
                          sentence_encoder=ds.sentence_encoder, query_encoder=ds.query_encoder,
                          feedatt=True)

    # sentence_rare_tokens = set([ds.sentence_encoder.vocab(i) for i in model.inp_emb.rare_token_ids])
    # do_rare_stats(ds, sentence_rare_tokens=sentence_rare_tokens)

    # teacher-forced decoder used for training
    tfdecoder = SeqDecoder(model, tf_ratio=1.,
                           eval=[CELoss(ignore_index=0, mode="logprobs", smoothing=smoothing),
                                 SeqAccuracies(),
                                 TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                              orderless={"and"})])
    losses = make_array_of_metrics("loss", "elem_acc", "seq_acc", "tree_acc")

    # free-running (greedy) decoder used for validation
    freedecoder = SeqDecoder(model, maxtime=100, tf_ratio=0.,
                             eval=[SeqAccuracies(),
                                   TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                                orderless={"and"})])
    vlosses = make_array_of_metrics("seq_acc", "tree_acc")

    # beam-search decoder used for final testing
    beamdecoder = BeamDecoder(model, maxtime=100, beamsize=beamsize, copy_deep=True,
                              eval=[SeqAccuracies()],
                              eval_beam=[TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                                      orderless={"and"})])
    beamlosses = make_array_of_metrics("seq_acc", "tree_acc", "tree_acc_at_last")

    # 4. define optim
    # optim = torch.optim.Adam(trainable_params, lr=lr, weight_decay=wreg)
    optim = torch.optim.Adam(tfdecoder.parameters(), lr=lr, weight_decay=wreg)

    # lr schedule
    if cosine_restarts >= 0:
        # t_max = epochs * len(train_dl)
        t_max = epochs
        print(f"Total number of updates: {t_max}")
        lr_schedule = q.WarmupCosineWithHardRestartsSchedule(optim, 0, t_max, cycles=cosine_restarts)
        reduce_lr = [lambda: lr_schedule.step()]
    else:
        reduce_lr = []

    # 6. define training function
    clipgradnorm = lambda: torch.nn.utils.clip_grad_norm_(tfdecoder.parameters(), gradnorm)
    # clipgradnorm = lambda: None
    trainbatch = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    train_on = "train"
    valid_on = "test" if testfold is None else "valid"
    trainepoch = partial(q.train_epoch, model=tfdecoder,
                         dataloader=ds.dataloader(train_on, batsize, shuffle=True),
                         optim=optim, losses=losses, _train_batch=trainbatch,
                         device=device, on_end=reduce_lr)

    # 7. define validation function (using partial)
    validepoch = partial(q.test_epoch, model=freedecoder,
                         dataloader=ds.dataloader(valid_on, batsize, shuffle=False),
                         losses=vlosses, device=device)
    # validepoch = partial(q.test_epoch, model=freedecoder, dataloader=valid_dl, losses=vlosses, device=device)

    # p = q.save_run(freedecoder, localargs, filepath=__file__)
    # q.save_dataset(ds, p)
    # _freedecoder, _localargs = q.load_run(p)
    # _ds = q.load_dataset(p)
    # sys.exit()

    # 7. run training
    tt.tick("training")
    q.run_training(run_train_epoch=trainepoch, run_valid_epoch=validepoch, max_epochs=epochs)
    tt.tock("done training")

    if testfold is not None:
        return vlosses[1].get_epoch_error()

    # testing
    # NOTE: both evaluation passes below run on the "test" split; the first one is printed
    # as "validation test results" in the original.
    tt.tick("testing")
    testresults = q.test_epoch(model=beamdecoder, dataloader=ds.dataloader("test", batsize),
                               losses=beamlosses, device=device)
    print("validation test results: ", testresults)
    tt.tock("tested")
    tt.tick("testing")
    testresults = q.test_epoch(model=beamdecoder, dataloader=ds.dataloader("test", batsize),
                               losses=beamlosses, device=device)
    print("test results: ", testresults)
    tt.tock("tested")

    # save model?
    tosave = input("Save this model? 'y(es)'=Yes, <int>=overwrite previous, otherwise=No) \n>")
    # if True:
    #     overwrite = None
    if tosave.lower() == "y" or tosave.lower() == "yes" or re.match(r"\d+", tosave.lower()):
        overwrite = int(tosave) if re.match(r"\d+", tosave) else None
        p = q.save_run(model, localargs, filepath=__file__, overwrite=overwrite)
        q.save_dataset(ds, p)
        _model, _localargs = q.load_run(p)
        _ds = q.load_dataset(p)

        _freedecoder = BeamDecoder(_model, maxtime=100, beamsize=beamsize, copy_deep=True,
                                   eval=[SeqAccuracies()],
                                   eval_beam=[TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                                           orderless={"and"})])

        # testing
        tt.tick("testing reloaded")
        _testresults = q.test_epoch(model=_freedecoder, dataloader=_ds.dataloader("test", batsize),
                                    losses=beamlosses, device=device)
        print(_testresults)
        tt.tock("tested")

        # save predictions
        _, testpreds = q.eval_loop(_freedecoder, ds.dataloader("test", batsize=batsize, shuffle=False),
                                   device=device)
        testout = get_outputs_for_save(testpreds)
        _, trainpreds = q.eval_loop(_freedecoder, ds.dataloader("train", batsize=batsize, shuffle=False),
                                    device=device)
        trainout = get_outputs_for_save(trainpreds)

        with open(os.path.join(p, "trainpreds.json"), "w") as f:
            ujson.dump(trainout, f)
        with open(os.path.join(p, "testpreds.json"), "w") as f:
            ujson.dump(testout, f)
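
# A minimal cross-validation driver sketch (not part of the original script): when run() above
# is called with a non-default testfold, it returns the held-out tree accuracy via
# vlosses[1].get_epoch_error(), which this hypothetical helper (run_crossvalid) aggregates over folds.
def run_crossvalid(numcvfolds=6, **kw):
    scores = []
    for fold in range(numcvfolds):
        # each call trains on the remaining folds and validates on `fold`
        score = run(numcvfolds=numcvfolds, testfold=fold, **kw)
        scores.append(score)
        print(f"fold {fold}: {score}")
    avg = sum(scores) / len(scores)
    print(f"average over {numcvfolds} folds: {avg}")
    return avg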
def run(lr=0.001,
        batsize=20,
        epochs=70,
        embdim=128,
        encdim=400,
        numlayers=1,
        beamsize=5,
        dropout=.5,
        wreg=1e-10,
        cuda=False,
        gpu=0,
        minfreq=2,
        gradnorm=3.,
        smoothing=0.1,
        cosine_restarts=1.,
        seed=123456,
        ):
    localargs = locals().copy()
    print(locals())
    torch.manual_seed(seed)
    np.random.seed(seed)
    tt = q.ticktock("script")
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt.tick("loading data")
    ds = GeoDatasetRank()
    print(f"max lens: {ds.maxlen_input} (input) and {ds.maxlen_output} (output)")
    tt.tock("data loaded")

    # do_rare_stats(ds)

    # NOTE: a ranking model must be bound to `model` before it is used below; the original
    # listing left both constructors commented out. The TreeRankModel line is uncommented
    # here as an assumption.
    model = TreeRankModel(embdim=embdim, hdim=encdim, dropout=dropout, numlayers=numlayers,
                          sentence_encoder=ds.sentence_encoder, query_encoder=ds.query_encoder)
    # model = ParikhRankModel(embdim=encdim, dropout=dropout,
    #                         sentence_encoder=ds.sentence_encoder, query_encoder=ds.query_encoder)

    # sentence_rare_tokens = set([ds.sentence_encoder.vocab(i) for i in model.inp_emb.rare_token_ids])
    # do_rare_stats(ds, sentence_rare_tokens=sentence_rare_tokens)

    ranker = Ranker(model, eval=[BCELoss(mode="logits", smoothing=smoothing)],
                    evalseq=[SeqAccuracies(),
                             TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                          orderless={"and", "or"})])

    losses = make_array_of_metrics("loss", "seq_acc", "tree_acc")
    vlosses = make_array_of_metrics("seq_acc", "tree_acc")

    # 4. define optim
    # optim = torch.optim.Adam(trainable_params, lr=lr, weight_decay=wreg)
    optim = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wreg)

    # lr schedule
    if cosine_restarts >= 0:
        # t_max = epochs * len(train_dl)
        t_max = epochs
        print(f"Total number of updates: {t_max}")
        lr_schedule = q.WarmupCosineWithHardRestartsSchedule(optim, 0, t_max, cycles=cosine_restarts)
        reduce_lr = [lambda: lr_schedule.step()]
    else:
        reduce_lr = []

    # 6. define training function
    clipgradnorm = lambda: torch.nn.utils.clip_grad_norm_(model.parameters(), gradnorm)
    # clipgradnorm = lambda: None
    trainbatch = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    trainepoch = partial(q.train_epoch, model=ranker, dataloader=ds.dataloader("train", batsize),
                         optim=optim, losses=losses, _train_batch=trainbatch,
                         device=device, on_end=reduce_lr)

    # 7. define validation function (using partial)
    validepoch = partial(q.test_epoch, model=ranker, dataloader=ds.dataloader("test", batsize),
                         losses=vlosses, device=device)

    # 7. run training
    tt.tick("training")
    q.run_training(run_train_epoch=trainepoch, run_valid_epoch=validepoch, max_epochs=epochs)
    tt.tock("done training")

    # testing
    tt.tick("testing")
    testresults = q.test_epoch(model=ranker, dataloader=ds.dataloader("test", batsize),
                               losses=vlosses, device=device)
    print("validation test results: ", testresults)
    tt.tock("tested")
    tt.tick("testing")
    testresults = q.test_epoch(model=ranker, dataloader=ds.dataloader("test", batsize),
                               losses=vlosses, device=device)
    print("test results: ", testresults)
    tt.tock("tested")

    # save model?
    tosave = input("Save this model? 'y(es)'=Yes, <int>=overwrite previous, otherwise=No) \n>")
    # if True:
    #     overwrite = None
    if tosave.lower() == "y" or tosave.lower() == "yes" or re.match(r"\d+", tosave.lower()):
        overwrite = int(tosave) if re.match(r"\d+", tosave) else None
        p = q.save_run(model, localargs, filepath=__file__, overwrite=overwrite)
        q.save_dataset(ds, p)
        _model, _localargs = q.load_run(p)
        _ds = q.load_dataset(p)

        _freedecoder = BeamDecoder(_model, maxtime=100, beamsize=beamsize, copy_deep=True,
                                   eval=[SeqAccuracies()],
                                   eval_beam=[TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                                           orderless={"op:and", "SW:concat"})])

        # testing
        tt.tick("testing reloaded")
        # NOTE: the original passed an undefined `beamlosses` here; vlosses is substituted as an assumption.
        _testresults = q.test_epoch(model=_freedecoder, dataloader=_ds.dataloader("test", batsize),
                                    losses=vlosses, device=device)
        print(_testresults)
        tt.tock("tested")

        # save predictions
        _, testpreds = q.eval_loop(_freedecoder, ds.dataloader("test", batsize=batsize, shuffle=False),
                                   device=device)
        testout = get_outputs_for_save(testpreds)
        _, trainpreds = q.eval_loop(_freedecoder, ds.dataloader("train", batsize=batsize, shuffle=False),
                                    device=device)
        trainout = get_outputs_for_save(trainpreds)

        with open(os.path.join(p, "trainpreds.json"), "w") as f:
            ujson.dump(trainout, f)
        with open(os.path.join(p, "testpreds.json"), "w") as f:
            ujson.dump(testout, f)
def run(lr=0.001,
        batsize=50,
        epochs=100,
        embdim=100,
        encdim=100,
        numlayers=1,
        beamsize=1,
        dropout=.2,
        wreg=1e-10,
        cuda=False,
        gpu=0,
        minfreq=3,
        gradnorm=3.,
        cosine_restarts=1.,
        seed=123456,
        ):
    localargs = locals().copy()
    print(locals())
    torch.manual_seed(seed)
    np.random.seed(seed)
    tt = q.ticktock("script")
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt.tick("loading data")
    ds = LCQuaDnoENTDataset(sentence_encoder=SequenceEncoder(tokenizer=split_tokenizer),
                            min_freq=minfreq)
    print(f"max lens: {ds.maxlen_input} (input) and {ds.maxlen_output} (output)")
    tt.tock("data loaded")

    do_rare_stats(ds)
    # batch = next(iter(train_dl))
    # print(batch)
    # print("input graph")
    # print(batch.batched_states)

    model = BasicGenModel(embdim=embdim, hdim=encdim, dropout=dropout, numlayers=numlayers,
                          sentence_encoder=ds.sentence_encoder, query_encoder=ds.query_encoder,
                          feedatt=True)

    # sentence_rare_tokens = set([ds.sentence_encoder.vocab(i) for i in model.inp_emb.rare_token_ids])
    # do_rare_stats(ds, sentence_rare_tokens=sentence_rare_tokens)

    tfdecoder = SeqDecoder(model, tf_ratio=1.,
                           eval=[CELoss(ignore_index=0, mode="logprobs"),
                                 SeqAccuracies(),
                                 TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                              orderless={"select", "count", "ask"})])
    losses = make_array_of_metrics("loss", "elem_acc", "seq_acc", "tree_acc")

    # beamdecoder = BeamActionSeqDecoder(tfdecoder.model, beamsize=beamsize, maxsteps=50)
    if beamsize == 1:
        freedecoder = SeqDecoder(model, maxtime=40, tf_ratio=0.,
                                 eval=[SeqAccuracies(),
                                       TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                                    orderless={"select", "count", "ask"})])
        vlosses = make_array_of_metrics("seq_acc", "tree_acc")
    else:
        freedecoder = BeamDecoder(model, maxtime=30, beamsize=beamsize,
                                  eval=[SeqAccuracies()],
                                  eval_beam=[TreeAccuracy(tensor2tree=partial(tensor2tree, D=ds.query_encoder.vocab),
                                                          orderless={"select", "count", "ask"})])
        vlosses = make_array_of_metrics("seq_acc", "tree_acc", "tree_acc_at_last")

    # # test
    # tt.tick("doing one epoch")
    # for batch in iter(train_dl):
    #     batch = batch.to(device)
    #     ttt.tick("start batch")
    #     # with torch.no_grad():
    #     out = tfdecoder(batch)
    #     ttt.tock("end batch")
    # tt.tock("done one epoch")
    # print(out)
    # sys.exit()

    # beamdecoder(next(iter(train_dl)))

    # print(dict(tfdecoder.named_parameters()).keys())

    # NOTE: the redefinitions below override the beam-specific vlosses configured in the
    # if/else above (dropping "tree_acc_at_last" when beamsize > 1); kept as in the original.
    losses = make_array_of_metrics("loss", "elem_acc", "seq_acc", "tree_acc")
    vlosses = make_array_of_metrics("seq_acc", "tree_acc")

    # if beamsize >= 3:
    #     vlosses = make_loss_array("seq_acc", "tree_acc", "tree_acc_at3", "tree_acc_at_last")
    # else:
    #     vlosses = make_loss_array("seq_acc", "tree_acc", "tree_acc_at_last")

    # trainable_params = tfdecoder.named_parameters()
    # exclude_params = set()
    # exclude_params.add("model.model.inp_emb.emb.weight")   # don't train input embeddings if doing glove
    # trainable_params = [v for k, v in trainable_params if k not in exclude_params]

    # 4. define optim
    # optim = torch.optim.Adam(trainable_params, lr=lr, weight_decay=wreg)
    optim = torch.optim.Adam(tfdecoder.parameters(), lr=lr, weight_decay=wreg)

    # lr schedule
    if cosine_restarts >= 0:
        # t_max = epochs * len(train_dl)
        t_max = epochs
        print(f"Total number of updates: {t_max}")
        lr_schedule = q.WarmupCosineWithHardRestartsSchedule(optim, 0, t_max, cycles=cosine_restarts)
        reduce_lr = [lambda: lr_schedule.step()]
    else:
        reduce_lr = []

    # 6. define training function
    clipgradnorm = lambda: torch.nn.utils.clip_grad_norm_(tfdecoder.parameters(), gradnorm)
    # clipgradnorm = lambda: None
    trainbatch = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    trainepoch = partial(q.train_epoch, model=tfdecoder, dataloader=ds.dataloader("train", batsize),
                         optim=optim, losses=losses, _train_batch=trainbatch,
                         device=device, on_end=reduce_lr)

    # 7. define validation function (using partial)
    validepoch = partial(q.test_epoch, model=freedecoder, dataloader=ds.dataloader("valid", batsize),
                         losses=vlosses, device=device)
    # validepoch = partial(q.test_epoch, model=freedecoder, dataloader=valid_dl, losses=vlosses, device=device)

    # p = q.save_run(freedecoder, localargs, filepath=__file__)
    # q.save_dataset(ds, p)
    # _freedecoder, _localargs = q.load_run(p)
    # _ds = q.load_dataset(p)
    # sys.exit()

    # 7. run training
    tt.tick("training")
    q.run_training(run_train_epoch=trainepoch, run_valid_epoch=validepoch, max_epochs=epochs)
    tt.tock("done training")

    # testing
    tt.tick("testing")
    testresults = q.test_epoch(model=freedecoder, dataloader=ds.dataloader("valid", batsize),
                               losses=vlosses, device=device)
    print("validation test results: ", testresults)
    tt.tock("tested")
    tt.tick("testing")
    testresults = q.test_epoch(model=freedecoder, dataloader=ds.dataloader("test", batsize),
                               losses=vlosses, device=device)
    print("test results: ", testresults)
    tt.tock("tested")

    # save model?
    tosave = input("Save this model? 'y(es)'=Yes, <int>=overwrite previous, otherwise=No) \n>")
    if tosave.lower() == "y" or tosave.lower() == "yes" or re.match(r"\d+", tosave.lower()):
        overwrite = int(tosave) if re.match(r"\d+", tosave) else None
        p = q.save_run(model, localargs, filepath=__file__, overwrite=overwrite)
        q.save_dataset(ds, p)

        # region reload
        _model, _localargs = q.load_run(p)
        _ds = q.load_dataset(p)
        freedecoder.model = _model

        # testing
        tt.tick("testing reloaded")
        _testresults = q.test_epoch(model=freedecoder, dataloader=_ds.dataloader("test", batsize),
                                    losses=vlosses, device=device)
        print(_testresults)
        assert (testresults == _testresults)
        tt.tock("tested")
        # endregion

        # save predictions
        trainpreds = q.eval_loop(freedecoder, ds.dataloader("train", batsize=batsize, shuffle=False), device=device)
        validpreds = q.eval_loop(freedecoder, ds.dataloader("valid", batsize=batsize, shuffle=False), device=device)
        testpreds = q.eval_loop(freedecoder, ds.dataloader("test", batsize=batsize, shuffle=False), device=device)
        trainpreds = get_arrays_to_save(trainpreds[1])
        validpreds = get_arrays_to_save(validpreds[1])
        testpreds = get_arrays_to_save(testpreds[1])
        for fname, content in [("trainpreds.npz", trainpreds),
                               ("validpreds.npz", validpreds),
                               ("testpreds.npz", testpreds)]:
            np.savez(os.path.join(p, fname), **content)

    return testresults
def run(lr=0.001,
        enclrmul=0.1,
        hdim=768,
        numlayers=8,
        numheads=12,
        dropout=0.1,
        wreg=0.,
        batsize=10,
        epochs=100,
        warmup=0,
        sustain=0,
        cosinelr=False,
        gradacc=1,
        gradnorm=100,
        patience=5,
        validinter=3,
        seed=87646464,
        gpu=-1,
        datamode="single",
        decodemode="single",    # "full", "ltr" (left to right), "single", "entropy-single"
        trainonvalid=False,
        ):
    settings = locals().copy()
    print(json.dumps(settings, indent=4))
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    device = torch.device("cpu") if gpu < 0 else torch.device(gpu)

    tt = q.ticktock("script")
    tt.tick("loading")
    tds, vds, xds, tds_seq, vds_seq, xds_seq, nltok, flenc, orderless = load_ds(
        "restaurants", mode=datamode, trainonvalid=trainonvalid)
    tt.tock("loaded")

    tdl = DataLoader(tds, batch_size=batsize, shuffle=True, collate_fn=collate_fn)
    vdl = DataLoader(vds, batch_size=batsize, shuffle=False, collate_fn=collate_fn)
    xdl = DataLoader(xds, batch_size=batsize, shuffle=False, collate_fn=collate_fn)

    tdl_seq = DataLoader(tds_seq, batch_size=batsize, shuffle=True, collate_fn=autocollate)
    vdl_seq = DataLoader(vds_seq, batch_size=batsize, shuffle=False, collate_fn=autocollate)
    xdl_seq = DataLoader(xds_seq, batch_size=batsize, shuffle=False, collate_fn=autocollate)

    # model
    tagger = TransformerTagger(hdim, flenc.vocab, numlayers, numheads, dropout)
    tagmodel = TreeInsertionTaggerModel(tagger)
    decodermodel = TreeInsertionDecoder(tagger, seqenc=flenc, maxsteps=50, max_tree_size=30,
                                        mode=decodemode)
    decodermodel = TreeInsertionDecoderTrainModel(decodermodel)

    # batch = next(iter(tdl))
    # out = tagmodel(*batch)

    tmetrics = make_array_of_metrics("loss", "elemrecall", "allrecall", "entropyrecall", reduction="mean")
    vmetrics = make_array_of_metrics("loss", "elemrecall", "allrecall", "entropyrecall", reduction="mean")
    tseqmetrics = make_array_of_metrics("treeacc", reduction="mean")
    vseqmetrics = make_array_of_metrics("treeacc", reduction="mean")
    xmetrics = make_array_of_metrics("treeacc", reduction="mean")

    # region parameters
    def get_parameters(m, _lr, _enclrmul):
        bertparams = []
        otherparams = []
        for k, v in m.named_parameters():
            if "bert_model." in k:
                bertparams.append(v)
            else:
                otherparams.append(v)
        if len(bertparams) == 0:
            raise Exception("No encoder parameters found!")
        paramgroups = [{"params": bertparams, "lr": _lr * _enclrmul},
                       {"params": otherparams}]
        return paramgroups
    # endregion

    def get_optim(_m, _lr, _enclrmul, _wreg=0):
        # use the _lr passed to this helper (the original shadowed it with the outer lr)
        paramgroups = get_parameters(_m, _lr=_lr, _enclrmul=_enclrmul)
        optim = torch.optim.Adam(paramgroups, lr=_lr, weight_decay=_wreg)
        return optim

    def clipgradnorm(_m=None, _norm=None):
        torch.nn.utils.clip_grad_norm_(_m.parameters(), _norm)

    eyt = q.EarlyStopper(vseqmetrics[-1], patience=patience, min_epochs=30, more_is_better=True,
                         remember_f=lambda: deepcopy(tagger))

    # def wandb_logger():
    #     d = {}
    #     for name, loss in zip(["loss", "elem_acc", "seq_acc", "tree_acc"], metrics):
    #         d["train_"+name] = loss.get_epoch_error()
    #     for name, loss in zip(["seq_acc", "tree_acc"], vmetrics):
    #         d["valid_"+name] = loss.get_epoch_error()
    #     wandb.log(d)

    t_max = epochs
    optim = get_optim(tagger, lr, enclrmul, wreg)
    print(f"Total number of updates: {t_max} .")
    if cosinelr:
        lr_schedule = q.sched.Linear(steps=warmup) >> q.sched.Cosine(steps=t_max - warmup) >> 0.
    else:
        lr_schedule = q.sched.Linear(steps=warmup) >> 1.
    lr_schedule = q.sched.LRSchedule(optim, lr_schedule)

    trainbatch = partial(q.train_batch, gradient_accumulation_steps=gradacc,
                         on_before_optim_step=[lambda: clipgradnorm(_m=tagger, _norm=gradnorm)])

    trainepoch = partial(q.train_epoch, model=tagmodel, dataloader=tdl, optim=optim,
                         losses=tmetrics, device=device, _train_batch=trainbatch,
                         on_end=[lambda: lr_schedule.step()])

    trainseqepoch = partial(q.test_epoch, model=decodermodel, losses=tseqmetrics,
                            dataloader=tdl_seq, device=device)

    validepoch = partial(q.test_epoch, model=decodermodel, losses=vseqmetrics,
                         dataloader=vdl_seq, device=device,
                         on_end=[lambda: eyt.on_epoch_end()])

    # validepoch()      # TODO: remove this after debugging

    tt.tick("training")
    q.run_training(run_train_epoch=trainepoch,
                   run_valid_epoch=[trainseqepoch, validepoch],
                   max_epochs=epochs,
                   check_stop=[lambda: eyt.check_stop()],
                   validinter=validinter)
    tt.tock("done training")

    tt.msg("reloading best")
    if eyt.remembered is not None:
        decodermodel.model.tagger = eyt.remembered
        tagmodel.tagger = eyt.remembered

    tt.tick("running test")
    testepoch = partial(q.test_epoch, model=decodermodel, losses=xmetrics,
                        dataloader=xdl_seq, device=device)
    print(testepoch())
    tt.tock()

    # inspect predictions
    validepoch = partial(q.test_epoch, model=tagmodel, losses=vmetrics, dataloader=vdl, device=device)
    print(validepoch())
    inps, outs = q.eval_loop(tagmodel, vdl, device=device)
    # print(outs)

    doexit = False
    for i in range(len(inps[0])):
        for j in range(len(inps[0][i])):
            ui = input("next? (ENTER for next/anything else to exit)>>>")
            if ui != "":
                doexit = True
                break
            question = " ".join(nltok.convert_ids_to_tokens(inps[0][i][j]))
            out_toks = flenc.vocab.tostr(inps[1][i][j].detach().cpu().numpy()).split(" ")
            iscorrect = True
            lines = []
            for k, out_tok in enumerate(out_toks):
                gold_toks_for_k = inps[3][i][j][k].detach().cpu().nonzero()[:, 0]
                if len(gold_toks_for_k) > 0:
                    gold_toks_for_k = flenc.vocab.tostr(gold_toks_for_k).split(" ")
                else:
                    gold_toks_for_k = [""]
                isopen = inps[2][i][j][k]
                isopen = isopen.detach().cpu().item()
                pred_tok = outs[1][i][j][k].max(-1)[1].detach().cpu().item()
                pred_tok = flenc.vocab(pred_tok)
                pred_tok_correct = pred_tok in gold_toks_for_k or not isopen
                if not pred_tok_correct:
                    iscorrect = False
                entropy = torch.softmax(outs[1][i][j][k], -1).clamp_min(1e-6)
                entropy = -(entropy * torch.log(entropy)).sum().item()
                lines.append(
                    f"{out_tok:25} [{isopen:1}] >> {f'{pred_tok} ({entropy:.3f})':35} {'!!' if not pred_tok_correct else ' '} [{','.join(gold_toks_for_k) if isopen else ''}]")
            print(f"{question} {'!!WRONG!!' if not iscorrect else ''}")
            for line in lines:
                print(line)
        if doexit:
            break
def run_span_borders(lr=DEFAULT_LR,
                     dropout=.3,
                     wreg=DEFAULT_WREG,
                     initwreg=DEFAULT_INITWREG,
                     batsize=DEFAULT_BATSIZE,
                     evalbatsize=-1,
                     epochs=DEFAULT_EPOCHS,
                     smoothing=DEFAULT_SMOOTHING,
                     dim=200,
                     numlayers=1,
                     cuda=False,
                     gpu=0,
                     savep="exp_bilstm_span_borders_",
                     datafrac=1.,
                     vanillaemb=False,
                     embdim=300,
                     sched="cos",
                     warmup=0.1,
                     cycles=0.5,
                     ):
    settings = locals().copy()
    print(locals())
    if evalbatsize < 0:
        evalbatsize = batsize
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")

    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="span/borders", datafrac=datafrac)
    trainds, devds, testds = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:-1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:-1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    # endregion

    # region model
    tt.tick("creating model")
    # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    bert = BertModel.from_pretrained("bert-base-uncased")
    emb = bert.embeddings.word_embeddings
    if vanillaemb:
        tt.msg("using vanilla emb of size {}".format(embdim))
        emb = torch.nn.Embedding(emb.weight.size(0), embdim)
    else:
        embdim = bert.config.hidden_size
    # inpD = tokenizer.vocab
    # q.WordEmb.masktoken = "[PAD]"
    # emb = q.WordEmb(embdim, worddic=inpD)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True, dropout_in_shared=dropout)
    spandet = BorderSpanDetector(emb, bilstm, dim * 2, dropout=dropout)
    spandet.to(device)
    tt.tock("model created")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = spandet.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    # optim = torch.optim.Adam(spandet.parameters(), lr=lr, weight_decay=wreg)
    losses = [q.SmoothedCELoss(smoothing=smoothing), SpanF1Borders(), q.SeqAccuracy()]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), SpanF1Borders(), q.SeqAccuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=spandet, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=spandet, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=spandet, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))

        outlen = trainloader.dataset.tensors[0].size(1)
        spandet.outlen = outlen

        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)

        tt.msg("saved in {}".format(savedir))
        tt.tock("done")
def run_relations(lr=DEFAULT_LR,
                  dropout=.3,
                  wreg=DEFAULT_WREG,
                  initwreg=DEFAULT_INITWREG,
                  batsize=DEFAULT_BATSIZE,
                  epochs=10,
                  smoothing=DEFAULT_SMOOTHING,
                  cuda=False,
                  gpu=0,
                  balanced=False,
                  maskentity=False,
                  savep="exp_bilstm_rels_",
                  test=False,
                  datafrac=1.,
                  vanillaemb=False,
                  gloveemb=True,
                  embdim=300,
                  dim=300,
                  numlayers=2,
                  warmup=0.01,
                  cycles=0.5,
                  sched="cos",
                  evalbatsize=-1,
                  classweighted=False,
                  ):
    print(locals())
    settings = locals().copy()
    if evalbatsize < 0:
        evalbatsize = batsize
    if test:
        epochs = 0
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")

    # region data
    assert (not gloveemb or not vanillaemb)
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="rel+borders", retrelD=True, datafrac=datafrac,
                     wordlevel=gloveemb, rettokD=True)
    trainds, devds, testds, relD, tokD = data
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds, devds, testds)
    else:
        trainds, devds, testds = [TensorDataset(ds.tensors[0], ds.tensors[2])
                                  for ds in [trainds, devds, testds]]
    relcounts = torch.zeros(max(relD.values()) + 1)
    trainrelcounts = torch.bincount(trainds.tensors[1])
    relcounts[:len(trainrelcounts)] += trainrelcounts.float()
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("making model")
    if vanillaemb:
        bert = BertModel.from_pretrained("bert-base-uncased")
        emb = bert.embeddings.word_embeddings
        tt.msg("using vanilla emb of size {}".format(embdim))
        emb = torch.nn.Embedding(emb.weight.size(0), embdim)
    elif gloveemb:
        emb = q.WordEmb.load_glove("glove.50d", selectD=tokD)
    else:
        bert = BertModel.from_pretrained("bert-base-uncased")
        emb = bert.embeddings.word_embeddings
        embdim = bert.config.hidden_size
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True, dropout_in=dropout)
    # bilstm = torch.nn.LSTM(embdim, dim, batch_first=True, num_layers=numlayers, bidirectional=True, dropout=dropout)
    m = RelationClassifier(emb=emb, bilstm=bilstm, dim=dim * 2, relD=relD, dropout=dropout)
    m.to(device)
    tt.tock("made model")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = m.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    # optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup, t_total=totalsteps, schedule=schedmap[sched])
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    losses = [q.SmoothedCELoss(smoothing=smoothing,
                               weight=1 / relcounts.clamp_min(1e-6) if classweighted else None),
              q.Accuracy()]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=m, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=m, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question) if x != "[PAD]"])
        #             f.write(qstr + "\n")
        tt.tock("done")
def run_both(lr=DEFAULT_LR,
             dropout=.5,
             wreg=DEFAULT_WREG,
             initwreg=DEFAULT_INITWREG,
             batsize=DEFAULT_BATSIZE,
             evalbatsize=-1,
             epochs=10,
             smoothing=DEFAULT_SMOOTHING,
             cuda=False,
             gpu=0,
             balanced=False,
             maskmention=False,
             warmup=-1.,
             sched="ang",
             cycles=-1.,
             savep="exp_bert_both_",
             test=False,
             freezeemb=False,
             large=False,
             datafrac=1.,
             savemodel=False,
             ):
    settings = locals().copy()
    print(locals())
    tt = q.ticktock("script")
    if evalbatsize < 0:
        evalbatsize = batsize
    tt.msg("running borders and rel classifier with BERT")
    if test:
        epochs = 0
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    if cycles == -1:
        if sched == "cos":
            cycles = 0.5
        elif sched in ["cosrestart", "coshardrestart"]:
            cycles = 1.0

    # region data
    tt.tick("loading data")
    data = load_data(which="forboth", retrelD=True, datafrac=datafrac)
    trainds, devds, testds, relD = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    print("number of relations: {}".format(len(relD)))
    # endregion

    # region model
    tt.tick("loading BERT")
    whichbert = "bert-base-uncased"
    if large:
        whichbert = "bert-large-uncased"
    bert = BertModel.from_pretrained(whichbert)
    m = BordersAndRelationClassifier(bert, relD, dropout=dropout, mask_entity_mention=maskmention)
    m.to(device)
    tt.tock("loaded BERT")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    assert (initwreg == 0.)
    initl2penalty = InitL2Penalty(bert, factor=q.hyperparam(initwreg))

    params = []
    for paramname, param in m.named_parameters():
        if paramname.startswith("bert.embeddings.word_embeddings"):
            if not freezeemb:
                params.append(param)
        else:
            params.append(param)
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)

    tmodel = BordersAndRelationLosses(m, cesmoothing=smoothing)
    # xmodel = BordersAndRelationLosses(m, cesmoothing=smoothing)
    # losses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    # xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    tlosses = [q.SelectedLinearLoss(i) for i in range(7)]
    xlosses = [q.SelectedLinearLoss(i) for i in range(7)]
    trainlosses = [q.LossWrapper(l) for l in tlosses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=tmodel, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=tmodel, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=tmodel, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    m.clip_len = True
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        print(savedir)
        # save model
        if savemodel:
            torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        m.clip_len = False
        # TEST data
        testpreds = q.eval_loop(m, evalloader, device=device)
        borderpreds = testpreds[0].cpu().detach().numpy()
        relpreds = testpreds[1].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), borderpreds)
        np.save(os.path.join(savedir, "relpreds.test.npy"), relpreds)
        # DEV data
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        borderpreds = testpreds[0].cpu().detach().numpy()
        relpreds = testpreds[1].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), borderpreds)
        np.save(os.path.join(savedir, "relpreds.dev.npy"), relpreds)
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question) if x != "[PAD]"])
        #             f.write(qstr + "\n")
        tt.tock("done")
def run_relations(lr=DEFAULT_LR,
                  dropout=.5,
                  wreg=DEFAULT_WREG,
                  initwreg=DEFAULT_INITWREG,
                  batsize=DEFAULT_BATSIZE,
                  epochs=10,
                  smoothing=DEFAULT_SMOOTHING,
                  cuda=False,
                  gpu=0,
                  balanced=False,
                  maskentity=False,
                  warmup=-1.,
                  sched="ang",
                  savep="exp_bert_rels_",
                  test=False,
                  freezeemb=False,
                  ):
    settings = locals().copy()
    if test:
        epochs = 0
    print(locals())
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")

    # region data
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BERT")
    tt.tick("loading data")
    data = load_data(which="rel+borders", retrelD=True)
    trainds, devds, testds, relD = data
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds, devds, testds)
    else:
        trainds, devds, testds = [TensorDataset(ds.tensors[0], ds.tensors[2])
                                  for ds in [trainds, devds, testds]]
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=batsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=batsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=batsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=batsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("loading BERT")
    bert = BertModel.from_pretrained("bert-base-uncased")
    m = RelationClassifier(bert, relD, dropout=dropout)
    m.to(device)
    tt.tock("loaded BERT")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = []
    for paramname, param in m.named_parameters():
        if paramname.startswith("bert.embeddings.word_embeddings"):
            if not freezeemb:
                params.append(param)
        else:
            params.append(param)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup, t_total=totalsteps,
                     schedule=schedmap[sched], init_weight_decay=initwreg)
    losses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=m, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=m, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question) if x != "[PAD]"])
        #             f.write(qstr + "\n")
        tt.tock("done")
def run_span_borders(lr=DEFAULT_LR,
                     dropout=.5,
                     wreg=DEFAULT_WREG,
                     initwreg=DEFAULT_INITWREG,
                     batsize=DEFAULT_BATSIZE,
                     epochs=DEFAULT_EPOCHS,
                     smoothing=DEFAULT_SMOOTHING,
                     cuda=False,
                     gpu=0,
                     balanced=False,
                     warmup=-1.,
                     sched="ang",
                     savep="exp_bert_span_borders_",
                     freezeemb=False,
                     ):
    settings = locals().copy()
    print(locals())
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")

    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BERT")
    tt.tick("loading data")
    data = load_data(which="span/borders")
    trainds, devds, testds = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=batsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=batsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:-1])
    evalloader = DataLoader(evalds, batch_size=batsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:-1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("loading BERT")
    bert = BertModel.from_pretrained("bert-base-uncased")
    spandet = BorderSpanDetector(bert, dropout=dropout)
    spandet.to(device)
    tt.tock("loaded BERT")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = []
    for paramname, param in spandet.named_parameters():
        if paramname.startswith("bert.embeddings.word_embeddings"):
            if not freezeemb:
                params.append(param)
        else:
            params.append(param)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup, t_total=totalsteps,
                     schedule=schedmap[sched])
    losses = [q.SmoothedCELoss(smoothing=smoothing), SpanF1Borders(reduction="none"), q.SeqAccuracy()]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), SpanF1Borders(reduction="none"), q.SeqAccuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=spandet, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=spandet, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=spandet, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
        tt.tock("done")
def run_relations(lr=DEFAULT_LR,
                  dropout=.3,
                  wreg=DEFAULT_WREG,
                  initwreg=DEFAULT_INITWREG,
                  batsize=DEFAULT_BATSIZE,
                  epochs=10,
                  smoothing=DEFAULT_SMOOTHING,
                  cuda=False,
                  gpu=0,
                  balanced=False,
                  maskentity=False,
                  savep="exp_bilstm_rels_",
                  test=False,
                  datafrac=1.,
                  glove=False,
                  embdim=50,
                  dim=300,
                  numlayers=2,
                  warmup=0.0,
                  cycles=0.5,
                  sched="cos",
                  evalbatsize=-1,
                  classweighted=False,
                  fixembed=False,
                  ):
    print(locals())
    settings = locals().copy()
    if evalbatsize < 0:
        evalbatsize = batsize
    if test:
        epochs = 0
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")

    # region data
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders,rels", datafrac=datafrac, retrelD=True)
    trainds, devds, testds, wD, relD = data
    rev_wD = {v: k for k, v in wD.items()}

    def pp(ids):
        ret = " ".join([rev_wD[idse.item()] for idse in ids if idse.item() != 0])
        return ret

    print(pp(trainds.tensors[0][0]))
    print(trainds.tensors[1][0])

    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds, devds, testds, D=wD)
    else:
        trainds, devds, testds = [TensorDataset(ds.tensors[0], ds.tensors[2])
                                  for ds in [trainds, devds, testds]]

    for i in range(10):
        question = trainds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = devds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = testds.tensors[0][i]
        print(pp(question))

    relcounts = torch.zeros(max(relD.values()) + 1)
    trainrelcounts = torch.tensor(np.bincount(trainds.tensors[1].detach().cpu().numpy()))
    relcounts[:len(trainrelcounts)] += trainrelcounts.float()
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    if test:
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize, shuffle=False)
    # endregion

    # region model
    tt.tick("making model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        stoi_, vectors_, dim = torch.load("../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1, embdim, device=vectors_.device, dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
            emb.freeze()
        emb = q.SwitchedWordEmb(emb).override(gloveemb)

    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True, dropout_in=dropout)
    # bilstm = torch.nn.LSTM(embdim, dim, batch_first=True, num_layers=numlayers, bidirectional=True, dropout=dropout)
    m = RelationClassifier(emb=emb, bilstm=bilstm, dim=dim, relD=relD, dropout=dropout)
    m.to(device)
    # model = RelationPrediction(config)
    tt.tock("made model")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = m.parameters()
    params = [param for param in params if param.requires_grad == True]
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, warmup=warmup, t_total=totalsteps, schedule=sched)
    # optim = torch.optim.Adam(params, lr=lr, weight_decay=wreg)
    # losses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    losses = [q.SmoothedCELoss(smoothing=smoothing,
                               weight=1 / relcounts.clamp_min(1e-6) if classweighted else None),
              q.Accuracy()]
    # xlosses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=m, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=m, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question) if x != "[PAD]"])
        #             f.write(qstr + "\n")
        tt.tock("done")
def run_span_borders(lr=DEFAULT_LR,
                     dropout=.3,
                     wreg=DEFAULT_WREG,
                     initwreg=DEFAULT_INITWREG,
                     batsize=DEFAULT_BATSIZE,
                     evalbatsize=-1,
                     epochs=DEFAULT_EPOCHS,
                     smoothing=DEFAULT_SMOOTHING,
                     dim=200,
                     numlayers=1,
                     cuda=False,
                     gpu=0,
                     savep="exp_bilstm_span_borders_",
                     datafrac=1.,
                     glove=False,
                     fixembed=False,
                     embdim=50,
                     sched="cos",
                     warmup=0.1,
                     cycles=0.5,
                     ):
    settings = locals().copy()
    print(locals())
    if evalbatsize < 0:
        evalbatsize = batsize
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")

    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders", datafrac=datafrac)
    trainds, devds, testds, wD = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds), len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev, batch_size=evalbatsize, shuffle=False)
    # endregion

    # region model
    tt.tick("creating model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        stoi_, vectors_, dim = torch.load("../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1, embdim, device=vectors_.device, dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
        emb = q.SwitchedWordEmb(emb).override(gloveemb)
    # inpD = tokenizer.vocab
    # q.WordEmb.masktoken = "[PAD]"
    # emb = q.WordEmb(embdim, worddic=inpD)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers), bidir=True, dropout_in_shared=dropout)
    spandet = BorderSpanDetector(emb, bilstm, dim * 2, dropout=dropout)
    spandet.to(device)
    tt.tock("model created")
    # endregion

    # region training
    totalsteps = len(trainloader) * epochs
    params = spandet.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps, cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    # optim = torch.optim.Adam(spandet.parameters(), lr=lr, weight_decay=wreg)
    losses = [q.SmoothedCELoss(smoothing=smoothing), SpanF1Borders(), q.SeqAccuracy()]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), SpanF1Borders(), q.SeqAccuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=spandet, dataloader=trainloader, optim=optim,
                        losses=trainlosses, device=device)
    devloop = partial(q.test_epoch, model=spandet, dataloader=devloader, losses=devlosses, device=device)
    testloop = partial(q.test_epoch, model=spandet, dataloader=testloader, losses=testlosses, device=device)

    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")

    if len(savep) > 0:
        tt.tick("making predictions and saving")
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        outlen = trainloader.dataset.tensors[0].size(1)
        spandet.outlen = outlen
        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        tt.tock("done")
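
# Hypothetical command-line entry point (not part of the original listing): it forwards simple
# "key=value" arguments to the run_span_borders function defined above. The original scripts may
# instead use a CLI wrapper from the q library, so treat this as an illustrative sketch only.
if __name__ == "__main__":
    import ast
    import sys
    kwargs = {}
    for arg in sys.argv[1:]:
        k, v = arg.split("=", 1)
        try:
            kwargs[k] = ast.literal_eval(v)   # parse numbers/booleans; fall back to raw strings
        except (ValueError, SyntaxError):
            kwargs[k] = v
    run_span_borders(**kwargs)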