def setUp(self):
    """Build a base embedding, an overriding embedding, and their combination."""
    base_tokens = "<MASK> <RARE> the a his monkey inception key earlgrey".split()
    base_dic = {tok: pos for pos, tok in enumerate(base_tokens)}
    over_tokens = "he his her mine cat monkey the interstellar grey key".split()
    over_dic = {tok: pos for pos, tok in enumerate(over_tokens)}
    self.baseemb = q.WordEmb(dim=50, worddic=base_dic)
    self.overemb = q.WordEmb(dim=50, worddic=over_dic)
    # words present in both dictionaries should be served by the override emb
    self.emb = self.baseemb.override(self.overemb)
def test_creation_simple(self):
    """A fresh WordEmb maps an id to the matching row of its weight matrix."""
    letters = [chr(code) for code in range(97, 122)]  # 'a'..'y'
    dic = {letter: idx for idx, letter in enumerate(letters)}
    m = q.WordEmb(10, worddic=dic)
    ids = Variable(torch.LongTensor([0, 1, 2]))
    embedding, _ = m(ids)
    self.assertEqual(embedding.size(), (3, 10))
    # embedding of id 0 must equal row 0 of the weight matrix
    expected = m.weight.cpu().detach().numpy()[0]
    self.assertTrue(np.allclose(expected, embedding[0].detach().numpy()))
def __init__(self, *dims: int, worddic: dict = None, bias: bool = True,
             tieweights=False, dropout: float = 0., dropouti: float = 0.,
             dropouth: float = 0., dropoute: float = 0., **kw):
    """RNN language model: word embedding -> RNN stack -> word output layer.

    :param dims:       layer sizes; dims[0] is the embedding dim, dims[-1]
                       feeds the output layer.
    :param worddic:    token -> id mapping shared by input and output layers.
    :param bias:       whether the RNN layers use bias terms.
    :param tieweights: if True, the output layer shares the embedding weights.
    :param dropout, dropouti, dropouth, dropoute:
                       dropout rates (generic / input / hidden / embedding).
    """
    super(RNNLayer_LM, self).__init__(**kw)
    self.dims = dims
    self.D = worddic
    self.states = None  # recurrent states carried across forward calls
    # make layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    if tieweights:
        # weight tying between input and output vocab projections
        # (assumes dims[0] == dims[-1] so the shapes match -- TODO confirm)
        self.out.weight = self.emb.weight
    # self.encodertype is expected to be set on the class/subclass
    self.rnn = self.encodertype(*dims, bidir=False, bias=bias, dropout_in=dropout)
    self.rnn.ret_all_states = True
    self.dropout = torch.nn.Dropout(p=dropout)
    self.dropouti = torch.nn.Dropout(p=dropouti)
    self.dropoute = torch.nn.Dropout(p=dropoute)
    self.dropouth = torch.nn.Dropout(p=dropouth)
def test_decoder_shape(self):
    """AYNDecoder shape test: (5, 7) token ids + (5, 8, 16) context must
    produce (5, 7, 16) outputs that support backprop."""
    wdic = "<MASK> a b c d e f g h i j k l m n o p".split()
    wdic = dict(zip(wdic, range(len(wdic))))
    emb = q.WordEmb(10, worddic=wdic)
    m = q.AYNDecoder(emb, n_max_seq=7, n_layers=3, n_head=2, d_k=4, d_v=6,
                     d_pos_vec=6, d_model=16, d_inner_hid=20, dropout=0)
    # random non-zero source ids, batch of 5, length 7
    src_seq = q.var(np.random.randint(1, max(wdic.values()), (5, 7))).v
    # per-example mask: 0 for a random-length prefix, 1 for the suffix
    src_seq_mask_starts = np.random.randint(1, 7, (5,), dtype="int64")
    src_seq_mask = np.ones_like(src_seq.data.numpy())
    for i in range(5):
        src_seq_mask[i, :src_seq_mask_starts[i]] = 0
    src_seq_mask = q.var(src_seq_mask).v
    # zero out the suffix positions (where mask == 1) of the source ids
    src_seq.masked_fill_(src_seq_mask.byte(), 0)
    # NOTE(review): src_pos is built but never passed to the decoder below
    src_pos = q.var(np.arange(0, 7, dtype="int64")).v
    src_pos = src_pos.unsqueeze(0).repeat(5, 1)
    # random encoder context (5, 8, 16) with its own random-length mask
    ctx = q.var(np.random.random((5, 8, 16)).astype("float32")).v
    ctx_seq_mask_starts = np.random.randint(1, 8, (5,), dtype="int64")
    ctx_seq_mask = np.ones((5, 8))
    for i in range(5):
        ctx_seq_mask[i, :ctx_seq_mask_starts[i]] = 0
    # invert so the mask is 1 on the prefix, 0 on the suffix
    ctx_seq_mask = -1*q.var(ctx_seq_mask).v.byte()+1
    out = m(src_seq, ctx, ctx_seq_mask)
    print(out)
    self.assertEqual(out.size(), (5, 7, 16))
    # gradient must flow end to end
    loss = out.sum()
    loss.backward()
def test_it(self):
    """SwitchedWordEmb must use the override embedding exactly for the
    selected words and fall back to the base embedding everywhere else."""
    tokens = "<MASK> <RARE> cat dog person earlgreytea the".split()
    D = {tok: pos for pos, tok in enumerate(tokens)}
    base = q.WordEmb(50, worddic=D)
    switched = q.SwitchedWordEmb(base)
    words = "cat dog person".split()
    over = q.WordEmb(50, worddic=D)
    switched.override(over, selectwords=words)
    # push every vocabulary id through the switched embedder and both parts
    x = torch.arange(0, len(D)).unsqueeze(0)
    y, ymask = switched(x)
    ybase, _ = base(x)
    yover, _ = over(x)
    # ids 2..4 (cat, dog, person) must come from the override embedding
    ymix = torch.tensor([0, 0, 1, 1, 1, 0, 0]).float().unsqueeze(0).unsqueeze(-1)
    y_ref = yover * ymix + ybase * (1 - ymix)
    print((y - y_ref).norm())
    self.assertTrue(np.allclose(y.detach().numpy(), y_ref.detach().numpy()))
    print(y.size())
def test_creation_masked(self):
    """Id 0 (the mask token) must embed to all-zeros and yield mask = 0."""
    dic = {chr(code): code - 96 for code in range(97, 122)}  # 'a'->1 .. 'y'->25
    dic[q.WordEmb.masktoken] = 0
    m = q.WordEmb(10, worddic=dic)
    embedding, mask = m(Variable(torch.LongTensor([0, 1, 2])))
    self.assertEqual(embedding.size(), (3, 10))
    # non-masked id: embedding equals the corresponding weight row
    expected = m.weight.cpu().detach().numpy()[1]
    self.assertTrue(np.allclose(expected, embedding[1].detach().numpy()))
    # masked id 0: embedding is zeroed out
    self.assertTrue(
        np.allclose(embedding[0].detach().numpy(), np.zeros((10, ))))
    print(mask)
    self.assertTrue(np.allclose(mask.detach().numpy(), [0, 1, 1]))
def setUp(self):
    """Create a vanilla WordEmb and an adapted view of the same weights
    re-indexed through a second dictionary."""
    wdic = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 10,
        "a": 5,
        "his": 50,
        "abracadabrqmsd--qsdfmqgf-": 6
    }
    wdic2 = {
        "<MASK>": 0,
        "<RARE>": 1,
        "the": 2,
        "a": 3,
        "his": 4,
        "abracadabrqmsd--qsdfmqgf-": 5,
        "qsdfqsdf": 7
    }
    self.adapted = q.WordEmb(50, worddic=wdic)
    # vanilla copies the exact pre-adaptation weight values
    weights = self.adapted.embedding.weight.data.numpy()
    self.vanilla = q.WordEmb(50, worddic=wdic, value=weights)
    # re-map the first embedder onto the second dictionary
    self.adapted = self.adapted.adapt(wdic2)
def __init__(self, dim=512, worddic=None, numlayers=3, numheads=8,
             activation=q.GeLU, embedding_dropout=0., attention_dropout=0.,
             residual_dropout=0., word_dropout=0., relpos=True,
             tie_wordvecs=False, maxlen=512):
    """Transformer language model: word embeddings (+ optional learned
    absolute position embeddings) -> TransformerDecoder without encoder
    context -> word logits.

    :param relpos:       if True use relative positions inside the decoder;
                         if False, learned absolute positions up to ``maxlen``.
    :param tie_wordvecs: share weights between input and output word layers.
    """
    super(TransformerLM, self).__init__()
    self.wordemb = q.WordEmb(dim, worddic=worddic, word_dropout=word_dropout)
    posemb = None
    if relpos is False:
        print("using learned absolute position embeddings")
        # identity dict: position i maps to embedding id i
        posembD = dict(zip(range(maxlen), range(maxlen)))
        posemb = q.WordEmb(dim, worddic=posembD)
    self.transformer = q.TransformerDecoder(
        dim=dim,
        numlayers=numlayers,
        numheads=numheads,
        activation=activation,
        embedding_dropout=embedding_dropout,
        attention_dropout=attention_dropout,
        residual_dropout=residual_dropout,
        relpos=relpos,
        noctx=True,  # pure LM: no encoder context to attend over
        maxlen=maxlen,
        posemb=posemb)
    q.RecDropout.convert_to_standard_in(self.transformer)
    self.wordout = q.WordLinout(dim, worddic=worddic)
    if tie_wordvecs:
        # weight tying between embedding and output projection
        self.wordout.weight = self.wordemb.weight
def test_it(self):
    """TFDecoder over a small attention decoder cell must produce per-step
    logits of shape (batch, seqlen, vocsize)."""
    x = np.random.randint(0, 100, (1000, 7))
    wD = dict((chr(xi), xi) for xi in range(100))
    ctx = torch.randn(1000, 8, 30)
    decoder_emb = q.WordEmb(20, worddic=wD)
    decoder_lstm = q.LSTMCell(20, 30)
    decoder_att = q.DotAttention()
    decoder_out = q.WordLinout(60, worddic=wD)
    decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att, None,
                                 decoder_out)
    decoder_tf = q.TFDecoder(decoder_cell)
    y = decoder_tf(torch.tensor(x), ctx=ctx)
    # BUGFIX: was assertTrue(y.size(), (1000, 7, 100)) -- assertTrue only
    # checks truthiness of its first argument and treats the tuple as the
    # failure message, so the assertion could never fail. Use assertEqual.
    self.assertEqual(y.size(), (1000, 7, 100))
    # (removed unused y_inp/y_out slices of x)
    # endregion
def test_it(self):
    """End-to-end smoke test of StackCell on bracketed transition sequences:
    steps two padded examples through the cell, then backprops from the
    output at timestep 11 and inspects debug-embedding gradients."""
    D = "<MASK> [RED] NT(START) NT(a) T(b) NT(c) T(d) T(e) NT(f) T(g) T(h) T(i)"
    D = dict(zip(D.split(), range(len(D.split()))))
    # action id per token: 2 = reduce, 1 = open nonterminal, 0 = terminal
    tok2act = {
        k: (2 if k == "[RED]" else 1 if k[:2] == "NT" else 0)
        for k in D
    }

    class CustomCombiner(StackCellCombiner):
        # masked mean over the stacked states
        def forward(self, _x, mask):
            ret = (_x * mask.unsqueeze(-1).float()).sum(
                1) / mask.float().sum(1).unsqueeze(-1).clamp_min(1e-6)
            ret = ret.detach()  # TODO: for grad debugging
            return ret

    class CustomWordLinout(q.WordLinout):
        # disable vocab-masking updates for this test
        def update(self, _):
            pass

    class Tok2Act(torch.nn.Module):
        # buffer-backed lookup table mapping token ids to action ids
        def __init__(self, t2a, D):
            super(Tok2Act, self).__init__()
            self.D = D
            t2a_ = torch.zeros(max(D.values()) + 1).long()
            for k, v in t2a.items():
                t2a_[D[k]] = v
            self.register_buffer("t2a", t2a_)

        def forward(self, _x):
            return self.t2a[_x]

    embdim = 4
    coredim = 5
    emb = q.WordEmb(embdim, worddic=D)
    core = q.LSTMCell(embdim, coredim, dropout_rec=.1)
    # combiner = BasicCombiner(embdim)
    combiner = CustomCombiner()
    att = BasicAttention()
    out = CustomWordLinout(coredim * 2, worddic=D)
    tok2act = Tok2Act(tok2act, D)
    cell = StackCell(emb=emb,
                     tok2act=tok2act,
                     core=core,
                     combiner=combiner,
                     att=att,
                     out=out)
    ctx = torch.randn(2, 6, coredim)
    cell.save_ctx(ctx)
    # two transition sequences; the second is shorter and padded with id 0
    ex1 = "NT(START) NT(a) T(b) NT(c) T(d) T(e) [RED] NT(f) T(g) T(h) [RED] T(i) [RED]"
    ex2 = "NT(START) NT(a) NT(c) T(d) T(e) [RED] [RED]"
    x1 = [D[exi] for exi in ex1.split()] + [0]
    x2 = [D[exi] for exi in ex2.split()]
    x2 = x2 + [0] * (len(x1) - len(x2))
    x = torch.tensor([x1, x2])
    # parameter hook the cell writes embeddings into, for gradient inspection
    cell._debug_embs = torch.nn.Parameter(torch.zeros(2, len(x1), embdim))
    ys = []
    # feed the batch one timestep at a time
    for i in range(len(x[0])):
        y = cell(x[:, i])
        ys.append(y)
    # print(cell._debug_embs)
    print(cell._debug_embs.size())
    # backprop from example 0's output at timestep 11
    l = ys[11][0].sum()
    l.backward()
    print(cell._debug_embs.grad)
    print(cell._stack)
def run_gatedtree(
        lr=0.01,
        gradclip=5.,
        batsize=20,
        epochs=80,
        embdim=200,
        encdim=200,
        numlayer=1,
        cuda=False,
        gpu=0,
        wreg=1e-8,
        dropout=0.5,
        smoothing=0.4,
        goldsmoothing=-0.1,
        which="geo",
        relatt=False,
):
    """Train and evaluate a gated-tree-LSTM decoder seq2seq model.

    Builds an LSTM encoder over natural-language input and a tree decoder
    over formal-language output, trains with teacher forcing, and evaluates
    tree accuracy with a free-running decoder.

    :param smoothing/goldsmoothing: select between plain CE, smoothed CE and
        differentiable gold-smoothed CE losses (see loss selection below).
    :param which:  dataset selector passed to ``gen_datasets`` (e.g. "geo").
    :param relatt: use combined absolute/relative attention instead of basic.
    """
    tt = q.ticktock("script")
    tt.msg("running gated tree decoder")
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)
    # region data
    tt.tick("generating data")
    # dss, D = gen_sort_data(seqlen=seqlen, numvoc=numvoc, numex=numex, prepend_inp=False)
    dss, nlD, flD = gen_datasets(which=which)
    tloader, vloader, xloader = [
        torch.utils.data.DataLoader(ds, batch_size=batsize, shuffle=True)
        for ds in dss
    ]
    seqlen = len(dss[0][0][1])
    # map formal tokens to stack actions: "(" pushes (+1), ")" pops (-1)
    id2pushpop = torch.zeros(len(flD), dtype=torch.long, device=device)
    id2pushpop[flD["("]] = +1
    id2pushpop[flD[")"]] = -1
    tt.tock("data generated")
    # endregion
    # region model
    tt.tick("building model")
    # source side
    inpemb = q.WordEmb(embdim, worddic=nlD)
    encdims = [encdim] * numlayer
    encoder = q.LSTMEncoder(embdim, *encdims, bidir=False,
                            dropout_in_shared=dropout)
    # target side
    decemb = q.WordEmb(embdim, worddic=flD)
    decinpdim = embdim
    decdims = [decinpdim] + [encdim] * numlayer
    dec_core = \
        [GatedTreeLSTMCell(decdims[i-1], decdims[i], dropout_in=dropout)
         for i in range(1, len(decdims))]    ###
    dec_core = TreeRNNDecoderCellCore(*dec_core)
    if relatt:
        att = ComboAbsRelAttention(ctxdim=encdim, vecdim=encdim)
    else:
        att = BasicAttention()
    out = torch.nn.Sequential(q.WordLinout(encdim, worddic=flD),
                              # torch.nn.Softmax(-1)
                              )
    merge = q.rnn.FwdDecCellMerge(decdims[-1], encdims[-1], outdim=encdim)
    deccell = TreeRNNDecoderCell(emb=decemb,
                                 core=dec_core,
                                 att=att,
                                 out=out,
                                 merge=merge,
                                 id2pushpop=id2pushpop)
    # teacher-forced decoder for training, free-running one for evaluation
    train_dec = q.TFDecoder(deccell)
    test_dec = q.FreeDecoder(deccell, maxtime=seqlen + 10)
    train_encdec = EncDec(inpemb, encoder, train_dec)
    test_encdec = Test_EncDec(inpemb, encoder, test_dec)
    train_encdec.to(device)
    test_encdec.to(device)
    tt.tock("built model")
    # endregion
    # region training
    # losses: pick CE variant based on the smoothing settings
    if smoothing == 0:
        ce = q.loss.CELoss(mode="logits", ignore_index=0)
    elif goldsmoothing < 0.:
        ce = q.loss.SmoothedCELoss(mode="logits", ignore_index=0,
                                   smoothing=smoothing)
    else:
        ce = q.loss.DiffSmoothedCELoss(mode="logits", ignore_index=0,
                                       alpha=goldsmoothing, beta=smoothing)
    acc = q.loss.SeqAccuracy(ignore_index=0)
    elemacc = q.loss.SeqElemAccuracy(ignore_index=0)
    treeacc = TreeAccuracyLambdaDFPar(flD=flD)
    # optim
    optim = torch.optim.RMSprop(train_encdec.parameters(), lr=lr, alpha=0.95,
                                weight_decay=wreg)
    # NOTE(review): named "clipgradnorm" but actually clips by value
    clipgradnorm = lambda: torch.nn.utils.clip_grad_value_(
        train_encdec.parameters(), clip_value=gradclip)
    # lööps
    batchloop = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    trainloop = partial(
        q.train_epoch,
        model=train_encdec,
        dataloader=tloader,
        optim=optim,
        device=device,
        losses=[q.LossWrapper(ce), q.LossWrapper(elemacc), q.LossWrapper(acc)],
        print_every_batch=False,
        _train_batch=batchloop)
    validloop = partial(q.test_epoch,
                        model=test_encdec,
                        dataloader=vloader,
                        device=device,
                        losses=[q.LossWrapper(treeacc)],
                        print_every_batch=False)
    tt.tick("training")
    q.run_training(trainloop, validloop, max_epochs=epochs)
    tt.tock("trained")
    tt.tick("testing")
    # evaluate both free-running and teacher-forced models on the test set
    test_results = validloop(model=test_encdec, dataloader=xloader)
    print("Test results (freerunning): {}".format(test_results))
    test_results = validloop(model=train_encdec, dataloader=xloader)
    print("Test results (TF): {}".format(test_results))
    tt.tock("tested")
    # endregion
    tt.msg("done")
def setUp(self):
    """Two independent 100-dim embeddings over the same small vocabulary."""
    tokens = "<MASK> <RARE> first second third fourth fifth".split()
    worddic = {tok: idx for idx, tok in enumerate(tokens)}
    self.emb1 = q.WordEmb(100, worddic=worddic)
    self.emb2 = q.WordEmb(100, worddic=worddic)
def run_seq2seq_(
        lr=0.001,
        batsize=32,
        evalbatsize=256,
        epochs=100,
        warmup=5,
        embdim=50,
        encdim=100,
        numlayers=2,
        dropout=.0,
        wreg=1e-6,
        cuda=False,
        gpu=0,
):
    """Train and evaluate an LSTM seq2seq model with Luong attention on
    LC-QuAD transition sequences.

    Splits the loaded matrices into train/valid/test, trains with teacher
    forcing, validates free-running with LR-on-plateau scheduling, and
    returns the dev sequence-level accuracy.

    BUGFIX: batch sanity checks previously used ``iter(loader).next()``,
    which relies on a non-standard ``.next()`` alias (removed in modern
    PyTorch; Python 3 iterators only define ``__next__``). Replaced with
    the built-in ``next(iter(loader))``, correct on all versions.
    """
    settings = locals().copy()
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt = q.ticktock("script")
    tt.msg("running seq2seq on LC-QuAD")
    tt.tick("loading data")
    xsm, ysm, teststart, tok2act = load_data()
    _tok2act = {ysm.RD[k]: v for k, v in tok2act.items()}
    print("Some examples:")
    for i in range(5):
        print(
            f"{xsm[i]}\n ->{ysm[i]}\n -> {Node.from_transitions(' '.join(ysm[i].split()[1:]), _tok2act)}"
        )
    print("Non-leaf tokens:")
    print({ysm.RD[k]: v for k, v in tok2act.items() if v > 0})
    # last 500 pre-test examples become the dev set
    devstart = teststart - 500
    # each dataset yields (input ids, shifted decoder input, decoder gold)
    trainds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[:devstart]).long(),
        torch.tensor(ysm.matrix[:devstart, :-1]).long(),
        torch.tensor(ysm.matrix[:devstart, 1:]).long())
    valds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[devstart:teststart]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, :-1]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, 1:]).long())
    testds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[teststart:]).long(),
        torch.tensor(ysm.matrix[teststart:, :-1]).long(),
        torch.tensor(ysm.matrix[teststart:, 1:]).long())
    tt.msg(
        f"Data splits: train: {len(trainds)}, valid: {len(valds)}, test: {len(testds)}"
    )
    tloader = torch.utils.data.DataLoader(trainds,
                                          batch_size=batsize,
                                          shuffle=True)
    vloader = torch.utils.data.DataLoader(valds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    xloader = torch.utils.data.DataLoader(testds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    tt.tock("data loaded")
    # model
    enclayers, declayers = numlayers, numlayers
    decdim = encdim
    xemb = q.WordEmb(embdim, worddic=xsm.D)
    yemb = q.WordEmb(embdim, worddic=ysm.D)
    # bidirectional encoder: each direction gets half of encdim
    encdims = [embdim] + [encdim // 2] * enclayers
    xenc = q.LSTMEncoder(embdim, *encdims[1:], bidir=True,
                         dropout_in_shared=dropout)
    decdims = [embdim] + [decdim] * declayers
    dec_core = torch.nn.Sequential(*[
        q.LSTMCell(decdims[i - 1], decdims[i], dropout_in=dropout,
                   dropout_rec=dropout) for i in range(1, len(decdims))
    ])
    yout = q.WordLinout(encdim + decdim, worddic=ysm.D)
    dec_cell = semparse.rnn.LuongCell(emb=yemb, core=dec_core, out=yout,
                                      dropout=dropout)
    # teacher-forced decoder for training, free-running for eval
    decoder = q.TFDecoder(dec_cell)
    testdecoder = q.FreeDecoder(dec_cell, maxtime=100)
    m = Seq2Seq(xemb, xenc, decoder)
    testm = Seq2Seq(xemb, xenc, testdecoder, test=True)
    # test model: run one batch through both modes as a sanity check
    tt.tick("running a batch")
    test_y = m(*next(iter(tloader))[:-1])
    q.batch_reset(m)
    test_y = testm(*next(iter(vloader))[:-1])
    q.batch_reset(m)
    tt.tock(f"ran a batch: {test_y.size()}")
    optim = torch.optim.Adam(m.parameters(), lr=lr, weight_decay=wreg)
    tlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    xlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    tlosses = [q.LossWrapper(l) for l in tlosses]
    vlosses = [q.LossWrapper(l) for l in xlosses]
    xlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=m,
                        dataloader=tloader,
                        optim=optim,
                        losses=tlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=testm,
                      dataloader=vloader,
                      losses=vlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=testm,
                       dataloader=xloader,
                       losses=xlosses,
                       device=device)
    # schedule LR on dev token accuracy (vlosses[1])
    lrplateau = q.util.ReduceLROnPlateau(optim,
                                         mode="max",
                                         factor=.1,
                                         patience=3,
                                         cooldown=1,
                                         warmup=warmup,
                                         threshold=0.,
                                         verbose=True,
                                         eps=1e-9)
    on_after_valid = [lambda: lrplateau.step(vlosses[1].get_epoch_error())]
    _devloop = partial(devloop, on_end=on_after_valid)
    # stop once the LR has decayed below 1e-7 in every param group
    stoptrain = [lambda: all([pg["lr"] <= 1e-7 for pg in optim.param_groups])]
    tt.tick("training")
    q.run_training(trainloop, _devloop, max_epochs=epochs,
                   check_stop=stoptrain)
    tt.tock("done training")
    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")
    devres = devloop()
    print(devres, vlosses[0].get_epoch_error())
    return vlosses[1].get_epoch_error()
def run_relations(
        lr=DEFAULT_LR,
        dropout=.3,
        wreg=DEFAULT_WREG,
        initwreg=DEFAULT_INITWREG,
        batsize=DEFAULT_BATSIZE,
        epochs=10,
        smoothing=DEFAULT_SMOOTHING,
        cuda=False,
        gpu=0,
        balanced=False,
        maskentity=False,
        savep="exp_bilstm_rels_",
        test=False,
        datafrac=1.,
        glove=False,
        embdim=50,
        dim=300,
        numlayers=2,
        warmup=0.0,
        cycles=0.5,
        sched="cos",
        evalbatsize=-1,
        classweighted=False,
        fixembed=False,
):
    """Train and evaluate a BiLSTM relation classifier, optionally with
    pretrained GloVe embeddings, and save predictions to a fresh directory.

    :param maskentity:    replace entity spans in questions before training.
    :param glove:         load 300-d GloVe vectors and override the trained
                          embedding for covered words.
    :param classweighted: weight the loss by inverse relation frequency.
    :param savep:         prefix of the output directory ("" disables saving).
    """
    print(locals())
    settings = locals().copy()
    if evalbatsize < 0:
        evalbatsize = batsize
    if test:
        epochs = 0  # test mode: skip training entirely
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running relation classifier with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders,rels",
                     datafrac=datafrac,
                     retrelD=True)
    trainds, devds, testds, wD, relD = data
    rev_wD = {v: k for k, v in wD.items()}

    # pretty-print a row of word ids, skipping padding (id 0)
    def pp(ids):
        ret = " ".join(
            [rev_wD[idse.item()] for idse in ids if idse.item() != 0])
        return ret

    print(pp(trainds.tensors[0][0]))
    print(trainds.tensors[1][0])
    if maskentity:
        trainds, devds, testds = replace_entity_span(trainds,
                                                     devds,
                                                     testds,
                                                     D=wD)
    else:
        # keep only (question ids, relation label) per example
        trainds, devds, testds = [
            TensorDataset(ds.tensors[0], ds.tensors[2])
            for ds in [trainds, devds, testds]
        ]
    # print a few examples from each split for eyeballing
    for i in range(10):
        question = trainds.tensors[0][i]
        print(pp(question))
    print()
    for i in range(10):
        question = devds.tensors[0][i]
        print(pp(question))
    print()
    # NOTE(review): this loop assigns but never prints -- the print(pp(question))
    # seems to be missing compared to the two loops above; confirm intent
    for i in range(10):
        question = testds.tensors[0][i]
    # per-relation frequency counts from the training labels
    relcounts = torch.zeros(max(relD.values()) + 1)
    trainrelcounts = torch.tensor(
        np.bincount(trainds.tensors[1].detach().cpu().numpy()))
    relcounts[:len(trainrelcounts)] += trainrelcounts.float()
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds),
                                                   len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    # eval loaders expose only the inputs (no labels) for prediction dumps
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev,
                                batch_size=evalbatsize,
                                shuffle=False)
    if test:
        # shrink to 10 examples for a quick smoke run
        evalloader = DataLoader(TensorDataset(*evalloader.dataset[:10]),
                                batch_size=batsize,
                                shuffle=False)
        testloader = DataLoader(TensorDataset(*testloader.dataset[:10]),
                                batch_size=batsize,
                                shuffle=False)
    # endregion
    # region model
    tt.tick("making model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        # NOTE(review): this unpack overwrites the `dim` parameter (BiLSTM
        # size) with the GloVe dimensionality (300); also `vectors` is built
        # with embdim columns, which presumably must equal that dim -- verify
        stoi_, vectors_, dim = torch.load(
            "../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1,
                              embdim,
                              device=vectors_.device,
                              dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        if fixembed:
            gloveemb.freeze()
            emb.freeze()
        # words covered by GloVe are served by gloveemb, the rest by emb
        emb = q.SwitchedWordEmb(emb).override(gloveemb)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers),
                               bidir=True,
                               dropout_in=dropout)
    # bilstm = torch.nn.LSTM(embdim, dim, batch_first=True, num_layers=numlayers, bidirectional=True, dropout=dropout)
    m = RelationClassifier(emb=emb,
                           bilstm=bilstm,
                           dim=dim,
                           relD=relD,
                           dropout=dropout)
    m.to(device)
    # model = RelationPrediction(config)
    tt.tock("made model")
    # endregion
    # region training
    totalsteps = len(trainloader) * epochs
    params = m.parameters()
    params = [param for param in params if param.requires_grad == True]
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps,
                         cycles=cycles)
    optim = BertAdam(params,
                     lr=lr,
                     weight_decay=wreg,
                     warmup=warmup,
                     t_total=totalsteps,
                     schedule=sched)
    # optim = torch.optim.Adam(params, lr=lr, weight_decay=wreg)
    # losses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    losses = [
        q.SmoothedCELoss(smoothing=smoothing,
                         weight=1 /
                         relcounts.clamp_min(1e-6) if classweighted else None),
        q.Accuracy()
    ]
    # xlosses = [
    #     torch.nn.CrossEntropyLoss(size_average=True),
    #     q.Accuracy()
    # ]
    xlosses = [q.SmoothedCELoss(smoothing=smoothing), q.Accuracy()]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=m,
                        dataloader=trainloader,
                        optim=optim,
                        losses=trainlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=m,
                      dataloader=devloader,
                      losses=devlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=m,
                       dataloader=testloader,
                       losses=testlosses,
                       device=device)
    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")
    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")
    if len(savep) > 0:
        tt.tick("making predictions and saving")
        # find the first unused numbered output directory
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(m, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # save relation dictionary
        # json.dump(relD, open(os.path.join(savedir, "relD.json"), "w"))
        # save test predictions
        testpreds = q.eval_loop(m, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.test.npy"), testpreds)
        testpreds = q.eval_loop(m, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "relpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
        # save bert-tokenized questions
        # tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        # with open(os.path.join(savedir, "testquestions.txt"), "w") as f:
        #     for batch in evalloader:
        #         ques, io = batch
        #         ques = ques.numpy()
        #         for question in ques:
        #             qstr = " ".join([x for x in tokenizer.convert_ids_to_tokens(question) if x != "[PAD]"])
        #             f.write(qstr + "\n")
    tt.tock("done")
def run_span_borders(
        lr=DEFAULT_LR,
        dropout=.3,
        wreg=DEFAULT_WREG,
        initwreg=DEFAULT_INITWREG,
        batsize=DEFAULT_BATSIZE,
        evalbatsize=-1,
        epochs=DEFAULT_EPOCHS,
        smoothing=DEFAULT_SMOOTHING,
        dim=200,
        numlayers=1,
        cuda=False,
        gpu=0,
        savep="exp_bilstm_span_borders_",
        datafrac=1.,
        glove=False,
        fixembed=False,
        embdim=50,
        sched="cos",
        warmup=0.1,
        cycles=0.5,
):
    """Train and evaluate a BiLSTM span-border detector and save border
    predictions for the dev and test splits to a fresh directory.

    :param glove:    load 300-d GloVe vectors and override the trained
                     embedding for covered words.
    :param fixembed: freeze the GloVe embedding weights.
    :param savep:    prefix of the output directory ("" disables saving).
    """
    settings = locals().copy()
    print(locals())
    if evalbatsize < 0:
        evalbatsize = batsize
    if cuda:
        device = torch.device("cuda", gpu)
    else:
        device = torch.device("cpu")
    # region data
    tt = q.ticktock("script")
    tt.msg("running span border with BiLSTM")
    tt.tick("loading data")
    data = load_data(which="wordmat,wordborders", datafrac=datafrac)
    trainds, devds, testds, wD = data
    tt.tock("data loaded")
    tt.msg("Train/Dev/Test sizes: {} {} {}".format(len(trainds), len(devds),
                                                   len(testds)))
    trainloader = DataLoader(trainds, batch_size=batsize, shuffle=True)
    devloader = DataLoader(devds, batch_size=evalbatsize, shuffle=False)
    testloader = DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    # eval loaders expose only the inputs (no labels) for prediction dumps
    evalds = TensorDataset(*testloader.dataset.tensors[:1])
    evalloader = DataLoader(evalds, batch_size=evalbatsize, shuffle=False)
    evalds_dev = TensorDataset(*devloader.dataset.tensors[:1])
    evalloader_dev = DataLoader(evalds_dev,
                                batch_size=evalbatsize,
                                shuffle=False)
    # endregion
    # region model
    tt.tick("creating model")
    emb = q.WordEmb(embdim, worddic=wD)
    if glove:
        print("using glove")
        # NOTE(review): this unpack overwrites the `dim` parameter (BiLSTM
        # size) with the GloVe dimensionality (300); also `vectors` is built
        # with embdim columns, which presumably must equal that dim -- verify
        stoi_, vectors_, dim = torch.load(
            "../../data/buboqa/data/sq_glove300d.pt")
        # map vectors from custom glove ids to wD ids
        vectors = torch.zeros(max(wD.values()) + 1,
                              embdim,
                              device=vectors_.device,
                              dtype=vectors_.dtype)
        stoi = {}
        for k, v in stoi_.items():
            if k in wD:
                vectors[wD[k]] = vectors_[v]
                stoi[k] = wD[k]
        print("{} words in stoi that are in wD".format(len(stoi)))
        gloveemb = q.WordEmb(embdim, worddic=stoi, _weight=vectors)
        # gloveemb = q.WordEmb.load_glove("glove.{}d".format(embdim), selectD=wD)
        # NOTE(review): unlike run_relations, fixembed here freezes only the
        # glove embedding, not the base emb -- confirm this asymmetry is intended
        if fixembed:
            gloveemb.freeze()
        # words covered by GloVe are served by gloveemb, the rest by emb
        emb = q.SwitchedWordEmb(emb).override(gloveemb)
    # inpD = tokenizer.vocab
    # q.WordEmb.masktoken = "[PAD]"
    # emb = q.WordEmb(embdim, worddic=inpD)
    bilstm = q.rnn.LSTMEncoder(embdim, *([dim] * numlayers),
                               bidir=True,
                               dropout_in_shared=dropout)
    spandet = BorderSpanDetector(emb, bilstm, dim * 2, dropout=dropout)
    spandet.to(device)
    tt.tock("model created")
    # endregion
    # region training
    totalsteps = len(trainloader) * epochs
    params = spandet.parameters()
    sched = get_schedule(sched, warmup=warmup, t_total=totalsteps,
                         cycles=cycles)
    optim = BertAdam(params, lr=lr, weight_decay=wreg, schedule=sched)
    # optim = torch.optim.Adam(spandet.parameters(), lr=lr, weight_decay=wreg)
    losses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    xlosses = [
        q.SmoothedCELoss(smoothing=smoothing),
        SpanF1Borders(),
        q.SeqAccuracy()
    ]
    trainlosses = [q.LossWrapper(l) for l in losses]
    devlosses = [q.LossWrapper(l) for l in xlosses]
    testlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=spandet,
                        dataloader=trainloader,
                        optim=optim,
                        losses=trainlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=spandet,
                      dataloader=devloader,
                      losses=devlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=spandet,
                       dataloader=testloader,
                       losses=testlosses,
                       device=device)
    tt.tick("training")
    q.run_training(trainloop, devloop, max_epochs=epochs)
    tt.tock("done training")
    tt.tick("testing")
    testres = testloop()
    print(testres)
    tt.tock("tested")
    if len(savep) > 0:
        tt.tick("making predictions and saving")
        # find the first unused numbered output directory
        i = 0
        while os.path.exists(savep + str(i)):
            i += 1
        os.mkdir(savep + str(i))
        savedir = savep + str(i)
        # save model
        # torch.save(spandet, open(os.path.join(savedir, "model.pt"), "wb"))
        # save settings
        json.dump(settings, open(os.path.join(savedir, "settings.json"), "w"))
        # record padded question length for downstream consumers
        outlen = trainloader.dataset.tensors[0].size(1)
        spandet.outlen = outlen
        # save test predictions
        testpreds = q.eval_loop(spandet, evalloader, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.test.npy"), testpreds)
        # save dev predictions
        testpreds = q.eval_loop(spandet, evalloader_dev, device=device)
        testpreds = testpreds[0].cpu().detach().numpy()
        np.save(os.path.join(savedir, "borderpreds.dev.npy"), testpreds)
        tt.msg("saved in {}".format(savedir))
    tt.tock("done")