def __init__(self, *dims: int, worddic: dict = None, bias: bool = True, tieweights=False,
             dropout: float = 0., dropouti: float = 0., dropouth: float = 0., dropoute: float = 0., **kw):
    super(RNNLayer_LM, self).__init__(**kw)
    self.dims = dims
    self.D = worddic
    self.states = None
    # make layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    if tieweights:
        self.out.weight = self.emb.weight
    self.rnn = self.encodertype(*dims, bidir=False, bias=bias, dropout_in=dropout)
    self.rnn.ret_all_states = True
    self.dropout = torch.nn.Dropout(p=dropout)
    self.dropouti = torch.nn.Dropout(p=dropouti)
    self.dropoute = torch.nn.Dropout(p=dropoute)
    self.dropouth = torch.nn.Dropout(p=dropouth)
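# Hedged usage sketch (not from the original source): constructing the language
# model whose __init__ is shown above. Assumes RNNLayer_LM is a q.* module whose
# class (or subclass) provides `self.encodertype` (e.g. an LSTM encoder class)
# before __init__ runs; the dictionary and dimensions below are illustrative only.
wdic = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3, "monkey": 4}
lm = RNNLayer_LM(100, 100, worddic=wdic, tieweights=True, dropout=0.1)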
def setUp(self):
    wdic = {"<MASK>": 0, "<RARE>": 1, "the": 10, "a": 5, "his": 50, "monkey": 6}
    wdic2 = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3, "his": 4,
             "abracadabrqmsd--qsdfmqgf-": 5, "qsdfqsdf": 7}
    self.base = q.WordLinout(10, worddic=wdic)
    self.over = q.WordLinout(10, worddic=wdic2)
    self.overridden = self.base.override(self.over)
def setUp(self):
    wdic = {"<MASK>": 0, "<RARE>": 1, "the": 10, "a": 5, "his": 50,
            "abracadabrqmsd--qsdfmqgf-": 6}
    wdic2 = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3, "his": 4,
             "abracadabrqmsd--qsdfmqgf-": 5, "qsdfqsdf": 7}
    self.adapted = q.WordLinout(10, worddic=wdic)
    self.vanilla = q.WordLinout(
        10, worddic=wdic, weight=self.adapted.lin.weight.data.numpy())
    self.adapted = self.adapted.adapt(wdic2)
def __init__(self, dim=512, worddic=None, numlayers=3, numheads=8, activation=q.GeLU,
             embedding_dropout=0., attention_dropout=0., residual_dropout=0.,
             word_dropout=0., relpos=True, tie_wordvecs=False, maxlen=512):
    super(TransformerLM, self).__init__()
    self.wordemb = q.WordEmb(dim, worddic=worddic, word_dropout=word_dropout)
    posemb = None
    if relpos is False:
        print("using learned absolute position embeddings")
        posembD = dict(zip(range(maxlen), range(maxlen)))
        posemb = q.WordEmb(dim, worddic=posembD)
    self.transformer = q.TransformerDecoder(
        dim=dim, numlayers=numlayers, numheads=numheads, activation=activation,
        embedding_dropout=embedding_dropout, attention_dropout=attention_dropout,
        residual_dropout=residual_dropout, relpos=relpos, noctx=True,
        maxlen=maxlen, posemb=posemb)
    q.RecDropout.convert_to_standard_in(self.transformer)
    self.wordout = q.WordLinout(dim, worddic=worddic)
    if tie_wordvecs:
        self.wordout.weight = self.wordemb.weight
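# Hedged usage sketch (not from the original source): instantiating the
# TransformerLM defined above with learned absolute position embeddings
# (relpos=False) and tied input/output word vectors. The vocabulary and sizes
# are illustrative assumptions; dim is chosen divisible by numheads.
wdic = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3, "monkey": 4}
lm = TransformerLM(dim=64, worddic=wdic, numlayers=2, numheads=4,
                   relpos=False, tie_wordvecs=True, maxlen=32)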
def test_it(self):
    x = np.random.randint(0, 100, (1000, 7))
    y_inp = x[:, :-1]
    y_out = x[:, 1:]
    wD = dict((chr(xi), xi) for xi in range(100))
    ctx = torch.randn(1000, 8, 30)
    decoder_emb = q.WordEmb(20, worddic=wD)
    decoder_lstm = q.LSTMCell(20, 30)
    decoder_att = q.DotAttention()
    decoder_out = q.WordLinout(60, worddic=wD)
    decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att, None, decoder_out)
    decoder_tf = q.TFDecoder(decoder_cell)
    y = decoder_tf(torch.tensor(x), ctx=ctx)
    # assertTrue(a, b) would treat b as a message and always pass; compare shapes instead
    self.assertEqual(y.size(), (1000, 7, 100))
# endregion
def run_gatedtree(lr=0.01,
                  gradclip=5.,
                  batsize=20,
                  epochs=80,
                  embdim=200,
                  encdim=200,
                  numlayer=1,
                  cuda=False,
                  gpu=0,
                  wreg=1e-8,
                  dropout=0.5,
                  smoothing=0.4,
                  goldsmoothing=-0.1,
                  which="geo",
                  relatt=False,
                  ):
    tt = q.ticktock("script")
    tt.msg("running gated tree decoder")
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    # region data
    tt.tick("generating data")
    # dss, D = gen_sort_data(seqlen=seqlen, numvoc=numvoc, numex=numex, prepend_inp=False)
    dss, nlD, flD = gen_datasets(which=which)
    tloader, vloader, xloader = [
        torch.utils.data.DataLoader(ds, batch_size=batsize, shuffle=True)
        for ds in dss
    ]
    seqlen = len(dss[0][0][1])
    id2pushpop = torch.zeros(len(flD), dtype=torch.long, device=device)
    id2pushpop[flD["("]] = +1
    id2pushpop[flD[")"]] = -1
    tt.tock("data generated")
    # endregion

    # region model
    tt.tick("building model")
    # source side
    inpemb = q.WordEmb(embdim, worddic=nlD)
    encdims = [encdim] * numlayer
    encoder = q.LSTMEncoder(embdim, *encdims, bidir=False, dropout_in_shared=dropout)
    # target side
    decemb = q.WordEmb(embdim, worddic=flD)
    decinpdim = embdim
    decdims = [decinpdim] + [encdim] * numlayer
    dec_core = \
        [GatedTreeLSTMCell(decdims[i - 1], decdims[i], dropout_in=dropout)
         for i in range(1, len(decdims))]
    dec_core = TreeRNNDecoderCellCore(*dec_core)
    if relatt:
        att = ComboAbsRelAttention(ctxdim=encdim, vecdim=encdim)
    else:
        att = BasicAttention()
    out = torch.nn.Sequential(q.WordLinout(encdim, worddic=flD),
                              # torch.nn.Softmax(-1)
                              )
    merge = q.rnn.FwdDecCellMerge(decdims[-1], encdims[-1], outdim=encdim)
    deccell = TreeRNNDecoderCell(emb=decemb, core=dec_core, att=att, out=out,
                                 merge=merge, id2pushpop=id2pushpop)
    train_dec = q.TFDecoder(deccell)
    test_dec = q.FreeDecoder(deccell, maxtime=seqlen + 10)
    train_encdec = EncDec(inpemb, encoder, train_dec)
    test_encdec = Test_EncDec(inpemb, encoder, test_dec)
    train_encdec.to(device)
    test_encdec.to(device)
    tt.tock("built model")
    # endregion

    # region training
    # losses:
    if smoothing == 0:
        ce = q.loss.CELoss(mode="logits", ignore_index=0)
    elif goldsmoothing < 0.:
        ce = q.loss.SmoothedCELoss(mode="logits", ignore_index=0, smoothing=smoothing)
    else:
        ce = q.loss.DiffSmoothedCELoss(mode="logits", ignore_index=0,
                                       alpha=goldsmoothing, beta=smoothing)
    acc = q.loss.SeqAccuracy(ignore_index=0)
    elemacc = q.loss.SeqElemAccuracy(ignore_index=0)
    treeacc = TreeAccuracyLambdaDFPar(flD=flD)

    # optim
    optim = torch.optim.RMSprop(train_encdec.parameters(), lr=lr, alpha=0.95,
                                weight_decay=wreg)
    clipgradnorm = lambda: torch.nn.utils.clip_grad_value_(
        train_encdec.parameters(), clip_value=gradclip)

    # lööps
    batchloop = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    trainloop = partial(
        q.train_epoch, model=train_encdec, dataloader=tloader, optim=optim,
        device=device,
        losses=[q.LossWrapper(ce), q.LossWrapper(elemacc), q.LossWrapper(acc)],
        print_every_batch=False, _train_batch=batchloop)
    validloop = partial(q.test_epoch, model=test_encdec, dataloader=vloader,
                        device=device, losses=[q.LossWrapper(treeacc)],
                        print_every_batch=False)

    tt.tick("training")
    q.run_training(trainloop, validloop, max_epochs=epochs)
    tt.tock("trained")

    tt.tick("testing")
    test_results = validloop(model=test_encdec, dataloader=xloader)
    print("Test results (freerunning): {}".format(test_results))
    test_results = validloop(model=train_encdec, dataloader=xloader)
    print("Test results (TF): {}".format(test_results))
    tt.tock("tested")
    # endregion
    tt.msg("done")
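# Hedged entry-point sketch (not part of the original file): a script like the
# one above is typically launched by handing the run function to qelos'
# argument runner; q.argprun exposing the keyword arguments as CLI flags is an
# assumption here.
if __name__ == "__main__":
    q.argprun(run_gatedtree)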
def setUp(self):
    worddic = "<MASK> <RARE> first second third fourth fifth"
    worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
    self.linout = q.WordLinout(10, worddic=worddic)
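def test_output_shape(self):
    # Hedged sketch (not part of the original tests): assumes q.WordLinout maps
    # a (batch, 10) feature tensor to one score per word in worddic, i.e. an
    # output of shape (batch, 7) for the 7-word dictionary built in setUp.
    x = torch.randn(4, 10)
    scores = self.linout(x)
    self.assertEqual(scores.size(), (4, 7))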
def run_seq2seq_(lr=0.001,
                 batsize=32,
                 evalbatsize=256,
                 epochs=100,
                 warmup=5,
                 embdim=50,
                 encdim=100,
                 numlayers=2,
                 dropout=.0,
                 wreg=1e-6,
                 cuda=False,
                 gpu=0,
                 ):
    settings = locals().copy()
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt = q.ticktock("script")
    tt.msg("running seq2seq on LC-QuAD")

    tt.tick("loading data")
    xsm, ysm, teststart, tok2act = load_data()
    _tok2act = {ysm.RD[k]: v for k, v in tok2act.items()}

    print("Some examples:")
    for i in range(5):
        print(
            f"{xsm[i]}\n ->{ysm[i]}\n -> {Node.from_transitions(' '.join(ysm[i].split()[1:]), _tok2act)}"
        )
    print("Non-leaf tokens:")
    print({ysm.RD[k]: v for k, v in tok2act.items() if v > 0})

    devstart = teststart - 500
    trainds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[:devstart]).long(),
        torch.tensor(ysm.matrix[:devstart, :-1]).long(),
        torch.tensor(ysm.matrix[:devstart, 1:]).long())
    valds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[devstart:teststart]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, :-1]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, 1:]).long())
    testds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[teststart:]).long(),
        torch.tensor(ysm.matrix[teststart:, :-1]).long(),
        torch.tensor(ysm.matrix[teststart:, 1:]).long())
    tt.msg(
        f"Data splits: train: {len(trainds)}, valid: {len(valds)}, test: {len(testds)}"
    )
    tloader = torch.utils.data.DataLoader(trainds, batch_size=batsize, shuffle=True)
    vloader = torch.utils.data.DataLoader(valds, batch_size=evalbatsize, shuffle=False)
    xloader = torch.utils.data.DataLoader(testds, batch_size=evalbatsize, shuffle=False)
    tt.tock("data loaded")

    # model
    enclayers, declayers = numlayers, numlayers
    decdim = encdim
    xemb = q.WordEmb(embdim, worddic=xsm.D)
    yemb = q.WordEmb(embdim, worddic=ysm.D)
    encdims = [embdim] + [encdim // 2] * enclayers
    xenc = q.LSTMEncoder(embdim, *encdims[1:], bidir=True, dropout_in_shared=dropout)
    decdims = [embdim] + [decdim] * declayers
    dec_core = torch.nn.Sequential(*[
        q.LSTMCell(decdims[i - 1], decdims[i], dropout_in=dropout, dropout_rec=dropout)
        for i in range(1, len(decdims))
    ])
    yout = q.WordLinout(encdim + decdim, worddic=ysm.D)
    dec_cell = semparse.rnn.LuongCell(emb=yemb, core=dec_core, out=yout, dropout=dropout)
    decoder = q.TFDecoder(dec_cell)
    testdecoder = q.FreeDecoder(dec_cell, maxtime=100)
    m = Seq2Seq(xemb, xenc, decoder)
    testm = Seq2Seq(xemb, xenc, testdecoder, test=True)

    # test model
    tt.tick("running a batch")
    # use next(iter(...)) rather than the Python 2 style iterator.next()
    test_y = m(*next(iter(tloader))[:-1])
    q.batch_reset(m)
    test_y = testm(*next(iter(vloader))[:-1])
    q.batch_reset(m)
    tt.tock(f"ran a batch: {test_y.size()}")

    optim = torch.optim.Adam(m.parameters(), lr=lr, weight_decay=wreg)
    tlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    xlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    tlosses = [q.LossWrapper(l) for l in tlosses]
    vlosses = [q.LossWrapper(l) for l in xlosses]
    xlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch, model=m, dataloader=tloader, optim=optim,
                        losses=tlosses, device=device)
    devloop = partial(q.test_epoch, model=testm, dataloader=vloader,
                      losses=vlosses, device=device)
    testloop = partial(q.test_epoch, model=testm, dataloader=xloader,
                       losses=xlosses, device=device)

    lrplateau = q.util.ReduceLROnPlateau(optim, mode="max", factor=.1, patience=3,
                                         cooldown=1, warmup=warmup, threshold=0.,
                                         verbose=True, eps=1e-9)
    on_after_valid = [lambda: lrplateau.step(vlosses[1].get_epoch_error())]
    _devloop = partial(devloop, on_end=on_after_valid)
    stoptrain = [lambda: all([pg["lr"] <= 1e-7 for pg in optim.param_groups])]

    tt.tick("training")
    q.run_training(trainloop, _devloop, max_epochs=epochs, check_stop=stoptrain)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")

    devres = devloop()
    print(devres, vlosses[0].get_epoch_error())
    return vlosses[1].get_epoch_error()