def __init__(self,
             *dims: int,
             worddic: dict = None,
             bias: bool = True,
             tieweights=False,
             dropout: float = 0.,
             dropouti: float = 0.,
             dropouth: float = 0.,
             dropoute: float = 0.,
             **kw):
    """RNN language model: word embedding -> stacked RNN -> linear output.

    :param dims:      layer widths; dims[0] is the embedding size and
                      dims[-1] the size fed into the output projection.
    :param worddic:   token -> id mapping shared by embedding and output.
    :param bias:      whether the RNN layers use bias terms.
    :param tieweights: share the output weight matrix with the embedding
                      matrix (presumably requires dims[0] == dims[-1] --
                      TODO confirm).
    :param dropout:   dropout rate, also passed to the RNN as dropout_in.
    :param dropouti:  input-dropout rate (module stored, applied elsewhere).
    :param dropouth:  hidden-dropout rate (module stored, applied elsewhere).
    :param dropoute:  embedding-dropout rate (module stored, applied elsewhere).
    """
    super(RNNLayer_LM, self).__init__(**kw)
    self.dims = dims
    self.D = worddic
    self.states = None  # recurrent state, populated elsewhere
    # build the layers
    self.emb = q.WordEmb(dims[0], worddic=self.D)
    self.out = q.WordLinout(dims[-1], worddic=self.D)
    if tieweights:
        self.out.weight = self.emb.weight
    self.rnn = self.encodertype(*dims, bidir=False, bias=bias,
                                dropout_in=dropout)
    self.rnn.ret_all_states = True
    # one Dropout module per dropout site (same creation order as before)
    for attname, rate in (("dropout", dropout), ("dropouti", dropouti),
                          ("dropoute", dropoute), ("dropouth", dropouth)):
        setattr(self, attname, torch.nn.Dropout(p=rate))
# Example #2
 def setUp(self):
     """Build a base WordLinout, an overriding WordLinout, and their composition."""
     base_vocab = dict([
         ("<MASK>", 0), ("<RARE>", 1), ("the", 10), ("a", 5), ("his", 50),
         ("monkey", 6),
     ])
     override_vocab = dict([
         ("<MASK>", 0), ("<RARE>", 1), ("the", 2), ("a", 3), ("his", 4),
         ("abracadabrqmsd--qsdfmqgf-", 5), ("qsdfqsdf", 7),
     ])
     self.base = q.WordLinout(10, worddic=base_vocab)
     self.over = q.WordLinout(10, worddic=override_vocab)
     # overridden routes overlapping tokens through `over`, rest through `base`
     self.overridden = self.base.override(self.over)
# Example #3
 def setUp(self):
     """Build an adapted WordLinout plus a vanilla copy sharing its initial weights."""
     src_vocab = dict([
         ("<MASK>", 0), ("<RARE>", 1), ("the", 10), ("a", 5), ("his", 50),
         ("abracadabrqmsd--qsdfmqgf-", 6),
     ])
     tgt_vocab = dict([
         ("<MASK>", 0), ("<RARE>", 1), ("the", 2), ("a", 3), ("his", 4),
         ("abracadabrqmsd--qsdfmqgf-", 5), ("qsdfqsdf", 7),
     ])
     self.adapted = q.WordLinout(10, worddic=src_vocab)
     # vanilla starts from the exact same weight values (copied out via numpy)
     self.vanilla = q.WordLinout(
         10, worddic=src_vocab, weight=self.adapted.lin.weight.data.numpy())
     self.adapted = self.adapted.adapt(tgt_vocab)
# Example #4
 def __init__(self,
              dim=512,
              worddic=None,
              numlayers=3,
              numheads=8,
              activation=q.GeLU,
              embedding_dropout=0.,
              attention_dropout=0.,
              residual_dropout=0.,
              word_dropout=0.,
              relpos=True,
              tie_wordvecs=False,
              maxlen=512):
     """Transformer language model.

     Embeds tokens, runs them through a context-free TransformerDecoder and
     projects the outputs back onto the vocabulary.

     :param dim:          model and embedding dimension
     :param worddic:      token -> id mapping
     :param numlayers:    number of decoder layers
     :param numheads:     attention heads per layer
     :param activation:   feed-forward activation class
     :param relpos:       use relative positions; exactly False triggers
                          learned absolute position embeddings instead
     :param tie_wordvecs: share the output matrix with the input embedding
     :param maxlen:       maximum sequence length (absolute-position table size)
     """
     super(TransformerLM, self).__init__()
     self.wordemb = q.WordEmb(dim, worddic=worddic, word_dropout=word_dropout)
     # absolute position embeddings are built only when relative positions
     # are explicitly disabled (relpos is False, not merely falsy)
     posemb = None
     if relpos is False:
         print("using learned absolute position embeddings")
         position_ids = dict(zip(range(maxlen), range(maxlen)))
         posemb = q.WordEmb(dim, worddic=position_ids)
     self.transformer = q.TransformerDecoder(dim=dim,
                                             numlayers=numlayers,
                                             numheads=numheads,
                                             activation=activation,
                                             embedding_dropout=embedding_dropout,
                                             attention_dropout=attention_dropout,
                                             residual_dropout=residual_dropout,
                                             relpos=relpos,
                                             noctx=True,
                                             maxlen=maxlen,
                                             posemb=posemb)
     q.RecDropout.convert_to_standard_in(self.transformer)
     self.wordout = q.WordLinout(dim, worddic=worddic)
     if tie_wordvecs:
         self.wordout.weight = self.wordemb.weight
# Example #5
    def test_it(self):
        """Run a teacher-forced attention decoder on random data; check output shape."""
        x = np.random.randint(0, 100, (1000, 7))
        y_inp = x[:, :-1]  # shifted decoder inputs (unused below)
        y_out = x[:, 1:]   # shifted decoder targets (unused below)
        wD = dict((chr(xi), xi) for xi in range(100))

        # random encoder context: batch 1000, 8 context positions, dim 30
        ctx = torch.randn(1000, 8, 30)

        decoder_emb = q.WordEmb(20, worddic=wD)
        decoder_lstm = q.LSTMCell(20, 30)
        decoder_att = q.DotAttention()
        decoder_out = q.WordLinout(60, worddic=wD)

        decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                     None, decoder_out)
        decoder_tf = q.TFDecoder(decoder_cell)

        y = decoder_tf(torch.tensor(x), ctx=ctx)

        # BUG FIX: assertTrue(y.size(), shape) only tested the truthiness of
        # y.size() and ignored the expected shape entirely; assertEqual does
        # the real comparison (torch.Size compares equal to a tuple).
        self.assertEqual(y.size(), (1000, 7, 100))


# endregion
def run_gatedtree(
    lr=0.01,
    gradclip=5.,
    batsize=20,
    epochs=80,
    embdim=200,
    encdim=200,
    numlayer=1,
    cuda=False,
    gpu=0,
    wreg=1e-8,
    dropout=0.5,
    smoothing=0.4,
    goldsmoothing=-0.1,
    which="geo",
    relatt=False,
):
    """Train and evaluate a gated tree-LSTM decoder for semantic parsing.

    :param lr:            RMSprop learning rate
    :param gradclip:      element-wise gradient clip value
    :param batsize:       batch size for all three data loaders
    :param epochs:        maximum number of training epochs
    :param embdim:        word embedding size (source and target)
    :param encdim:        encoder/decoder hidden size
    :param numlayer:      number of encoder (and decoder) layers
    :param cuda:          run on GPU when True
    :param gpu:           GPU ordinal used when cuda is True
    :param wreg:          L2 weight decay
    :param dropout:       input dropout rate for encoder and decoder cells
    :param smoothing:     label-smoothing factor (0 selects plain CE loss)
    :param goldsmoothing: alpha for DiffSmoothedCELoss; a negative value
                          selects SmoothedCELoss instead
    :param which:         dataset selector passed to gen_datasets (e.g. "geo")
    :param relatt:        use combined absolute/relative attention when True
    """
    tt = q.ticktock("script")
    tt.msg("running gated tree decoder")
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    # region data
    tt.tick("generating data")
    # dss, D = gen_sort_data(seqlen=seqlen, numvoc=numvoc, numex=numex, prepend_inp=False)
    dss, nlD, flD = gen_datasets(which=which)
    # NOTE(review): all three loaders shuffle, including valid/test -- confirm intended
    tloader, vloader, xloader = [
        torch.utils.data.DataLoader(ds, batch_size=batsize, shuffle=True)
        for ds in dss
    ]
    # assumes dss[0][0][1] is the (padded) target sequence of the first
    # training example -- TODO confirm against gen_datasets
    seqlen = len(dss[0][0][1])
    # token id -> tree action: "(" pushes (+1), ")" pops (-1), others neutral
    id2pushpop = torch.zeros(len(flD), dtype=torch.long, device=device)
    id2pushpop[flD["("]] = +1
    id2pushpop[flD[")"]] = -1

    tt.tock("data generated")
    # endregion

    # region model
    tt.tick("building model")
    # source side
    inpemb = q.WordEmb(embdim, worddic=nlD)
    encdims = [encdim] * numlayer
    encoder = q.LSTMEncoder(embdim,
                            *encdims,
                            bidir=False,
                            dropout_in_shared=dropout)

    # target side: one gated tree-LSTM cell per decoder layer
    decemb = q.WordEmb(embdim, worddic=flD)
    decinpdim = embdim
    decdims = [decinpdim] + [encdim] * numlayer
    dec_core = \
        [GatedTreeLSTMCell(decdims[i-1], decdims[i], dropout_in=dropout) for i in range(1, len(decdims))]        ###
    dec_core = TreeRNNDecoderCellCore(*dec_core)
    if relatt:
        att = ComboAbsRelAttention(ctxdim=encdim, vecdim=encdim)
    else:
        att = BasicAttention()
    out = torch.nn.Sequential(q.WordLinout(encdim, worddic=flD),
                              # torch.nn.Softmax(-1)
                              )
    merge = q.rnn.FwdDecCellMerge(decdims[-1], encdims[-1], outdim=encdim)
    deccell = TreeRNNDecoderCell(emb=decemb,
                                 core=dec_core,
                                 att=att,
                                 out=out,
                                 merge=merge,
                                 id2pushpop=id2pushpop)
    # teacher forcing for training; free-running (with slack) for evaluation
    train_dec = q.TFDecoder(deccell)
    test_dec = q.FreeDecoder(deccell, maxtime=seqlen + 10)
    train_encdec = EncDec(inpemb, encoder, train_dec)
    test_encdec = Test_EncDec(inpemb, encoder, test_dec)

    train_encdec.to(device)
    test_encdec.to(device)
    tt.tock("built model")
    # endregion

    # region training
    # losses: choose the CE variant from the smoothing settings
    if smoothing == 0:
        ce = q.loss.CELoss(mode="logits", ignore_index=0)
    elif goldsmoothing < 0.:
        ce = q.loss.SmoothedCELoss(mode="logits",
                                   ignore_index=0,
                                   smoothing=smoothing)
    else:
        ce = q.loss.DiffSmoothedCELoss(mode="logits",
                                       ignore_index=0,
                                       alpha=goldsmoothing,
                                       beta=smoothing)
    acc = q.loss.SeqAccuracy(ignore_index=0)
    elemacc = q.loss.SeqElemAccuracy(ignore_index=0)
    treeacc = TreeAccuracyLambdaDFPar(flD=flD)
    # optim
    optim = torch.optim.RMSprop(train_encdec.parameters(),
                                lr=lr,
                                alpha=0.95,
                                weight_decay=wreg)
    # clip every gradient element to [-gradclip, +gradclip] before each step
    clipgradnorm = lambda: torch.nn.utils.clip_grad_value_(
        train_encdec.parameters(), clip_value=gradclip)
    # training / validation loops
    batchloop = partial(q.train_batch, on_before_optim_step=[clipgradnorm])
    trainloop = partial(
        q.train_epoch,
        model=train_encdec,
        dataloader=tloader,
        optim=optim,
        device=device,
        losses=[q.LossWrapper(ce),
                q.LossWrapper(elemacc),
                q.LossWrapper(acc)],
        print_every_batch=False,
        _train_batch=batchloop)
    validloop = partial(q.test_epoch,
                        model=test_encdec,
                        dataloader=vloader,
                        device=device,
                        losses=[q.LossWrapper(treeacc)],
                        print_every_batch=False)

    tt.tick("training")
    q.run_training(trainloop, validloop, max_epochs=epochs)
    tt.tock("trained")

    tt.tick("testing")
    # evaluate both the free-running and the teacher-forced model on test data
    test_results = validloop(model=test_encdec, dataloader=xloader)
    print("Test results (freerunning): {}".format(test_results))
    test_results = validloop(model=train_encdec, dataloader=xloader)
    print("Test results (TF): {}".format(test_results))
    tt.tock("tested")
    # endregion
    tt.msg("done")
# Example #7
 def setUp(self):
     """Build a WordLinout over a small space-separated vocabulary."""
     tokens = "<MASK> <RARE> first second third fourth fifth".split()
     worddic = {tok: idx for idx, tok in enumerate(tokens)}
     self.linout = q.WordLinout(10, worddic=worddic)
# Example #8
def run_seq2seq_(
    lr=0.001,
    batsize=32,
    evalbatsize=256,
    epochs=100,
    warmup=5,
    embdim=50,
    encdim=100,
    numlayers=2,
    dropout=.0,
    wreg=1e-6,
    cuda=False,
    gpu=0,
):
    """Train and evaluate an attention-based LSTM seq2seq model on LC-QuAD.

    :param lr:          Adam learning rate
    :param batsize:     training batch size
    :param evalbatsize: validation/test batch size
    :param epochs:      maximum number of training epochs
    :param warmup:      warmup epochs for the ReduceLROnPlateau scheduler
    :param embdim:      word embedding size (source and target)
    :param encdim:      encoder output size; also the decoder hidden size
    :param numlayers:   number of encoder and decoder layers
    :param dropout:     dropout rate used throughout the model
    :param wreg:        L2 weight decay
    :param cuda:        run on GPU when True
    :param gpu:         GPU ordinal used when cuda is True
    :return: validation accuracy (epoch error of the second validation loss)
    """
    settings = locals().copy()
    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)
    tt = q.ticktock("script")
    tt.msg("running seq2seq on LC-QuAD")

    tt.tick("loading data")
    xsm, ysm, teststart, tok2act = load_data()
    _tok2act = {ysm.RD[k]: v for k, v in tok2act.items()}

    print("Some examples:")
    for i in range(5):
        print(
            f"{xsm[i]}\n ->{ysm[i]}\n -> {Node.from_transitions(' '.join(ysm[i].split()[1:]), _tok2act)}"
        )

    print("Non-leaf tokens:")
    print({ysm.RD[k]: v for k, v in tok2act.items() if v > 0})

    # the last 500 pre-test examples serve as the validation split;
    # targets are shifted by one position for teacher forcing
    devstart = teststart - 500
    trainds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[:devstart]).long(),
        torch.tensor(ysm.matrix[:devstart, :-1]).long(),
        torch.tensor(ysm.matrix[:devstart, 1:]).long())
    valds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[devstart:teststart]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, :-1]).long(),
        torch.tensor(ysm.matrix[devstart:teststart, 1:]).long())
    testds = torch.utils.data.TensorDataset(
        torch.tensor(xsm.matrix[teststart:]).long(),
        torch.tensor(ysm.matrix[teststart:, :-1]).long(),
        torch.tensor(ysm.matrix[teststart:, 1:]).long())
    tt.msg(
        f"Data splits: train: {len(trainds)}, valid: {len(valds)}, test: {len(testds)}"
    )

    tloader = torch.utils.data.DataLoader(trainds,
                                          batch_size=batsize,
                                          shuffle=True)
    vloader = torch.utils.data.DataLoader(valds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    xloader = torch.utils.data.DataLoader(testds,
                                          batch_size=evalbatsize,
                                          shuffle=False)
    tt.tock("data loaded")

    # model
    enclayers, declayers = numlayers, numlayers
    decdim = encdim
    xemb = q.WordEmb(embdim, worddic=xsm.D)
    yemb = q.WordEmb(embdim, worddic=ysm.D)
    # bidirectional encoder: each direction gets half of encdim
    encdims = [embdim] + [encdim // 2] * enclayers
    xenc = q.LSTMEncoder(embdim,
                         *encdims[1:],
                         bidir=True,
                         dropout_in_shared=dropout)
    decdims = [embdim] + [decdim] * declayers
    dec_core = torch.nn.Sequential(*[
        q.LSTMCell(decdims[i - 1],
                   decdims[i],
                   dropout_in=dropout,
                   dropout_rec=dropout) for i in range(1, len(decdims))
    ])
    # Luong-style cell concatenates context and hidden state -> encdim + decdim
    yout = q.WordLinout(encdim + decdim, worddic=ysm.D)
    dec_cell = semparse.rnn.LuongCell(emb=yemb,
                                      core=dec_core,
                                      out=yout,
                                      dropout=dropout)
    decoder = q.TFDecoder(dec_cell)
    testdecoder = q.FreeDecoder(dec_cell, maxtime=100)

    m = Seq2Seq(xemb, xenc, decoder)
    testm = Seq2Seq(xemb, xenc, testdecoder, test=True)

    # smoke-test one batch through each model before training
    tt.tick("running a batch")
    # BUG FIX: `iter(loader).next()` is Python-2-only iterator syntax and
    # raises AttributeError on modern DataLoader iterators; the builtin
    # next() works on both.
    test_y = m(*next(iter(tloader))[:-1])
    q.batch_reset(m)
    test_y = testm(*next(iter(vloader))[:-1])
    q.batch_reset(m)  # NOTE(review): resets m, not testm -- confirm intended
    tt.tock(f"ran a batch: {test_y.size()}")

    optim = torch.optim.Adam(m.parameters(), lr=lr, weight_decay=wreg)
    tlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    xlosses = [
        q.CELoss(mode="logits", ignore_index=0),
        q.Accuracy(ignore_index=0),
        q.SeqAccuracy(ignore_index=0)
    ]
    tlosses = [q.LossWrapper(l) for l in tlosses]
    # valid and test wrap the same loss objects in separate wrappers
    vlosses = [q.LossWrapper(l) for l in xlosses]
    xlosses = [q.LossWrapper(l) for l in xlosses]
    trainloop = partial(q.train_epoch,
                        model=m,
                        dataloader=tloader,
                        optim=optim,
                        losses=tlosses,
                        device=device)
    devloop = partial(q.test_epoch,
                      model=testm,
                      dataloader=vloader,
                      losses=vlosses,
                      device=device)
    testloop = partial(q.test_epoch,
                       model=testm,
                       dataloader=xloader,
                       losses=xlosses,
                       device=device)

    # decay LR when validation accuracy (vlosses[1]) plateaus
    lrplateau = q.util.ReduceLROnPlateau(optim,
                                         mode="max",
                                         factor=.1,
                                         patience=3,
                                         cooldown=1,
                                         warmup=warmup,
                                         threshold=0.,
                                         verbose=True,
                                         eps=1e-9)
    on_after_valid = [lambda: lrplateau.step(vlosses[1].get_epoch_error())]
    _devloop = partial(devloop, on_end=on_after_valid)
    # stop once the scheduler has decayed every param group below 1e-7
    stoptrain = [lambda: all([pg["lr"] <= 1e-7 for pg in optim.param_groups])]

    tt.tick("training")
    q.run_training(trainloop,
                   _devloop,
                   max_epochs=epochs,
                   check_stop=stoptrain)
    tt.tock("done training")

    tt.tick("testing")
    testres = testloop()
    print(testres)
    settings["testres"] = testres
    tt.tock("tested")

    devres = devloop()
    print(devres, vlosses[0].get_epoch_error())

    return vlosses[1].get_epoch_error()