Example 1
    def setUp(self):
        wdic = {
            "<MASK>": 0,
            "<RARE>": 1,
            "the": 10,
            "a": 5,
            "his": 50,
            "abracadabrqmsd--qsdfmqgf-": 6
        }
        wdic2 = {
            "<MASK>": 0,
            "<RARE>": 1,
            "the": 2,
            "a": 3,
            "his": 4,
            "abracadabrqmsd--qsdfmqgf-": 5,
            "qsdfqsdf": 7
        }
        self.adapted = q.WordLinout(10, worddic=wdic, bias=False)
        # vanilla is initialised with the adapted layer's (detached) weights
        self.vanilla = q.WordLinout(
            10,
            worddic=wdic,
            weight=self.adapted.lin.weight.detach().numpy(),
            bias=False)
        # re-index the output layer onto the second dictionary
        self.adapted = self.adapted.adapt(wdic2)
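The setUp above ties two layers to the same weights and then re-maps one of them onto a second dictionary with adapt(). Below is a minimal sketch of that pattern, assuming `q` is the qelos package these examples rely on; the import and the exact adapt() semantics are not shown in the snippets, so treat this as an illustration rather than documented library behaviour.

import torch
import qelos as q  # assumed import; the examples above only reference the "q" alias

# original output dictionary: word -> output index
D = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3, "his": 4}
# new dictionary with re-shuffled indices and an extra word
D2 = {"<MASK>": 0, "<RARE>": 1, "the": 3, "a": 2, "his": 5, "extra": 4}

lin = q.WordLinout(10, worddic=D, bias=False)   # scores 10-dim vectors against D
lin2 = lin.adapt(D2)                            # same parameters, re-indexed onto D2 (assumed)

x = torch.randn(4, 10)
scores = lin2(x)                                # scores laid out according to D2's indices (assumed)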
Example 2
    def __init__(self,
                 rnn_type,
                 ntoken,
                 ninp,
                 nhid,
                 nlayers,
                 dropout=0.5,
                 dropconnect=0.,
                 tie_weights=False):
        super(NewRNNModel, self).__init__()
        worddic = dict(zip([str(x) for x in range(ntoken)], range(ntoken)))
        dims = [ninp] + [nhid] * nlayers
        self.nhid = nhid
        self.nlayers = nlayers

        self.dims = dims
        self.D = worddic
        self.states = None
        # make layers
        self.emb = q.WordEmb(dims[0], worddic=self.D)
        self.out = q.WordLinout(dims[-1], worddic=self.D)
        self.rnn = self.encodertype(*dims,
                                    bidir=False,
                                    bias=True,
                                    dropout_in=dropout,
                                    dropconnect=dropconnect)
        self.rnn.ret_all_states = True
        self.dropout = nn.Dropout(p=dropout)
Example 3
def run(lr=0.001):
    x = torch.randint(1, 100, (5, 8, 6), dtype=torch.int64)
    y = x[:, 1:, :-1]
    y = torch.cat([torch.ones(y.size(0), y.size(1), 1, dtype=y.dtype), y], 2)
    y = torch.cat(
        [y, torch.randint(1, 100, (y.size(0), 1, y.size(2))).long()], 1)
    D = dict(zip(["<MASK>"] + [str(i) for i in range(1, 100)], range(100)))
    m = BasicHierarchicalEncoderDecoder(q.WordEmb(10, worddic=D),
                                        q.WordLinout(25, worddic=D), 10,
                                        (20, ), (30, ), (25, ))
    pred = m(x, y)
Example 4
    def test_overridden(self):
        worddic = "second third fourth fifth"
        worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
        linout = q.WordLinout(10, worddic=worddic)
        l = self.linout.override(linout)
        x = Variable(torch.randn(7, 10))
        # per-example mask over the output words: zeros disable a word for that row
        msk = Variable(
            torch.FloatTensor([[1, 0, 1, 1, 0, 1, 0]] * 5 +
                              [[0, 1, 0, 0, 1, 0, 1]] * 2))
        y = l(x, mask=msk)
        print(y)
Example 5
    def setUp(self):
        wdic = {
            "<MASK>": 0,
            "<RARE>": 1,
            "the": 10,
            "a": 5,
            "his": 50,
            "monkey": 6
        }
        wdic2 = {
            "<MASK>": 0,
            "<RARE>": 1,
            "the": 2,
            "a": 3,
            "his": 4,
            "abracadabrqmsd--qsdfmqgf-": 5,
            "qsdfqsdf": 7
        }
        self.base = q.WordLinout(10, worddic=wdic, bias=False)
        self.over = q.WordLinout(10, worddic=wdic2, bias=False)
        # combined layer: `over` presumably takes precedence for the words it covers
        self.overridden = self.base.override(self.over)
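Examples 4 and 5 exercise WordLinout.override(), which combines a base output layer with a second one built over a (possibly different) dictionary. Here is a minimal sketch of the construction plus a masked forward call, under the same assumptions as the sketch above; the precise override and mask semantics are inferred from these tests, not documented here.

import torch
import qelos as q  # assumed import, as in the sketch above

wd_base = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3, "his": 4}
wd_over = {"<MASK>": 0, "<RARE>": 1, "the": 2, "a": 3}

base = q.WordLinout(10, worddic=wd_base, bias=False)
over = q.WordLinout(10, worddic=wd_over, bias=False)
combined = base.override(over)          # as in Example 5's setUp

x = torch.randn(7, 10)
# optional mask over the output words, as in Example 4: zeros disable a word for that example (assumed)
msk = torch.ones(7, len(wd_base))
y = combined(x, mask=msk)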
Example 6
    def __init__(self, D, embdim, zdim, startsym, *innerdim, **kw):
        super(Decoder, self).__init__()
        self.emb = q.WordEmb(embdim, worddic=D)
        innerdim = (embdim + zdim,) + innerdim
        self.layers = torch.nn.ModuleList(modules=[
            q.LSTMCell(innerdim[i - 1], innerdim[i]) for i in range(1, len(innerdim))
        ])
        # output layer maps the top LSTM state to scores over the same dictionary D
        self.linout = q.WordLinout(innerdim[-1], worddic=D)
        self.sm = torch.nn.Softmax(-1)
        self.maxtime = q.getkw(kw, "maxtime", 100)
        self.startid = D[startsym]
        self.sm_sample = True
        self.zdim = zdim
Example 7
    def __init__(self,
                 *dims: int,
                 worddic: dict = None,
                 bias: bool = True,
                 dropout: float = 0.,
                 **kw):
        super(RNNLayer_LM, self).__init__(**kw)
        self.dims = dims
        self.D = worddic
        self.states = None
        # make layers
        self.emb = q.WordEmb(dims[0], worddic=self.D)
        self.out = q.WordLinout(dims[-1], worddic=self.D)
        self.rnn = self.encodertype(*dims,
                                    bidir=False,
                                    bias=bias,
                                    dropout_in=dropout)
        self.rnn.ret_all_states = True
        self.dropout = torch.nn.Dropout(p=dropout)
Example 8
    def test_it(self):
        x = np.random.randint(0, 100, (1000, 7))
        y_inp = x[:, :-1]
        y_out = x[:, 1:]
        wD = dict((chr(xi), xi) for xi in range(100))

        ctx = torch.randn(1000, 8, 30)

        decoder_emb = q.WordEmb(20, worddic=wD)
        decoder_lstm = q.LSTMCell(20, 30)
        decoder_att = q.DotAttention()
        decoder_out = q.WordLinout(60, worddic=wD)

        decoder_cell = q.DecoderCell(decoder_emb, decoder_lstm, decoder_att,
                                     None, decoder_out)
        decoder_tf = q.TFDecoder(decoder_cell)

        y = decoder_tf(torch.tensor(x), ctx=ctx)

        self.assertEqual(y.size(), (1000, 7, 100))
Example 9
def run_classify(lr=0.001,
                 seqlen=6,
                 numex=500,
                 epochs=25,
                 batsize=10,
                 test=True,
                 cuda=False,
                 gpu=0):
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)
    # region construct data
    colors = "red blue green magenta cyan orange yellow grey salmon pink purple teal".split(
    )
    D = dict(zip(colors, range(len(colors))))
    inpseqs = []
    targets = []
    for i in range(numex):
        inpseq = list(np.random.choice(colors, seqlen, replace=False))
        target = np.random.choice(range(len(inpseq)), 1)[0]
        target_class = D[inpseq[target]]
        inpseq[target] = "${}$".format(inpseq[target])
        inpseqs.append("".join(inpseq))
        targets.append(target_class)

    sm = q.StringMatrix()
    sm.tokenize = lambda x: list(x)

    for inpseq in inpseqs:
        sm.add(inpseq)

    sm.finalize()
    print(sm[0])
    print(sm.D)
    targets = np.asarray(targets)

    data = q.dataload(sm.matrix[:-100], targets[:-100], batch_size=batsize)
    valid_data = q.dataload(sm.matrix[-100:],
                            targets[-100:],
                            batch_size=batsize)
    # endregion

    # region model
    embdim = 20
    enc2inpdim = 45
    encdim = 20
    outdim = 20
    emb = q.WordEmb(embdim, worddic=sm.D)  # sm dictionary (characters)
    out = q.WordLinout(outdim, worddic=D)  # target dictionary
    # encoders:
    enc1 = q.RNNEncoder(embdim, encdim, bidir=True)
    enc2 = q.RNNCellEncoder(enc2inpdim, outdim // 2, bidir=True)

    # model
    class Model(torch.nn.Module):
        def __init__(self, dim, _emb, _out, _enc1, _enc2, **kw):
            super(Model, self).__init__(**kw)
            self.dim, self.emb, self.out, self.enc1, self.enc2 = dim, _emb, _out, _enc1, _enc2
            self.score = torch.nn.Sequential(
                torch.nn.Linear(dim, 1, bias=False), torch.nn.Sigmoid())
            self.emb_expander = ExpandVecs(embdim, enc2inpdim, 2)
            self.enc_expander = ExpandVecs(encdim * 2, enc2inpdim, 2)

        def forward(self, x, with_att=False):
            # embed and encode
            xemb, xmask = self.emb(x)
            xenc = self.enc1(xemb, mask=xmask)
            # compute attention
            xatt = self.score(xenc).squeeze(
                2) * xmask.float()[:, :xenc.size(1)]
            # encode again
            _xemb = self.emb_expander(xemb[:, :xenc.size(1)])
            _xenc = self.enc_expander(xenc)
            _, xenc2 = self.enc2(_xemb,
                                 gate=xatt,
                                 mask=xmask[:, :xenc.size(1)],
                                 ret_states=True)
            scores = self.out(xenc2.view(xenc.size(0), -1))
            if with_att:
                return scores, xatt
            else:
                return scores

    model = Model(40, emb, out, enc1, enc2)
    # endregion

    # region test
    if test:
        inps = torch.tensor(sm.matrix[0:2])
        outs = model(inps)
    # endregion

    # region train
    optimizer = torch.optim.Adam(q.params_of(model), lr=lr)
    trainer = q.trainer(model).on(data).loss(torch.nn.CrossEntropyLoss(), q.Accuracy())\
        .optimizer(optimizer).hook(q.ClipGradNorm(5.)).device(device)
    validator = q.tester(model).on(valid_data).loss(
        q.Accuracy()).device(device)
    q.train(trainer, validator).run(epochs=epochs)
    # endregion

    # region check attention    #TODO
    # feed a batch
    inpd = torch.tensor(sm.matrix[400:410])
    outd, att = model(inpd, with_att=True)
    outd = torch.max(outd, 1)[1].cpu().detach().numpy()
    inpd = inpd.cpu().detach().numpy()
    att = att.cpu().detach().numpy()
    rD = {v: k for k, v in sm.D.items()}
    roD = {v: k for k, v in D.items()}
    for i in range(len(att)):
        inpdi = "   ".join([rD[x] for x in inpd[i]])
        outdi = roD[outd[i]]
        print("input:     {}\nattention: {}\nprediction: {}".format(
            inpdi, " ".join(["{:.1f}".format(x) for x in att[i]]), outdi))
Example 10
def run(lr=0.001,
        dropout=0.2,
        batsize=50,
        embdim=50,
        encdim=50,
        decdim=50,
        numlayers=1,
        bidir=False,
        which="geo",        # "geo", "atis", "jobs"
        test=True,
        ):
    settings = locals().copy()
    logger = q.log.Logger(prefix="seq2seq_base")
    logger.save_settings(**settings)
    # region data
    nlsm, qlsm, splitidxs = load_data(which=which)
    print(nlsm[0], qlsm[0])
    print(nlsm._rarewords)

    trainloader = q.dataload(nlsm.matrix[:splitidxs[0]], qlsm.matrix[:splitidxs[0]], batch_size=batsize, shuffle=True)
    devloader = q.dataload(nlsm.matrix[splitidxs[0]:splitidxs[1]], qlsm.matrix[splitidxs[0]:splitidxs[1]], batch_size=batsize, shuffle=False)
    testloader = q.dataload(nlsm.matrix[splitidxs[1]:], qlsm.matrix[splitidxs[1]:], batch_size=batsize, shuffle=False)
    # endregion

    # region model
    encdims = [encdim] * numlayers
    outdim = (encdim if not bidir else encdim * 2) + decdim
    nlemb = q.WordEmb(embdim, worddic=nlsm.D)
    qlemb = q.WordEmb(embdim, worddic=qlsm.D)
    nlenc = q.LSTMEncoder(embdim, *encdims, bidir=bidir, dropout_in=dropout)
    att = q.att.DotAtt()
    if numlayers > 1:
        qldec_core = torch.nn.Sequential(
            *[q.LSTMCell(_indim, _outdim, dropout_in=dropout)
              for _indim, _outdim in [(embdim, decdim)] + [(decdim, decdim)] * (numlayers - 1)]
        )
    else:
        qldec_core = q.LSTMCell(embdim, decdim, dropout_in=dropout)
    qlout = q.WordLinout(outdim, worddic=qlsm.D)
    qldec = q.LuongCell(emb=qlemb, core=qldec_core, att=att, out=qlout)

    class Model(torch.nn.Module):
        def __init__(self, _nlemb, _nlenc, _qldec, train=True, **kw):
            super(Model, self).__init__(**kw)
            self.nlemb, self.nlenc, self._q_train = _nlemb, _nlenc, train
            if train:
                self.qldec = q.TFDecoder(_qldec)
            else:
                self.qldec = q.FreeDecoder(_qldec, maxtime=100)

        def forward(self, x, y):   # (batsize, seqlen) int ids
            xemb, xmask = self.nlemb(x)
            xenc = self.nlenc(xemb, mask=xmask)
            if self._q_train is False:
                assert(y.dim() == 2)
            dec = self.qldec(y, ctx=xenc, ctxmask=xmask[:, :xenc.size(1)])
            return dec

    m_train = Model(nlemb, nlenc, qldec, train=True)
    m_test = Model(nlemb, nlenc, qldec, train=False)

    if test:
        test_out = m_train(torch.tensor(nlsm.matrix[:5]), torch.tensor(qlsm.matrix[:5]))
        print("test_out.size() = {}".format(test_out.size()))
Example 11
    def setUp(self):
        wd = dict(zip(map(lambda x: chr(x), range(100)), range(100)))
        self.base = q.WordLinout(50, worddic=wd, bias=False)
        self.merg = q.WordLinout(50, worddic=wd, bias=False)
        # merge two output layers defined over the same dictionary
        self.linout = self.base.merge(self.merg)
Example 12
    def setUp(self):
        worddic = "<MASK> <RARE> first second third fourth fifth sixth"
        worddic = dict(zip(worddic.split(), range(len(worddic.split()))))
        # cosnorm=True presumably normalises the scores to cosine similarities
        self.linout = q.WordLinout(10, worddic=worddic, cosnorm=True)
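A small usage sketch combining Example 12's cosnorm constructor with a plain forward call like the ones used elsewhere in these snippets; the `qelos` import is again an assumption, and what cosnorm=True does to the scores is not shown here, so the comments are only a guess.

import torch
import qelos as q  # assumed import

words = "<MASK> <RARE> first second third fourth fifth sixth"
D = dict(zip(words.split(), range(len(words.split()))))

linout = q.WordLinout(10, worddic=D, cosnorm=True)  # presumably cosine-normalised scores

x = torch.randn(4, 10)
scores = linout(x)   # presumably one score per word in D for each of the 4 input vectors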
Example 13
def run_normal_seqvae_toy(
    lr=0.001,
    embdim=64,
    encdim=100,
    zdim=64,
    batsize=50,
    epochs=100,
):

    # test
    vocsize = 100
    seqlen = 12
    wD = dict((chr(xi), xi) for xi in range(vocsize))

    # region encoder
    encoder_emb = q.WordEmb(embdim, worddic=wD)
    encoder_lstm = q.FastestLSTMEncoder(embdim, encdim)

    class EncoderNet(torch.nn.Module):
        def __init__(self, emb, core):
            super(EncoderNet, self).__init__()
            self.emb, self.core = emb, core

        def forward(self, x):
            embs, mask = self.emb(x)
            out, states = self.core(embs, mask, ret_states=True)
            top_state = states[-1][0][:, 0]
            # top_state = top_state.unsqueeze(1).repeat(1, out.size(1), 1)
            return top_state  # (batsize, encdim)

    encoder_net = EncoderNet(encoder_emb, encoder_lstm)
    encoder = Posterior(encoder_net, encdim, zdim)
    # endregion

    # region decoder
    decoder_emb = q.WordEmb(embdim, worddic=wD)
    decoder_lstm = q.LSTMCell(embdim + zdim, encdim)
    decoder_outlin = q.WordLinout(encdim, worddic=wD)

    class DecoderCell(torch.nn.Module):
        def __init__(self, emb, core, out, **kw):
            super(DecoderCell, self).__init__()
            self.emb, self.core, self.out = emb, core, out

        def forward(self, xs, z=None):
            embs, mask = self.emb(xs)
            core_inp = torch.cat([embs, z], 1)
            core_out = self.core(core_inp)
            out = self.out(core_out)
            return out

    decoder_cell = DecoderCell(decoder_emb, decoder_lstm, decoder_outlin)
    decoder = q.TFDecoder(decoder_cell)
    # endregion

    likelihood = Likelihood()

    vae = SeqVAE(encoder, decoder, likelihood)

    x = torch.randint(0, vocsize, (batsize, seqlen), dtype=torch.int64)
    ys = vae(x)

    optim = torch.optim.Adam(q.params_of(vae), lr=lr)

    x = torch.randint(0, vocsize, (batsize * 100, seqlen), dtype=torch.int64)
    dataloader = q.dataload(x, batch_size=batsize, shuffle=True)

    trainer = q.trainer(vae).on(dataloader).optimizer(optim).loss(4).epochs(
        epochs)
    trainer.run()

    print("done \n\n")