예제 #1
0
 def setUp(self):
     """Build an attentive and a plain decoder plus random fixtures."""
     nvoc = 10   # output vocabulary size
     hdim = 50   # decoder inner dimension
     cdim = 30   # context (encoding) dimension
     slen = 5    # sequence length
     nbat = 77   # batch size
     self.att = Attention(
         LinearSumAttentionGenerator(indim=hdim + cdim),
         WeightedSumAttCon())
     self.decwatt = SeqDecoder(
         [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
         inconcat=True, attention=self.att, innerdim=hdim)
     self.decwoatt = SeqDecoder(
         [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
         inconcat=True, innerdim=hdim)
     self.attdata = np.random.random((nbat, slen, cdim)).astype("float32")
     self.data = np.random.random((nbat, cdim)).astype("float32")
     self.seqdata = np.random.randint(0, nvoc, (nbat, slen))
     self.predshape = (nbat, slen, nvoc)
예제 #2
0
    def init(self):
        """Assemble the memory encoder, sentence encoder and memory-addressed
        decoder.

        NOTE(review): relies on self.numchars, self.numwords, self.wordencdim,
        self.wordembdim, self.glovepath, self.encinnerdim, self.outdim,
        self.entembdim, self.memdata, self.memaddr, self.decinnerdim and
        self.attdim being set before this is called — confirm against the
        enclosing __init__.
        """
        # memory: encode each entity's surface form with glove + char encoder
        wencpg = WordEncoderPlusGlove(numchars=self.numchars, numwords=self.numwords, encdim=self.wordencdim, embdim=self.wordembdim, embtrainfrac=0.0, glovepath=self.glovepath)
        self.memenco = SeqEncoder(
            wencpg,
            GRU(dim=self.wordembdim + self.wordencdim, innerdim=self.encinnerdim)
        )

        # memory payload: [entity embedding ; encoded surface form]
        entemb = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.mempayload = ConcatBlock(entemb, self.memenco)
        self.memblock = MemoryBlock(self.mempayload, self.memdata, indim=self.outdim, outdim=self.encinnerdim+self.entembdim)

        # encoder: separate instance of the same word-level encoder
        wencpg2 = WordEncoderPlusGlove(numchars=self.numchars, numwords=self.numwords, encdim=self.wordencdim, embdim=self.wordembdim, embtrainfrac=0.0, glovepath=self.glovepath)
        self.enc = SeqEncoder(
            wencpg2,
            GRU(dim=self.wordembdim + self.wordencdim, innerdim=self.encinnerdim)
        )

        # decoder: output layer addresses the memory block, then softmax
        entemb2 = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.softmaxoutblock = stack(self.memaddr(self.memblock, indim=self.decinnerdim, memdim=self.memblock.outdim, attdim=self.attdim), Softmax())
        self.dec = SeqDecoder(
            [entemb2,  #self.memblock,
             # GRU input = entity embedding + context (inconcat=True)
             GRU(dim=entemb.outdim + self.encinnerdim, innerdim=self.decinnerdim),             # GRU(dim=self.memblock.outdim + self.encinnerdim, innerdim=self.decinnerdim),
             ],
            inconcat=True,
            innerdim=self.decinnerdim,
            softmaxoutblock=self.softmaxoutblock
        )
예제 #3
0
    def __init__(self,
                 wordembdim=50,
                 entembdim=200,
                 innerdim=200,
                 attdim=100,
                 outdim=10000,
                 numwords=400000,
                 **kw):
        """Simple encoder/decoder with attention for word -> entity sequences.

        :param wordembdim: word embedding dimension (also the model indim)
        :param entembdim:  entity embedding dimension on the decoder side
        :param innerdim:   inner dimension of encoder and decoder GRUs
        :param attdim:     attention generator dimension
        :param outdim:     output (entity) vocabulary size
        :param numwords:   input word vocabulary size

        Defect fixed: the outdim/numwords defaults were the floats 1e4 and
        4e5; vocabulary sizes feed embedding tables and must be integers,
        so the same values are now int literals (backward compatible).
        """
        super(FBSeqSimpEncDecAtt, self).__init__(**kw)
        self.indim = wordembdim
        self.outdim = outdim
        self.wordembdim = wordembdim
        self.encinnerdim = innerdim
        self.decinnerdim = innerdim
        self.entembdim = entembdim

        # encoder: fully trainable word embeddings followed by a GRU
        self.wordencoder = WordEmbed(indim=numwords,
                                     outdim=self.wordembdim,
                                     trainfrac=1.0)
        self.rnn = RecStack(
            self.wordencoder,
            GRU(dim=self.wordembdim, innerdim=self.encinnerdim))

        # attention scores computed from concatenated [enc state; dec state]
        attgen = LinearGateAttentionGenerator(
            indim=self.encinnerdim + self.decinnerdim, attdim=attdim)
        attcon = WeightedSumAttCon()
        self.dec = SeqDecoder(
            [VectorEmbed(indim=self.outdim, dim=self.entembdim),
             GRU(dim=self.entembdim, innerdim=self.decinnerdim)],
            attention=Attention(attgen, attcon),
            outconcat=True,   # output layer sees [context; decoder state]
            inconcat=False,
            innerdim=self.encinnerdim + self.decinnerdim)
예제 #4
0
    def test_vector_out(self):
        """By default the decoder emits softmax-sized vectors; with
        softmaxoutblock=False it returns the raw concatenated states."""
        hdim = 50
        nvoc = 17
        embedder = IdxToOneHot(nvoc)
        outembdim = nvoc
        rnus = [GRU(dim=nvoc, innerdim=hdim)]
        dec = SeqDecoder([embedder] + rnus,
                         innerdim=hdim * 2,
                         outconcat=True,
                         inconcat=False)

        ctx = np.random.random((1, hdim)).astype("float32")
        seq = np.asarray([[2, 3, 4]])
        out = dec.predict(ctx, seq)
        self.assertEqual(out.shape, (1, 3, nvoc))

        dec = SeqDecoder([embedder] + rnus,
                         innerdim=hdim * 2,
                         softmaxoutblock=False,
                         outconcat=True,
                         inconcat=False)
        out = dec.predict(ctx, seq)
        self.assertEqual(out.shape, (1, 3, hdim * 2))
예제 #5
0
 def _getdecoder(self,
                 outvocsize=None,
                 outembdim=None,
                 outemb=None,
                 maskid=-1,
                 attention=None,
                 lastencinnerdim=None,
                 decinnerdim=None,
                 inconcat=False,
                 outconcat=True,
                 softmaxout=None,
                 dropout=None,
                 rnu=None):
     """Construct the decoder RNN; dims left as None fall back to the
     instance-level defaults."""
     if lastencinnerdim is None:
         lastencinnerdim = self.lastencinnerdim
     if decinnerdim is None:
         decinnerdim = self.decinnerdim
     if rnu is None:
         rnu = GRU  # default recurrent unit
     return SeqDecoder.RNN(
         emb=outemb,
         embdim=outembdim,
         embsize=outvocsize,
         maskid=maskid,
         ctxdim=lastencinnerdim,
         attention=attention,
         innerdim=decinnerdim,
         inconcat=inconcat,
         outconcat=outconcat,
         softmaxoutblock=softmaxout,
         rnu=rnu,
         dropout=dropout,
         dropout_h=dropout,  # same dropout rate applied to hidden state
     )
예제 #6
0
 def __init__(self,
              enclayers,
              declayers,
              attgen,
              attcon,
              decinnerdim,
              statetrans=None,
              vecout=False,
              inconcat=True,
              outconcat=False,
              **kw):
     """Wire a masked sequence encoder to an attention-equipped decoder."""
     # encoder exposes per-step outputs plus an auto-derived zero mask
     encoder = SeqEncoder(*enclayers)
     encoder = encoder.with_outputs()
     encoder = encoder.with_mask()
     encoder = encoder.maskoptions(-1, MaskMode.AUTO, MaskSetMode.ZERO)
     # vecout=True disables the softmax output block (raw vectors out)
     if vecout:
         smo = False
     else:
         smo = None
     decoder = SeqDecoder(declayers,
                          attention=Attention(attgen, attcon),
                          innerdim=decinnerdim,
                          inconcat=inconcat,
                          softmaxoutblock=smo,
                          outconcat=outconcat)
     super(SeqEncDecAtt, self).__init__(encoder, decoder,
                                        statetrans=statetrans, **kw)
예제 #7
0
 def setUp(self):
     """Stacked GRU -> LSTM decoder with random context/sequence data."""
     self.batsize = 70
     self.vocsize = 27
     self.encdim = 44
     self.hdim = 33
     self.statedim = 50
     self.seqlen = 30
     self.encodings_data = \
         np.random.random((self.batsize, self.encdim)).astype("float32")
     self.sequence_data = \
         np.random.randint(0, self.vocsize, (self.batsize, self.seqlen))
     layers = [
         IdxToOneHot(self.vocsize),
         GRU(dim=self.vocsize + self.encdim, innerdim=self.hdim),
         LSTM(dim=self.hdim, innerdim=self.statedim),
     ]
     self.dec = SeqDecoder(layers, inconcat=True, innerdim=self.statedim)
예제 #8
0
class TestAttentionRNNDecoder(TestCase):
    """Shape and parameter-registration checks for a decoder with bilinear
    attention versus a plain decoder."""

    def setUp(self):
        nvoc = 10   # output vocabulary size
        hdim = 50   # decoder inner dimension
        cdim = 30   # context dimension
        slen = 5
        nbat = 77
        self.att = Attention(AttGen(BilinearDistance(hdim, cdim)),
                             WeightedSumAttCon())
        self.decwatt = SeqDecoder(
            [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
            inconcat=True, attention=self.att, innerdim=hdim)
        self.decwoatt = SeqDecoder(
            [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
            inconcat=True, innerdim=hdim)
        self.attdata = np.random.random(
            (nbat, slen, cdim)).astype("float32")
        self.data = np.random.random((nbat, cdim)).astype("float32")
        self.seqdata = np.random.randint(0, nvoc, (nbat, slen))
        self.predshape = (nbat, slen, nvoc)

    def test_shape(self):
        # attentive decoder consumes a 3D sequence of context vectors
        out = self.decwatt.predict(self.attdata, self.seqdata)
        self.assertEqual(out.shape, self.predshape)

    def test_shape_wo_att(self):
        # plain decoder consumes a single context vector per example
        out = self.decwoatt.predict(self.data, self.seqdata)
        self.assertEqual(out.shape, self.predshape)

    def test_attentiongenerator_param_in_allparams(self):
        inps, outps = self.decwatt.autobuild(self.attdata, self.seqdata)
        self.assertIn(self.att.attentiongenerator.dist.W,
                      outps[0].allparams)

    def test_attentiongenerator_param_not_in_params_of_dec_wo_att(self):
        _, outps = self.decwoatt.autobuild(self.data, self.seqdata)
        self.assertNotIn(self.att.attentiongenerator.dist.W,
                         outps[0].allparams)
예제 #9
0
 def setUp(self):
     """Linear-sum-attention decoder and plain decoder with random data."""
     nvoc = 10   # output vocabulary size
     hdim = 50   # decoder inner dimension
     cdim = 30   # context dimension
     slen = 5
     nbat = 77
     self.att = Attention(
         LinearSumAttentionGenerator(indim=hdim + cdim),
         WeightedSumAttCon())
     self.decwatt = SeqDecoder(
         [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
         inconcat=True, attention=self.att, innerdim=hdim)
     self.decwoatt = SeqDecoder(
         [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
         inconcat=True, innerdim=hdim)
     self.attdata = np.random.random((nbat, slen, cdim)).astype("float32")
     self.data = np.random.random((nbat, cdim)).astype("float32")
     self.seqdata = np.random.randint(0, nvoc, (nbat, slen))
     self.predshape = (nbat, slen, nvoc)
예제 #10
0
    def init(self):
        """Build memory, encoder, attention and decoder; the decoder's output
        layer addresses the memory block instead of a plain softmax weight.

        NOTE(review): relies on self.numchars, self.numwords, self.wordencdim,
        self.wordembdim, self.glovepath, self.encinnerdim, self.outdim,
        self.entembdim, self.memdata, self.memaddr, self.decinnerdim and
        self.attdim being set beforehand — confirm against the caller.
        """
        #MEMORY: encodes how entity is written + custom entity embeddings
        wencpg = WordEncoderPlusGlove(numchars=self.numchars,
                                      numwords=self.numwords,
                                      encdim=self.wordencdim,
                                      embdim=self.wordembdim,
                                      embtrainfrac=0.0,
                                      glovepath=self.glovepath)
        self.memenco = SeqEncoder(
            wencpg,
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))

        # memory payload per entity: [entity embedding ; encoded surface form]
        entemb = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.mempayload = ConcatBlock(entemb, self.memenco)
        self.memblock = MemoryBlock(self.mempayload,
                                    self.memdata,
                                    indim=self.outdim,
                                    outdim=self.encinnerdim + self.entembdim)

        #ENCODER: uses the same language encoder as memory
        # (wencpg is deliberately shared, unlike the commented-out wencpg2)
        #wencpg2 = WordEncoderPlusGlove(numchars=self.numchars, numwords=self.numwords, encdim=self.wordencdim, embdim=self.wordembdim, embtrainfrac=0.0, glovepath=glovepath)
        self.enc = RecStack(
            wencpg,
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))

        #ATTENTION: gate computed from concatenated [enc state ; dec state]
        attgen = LinearGateAttentionGenerator(indim=self.encinnerdim +
                                              self.decinnerdim,
                                              innerdim=self.attdim)
        attcon = WeightedSumAttCon()

        #DECODER: memory block doubles as the output-side embedding
        #entemb2 = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.softmaxoutblock = stack(
            self.memaddr(self.memblock,
                         indim=self.decinnerdim + self.encinnerdim,
                         memdim=self.memblock.outdim,
                         attdim=self.attdim), Softmax())

        # outconcat=True: output layer input is [attention context ; state],
        # hence innerdim = decinnerdim + encinnerdim
        self.dec = SeqDecoder([
            self.memblock,
            GRU(dim=self.entembdim + self.encinnerdim,
                innerdim=self.decinnerdim)
        ],
                              outconcat=True,
                              inconcat=False,
                              attention=Attention(attgen, attcon),
                              innerdim=self.decinnerdim + self.encinnerdim,
                              softmaxoutblock=self.softmaxoutblock)
예제 #11
0
 def setUp(self):
     """Stacked GRU -> LSTM decoder plus random fixtures."""
     self.batsize = 70
     self.vocsize = 27
     self.encdim = 44
     self.hdim = 33
     self.statedim = 50
     self.seqlen = 30
     self.encodings_data = \
         np.random.random((self.batsize, self.encdim)).astype("float32")
     self.sequence_data = \
         np.random.randint(0, self.vocsize, (self.batsize, self.seqlen))
     layers = [
         IdxToOneHot(self.vocsize),
         GRU(dim=self.vocsize + self.encdim, innerdim=self.hdim),
         LSTM(dim=self.hdim, innerdim=self.statedim),
     ]
     self.dec = SeqDecoder(layers, inconcat=True, innerdim=self.statedim)
예제 #12
0
class TestSimpleRNNDecoder(TestCase):
    """Output-shape check for a GRU+LSTM decoder without attention."""

    def setUp(self):
        self.batsize = 70
        self.vocsize = 27
        self.encdim = 44
        self.hdim = 33
        self.statedim = 50
        self.seqlen = 30
        self.encodings_data = \
            np.random.random((self.batsize, self.encdim)).astype("float32")
        self.sequence_data = \
            np.random.randint(0, self.vocsize, (self.batsize, self.seqlen))
        layers = [
            IdxToOneHot(self.vocsize),
            GRU(dim=self.vocsize + self.encdim, innerdim=self.hdim),
            LSTM(dim=self.hdim, innerdim=self.statedim),
        ]
        self.dec = SeqDecoder(layers, inconcat=True,
                              innerdim=self.statedim)

    def test_rnndecoder_output_shape(self):
        # one softmax-sized vector per (example, timestep)
        out = self.dec.predict(self.encodings_data, self.sequence_data)
        self.assertEqual(out.shape,
                         (self.batsize, self.seqlen, self.vocsize))
예제 #13
0
    def __init__(self,
                 wordembdim=50,
                 wordencdim=50,
                 entembdim=200,
                 innerdim=200,
                 attdim=100,
                 outdim=10000,
                 numwords=400000,
                 numchars=128,
                 glovepath=None,
                 **kw):
        """Composite word encoder (glove + char encoder) with an attentive
        entity decoder.

        :param wordembdim: glove word embedding dimension
        :param wordencdim: character-level word encoding dimension
        :param entembdim:  entity embedding dimension
        :param innerdim:   inner dimension of encoder and decoder GRUs
        :param attdim:     attention generator inner dimension
        :param outdim:     output (entity) vocabulary size
        :param numwords:   word vocabulary size
        :param numchars:   character vocabulary size
        :param glovepath:  path to glove vectors (None -> default lookup)

        Defect fixed: outdim/numwords defaults were the floats 1e4 and 4e5;
        vocabulary sizes must be integers, so the same values are now int
        literals (backward compatible).
        """
        super(FBSeqCompEncDecAtt, self).__init__(**kw)
        self.indim = wordembdim + wordencdim
        self.outdim = outdim
        self.wordembdim = wordembdim
        self.wordencdim = wordencdim
        self.encinnerdim = innerdim
        self.entembdim = entembdim
        self.decinnerdim = innerdim

        # word-level encoder: frozen glove embeddings + char encoder
        self.wordencoder = WordEncoderPlusGlove(numchars=numchars,
                                                numwords=numwords,
                                                encdim=self.wordencdim,
                                                embdim=self.wordembdim,
                                                embtrainfrac=0.0,
                                                glovepath=glovepath)

        self.rnn = RecStack(
            self.wordencoder,
            GRU(dim=wordembdim + wordencdim, innerdim=self.encinnerdim))
        # attention gate computed from concatenated [enc state ; dec state]
        attgen = LinearGateAttentionGenerator(
            indim=self.encinnerdim + self.decinnerdim, innerdim=attdim)
        attcon = WeightedSumAttCon()
        self.dec = SeqDecoder(
            [VectorEmbed(indim=self.outdim, dim=self.entembdim),
             GRU(dim=self.entembdim, innerdim=self.decinnerdim)],
            attention=Attention(attgen, attcon),
            outconcat=True,   # output layer sees [context; decoder state]
            inconcat=False,
            innerdim=self.encinnerdim + self.decinnerdim)
예제 #14
0
    def __init__(self, wordembdim=50, wordencdim=100, entembdim=200,
                 innerdim=200, outdim=10000, numwords=400000, numchars=128,
                 glovepath=None, **kw):
        """Composite word encoder feeding a plain (no-attention) decoder.

        :param wordembdim: glove word embedding dimension
        :param wordencdim: character-level word encoding dimension
        :param entembdim:  entity embedding dimension
        :param innerdim:   inner dimension of encoder and decoder GRUs
        :param outdim:     output (entity) vocabulary size
        :param numwords:   word vocabulary size
        :param numchars:   character vocabulary size
        :param glovepath:  path to glove vectors (None -> default lookup)

        Defect fixed: outdim/numwords defaults were the floats 1e4 and 4e5;
        vocabulary sizes must be integers, so the same values are now int
        literals (backward compatible).
        """
        super(FBSeqCompositeEncDec, self).__init__(**kw)
        self.indim = wordembdim + wordencdim
        self.outdim = outdim
        self.wordembdim = wordembdim
        self.wordencdim = wordencdim
        self.encinnerdim = innerdim
        self.entembdim = entembdim
        self.decinnerdim = innerdim

        # encoder: frozen glove + char encoder followed by a GRU
        self.enc = SeqEncoder(
            WordEncoderPlusGlove(numchars=numchars, numwords=numwords,
                                 encdim=self.wordencdim,
                                 embdim=self.wordembdim,
                                 embtrainfrac=0.0, glovepath=glovepath),
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))

        # decoder: GRU input is [entity embedding ; context] (inconcat=True)
        self.dec = SeqDecoder(
            [VectorEmbed(indim=self.outdim, dim=self.entembdim),
             GRU(dim=self.entembdim + self.encinnerdim,
                 innerdim=self.decinnerdim)],
            inconcat=True,
            innerdim=self.decinnerdim,
        )
예제 #15
0
 def __init__(self,
              encdim=44,
              invocsize=500,
              outvocsize=27,
              innerdim=300,
              seqlen=20,
              **kw):
     """Map an input index to an output sequence: the embedded index is
     the context vector fed to a GRU decoder."""
     super(idx2seq, self).__init__(**kw)
     self.invocsize = invocsize
     self.outvocsize = outvocsize
     self.innerdim = innerdim
     self.seqlen = seqlen
     self.encdim = encdim
     # un-normalized embedding of the input index serves as context
     self.emb = VectorEmbed(indim=self.invocsize, dim=self.encdim,
                            normalize=False)
     declayers = [
         IdxToOneHot(self.outvocsize),
         GRU(dim=self.outvocsize + self.encdim,
             innerdim=self.innerdim,
             nobias=True),
     ]
     self.dec = SeqDecoder(declayers, inconcat=True,
                           innerdim=self.innerdim)
예제 #16
0
class TestSimpleRNNDecoder(TestCase):
    """Output-shape check for a stacked GRU+LSTM decoder."""

    def setUp(self):
        self.batsize = 70
        self.vocsize = 27
        self.encdim = 44
        self.hdim = 33
        self.statedim = 50
        self.seqlen = 30
        self.encodings_data = np.random.random(
            (self.batsize, self.encdim)).astype("float32")
        self.sequence_data = np.random.randint(
            0, self.vocsize, (self.batsize, self.seqlen))
        layers = [
            IdxToOneHot(self.vocsize),
            GRU(dim=self.vocsize + self.encdim, innerdim=self.hdim),
            LSTM(dim=self.hdim, innerdim=self.statedim),
        ]
        self.dec = SeqDecoder(layers, inconcat=True,
                              innerdim=self.statedim)

    def test_rnndecoder_output_shape(self):
        # one softmax-sized vector per (example, timestep)
        out = self.dec.predict(self.encodings_data, self.sequence_data)
        self.assertEqual(out.shape,
                         (self.batsize, self.seqlen, self.vocsize))
예제 #17
0
class TestAttentionRNNDecoder(TestCase):
    """Shape and parameter-registration checks for a decoder with
    linear-sum attention versus a plain decoder."""

    def setUp(self):
        nvoc = 10   # output vocabulary size
        hdim = 50   # decoder inner dimension
        cdim = 30   # context dimension
        slen = 5
        nbat = 77
        self.att = Attention(
            LinearSumAttentionGenerator(indim=hdim + cdim),
            WeightedSumAttCon())
        self.decwatt = SeqDecoder(
            [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
            inconcat=True, attention=self.att, innerdim=hdim)
        self.decwoatt = SeqDecoder(
            [IdxToOneHot(nvoc), GRU(dim=nvoc + cdim, innerdim=hdim)],
            inconcat=True, innerdim=hdim)
        self.attdata = np.random.random(
            (nbat, slen, cdim)).astype("float32")
        self.data = np.random.random((nbat, cdim)).astype("float32")
        self.seqdata = np.random.randint(0, nvoc, (nbat, slen))
        self.predshape = (nbat, slen, nvoc)

    def test_shape(self):
        # attentive decoder consumes a 3D sequence of context vectors
        out = self.decwatt.predict(self.attdata, self.seqdata)
        self.assertEqual(out.shape, self.predshape)

    def test_shape_wo_att(self):
        # plain decoder consumes a single context vector per example
        out = self.decwoatt.predict(self.data, self.seqdata)
        self.assertEqual(out.shape, self.predshape)

    def test_attentiongenerator_param_in_allparams(self):
        self.decwatt.predict(self.attdata, self.seqdata)
        self.assertIn(self.att.attentiongenerator.W,
                      self.decwatt.output.allparams)

    def test_attentiongenerator_param_not_in_params_of_dec_wo_att(self):
        self.decwoatt.predict(self.data, self.seqdata)
        self.assertNotIn(self.att.attentiongenerator.W,
                         self.decwoatt.output.allparams)
예제 #18
0
    def test_vector_out(self):
        """By default the decoder emits softmax-sized vectors; with
        softmaxoutblock=False it returns the raw concatenated states."""
        hdim = 50
        nvoc = 17
        embedder = IdxToOneHot(nvoc)
        outembdim = nvoc
        rnus = [GRU(dim=nvoc, innerdim=hdim)]
        dec = SeqDecoder([embedder] + rnus,
                         innerdim=hdim * 2,
                         outconcat=True,
                         inconcat=False)

        ctx = np.random.random((2, hdim)).astype("float32")
        seq = np.asarray([[2, 3, 4], [2, 3, 4]])
        out = dec.predict(ctx, seq)
        self.assertEqual(out.shape, (2, 3, nvoc))

        dec = SeqDecoder([embedder] + rnus,
                         innerdim=hdim * 2,
                         softmaxoutblock=False,
                         outconcat=True,
                         inconcat=False)
        out = dec.predict(ctx, seq)
        self.assertEqual(out.shape, (2, 3, hdim * 2))