Example #1
    def init(self):
        #memory: encodes how the entity is written + entity embedding (cf. Example #18)
        wencpg = WordEncoderPlusGlove(numchars=self.numchars,
                                      numwords=self.numwords,
                                      encdim=self.wordencdim,
                                      embdim=self.wordembdim,
                                      embtrainfrac=0.0,
                                      glovepath=self.glovepath)
        self.memenco = SeqEncoder(
            wencpg,
            GRU(dim=self.wordembdim + self.wordencdim, innerdim=self.encinnerdim)
        )

        entemb = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.mempayload = ConcatBlock(entemb, self.memenco)
        self.memblock = MemoryBlock(self.mempayload, self.memdata, indim=self.outdim, outdim=self.encinnerdim+self.entembdim)

        #encoder
        wencpg2 = WordEncoderPlusGlove(numchars=self.numchars,
                                       numwords=self.numwords,
                                       encdim=self.wordencdim,
                                       embdim=self.wordembdim,
                                       embtrainfrac=0.0,
                                       glovepath=self.glovepath)
        self.enc = SeqEncoder(
            wencpg2,
            GRU(dim=self.wordembdim + self.wordencdim, innerdim=self.encinnerdim)
        )

        #decoder
        entemb2 = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.softmaxoutblock = stack(
            self.memaddr(self.memblock,
                         indim=self.decinnerdim,
                         memdim=self.memblock.outdim,
                         attdim=self.attdim),
            Softmax())
        self.dec = SeqDecoder(
            [entemb2,  #self.memblock,
             GRU(dim=entemb.outdim + self.encinnerdim, innerdim=self.decinnerdim),             # GRU(dim=self.memblock.outdim + self.encinnerdim, innerdim=self.decinnerdim),
             ],
            inconcat=True,
            innerdim=self.decinnerdim,
            softmaxoutblock=self.softmaxoutblock
        )
Example #2
 def __init__(self,
              numchars=256,
              charembdim=50,
              charemb=None,
              charinnerdim=100,
              numwords=1000,
              wordembdim=100,
              wordemb=None,
              wordinnerdim=200,
              maskid=None,
              bidir=False,
              returnall=False,
              **kw):
     # char level inits
     if charemb is None:
         charemb = VectorEmbed(indim=numchars, dim=charembdim)
     else:
         charemb = charemb  # keep the embedding block that was passed in
         charembdim = charemb.outdim
     if not issequence(charinnerdim):
         charinnerdim = [charinnerdim]
     charlayers, lastchardim = MakeRNU.make(charembdim,
                                            charinnerdim,
                                            bidir=bidir)
     charenc = SeqEncoder(charemb,
                          *charlayers).maskoptions(maskid, MaskMode.AUTO)
     # word level inits
     if wordemb is None:
         wordemb = VectorEmbed(indim=numwords, dim=wordembdim)
     elif wordemb is False:
         wordemb = None
         wordembdim = 0
     else:
         wordemb = wordemb
         wordembdim = wordemb.outdim
     if not issequence(wordinnerdim):
         wordinnerdim = [wordinnerdim]
     wordlayers, outdim = MakeRNU.make(wordembdim + lastchardim,
                                       wordinnerdim,
                                       bidir=bidir)
     wordenc = SeqEncoder(None, *wordlayers).maskoptions(MaskMode.NONE)
     if returnall:
         wordenc.all_outputs()
     self.outdim = outdim
     super(WordCharSentEnc, self).__init__(l1enc=charenc,
                                           l2emb=wordemb,
                                           l2enc=wordenc,
                                           maskid=maskid)
Example #3
 def __init__(self, indim=1000, dim=50, outdim=None, normalize=False, **kw):
     super(Dummy, self).__init__(**kw)
     self.dim = dim
     self.indim = indim
     self.W = VectorEmbed(indim=indim, dim=dim, normalize=normalize)
     self.outdim = indim if outdim is None else outdim
     self.O = param((dim, self.outdim), lrmul=1.).glorotuniform()
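
Note: VectorEmbed is the common thread through these examples: it maps integer indices to dense vectors and exposes the vector size as .outdim. A minimal standalone sketch, assuming the import path teafacto.blocks.basic and that .predict takes a 1-D int array (both inferred from the test examples further down, not verified here):

 import numpy as np
 from teafacto.blocks.basic import VectorEmbed  # assumed import path

 emb = VectorEmbed(indim=1000, dim=50)  # vocabulary of 1000 indices, 50-dim vectors
 vecs = emb.predict(np.asarray([3, 7, 42], dtype="int32"))
 print vecs.shape  # expected: (3, 50)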
Example #4
 def __init__(self,
              indim=500,
              inpembdim=100,
              inpemb=None,
              innerdim=200,
              bidir=False,
              maskid=None,
              zoneout=False,
              dropout_in=False,
              dropout_h=False,
              **kw):
     if inpemb is None:
         inpemb = VectorEmbed(indim=indim, dim=inpembdim, maskid=maskid)
     elif inpemb is False:
         inpemb = None
     else:
         inpembdim = inpemb.outdim
     if not issequence(innerdim):
         innerdim = [innerdim]
     layers, _ = MakeRNU.make(inpembdim,
                              innerdim,
                              bidir=bidir,
                              zoneout=zoneout,
                              dropout_in=dropout_in,
                              dropout_h=dropout_h)
     super(RNNSeqEncoder, self).__init__(inpemb, *layers, **kw)
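
A hypothetical instantiation of the constructor above: a scalar innerdim gets wrapped into a one-element list, while a list stacks one recurrent unit per entry, giving the one- and two-layer variants below:

 enc1 = RNNSeqEncoder(indim=500, inpembdim=100, innerdim=200)  # one recurrent layer
 enc2 = RNNSeqEncoder(indim=500, inpembdim=100,
                      innerdim=[200, 150], bidir=True)  # two stacked layers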
Example #5
 def __init__(self,
              indim=400,
              inpembdim=50,
              inpemb=None,
              mode="concat",
              innerdim=100,
              numouts=1,
              maskid=0,
              bidir=False,
              maskmode=MaskMode.NONE,
              **kw):
     super(SimpleSeq2MultiVec, self).__init__(**kw)
     if inpemb is None:
         if inpembdim is None:
             inpemb = IdxToOneHot(indim)
             inpembdim = indim
         else:
             inpemb = VectorEmbed(indim=indim, dim=inpembdim)
     elif inpemb is False:
         inpemb = None
     else:
         inpembdim = inpemb.outdim
     if not issequence(innerdim):
         innerdim = [innerdim]
     innerdim[-1] += numouts
     rnn, lastdim = self.makernu(inpembdim, innerdim, bidir=bidir)
     self.outdim = lastdim * numouts
     self.maskid = maskid
     self.inpemb = inpemb
     self.numouts = numouts
     self.mode = mode
     if not issequence(rnn):
         rnn = [rnn]
     self.enc = SeqEncoder(inpemb, *rnn).maskoptions(maskid, maskmode)
     self.enc.all_outputs()
Example #6
File: enc.py, project: Natty307/teafacto
 def __init__(self,
              indim=400,
              inpembdim=50,
              inpemb=None,
              innerdim=100,
              maskid=0,
              bidir=False,
              pool=False,
              **kw):
     if inpemb is False:
         inpemb = None
     elif inpemb is None:
         if inpembdim is None:
             inpemb = IdxToOneHot(indim)
             inpembdim = indim
         else:
             inpemb = VectorEmbed(indim=indim, dim=inpembdim)
     else:
         inpembdim = inpemb.outdim
     rnn, lastdim = self.makernu(inpembdim, innerdim, bidir=bidir)
     self.outdim = lastdim
     poolblock = None if pool is False else Pool(
         (None, ), axis=(1, ), mode="max")
     super(SimpleSeq2Vec, self).__init__(inpemb,
                                         rnn,
                                         maskid=maskid,
                                         pool=poolblock,
                                         **kw)
Example #7
    def __init__(self,
                 wordembdim=50,
                 entembdim=200,
                 innerdim=200,
                 attdim=100,
                 outdim=1e4,
                 numwords=4e5,
                 **kw):
        super(FBSeqSimpEncDecAtt, self).__init__(**kw)
        self.indim = wordembdim
        self.outdim = outdim
        self.wordembdim = wordembdim
        self.encinnerdim = innerdim
        self.decinnerdim = innerdim
        self.entembdim = entembdim

        self.wordencoder = WordEmbed(indim=numwords,
                                     outdim=self.wordembdim,
                                     trainfrac=1.0)
        self.rnn = RecStack(
            self.wordencoder,
            GRU(dim=self.wordembdim, innerdim=self.encinnerdim))

        attgen = LinearGateAttentionGenerator(indim=self.encinnerdim +
                                              self.decinnerdim,
                                              attdim=attdim)
        attcon = WeightedSumAttCon()
        self.dec = SeqDecoder([
            VectorEmbed(indim=self.outdim, dim=self.entembdim),
            GRU(dim=self.entembdim, innerdim=self.decinnerdim)
        ],
                              attention=Attention(attgen, attcon),
                              outconcat=True,
                              inconcat=False,
                              innerdim=self.encinnerdim + self.decinnerdim)
Example #8
File: enc.py, project: Natty307/teafacto
 def __init__(self,
              indim=400,
              inpembdim=50,
              inpemb=None,
              innerdim=100,
              maskid=None,
              bidir=False,
              **kw):
     if inpemb is None:
         if inpembdim is None:
             inpemb = IdxToOneHot(indim)
             inpembdim = indim
         else:
             inpemb = VectorEmbed(indim=indim, dim=inpembdim)
     else:
         inpembdim = inpemb.outdim
     lastdim = inpembdim
     if not issequence(innerdim):  # single encoder
         innerdim = [innerdim]
     rnns = []
     for innerdimi in innerdim:
         if not issequence(innerdimi):  # one layer in encoder
             innerdimi = [innerdimi]
         rnn, lastdim = MakeRNU.make(lastdim, innerdimi, bidir=bidir)
         rnns.append(rnn)
     self.outdim = lastdim
     super(SimpleSeqStar2Vec, self).__init__(inpemb,
                                             *rnns,
                                             maskid=maskid,
                                             **kw)
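
In the loop above, innerdim may be a list of lists: one inner list per encoder, one entry per layer within that encoder. A hypothetical instantiation to make the nesting concrete:

 # a two-layer encoder followed by a single-layer encoder (hypothetical dims)
 enc = SimpleSeqStar2Vec(indim=400, inpembdim=50, innerdim=[[100, 100], [200]])
 print enc.outdim  # 200: the last layer of the last encoder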
Example #9
 def setUp(self):
     dim = 50
     self.vocabsize = 2000
     data = np.arange(0, self.vocabsize).astype("int32")
     self.O = param((dim, self.vocabsize)).uniform()
     self.W = VectorEmbed(indim=self.vocabsize, dim=50)
     self.out = stack(self.W, asblock(lambda x: T.dot(self.O, x)),
                      Softmax())(Input(ndim=1, dtype="int32"))
Example #10
 def __init__(self, invocsize=27, outvocsize=500, innerdim=300, **kw):
     super(seq2idx, self).__init__(**kw)
     self.invocsize = invocsize
     self.outvocsize = outvocsize
     self.innerdim = innerdim
     self.enc = SeqEncoder(
         VectorEmbed(indim=self.invocsize, dim=self.invocsize),
         GRU(dim=self.invocsize, innerdim=self.innerdim))
     self.outlin = Lin(indim=self.innerdim, dim=self.outvocsize)
Example #11
 def __init__(self,
              indim=4000,
              outdim=100,
              embdim=50,
              embtrainfrac=0.0,
              **kw):
     super(WordEmbedPlusGlove, self).__init__(indim, outdim + embdim, **kw)
     self.glove = Glove(embdim, vocabsize=indim,
                        trainfrac=embtrainfrac).block
     self.emb = VectorEmbed(indim=indim, dim=outdim)
Example #12
    def __init__(
            self,
            entembdim=50,
            wordembdim=50,
            wordencdim=100,
            memdata=None,
            attdim=100,
            numchars=128,  # number of different chars
            numwords=4e5,  # number of different words
            glovepath=None,
            innerdim=100,  # dim of memory payload encoder output
            outdim=1e4,  # number of entities
            memaddr=DotMemAddr,
            **kw):
        super(FBMemMatch, self).__init__(**kw)
        self.wordembdim = wordembdim
        self.wordencdim = wordencdim
        self.entembdim = entembdim
        self.attdim = attdim
        self.encinnerdim = innerdim
        self.outdim = outdim

        memaddr = TransDotMemAddr  # NOTE: unconditionally overrides the memaddr argument

        # memory encoder per word
        #wencpg = WordEmbed(indim=numwords, outdim=self.wordembdim, trainfrac=1.0)
        wordencoder = WordEncoderPlusGlove(numchars=numchars,
                                           numwords=numwords,
                                           encdim=self.wordencdim,
                                           embdim=self.wordembdim,
                                           embtrainfrac=0.0,
                                           glovepath=glovepath)

        # memory encoder for one cell
        self.phraseencoder = SeqEncoder(
            wordencoder,
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))
        # entity embedder
        entemb = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.entembs = entemb(
            memdata[0])  #Val(np.arange(0, self.outdim, dtype="int32")))
        # memory block
        self.mempayload = self.phraseencoder  #ConcatBlock(entemb, self.phraseencoder)
        self.memblock = MemoryBlock(
            self.mempayload,
            memdata[1],
            indim=self.outdim,
            outdim=self.encinnerdim)  # + self.entembdim)
        # memory addressing
        self.mema = memaddr(self.memblock,
                            memdim=self.memblock.outdim,
                            attdim=attdim,
                            indim=self.encinnerdim)
Example #13
 def __init__(self,
              indim=400,
              outdim=50,
              inpembdim=50,
              outembdim=50,
              innerdim=100,
              **kw):
     self.inpemb = VectorEmbed(indim=indim, dim=inpembdim)
     self.outemb = VectorEmbed(indim=outdim, dim=outembdim)
     self.rnn = []
     if not issequence(innerdim):
         innerdim = [innerdim]
     innerdim = [inpembdim + outembdim] + innerdim
     self.rnn = MakeRNU.fromdims(innerdim)[0]
     super(SimpleSeqTransDec, self).__init__(self.inpemb,
                                             self.outemb,
                                             *self.rnn,
                                             smodim=innerdim[-1],
                                             outdim=outdim,
                                             **kw)
Example #14
 def __init__(self, indim=400, embdim=50, innerdim=100, outdim=50, **kw):
     self.emb = VectorEmbed(indim=indim, dim=embdim)
     if not issequence(innerdim):
         innerdim = [innerdim]
     innerdim = [embdim] + innerdim
     self.rnn = self.getrnnfrominnerdim(innerdim)
     super(SimpleSeqTransducer, self).__init__(self.emb,
                                               *self.rnn,
                                               smodim=innerdim[-1],
                                               outdim=outdim,
                                               **kw)
Example #15
 def getemb(emb=None, embdim=None, vocsize=None, maskid=-1):
     if emb is False:
         assert (embdim is not None)
         return None, embdim
     elif emb is not None:
         return emb, emb.outdim
     else:
         if embdim is None:
             return IdxToOneHot(vocsize), vocsize
         else:
             return VectorEmbed(indim=vocsize, dim=embdim,
                                maskid=maskid), embdim
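
getemb codifies the None/False/Block convention that most constructors in these examples repeat inline. Spelling out its three branches with hypothetical arguments:

 emb, dim = getemb(emb=False, embdim=20)                # (None, 20): caller feeds vectors in directly
 emb, dim = getemb(emb=None, embdim=None, vocsize=100)  # (IdxToOneHot(100), 100)
 emb, dim = getemb(emb=None, embdim=20, vocsize=100)    # (VectorEmbed(100 -> 20, maskid=-1), 20)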
Example #16
    def test_ns_training(self):
        num = 2000
        self.expshape = (num, 50)
        Glove.defaultpath = "../../../data/glove/miniglove.%dd.txt"
        self.glove = Glove(self.expshape[1], self.expshape[0])
        self.cemb = VectorEmbed(indim=self.expshape[0] + 1,
                                dim=self.expshape[1])
        self.assertRaises(Exception, self.glove.block.predict, [num + 1])
        self.assertRaises(Exception, self.cemb.predict, [num + 1])

        m = MatchScore(self.glove.block, self.cemb, scorer=CosineDistance())
        mg = MatchScore(self.glove.block,
                        self.glove.block)  # TODO factor out matchscore tests
        idxs = np.arange(num + 1)

        # glove against glove
        self.assertTrue(
            np.allclose(mg.predict([num, 100], [num, 100]), [
                np.linalg.norm(self.glove % num)**2,
                np.linalg.norm(self.glove % 100)**2
            ]))

        class NegIdxGen():  # produces negatives by corrupting the right-hand indices
            def __init__(self, num):
                self.n = num

            def __call__(self, l, r):
                # keep the left side, replace the right side with random indices
                return l, np.random.randint(0, self.n, r.shape)

        vdata = np.arange(num)
        negrate = 5

        def obj(p, n):
            # minimized during training, so it pushes positive scores p above negatives n
            return n - p
        m, err, verr, _, _ = m.nstrain([idxs, idxs]).negsamplegen(NegIdxGen(num+1)).negrate(negrate)\
            .adagrad(lr=0.1).objective(obj) \
            .validate_on([vdata, vdata]).extvalid(geteval(m.predict, num, negrate)).validinter(30) \
            .train(numbats=50, epochs=29, returnerrors=True)
        #.writeresultstofile("testingresultswriter.tsv") \

        tdata = np.arange(num)
        tt = ticktock("eval")
        tt.tick()
        mrr, recat1, recat10 = geteval(m.predict, num, 1)(tdata)
        tt.tock("evaluated test data")
        print "%.4f MRR,\t%.4f MR@10,\t%.4f MR@1" % (mrr, recat10, recat1)
        self.assertGreater(mrr, 0.85)
        self.assertGreater(recat10, 0.9)
        print verr
        self.assertTrue(
            np.allclose(np.asarray([mrr, recat1, recat10]),
                        np.asarray(verr[-1][1:])))
Example #17
    def test_seq_scoring(self):
        vocsize = 100
        dim = 10
        numsam = 17
        seqlen = 5
        ve = VectorEmbed(vocsize, dim)
        m = SeqMatchScore(SeqUnroll(ve), SeqUnroll(ve), scorer=CosineDistance())

        data = np.random.randint(0, vocsize, (numsam, seqlen))
        #print data.shape
        pred = m.predict(data, data)
        #print pred
        # identical sequences score cosine similarity 1 at each of the seqlen
        # positions, and SeqMatchScore sums over positions, hence seqlen per pair
        self.assertTrue(np.allclose(np.ones_like(pred) * seqlen * 1., pred))
Example #18
    def init(self):
        #MEMORY: encodes how entity is written + custom entity embeddings
        wencpg = WordEncoderPlusGlove(numchars=self.numchars,
                                      numwords=self.numwords,
                                      encdim=self.wordencdim,
                                      embdim=self.wordembdim,
                                      embtrainfrac=0.0,
                                      glovepath=self.glovepath)
        self.memenco = SeqEncoder(
            wencpg,
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))

        entemb = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.mempayload = ConcatBlock(entemb, self.memenco)
        self.memblock = MemoryBlock(self.mempayload,
                                    self.memdata,
                                    indim=self.outdim,
                                    outdim=self.encinnerdim + self.entembdim)

        #ENCODER: uses the same language encoder as memory
        #wencpg2 = WordEncoderPlusGlove(numchars=self.numchars, numwords=self.numwords, encdim=self.wordencdim, embdim=self.wordembdim, embtrainfrac=0.0, glovepath=glovepath)
        self.enc = RecStack(
            wencpg,
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))

        #ATTENTION
        attgen = LinearGateAttentionGenerator(indim=self.encinnerdim +
                                              self.decinnerdim,
                                              innerdim=self.attdim)
        attcon = WeightedSumAttCon()

        #DECODER
        #entemb2 = VectorEmbed(indim=self.outdim, dim=self.entembdim)
        self.softmaxoutblock = stack(
            self.memaddr(self.memblock,
                         indim=self.decinnerdim + self.encinnerdim,
                         memdim=self.memblock.outdim,
                         attdim=self.attdim), Softmax())

        self.dec = SeqDecoder([
            self.memblock,
            GRU(dim=self.entembdim + self.encinnerdim,
                innerdim=self.decinnerdim)
        ],
                              outconcat=True,
                              inconcat=False,
                              attention=Attention(attgen, attcon),
                              innerdim=self.decinnerdim + self.encinnerdim,
                              softmaxoutblock=self.softmaxoutblock)
Example #19
 def __init__(self, encdim, invocsize, outvocsize, innerdim, seqlen, **kw):
     super(idx2seqStupid, self).__init__(**kw)
     self.encdim = encdim
     self.invocsize = invocsize
     self.outvocsize = outvocsize
     self.innerdim = innerdim
     self.seqlen = seqlen
     self.emb = VectorEmbed(indim=self.invocsize,
                            dim=self.encdim,
                            normalize=True)
     self.aletter = stack(Lin(indim=self.encdim, dim=self.outvocsize),
                          Softmax())
     self.bletter = stack(Lin(indim=self.encdim, dim=self.outvocsize),
                          Softmax())
     self.cletter = stack(Lin(indim=self.encdim, dim=self.outvocsize),
                          Softmax())
Example #20
 def getdeclayers(self, outembdim, outvocsize, lastencinnerdim, decinnerdim,
                  rnu, inconcat):
     if outembdim is None:
         outemb = IdxToOneHot(outvocsize)
         outembdim = outvocsize
     elif isinstance(outembdim, Block):
         outemb = outembdim
         outembdim = outemb.outdim
     else:
         outemb = VectorEmbed(indim=outvocsize, dim=outembdim)
     decrnus = []
     firstdecdim = outembdim + lastencinnerdim if inconcat else outembdim
     dims = [firstdecdim] + decinnerdim
     i = 1
     while i < len(dims):
         decrnus.append(rnu(dim=dims[i - 1], innerdim=dims[i]))
         i += 1
     declayers = [outemb] + decrnus
     return declayers
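
Tracing the dimension bookkeeping of getdeclayers with hypothetical sizes: with inconcat=True, the first decoder RNU consumes the output embedding concatenated with the last encoder state:

 # getdeclayers(outembdim=50, outvocsize=1000, lastencinnerdim=200,
 #              decinnerdim=[300, 300], rnu=GRU, inconcat=True)
 # dims = [50 + 200, 300, 300]
 # -> [VectorEmbed(1000 -> 50), GRU(dim=250, innerdim=300), GRU(dim=300, innerdim=300)]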
Example #21
    def __init__(self,
                 wordembdim=50,
                 wordencdim=50,
                 entembdim=200,
                 innerdim=200,
                 attdim=100,
                 outdim=1e4,
                 numwords=4e5,
                 numchars=128,
                 glovepath=None,
                 **kw):
        super(FBSeqCompEncDecAtt, self).__init__(**kw)
        self.indim = wordembdim + wordencdim
        self.outdim = outdim
        self.wordembdim = wordembdim
        self.wordencdim = wordencdim
        self.encinnerdim = innerdim
        self.entembdim = entembdim
        self.decinnerdim = innerdim

        self.wordencoder = WordEncoderPlusGlove(numchars=numchars,
                                                numwords=numwords,
                                                encdim=self.wordencdim,
                                                embdim=self.wordembdim,
                                                embtrainfrac=0.0,
                                                glovepath=glovepath)

        self.rnn = RecStack(
            self.wordencoder,
            GRU(dim=wordembdim + wordencdim, innerdim=self.encinnerdim))
        attgen = LinearGateAttentionGenerator(indim=self.encinnerdim +
                                              self.decinnerdim,
                                              innerdim=attdim)
        attcon = WeightedSumAttCon()
        self.dec = SeqDecoder([
            VectorEmbed(indim=self.outdim, dim=self.entembdim),
            GRU(dim=self.entembdim, innerdim=self.decinnerdim)
        ],
                              attention=Attention(attgen, attcon),
                              outconcat=True,
                              inconcat=False,
                              innerdim=self.encinnerdim + self.decinnerdim)
Example #22
 def __init__(self,
              indim=400,
              embdim=50,
              inpemb=None,
              innerdim=100,
              outdim=50,
              rnu=GRU,
              **kw):
     if inpemb is None:
         emb = VectorEmbed(indim=indim, dim=embdim)
     else:
         emb = inpemb
         embdim = emb.outdim
     if not issequence(innerdim):
         innerdim = [innerdim]
     innerdim = [embdim] + innerdim
     rnn, _ = MakeRNU.fromdims(innerdim, rnu=rnu)
     smo = Lin(indim=innerdim[-1], dim=outdim)
     super(SimpleSeqTrans, self).__init__(emb, *(rnn + [smo, Softmax()]),
                                          **kw)
Example #23
    def __init__(self,
                 wordembdim=50,
                 wordencdim=100,
                 entembdim=200,
                 innerdim=200,
                 outdim=1e4,
                 numwords=4e5,
                 numchars=128,
                 glovepath=None,
                 **kw):
        super(FBSeqCompositeEncDec, self).__init__(**kw)
        self.indim = wordembdim + wordencdim
        self.outdim = outdim
        self.wordembdim = wordembdim
        self.wordencdim = wordencdim
        self.encinnerdim = innerdim
        self.entembdim = entembdim
        self.decinnerdim = innerdim

        self.enc = SeqEncoder(
            WordEncoderPlusGlove(numchars=numchars,
                                 numwords=numwords,
                                 encdim=self.wordencdim,
                                 embdim=self.wordembdim,
                                 embtrainfrac=0.0,
                                 glovepath=glovepath),
            GRU(dim=self.wordembdim + self.wordencdim,
                innerdim=self.encinnerdim))

        self.dec = SeqDecoder(
            [VectorEmbed(indim=self.outdim, dim=self.entembdim),
             GRU(dim=self.entembdim + self.encinnerdim,
                 innerdim=self.decinnerdim)],
            inconcat=True,
            innerdim=self.decinnerdim)
Example #24
    def test_mask_propagation_all_states(self):
        m = SeqEncoder(VectorEmbed(maskid=0, indim=100, dim=7),
                       GRU(dim=7, innerdim=30)).all_outputs()\
            .maskoptions(MaskSetMode.ZERO)
        data = np.random.randint(1, 100, (5, 3), dtype="int32")
        ndata = np.zeros_like(data)
        data = np.concatenate([data, ndata], axis=1)

        dataval = Val(data)
        embvar = m.embedder(dataval)
        embpred = embvar.eval()
        embmaskpred = embvar.mask.eval()

        encvar = m(dataval)
        encpred = encvar.eval()
        encmaskpred = encvar.mask.eval()
        print encpred.shape
        print encmaskpred.shape
        print encmaskpred
        self.assertTrue(np.sum(encmaskpred - embmaskpred) == 0)
Example #25
 def __init__(self,
              indim=500,
              inpembdim=100,
              inpemb=None,
              innerdim=200,
              window=5,
              poolmode="max",
              activation=Tanh,
              maskid=None,
              **kw):
     if inpemb is None:
         self.embedder = VectorEmbed(indim, inpembdim, maskid=maskid)
     else:
         self.embedder = inpemb
         inpembdim = inpemb.outdim
     super(CNNSeqEncoder, self).__init__(indim=inpembdim,
                                         innerdim=innerdim,
                                         window=window,
                                         poolmode=poolmode,
                                         activation=activation,
                                         **kw)
Example #26
 def __init__(self,
              indim=400,
              embdim=50,
              inpemb=None,
              innerdim=100,
              outdim=50,
              rnu=GRU,
              **kw):
     if inpemb is None:
         self.emb = VectorEmbed(indim=indim, dim=embdim)
     else:
         self.emb = inpemb
         embdim = self.emb.outdim
     if not issequence(innerdim):
         innerdim = [innerdim]
     innerdim = [embdim] + innerdim
     self.rnn = MakeRNU.fromdims(innerdim, rnu=rnu)[0]
     super(SimpleSeqTransducer, self).__init__(self.emb,
                                               *self.rnn,
                                               smodim=innerdim[-1],
                                               outdim=outdim,
                                               **kw)
Example #27
def run(epochs=50,
        numbats=25,
        lr=0.1,
        layers=1,
        embdim=100,
        encdim=200,
        bidir=False,
        mode="wordchar",        # "char" or "word" or "wordchar"
        maxlen=75,
        maxwordlen=15,
        ):
    maskid = -1
    (traindata, traingold), (testdata, testgold), dic = \
        readdata("../../../data/hatespeech/train.csv",
                 "../../../data/hatespeech/test.csv",
                 masksym=maskid, mode=mode, maxlen=maxlen)

    # data stats
    print "class distribution in train: {}% positive".format(np.sum(traingold)*1. / np.sum(np.ones_like(traingold)))
    print "class distribution in test: {}% positive".format(np.sum(testgold)*1. / np.sum(np.ones_like(testgold)))

    inpemb = VectorEmbed(indim=len(dic), dim=embdim)
    encdim = [encdim] * layers
    if mode == "wordchar":
        enc = WordCharSentEnc(charemb=inpemb, charinnerdim=embdim,
                              wordemb=False, wordinnerdim=encdim,
                              maskid=maskid, bidir=bidir)
    else:
        enc = SimpleSeq2Vec(inpemb=inpemb, innerdim=encdim, maskid=maskid, bidir=bidir)

    m = SMOWrap(enc, outdim=2, nobias=True)
    #print enc.predict(traindata[:5, :])
    m = m.train([traindata], traingold)\
        .adadelta(lr=lr).grad_total_norm(1.0)\
        .cross_entropy().split_validate(6, random=True).cross_entropy().accuracy()\
        .train(numbats=numbats, epochs=epochs)

    m.save("hatemodel.{}.Emb{}D.Enc{}D.{}L.model".format(mode, embdim, encdim, layers))
Example #28
 def getenclayers(self, inpembdim, inpvocsize, encinnerdim, bidir, rnu):
     if inpembdim is None:
         inpemb = IdxToOneHot(inpvocsize)
         inpembdim = inpvocsize
     elif isinstance(inpembdim, Block):
         inpemb = inpembdim
         inpembdim = inpemb.outdim
     else:
         inpemb = VectorEmbed(indim=inpvocsize, dim=inpembdim)
     encrnus = []
     dims = [inpembdim] + encinnerdim
     #print dims
     i = 1
     lastencinnerdim = dims[-1] if not bidir else dims[-1] * 2
     while i < len(dims):
         if bidir:
             newrnu = BiRNU.fromrnu(rnu, dim=dims[i - 1], innerdim=dims[i])
         else:
             newrnu = rnu(dim=dims[i - 1], innerdim=dims[i])
         encrnus.append(newrnu)
         i += 1
     enclayers = [inpemb] + encrnus
     return enclayers, lastencinnerdim
Example #29
 def __init__(self,
              encdim=44,
              invocsize=500,
              outvocsize=27,
              innerdim=300,
              seqlen=20,
              **kw):
     super(idx2seq, self).__init__(**kw)
     self.invocsize = invocsize
     self.outvocsize = outvocsize
     self.innerdim = innerdim
     self.seqlen = seqlen
     self.encdim = encdim
     self.emb = VectorEmbed(indim=self.invocsize,
                            dim=self.encdim,
                            normalize=False)
     self.dec = SeqDecoder([
         IdxToOneHot(self.outvocsize),
         GRU(dim=self.outvocsize + self.encdim,
             innerdim=self.innerdim,
             nobias=True)
     ],
                           inconcat=True,
                           innerdim=self.innerdim)
Example #30
 def __init__(self, embdim, numents, numrels, **kw):
     self.A = VectorEmbed(indim=numents, dim=embdim, normalize=True)
     self.R = VectorEmbed(indim=numrels, dim=embdim, normalize=True)
     self.scorer = EuclideanDistance()
     super(TransE, self).__init__(**kw)
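
TransE scores a triple (s, r, o) by how close the translated subject A(s) + R(r) lands to the object A(o); the constructor above only wires up the embeddings and the EuclideanDistance scorer. A hypothetical forward pass consistent with that setup (a sketch, not the class's actual method):

 def score_sketch(self, sidx, ridx, oidx):
     # Euclidean distance between translated subject and object embeddings (lower is better)
     return self.scorer(self.A(sidx) + self.R(ridx), self.A(oidx))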