Exemple #1
0
 def test_idx2seq_shape(self):
     """Check that predict() returns a (batsize, seqlen, numchars) array."""
     block_kwargs = dict(
         encdim=self.encdim,
         invocsize=self.numwords,
         outvocsize=self.numchars,
         seqlen=self.seqlen,
         innerdim=self.innerdim,
     )
     model = idx2seq(**block_kwargs)
     # Random indices: one per batch row for the first input, one per
     # (row, position) for the second.
     word_ids = np.random.randint(0, self.numwords, (self.batsize, ))
     char_ids = np.random.randint(0, self.numchars,
                                  (self.batsize, self.seqlen))
     predicted = model.predict(word_ids, char_ids)
     expected_shape = (self.batsize, self.seqlen, self.numchars)
     self.assertEqual(predicted.shape, expected_shape)
Exemple #2
0
    def setUp(self):
        """Train a small idx2seq block on Glove words, keeping frozen snapshots.

        Loads the mini Glove vocabulary, keeps only the words made purely of
        lowercase a-z letters, converts them with ``words2ints`` and
        ``shiftdata``, and builds an ``idx2seq`` block over them. The block is
        frozen once before training (``self.block_before_training_frozen``)
        and once after (``self.block_after_training_frozen``).

        Diagnostic values are printed along the way. The print calls use the
        single-argument parenthesised form so they behave the same on
        Python 2 and Python 3.
        """
        wreg = 0.001
        epochs = 3
        numbats = 10
        lr = 0.1
        statedim = 70
        encdim = 70
        # get words
        numchars = 27
        embdim = 50
        # NOTE(review): relative path, so this depends on the working
        # directory the tests are run from.
        Glove.defaultpath = "../../../data/glove/miniglove.%dd.txt"
        lm = Glove(embdim, 1000)
        # Build a real list (not a lazy filter object) so len() below works
        # on Python 3 as well as Python 2.
        lowercase_word = re.compile("^[a-z]+$")
        words = [w for w in lm.D.keys() if lowercase_word.match(w)]
        data = words2ints(words)
        sdata = shiftdata(data)
        wordidxs = np.arange(0, len(words))
        numwords = wordidxs.shape[0]
        print("random seq neg log prob %.3f" % math.log(numchars **
                                                        data.shape[1]))
        # Reference value used only for the printout below.
        testneglogprob = 17
        print("%.2f neg log prob for a whole sequence is %.3f prob per slot" % (
            testneglogprob, math.exp(-testneglogprob * 1. / data.shape[1])))

        # First 15 examples, used for the before/after prediction printouts.
        testpred = wordidxs[:15]
        testdata = data[:15]
        testsdata = sdata[:15]
        print(testpred)
        print(testdata)
        print(testsdata)
        block = idx2seq(encdim=encdim,
                        invocsize=numwords,
                        outvocsize=numchars,
                        innerdim=statedim,
                        seqlen=data.shape[1])
        print(np.argmax(block.predict(testpred, testsdata), axis=2))
        self.block_before_training_frozen = block.freeze()
        block.train([wordidxs, sdata], data).seq_cross_entropy().grad_total_norm(0.5).adagrad(lr=lr).l2(wreg)\
             .autovalidate().seq_accuracy().validinter(5)\
             .train(numbats=numbats, epochs=epochs)
        self.block_after_training_frozen = block.freeze()
        pred = block.predict(testpred, testsdata)
Exemple #3
0
    def setUp(self):
        """Build, snapshot, train and re-snapshot an idx2seq block.

        The vocabulary comes from the mini Glove file, restricted to words
        matching ``^[a-z]+$``. Those words are converted with ``words2ints``
        and ``shiftdata`` and used to train an ``idx2seq`` block. A frozen
        copy of the block is stored on ``self`` before training
        (``block_before_training_frozen``) and after training
        (``block_after_training_frozen``).

        All print calls pass a single parenthesised argument, which is valid
        and equivalent on both Python 2 and Python 3.
        """
        wreg = 0.001
        epochs = 3
        numbats = 10
        lr = 0.1
        statedim = 70
        encdim = 70
        # get words
        numchars = 27
        embdim = 50
        # NOTE(review): relative path, resolved against the current working
        # directory when the tests run.
        Glove.defaultpath = "../../../data/glove/miniglove.%dd.txt"
        lm = Glove(embdim, 1000)
        # A list comprehension (rather than filter) keeps `words` a list on
        # Python 3, where filter() returns an iterator with no len().
        words = [w for w in lm.D.keys() if re.match("^[a-z]+$", w)]
        data = words2ints(words)
        sdata = shiftdata(data)
        wordidxs = np.arange(0, len(words))
        numwords = wordidxs.shape[0]
        print("random seq neg log prob %.3f" % math.log(numchars**data.shape[1]))
        # Reference value used only for the printout below.
        testneglogprob = 17
        print("%.2f neg log prob for a whole sequence is %.3f prob per slot" % (testneglogprob, math.exp(-testneglogprob*1./data.shape[1])))

        # First 15 examples, used for the before/after prediction printouts.
        testpred = wordidxs[:15]
        testdata = data[:15]
        testsdata = sdata[:15]
        print(testpred)
        print(testdata)
        print(testsdata)
        block = idx2seq(encdim=encdim, invocsize=numwords, outvocsize=numchars, innerdim=statedim, seqlen=data.shape[1])
        print(np.argmax(block.predict(testpred, testsdata), axis=2))
        self.block_before_training_frozen = block.freeze()
        block.train([wordidxs, sdata], data).seq_cross_entropy().grad_total_norm(0.5).adagrad(lr=lr).l2(wreg)\
             .autovalidate().seq_accuracy().validinter(5)\
             .train(numbats=numbats, epochs=epochs)
        self.block_after_training_frozen = block.freeze()
        pred = block.predict(testpred, testsdata)
Exemple #4
0
 def test_idx2seq_shape(self):
     """Assert the prediction shape is (batsize, seqlen, numchars)."""
     seq_block = idx2seq(
         encdim=self.encdim,
         invocsize=self.numwords,
         outvocsize=self.numchars,
         seqlen=self.seqlen,
         innerdim=self.innerdim,
     )
     # Sample the index input first, then the per-position input, as before.
     idx_shape = (self.batsize, )
     idx_input = np.random.randint(0, self.numwords, idx_shape)
     seq_shape = (self.batsize, self.seqlen)
     seq_input = np.random.randint(0, self.numchars, seq_shape)
     out = seq_block.predict(idx_input, seq_input)
     self.assertEqual(
         out.shape,
         (self.batsize, self.seqlen, self.numchars),
     )