def test_output_shape(self):
    """Prediction on concatenated word+char index input yields (batch, outdim)."""
    batch_size, word_seq_len, char_seq_len = 100, 3, 5
    n_vocab_words, n_chars = 100, 10
    n_entities = 77  # output dimension requested from the encoder
    model = FBBasicCompositeEncoder(
        wordembdim=50,
        wordencdim=20,
        innerdim=40,
        outdim=n_entities,
        numchars=n_chars,
        numwords=n_vocab_words,
        glovepath="../../../data/glove/miniglove.%dd.txt",
    )
    # Word ids occupy the first slot along the last axis; char ids fill the rest.
    word_ids = np.random.randint(0, n_vocab_words, (batch_size, word_seq_len, 1))
    char_ids = np.random.randint(0, n_chars, (batch_size, word_seq_len, char_seq_len))
    combined = np.concatenate([word_ids, char_ids], axis=2)
    self.assertEqual(model.predict(combined).shape, (batch_size, n_entities))
def test_output_shape(self):
    """The composite encoder maps a (batch, wseq, 1 + cseq) int tensor to (batch, outdim)."""
    batsize = 100
    wseqlen, cseqlen = 3, 5
    vocnumwords, numchars = 100, 10
    datanuments = 77
    enc = FBBasicCompositeEncoder(
        wordembdim=50, wordencdim=20, innerdim=40,
        outdim=datanuments, numchars=numchars, numwords=vocnumwords,
        glovepath="../../../data/glove/miniglove.%dd.txt",
    )
    # Build one sample batch: a word-id column followed by the character ids.
    sample = np.concatenate(
        [np.random.randint(0, vocnumwords, (batsize, wseqlen, 1)),
         np.random.randint(0, numchars, (batsize, wseqlen, cseqlen))],
        axis=2)
    self.assertEqual(enc.predict(sample).shape, (batsize, datanuments))
def run(
        epochs=100,
        lr=0.5,
        wreg=0.0001,
        numbats=100,
        fblexpath="../../data/freebase/labelsrevlex.map.sample",
        glovepath="../../data/glove/glove.6B.50d.txt",
        fbentdicp="../../data/freebase/entdic.all.map",
        numwords=10,
        numchars=30,
        wordembdim=50,
        wordencdim=100,
        innerdim=300,
        wordoffset=1,
        validinter=3,
        gradnorm=1.0,
        validsplit=100,
):
    """Load the Freebase lexicon data, train an FBBasicCompositeEncoder on it,
    and print the shape of a prediction over the training data.

    Training uses Adagrad with gradient-norm clipping and cross-entropy loss;
    a random split of `validsplit` parts is held out and accuracy is reported
    every `validinter` epochs.

    NOTE(review): `wreg` is accepted but never used below — confirm whether
    weight regularization should be wired into the trainer chain.
    """
    tt = ticktock("fblextransrun")
    traindata, golddata, vocnuments, vocnumwords, datanuments = \
        loadlexdata(glovepath, fbentdicp, fblexpath, wordoffset, numwords, numchars)
    tt.tock("made data").tick()

    # Define the model. The encoder's character vocabulary is the full ASCII
    # range (128), independent of the `numchars` used for data loading.
    m = FBBasicCompositeEncoder(
        wordembdim=wordembdim,
        wordencdim=wordencdim,
        innerdim=innerdim,
        outdim=datanuments,
        numchars=128,  # ASCII
        numwords=vocnumwords,
    )

    # Train the model.
    tt.tick("training")
    m.train([traindata], golddata).adagrad(lr=lr).grad_total_norm(gradnorm).cross_entropy()\
        .autovalidate(splits=validsplit, random=True).validinter(validinter).accuracy()\
        .train(numbats, epochs)

    tt.tock("trained").tick("predicting")
    # Parenthesized print works identically in Python 2 and 3 for one argument;
    # the original bare `print` statement is a syntax error under Python 3.
    print(m.predict(traindata).shape)
    tt.tock("predicted sample")
def run(epochs=100, lr=0.5, wreg=0.0001, numbats=100,
        fblexpath="../../data/freebase/labelsrevlex.map.sample",
        glovepath="../../data/glove/glove.6B.50d.txt",
        fbentdicp="../../data/freebase/entdic.all.map",
        numwords=10, numchars=30, wordembdim=50, wordencdim=100,
        innerdim=300, wordoffset=1, validinter=3, gradnorm=1.0,
        validsplit=100):
    """Load lexicon data, train the composite encoder on it, and print
    the shape of a prediction over the training set."""
    timer = ticktock("fblextransrun")
    loaded = loadlexdata(glovepath, fbentdicp, fblexpath,
                         wordoffset, numwords, numchars)
    traindata, golddata, vocnuments, vocnumwords, datanuments = loaded
    timer.tock("made data").tick()

    # Build the model; its character vocabulary spans the ASCII range.
    encoder = FBBasicCompositeEncoder(
        wordembdim=wordembdim, wordencdim=wordencdim, innerdim=innerdim,
        outdim=datanuments,
        numchars=128,  # ASCII
        numwords=vocnumwords)

    timer.tick("training")
    # Same fluent trainer configuration as before, split into named steps.
    trainer = encoder.train([traindata], golddata)
    trainer = trainer.adagrad(lr=lr).grad_total_norm(gradnorm).cross_entropy()
    trainer = trainer.autovalidate(splits=validsplit, random=True)
    trainer.validinter(validinter).accuracy().train(numbats, epochs)

    timer.tock("trained").tick("predicting")
    print(encoder.predict(traindata).shape)
    timer.tock("predicted sample")