def test_seqdecatt(  # seems to work
        wreg=0.00001,  # TODO: regularization other than 0.0001 first stagnates, then goes down
        epochs=50,
        numbats=20,
        lr=0.1,
        statedim=50,
        encdim=50,
        attdim=50,
        startsym=0,
):
    """Smoke-test decoding with an untrained SimpleSeqEncDecAtt block.

    Loads Glove words, keeps the purely lowercase a-z ones, builds a
    ``SimpleSeqEncDecAtt`` block and runs ``SeqEncDecSearch.decode`` on a few
    sample words, printing the encoded input and the decoded output. The
    block is never trained here and nothing is asserted.

    Args:
        wreg: Not used by this function (kept for signature compatibility).
        epochs: Not used by this function.
        numbats: Not used by this function.
        lr: Not used by this function.
        statedim: Passed to the block as ``decdim``.
        encdim: Passed to the block as ``encdim``.
        attdim: Passed to the block as ``attdim``.
        startsym: Passed to ``decode`` as its second argument (start symbol).
    """
    # NOTE(review): another definition of test_seqdecatt is visible in this
    # file; at import time the later definition replaces the earlier one.

    # get words
    # NOTE(review): run_seqdecatt uses vocsize = 28 -- confirm which value
    # matches the encoding produced by words2ints.
    vocsize = 27
    embdim = 50
    lm = Glove(embdim, 2000)

    # A list comprehension (instead of filter()) yields a sliceable list on
    # both Python 2 and Python 3.
    lowercase_word = re.compile(r"^[a-z]+$")
    allwords = [word for word in lm.D.keys() if lowercase_word.match(word)]

    # Only the encoded width of the words from index 1000 onwards is needed.
    data = words2ints(allwords[1000:])

    testwords = [
        "the", "alias", "mock", "test", "stalin",
        "allahuakbar", "python", "pythonista",
        # A run of spaces as long as data's second dimension; presumably this
        # makes words2ints pad the test batch to that width -- TODO confirm.
        " " * data.shape[1],
    ]
    testpred = words2ints(testwords)
    print(testpred)

    block = SimpleSeqEncDecAtt(
        inpvocsize=vocsize,
        outvocsize=vocsize,
        encdim=encdim,
        decdim=statedim,
        attdim=attdim,
        inconcat=False,
    )
    searcher = SeqEncDecSearch(block)
    # Third argument is the width of the encoded test batch (presumably the
    # maximum number of decoding steps -- TODO confirm).
    pred, probs = searcher.decode(testpred, startsym, testpred.shape[1])

    # One pre-formatted string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % (ints2words(pred), probs))
def run_seqdecatt(  # seems to work
        wreg=0.00001,
        epochs=50,
        numbats=50,
        lr=0.1,
        statedim=50,
        encdim=50,
        attdim=50,
        numwords=5000,
        reverse=False,
):
    """Train a SimpleSeqEncDecAtt block on Glove words and decode samples.

    Loads ``numwords`` Glove entries, keeps the purely lowercase a-z words,
    encodes them with ``words2ints`` and trains the block to produce the gold
    sequence from the input words. After training, a few sample words are
    decoded with ``SeqEncDecSearch`` and printed.

    Args:
        wreg: L2 regularization weight passed to ``.l2()``.
        epochs: Number of epochs passed to the final ``.train()`` call.
        numbats: Number of batches passed to the final ``.train()`` call.
        lr: Learning rate passed to ``.adagrad()``.
        statedim: Passed to the block as ``decdim``.
        encdim: Passed to the block as ``encdim``.
        attdim: Passed to the block as ``attdim``.
        numwords: Second argument of the ``Glove`` constructor.
        reverse: When true, the gold sequences are the character-reversed
            words instead of the words themselves. Defaults to False, which
            trains the block to reproduce its input.
    """
    # NOTE(review): another definition of run_seqdecatt is visible in this
    # file; at import time the later definition replaces the earlier one.

    # get words
    vocsize = 28
    embdim = 50
    lm = Glove(embdim, numwords)

    # A list comprehension (instead of filter()) yields a real list on both
    # Python 2 and Python 3.
    lowercase_word = re.compile(r"^[a-z]+$")
    allwords = [word for word in lm.D.keys() if lowercase_word.match(word)]

    data = words2ints(allwords)
    startsym = 0
    if reverse:
        # Only encode the reversed words when they are actually the target.
        golddata = words2ints([word[::-1] for word in allwords])
    else:
        golddata = data

    # Second training input, derived from the gold data; computed once and
    # reused for both the debug print and training.
    shifted_gold = shiftdata(golddata, startsym)
    print(data[:10])
    print(shifted_gold[:10])

    testwords = [
        "the", "alias", "mock", "test", "stalin",
        "allahuakbar", "python", "pythonista",
    ]
    testpred = words2ints(testwords)

    block = SimpleSeqEncDecAtt(
        inpvocsize=vocsize,
        outvocsize=vocsize,
        encdim=encdim,
        decdim=statedim,
        attdim=attdim,
        inconcat=False,
        bidir=False,
        statetrans=None,
    )

    # Training configuration followed by validation configuration, then run.
    (block.train([data, shifted_gold], golddata)
          .seq_cross_entropy()
          .grad_total_norm(1.0)
          .adagrad(lr=lr)
          .l2(wreg)
          .split_validate(splits=5, random=True)
          .seq_cross_entropy()
          .seq_accuracy()
          .validinter(2)
          .train(numbats=numbats, epochs=epochs))

    searcher = SeqEncDecSearch(block)
    pred, probs = searcher.decode(testpred, startsym, testpred.shape[1])

    # One pre-formatted string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % (ints2words(pred), probs))
def test_seqdecatt(  # seems to work
        wreg=0.00001,  # TODO: regularization other than 0.0001 first stagnates, then goes down
        epochs=50,
        numbats=20,
        lr=0.1,
        statedim=50,
        encdim=50,
        attdim=50,
        startsym=0,
):
    """Smoke-test decoding with an untrained SimpleSeqEncDecAtt block.

    Loads Glove words, keeps the purely lowercase a-z ones, builds a
    ``SimpleSeqEncDecAtt`` block and runs ``SeqEncDecSearch.decode`` on a few
    sample words, printing the encoded input and the decoded output. The
    block is never trained here and nothing is asserted.

    Args:
        wreg: Not used by this function (kept for signature compatibility).
        epochs: Not used by this function.
        numbats: Not used by this function.
        lr: Not used by this function.
        statedim: Passed to the block as ``decdim``.
        encdim: Passed to the block as ``encdim``.
        attdim: Passed to the block as ``attdim``.
        startsym: Passed to ``decode`` as its second argument (start symbol).
    """
    # NOTE(review): another definition of test_seqdecatt is visible in this
    # file; at import time the later definition replaces the earlier one.

    # get words
    # NOTE(review): run_seqdecatt uses vocsize = 28 -- confirm which value
    # matches the encoding produced by words2ints.
    vocsize = 27
    embdim = 50
    lm = Glove(embdim, 2000)

    # A list comprehension (instead of filter()) yields a sliceable list on
    # both Python 2 and Python 3.
    lowercase_word = re.compile(r"^[a-z]+$")
    allwords = [word for word in lm.D.keys() if lowercase_word.match(word)]

    # Only the encoded width of the words from index 1000 onwards is needed.
    data = words2ints(allwords[1000:])

    testwords = [
        "the", "alias", "mock", "test", "stalin",
        "allahuakbar", "python", "pythonista",
        # A run of spaces as long as data's second dimension; presumably this
        # makes words2ints pad the test batch to that width -- TODO confirm.
        " " * data.shape[1],
    ]
    testpred = words2ints(testwords)
    print(testpred)

    block = SimpleSeqEncDecAtt(
        inpvocsize=vocsize,
        outvocsize=vocsize,
        encdim=encdim,
        decdim=statedim,
        attdim=attdim,
        inconcat=False,
    )
    searcher = SeqEncDecSearch(block)
    # Third argument is the width of the encoded test batch (presumably the
    # maximum number of decoding steps -- TODO confirm).
    pred, probs = searcher.decode(testpred, startsym, testpred.shape[1])

    # One pre-formatted string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % (ints2words(pred), probs))
def run_seqdecatt(  # seems to work
        wreg=0.00001,
        epochs=50,
        numbats=50,
        lr=0.1,
        statedim=50,
        encdim=50,
        attdim=50,
        numwords=5000,
        reverse=False,
):
    """Train a SimpleSeqEncDecAtt block on Glove words and decode samples.

    Loads ``numwords`` Glove entries, keeps the purely lowercase a-z words,
    encodes them with ``words2ints`` and trains the block to produce the gold
    sequence from the input words. After training, a few sample words are
    decoded with ``SeqEncDecSearch`` and printed.

    Args:
        wreg: L2 regularization weight passed to ``.l2()``.
        epochs: Number of epochs passed to the final ``.train()`` call.
        numbats: Number of batches passed to the final ``.train()`` call.
        lr: Learning rate passed to ``.adagrad()``.
        statedim: Passed to the block as ``decdim``.
        encdim: Passed to the block as ``encdim``.
        attdim: Passed to the block as ``attdim``.
        numwords: Second argument of the ``Glove`` constructor.
        reverse: When true, the gold sequences are the character-reversed
            words instead of the words themselves. Defaults to False, which
            trains the block to reproduce its input.
    """
    # NOTE(review): another definition of run_seqdecatt is visible in this
    # file; at import time the later definition replaces the earlier one.

    # get words
    vocsize = 28
    embdim = 50
    lm = Glove(embdim, numwords)

    # A list comprehension (instead of filter()) yields a real list on both
    # Python 2 and Python 3.
    lowercase_word = re.compile(r"^[a-z]+$")
    allwords = [word for word in lm.D.keys() if lowercase_word.match(word)]

    data = words2ints(allwords)
    startsym = 0
    if reverse:
        # Only encode the reversed words when they are actually the target.
        golddata = words2ints([word[::-1] for word in allwords])
    else:
        golddata = data

    # Second training input, derived from the gold data; computed once and
    # reused for both the debug print and training.
    shifted_gold = shiftdata(golddata, startsym)
    print(data[:10])
    print(shifted_gold[:10])

    testwords = [
        "the", "alias", "mock", "test", "stalin",
        "allahuakbar", "python", "pythonista",
    ]
    testpred = words2ints(testwords)

    block = SimpleSeqEncDecAtt(
        inpvocsize=vocsize,
        outvocsize=vocsize,
        encdim=encdim,
        decdim=statedim,
        attdim=attdim,
        inconcat=False,
        bidir=False,
        statetrans=None,
    )

    # Training configuration followed by validation configuration, then run.
    (block.train([data, shifted_gold], golddata)
          .seq_cross_entropy()
          .grad_total_norm(1.0)
          .adagrad(lr=lr)
          .l2(wreg)
          .split_validate(splits=5, random=True)
          .seq_cross_entropy()
          .seq_accuracy()
          .validinter(2)
          .train(numbats=numbats, epochs=epochs))

    searcher = SeqEncDecSearch(block)
    pred, probs = searcher.decode(testpred, startsym, testpred.shape[1])

    # One pre-formatted string prints identically under the Python 2 print
    # statement and the Python 3 print function.
    print("%s %s" % (ints2words(pred), probs))