        # NOTE(review): tail fragment of a `run` script (Python 2); the enclosing
        # def and the `if` that pairs with the `else:` below start above this
        # chunk. Indentation reconstructed from a whitespace-mangled paste.
        # Map predicted class ids back to entity labels (None for unknown ids);
        # the `- 1` presumably undoes a +1 offset applied when encoding — TODO confirm.
        print np.vectorize(lambda x: reventdic[x] if x in reventdic else None)( np.argmax(pred, axis=1) - 1)
        tt.tock("predicted sample")
        tt.tick("training")
        # Train on a fixed 150-example slice with Adagrad, cross-entropy loss,
        # gradient-norm clipping at 0.5 and 5-way random split validation.
        m.train([lexdata[1:151]], entids[1:151]).adagrad(lr=lr).cross_entropy().grad_total_norm(0.5)\
            .split_validate(5, random=True).validinter(validinter).accuracy()\
            .train(numbats, epochs)
    else:
        #embed()
        tt.tick("predicting")
        # Smoke-test prediction on the first 5 examples before training.
        print traindata[:5].shape, outdata[:5].shape
        pred = m.predict(traindata[:5], outdata[:5])
        print np.argmax(pred, axis=2) - 1
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
        tt.tock("predicted sample")
        tt.tick("training")
        # Sequence model training: Adagrad + clipped gradients + seq cross-entropy,
        # validated on a deterministic (random=False) 5-way split.
        m.train([traindata, outdata], golddata).adagrad(lr=lr).grad_total_norm(gradnorm).seq_cross_entropy()\
            .split_validate(splits=5, random=False).validinter(validinter).seq_accuracy().seq_cross_entropy()\
            .train(numbats, epochs)
        #embed()
        tt.tock("trained").tick("predicting")
        # Post-training sample prediction on the first 50 examples.
        pred = m.predict(traindata[:50], outdata[:50])
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
        tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="mem att")
m = SimpleSeqEncDecAtt( inpvocsize=numwords, inpembdim=wordembdim, outvocsize=numlabels, outembdim=lablembdim, encdim=innerdim, decdim=innerdim, attdim=attdim, inconcat=False ) # training m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(1.).seq_cross_entropy().l2(wreg)\ .validate_on([testdata, shiftdata(testgold), testmask], testgold).seq_cross_entropy().seq_accuracy().validinter(validinter)\ .train(numbats, epochs) # predict after training s = SeqEncDecAttSearch(m) testpred = s.decode(testdata) testpred = testpred * testmask #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask) #testpred = np.argmax(testpredprobs, axis=2)-1 #testpred = testpred * testmask #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred) evalres = atiseval(testpred-1, testgold-1, label2idxrev); print evalres if __name__ == "__main__": argprun(run, epochs=1)
for k, v in entdic.items(): reventdic[v] = k # embed() outdata = shiftdata(golddata) tt.tick("predicting") print traindata[:5].shape, outdata[:5].shape #print golddata[:5] ; exit() pred = m.predict(traindata[:5], outdata[:5]) print np.argmax(pred, axis=2) - 1 print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1) tt.tock("predicted sample") tt.tick("training") m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \ .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \ .train(numbats, epochs) # embed() tt.tock("trained").tick("predicting") pred = m.predict(validdata, shiftdata(validgold)) print np.argmax(pred, axis=2) - 1 #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1) tt.tock("predicted sample") if __name__ == "__main__": argprun(run, model="lex")
    # NOTE(review): tail fragment of a `run` script (Python 2); indentation
    # reconstructed from a whitespace-mangled paste.
    testgold = getdatamatrix(test, maxlen, 2).astype("int32")
    # Mask out padding positions (id 0 presumably is padding — TODO confirm).
    testmask = (testdata > 0).astype("float32")
    # Sanity check: evaluating gold against itself should yield a perfect score.
    res = atiseval(testgold-1, testgold-1, label2idxrev); print res#; exit()

    # define model: stacked recurrent transducer-decoder, `depth` layers deep.
    innerdim = [innerdim] * depth
    m = SimpleSeqTransDec(indim=numwords, inpembdim=wordembdim, outembdim=lablembdim, innerdim=innerdim, outdim=numlabels)

    # training: teacher forcing, Adagrad, clipping at 5.0, 5-way random split
    # validation, keeping the best model seen (takebest).
    m = m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(5.0).seq_cross_entropy().l2(wreg)\
        .split_validate(splits=5, random=True).seq_cross_entropy().seq_accuracy().validinter(validinter).takebest()\
        .train(numbats, epochs)

    # predict after training: greedy transduction decode, then mask padding.
    s = SeqTransDecSearch(m)
    testpred, _ = s.decode(testdata)
    testpred = testpred * testmask
    evalres = atiseval(testpred-1, testgold-1, label2idxrev); print evalres
    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)


if __name__ == "__main__":
    argprun(run, epochs=10)
def runstuff(modelname, griddict, scriptname): for i in range(reduce(lambda x, y: x * y, map(len, griddict.values()))): indexes = OrderedDict() for k, v in griddict.items(): indexes[k] = i % len(v) i //= len(v) #print indexes options = "".join([ "-{} {} ".format(x, griddict[x][indexes[x]]) for x in griddict.keys() ]) cmd = """python {} -loadmodel {} {}"""\ .format(scriptname, modelname, options ) cmd = re.sub("\n", "", cmd) cmd = re.sub("\s{2,}", " ", cmd) print cmd targetname = "alleval/{}.out".format(re.sub("\s", "_", cmd)) os.system("echo {} > {}".format(cmd, targetname)) os.system("{} >> {} 2>&1".format(cmd, targetname)) if __name__ == "__main__": argprun(main)
####data = np.eye(numchars, numchars)[data, :] block = seq2idx(invocsize=numchars, outvocsize=numwords, innerdim=statedim) '''gru = GRU(innerdim=statedim, dim=numchars) lin = Lin(indim=statedim, dim=numwords) lin2 = Lin(indim=numwords, dim=numwords) block = asblock(lambda x: Softmax()(lin(gru(x)[:, -1, :])))''' ###block = asblock(lambda x: Softmax()(lin2(x))) ''' print testpred probepred = np.argmax(block.predict(testpred), axis=1) print probepred for p in block.output.allparams: print p ''' block.train([data], wordidxs).cross_entropy().adagrad(lr=lr).autovalidate().accuracy().validinter(5)\ .train(numbats=numbats, epochs=epochs) #embed() pred = block.predict(testpred) print pred.shape print np.argmax(pred, axis=1) #''' if __name__ == "__main__": argprun(run_seqdecatt, epochs=50) #print ints2words(np.asarray([[20,8,5,0,0,0], [1,2,3,0,0,0]]))
        # NOTE(review): tail fragment of a `run` script (Python 2); the enclosing
        # def and the `if` that pairs with the `else:` below start above this
        # chunk. Indentation reconstructed from a whitespace-mangled paste.
        print np.argmax(pred, axis=1)-1
        # Map predicted ids back to entity labels (None for unknown ids).
        print np.vectorize(lambda x: reventdic[x] if x in reventdic else None)(np.argmax(pred, axis=1)-1)
        tt.tock("predicted sample")
        tt.tick("training")
        # Train on a fixed 150-example slice with Adagrad, cross-entropy loss,
        # gradient-norm clipping at 0.5 and 5-way random split validation.
        m.train([lexdata[1:151]], entids[1:151]).adagrad(lr=lr).cross_entropy().grad_total_norm(0.5)\
            .split_validate(5, random=True).validinter(validinter).accuracy()\
            .train(numbats, epochs)
    else:
        #embed()
        tt.tick("predicting")
        # Smoke-test prediction on the first 5 examples before training.
        print traindata[:5].shape, outdata[:5].shape
        pred = m.predict(traindata[:5], outdata[:5])
        print np.argmax(pred, axis=2)-1
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2)-1)
        tt.tock("predicted sample")
        tt.tick("training")
        # Sequence model training with deterministic (random=False) 5-way split.
        m.train([traindata, outdata], golddata).adagrad(lr=lr).grad_total_norm(gradnorm).seq_cross_entropy()\
            .split_validate(splits=5, random=False).validinter(validinter).seq_accuracy().seq_cross_entropy()\
            .train(numbats, epochs)
        #embed()
        tt.tock("trained").tick("predicting")
        # Post-training sample prediction on the first 50 examples.
        pred = m.predict(traindata[:50], outdata[:50])
        print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2)-1)
        tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="mem att")
#wenc = WordEncoderPlusGlove(numchars=numchars, numwords=vocnumwords, encdim=wordencdim, embdim=wordembdim) tt.tock("model defined") # embed() outdata = shiftdata(golddata) tt.tick("predicting") print traindata[:5].shape, outdata[:5].shape #print golddata[:5] ; exit() pred = m.predict(traindata[:5], outdata[:5]) print np.argmax(pred, axis=2) - 1 print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1) tt.tock("predicted sample") tt.tick("training") m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \ .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \ .train(numbats, epochs) # embed() tt.tock("trained").tick("predicting") pred = m.predict(validdata, shiftdata(validgold)) print np.argmax(pred, axis=2) - 1 #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1) tt.tock("predicted sample") if __name__ == "__main__": argprun(run, model="att lex")
    # NOTE(review): tail fragment of a `run` script (Python 2); indentation
    # reconstructed from a whitespace-mangled paste.
    # Pick the evaluator debug mode from the prediction-target flags.
    debugarg = "subj" if subjpred else "pred" if predpred else False
    evalres = eval.eval(pred, testgold, debug=debugarg)
    for k, evalre in evalres.items():
        print("{}:\t{}".format(k, evalre))
    tt.tock("evaluated")
    # save: write results to <scriptname>.results/<i>.res, using the first
    # free index in 0..99.
    basename = os.path.splitext(os.path.basename(__file__))[0]
    dirname = basename + ".results"
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    savenamegen = lambda i: "{}/{}.res".format(dirname, i)
    savename = None
    for i in xrange(100):
        savename = savenamegen(i)
        if not os.path.exists(savename):
            break
        savename = None
    if savename is None:
        raise Exception("exceeded number of saved results")
    # First line records the full command line; then one "metric:<TAB>value"
    # line per evaluation result.
    with open(savename, "w") as f:
        f.write("{}\n".format(" ".join(sys.argv)))
        for k, evalre in evalres.items():
            f.write("{}:\t{}\n".format(k, evalre))
    #scorer.save(filepath=savename)


if __name__ == "__main__":
    argprun(run, debug=True)
####data = np.eye(numchars, numchars)[data, :] block = seq2idx(invocsize=numchars, outvocsize=numwords, innerdim=statedim) '''gru = GRU(innerdim=statedim, dim=numchars) lin = Lin(indim=statedim, dim=numwords) lin2 = Lin(indim=numwords, dim=numwords) block = asblock(lambda x: Softmax()(lin(gru(x)[:, -1, :])))''' ###block = asblock(lambda x: Softmax()(lin2(x))) ''' print testpred probepred = np.argmax(block.predict(testpred), axis=1) print probepred for p in block.output.allparams: print p ''' block.train([data], wordidxs).cross_entropy().adagrad(lr=lr).autovalidate().accuracy().validinter(5)\ .train(numbats=numbats, epochs=epochs) #embed() pred = block.predict(testpred) print pred.shape print np.argmax(pred, axis=1) #''' if __name__ == "__main__": argprun(run_RNNAutoEncoder) #print ints2words(np.asarray([[20,8,5,0,0,0], [1,2,3,0,0,0]]))
# define model innerdim = [innerdim] * depth m = SimpleSeqTransDec(indim=numwords, inpembdim=wordembdim, outembdim=lablembdim, innerdim=innerdim, outdim=numlabels) # training m = m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(5.0).seq_cross_entropy().l2(wreg)\ .split_validate(splits=5, random=True).seq_cross_entropy().seq_accuracy().validinter(validinter).takebest()\ .train(numbats, epochs) # predict after training s = GreedySearch(m, startsymbol=0) testpred, _ = s.decode(testdata) testpred = testpred * testmask evalres = atiseval(testpred - 1, testgold - 1, label2idxrev) print evalres #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask) #testpred = np.argmax(testpredprobs, axis=2)-1 #testpred = testpred * testmask #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred) if __name__ == "__main__": argprun(run, epochs=10)
    # NOTE(review): tail fragment of a `run` script (Python 2); indentation
    # reconstructed from a whitespace-mangled paste.
    # Build the reverse entity dictionary (id -> entity) for decoding predictions.
    reventdic = {}
    for k, v in entdic.items():
        reventdic[v] = k
    # embed()
    # Shift gold one step to produce teacher-forcing decoder inputs.
    outdata = shiftdata(golddata)
    tt.tick("predicting")
    # Smoke-test prediction on the first 5 examples before training.
    print traindata[:5].shape, outdata[:5].shape
    #print golddata[:5] ; exit()
    pred = m.predict(traindata[:5], outdata[:5])
    print np.argmax(pred, axis=2) - 1
    print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")
    tt.tick("training")
    # Adagrad + L2 + clipped gradients + sequence cross-entropy, validated on a
    # held-out validation set.
    m.train([traindata, outdata], golddata).adagrad(lr=lr).l2(wreg).grad_total_norm(gradnorm).seq_cross_entropy() \
        .validate_on([validdata, shiftdata(validgold)], validgold).validinter(validinter).seq_accuracy().seq_cross_entropy() \
        .train(numbats, epochs)
    # embed()
    tt.tock("trained").tick("predicting")
    # Post-training sample prediction on the validation set.
    pred = m.predict(validdata, shiftdata(validgold))
    print np.argmax(pred, axis=2) - 1
    #print np.vectorize(lambda x: reventdic[x])(np.argmax(pred, axis=2) - 1)
    tt.tock("predicted sample")


if __name__ == "__main__":
    argprun(run, model="lex")
    # NOTE(review): tail fragment of a `run` script (Python 2); indentation
    # reconstructed from a whitespace-mangled paste.
    # Sanity check: evaluating gold against itself should yield a perfect score.
    res = atiseval(testgold-1, testgold-1, label2idxrev); print res#; exit()
    # Report in-memory size of the training matrix.
    print asizeof(traindata)

    # define model: stacked recurrent transducer-decoder, `depth` layers deep.
    innerdim = [innerdim] * depth
    m = SimpleSeqTransDec(indim=numwords, inpembdim=wordembdim, outembdim=lablembdim, innerdim=innerdim, outdim=numlabels)

    # training. NOTE(review): this variant uses cross_validate where sibling
    # scripts use split_validate — confirm the method name is intentional.
    m = m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(5.0).seq_cross_entropy().l2(wreg)\
        .cross_validate(splits=5, random=True).seq_cross_entropy().seq_accuracy().validinter(validinter).takebest()\
        .train(numbats, epochs)

    # predict after training: decode, then zero out padding positions.
    s = SeqTransDecSearch(m)
    testpred = s.decode(testdata)
    testpred = testpred * testmask
    evalres = atiseval(testpred-1, testgold-1, label2idxrev); print evalres
    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)


if __name__ == "__main__":
    argprun(run, epochs=0)
    # NOTE(review): tail fragment of a `run` script (Python 2); indentation
    # reconstructed from a whitespace-mangled paste.
    # Pick the evaluator debug mode from the prediction-target flags.
    debugarg = "subj" if subjpred else "pred" if predpred else False
    evalres = eval.eval(pred, testgold, debug=debugarg)
    for k, evalre in evalres.items():
        print("{}:\t{}".format(k, evalre))
    tt.tock("evaluated")
    # save: write results to <scriptname>.results/<i>.res, using the first
    # free index in 0..999.
    basename = os.path.splitext(os.path.basename(__file__))[0]
    dirname = basename + ".results"
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    savenamegen = lambda i: "{}/{}.res".format(dirname, i)
    savename = None
    for i in xrange(1000):
        savename = savenamegen(i)
        if not os.path.exists(savename):
            break
        savename = None
    if savename is None:
        raise Exception("exceeded number of saved results")
    # First line records the full command line; then one "metric:<TAB>value"
    # line per evaluation result.
    with open(savename, "w") as f:
        f.write("{}\n".format(" ".join(sys.argv)))
        for k, evalre in evalres.items():
            f.write("{}:\t{}\n".format(k, evalre))
    #scorer.save(filepath=savename)


if __name__ == "__main__":
    argprun(run, debug=True)
            # NOTE(review): tail fragment of a `run` script (Python 2); the
            # `if memaddr == ...:` branch that this assignment belongs to, and
            # the outer `if` paired with the `else:` below, start above this
            # chunk. Indentation reconstructed from a whitespace-mangled paste.
            memaddr = DotMemAddr
        elif memaddr == "lin":
            memaddr = LinearGateMemAddr
        # Memory-addressed decoder head over the encoded memory.
        dec = MemVec2Idx(memenc, memdata, memdim=innerdim, memaddr=memaddr, memattdim=memattdim)
    else:
        # Plain linear decoder head (no external memory).
        dec = SimpleVec2Idx(indim=innerdim, outdim=numrels)
    m = Seq2Idx(enc, dec)
    # Train with Adagrad + L2 + clipping, validated on held-out data, keeping
    # the best model seen (takebest).
    m = (
        m.train([traindata], traingold)
        .adagrad(lr=lr)
        .l2(wreg)
        .grad_total_norm(1.0)
        .cross_entropy()
        .validate_on([validdata], validgold)
        .accuracy()
        .cross_entropy()
        .takebest()
        .train(numbats=numbats, epochs=epochs)
    )
    # Evaluate accuracy(?) of argmax predictions on the test set — the metric
    # semantics are defined by `evaluate`, not visible here.
    pred = m.predict(testdata)
    print pred.shape
    evalres = evaluate(np.argmax(pred, axis=1), testgold)
    print str(evalres) + "%"


if __name__ == "__main__":
    argprun(run)
    # NOTE(review): tail fragment of a `run` script (Python 2); indentation
    # reconstructed from a whitespace-mangled paste.
    # Stack `depth` recurrent layers of the same width.
    innerdim = [innerdim] * depth
    m = SimpleSeqEncDecAtt(inpvocsize=numwords, inpembdim=wordembdim, outvocsize=numlabels, outembdim=lablembdim, encdim=innerdim, decdim=innerdim, attdim=attdim, inconcat=False)

    # training: teacher forcing via shiftdata(traingold); validated directly on
    # the test set (NOTE(review): validating on test data leaks test information
    # into model selection — confirm this is intentional).
    m.train([traindata, shiftdata(traingold), trainmask], traingold).adagrad(lr=lr).grad_total_norm(1.).seq_cross_entropy().l2(wreg)\
        .validate_on([testdata, shiftdata(testgold), testmask], testgold).seq_cross_entropy().seq_accuracy().validinter(validinter)\
        .train(numbats, epochs)

    # predict after training: decode, then zero out padding positions.
    s = SeqEncDecAttSearch(m)
    testpred = s.decode(testdata)
    testpred = testpred * testmask
    #testpredprobs = m.predict(testdata, shiftdata(testgold), testmask)
    #testpred = np.argmax(testpredprobs, axis=2)-1
    #testpred = testpred * testmask
    #print np.vectorize(lambda x: label2idxrev[x] if x > -1 else " ")(testpred)
    # ATIS-style evaluation; the -1 shifts ids back to the label index space.
    evalres = atiseval(testpred - 1, testgold - 1, label2idxrev)
    print evalres


if __name__ == "__main__":
    argprun(run, epochs=1)
        # NOTE(review): tail fragment of a data-matrix builder (Python 2 — the
        # `lambda (x, y):` tuple parameters below are Py2-only); the loop this
        # `break` exits and the function header start above this chunk.
        # Indentation reconstructed from a whitespace-mangled paste.
        break
    # Data matrix padded with -1; gold is a (c, 2) index pair per example.
    datamat = np.zeros((c, maxlen)).astype("int32") - 1
    goldmat = np.zeros((c, 2)).astype("int32")
    i = 0
    for x in data:
        datamat[i, :len(x)] = x
        i += 1
    i = 0
    for x in gold:
        goldmat[i, :] = x
        i += 1
    # making chardic and transforming through chardic:
    # work on ord(char) keys, assign fresh ids that don't collide with existing ones.
    thischardic = dict(map(lambda (x, y): (ord(x), y), chardic.items()))
    nextid = 0
    while nextid in thischardic.values():
        nextid += 1
    uniquechars = np.unique(datamat)
    for uniquechar in list(uniquechars):
        # Skip the -1 padding value and chars already mapped.
        if not uniquechar in thischardic and uniquechar >= 0:
            thischardic[uniquechar] = nextid
            while nextid in thischardic.values():
                nextid += 1
    # Fold the new assignments back into the shared char dictionary (char keys).
    chardic.update(dict(map(lambda (x, y): (chr(x), y), thischardic.items())))
    print len(chardic), chardic
    # Remap the matrix from ord values to dictionary ids, keeping -1 padding.
    datamat = np.vectorize(lambda x: thischardic[x] if x >= 0 else x)(datamat)
    return datamat, goldmat


if __name__ == "__main__":
    argprun(run)
wordidxsonehot = np.eye(numwords, numwords)[wordidxs, :] ####data = np.eye(numchars, numchars)[data, :] block = seq2idx(invocsize=numchars, outvocsize=numwords, innerdim=statedim) '''gru = GRU(innerdim=statedim, dim=numchars) lin = Lin(indim=statedim, dim=numwords) lin2 = Lin(indim=numwords, dim=numwords) block = asblock(lambda x: Softmax()(lin(gru(x)[:, -1, :])))''' ###block = asblock(lambda x: Softmax()(lin2(x))) ''' print testpred probepred = np.argmax(block.predict(testpred), axis=1) print probepred for p in block.output.allparams: print p ''' block.train([data], wordidxs).cross_entropy().adagrad(lr=lr).autovalidate().accuracy().validinter(5)\ .train(numbats=numbats, epochs=epochs) #embed() pred = block.predict(testpred) print pred.shape print np.argmax(pred, axis=1) #''' if __name__ == "__main__": argprun(run_seqdecatt, epochs=50) #print ints2words(np.asarray([[20,8,5,0,0,0], [1,2,3,0,0,0]]))