def test(netFile, dataSet, trees=None):
    """Evaluate a saved RNN model and return its accuracy.

    Args:
        netFile: Path of the model file. It is read as three consecutive
            records: a pickled options object, one further pickled value
            that is skipped, and the parameters consumed by
            ``RNN.fromFile``.
        dataSet: Data set identifier passed to ``tr.printtree`` when no
            trees are supplied.
        trees: Optional pre-loaded trees. When ``None`` they are loaded
            from ``dataSet``.

    Returns:
        ``1.0 - Mis``, where ``Mis`` is the second value returned by
        ``nn.costAndGrad(trees, test=True)``.

    Raises:
        AssertionError: If ``netFile`` is ``None``.
    """
    # Check the cheap argument first so a missing model path fails before
    # any trees are loaded.
    assert netFile is not None, "Must give model to test"

    # Identity test: ``== None`` would dispatch to the container's own
    # equality operator instead of checking for the missing-argument case.
    if trees is None:
        trees = tr.printtree(dataSet)

    print("Testing netFile %s" % netFile)

    # NOTE(review): unpickling runs arbitrary code embedded in the file, so
    # only trusted model files should be passed in.
    # The mode is left as 'r' because the training loop writes the file
    # with 'w'; both sides must be switched to binary together.
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        # Second record is not needed for evaluation; read it only to
        # advance the stream to the network parameters.
        pickle.load(fid)
        nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                 opts.alpha, opts.minibatch)
        nn.initParams()
        nn.fromFile(fid)

    cost, Mis = nn.costAndGrad(trees, test=True)
    print("Cost = %f, Acc = %f" % (cost, 1.0 - Mis))
    return (1.0 - Mis)
# Print a sentence, then render the same text as a tree drawing.
prnt(train[0].root)
nltktree = Tree.fromstring(treeTxt)
nltktree.pretty_print()

# ---------------------------------------------------------------
# Create a toy model for testing.
# ---------------------------------------------------------------
numW = len(treeM.loadWordMap())
wvecDim, outputDim = 10, 5

rnn = RNN(wvecDim, outputDim, numW, mbSize=4)
rnn.initParams()

# Unpack the parameter stack onto individually named attributes.
(rnn.L, rnn.W, rnn.b, rnn.Ws, rnn.bs) = rnn.stack

# Zero gradients. Targets are assigned left to right, i.e. dW, db, dWs, dbs.
rnn.dW[:] = rnn.db[:] = rnn.dWs[:] = rnn.dbs[:] = 0
rnn.dL = collections.defaultdict(rnn.defaultVec)

# Running totals for the evaluation that follows.
# NOTE(review): `ost` looks like a truncated `cost` - confirm against the
# code that consumes it before renaming.
ost = total = 0.0
correct, guess = [], []
def run():
    """Train one RNN per word-vector dimension and record cost/accuracy.

    For every dimension in the sweep the model is trained for a fixed
    number of epochs. After each epoch the hyper-parameters and network are
    saved, the model is evaluated on the training and dev trees, and the
    forward-propagation state stored on the trees is cleared. Per-dimension
    cost and accuracy curves are plotted, the final dev cost/accuracy is
    appended to a summary file, and a final figure of accuracy against
    vector dimension is saved.

    Outputs are written below ``./param/RNN/`` and ``./figures/RNN/``;
    those directories must already exist.
    """
    print("Loading data...")
    model = "RNN"
    trees = tr.loadTrees('train')
    dev_trees = tr.loadTrees('dev')
    wvecDimList = [5, 15, 25, 35, 45]
    accuracy_per_wvecDim = []
    epochs = 100

    # Single definitions so the optimizer and the saved hyper-parameters
    # cannot drift apart.
    step = 0.01
    minibatch = 30

    # The vocabulary size does not depend on the dimension being swept.
    numWords = len(tr.loadWordMap())

    outFileText = "./param/%s/%s_cost_and_acc" % (model, model)
    # The context manager closes the summary file even if training raises.
    with open(outFileText, 'w') as f:
        for wvecDim in wvecDimList:
            # Fourth argument is presumably the minibatch size (it matches
            # the SGD setting) - confirm against RNN's signature.
            nn = RNN(wvecDim, 5, numWords, minibatch)
            nn.initParams()
            sgd = optimizer.SGD(nn, alpha=step, minibatch=minibatch,
                                optimizer="adagrad")

            outFile = "./param/%s/%s_wvecDim_%d_epochs_%d_step_001.bin" % (
                model, model, wvecDim, epochs)

            train_cost = []
            train_acc = []
            dev_cost = []
            dev_acc = []

            cost = 0
            accuracy = 0
            for e in range(epochs):
                start = time.time()
                sgd.run(trees)
                end = time.time()
                print("Time per epoch : %f" % (end - start))

                # Overwrite the checkpoint with the latest parameters.
                with open(outFile, 'w') as fid:
                    hyperparam = {
                        'alpha': step,
                        'minibatch': minibatch,
                        'wvecDim': wvecDim,
                    }
                    pickle.dump(hyperparam, fid)
                    nn.toFile(fid)

                cost, accuracy = test(nn, trees)
                train_cost.append(cost)
                train_acc.append(accuracy)

                cost, accuracy = test(nn, dev_trees)
                dev_cost.append(cost)
                dev_acc.append(accuracy)

                # Drop the state evaluation left on the trees before the
                # next epoch touches them.
                for tree in trees:
                    tr.leftTraverse(tree.root, nodeFn=tr.clearFprop)
                for tree in dev_trees:
                    tr.leftTraverse(tree.root, nodeFn=tr.clearFprop)
                print("fprop in trees cleared")

            plot_cost_acc(
                train_cost, dev_cost,
                "./figures/%s/%s_Cost_Figure_%d" % (model, model, wvecDim),
                epochs)
            plot_cost_acc(
                train_acc, dev_acc,
                "./figures/%s/%s_Accuracy_Figure_%d" % (model, model, wvecDim),
                epochs)

            # `cost` and `accuracy` hold the last dev-set evaluation here.
            # One record per line; without the newline all dimensions ran
            # together on a single line.
            answer = "Cost = %f, Acc= %f" % (cost, accuracy)
            f.write(answer + "\n")
            accuracy_per_wvecDim.append(accuracy)

    plt.figure(figsize=(6, 4))
    plt.title(r"Accuracies and vector Dimension")
    plt.xlabel("vector Dimension")
    plt.ylabel(r"Accuracy")
    plt.ylim(ymin=min(accuracy_per_wvecDim) * 0.8,
             ymax=max(accuracy_per_wvecDim) * 1.2)
    plt.plot(wvecDimList, accuracy_per_wvecDim,
             color='b', marker='o', linestyle='-')
    plt.savefig("./figures/%s/%s_Accuracy_and_vectorDimsension.png"
                % (model, model))
    plt.close()
def run(args=None):
    """Command-line entry point: train an RNN, or test a saved one.

    Args:
        args: Optional list of command-line arguments. It is handed to
            ``OptionParser.parse_args``, which falls back to
            ``sys.argv[1:]`` when it is ``None``.

    With ``--test`` the model in ``--inFile`` is evaluated on ``--data``
    and the function returns. Otherwise the model is trained for
    ``--epochs`` epochs; after every epoch the options, ``sgd.costt`` and
    the network are written to ``--outFile``, and the checkpoint is
    evaluated on the training and dev trees. The collected accuracies are
    printed once training ends.
    """
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("--test", action="store_true", dest="test",
                      default=False)

    # Optimizer
    parser.add_option("--minibatch", dest="minibatch", type="int",
                      default=30)
    parser.add_option("--optimizer", dest="optimizer", type="string",
                      default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    parser.add_option("--step", dest="step", type="float", default=1e-2)

    # Model
    parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=2)
    # Must be a float: with type="int" the 0.2 default was only usable
    # because defaults bypass conversion, and any fractional value given on
    # the command line was rejected by optparse.
    parser.add_option("--alpha", dest="alpha", type="float", default=0.2)

    # Files
    parser.add_option("--outFile", dest="outFile", type="string",
                      default="models/test.bin")
    parser.add_option("--inFile", dest="inFile", type="string",
                      default="models/test.bin")
    parser.add_option("--data", dest="data", type="string",
                      default="brae.pos")
    parser.add_option("--dev", dest="dev", type="string", default="brae.dev")
    parser.add_option("--wordMap", dest="map", type="string",
                      default="brae.tot")

    (opts, args) = parser.parse_args(args)

    # make this false if you dont care about your accuracies per epoch,
    # makes things faster!
    evaluate_accuracy_while_training = True

    # Testing
    if opts.test:
        test(opts.inFile, opts.data)
        return

    print("Loading data...")
    train_accuracies = []
    dev_accuracies = []

    trees = tr.printtree(opts.data)
    # Stored on opts so it is pickled with the checkpoint and is available
    # when the model is rebuilt for testing.
    opts.numWords = len(tr.loadWordMap(opts.map))

    nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
             opts.alpha, opts.minibatch)
    nn.initParams()

    sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch,
                        optimizer=opts.optimizer)

    dev_trees = tr.printtree(opts.dev)

    for e in range(opts.epochs):
        start = time.time()
        print("Running epoch %d" % e)
        sgd.run(trees)
        end = time.time()
        print("Time per epoch : %f" % (end - start))

        # The checkpoint is closed before it is read back for evaluation.
        with open(opts.outFile, 'w') as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)
            nn.toFile(fid)

        if evaluate_accuracy_while_training:
            print("testing on training set real quick")
            train_accuracies.append(test(opts.outFile, opts.data, trees))
            print("testing on dev set real quick")
            dev_accuracies.append(test(opts.outFile, opts.dev, dev_trees))

    # A leftover debugger breakpoint used to sit here and blocked
    # unattended runs; the accuracies are simply reported instead.
    if evaluate_accuracy_while_training:
        print(train_accuracies)
        print(dev_accuracies)