def test(netFile, dataSet, trees=None):
    """Evaluate a saved network on a data set and return its accuracy.

    Args:
        netFile: Path to a model file holding, in order, a pickled options
            object, one further pickled object (skipped here) and the
            network parameters read back by ``nn.fromFile``.
        dataSet: Data set handed to ``tr.printtree`` when ``trees`` is None.
        trees: Already-loaded trees; when given, ``dataSet`` is not read.

    Returns:
        ``1.0 - Mis``, where ``Mis`` is the second value returned by
        ``nn.costAndGrad(trees, test=True)``.

    Raises:
        AssertionError: If ``netFile`` is None.
    """
    # Validate first so a missing model fails before any data is loaded.
    assert netFile is not None, "Must give model to test"

    # Identity check: ``== None`` misbehaves for containers that overload ==.
    if trees is None:
        trees = tr.printtree(dataSet)

    print("Testing netFile %s" % netFile)

    # Pickle streams are binary data, so the file must be opened in 'rb'.
    # NOTE: unpickling can execute arbitrary code; only load trusted models.
    with open(netFile, 'rb') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)  # second pickled object is not needed here
        nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                 opts.alpha, opts.minibatch)
        nn.initParams()
        nn.fromFile(fid)

    cost, Mis = nn.costAndGrad(trees, test=True)
    accuracy = 1.0 - Mis
    print("Cost = %f, Acc = %f" % (cost, accuracy))
    return accuracy
def run(args=None):
    """Train the RNN from command-line options, or evaluate a saved model.

    With ``--test`` the model in ``--inFile`` is evaluated on ``--data`` and
    the function returns. Otherwise the network is trained for ``--epochs``
    epochs; after every epoch the options, ``sgd.costt`` and the network
    parameters are written to ``--outFile`` and, when accuracy tracking is
    enabled, the saved model is evaluated on the training and dev trees.

    Args:
        args: List of command-line arguments passed to
            ``OptionParser.parse_args``; None makes optparse use
            ``sys.argv[1:]``.

    Returns:
        None.
    """
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("--test", action="store_true", dest="test",
                      default=False)

    # Optimizer
    parser.add_option("--minibatch", dest="minibatch", type="int", default=30)
    parser.add_option("--optimizer", dest="optimizer", type="string",
                      default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    parser.add_option("--step", dest="step", type="float", default=1e-2)

    parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=2)
    # alpha defaults to 0.2, so it must be parsed as a float; with type "int"
    # optparse rejects any fractional value given on the command line.
    parser.add_option("--alpha", dest="alpha", type="float", default=0.2)

    parser.add_option("--outFile", dest="outFile", type="string",
                      default="models/test.bin")
    parser.add_option("--inFile", dest="inFile", type="string",
                      default="models/test.bin")
    parser.add_option("--data", dest="data", type="string",
                      default="brae.pos")
    parser.add_option("--dev", dest="dev", type="string", default="brae.dev")
    parser.add_option("--wordMap", dest="map", type="string",
                      default="brae.tot")

    (opts, args) = parser.parse_args(args)

    # make this false if you dont care about your accuracies per epoch,
    # makes things faster!
    evaluate_accuracy_while_training = True

    # Testing
    if opts.test:
        test(opts.inFile, opts.data)
        return

    print("Loading data...")
    train_accuracies = []
    dev_accuracies = []
    trees = tr.printtree(opts.data)
    # numWords is stored on opts so it is pickled with the model and is
    # available to test() when the network is rebuilt.
    opts.numWords = len(tr.loadWordMap(opts.map))

    nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
             opts.alpha, opts.minibatch)
    nn.initParams()

    sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch,
                        optimizer=opts.optimizer)

    dev_trees = tr.printtree(opts.dev)

    for e in range(opts.epochs):
        start = time.time()
        print("Running epoch %d" % e)
        sgd.run(trees)
        end = time.time()
        print("Time per epoch : %f" % (end - start))

        # Pickle output is binary, so the file must be opened in 'wb'.
        with open(opts.outFile, 'wb') as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)
            nn.toFile(fid)

        if evaluate_accuracy_while_training:
            print("testing on training set real quick")
            train_accuracies.append(test(opts.outFile, opts.data, trees))
            print("testing on dev set real quick")
            dev_accuracies.append(test(opts.outFile, opts.dev, dev_trees))

    if evaluate_accuracy_while_training:
        # NOTE(review): this drops into the interactive debugger after
        # training and blocks unattended runs -- confirm it is still wanted.
        pdb.set_trace()
        print(train_accuracies)
        print(dev_accuracies)