Beispiel #1
0
def test(netFile,dataSet,trees=None):
    if trees==None:
        trees = tr.printtree(dataSet)
    assert netFile is not None, "Must give model to test"
    print "Testing netFile %s"%netFile
    with open(netFile,'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)
        
        nn = RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.alpha,opts.minibatch)        
        nn.initParams()
        nn.fromFile(fid)

    cost, Mis = nn.costAndGrad(trees,test=True)
    
    print "Cost = %f, Acc = %f"%(cost, 1.0 - Mis)
    return (1.0 - Mis)
Beispiel #2
0
def _build_option_parser():
    """Create the command-line option parser used by ``run``."""
    parser = optparse.OptionParser(usage="usage : %prog [options]")

    parser.add_option("--test",action="store_true",dest="test",default=False,
        help="evaluate the model in --inFile on --data instead of training")

    # Optimizer
    parser.add_option("--minibatch",dest="minibatch",type="int",default=30)
    parser.add_option("--optimizer",dest="optimizer",type="string",
        default="adagrad")
    parser.add_option("--epochs",dest="epochs",type="int",default=50,
        help="number of training epochs")
    parser.add_option("--step",dest="step",type="float",default=1e-2,
        help="value passed to the optimizer as its alpha argument")

    # Model
    parser.add_option("--wvecDim",dest="wvecDim",type="int",default=30)
    parser.add_option("--outputDim",dest="outputDim",type="int",default=2)
    # Declared as float: the default is 0.2, and an "int" option would
    # reject any fractional value given on the command line.
    parser.add_option("--alpha",dest="alpha",type="float",default=0.2)

    # Files
    parser.add_option("--outFile",dest="outFile",type="string",
        default="models/test.bin",
        help="model file written after every epoch")
    parser.add_option("--inFile",dest="inFile",type="string",
        default="models/test.bin",
        help="model file loaded in --test mode")
    parser.add_option("--data",dest="data",type="string",default="brae.pos",
        help="training data (evaluation data in --test mode)")
    parser.add_option("--dev",dest="dev",type="string",default="brae.dev",
        help="dev data scored after every epoch")
    parser.add_option("--wordMap",dest="map",type="string",default="brae.tot",
        help="word map; its length is used as the number of words")

    return parser


def run(args=None):
    """Train the RNN, or evaluate a saved model when ``--test`` is given.

    In training mode the model is saved to ``--outFile`` after every
    epoch (options, ``sgd.costt``, then the network parameters) and, when
    per-epoch evaluation is enabled, scored on the training and dev data
    with ``test``. The collected accuracies are printed at the end.

    Args:
        args: Optional list of command-line arguments. When None,
            optparse falls back to ``sys.argv[1:]``.
    """
    parser = _build_option_parser()
    (opts,args) = parser.parse_args(args)

    # Set to False to skip the per-epoch accuracy evaluation, which makes
    # training faster.
    evaluate_accuracy_while_training = True

    # Testing
    if opts.test:
        test(opts.inFile,opts.data)
        return

    print("Loading data...")
    train_accuracies = []
    dev_accuracies = []

    trees = tr.printtree(opts.data)
    # Stored on opts so it is saved with the model; test() reads it back
    # to rebuild a network of the same size.
    opts.numWords = len(tr.loadWordMap(opts.map))

    nn = RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.alpha,opts.minibatch)
    nn.initParams()
    sgd = optimizer.SGD(nn,alpha=opts.step,minibatch=opts.minibatch,
        optimizer=opts.optimizer)

    dev_trees = tr.printtree(opts.dev)
    for e in range(opts.epochs):
        start = time.time()
        print("Running epoch %d" % e)
        sgd.run(trees)
        end = time.time()
        print("Time per epoch : %f" % (end - start))

        # Checkpoint after every epoch; test() expects exactly this
        # record order when it reads the file back.
        # NOTE(review): written in text mode; test() opens the file with
        # the matching 'r' mode -- change both together if switching to
        # binary mode.
        with open(opts.outFile,'w') as fid:
            pickle.dump(opts,fid)
            pickle.dump(sgd.costt,fid)
            nn.toFile(fid)

        if evaluate_accuracy_while_training:
            print("testing on training set real quick")
            train_accuracies.append(test(opts.outFile,opts.data,trees))
            print("testing on dev set real quick")
            dev_accuracies.append(test(opts.outFile,opts.dev,dev_trees))

    if evaluate_accuracy_while_training:
        print(train_accuracies)
        print(dev_accuracies)