Esempio n. 1
0
def test(netFile, dataSet, trees=None):
    """Evaluate a saved model on a data set and return its accuracy.

    Args:
        netFile: Path to a saved model file. It is read in order: a pickled
            options object, one more pickled record (skipped here), then
            whatever ``RNN.fromFile`` consumes from the same handle.
        dataSet: Data-set identifier handed to ``tr.printtree`` when
            ``trees`` is not supplied.
        trees: Optional pre-loaded trees. When given, ``dataSet`` is not
            loaded.

    Returns:
        ``1.0 - Mis``, where ``Mis`` is the second value returned by
        ``nn.costAndGrad(trees, test=True)``.

    Raises:
        AssertionError: If ``netFile`` is None.
    """
    # Validate up front so a missing model path fails before any data is
    # loaded from disk.
    assert netFile is not None, "Must give model to test"

    # Identity check: ``== None`` is dispatched to the container's __eq__,
    # which is wrong for types that overload it (e.g. numpy arrays).
    if trees is None:
        trees = tr.printtree(dataSet)

    print("Testing netFile %s" % netFile)

    # NOTE(review): unpickling executes arbitrary code; only load model
    # files from a trusted source.
    # NOTE(review): the file is opened in text mode, as in the original;
    # keep this in sync with whatever mode the writer uses.
    with open(netFile, 'r') as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)  # second record is not needed for evaluation

        nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords,
                 opts.alpha, opts.minibatch)
        nn.initParams()
        nn.fromFile(fid)

    cost, Mis = nn.costAndGrad(trees, test=True)
    accuracy = 1.0 - Mis

    print("Cost = %f, Acc = %f" % (cost, accuracy))
    return accuracy
Esempio n. 2
0
# Print a sentence: pass the root of the first training tree to prnt, then
# build a tree object from treeTxt and pretty-print it.
# NOTE(review): Tree is presumably nltk's Tree class - confirm against the
# file's imports.
prnt(train[0].root)
nltktree = Tree.fromstring(treeTxt)
nltktree.pretty_print()

###############################
# Create a toy model for testing.
###############################
# Number of entries in the loaded word map; passed to RNN as its third
# positional argument.
numW = len(treeM.loadWordMap())

wvecDim = 10
outputDim = 5

rnn = RNN(wvecDim, outputDim, numW, mbSize = 4)
rnn.initParams()

# Unpack the model's parameter stack into individually named attributes.
rnn.L, rnn.W, rnn.b, rnn.Ws, rnn.bs = rnn.stack

# Zero gradients (in place, so the existing arrays are reused).
rnn.dW[:] = 0
rnn.db[:] = 0
rnn.dWs[:] = 0
rnn.dbs[:] = 0
# dL is rebuilt as a defaultdict whose missing entries come from
# rnn.defaultVec.
rnn.dL = collections.defaultdict(rnn.defaultVec)

# Accumulators for the code that follows this snippet.
# NOTE(review): `ost` looks like a typo for `cost` - confirm against the
# code that consumes these names before renaming.
ost = 0.0
correct = []
guess = []
total = 0.0
Esempio n. 3
0
def run():
    """Train one RNN per word-vector dimension and record cost/accuracy.

    For every dimension in a fixed list the function trains for ``epochs``
    epochs. After each epoch it rewrites the checkpoint file, evaluates on
    the training and dev trees via ``test`` and clears the cached forward
    propagation state on all trees. After the last epoch it plots the
    per-epoch cost and accuracy curves and appends one summary line (final
    dev cost and accuracy) to the results file. Finally it plots the final
    dev accuracy against the vector dimension.

    Returns:
        None. All results are written under ``./param/RNN/`` and
        ``./figures/RNN/``; those directories must already exist.
    """
    print("Loading data...")
    model = "RNN"
    # Single source of truth for the values that go both to the optimizer
    # and into the checkpoint's hyper-parameter record.
    step = 0.01
    minibatch = 30
    epochs = 100
    wvecDimList = [5, 15, 25, 35, 45]

    trees = tr.loadTrees('train')
    dev_trees = tr.loadTrees('dev')
    # Loop-invariant: load the word map once instead of once per dimension.
    numWords = len(tr.loadWordMap())

    accuracy_per_wvecDim = []
    outFileText = "./param/%s/%s_cost_and_acc" % (model, model)

    # The context manager closes the summary file even if training raises.
    with open(outFileText, 'w') as f:
        for wvecDim in wvecDimList:
            # NOTE(review): the 4th positional argument (30) is presumably
            # the minibatch size - confirm against RNN.__init__.
            nn = RNN(wvecDim, 5, numWords, 30)
            nn.initParams()
            sgd = optimizer.SGD(nn, alpha=step, minibatch=minibatch,
                                optimizer="adagrad")
            outFile = "./param/%s/%s_wvecDim_%d_epochs_%d_step_001.bin" % (
                model, model, wvecDim, epochs)
            hyperparam = {
                'alpha': step,
                'minibatch': minibatch,
                'wvecDim': wvecDim,
            }

            train_cost = []
            train_acc = []
            dev_cost = []
            dev_acc = []
            cost = 0
            accuracy = 0
            for e in range(epochs):
                start = time.time()
                sgd.run(trees)
                end = time.time()
                print("Time per epoch : %f" % (end - start))

                # Overwrite the checkpoint with the latest parameters.
                with open(outFile, 'w') as fid:
                    pickle.dump(hyperparam, fid)
                    nn.toFile(fid)

                cost, accuracy = test(nn, trees)
                train_cost.append(cost)
                train_acc.append(accuracy)

                # Evaluated last, so `cost`/`accuracy` hold the dev-set
                # numbers when the epoch loop ends.
                cost, accuracy = test(nn, dev_trees)
                dev_cost.append(cost)
                dev_acc.append(accuracy)

                for tree_set in (trees, dev_trees):
                    for tree in tree_set:
                        tr.leftTraverse(tree.root, nodeFn=tr.clearFprop)
                print("fprop in trees cleared")

            plot_cost_acc(
                train_cost, dev_cost,
                "./figures/%s/%s_Cost_Figure_%d" % (model, model, wvecDim),
                epochs)
            plot_cost_acc(
                train_acc, dev_acc,
                "./figures/%s/%s_Accuracy_Figure_%d" % (model, model, wvecDim),
                epochs)

            # One record per line; without the newline the records of all
            # dimensions ran together into a single unreadable line.
            f.write("Cost = %f, Acc= %f\n" % (cost, accuracy))
            accuracy_per_wvecDim.append(accuracy)

    plt.figure(figsize=(6, 4))
    plt.title(r"Accuracies and vector Dimension")
    plt.xlabel("vector Dimension")
    plt.ylabel(r"Accuracy")
    # Positional limits: the ymin/ymax keyword aliases are not accepted by
    # current matplotlib releases.
    plt.ylim(min(accuracy_per_wvecDim) * 0.8,
             max(accuracy_per_wvecDim) * 1.2)
    plt.plot(wvecDimList,
             accuracy_per_wvecDim,
             color='b',
             marker='o',
             linestyle='-')
    plt.savefig("./figures/%s/%s_Accuracy_and_vectorDimsension.png" %
                (model, model))
    plt.close()
Esempio n. 4
0
def run(args=None):
    """Parse command-line options, then either test a saved model or train.

    With ``--test`` the model at ``--inFile`` is evaluated on ``--data`` via
    ``test`` and the function returns. Otherwise a new RNN is trained with
    SGD for ``--epochs`` epochs; after every epoch the model is written to
    ``--outFile`` and (when per-epoch evaluation is enabled) evaluated on
    the training and dev sets. The accuracy histories are printed at the
    end.

    Args:
        args: Optional list of argument strings for ``optparse``; ``None``
            makes the parser read ``sys.argv[1:]``.

    Returns:
        None.
    """
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("--test", action="store_true", dest="test",
                      default=False)

    # Optimizer
    parser.add_option("--minibatch", dest="minibatch", type="int", default=30)
    parser.add_option("--optimizer", dest="optimizer", type="string",
                      default="adagrad")
    parser.add_option("--epochs", dest="epochs", type="int", default=50)
    parser.add_option("--step", dest="step", type="float", default=1e-2)

    # Model
    parser.add_option("--wvecDim", dest="wvecDim", type="int", default=30)
    parser.add_option("--outputDim", dest="outputDim", type="int", default=2)
    # Must be a float option: the default is 0.2, and with type="int" any
    # fractional value given on the command line was rejected.
    parser.add_option("--alpha", dest="alpha", type="float", default=0.2)

    # Files and data sets
    parser.add_option("--outFile", dest="outFile", type="string",
                      default="models/test.bin")
    parser.add_option("--inFile", dest="inFile", type="string",
                      default="models/test.bin")
    parser.add_option("--data", dest="data", type="string",
                      default="brae.pos")
    parser.add_option("--dev", dest="dev", type="string", default="brae.dev")
    parser.add_option("--wordMap", dest="map", type="string",
                      default="brae.tot")

    (opts, args) = parser.parse_args(args)

    # make this false if you dont care about your accuracies per epoch,
    # makes things faster!
    evaluate_accuracy_while_training = True

    # Testing
    if opts.test:
        test(opts.inFile, opts.data)
        return

    print("Loading data...")
    train_accuracies = []
    dev_accuracies = []

    trees = tr.printtree(opts.data)
    # Stored on opts so that it is pickled into the model file with the
    # other options.
    opts.numWords = len(tr.loadWordMap(opts.map))

    nn = RNN(opts.wvecDim, opts.outputDim, opts.numWords, opts.alpha,
             opts.minibatch)
    nn.initParams()
    sgd = optimizer.SGD(nn, alpha=opts.step, minibatch=opts.minibatch,
                        optimizer=opts.optimizer)

    dev_trees = tr.printtree(opts.dev)
    for e in range(opts.epochs):
        start = time.time()
        print("Running epoch %d" % e)
        sgd.run(trees)
        end = time.time()
        print("Time per epoch : %f" % (end - start))

        # Model file layout: options, the optimizer's cost record, then the
        # network parameters.
        with open(opts.outFile, 'w') as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)
            nn.toFile(fid)

        if evaluate_accuracy_while_training:
            print("testing on training set real quick")
            train_accuracies.append(test(opts.outFile, opts.data, trees))
            print("testing on dev set real quick")
            dev_accuracies.append(test(opts.outFile, opts.dev, dev_trees))

    # No debugger breakpoint here: a pdb.set_trace() would block any
    # unattended run at this point.
    if evaluate_accuracy_while_training:
        print(train_accuracies)
        print(dev_accuracies)