Example #1
def test(netFile, data, dataset):
    # trees = tr.loadTrees(dataSet)
    trees, vocab = tr.loadTrees(dataset, data)
    assert netFile is not None, "Must give model to test"
    with open(netFile, "rb") as fid:
        opts = pickle.load(fid)
        _ = pickle.load(fid)  # skip the stored training cost history (sgd.costt in Example #2)
        # rnn = nnet.RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.minibatch)

        print "Reading word vectors..."
        x = pickle.load(open("mr_%s.p" % dataset, "rb"))
        W = x[0]
        W2 = 0.01 * np.random.randn(opts.wvecDim, opts.numWords)
        rnn = nnet_rte.RNNRTE(opts.wvecDim, opts.outputDim, 200, opts.numWords, opts.minibatch)
        rnn.initParams(W)
        rnn.fromFile(fid)
    print "Testing..."
    cost, correct, total = rnn.costAndGrad(trees, test=True)
    print "Cost %f, Correct %d/%d, Acc %f" % (cost, correct, total, correct / float(total))
Example #2
def start(opts):
    print "Loading data..."
    # load training data
    trees, vocab = tr.loadTrees(opts.dataset, opts.data)  # sick, train_parsed
    opts.numWords = len(tr.loadWordMap(opts.dataset))

    print "Loading word2vec vectors..."
    # Load pre-built word matrix using cPickle
    # w2v_file = "/Users/pentiumx/Projects/word2vec/GoogleNews-vectors-negative300.bin"
    # word_vecs = process_data.load_bin_vec(w2v_file, vocab)
    # revs, W, W2, word_idx_map, vocab = x[0], x[1], x[2], x[3], x[4]

    x = pickle.load(open("mr_%s.p" % opts.dataset, "rb"))
    W = x[0]
    W2 = 0.01 * np.random.randn(opts.wvecDim, opts.numWords)

    # rnn = nnet.RNN(opts.wvecDim,opts.outputDim,opts.numWords,opts.minibatch)
    # embeddingDim=200 for now

    if opts.use_denotation == 0:
        rnn = nnet_rte.RNNRTE(opts.wvecDim, opts.outputDim, 200, opts.numWords, opts.minibatch)
        rnn.initParams(W)  # Use W2 for experiments with randomly initialized vectors
        sgd = optimizer.SGD(rnn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer)
    else:
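        # Denotation-graph variant: besides the main RNNRTE, a second network
        # (rnn_dg) with a 2-way output layer is built from the dg_dataset word
        # vectors and handed to the optimizer as model_dg below.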
        """with open('models/denotation_sample.bin','r') as fid:
            _ = pickle.load(fid)# skip opts data
            __ = pickle.load(fid)

            x = pickle.load(open("mr_%s.p" % opts.dg_dataset, "rb"))
            W_dg = x[0]
            rnn = nnet_rte.RNNRTE(opts.wvecDim,opts.outputDim,200,opts.numWords,opts.minibatch)
            rnn.initParams(W, W_dg)
            rnn.from_file_denotation(fid)

            sgd = optimizer.SGD(rnn,alpha=opts.step,minibatch=opts.minibatch,
                optimizer=opts.optimizer)"""
        rnn = nnet_rte.RNNRTE(opts.wvecDim, opts.outputDim, 200, opts.numWords, opts.minibatch)
        rnn.initParams(W, True)

        x = pickle.load(open("mr_%s.p" % opts.dg_dataset, "rb"))
        W_dg = x[0]
        rnn_dg = nnet_rte.RNNRTE(opts.wvecDim, 2, 200, opts.numWords, opts.minibatch)
        rnn_dg.initParams(W_dg, True)

        sgd = optimizer.SGD(rnn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer, model_dg=rnn_dg)

    for e in range(opts.epochs):
        start = time.time()
        print "Running epoch %d" % e
        if opts.use_denotation == 0:
            sgd.run(trees)
        else:
            lines = tr.get_lines(opts.dg_dataset, opts.data)
            sgd.run_using_denotation(trees, lines)
        end = time.time()
        print "Time per epoch : %f" % (end - start)

        with open(opts.outFile, "wb") as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)

            # debug
            if e == opts.epochs - 1:
                rnn.toFile(fid, True)
            else:
                rnn.toFile(fid)
        # From the model's check_grad method: a numerical check of the
        # word-embedding gradient dL (a dict keyed by word index), left
        # disabled here as a string literal.
        """
        dL = grad[0]
        L = self.stack[0]
        for j in dL.iterkeys():
            for i in xrange(L.shape[0]):
                L[i, j] += epsilon
                costP, _ = self.costAndGrad(data)
                L[i, j] -= epsilon
                numGrad = (costP - cost) / epsilon
                err = np.abs(dL[j][i] - numGrad)
                print "Analytic %.9f, Numerical %.9f, Relative Error %.9f" % (dL[j][i], numGrad, err)
        """

if __name__ == '__main__':

    import tree_rte as treeM
    train, vocab = treeM.loadTrees()
    numW = len(treeM.loadWordMap('sick'))

    wvecDim = 10
    outputDim = 5
    print numW

    x = pickle.load(open("mr.p","rb"))
    W = x[0]
    rnn = RNNRTE(wvecDim,outputDim,200,numW,mbSize=4)
    rnn.initParams(W)

    mbData = train[:4]

    print "Numerical gradient check..."
    rnn.check_grad(mbData)
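For completeness, start() in Example #2 only reads the attributes of opts that appear in the listing (dataset, data, dg_dataset, use_denotation, wvecDim, outputDim, minibatch, step, optimizer, epochs, outFile; numWords is set inside start itself). Below is a minimal sketch of building such an options object with argparse and launching training; every flag default is illustrative, not taken from the original code.

import argparse

def build_opts(argv=None):
    # Illustrative defaults only; the real project may use different names and values.
    parser = argparse.ArgumentParser(description="Train the RNN-RTE model")
    parser.add_argument("--dataset", default="sick")
    parser.add_argument("--data", default="train_parsed")
    parser.add_argument("--dg_dataset", default="denotation")
    parser.add_argument("--use_denotation", type=int, default=0)
    parser.add_argument("--wvecDim", type=int, default=300)
    parser.add_argument("--outputDim", type=int, default=3)
    parser.add_argument("--minibatch", type=int, default=30)
    parser.add_argument("--step", type=float, default=1e-2)
    parser.add_argument("--optimizer", default="adagrad")
    parser.add_argument("--epochs", type=int, default=30)
    parser.add_argument("--outFile", default="models/rnn_rte_sick.bin")
    return parser.parse_args(argv)

# start(build_opts())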