def make_functions(
        input_size, output_size, mem_size, mem_width, hidden_sizes=[100]):

    start_time = time.time()

    input_seqs  = T.btensor3('input_sequences')
    output_seqs = T.btensor3('output_sequences')

    P = Parameters()
    process = model.build(P,
            input_size, output_size, mem_size, mem_width, hidden_sizes[0])
    outputs = process(T.cast(input_seqs,'float32'))
    output_length = (input_seqs.shape[1] - 2) // 2

    Y = output_seqs[:,-output_length:,:-2]
    Y_hat = T.nnet.sigmoid(outputs[:,-output_length:,:-2])

    cross_entropy = T.mean(T.nnet.binary_crossentropy(Y_hat,Y))
    bits_loss = cross_entropy * (Y.shape[1] * Y.shape[2]) / T.log(2)

    params = P.values()

    cost = cross_entropy # + 1e-5 * sum(T.sum(T.sqr(w)) for w in params)

    print "Computing gradients",
    grads = T.grad(cost, wrt=params)
    grads = updates.clip_deltas(grads, np.float32(clip_length))

    print "Done. (%0.3f s)"%(time.time() - start_time)
    start_time = time.time()
    print "Compiling function",
    P_learn = Parameters()

    update_pairs = updates.rmsprop(
                params, grads,
                learning_rate=1e-4,
                P=P_learn
            )

    train = theano.function(
            inputs=[input_seqs, output_seqs],
            outputs=cross_entropy,
            updates=update_pairs,
        )

    test = theano.function(
            inputs=[input_seqs, output_seqs],
            outputs=bits_loss
        )

    print "Done. (%0.3f s)"%(time.time() - start_time)
    print P.parameter_count()
    return P, P_learn, train, test
Beispiel #2
0
def make_functions(input_size,
                   output_size,
                   mem_size,
                   mem_width,
                   hidden_sizes=[100]):

    start_time = time.time()

    input_seqs = T.btensor3('input_sequences')
    output_seqs = T.btensor3('output_sequences')

    P = Parameters()
    process = model.build(P, input_size, output_size, mem_size, mem_width,
                          hidden_sizes[0])
    outputs = process(T.cast(input_seqs, 'float32'))
    output_length = (input_seqs.shape[1] - 2) // 2

    Y = output_seqs[:, -output_length:, :-2]
    Y_hat = T.nnet.sigmoid(outputs[:, -output_length:, :-2])

    cross_entropy = T.mean(T.nnet.binary_crossentropy(Y_hat, Y))
    bits_loss = cross_entropy * (Y.shape[1] * Y.shape[2]) / T.log(2)

    params = P.values()

    cost = cross_entropy  # + 1e-5 * sum(T.sum(T.sqr(w)) for w in params)

    print "Computing gradients",
    grads = T.grad(cost, wrt=params)
    grads = updates.clip_deltas(grads, np.float32(clip_length))

    print "Done. (%0.3f s)" % (time.time() - start_time)
    start_time = time.time()
    print "Compiling function",
    P_learn = Parameters()

    update_pairs = updates.rmsprop(params,
                                   grads,
                                   learning_rate=1e-4,
                                   P=P_learn)

    train = theano.function(
        inputs=[input_seqs, output_seqs],
        outputs=cross_entropy,
        updates=update_pairs,
    )

    test = theano.function(inputs=[input_seqs, output_seqs], outputs=bits_loss)

    print "Done. (%0.3f s)" % (time.time() - start_time)
    print P.parameter_count()
    return P, P_learn, train, test
Beispiel #3
0
				word_rep_size = 128,
				stmt_hidden_size = 128,
				diag_hidden_size = 128,
				vocab_size  = vocab_size,
				output_size = vocab_size,
				map_fun_size = 128,
				evidence_count = evidence_count
				)

		# Apply the model to the story, index and question inputs; it yields
		# the evidence outputs and the answer output.
		output_evds,output_ans = attention(story,idxs,qstn)
		# Loss = negative log of the answer output at the labelled index plus
		# the negative logs of evidence outputs 0 and 1 at their target
		# indices.  Only evidence positions 0 and 1 are used here.
		cross_entropy = -T.log(output_ans[ans_lbl]) \
				+ -T.log(output_evds[0][ans_evds[0]]) \
				+ -T.log(output_evds[1][ans_evds[1]]) 
		#cost += -T.log(ordered_probs(output_evds,ans_e.vds)) 
		print "Done."
		print "Parameter count:", P.parameter_count()

		print "Calculating gradient expression...",
		params = P.values()
		cost = cross_entropy
		grads = T.grad(cost,wrt=params)
		print "Done."

		inputs = [story,idxs,qstn,ans_lbl,ans_evds]
		outputs = cross_entropy
		# Persist the symbolic inputs, loss, parameters and gradients using
		# pickle protocol 2.
		# NOTE(review): the file object returned by open() is never closed
		# explicitly; consider a `with` block.
		pickle.dump(
				(inputs,outputs,params,grads),
				open("compute_tree.pkl","wb"),2
				)

		print "Compiling native...",
Beispiel #4
0
        # Build the model callable; all learnable parameters are registered
        # in P.
        attention = model.build(P,
                                word_rep_size=128,
                                stmt_hidden_size=128,
                                diag_hidden_size=128,
                                vocab_size=vocab_size,
                                output_size=vocab_size,
                                map_fun_size=128,
                                evidence_count=evidence_count)

        # The model yields the evidence outputs and the answer output.
        output_evds, output_ans = attention(story, idxs, qstn)
        # Loss = negative log of the answer output at the labelled index plus
        # the negative logs of evidence outputs 0 and 1 at their target
        # indices.  evidence_count is passed to model.build above, but only
        # evidence positions 0 and 1 contribute to this loss.
        cross_entropy = -T.log(output_ans[ans_lbl]) \
          + -T.log(output_evds[0][ans_evds[0]]) \
          + -T.log(output_evds[1][ans_evds[1]])
        #cost += -T.log(ordered_probs(output_evds,ans_e.vds))
        print "Done."
        print "Parameter count:", P.parameter_count()

        print "Calculating gradient expression...",
        params = P.values()
        cost = cross_entropy
        grads = T.grad(cost, wrt=params)
        print "Done."

        inputs = [story, idxs, qstn, ans_lbl, ans_evds]
        outputs = cross_entropy
        # Persist the symbolic inputs, loss, parameters and gradients using
        # pickle protocol 2.
        # NOTE(review): the file object returned by open() is never closed
        # explicitly; consider a `with` block.
        pickle.dump((inputs, outputs, params, grads),
                    open("compute_tree.pkl", "wb"), 2)

        print "Compiling native...",
    lr = T.fscalar('lr')
    acc, update = make_functions(inputs, outputs, params, grads, lr)