def make_train(input_size, output_size, mem_size, mem_width, hidden_sizes=None):
    """Build the NTM graph and compile an RMSProp training function.

    Fix: the original used the mutable default argument ``hidden_sizes=[100]``;
    a ``None`` sentinel with the same effective default is used instead.

    Returns:
        (P, train): the Parameters store and a Theano function mapping
        (input_sequence, output_sequence) -> scalar training cost.
    """
    if hidden_sizes is None:
        hidden_sizes = [100]
    P = Parameters()
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    seqs = predict(input_seq)
    output_seq_pred = seqs[-1]
    # Clamp predictions into (5e-6, 1 - 5e-6) so the log inside
    # binary_crossentropy stays finite.
    cross_entropy = T.sum(T.nnet.binary_crossentropy(5e-6 + (1 - 2*5e-6)*output_seq_pred, output_seq), axis=1)
    params = P.values()
    # L2 weight decay accumulated over all trainable parameters.
    l2 = T.sum(0)
    for p in params:
        l2 = l2 + (p ** 2).sum()
    cost = T.sum(cross_entropy) + 1e-4*l2
    # Element-wise gradient clipping to [-10, 10].
    grads = [T.clip(g, -10, 10) for g in T.grad(cost, wrt=params)]

    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=cost,
        updates=updates.rmsprop(params, grads, learning_rate=1e-5)
    )

    return P, train
# Beispiel #2 (scraped example separator)
# 0 (example score)
def make_train(input_size, output_size, mem_size, mem_width, hidden_size=100):
    """Build the NTM graph and compile an adadelta training function.

    Fix: the original body mixed tab- and space-indented lines, which raises
    ``TabError`` under Python 3; indentation is normalized to spaces.

    Returns:
        (P, train): the Parameters store and a Theano function mapping
        (input_sequence, output_sequence) -> scalar training cost.
    """
    P = Parameters()

    # Build controller. ctrl is a network that takes an external and read input
    # and returns the output of the network and its hidden layer
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width, hidden_size)

    # Build model that predicts output sequence given input sequence
    predict = model.build(P, mem_size, mem_width, hidden_size, ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    [M, weights, output_seq_pred] = predict(input_seq)

    # Clamp predictions away from 0/1 so the cross-entropy log stays finite.
    cross_entropy = T.sum(T.nnet.binary_crossentropy(5e-6 + (1 - 2*5e-6)*output_seq_pred, output_seq), axis=1)
    params = P.values()
    # L2 weight decay accumulated over all trainable parameters.
    l2 = T.sum(0)
    for p in params:
        l2 = l2 + (p ** 2).sum()
    cost = T.sum(cross_entropy) + 1e-3*l2
    # clip gradients element-wise to [-100, 100]
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=cost,
        updates=updates.adadelta(params, grads)
    )

    return P, train
# Beispiel #3 (scraped example separator)
# 0 (example score)
def make_train(input_size,
               output_size,
               mem_size,
               mem_width,
               hidden_sizes=None):
    """Build the NTM graph and compile an adadelta training function
    (variant without L2 regularization).

    Fixes: the mutable default argument ``hidden_sizes=[100]`` is replaced by
    a ``None`` sentinel (same effective default), and the compiled function
    reuses ``cost`` instead of rebuilding ``T.sum(cross_entropy)`` a second
    time — the returned value is unchanged.

    Returns:
        (P, train): the Parameters store and a Theano function mapping
        (input_sequence, output_sequence) -> scalar training cost.
    """
    if hidden_sizes is None:
        hidden_sizes = [100]
    P = Parameters()
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width,
                            hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    seqs = predict(input_seq)
    output_seq_pred = seqs[-1]
    # Clamp predictions away from 0/1 so the cross-entropy log stays finite.
    cross_entropy = T.sum(T.nnet.binary_crossentropy(
        5e-6 + (1 - 2 * 5e-6) * output_seq_pred, output_seq),
                          axis=1)
    cost = T.sum(cross_entropy)  # no L2 term in this variant
    params = P.values()
    # Element-wise gradient clipping to [-100, 100].
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    train = theano.function(inputs=[input_seq, output_seq],
                            outputs=cost,
                            updates=updates.adadelta(params, grads))

    return P, train
# Beispiel #4 (scraped example separator)
# 0 (example score)
def make_train(input_size, output_size, mem_size, mem_width, hidden_size=100):
    """Compile and return an adadelta training step for the NTM.

    Returns the parameter store ``P`` together with a Theano function that
    maps (input_sequence, output_sequence) to the scalar training cost.
    """
    P = Parameters()

    # The controller maps the external input plus the memory read vector
    # to the network output and its hidden activations.
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width,
                            hidden_size)

    # Full model: predicts the output sequence from an input sequence.
    predict = model.build(P, mem_size, mem_width, hidden_size, ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    M, weights, output_seq_pred = predict(input_seq)

    # Clamp predictions into (5e-6, 1 - 5e-6) before the cross-entropy so
    # the log in the graph stays finite.
    squashed = 5e-6 + (1 - 2 * 5e-6) * output_seq_pred
    cross_entropy = T.sum(T.nnet.binary_crossentropy(squashed, output_seq),
                          axis=1)

    params = P.values()
    # L2 penalty accumulated over every trainable parameter.
    l2 = T.sum(0)
    for param in params:
        l2 = l2 + (param ** 2).sum()
    cost = T.sum(cross_entropy) + 1e-3 * l2

    # Element-wise gradient clipping to [-100, 100].
    clipped = [T.clip(grad, -100, 100) for grad in T.grad(cost, wrt=params)]

    train = theano.function(inputs=[input_seq, output_seq],
                            outputs=cost,
                            updates=updates.adadelta(params, clipped))
    return P, train
# Beispiel #5 (scraped example separator)
# 0 (example score)
def make_model(input_size=8,
               output_size=8,
               mem_size=128,
               mem_width=20,
               hidden_sizes=None):
    """Build the NTM and compile an inference-only Theano function.

    Fix: the mutable default argument ``hidden_sizes=[100]`` is replaced by
    a ``None`` sentinel; the effective default is unchanged.

    Returns:
        (P, test_fun): the Parameters store and a Theano function mapping
        an input sequence to [attention weights, predicted output].
    """
    if hidden_sizes is None:
        hidden_sizes = [100]
    P = Parameters()
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width,
                            hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)
    input_seq = T.matrix('input_sequence')
    [M_curr, weights, output] = predict(input_seq)

    # Inference only: no cost or parameter updates compiled here.
    test_fun = theano.function(inputs=[input_seq], outputs=[weights, output])
    return P, test_fun
def make_model(
		input_size=8,
		output_size=8,
		mem_size=128,
		mem_width=20,
		hidden_sizes=[100]):
	"""Assemble the NTM and compile an inference-only Theano function.

	Returns the Parameters store and a function mapping an input sequence
	to [attention weights, predicted output].
	"""
	P = Parameters()
	controller_net = controller.build(
			P, input_size, output_size, mem_size, mem_width, hidden_sizes)
	predict = model.build(
			P, mem_size, mem_width, hidden_sizes[-1], controller_net)
	input_seq = T.matrix('input_sequence')
	M_curr, weights, output = predict(input_seq)

	# No training graph here: just read weights and predictions.
	test_fun = theano.function(
			inputs=[input_seq],
			outputs=[weights, output])
	return P, test_fun
def build(P, input_size, mem_width, weighted_mem_width, output_size):
    """Wire up the controller/model pair and return (turing_updates, predict)."""
    ctrl = controller.build(P, input_size, output_size, weighted_mem_width)
    predict = model.build(P, input_size, mem_width, weighted_mem_width, ctrl)

    def turing_updates(cost, lr):
        """RMSProp updates for `cost` with L2 decay and gradient clipping."""
        params = P.values()
        # Whether to add weight decay over the parameters in P.
        decay = T.sum(0)
        for param in params:
            decay = decay + (param ** 2).sum()
        regularized = cost + 1e-3 * decay
        clipped = [T.clip(grad, -100, 100)
                   for grad in T.grad(regularized, wrt=params)]
        return updates.rmsprop(params, clipped, learning_rate=lr)

    def init_parameter(name, value):
        # Registers a value in the parameter store (used by getvalue).
        P[name] = value

    return turing_updates, predict
def build(P, mem_width, output_size):
    """Wire up the controller/model pair and return (turing_updates, predict)."""
    ctrl = controller.build(P, mem_width, output_size, mem_width)
    predict = model.build(P, mem_width, ctrl)

    def turing_updates(cost, lr):
        """Momentum (mu=0) updates with L2 decay and norm-based clipping."""
        params = P.values()
        # Whether to add weight decay over the parameters in P.
        decay = T.sum(0)
        for param in params:
            decay = decay + (param ** 2).sum()
        regularized = cost + 1e-3 * decay
        # Clip the whole gradient list via the updates-module clipper.
        clipper = updates.clip(5.)
        clipped = clipper(T.grad(regularized, wrt=params))
        return updates.momentum(params, clipped, mu=0, learning_rate=lr)

    def init_parameter(name, value):
        # Registers a value in the parameter store (used by getvalue).
        P[name] = value

    return turing_updates, predict
# Beispiel #9 (scraped example separator)
# 0 (example score)
def make_model(input_size=8,
               output_size=8,
               mem_size=128,
               mem_width=20,
               hidden_size=100):
    """Given the model parameters, return a Theano function for the NTM's
    model together with the Parameters store."""
    P = Parameters()

    # Build the controller and the sequence model around it.
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width,
                            hidden_size)
    predict = model.build(P, mem_size, mem_width, hidden_size, ctrl)
    input_seq = T.matrix('input_sequence')
    M_curr, weights, output = predict(input_seq)

    # Inference-only Theano function for the NTM: attention weights plus
    # predicted outputs, no parameter updates.
    test_fun = theano.function(inputs=[input_seq],
                               outputs=[weights, output])
    return P, test_fun
def make_train(input_size, output_size, mem_size, mem_width, hidden_sizes=None):
    """Compile a training function whose reported loss covers only the
    response half of the sequence.

    Fixes:
    - the mutable default argument ``hidden_sizes=[100]`` is replaced by a
      ``None`` sentinel (same effective default);
    - ``input_seq.shape[0]/2`` used true division, which produces a float
      under Python 3 and breaks the slice bound; integer division ``//`` is
      used instead.

    Returns:
        (P, train): the Parameters store and a Theano function mapping
        (input_sequence, output_sequence) -> mean cross-entropy over the
        last half of the sequence, applying adadelta updates as a side
        effect.
    """
    if hidden_sizes is None:
        hidden_sizes = [100]
    P = Parameters()
    ctrl = controller.build(P, input_size, output_size, mem_size, mem_width, hidden_sizes)
    predict = model.build(P, mem_size, mem_width, hidden_sizes[-1], ctrl)

    input_seq = T.matrix('input_sequence')
    output_seq = T.matrix('output_sequence')
    seqs = predict(input_seq)
    output_seq_pred = seqs[-1]
    # Clamp predictions away from 0/1 so the cross-entropy log stays finite.
    cross_entropy = T.sum(T.nnet.binary_crossentropy(5e-6 + (1 - 2*5e-6)*output_seq_pred, output_seq), axis=1)
    cost = T.sum(cross_entropy)  # no L2 term in this variant
    params = P.values()
    grads = [T.clip(g, -100, 100) for g in T.grad(cost, wrt=params)]

    # The second half of the sequence is the response phase; report the
    # mean cross-entropy over just that part.
    response_length = input_seq.shape[0] // 2
    train = theano.function(
        inputs=[input_seq, output_seq],
        outputs=T.mean(cross_entropy[-response_length:]),
        updates=updates.adadelta(params, grads)
    )

    return P, train
# Beispiel #11 (scraped example separator)
# 0 (example score)
def make_model(
		input_size=8,
		output_size=8,
		mem_size=128,
		mem_width=20,
		hidden_size=100):
	"""Given the model parameters, return a Theano function for the NTM's
	model together with the Parameters store."""
	P = Parameters()

	# Build the controller and the sequence model around it.
	controller_net = controller.build(
			P, input_size, output_size, mem_size, mem_width, hidden_size)
	predict = model.build(P, mem_size, mem_width, hidden_size, controller_net)
	input_seq = T.matrix('input_sequence')
	M_curr, weights, output = predict(input_seq)

	# Inference-only Theano function returning attention weights and the
	# predicted output sequence.
	test_fun = theano.function(
			inputs=[input_seq],
			outputs=[weights, output])
	return P, test_fun
# Beispiel #12 (scraped example separator)
# 0 (example score)
# Datei: ntm.py  Projekt: c3h3/pyntm (source file and project of this example)
    def __init__(self,
                 input_size, output_size, mem_size, mem_width, hidden_sizes,
                 num_heads, max_epochs, momentum, learning_rate, grad_clip,
                 l2_norm):
        """Store hyper-parameters, build the NTM computation graph, and
        compile the train / predict_cost / predict Theano functions."""
        self.input_size = input_size
        self.output_size = output_size
        self.mem_size = mem_size
        self.mem_width = mem_width
        self.hidden_sizes = hidden_sizes
        self.num_heads = num_heads
        self.max_epochs = max_epochs
        self.momentum = momentum
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.l2_norm = l2_norm

        # Best costs seen so far; start at infinity.
        self.best_train_cost = np.inf
        self.best_valid_cost = np.inf
        #self.train = None
        #self.cost = None

        # Training history accumulator.
        self.train_his = []

        param_store = Parameters()
        ctrl = controller.build(param_store, self.input_size,
                                self.output_size, self.mem_size,
                                self.mem_width, self.hidden_sizes)
        predict = model.build(param_store, self.mem_size, self.mem_width,
                              self.hidden_sizes[-1], ctrl, self.num_heads)

        input_seq = T.matrix('input_sequence')
        output_seq = T.matrix('output_sequence')

        M_curr, weights, output = predict(input_seq)
        # output_seq_pred = seqs[-1]

        # Clamp predictions into (5e-6, 1 - 5e-6) so the cross-entropy log
        # stays finite.
        cross_entropy = T.sum(
            T.nnet.binary_crossentropy(5e-6 + (1 - 2*5e-6)*output,
                                       output_seq),
            axis=1)

        self.params = param_store.values()

        # L2 penalty accumulated over every trainable parameter.
        l2 = T.sum(0)
        for param in self.params:
            l2 = l2 + (param ** 2).sum()

        cost = T.sum(cross_entropy) + self.l2_norm * l2

        # Element-wise gradient clipping to [grad_clip[0], grad_clip[1]].
        grads = [T.clip(g, grad_clip[0], grad_clip[1])
                 for g in T.grad(cost, wrt=self.params)]

        self.train = theano.function(
            inputs=[input_seq, output_seq],
            outputs=cost,
            updates=updates.rmsprop(self.params, grads,
                                    momentum=self.momentum,
                                    learning_rate=self.learning_rate))

        # Cost without any parameter updates (e.g. for validation).
        self.predict_cost = theano.function(
            inputs=[input_seq, output_seq],
            outputs=cost)

        # Forward pass: attention weights and predicted outputs.
        self.predict = theano.function(
            inputs=[input_seq],
            outputs=[weights, output])
# Beispiel #13 (scraped example separator)
# 0 (example score)
    def __init__(self, input_size, output_size, mem_size, mem_width,
                 hidden_sizes, num_heads, max_epochs, momentum, learning_rate,
                 grad_clip, l2_norm):
        """Store hyper-parameters, build the NTM computation graph, and
        compile the ``train`` / ``predict_cost`` / ``predict`` functions.

        grad_clip: (low, high) pair of element-wise gradient bounds.
        l2_norm: scale of the L2 weight-decay term added to the cost.
        """

        self.input_size = input_size
        self.output_size = output_size
        self.mem_size = mem_size
        self.mem_width = mem_width
        self.hidden_sizes = hidden_sizes
        self.num_heads = num_heads
        self.max_epochs = max_epochs
        self.momentum = momentum
        self.learning_rate = learning_rate
        self.grad_clip = grad_clip
        self.l2_norm = l2_norm

        # Best costs seen so far; start at infinity so any real cost wins.
        self.best_train_cost = np.inf
        self.best_valid_cost = np.inf
        #self.train = None
        #self.cost = None

        # Training history accumulator (filled in elsewhere).
        self.train_his = []

        P = Parameters()
        ctrl = controller.build(P, self.input_size, self.output_size,
                                self.mem_size, self.mem_width,
                                self.hidden_sizes)
        predict = model.build(P, self.mem_size, self.mem_width,
                              self.hidden_sizes[-1], ctrl, self.num_heads)

        input_seq = T.matrix('input_sequence')
        output_seq = T.matrix('output_sequence')

        [M_curr, weights, output] = predict(input_seq)
        # output_seq_pred = seqs[-1]

        # Clamp predictions into (5e-6, 1 - 5e-6) so the log inside
        # binary_crossentropy stays finite.
        cross_entropy = T.sum(T.nnet.binary_crossentropy(
            5e-6 + (1 - 2 * 5e-6) * output, output_seq),
                              axis=1)

        self.params = P.values()

        # L2 penalty accumulated over every trainable parameter.
        l2 = T.sum(0)
        for p in self.params:
            l2 = l2 + (p**2).sum()

        cost = T.sum(cross_entropy) + self.l2_norm * l2
        #     cost = T.sum(cross_entropy) + 1e-3*l2

        # Element-wise gradient clipping to [grad_clip[0], grad_clip[1]].
        grads = [
            T.clip(g, grad_clip[0], grad_clip[1])
            for g in T.grad(cost, wrt=self.params)
        ]
        #     grads  = [ T.clip(g,-100,100) for g in T.grad(cost,wrt=params) ]
        #     grads  = [ T.clip(g,1e-9, 0.2) for g in T.grad(cost,wrt=params) ]

        # Training step: returns the cost and applies RMSProp updates.
        self.train = theano.function(
            inputs=[input_seq, output_seq],
            outputs=cost,
            #             updates=updates.adadelta(params,grads)
            updates=updates.rmsprop(self.params,
                                    grads,
                                    momentum=self.momentum,
                                    learning_rate=self.learning_rate))

        # Cost without any parameter updates (e.g. for validation).
        self.predict_cost = theano.function(inputs=[input_seq, output_seq],
                                            outputs=cost)

        # Forward pass: attention weights and predicted outputs.
        self.predict = theano.function(inputs=[input_seq],
                                       outputs=[weights, output])