Code Example #1
def build(P, input_size, output_size, mem_size, mem_width, layer_size):
    """
    Create controller function for use during scan op
    """
    # Weights for external input
    P.W_input_hidden = U.initial_weights(input_size, layer_size)

    # Weights for input from read head (read from memory)
    P.W_read_hidden = U.initial_weights(mem_width, layer_size)

    # Shared bias for external input and read head input
    P.b_hidden_0 = 0. * U.initial_weights(layer_size)

    # Weights and biases for output of controller
    P.W_hidden_output = 0. * U.initial_weights(layer_size, output_size)
    P.b_output = 0. * U.initial_weights(output_size)

    def controller(input_t, read_t):
        """
        Controller consists of a single hidden layer between inputs and outputs
        """
        hidden_layer = T.tanh(
            T.dot(input_t, P.W_input_hidden) +
            T.dot(read_t, P.W_read_hidden) +
            P.b_hidden_0
        )

        output_t = T.nnet.sigmoid(T.dot(hidden_layer, P.W_hidden_output) + P.b_output)

        # Return output and hidden layer of controller used by heads (in model.py)
        return output_t, hidden_layer
    return controller
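Every snippet on this page leans on a small utility module U (and often a Parameters container P) whose source is not shown here. For orientation, here is a minimal sketch of what U plausibly provides; the uniform initialization range and the exact signatures are assumptions for illustration, not the actual library.

# Hypothetical sketch of the "U" utility module assumed by these examples.
import numpy as np
import theano
import theano.tensor as T

def initial_weights(*dims):
    # Assumed: small uniform init, returned in theano.config.floatX.
    return np.asarray(
        np.random.uniform(low=-0.1, high=0.1, size=dims),
        dtype=theano.config.floatX)

def create_shared(value, name=None):
    # Wrap an ndarray in a Theano shared variable so it can be trained.
    return theano.shared(value, name=name)

def vector_softmax(x):
    # Numerically stable softmax for a 1-D tensor
    # (T.nnet.softmax expects a 2-D matrix, one distribution per row).
    e = T.exp(x - T.max(x))
    return e / T.sum(e)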
Code Example #2
def build(P, input_size, output_size, mem_width):
    """
    Create controller function for use during scan op
    """

    P.W_input_hidden = U.initial_weights(input_size, output_size)
    P.b_hidden_0 = 0. * U.initial_weights(output_size)
    P.attention_weight = np.array(0.1, dtype=theano.config.floatX)

    def controller(input_t, read_t):
        # Learned scalar blend between the external (LSTM) input and the read vector
        lstm_weight = 1 - P.attention_weight
        weighted_sum = lstm_weight * input_t + P.attention_weight * read_t

        if input_t.ndim > 1:
            output_t = T.nnet.softmax(
                T.dot(weighted_sum, P.W_input_hidden) +
                P.b_hidden_0
            )
        else:
            output_t = U.vector_softmax(
                T.dot(weighted_sum, P.W_input_hidden) +
                P.b_hidden_0)

        return output_t
    return controller
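This controller skips the hidden layer entirely: a single learned scalar P.attention_weight blends the external input with the read vector before one softmax projection, so input_t and read_t must share the same dimensionality for the weighted sum to type-check. The blend starts at 0.1, i.e. 90% external input and 10% memory read.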
Code Example #3
def build_model(hidden_size, predict_only=False):
    X = T.matrix('X')
    Y = T.ivector('Y')
    W_input_hidden = U.create_shared(U.initial_weights(2, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden_size, 2))
    b_predict = U.create_shared(U.initial_weights(2))

    params = [W_input_hidden, b_hidden, W_hidden_predict, b_predict]
    hidden_lin = T.dot(X, W_input_hidden) + b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_predict) + b_predict)

    cost = -T.mean(T.log(
        predict[T.arange(Y.shape[0]), Y])) + 1e-3 * adjacency_constraint(
            hidden_lin)  # + 1e-4 * sum(T.sum(p**2) for p in params)
    accuracy = T.mean(T.eq(T.argmax(predict, axis=1), Y))
    grad = T.grad(cost, params)

    train = theano.function(
        inputs=[X, Y],
        #updates =  updates.momentum(params,grad,0.9999,0.1) if not predict_only else None,
        #updates =  updates.momentum(params,grad,0.999,0.0005),
        updates=updates.adadelta(params, grad),
        outputs=[accuracy, W_input_hidden, b_hidden, (hidden > 0.5)])
    predict = theano.function(inputs=[X], outputs=predict[:, 0])

    i = T.iscalar('i')
    hidden_p = theano.function(inputs=[X, i], outputs=hidden[:, i])

    return train, predict, hidden_p, params
Code Example #4
    def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):

        if parameters is None:
            W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
            W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
            b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            W_if = theano.shared(parameters['W_if'], name='W_if')
            W_ff = theano.shared(parameters['W_ff'], name='W_ff')
            b = theano.shared(parameters['b'], name='b')

        initial = U.create_shared(U.initial_weights(output_size))
        self.is_backward = is_backward

        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        def step(in_t, out_tminus1):
            return self.activation_fn(T.dot(out_tminus1, W_ff) + T.dot(in_t, W_if) + b)

        self.output, _ = theano.scan(
            step,
            sequences=[inputs],
            outputs_info=[initial],
            go_backwards=self.is_backward
        )

        self.params = [W_if, W_ff, b]
Code Example #5
    def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):

        if parameters is None:
            self.W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
            self.W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W_if = theano.shared(parameters['W_if'], name='W_if')
            self.W_ff = theano.shared(parameters['W_ff'], name='W_ff')
            self.b = theano.shared(parameters['b'], name='b')

        initial = T.zeros((output_size,))
        self.is_backward = is_backward
        self.activation_fn = lambda x: T.cast(T.minimum(x * (x > 0), 20), dtype='float32')  # dtype=theano.config.floatX

        nonrecurrent = T.dot(inputs, self.W_if) + self.b

        self.output, _ = theano.scan(
            lambda in_t, out_tminus1, weights: self.activation_fn(in_t + T.dot(out_tminus1, weights)),
            sequences=[nonrecurrent],
            outputs_info=[initial],
            non_sequences=[self.W_ff],
            go_backwards=self.is_backward
        )

        self.params = [self.W_if, self.W_ff, self.b]
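Compared with Code Example #4, which projects the input inside the scan step, this version precomputes T.dot(inputs, self.W_if) + self.b for the whole sequence as one large matrix multiply and leaves only the recurrent term inside scan, which is generally cheaper. It also starts from a fixed zero state rather than the learned initial vector used in Code Example #4.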
Code Example #6
File: head.py, Project: darongliu/Lstm_Turing_LM
def build(P, id, input_size, mem_width):

#    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_key" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_sim" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_sim" % id] = 0. * U.initial_weights(mem_width)
#    P["W_%d_shift" % id] = U.initial_weights(input_size, shift_width)
#    P["b_%d_shift" % id] = 0. * U.initial_weights(shift_width)

    if id != 0:
        P["W_%d_g" % id] = U.initial_weights(input_size)
        P["b_%d_g" % id] = 0.

    def head_params(x):
        # key
        key_t = T.dot(x, P["W_%d_key" % id]) + P["b_%d_key" % id]

        # similarity weight
        sim_t = T.nnet.sigmoid(T.dot(x, P["W_%d_sim" % id]) + P["b_%d_sim" % id])
        # attention weight (complement of the similarity weight)
        att_t = 1 - sim_t

        if id != 0:
            g_t = T.nnet.sigmoid(T.dot(x, P["W_%d_g" % id]) + P["b_%d_g" % id])  # guess this is a vector
        else:
            g_t = T.ones([x.shape[0]])

        return key_t, g_t, sim_t, att_t
    return head_params
Code Example #7
def build(P, input_size, output_size, mem_width):
    """
    Create controller function for use during scan op
    """

    P.W_input_hidden = U.initial_weights(input_size, output_size)
    P.b_hidden_0 = 0. * U.initial_weights(output_size)

    P.W_read_hidden  = U.initial_weights(mem_width, input_size)
    P.b_hidden_read = 0. * U.initial_weights(input_size)

    def controller(input_t, read_t):
        # Average the external input with a sigmoid projection of the read vector
        new_input_t = (input_t + T.nnet.sigmoid(T.dot(read_t, P.W_read_hidden) + P.b_hidden_read)) / 2

        if input_t.ndim > 1:
            output_t = T.nnet.softmax(
                T.dot(new_input_t, P.W_input_hidden) +
                P.b_hidden_0
            )
        else:
            output_t = U.vector_softmax(
                T.dot(new_input_t, P.W_input_hidden) +
                P.b_hidden_0)

        return output_t
    return controller
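Here the read vector is first squashed through its own sigmoid projection back to input_size and then simply averaged with the external input, so P.W_read_hidden must map mem_width to input_size (as initialized above) for the addition to type-check.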
Code Example #8
    def __init__(self, inputs, input_size, output_size, parameters=None):

        if parameters is None:
            self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W = theano.shared(parameters['W'], name='W')
            self.b = theano.shared(parameters['b'], name='b')

        self.output = T.nnet.softmax(T.dot(inputs, self.W) + self.b)
        self.params = [self.W, self.b]
Code Example #9
    def __init__(self, inputs, input_size, output_size, parameters=None):

        if parameters is None:
            W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
            b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            W = theano.shared(parameters['W'], name='W')
            b = theano.shared(parameters['b'], name='b')

        self.output = T.nnet.softmax(T.dot(inputs, W) + b)
        self.params = [W, b]
Code Example #10
    def __init__(self, forward_in, backward_in, input_size, output_size):
        Wf = U.create_shared(U.initial_weights(input_size, output_size))
        Wb = U.create_shared(U.initial_weights(input_size, output_size))
        b = U.create_shared(U.initial_weights(output_size))

        self.activations = T.dot(forward_in, Wf) + T.dot(backward_in, Wb) + b

        self.output, _ = theano.scan(lambda inpt: T.nnet.softmax(inpt),
                                     sequences=[self.activations])

        self.params = [Wf, Wb, b]
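The scan here exists because T.nnet.softmax operates on 2-D input, one distribution per row; self.activations is presumably a 3-D time-major tensor, so scan applies the softmax to one 2-D slice per time step. Compare Code Examples #2 and #7, which fall back to U.vector_softmax for the 1-D case. The same pattern appears again in Code Example #12.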
Code Example #11
File: simple_model.py, Project: jeffiar/theano-learn
def build(P, n_input, n_hidden, n_output):
    P.W_i_h = U.initial_weights(n_input, n_hidden)
    P.W_h_o = U.initial_weights(n_hidden, n_output)
    P.b_h = U.initial_weights(n_hidden)
    P.b_o = U.initial_weights(n_output)

    def f(X):
        hidden = T.nnet.sigmoid(T.dot(X,      P.W_i_h) + P.b_h)
        output = T.nnet.softmax(T.dot(hidden, P.W_h_o) + P.b_o)
        return output

    return f
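A hypothetical way to wire f into a training graph; the layer sizes, the Parameters() container, and the negative log-likelihood cost below are assumptions, mirroring Code Example #3, not code from this project.

# Hypothetical usage sketch for the network function above.
import theano
import theano.tensor as T

X = T.matrix('X')
Y = T.ivector('Y')
P = Parameters()  # assumed container, as elsewhere on this page
f = build(P, n_input=784, n_hidden=100, n_output=10)
output = f(X)
# Mean negative log-likelihood of the correct class (as in Code Example #3).
cost = -T.mean(T.log(output[T.arange(Y.shape[0]), Y]))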
Code Example #12
    def __init__(self, forward_in, backward_in, input_size, output_size):
        Wf = U.create_shared(U.initial_weights(input_size, output_size))
        Wb = U.create_shared(U.initial_weights(input_size, output_size))
        b = U.create_shared(U.initial_weights(output_size))

        self.activations = T.dot(forward_in, Wf) + T.dot(backward_in, Wb) + b

        self.output, _ = theano.scan(
            lambda inpt: T.nnet.softmax(inpt),
            sequences=[self.activations]
        )

        self.params = [Wf, Wb, b]
Code Example #13
File: brnn.py, Project: copyfun/python-deep-speech
    def __init__(self, inputs, input_size, output_size, parameters=None):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
        
        if parameters is None:
            self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W = theano.shared(parameters['W'], name='W')
            self.b = theano.shared(parameters['b'], name='b')
        
        self.output = self.activation_fn(T.dot(inputs, self.W) + self.b)

        self.params = [self.W, self.b]
Code Example #14
    def __init__(self, inputs, input_size, output_size):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        W = U.create_shared(U.initial_weights(input_size, output_size))
        b = U.create_shared(U.initial_weights(output_size))

        self.output2 = self.activation_fn(T.dot(inputs, W) + b)

        self.output, _ = theano.scan(
            lambda element: self.activation_fn(T.dot(element, W) + b),
            sequences=[inputs])

        self.params = [W, b]
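This layer computes its activations twice: self.output2 applies the affine map to the whole inputs tensor in one batched multiply, while self.output rebuilds the same values one row at a time under scan; for a 2-D inputs tensor the two are interchangeable. The clipped rectifier min(x·(x>0), 20) is the activation used throughout these BRNN layers (the project names suggest a Deep Speech lineage). The identical snippet recurs as Code Example #16.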
Code Example #15
    def __init__(self, inputs, input_size, output_size, rng, dropout_rate, parameters=None):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        if parameters is None:
            self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W = theano.shared(parameters['W'], name='W')
            self.b = theano.shared(parameters['b'], name='b')


        self.output = T.cast(self.activation_fn( (T.dot(inputs, self.W) + self.b)*(1.0-dropout_rate) ), dtype=theano.config.floatX)

        self.params = [self.W, self.b]
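Note that this layer never actually drops units: it only rescales the activations by (1.0 - dropout_rate), which is the inference-time half of standard dropout, and the rng argument goes unused in this snippet. Presumably the random masking happens elsewhere in the project.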
Code Example #16
    def __init__(self, inputs, input_size, output_size):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        W = U.create_shared(U.initial_weights(input_size, output_size))
        b = U.create_shared(U.initial_weights(output_size))

        self.output2 = self.activation_fn(T.dot(inputs, W) + b)

        self.output, _ = theano.scan(
            lambda element: self.activation_fn(T.dot(element, W) + b),
            sequences=[inputs]
        )

        self.params = [W, b]
Code Example #17
    def __init__(self, inputs, input_size, output_size, parameters=None):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        if parameters is None:
            self.W = U.create_shared(U.initial_weights(input_size,
                                                       output_size),
                                     name='W')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W = theano.shared(parameters['W'], name='W')
            self.b = theano.shared(parameters['b'], name='b')

        self.output = self.activation_fn(T.dot(inputs, self.W) + self.b)

        self.params = [self.W, self.b]
Code Example #18
def build_network(input_size, hidden_size, constraint_adj=False):
    P = Parameters()
    X = T.bmatrix('X')

    P.W_input_hidden = U.initial_weights(input_size, hidden_size)
    P.b_hidden = U.initial_weights(hidden_size)
    P.b_output = U.initial_weights(input_size)
    hidden_lin = T.dot(X, P.W_input_hidden) + P.b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    output = T.nnet.softmax(T.dot(hidden, P.W_input_hidden.T) + P.b_output)
    parameters = P.values()
    cost = build_error(X, output, P)
    if constraint_adj: pass
    #cost = cost + adjacency_constraint(hidden_lin)

    return X, output, cost, P
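Note the tied weights: the decoder reuses W_input_hidden.T rather than learning a separate output matrix, so this is a weight-tied autoencoder with only an independent output bias b_output. The parameters = P.values() binding is unused here, since P is returned whole. Code Example #19 is the same function before reformatting.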
Code Example #19
def build_network(input_size,hidden_size,constraint_adj=False):
	P = Parameters()
	X = T.bmatrix('X')
	
	P.W_input_hidden = U.initial_weights(input_size,hidden_size)
	P.b_hidden       = U.initial_weights(hidden_size)
	P.b_output       = U.initial_weights(input_size)
	hidden_lin = T.dot(X,P.W_input_hidden)+P.b_hidden
	hidden = T.nnet.sigmoid(hidden_lin)
	output = T.nnet.softmax(T.dot(hidden,P.W_input_hidden.T) + P.b_output)
	parameters = P.values() 
	cost = build_error(X,output,P) 
	if constraint_adj: pass
		#cost = cost + adjacency_constraint(hidden_lin)

	return X,output,cost,P
Code Example #20
    def __init__(self, inputs, input_size, output_size, parameters=None):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        if parameters is None:
            W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
            b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            W = theano.shared(parameters['W'], name='W')
            b = theano.shared(parameters['b'], name='b')

        self.output, _ = theano.scan(
            lambda element: self.activation_fn(T.dot(element, W) + b),
            sequences=[inputs]
        )

        self.params = [W, b]
Code Example #21
def build(P, input_size, output_size, mem_size, mem_width, layer_sizes):
    """
	Create controller function for use during scan op
	"""

    P.W_input_hidden = U.initial_weights(input_size, layer_sizes[0])
    P.W_read_hidden = U.initial_weights(mem_width, layer_sizes[0])
    P.b_hidden_0 = 0.0 * U.initial_weights(layer_sizes[0])

    hidden_weights = []
    for i in xrange(len(layer_sizes) - 1):
        P["W_hidden_%d" % (i + 1)] = U.initial_weights(layer_sizes[i], layer_sizes[i + 1])
        P["b_hidden_%d" % (i + 1)] = 0.0 * U.initial_weights(layer_sizes[i + 1])
        hidden_weights.append((P["W_hidden_%d" % (i + 1)], P["b_hidden_%d" % (i + 1)]))

    P.W_hidden_output = 0.0 * U.initial_weights(layer_sizes[-1], output_size)
    P.b_output = 0.0 * U.initial_weights(output_size)

    def controller(input_t, read_t):
        prev_layer = hidden_0 = T.dot(input_t, P.W_input_hidden) + T.dot(read_t, P.W_read_hidden) + P.b_hidden_0
        # NB: no nonlinearity on this first layer (compare Code Example #33, which applies tanh)

        for W, b in hidden_weights:
            prev_layer = T.tanh(T.dot(prev_layer, W) + b)

        fin_hidden = prev_layer
        output_t = T.nnet.sigmoid(T.dot(fin_hidden, P.W_hidden_output) + P.b_output)

        return output_t, fin_hidden

    return controller
Code Example #22
File: lenet_model.py, Project: jeffiar/theano-learn
def _build_conv_pool(P, n_layer, input_layer, n_feats_out, n_feats_in, conv_size, pool_size):
    P["W_%d"%n_layer] = U.initial_weights(n_feats_out, n_feats_in, conv_size, conv_size)
    P["b_%d"%n_layer] = np.zeros((n_feats_out, ))
    W = P["W_%d"%n_layer]
    b = P["b_%d"%n_layer]
    out_conv = T.nnet.conv2d(input_layer, W)
    out_pool = max_pool_2d(out_conv, (pool_size, pool_size))
    output = T.nnet.sigmoid(out_pool + b.dimshuffle('x', 0, 'x', 'x'))
    return output
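Here the bias is added after pooling rather than before the convolution's nonlinearity; dimshuffle('x', 0, 'x', 'x') reshapes b from (n_feats_out,) to (1, n_feats_out, 1, 1) so it broadcasts across the batch and spatial dimensions of the pooled feature maps.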
Code Example #23
def build(P, input_size, proj_size):
    P["image_projection matrix"] = U.initial_weights(input_size, proj_size)  # issue: initial method

    def image_project(x):
        # projection
        proj_result = T.dot(x, P["image_projection matrix"])
        return proj_result  # whether to normalize or not

    return image_project
Code Example #24
    def __init__(self, inputs, input_size, output_size, is_backward=False):
        W_if = U.create_shared(U.initial_weights(input_size, output_size))
        W_ff = U.create_shared(U.initial_weights(output_size, output_size))
        b = U.create_shared(U.initial_weights(output_size))
        initial = U.create_shared(U.initial_weights(output_size))

        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        self.output, _ = theano.scan(
            lambda in_t: theano.scan(
                lambda index, out_tminus1: self.activation_fn(T.dot(out_tminus1, W_ff) + T.dot(in_t[index], W_if) + b),
                sequences=[T.arange(inputs.shape[1])],
                outputs_info=[initial],
                go_backwards=is_backward
            ),
            sequences=[inputs]  # for each sample at time "t"
        )

        self.params = [W_if, W_ff, b]
Code Example #25
    def __init__(self, inputs, input_size, output_size, is_backward=False):
        W_if = U.create_shared(U.initial_weights(input_size, output_size))
        W_ff = U.create_shared(U.initial_weights(output_size, output_size))
        b = U.create_shared(U.initial_weights(output_size))
        initial = U.create_shared(U.initial_weights(output_size))

        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        self.output, _ = theano.scan(
            lambda in_t: theano.scan(
                lambda index, out_tminus1: self.activation_fn(
                    T.dot(out_tminus1, W_ff) + T.dot(in_t[index], W_if) + b),
                sequences=[T.arange(inputs.shape[1])],
                outputs_info=[initial],
                go_backwards=is_backward),
            sequences=[inputs]  # for each sample at time "t"
        )

        self.params = [W_if, W_ff, b]
Code Example #26
def build_network(input_size, hidden_size):
    X = T.dmatrix('X')
    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    initial_hidden = U.create_shared(U.initial_weights(hidden_size),
                                     name='init_hidden')

    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))

    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]

    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X, W_input_to_hidden, W_hidden_to_hidden, b_hidden, initial_hidden,
        b_hidden_reproduction, b_input_reproduction)

    unrolled = unroll(hidden[-1], W_input_to_hidden, W_hidden_to_hidden,
                      b_hidden_reproduction, b_input_reproduction,
                      hidden.shape[0])

    return X, parameters, hidden, hidden1_reproduction, input_reproduction, unrolled
Code Example #27
    def __init__(self,
                 inputs,
                 input_size,
                 output_size,
                 rng,
                 dropout_rate,
                 parameters=None):
        self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

        if parameters is None:
            self.W = U.create_shared(U.initial_weights(input_size,
                                                       output_size),
                                     name='W')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W = theano.shared(parameters['W'], name='W')
            self.b = theano.shared(parameters['b'], name='b')

        self.output = T.cast(self.activation_fn(
            (T.dot(inputs, self.W) + self.b) * (1.0 - dropout_rate)),
                             dtype=theano.config.floatX)

        self.params = [self.W, self.b]
Code Example #28
File: TF_model.py, Project: jeffiar/theano-learn
def build(P):
    image_row = 35 # num of base pairs
    image_col = 4  # num of nucleotides
    n_input   = image_row * image_col

    n_feats = [1, 16] # num of "motifs" We'll learn 16 PWM's

    conv_row = 8 # 8-long PWM
    conv_col = 4 # 4 nucleotides
    pool_row = 28 # ??
    pool_col = 1

    n_pool_out = (n_feats[1] 
                  * ((image_row - conv_row + 1) / pool_row)
                  * ((image_col - conv_col + 1) / pool_col))
    n_hidden   = 32
    n_output   = 1

    P.W_input_conv      = U.initial_weights(n_feats[1], n_feats[0], conv_row, conv_col)
    P.b_pool_out        = np.zeros(n_pool_out)
    P.W_pool_out_hidden = U.initial_weights(n_pool_out, n_hidden)
    P.b_hidden          = np.zeros(n_hidden)
    P.W_hidden_output   = U.initial_weights(n_hidden, n_output)
    P.b_output          = np.zeros(n_output)

    def f(X):
        n_samples = X.shape[0]

        input    = X.reshape((n_samples, n_feats[0], image_row, image_col))
        conv_out = T.nnet.conv2d(input, P.W_input_conv)
        pool_out_= max_pool_2d(conv_out, (pool_row, pool_col))
        pool_out = pool_out_.flatten(2) + P.b_pool_out
        hidden   = relu(T.dot(pool_out, P.W_pool_out_hidden) + P.b_hidden)
        output   = T.dot(hidden, P.W_hidden_output) + P.b_output
        return output.astype(theano.config.floatX)

    return f
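With the sizes above, the arithmetic works out to a single pooled unit per motif: the valid convolution leaves (35 - 8 + 1) x (4 - 4 + 1) = 28 x 1 outputs, the 28 x 1 max-pool reduces that to 1 x 1, so n_pool_out = 16 * 1 * 1 = 16 features per sample. That also answers the "??" next to pool_row = 28: it pools each motif's response over the whole sequence.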
Code Example #29
    def __init__(self,
                 inputs,
                 input_size,
                 output_size,
                 is_backward=False,
                 parameters=None):

        if parameters is None:
            self.W_if = U.create_shared(U.initial_weights(
                input_size, output_size),
                                        name='W_if')
            self.W_ff = U.create_shared(U.initial_weights(
                output_size, output_size),
                                        name='W_ff')
            self.b = U.create_shared(U.initial_weights(output_size), name='b')
        else:
            self.W_if = theano.shared(parameters['W_if'], name='W_if')
            self.W_ff = theano.shared(parameters['W_ff'], name='W_ff')
            self.b = theano.shared(parameters['b'], name='b')

        initial = T.zeros((output_size, ))
        self.is_backward = is_backward
        self.activation_fn = lambda x: T.cast(T.minimum(x * (x > 0), 20),
                                              dtype='float32'
                                              )  #dtype=theano.config.floatX)

        nonrecurrent = T.dot(inputs, self.W_if) + self.b

        self.output, _ = theano.scan(
            lambda in_t, out_tminus1, weights: self.activation_fn(in_t + T.dot(
                out_tminus1, weights)),
            sequences=[nonrecurrent],
            outputs_info=[initial],
            non_sequences=[self.W_ff],
            go_backwards=self.is_backward)

        self.params = [self.W_if, self.W_ff, self.b]
Code Example #30
File: grid_learn.py, Project: shawntan/viz-speech
def build_model(hidden_size,predict_only=False):
	X = T.matrix('X')
	Y = T.ivector('Y')
	W_input_hidden   = U.create_shared(U.initial_weights(2,hidden_size))
	b_hidden         = U.create_shared(U.initial_weights(hidden_size))
	W_hidden_predict = U.create_shared(U.initial_weights(hidden_size,2))
	b_predict        = U.create_shared(U.initial_weights(2))

	params = [W_input_hidden,b_hidden,W_hidden_predict,b_predict]
	hidden_lin = T.dot(X,W_input_hidden) + b_hidden
	hidden = T.nnet.sigmoid(hidden_lin)
	predict = T.nnet.softmax(T.dot(hidden,W_hidden_predict) + b_predict)
	
	cost = -T.mean(T.log(predict[T.arange(Y.shape[0]),Y])) + 1e-3*adjacency_constraint(hidden_lin)# + 1e-4 * sum(T.sum(p**2) for p in params)
	accuracy = T.mean(T.eq(T.argmax(predict,axis=1),Y))
	grad = T.grad(cost,params)
	
	train = theano.function(
			inputs = [X,Y],
			#updates =  updates.momentum(params,grad,0.9999,0.1) if not predict_only else None,
			#updates =  updates.momentum(params,grad,0.999,0.0005),
			updates =  updates.adadelta(params,grad),
			outputs = [accuracy,W_input_hidden,b_hidden,(hidden>0.5)]
		)
	predict = theano.function(
			inputs  = [X],
			outputs = predict[:,0]
		)

	i = T.iscalar('i')
	hidden_p = theano.function(
			inputs = [X,i],
			outputs = hidden[:,i]
		)

	return train,predict,hidden_p,params
Code Example #31
File: lenet_model.py, Project: jeffiar/theano-learn
def build(P, n_input, n_hidden, n_output):
    P.W_hidden_output = U.initial_weights(n_hidden, n_output)
    P.b_output        = np.zeros(n_output)
    # n_hidden = 50 * 4 * 4  = 800 (n_feats of layer2 * pixels in image)

    # TODO: fix these magic numbers (especially the 800)
    def f(X):
        layer0 = X.reshape((X.shape[0], 1, 28, 28))
        layer1 = _build_conv_pool(P, 1, layer0, 20,  1, 5, 2)
        layer2_= _build_conv_pool(P, 2, layer1, 50, 20, 5, 2)
        layer2 = layer2_.flatten(2)
        output = T.nnet.softmax(T.dot(layer2, P.W_hidden_output) + P.b_output)
        return output

    return f
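The 800 in the comment follows from the shapes: 28x28 inputs go through a 5x5 valid convolution (24x24), a 2x2 max-pool (12x12), another 5x5 convolution (8x8), and another 2x2 pool (4x4), leaving 50 feature maps of 4x4 each, i.e. 50 * 4 * 4 = 800 hidden units.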
Code Example #32
File: toy.py, Project: kastnerkyle/theano-ctc
def build_model(P,X,input_size,hidden_size,output_size):
	W_input_hidden  = U.create_shared(U.initial_weights(input_size,hidden_size))
	W_hidden_hidden = U.create_shared(U.initial_weights(hidden_size,hidden_size))
	W_hidden_output = U.create_shared(U.initial_weights(hidden_size,output_size))
	b_hidden = U.create_shared(U.initial_weights(hidden_size))
	i_hidden = U.create_shared(U.initial_weights(hidden_size))
	b_output = U.create_shared(U.initial_weights(output_size))
	hidden = build_rnn(T.dot(X,W_input_hidden),W_hidden_hidden,b_hidden,i_hidden)

	predict = T.nnet.softmax(T.dot(hidden,W_hidden_output) + b_output)

	return X,predict
Code Example #33
def build(P,
          input_size=8,
          output_size=8,
          mem_size=128,
          mem_width=20,
          layer_sizes=[100]):
    """
    Create controller function for use during scan op
    """

    P.W_input_hidden = U.initial_weights(input_size, layer_sizes[0])
    P.W_read_hidden = U.initial_weights(mem_width, layer_sizes[0])
    P.b_hidden_0 = 0. * U.initial_weights(layer_sizes[0])

    hidden_weights = []
    for i in xrange(len(layer_sizes) - 1):
        P["W_hidden_%d" % (i + 1)] = U.initial_weights(layer_sizes[i],
                                                       layer_sizes[i + 1])
        P["b_hidden_%d" % (i + 1)] = 0. * U.initial_weights(layer_sizes[i + 1])
        hidden_weights.append(
            (P["W_hidden_%d" % (i + 1)], P["b_hidden_%d" % (i + 1)]))

    P.W_hidden_output = 0. * U.initial_weights(layer_sizes[-1], output_size)
    P.b_output = 0. * U.initial_weights(output_size)

    def controller(input_t, read_t):
        prev_layer = hidden_0 = T.tanh(
            T.dot(input_t, P.W_input_hidden) + T.dot(read_t, P.W_read_hidden) +
            P.b_hidden_0)

        for W, b in hidden_weights:
            prev_layer = T.tanh(T.dot(prev_layer, W) + b)

        fin_hidden = prev_layer
        output_t = T.nnet.sigmoid(
            T.dot(fin_hidden, P.W_hidden_output) + P.b_output)

        return output_t, fin_hidden

    return controller
Code Example #34
def build_model(P, X, input_size, hidden_size, output_size):
    W_input_hidden = U.create_shared(U.initial_weights(input_size,
                                                       hidden_size))
    W_hidden_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    W_hidden_output = U.create_shared(
        U.initial_weights(hidden_size, output_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    i_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(output_size))
    hidden = build_rnn(T.dot(X, W_input_hidden), W_hidden_hidden, b_hidden,
                       i_hidden)

    predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output)

    return X, predict
Code Example #35
File: rae.py, Project: OlafLee/rnn-experiment
def build_network(input_size,hidden_size):
	X = T.dmatrix('X')
	W_input_to_hidden  = U.create_shared(U.initial_weights(input_size,hidden_size))
	W_hidden_to_hidden = U.create_shared(U.initial_weights(hidden_size,hidden_size))
	initial_hidden = U.create_shared(U.initial_weights(hidden_size))
	
	b_hidden              = U.create_shared(U.initial_weights(hidden_size))
	b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
	b_input_reproduction  = U.create_shared(U.initial_weights(input_size))

	parameters = [
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden,
			initial_hidden,
			b_hidden_reproduction,
			b_input_reproduction,
		]

	hidden, hidden1_reproduction, input_reproduction = make_rae(
			X,
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden,
			initial_hidden,
			b_hidden_reproduction,
			b_input_reproduction
		)

	unrolled = unroll(
			hidden[-1],
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden_reproduction,
			b_input_reproduction,
			hidden.shape[0]
		)

	return X,parameters,hidden,hidden1_reproduction,input_reproduction,unrolled
Code Example #36
def build(P, input_size, mem_width, mem_size, shift_width):
    """
        NTM heads are implemented as another hidden layer coming after
        the last hidden layer of the controller that emits
        k_t, beta_t, g_t, s_t, gamma_t as outputs (see Controller outputs
        of Figure 2 in paper) along with erase and add vectors
        """
    P["W_key"] = U.initial_weights(input_size, mem_width)
    P["b_key"] = 0. * U.initial_weights(mem_width)

    P["W_beta"] = 0. * U.initial_weights(input_size)
    P["b_beta"] = 0.

    P["W_g"] = U.initial_weights(input_size)
    P["b_g"] = 0.

    P["W_shift"] = U.initial_weights(input_size, shift_width)
    P["b_shift"] = 0. * U.initial_weights(shift_width)

    P["W_gamma"] = U.initial_weights(input_size)
    P["b_gamma"] = 0.

    P["W_erase"] = U.initial_weights(input_size, mem_width)
    P["b_erase"] = 0. * U.initial_weights(mem_width)

    P["W_add"] = U.initial_weights(input_size, mem_width)
    P["b_add"] = 0. * U.initial_weights(mem_width)

    def head_params(x):
        """
                Takes hidden layer from controller computes
                k_t, beta_t, g_t, s_t,  and erase and add
                vectors as outputs
                """
        # key
        key_t = T.dot(x, P["W_key"]) + P["b_key"]

        # key strength
        _beta_t = T.dot(x, P["W_beta"]) + P["b_beta"]
        beta_t = T.nnet.softplus(_beta_t)

        # interpolation gate
        g_t = T.nnet.sigmoid(T.dot(x, P["W_g"]) + P["b_g"])

        # shift
        shift_t = U.vector_softmax(T.dot(x, P["W_shift"]) + P["b_shift"])
        shift_t.name = "shift_t"

        # sharpening
        _gamma_t = T.dot(x, P["W_gamma"]) + P["b_gamma"]
        gamma_t = T.nnet.softplus(_gamma_t) + 1.

        # erase and add vectors
        erase_t = T.nnet.sigmoid(T.dot(x, P["W_erase"]) + P["b_erase"])
        add_t = T.dot(x, P["W_add"]) + P["b_add"]

        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t

    return head_params
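None of the head snippets show how their outputs are consumed, so for orientation here is a hedged sketch of the standard NTM addressing chain (content focus, interpolation, convolutional shift, sharpening) that k_t, beta_t, g_t, s_t, gamma_t feed. The memory matrix M, the previous weighting w_prev, and the padding of shift_t out to memory length are all assumptions, not code from these projects.

# Hypothetical addressing step consuming the head outputs above.
# M: memory matrix (mem_size x mem_width); w_prev: previous weighting
# over memory rows; shift_t: assumed already padded to length mem_size.
import theano.tensor as T

def address(M, w_prev, key_t, beta_t, g_t, shift_t, gamma_t):
    # 1. Content addressing: cosine similarity, sharpened by beta_t.
    M_norm = T.sqrt(T.sum(M ** 2, axis=1))
    k_norm = T.sqrt(T.sum(key_t ** 2))
    sim = T.dot(M, key_t) / (M_norm * k_norm + 1e-6)
    w_c = U.vector_softmax(beta_t * sim)  # U.vector_softmax as elsewhere on this page
    # 2. Interpolation with the previous weighting.
    w_g = g_t * w_c + (1. - g_t) * w_prev
    # 3. Circular convolutional shift: S[i, j] = shift_t[(i - j) mod N].
    n = w_g.shape[0]
    idx = (T.arange(n).dimshuffle(0, 'x') - T.arange(n).dimshuffle('x', 0)) % n
    w_s = T.dot(shift_t[idx], w_g)
    # 4. Sharpening with gamma_t >= 1, then renormalize.
    w_sharp = w_s ** gamma_t
    return w_sharp / T.sum(w_sharp)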
Code Example #37
File: head.py, Project: BigeyeDestroyer/rnn-project
def build(P, id, input_size, mem_width, mem_size, shift_width):

    # 1. content addressing 
    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_key" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_beta" % id] = 0. * U.initial_weights(input_size)
    P["b_%d_beta" % id] = 0.

    # 2. interpolation 
    P["W_%d_g" % id] = U.initial_weights(input_size)
    P["b_%d_g" % id] = 0.

    # 3. convolutional shift
    P["W_%d_shift" % id] = U.initial_weights(input_size, shift_width)
    P["b_%d_shift" % id] = 0. * U.initial_weights(shift_width)
    
    # 4. sharpening 
    P["W_%d_gamma" % id] = U.initial_weights(input_size)
    P["b_%d_gamma" % id] = 0.
    
    # 5. erase and add vector 
    P["W_%d_erase" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_erase" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_add" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_add" % id] = 0. * U.initial_weights(mem_width)

    def head_params(x):
        # key
        key_t = T.dot(x, P["W_%d_key" % id]) + P["b_%d_key" % id]

        # shift
        shift_t = U.vector_softmax(
            T.dot(x, P["W_%d_shift" % id]) + P["b_%d_shift" % id])
        shift_t.name = "shift_t"

        # scalars
        _beta_t = T.dot(x, P["W_%d_beta" % id]) + P["b_%d_beta" % id]
        _gamma_t = T.dot(x, P["W_%d_gamma" % id]) + P["b_%d_gamma" % id]

        beta_t = T.nnet.softplus(_beta_t)
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        # beta_t  = (_beta_t  > 0)*_beta_t
        # gamma_t = (_gamma_t > 0)*_gamma_t + 1.
        # beta_t  = T.exp(_beta_t)
        # gamma_t = T.exp(_gamma_t) + 1.

        g_t = T.nnet.sigmoid(T.dot(x, P["W_%d_g" % id]) + P["b_%d_g" % id])

        erase_t = T.nnet.sigmoid(
            T.dot(x, P["W_%d_erase" % id]) + P["b_%d_erase" % id])
        add_t = T.dot(x, P["W_%d_add" % id]) + P["b_%d_add" % id]

        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t
    return head_params
Code Example #38
File: head.py, Project: c3h3/pyntm
def build(P,id,input_size,mem_width,mem_size,shift_width):

    P["W_%d_key"%id]   = U.initial_weights(input_size,mem_width)
    P["b_%d_key"%id]   = 0. * U.initial_weights(mem_width)

#     P["W_%d_shift"%id] = U.initial_weights(input_size) # 100
#     P["b_%d_shift"%id] = 0. 
    P["W_%d_shift"%id] = U.initial_weights(input_size,shift_width) # 100X3
    P["b_%d_shift"%id] = 0. * U.initial_weights(shift_width)

#     P["W_%d_beta"%id]  = 0. * U.initial_weights(input_size)
    P["W_%d_beta"%id]  = U.initial_weights(input_size)
    P["b_%d_beta"%id]  = 0.
    P["W_%d_gamma"%id] = U.initial_weights(input_size)
    P["b_%d_gamma"%id] = 0.
    P["W_%d_g"%id]     = U.initial_weights(input_size)
    P["b_%d_g"%id]     = 0.

    P["W_%d_erase"%id] = U.initial_weights(input_size,mem_width)
    P["b_%d_erase"%id] = 0. * U.initial_weights(mem_width)
    P["W_%d_add"%id]   = U.initial_weights(input_size,mem_width)
    P["b_%d_add"%id]   = 0. * U.initial_weights(mem_width)


    def head_params(x):
        # key
        key_t = T.dot(x,P["W_%d_key"%id]) + P["b_%d_key"%id]

        # shift
        shift_t = T.nnet.sigmoid(T.dot(x,P["W_%d_shift"%id]) + P["b_%d_shift"%id]) # *2 - 1
#         shift_t = U.vector_softmax(T.dot(x,P["W_%d_shift"%id]) + P["b_%d_shift"%id])
#         shift_t.name = "shift_t"

        # scalars
        _beta_t  = T.dot(x,P["W_%d_beta"%id])  + P["b_%d_beta"%id]
        _gamma_t = T.dot(x,P["W_%d_gamma"%id]) + P["b_%d_gamma"%id]

        beta_t  = T.nnet.softplus(_beta_t)
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        #		beta_t  = (_beta_t  > 0)*_beta_t
        #		gamma_t = (_gamma_t > 0)*_gamma_t + 1.
        #		beta_t  = T.exp(_beta_t)
        #		gamma_t = T.exp(_gamma_t) + 1.



        g_t     = T.nnet.sigmoid(T.dot(x,P["W_%d_g"%id]) + P["b_%d_g"%id])

        erase_t = T.nnet.sigmoid(T.dot(x,P["W_%d_erase"%id]) + P["b_%d_erase"%id])
        add_t   = T.dot(x,P["W_%d_add"%id]) + P["b_%d_add"%id]

        return key_t,beta_t,g_t,shift_t,gamma_t,erase_t,add_t
    return head_params
Code Example #39
File: head.py, Project: c3h3/pyntm
def build(P, id, input_size, mem_width, mem_size, shift_width):

    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_key" % id] = 0. * U.initial_weights(mem_width)

    #     P["W_%d_shift"%id] = U.initial_weights(input_size) # 100
    #     P["b_%d_shift"%id] = 0.
    P["W_%d_shift" % id] = U.initial_weights(input_size, shift_width)  # 100X3
    P["b_%d_shift" % id] = 0. * U.initial_weights(shift_width)

    #     P["W_%d_beta"%id]  = 0. * U.initial_weights(input_size)
    P["W_%d_beta" % id] = U.initial_weights(input_size)
    P["b_%d_beta" % id] = 0.
    P["W_%d_gamma" % id] = U.initial_weights(input_size)
    P["b_%d_gamma" % id] = 0.
    P["W_%d_g" % id] = U.initial_weights(input_size)
    P["b_%d_g" % id] = 0.

    P["W_%d_erase" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_erase" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_add" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_add" % id] = 0. * U.initial_weights(mem_width)

    def head_params(x):
        # key
        key_t = T.dot(x, P["W_%d_key" % id]) + P["b_%d_key" % id]

        # shift
        shift_t = T.nnet.sigmoid(
            T.dot(x, P["W_%d_shift" % id]) + P["b_%d_shift" % id])  # *2 - 1
        #         shift_t = U.vector_softmax(T.dot(x,P["W_%d_shift"%id]) + P["b_%d_shift"%id])
        #         shift_t.name = "shift_t"

        # scalars
        _beta_t = T.dot(x, P["W_%d_beta" % id]) + P["b_%d_beta" % id]
        _gamma_t = T.dot(x, P["W_%d_gamma" % id]) + P["b_%d_gamma" % id]

        beta_t = T.nnet.softplus(_beta_t)
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        #		beta_t  = (_beta_t  > 0)*_beta_t
        #		gamma_t = (_gamma_t > 0)*_gamma_t + 1.
        #		beta_t  = T.exp(_beta_t)
        #		gamma_t = T.exp(_gamma_t) + 1.

        g_t = T.nnet.sigmoid(T.dot(x, P["W_%d_g" % id]) + P["b_%d_g" % id])

        erase_t = T.nnet.sigmoid(
            T.dot(x, P["W_%d_erase" % id]) + P["b_%d_erase" % id])
        add_t = T.dot(x, P["W_%d_add" % id]) + P["b_%d_add" % id]

        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t

    return head_params
Code Example #40
def build_lstm_step(P,word_vector_size,hidden_state_size):
	P.W_input_in  = U.initial_weights(word_vector_size,hidden_state_size)
	P.W_hidden_in = U.initial_weights(hidden_state_size,hidden_state_size)
	P.W_cell_in   = U.initial_weights(hidden_state_size,hidden_state_size)
	P.b_in        = U.initial_weights(hidden_state_size)

	P.W_input_forget  = U.initial_weights(word_vector_size,hidden_state_size)
	P.W_hidden_forget = U.initial_weights(hidden_state_size,hidden_state_size)
	P.W_cell_forget   = U.initial_weights(hidden_state_size,hidden_state_size)
	P.b_forget        = U.initial_weights(hidden_state_size)

	P.W_input_output  = U.initial_weights(word_vector_size,hidden_state_size)
	P.W_hidden_output = U.initial_weights(hidden_state_size,hidden_state_size)
	P.W_cell_output   = U.initial_weights(hidden_state_size,hidden_state_size)
	P.b_output        = U.initial_weights(hidden_state_size)

	P.W_input_cell  = U.initial_weights(word_vector_size,hidden_state_size)
	P.W_hidden_cell = U.initial_weights(hidden_state_size,hidden_state_size)
	P.b_cell        = U.initial_weights(hidden_state_size)

	P.init_h = U.initial_weights(hidden_state_size)
	P.init_c = U.initial_weights(hidden_state_size)

	def step(x,prev_h,prev_c):
		input_gate = T.nnet.sigmoid(
				T.dot(x,P.W_input_in) +\
				T.dot(prev_h,P.W_hidden_in) +\
				T.dot(prev_c,P.W_cell_in) +\
				P.b_in
			)

		forget_gate = T.nnet.sigmoid(
				T.dot(x,P.W_input_forget) +\
				T.dot(prev_h,P.W_hidden_forget) +\
				T.dot(prev_c,P.W_cell_forget) +\
				P.b_forget
			)

		curr_c = forget_gate * prev_c + input_gate * T.tanh(
				T.dot(x,P.W_input_cell) +\
				T.dot(prev_h,P.W_hidden_cell) +\
				P.b_cell
			)

		output_gate = T.nnet.sigmoid(
				T.dot(x,P.W_input_output) +\
				T.dot(prev_h,P.W_hidden_output) +\
				T.dot(curr_c,P.W_cell_output) +\
				P.b_output
			)
		
		curr_h = output_gate * T.tanh(curr_c)

		return curr_h,curr_c

	return step
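A hedged sketch of how this step function would be driven; the use of theano.scan, a time-major matrix of word vectors, and P.init_h/P.init_c as the initial states are all assumptions (P is assumed to expose its entries as shared variables, as elsewhere on this page).

# Hypothetical usage: unroll the LSTM over a sequence with theano.scan.
import theano
import theano.tensor as T

word_vector_size, hidden_state_size = 100, 256  # example sizes
step = build_lstm_step(P, word_vector_size, hidden_state_size)

X = T.matrix('X')  # one word vector per row (time_steps x word_vector_size)
[h_seq, c_seq], _ = theano.scan(
    step,
    sequences=[X],
    outputs_info=[P.init_h, P.init_c]  # learned initial hidden/cell states
)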
Code Example #41
def create_vocab_vectors(P,vocab2id,size):
	return U.initial_weights(len(vocab2id) + 1,size)
Code Example #42
def create_vocab_vectors(P, vocab2id, size):
    return U.initial_weights(len(vocab2id) + 1, size)
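The extra row in len(vocab2id) + 1 presumably reserves one embedding for an out-of-vocabulary or padding token; note that P is accepted but unused in both versions of this helper.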
Code Example #43
def build_lstm_step(P, word_vector_size, hidden_state_size):
    P.W_input_in = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_in = U.initial_weights(hidden_state_size, hidden_state_size)
    P.W_cell_in = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_in = U.initial_weights(hidden_state_size)

    P.W_input_forget = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_forget = U.initial_weights(hidden_state_size, hidden_state_size)
    P.W_cell_forget = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_forget = U.initial_weights(hidden_state_size)

    P.W_input_output = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_output = U.initial_weights(hidden_state_size, hidden_state_size)
    P.W_cell_output = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_output = U.initial_weights(hidden_state_size)

    P.W_input_cell = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_cell = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_cell = U.initial_weights(hidden_state_size)

    P.init_h = U.initial_weights(hidden_state_size)
    P.init_c = U.initial_weights(hidden_state_size)

    def step(x, prev_h, prev_c):
        input_gate = T.nnet.sigmoid(
          T.dot(x,P.W_input_in) +\
          T.dot(prev_h,P.W_hidden_in) +\
          T.dot(prev_c,P.W_cell_in) +\
          P.b_in
         )

        forget_gate = T.nnet.sigmoid(
          T.dot(x,P.W_input_forget) +\
          T.dot(prev_h,P.W_hidden_forget) +\
          T.dot(prev_c,P.W_cell_forget) +\
          P.b_forget
         )

        curr_c = forget_gate * prev_c + input_gate * T.tanh(
          T.dot(x,P.W_input_cell) +\
          T.dot(prev_h,P.W_hidden_cell) +\
          P.b_cell
         )

        output_gate = T.nnet.sigmoid(
          T.dot(x,P.W_input_output) +\
          T.dot(prev_h,P.W_hidden_output) +\
          T.dot(curr_c,P.W_cell_output) +\
          P.b_output
         )

        curr_h = output_gate * T.tanh(curr_c)

        return curr_h, curr_c

    return step
Code Example #44
File: head.py, Project: alee101/598c-project
def build(P, input_size, mem_width, mem_size, shift_width):
    """
    NTM heads are implemented as another hidden layer coming after
    the last hidden layer of the controller that emits
    k_t, beta_t, g_t, s_t, gamma_t as outputs (see Controller outputs
    of Figure 2 in the paper) along with erase and add vectors
    """
    P["W_key"]   = U.initial_weights(input_size, mem_width)
    P["b_key"]   = 0. * U.initial_weights(mem_width)

    P["W_beta"]  = 0. * U.initial_weights(input_size)
    P["b_beta"]  = 0.

    P["W_g"]     = U.initial_weights(input_size)
    P["b_g"]     = 0.

    P["W_shift"] = U.initial_weights(input_size, shift_width)
    P["b_shift"] = 0. * U.initial_weights(shift_width)

    P["W_gamma"] = U.initial_weights(input_size)
    P["b_gamma"] = 0.

    P["W_erase"] = U.initial_weights(input_size, mem_width)
    P["b_erase"] = 0. * U.initial_weights(mem_width)

    P["W_add"]   = U.initial_weights(input_size, mem_width)
    P["b_add"]   = 0. * U.initial_weights(mem_width)

    def head_params(x):
        """
        Takes the hidden layer from the controller and computes
        k_t, beta_t, g_t, s_t, and the erase and add vectors as outputs
        """
        # key
        key_t = T.dot(x, P["W_key"]) + P["b_key"]

        # key strength
        _beta_t = T.dot(x, P["W_beta"]) + P["b_beta"]
        beta_t = T.nnet.softplus(_beta_t)

        # interpolation gate
        g_t = T.nnet.sigmoid(T.dot(x, P["W_g"]) + P["b_g"])

        # shift
        shift_t = U.vector_softmax(T.dot(x, P["W_shift"]) + P["b_shift"])
        shift_t.name = "shift_t"

        # sharpening
        _gamma_t = T.dot(x, P["W_gamma"]) + P["b_gamma"]
        gamma_t = T.nnet.softplus(_gamma_t) + 1.

        # erase and add vectors
        erase_t = T.nnet.sigmoid(T.dot(x, P["W_erase"]) + P["b_erase"])
        add_t = T.dot(x, P["W_add"]) + P["b_add"]

        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t
    return head_params