def build(P,input_size,output_size,mem_size,mem_width,layer_size):
    """
    Build a single-hidden-layer controller for use inside the scan op.

    Registers weights on the parameter store P and returns a function
    controller(input_t, read_t) -> (output_t, hidden_layer).
    """
    # Projections for the external input and for the vector read from memory.
    P.W_input_hidden = U.initial_weights(input_size,layer_size)
    P.W_read_hidden = U.initial_weights(mem_width, layer_size)
    # Bias and output weights start at zero (0. * random keeps the shape).
    P.b_hidden_0 = 0. * U.initial_weights(layer_size)
    P.W_hidden_output = 0. * U.initial_weights(layer_size,output_size)
    P.b_output = 0. * U.initial_weights(output_size)
    def controller(input_t,read_t):
        """Single tanh hidden layer followed by a sigmoid output layer."""
        pre_activation = (T.dot(input_t,P.W_input_hidden)
                          + T.dot(read_t,P.W_read_hidden)
                          + P.b_hidden_0)
        hidden_layer = T.tanh(pre_activation)
        output_t = T.nnet.sigmoid(T.dot(hidden_layer,P.W_hidden_output) + P.b_output)
        # Hidden layer is also consumed by the heads (see model.py).
        return output_t,hidden_layer
    return controller
def build(P, input_size, output_size, mem_width):
    """
    Build a controller that blends the external input with the memory read
    using a fixed scalar attention weight, then applies a softmax projection.
    """
    P.W_input_hidden = U.initial_weights(input_size, output_size)
    P.b_hidden_0 = 0. * U.initial_weights(output_size)
    # Scalar mixing coefficient between the LSTM input and the read vector.
    P.attention_weight = np.array(0.1,dtype=theano.config.floatX)
    def controller(input_t, read_t):
        mix = P.attention_weight
        blended = (1-mix)*input_t + mix*read_t
        logits = T.dot(blended, P.W_input_hidden) + P.b_hidden_0
        # Batched input uses the matrix softmax; a single sample needs the
        # vector variant.
        if input_t.ndim > 1 :
            return T.nnet.softmax(logits)
        return U.vector_softmax(logits)
    return controller
def build_model(hidden_size, predict_only=False):
    # Build a 2-input softmax classifier with one sigmoid hidden layer and
    # compile train / predict / hidden-inspection functions.
    # NOTE(review): `predict_only` is currently unused — the update rule that
    # referenced it is commented out below; confirm before relying on it.
    X = T.matrix('X')
    Y = T.ivector('Y')
    #* (0.001 * U.initial_weights(2,hidden_size) + np.array([[0,0,1,1],[1,1,0,0]])))
    W_input_hidden = U.create_shared(U.initial_weights(2, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden_size, 2))
    b_predict = U.create_shared(U.initial_weights(2))
    params = [W_input_hidden, b_hidden, W_hidden_predict, b_predict]
    hidden_lin = T.dot(X, W_input_hidden) + b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_predict) + b_predict)
    # Negative log-likelihood plus an adjacency penalty on the hidden
    # pre-activations.
    cost = -T.mean(T.log(
        predict[T.arange(Y.shape[0]), Y])) + 1e-3 * adjacency_constraint(
            hidden_lin)  # + 1e-4 * sum(T.sum(p**2) for p in params)
    accuracy = T.mean(T.eq(T.argmax(predict, axis=1), Y))
    grad = T.grad(cost, params)
    train = theano.function(
        inputs=[X, Y],
        #updates = updates.momentum(params,grad,0.9999,0.1) if not predict_only else None,
        #updates = updates.momentum(params,grad,0.999,0.0005),
        updates=updates.adadelta(params, grad),
        outputs=[accuracy, W_input_hidden, b_hidden, (hidden > 0.5)])
    # Rebinds `predict` from the symbolic expression to the compiled function
    # returning P(class 0).
    predict = theano.function(inputs=[X], outputs=predict[:, 0])
    i = T.iscalar('i')
    hidden_p = theano.function(inputs=[X, i], outputs=hidden[:, i])
    return train, predict, hidden_p, params
def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):
    # Recurrent layer with a clipped rectifier: either initialise fresh
    # shared weights or wrap pre-trained arrays passed via `parameters`.
    if parameters is None:
        W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
        W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
        b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        W_if = theano.shared(parameters['W_if'], name='W_if')
        W_ff = theano.shared(parameters['W_ff'], name='W_ff')
        b = theano.shared(parameters['b'], name='b')
    # Randomly initialised starting hidden state for the scan.
    # NOTE(review): `initial` is a shared variable but is not included in
    # self.params, so it is never updated — confirm this is intentional.
    initial = U.create_shared(U.initial_weights(output_size))
    self.is_backward = is_backward
    # Rectifier clipped at 20 to avoid activation blow-up.
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    def step(in_t, out_tminus1):
        return self.activation_fn(T.dot(out_tminus1, W_ff) + T.dot(in_t, W_if) + b)
    self.output, _ = theano.scan(
        step,
        sequences=[inputs],
        outputs_info=[initial],
        go_backwards=self.is_backward
    )
    self.params = [W_if, W_ff, b]
def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):
    # Recurrent clipped-ReLU layer; the non-recurrent part of the
    # pre-activation is precomputed outside the scan for efficiency.
    if parameters is None:
        self.W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
        self.W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        self.W_if = theano.shared(parameters['W_if'], name='W_if')
        self.W_ff = theano.shared(parameters['W_ff'], name='W_ff')
        self.b = theano.shared(parameters['b'], name='b')
    # Zero (non-learned) initial hidden state.
    initial = T.zeros((output_size,))
    self.is_backward = is_backward
    # Capped rectifier; the cast pins the graph to float32.
    self.activation_fn = lambda x: T.cast(T.minimum(x * (x > 0), 20), dtype='float32')#dtype=theano.config.floatX)
    # Input projection + bias for every timestep, computed once.
    nonrecurrent = T.dot(inputs, self.W_if) + self.b
    self.output, _ = theano.scan(
        lambda in_t, out_tminus1, weights: self.activation_fn(in_t + T.dot(out_tminus1, weights)),
        sequences=[nonrecurrent],
        outputs_info=[initial],
        non_sequences=[self.W_ff],
        go_backwards=self.is_backward
    )
    self.params = [self.W_if, self.W_ff, self.b]
def build(P, id, input_size, mem_width):
    # Head parameters for key-based addressing. Head 0 has its gate fixed
    # to ones; other heads learn a sigmoid gate.
    # P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_key" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_sim" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_sim" % id] = 0. * U.initial_weights(mem_width)
    # P["W_%d_shift" % id] = U.initial_weights(input_size, shift_width)
    # P["b_%d_shift" % id] = 0. * U.initial_weights(shift_width)
    if id != 0 :
        P["W_%d_g" % id] = U.initial_weights(input_size)
        P["b_%d_g" % id] = 0.
    def head_params(x):
        # key
        key_t = T.dot(x, P["W_%d_key" % id]) + P["b_%d_key" % id]
        #similarity weight
        sim_t = T.nnet.sigmoid(T.dot(x, P["W_%d_sim" % id]) + P["b_%d_sim" % id])
        #attention weight (complement of the similarity weight)
        att_t = 1 - sim_t
        if id != 0 :
            g_t = T.nnet.sigmoid(T.dot(x, P["W_%d_g" % id]) + P["b_%d_g" % id])#guess this is a vector
        else :
            # Head 0: gate fixed to 1 for every sample in the batch.
            g_t = T.ones([x.shape[0]])
        return key_t, g_t, sim_t, att_t
    return head_params
def build(P, input_size, output_size, mem_width):
    """
    Build a controller that averages the input with a sigmoid projection of
    the memory read, then applies a softmax output layer.
    """
    P.W_input_hidden = U.initial_weights(input_size, output_size)
    P.b_hidden_0 = 0. * U.initial_weights(output_size)
    # Projection taking the read vector back into the input space.
    P.W_read_hidden = U.initial_weights(mem_width, input_size)
    P.b_hidden_read = 0. * U.initial_weights(input_size)
    def controller(input_t, read_t):
        read_proj = T.nnet.sigmoid(T.dot(read_t,P.W_read_hidden) + P.b_hidden_read)
        combined = (input_t + read_proj)/2
        logits = T.dot(combined, P.W_input_hidden) + P.b_hidden_0
        # Matrix softmax for batches, vector softmax for single samples.
        if input_t.ndim > 1 :
            return T.nnet.softmax(logits)
        return U.vector_softmax(logits)
    return controller
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Softmax output layer; reuses pre-trained `parameters` when given."""
    if parameters is not None:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    else:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    self.output = T.nnet.softmax(T.dot(inputs, self.W) + self.b)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Softmax layer over `inputs`; weights live in locals, not attributes."""
    if parameters is not None:
        weight = theano.shared(parameters['W'], name='W')
        bias = theano.shared(parameters['b'], name='b')
    else:
        weight = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        bias = U.create_shared(U.initial_weights(output_size), name='b')
    self.output = T.nnet.softmax(T.dot(inputs, weight) + bias)
    self.params = [weight, bias]
def __init__(self, forward_in, backward_in, input_size, output_size):
    """Combine forward and backward streams, then softmax each time step."""
    Wf = U.create_shared(U.initial_weights(input_size, output_size))
    Wb = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    self.activations = T.dot(forward_in, Wf) + T.dot(backward_in, Wb) + b
    def softmax_step(act):
        return T.nnet.softmax(act)
    self.output, _ = theano.scan(softmax_step, sequences=[self.activations])
    self.params = [Wf, Wb, b]
def build(P, n_input, n_hidden, n_output):
    """One-hidden-layer MLP: sigmoid hidden layer, softmax output."""
    P.W_i_h = U.initial_weights(n_input, n_hidden)
    P.W_h_o = U.initial_weights(n_hidden, n_output)
    P.b_h = U.initial_weights(n_hidden)
    P.b_o = U.initial_weights(n_output)
    def f(X):
        h = T.nnet.sigmoid(T.dot(X, P.W_i_h) + P.b_h)
        return T.nnet.softmax(T.dot(h, P.W_h_o) + P.b_o)
    return f
def __init__(self, forward_in, backward_in, input_size, output_size):
    """Bidirectional output layer: sum both directions, softmax per step."""
    W_forward = U.create_shared(U.initial_weights(input_size, output_size))
    W_backward = U.create_shared(U.initial_weights(input_size, output_size))
    bias = U.create_shared(U.initial_weights(output_size))
    self.activations = (T.dot(forward_in, W_forward)
                        + T.dot(backward_in, W_backward)
                        + bias)
    self.output, _ = theano.scan(
        lambda step_act: T.nnet.softmax(step_act),
        sequences=[self.activations]
    )
    self.params = [W_forward, W_backward, bias]
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Feedforward layer with a rectifier clipped at 20."""
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is not None:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    else:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    self.output = self.activation_fn(T.dot(inputs, self.W) + self.b)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size):
    """Clipped-ReLU layer computed both directly (output2) and via scan (output)."""
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    W = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    # Direct (non-scan) computation over the whole sequence at once.
    self.output2 = self.activation_fn(T.dot(inputs, W) + b)
    # Per-timestep computation of the same quantity via scan.
    def step(element):
        return self.activation_fn(T.dot(element, W) + b)
    self.output, _ = theano.scan(step, sequences=[inputs])
    self.params = [W, b]
def __init__(self, inputs, input_size, output_size, rng, dropout_rate, parameters=None):
    # Feedforward clipped-ReLU layer with dropout-style output rescaling.
    # NOTE(review): `rng` is unused and no dropout mask is sampled here —
    # only the (1 - dropout_rate) scaling is applied. Confirm masks are
    # applied elsewhere (e.g. by the training-time caller).
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is None:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    # Scale pre-activations and cast to the configured float width.
    self.output = T.cast(self.activation_fn(
        (T.dot(inputs, self.W) + self.b)*(1.0-dropout_rate)
    ), dtype=theano.config.floatX)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size):
    """Clipped-ReLU layer; `output2` is the direct form, `output` the scan form."""
    clip_relu = lambda x: T.minimum(x * (x > 0), 20)
    self.activation_fn = clip_relu
    W = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    self.output2 = clip_relu(T.dot(inputs, W) + b)
    self.output, _ = theano.scan(
        lambda element: clip_relu(T.dot(element, W) + b),
        sequences=[inputs]
    )
    self.params = [W, b]
def build_network(input_size, hidden_size, constraint_adj=False):
    """
    Build a tied-weight autoencoder: sigmoid hidden layer, softmax
    reconstruction using the transpose of the encoder weights.

    Returns (X, output, cost, P).
    """
    P = Parameters()
    X = T.bmatrix('X')
    P.W_input_hidden = U.initial_weights(input_size, hidden_size)
    P.b_hidden = U.initial_weights(hidden_size)
    P.b_output = U.initial_weights(input_size)
    hidden_lin = T.dot(X, P.W_input_hidden) + P.b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    # Decoder shares (transposed) weights with the encoder.
    output = T.nnet.softmax(T.dot(hidden, P.W_input_hidden.T) + P.b_output)
    # (removed dead local `parameters = P.values()` — it was never used)
    cost = build_error(X, output, P)
    if constraint_adj:
        # Adjacency penalty currently disabled.
        pass
        #cost = cost + adjacency_constraint(hidden_lin)
    return X, output, cost, P
def build_network(input_size,hidden_size,constraint_adj=False):
    # Tied-weight autoencoder: sigmoid encoder, softmax decoder sharing the
    # transposed encoder weights. Returns (X, output, cost, P).
    P = Parameters()
    X = T.bmatrix('X')
    P.W_input_hidden = U.initial_weights(input_size,hidden_size)
    P.b_hidden = U.initial_weights(hidden_size)
    P.b_output = U.initial_weights(input_size)
    hidden_lin = T.dot(X,P.W_input_hidden)+P.b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    output = T.nnet.softmax(T.dot(hidden,P.W_input_hidden.T) + P.b_output)
    # NOTE(review): `parameters` is never used below.
    parameters = P.values()
    cost = build_error(X,output,P)
    if constraint_adj:pass
    #cost = cost + adjacency_constraint(hidden_lin)
    return X,output,cost,P
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Per-timestep clipped-ReLU projection applied with scan."""
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is not None:
        W = theano.shared(parameters['W'], name='W')
        b = theano.shared(parameters['b'], name='b')
    else:
        W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        b = U.create_shared(U.initial_weights(output_size), name='b')
    def project(element):
        return self.activation_fn(T.dot(element, W) + b)
    self.output, _ = theano.scan(project, sequences=[inputs])
    self.params = [W, b]
def build(P, input_size, output_size, mem_size, mem_width, layer_sizes):
    """ Create controller function for use during scan op """
    # First-layer projections for external input and memory read.
    P.W_input_hidden = U.initial_weights(input_size, layer_sizes[0])
    P.W_read_hidden = U.initial_weights(mem_width, layer_sizes[0])
    P.b_hidden_0 = 0.0 * U.initial_weights(layer_sizes[0])
    hidden_weights = []
    # Chain of tanh hidden layers between consecutive entries of layer_sizes.
    for i in xrange(len(layer_sizes) - 1):
        P["W_hidden_%d" % (i + 1)] = U.initial_weights(layer_sizes[i], layer_sizes[i + 1])
        P["b_hidden_%d" % (i + 1)] = 0.0 * U.initial_weights(layer_sizes[i + 1])
        hidden_weights.append((P["W_hidden_%d" % (i + 1)], P["b_hidden_%d" % (i + 1)]))
    # Output projection starts at zero.
    P.W_hidden_output = 0.0 * U.initial_weights(layer_sizes[-1], output_size)
    P.b_output = 0.0 * U.initial_weights(output_size)
    def controller(input_t, read_t):
        # print "input_t",input_t.type
        # NOTE(review): unlike the sibling controller in this file, the first
        # layer here has no tanh nonlinearity — confirm this is intentional.
        prev_layer = hidden_0 = T.dot(input_t, P.W_input_hidden) + T.dot(read_t, P.W_read_hidden) + P.b_hidden_0
        # print "input",read_t.type,input_t.type
        # print "weights",P.W_input_hidden.type,P.W_read_hidden.type,P.b_hidden_0.type
        # print "layer", hidden_0.type
        for W, b in hidden_weights:
            prev_layer = T.tanh(T.dot(prev_layer, W) + b)
        fin_hidden = prev_layer
        output_t = T.nnet.sigmoid(T.dot(fin_hidden, P.W_hidden_output) + P.b_output)
        # The final hidden layer is also consumed by the heads.
        return output_t, fin_hidden
    return controller
def _build_conv_pool(P, n_layer, input_layer, n_feats_out, n_feats_in, conv_size, pool_size):
    # One conv + max-pool + sigmoid stage; weights are registered on P under
    # layer-indexed keys so later layers can be built the same way.
    P["W_%d"%n_layer] = U.initial_weights(n_feats_out, n_feats_in, conv_size, conv_size)
    P["b_%d"%n_layer] = np.zeros((n_feats_out, ))
    W = P["W_%d"%n_layer]
    b = P["b_%d"%n_layer]
    out_conv = T.nnet.conv2d(input_layer, W)
    out_pool = max_pool_2d(out_conv, (pool_size, pool_size))
    # Bias is broadcast across the batch and spatial dimensions.
    output = T.nnet.sigmoid(out_pool + b.dimshuffle('x', 0, 'x', 'x'))
    return output
def build(P, input_size, proj_size) :
    """Register a linear image-projection matrix on P and return the projector."""
    #issue: initial method
    P["image_projection matrix"] = U.initial_weights(input_size, proj_size)
    def image_project(x) :
        #whether normalize or not
        return T.dot(x,P["image_projection matrix"])
    return image_project
def __init__(self, inputs, input_size, output_size, is_backward=False):
    # Batched recurrent clipped-ReLU layer: the outer scan walks time steps,
    # the inner scan walks samples within each step.
    W_if = U.create_shared(U.initial_weights(input_size, output_size))
    W_ff = U.create_shared(U.initial_weights(output_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    # Randomly initialised starting hidden state shared by every inner scan.
    initial = U.create_shared(U.initial_weights(output_size))
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    # NOTE(review): the inner theano.scan result (outputs, updates) is
    # returned directly from the outer step function — confirm the updates
    # dictionary is empty / propagated as intended.
    self.output, _ = theano.scan(
        lambda in_t:
        theano.scan(
            lambda index, out_tminus1: self.activation_fn(T.dot(out_tminus1, W_ff) + T.dot(in_t[index], W_if) + b),
            sequences=[T.arange(inputs.shape[1])],
            outputs_info=[initial],
            go_backwards=is_backward
        ),
        sequences=[inputs]  # for each sample at time "t"
    )
    self.params = [W_if, W_ff, b]
def __init__(self, inputs, input_size, output_size, is_backward=False):
    # Batched recurrent clipped-ReLU layer (nested-scan formulation): outer
    # scan over time, inner scan over samples at each step.
    W_if = U.create_shared(U.initial_weights(input_size, output_size))
    W_ff = U.create_shared(U.initial_weights(output_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    # Randomly initialised starting hidden state shared by the inner scans.
    initial = U.create_shared(U.initial_weights(output_size))
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    # NOTE(review): the inner scan's (outputs, updates) tuple is returned
    # directly from the outer step — confirm updates handling is intended.
    self.output, _ = theano.scan(
        lambda in_t: theano.scan(
            lambda index, out_tminus1: self.activation_fn(
                T.dot(out_tminus1, W_ff) + T.dot(in_t[index], W_if) + b),
            sequences=[T.arange(inputs.shape[1])],
            outputs_info=[initial],
            go_backwards=is_backward),
        sequences=[inputs]  # for each sample at time "t"
    )
    self.params = [W_if, W_ff, b]
def build_network(input_size, hidden_size):
    # Recurrent autoencoder: encode X into a hidden sequence, reconstruct
    # the previous hidden state and the input, and unroll from the final
    # hidden state.
    X = T.dmatrix('X')
    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    # Learned initial hidden state.
    initial_hidden = U.create_shared(U.initial_weights(hidden_size), name='init_hidden')
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))
    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]
    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X, W_input_to_hidden, W_hidden_to_hidden, b_hidden, initial_hidden,
        b_hidden_reproduction, b_input_reproduction)
    # Decode the whole sequence back out of the last hidden state.
    unrolled = unroll(hidden[-1], W_input_to_hidden, W_hidden_to_hidden,
                      b_hidden_reproduction, b_input_reproduction,
                      hidden.shape[0])
    return X, parameters, hidden, hidden1_reproduction, input_reproduction, unrolled
def __init__(self, inputs, input_size, output_size, rng, dropout_rate, parameters=None):
    # Feedforward clipped-ReLU layer with dropout-style output rescaling.
    # NOTE(review): `rng` is unused and no dropout mask is sampled here —
    # only the (1 - dropout_rate) scaling is applied; confirm masks are
    # handled by the caller.
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is None:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    # Scale pre-activations and cast to the configured float width.
    self.output = T.cast(self.activation_fn(
        (T.dot(inputs, self.W) + self.b) * (1.0 - dropout_rate)),
        dtype=theano.config.floatX)
    self.params = [self.W, self.b]
def build(P): image_row = 35 # num of base pairs image_col = 4 # num of nucleotides n_input = image_row * image_col n_feats = [1, 16] # num of "motifs" We'll learn 16 PWM's conv_row = 8 # 8-long PWM conv_col = 4 # 4 nucleotides pool_row = 28 # ?? pool_col = 1 n_pool_out = (n_feats[1] * ((image_row - conv_row + 1) / pool_row) * ((image_col - conv_col + 1) / pool_col)) n_hidden = 32 n_output = 1 P.W_input_conv = U.initial_weights(n_feats[1], n_feats[0], conv_row, conv_col) P.b_pool_out = np.zeros(n_pool_out) P.W_pool_out_hidden = U.initial_weights(n_pool_out, n_hidden) P.b_hidden = np.zeros(n_hidden) P.W_hidden_output = U.initial_weights(n_hidden, n_output) P.b_output = np.zeros(n_output) def f(X): n_samples = X.shape[0] input = X.reshape((n_samples, n_feats[0], image_row, image_col)) conv_out = T.nnet.conv2d(input, P.W_input_conv) pool_out_= max_pool_2d(conv_out, (pool_row, pool_col)) pool_out = pool_out_.flatten(2) + P.b_pool_out hidden = relu(T.dot(pool_out, P.W_pool_out_hidden) + P.b_hidden) output = T.dot(hidden, P.W_hidden_output) + P.b_output return output.astype(theano.config.floatX) return f
def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):
    # Recurrent clipped-ReLU layer; the non-recurrent input projection is
    # hoisted out of the scan for efficiency.
    if parameters is None:
        self.W_if = U.create_shared(U.initial_weights(
            input_size, output_size), name='W_if')
        self.W_ff = U.create_shared(U.initial_weights(
            output_size, output_size), name='W_ff')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        self.W_if = theano.shared(parameters['W_if'], name='W_if')
        self.W_ff = theano.shared(parameters['W_ff'], name='W_ff')
        self.b = theano.shared(parameters['b'], name='b')
    # Zero (non-learned) initial hidden state.
    initial = T.zeros((output_size, ))
    self.is_backward = is_backward
    # Capped rectifier; the cast pins the graph to float32.
    self.activation_fn = lambda x: T.cast(T.minimum(x * (x > 0), 20),
                                          dtype='float32'
                                          )  #dtype=theano.config.floatX)
    # Per-timestep input projection + bias, computed once.
    nonrecurrent = T.dot(inputs, self.W_if) + self.b
    self.output, _ = theano.scan(
        lambda in_t, out_tminus1, weights: self.activation_fn(in_t + T.dot(
            out_tminus1, weights)),
        sequences=[nonrecurrent],
        outputs_info=[initial],
        non_sequences=[self.W_ff],
        go_backwards=self.is_backward)
    self.params = [self.W_if, self.W_ff, self.b]
def build_model(hidden_size,predict_only=False):
    # 2-input softmax classifier with one sigmoid hidden layer; compiles
    # train / predict / hidden-inspection functions.
    # NOTE(review): `predict_only` is unused — the update rule that
    # referenced it is commented out below.
    X = T.matrix('X')
    Y = T.ivector('Y')
    #* (0.001 * U.initial_weights(2,hidden_size) + np.array([[0,0,1,1],[1,1,0,0]])))
    W_input_hidden = U.create_shared(U.initial_weights(2,hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden_size,2))
    b_predict = U.create_shared(U.initial_weights(2))
    params = [W_input_hidden,b_hidden,W_hidden_predict,b_predict]
    hidden_lin = T.dot(X,W_input_hidden) + b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    predict = T.nnet.softmax(T.dot(hidden,W_hidden_predict) + b_predict)
    # Negative log-likelihood plus adjacency penalty on hidden pre-activations.
    cost = -T.mean(T.log(predict[T.arange(Y.shape[0]),Y])) + 1e-3*adjacency_constraint(hidden_lin)# + 1e-4 * sum(T.sum(p**2) for p in params)
    accuracy = T.mean(T.eq(T.argmax(predict,axis=1),Y))
    grad = T.grad(cost,params)
    train = theano.function(
        inputs = [X,Y],
        #updates = updates.momentum(params,grad,0.9999,0.1) if not predict_only else None,
        #updates = updates.momentum(params,grad,0.999,0.0005),
        updates = updates.adadelta(params,grad),
        outputs = [accuracy,W_input_hidden,b_hidden,(hidden>0.5)]
    )
    # Rebinds `predict` from the symbolic expression to a compiled function
    # returning P(class 0).
    predict = theano.function(
        inputs = [X],
        outputs = predict[:,0]
    )
    i = T.iscalar('i')
    hidden_p = theano.function(
        inputs = [X,i],
        outputs = hidden[:,i]
    )
    return train,predict,hidden_p,params
def build(P, n_input, n_hidden, n_output):
    """LeNet-style MNIST model: two conv/pool stages plus a softmax output."""
    P.W_hidden_output = U.initial_weights(n_hidden, n_output)
    P.b_output = np.zeros(n_output)
    # n_hidden = 50 * 4 * 4 = 800 (n_feats of layer2 * pixels in image)
    # TODO: fix these magic numbers (especially the 800)
    def f(X):
        images = X.reshape((X.shape[0], 1, 28, 28))
        stage1 = _build_conv_pool(P, 1, images, 20, 1, 5, 2)
        stage2 = _build_conv_pool(P, 2, stage1, 50, 20, 5, 2)
        flattened = stage2.flatten(2)
        return T.nnet.softmax(T.dot(flattened, P.W_hidden_output) + P.b_output)
    return f
def build_model(P,X,input_size,hidden_size,output_size):
    """Simple RNN classifier: recurrent hidden layer followed by softmax."""
    W_input_hidden = U.create_shared(U.initial_weights(input_size,hidden_size))
    W_hidden_hidden = U.create_shared(U.initial_weights(hidden_size,hidden_size))
    W_hidden_output = U.create_shared(U.initial_weights(hidden_size,output_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    i_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(output_size))
    # Project the input once, then run the recurrence over it.
    projected = T.dot(X,W_input_hidden)
    hidden = build_rnn(projected,W_hidden_hidden,b_hidden,i_hidden)
    predict = T.nnet.softmax(T.dot(hidden,W_hidden_output) + b_output)
    return X,predict
def build(P, input_size=8, output_size=8, mem_size=128, mem_width=20, layer_sizes=[100]):
    """ Create controller function for use during scan op """
    # First-layer projections for the external input and the memory read.
    P.W_input_hidden = U.initial_weights(input_size, layer_sizes[0])
    P.W_read_hidden = U.initial_weights(mem_width, layer_sizes[0])
    P.b_hidden_0 = 0. * U.initial_weights(layer_sizes[0])
    hidden_weights = []
    # Chain of tanh hidden layers between consecutive entries of layer_sizes.
    for i in xrange(len(layer_sizes) - 1):
        P["W_hidden_%d" % (i + 1)] = U.initial_weights(layer_sizes[i],
                                                       layer_sizes[i + 1])
        P["b_hidden_%d" % (i + 1)] = 0. * U.initial_weights(layer_sizes[i + 1])
        hidden_weights.append(
            (P["W_hidden_%d" % (i + 1)], P["b_hidden_%d" % (i + 1)]))
    # Output projection starts at zero.
    P.W_hidden_output = 0. * U.initial_weights(layer_sizes[-1], output_size)
    P.b_output = 0. * U.initial_weights(output_size)
    def controller(input_t, read_t):
        # print "input_t",input_t.type
        prev_layer = hidden_0 = T.tanh(
            T.dot(input_t, P.W_input_hidden) +
            T.dot(read_t, P.W_read_hidden) + P.b_hidden_0)
        # print "input",read_t.type,input_t.type
        # print "weights",P.W_input_hidden.type,P.W_read_hidden.type,P.b_hidden_0.type
        # print "layer", hidden_0.type
        for W, b in hidden_weights:
            prev_layer = T.tanh(T.dot(prev_layer, W) + b)
        fin_hidden = prev_layer
        output_t = T.nnet.sigmoid(
            T.dot(fin_hidden, P.W_hidden_output) + P.b_output)
        # The final hidden layer is also consumed by the heads.
        return output_t, fin_hidden
    return controller
def build_model(P, X, input_size, hidden_size, output_size):
    # Simple RNN classifier: recurrent hidden layer + softmax output.
    # NOTE(review): `P` is unused here — the shared variables are created
    # directly; confirm whether they should be registered on P instead.
    W_input_hidden = U.create_shared(U.initial_weights(input_size, hidden_size))
    W_hidden_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    W_hidden_output = U.create_shared(
        U.initial_weights(hidden_size, output_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    # Initial hidden state for the recurrence.
    i_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(output_size))
    hidden = build_rnn(T.dot(X, W_input_hidden), W_hidden_hidden, b_hidden,
                       i_hidden)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output)
    return X, predict
def build_network(input_size,hidden_size):
    # Recurrent autoencoder: encode X into a hidden sequence, reconstruct
    # both the previous hidden state and the input, then unroll from the
    # final hidden state.
    X = T.dmatrix('X')
    W_input_to_hidden = U.create_shared(U.initial_weights(input_size,hidden_size))
    W_hidden_to_hidden = U.create_shared(U.initial_weights(hidden_size,hidden_size))
    # Learned initial hidden state.
    initial_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))
    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]
    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X,
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction
    )
    # Decode the whole sequence back out of the last hidden state.
    unrolled = unroll(
        hidden[-1],
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
        hidden.shape[0]
    )
    return X,parameters,hidden,hidden1_reproduction,input_reproduction,unrolled
def build(P, input_size, mem_width, mem_size, shift_width):
    """
    NTM heads are implemented as another hidden layer coming after
    the last hidden layer of the controller that emits
    k_t, beta_t, g_t, s_t, gamma_t
    as outputs (see Controller outputs of Figure 2 in paper) along
    with erase and add vectors
    """
    P["W_key"] = U.initial_weights(input_size, mem_width)
    P["b_key"] = 0. * U.initial_weights(mem_width)
    # Key-strength weights start at zero so beta starts near softplus(0).
    P["W_beta"] = 0. * U.initial_weights(input_size)
    P["b_beta"] = 0.
    P["W_g"] = U.initial_weights(input_size)
    P["b_g"] = 0.
    P["W_shift"] = U.initial_weights(input_size, shift_width)
    P["b_shift"] = 0. * U.initial_weights(shift_width)
    P["W_gamma"] = U.initial_weights(input_size)
    P["b_gamma"] = 0.
    P["W_erase"] = U.initial_weights(input_size, mem_width)
    P["b_erase"] = 0. * U.initial_weights(mem_width)
    P["W_add"] = U.initial_weights(input_size, mem_width)
    P["b_add"] = 0. * U.initial_weights(mem_width)
    def head_params(x):
        """
        Takes hidden layer from controller computes
        k_t, beta_t, g_t, s_t, and erase and add vectors as
        outputs
        """
        # key
        key_t = T.dot(x, P["W_key"]) + P["b_key"]
        # key strength (softplus keeps beta non-negative)
        _beta_t = T.dot(x, P["W_beta"]) + P["b_beta"]
        beta_t = T.nnet.softplus(_beta_t)
        # interpolation gate
        g_t = T.nnet.sigmoid(T.dot(x, P["W_g"]) + P["b_g"])
        # shift (softmax over the shift offsets)
        shift_t = U.vector_softmax(T.dot(x, P["W_shift"]) + P["b_shift"])
        shift_t.name = "shift_t"
        # sharpening (softplus + 1 keeps gamma >= 1)
        _gamma_t = T.dot(x, P["W_gamma"]) + P["b_gamma"]
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        # erase and add vectors
        erase_t = T.nnet.sigmoid(T.dot(x, P["W_erase"]) + P["b_erase"])
        add_t = T.dot(x, P["W_add"]) + P["b_add"]
        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t
    return head_params
def build(P, id, input_size, mem_width, mem_size, shift_width):
    # NTM head with per-head ("%d" % id) parameter keys; emits the
    # addressing quantities plus erase/add vectors.
    # 1. content addressing
    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_key" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_beta" % id] = 0. * U.initial_weights(input_size)
    P["b_%d_beta" % id] = 0.
    # 2. interpolation
    P["W_%d_g" % id] = U.initial_weights(input_size)
    P["b_%d_g" % id] = 0.
    # 3. convolutional shift
    P["W_%d_shift" % id] = U.initial_weights(input_size, shift_width)
    P["b_%d_shift" % id] = 0. * U.initial_weights(shift_width)
    # 4. sharpening
    P["W_%d_gamma" % id] = U.initial_weights(input_size)
    P["b_%d_gamma" % id] = 0.
    # 5. erase and add vector
    P["W_%d_erase" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_erase" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_add" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_add" % id] = 0. * U.initial_weights(mem_width)
    def head_params(x):
        # key
        key_t = T.dot(x, P["W_%d_key" % id]) + P["b_%d_key" % id]
        # shift (softmax over the shift offsets)
        shift_t = U.vector_softmax(
            T.dot(x, P["W_%d_shift" % id]) + P["b_%d_shift" % id])
        shift_t.name = "shift_t"
        # scalars: softplus keeps beta >= 0, gamma >= 1
        _beta_t = T.dot(x, P["W_%d_beta" % id]) + P["b_%d_beta" % id]
        _gamma_t = T.dot(x, P["W_%d_gamma" % id]) + P["b_%d_gamma" % id]
        beta_t = T.nnet.softplus(_beta_t)
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        # beta_t = (_beta_t > 0)*_beta_t
        # gamma_t = (_gamma_t > 0)*_gamma_t + 1.
        # beta_t = T.exp(_beta_t)
        # gamma_t = T.exp(_gamma_t) + 1.
        g_t = T.nnet.sigmoid(T.dot(x, P["W_%d_g" % id]) + P["b_%d_g" % id])
        erase_t = T.nnet.sigmoid(
            T.dot(x, P["W_%d_erase" % id]) + P["b_%d_erase" % id])
        add_t = T.dot(x, P["W_%d_add" % id]) + P["b_%d_add" % id]
        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t
    return head_params
def build(P,id,input_size,mem_width,mem_size,shift_width):
    # NTM head variant: the shift distribution uses a sigmoid per offset
    # instead of a softmax (see commented-out alternative below).
    P["W_%d_key"%id] = U.initial_weights(input_size,mem_width)
    P["b_%d_key"%id] = 0. * U.initial_weights(mem_width)
    # P["W_%d_shift"%id] = U.initial_weights(input_size) # 100
    # P["b_%d_shift"%id] = 0.
    P["W_%d_shift"%id] = U.initial_weights(input_size,shift_width) # 100X3
    P["b_%d_shift"%id] = 0. * U.initial_weights(shift_width)
    # P["W_%d_beta"%id] = 0. * U.initial_weights(input_size)
    P["W_%d_beta"%id] = U.initial_weights(input_size)
    P["b_%d_beta"%id] = 0.
    P["W_%d_gamma"%id] = U.initial_weights(input_size)
    P["b_%d_gamma"%id] = 0.
    P["W_%d_g"%id] = U.initial_weights(input_size)
    P["b_%d_g"%id] = 0.
    P["W_%d_erase"%id] = U.initial_weights(input_size,mem_width)
    P["b_%d_erase"%id] = 0. * U.initial_weights(mem_width)
    P["W_%d_add"%id] = U.initial_weights(input_size,mem_width)
    P["b_%d_add"%id] = 0. * U.initial_weights(mem_width)
    def head_params(x):
        # key
        key_t = T.dot(x,P["W_%d_key"%id]) + P["b_%d_key"%id]
        # shift: independent sigmoids (not normalised across offsets)
        shift_t = T.nnet.sigmoid(T.dot(x,P["W_%d_shift"%id]) + P["b_%d_shift"%id]) # *2 - 1
        # shift_t = U.vector_softmax(T.dot(x,P["W_%d_shift"%id]) + P["b_%d_shift"%id])
        # shift_t.name = "shift_t"
        # scalars: softplus keeps beta >= 0, gamma >= 1
        _beta_t = T.dot(x,P["W_%d_beta"%id]) + P["b_%d_beta"%id]
        _gamma_t = T.dot(x,P["W_%d_gamma"%id]) + P["b_%d_gamma"%id]
        beta_t = T.nnet.softplus(_beta_t)
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        # beta_t = (_beta_t > 0)*_beta_t
        # gamma_t = (_gamma_t > 0)*_gamma_t + 1.
        # beta_t = T.exp(_beta_t)
        # gamma_t = T.exp(_gamma_t) + 1.
        g_t = T.nnet.sigmoid(T.dot(x,P["W_%d_g"%id]) + P["b_%d_g"%id])
        erase_t = T.nnet.sigmoid(T.dot(x,P["W_%d_erase"%id]) + P["b_%d_erase"%id])
        add_t = T.dot(x,P["W_%d_add"%id]) + P["b_%d_add"%id]
        return key_t,beta_t,g_t,shift_t,gamma_t,erase_t,add_t
    return head_params
def build(P, id, input_size, mem_width, mem_size, shift_width):
    # NTM head variant (formatted duplicate): sigmoid-per-offset shift
    # instead of a softmax over offsets.
    P["W_%d_key" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_key" % id] = 0. * U.initial_weights(mem_width)
    # P["W_%d_shift"%id] = U.initial_weights(input_size) # 100
    # P["b_%d_shift"%id] = 0.
    P["W_%d_shift" % id] = U.initial_weights(input_size, shift_width)  # 100X3
    P["b_%d_shift" % id] = 0. * U.initial_weights(shift_width)
    # P["W_%d_beta"%id] = 0. * U.initial_weights(input_size)
    P["W_%d_beta" % id] = U.initial_weights(input_size)
    P["b_%d_beta" % id] = 0.
    P["W_%d_gamma" % id] = U.initial_weights(input_size)
    P["b_%d_gamma" % id] = 0.
    P["W_%d_g" % id] = U.initial_weights(input_size)
    P["b_%d_g" % id] = 0.
    P["W_%d_erase" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_erase" % id] = 0. * U.initial_weights(mem_width)
    P["W_%d_add" % id] = U.initial_weights(input_size, mem_width)
    P["b_%d_add" % id] = 0. * U.initial_weights(mem_width)
    def head_params(x):
        # key
        key_t = T.dot(x, P["W_%d_key" % id]) + P["b_%d_key" % id]
        # shift: independent sigmoids (not normalised across offsets)
        shift_t = T.nnet.sigmoid(
            T.dot(x, P["W_%d_shift" % id]) + P["b_%d_shift" % id])  # *2 - 1
        # shift_t = U.vector_softmax(T.dot(x,P["W_%d_shift"%id]) + P["b_%d_shift"%id])
        # shift_t.name = "shift_t"
        # scalars: softplus keeps beta >= 0, gamma >= 1
        _beta_t = T.dot(x, P["W_%d_beta" % id]) + P["b_%d_beta" % id]
        _gamma_t = T.dot(x, P["W_%d_gamma" % id]) + P["b_%d_gamma" % id]
        beta_t = T.nnet.softplus(_beta_t)
        gamma_t = T.nnet.softplus(_gamma_t) + 1.
        # beta_t = (_beta_t > 0)*_beta_t
        # gamma_t = (_gamma_t > 0)*_gamma_t + 1.
        # beta_t = T.exp(_beta_t)
        # gamma_t = T.exp(_gamma_t) + 1.
        g_t = T.nnet.sigmoid(T.dot(x, P["W_%d_g" % id]) + P["b_%d_g" % id])
        erase_t = T.nnet.sigmoid(
            T.dot(x, P["W_%d_erase" % id]) + P["b_%d_erase" % id])
        add_t = T.dot(x, P["W_%d_add" % id]) + P["b_%d_add" % id]
        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t
    return head_params
def build_lstm_step(P, word_vector_size, hidden_state_size):
    """
    Allocate the parameters of an LSTM (with peephole connections) in the
    parameter store P and return step(x, prev_h, prev_c) -> (curr_h, curr_c),
    the per-timestep transition suitable for use inside a scan.
    """
    def from_input():
        return U.initial_weights(word_vector_size, hidden_state_size)

    def from_state():
        return U.initial_weights(hidden_state_size, hidden_state_size)

    def state_vector():
        return U.initial_weights(hidden_state_size)

    # input gate
    P.W_input_in = from_input()
    P.W_hidden_in = from_state()
    P.W_cell_in = from_state()
    P.b_in = state_vector()
    # forget gate
    P.W_input_forget = from_input()
    P.W_hidden_forget = from_state()
    P.W_cell_forget = from_state()
    P.b_forget = state_vector()
    # output gate
    P.W_input_output = from_input()
    P.W_hidden_output = from_state()
    P.W_cell_output = from_state()
    P.b_output = state_vector()
    # candidate cell value
    P.W_input_cell = from_input()
    P.W_hidden_cell = from_state()
    P.b_cell = state_vector()
    # learned initial hidden/cell state
    P.init_h = state_vector()
    P.init_c = state_vector()

    def step(x, prev_h, prev_c):
        """One LSTM transition from (prev_h, prev_c) given input x."""
        # input and forget gates peep at the *previous* cell state
        input_gate = T.nnet.sigmoid(
            T.dot(x, P.W_input_in)
            + T.dot(prev_h, P.W_hidden_in)
            + T.dot(prev_c, P.W_cell_in)
            + P.b_in)
        forget_gate = T.nnet.sigmoid(
            T.dot(x, P.W_input_forget)
            + T.dot(prev_h, P.W_hidden_forget)
            + T.dot(prev_c, P.W_cell_forget)
            + P.b_forget)
        candidate = T.tanh(
            T.dot(x, P.W_input_cell)
            + T.dot(prev_h, P.W_hidden_cell)
            + P.b_cell)
        curr_c = forget_gate * prev_c + input_gate * candidate
        # output gate peeps at the *current* cell state
        output_gate = T.nnet.sigmoid(
            T.dot(x, P.W_input_output)
            + T.dot(prev_h, P.W_hidden_output)
            + T.dot(curr_c, P.W_cell_output)
            + P.b_output)
        curr_h = output_gate * T.tanh(curr_c)
        return curr_h, curr_c

    return step
def create_vocab_vectors(P,vocab2id,size):
    """
    Build the initial word-embedding matrix: one row of dimension `size` per
    vocabulary id, plus one extra row (presumably an out-of-vocabulary or
    padding slot — TODO confirm against the lookup code).

    NOTE(review): `P` is accepted but not used here; the matrix is returned
    rather than registered in the parameter store.
    """
    return U.initial_weights(len(vocab2id) + 1,size)
def create_vocab_vectors(P, vocab2id, size):
    """
    Return a freshly initialised embedding matrix of shape
    (len(vocab2id) + 1, size); the extra row is presumably reserved for an
    unknown-word/padding id — confirm against the lookup code.
    """
    n_rows = len(vocab2id) + 1
    return U.initial_weights(n_rows, size)
def build_lstm_step(P, word_vector_size, hidden_state_size):
    """
    Allocate the parameters of an LSTM (with peephole connections) in the
    parameter store P and return step(x, prev_h, prev_c) -> (curr_h, curr_c),
    the per-timestep transition suitable for use inside a scan.
    """
    # input gate
    P.W_input_in = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_in = U.initial_weights(hidden_state_size, hidden_state_size)
    P.W_cell_in = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_in = U.initial_weights(hidden_state_size)
    # forget gate
    P.W_input_forget = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_forget = U.initial_weights(hidden_state_size, hidden_state_size)
    P.W_cell_forget = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_forget = U.initial_weights(hidden_state_size)
    # output gate
    P.W_input_output = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_output = U.initial_weights(hidden_state_size, hidden_state_size)
    P.W_cell_output = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_output = U.initial_weights(hidden_state_size)
    # candidate cell value
    P.W_input_cell = U.initial_weights(word_vector_size, hidden_state_size)
    P.W_hidden_cell = U.initial_weights(hidden_state_size, hidden_state_size)
    P.b_cell = U.initial_weights(hidden_state_size)
    # learned initial hidden/cell state
    P.init_h = U.initial_weights(hidden_state_size)
    P.init_c = U.initial_weights(hidden_state_size)

    def step(x, prev_h, prev_c):
        """One LSTM transition from (prev_h, prev_c) given input x."""
        # input and forget gates peep at the *previous* cell state
        input_gate = T.nnet.sigmoid(
            T.dot(x,P.W_input_in) +\
            T.dot(prev_h,P.W_hidden_in) +\
            T.dot(prev_c,P.W_cell_in) +\
            P.b_in
        )
        forget_gate = T.nnet.sigmoid(
            T.dot(x,P.W_input_forget) +\
            T.dot(prev_h,P.W_hidden_forget) +\
            T.dot(prev_c,P.W_cell_forget) +\
            P.b_forget
        )
        curr_c = forget_gate * prev_c + input_gate * T.tanh(
            T.dot(x,P.W_input_cell) +\
            T.dot(prev_h,P.W_hidden_cell) +\
            P.b_cell
        )
        # output gate peeps at the *current* cell state
        output_gate = T.nnet.sigmoid(
            T.dot(x,P.W_input_output) +\
            T.dot(prev_h,P.W_hidden_output) +\
            T.dot(curr_c,P.W_cell_output) +\
            P.b_output
        )
        curr_h = output_gate * T.tanh(curr_c)
        return curr_h, curr_c

    return step
def build(P, input_size, mem_width, mem_size, shift_width):
    """
    NTM head: an extra affine layer on top of the controller's last hidden
    layer that emits the addressing parameters k_t, beta_t, g_t, s_t, gamma_t
    (see Controller outputs of Figure 2 in the paper) together with the erase
    and add vectors.
    """
    # mem_width-sized vector outputs: content key, erase vector, add vector
    for field in ("key", "erase", "add"):
        P["W_" + field] = U.initial_weights(input_size, mem_width)
        P["b_" + field] = 0. * U.initial_weights(mem_width)

    # key-strength weights start at zero; bias is a plain scalar
    P["W_beta"] = 0. * U.initial_weights(input_size)
    P["b_beta"] = 0.

    # scalar outputs: g (interpolation gate) and gamma (sharpening)
    for field in ("g", "gamma"):
        P["W_" + field] = U.initial_weights(input_size)
        P["b_" + field] = 0.

    # shift logits: one value per allowed shift position
    P["W_shift"] = U.initial_weights(input_size, shift_width)
    P["b_shift"] = 0. * U.initial_weights(shift_width)

    def head_params(x):
        """
        Map the controller's hidden layer x to the head outputs
        (k_t, beta_t, g_t, s_t, gamma_t, erase_t, add_t).
        """
        def affine(field):
            return T.dot(x, P["W_" + field]) + P["b_" + field]

        # content-addressing key
        key_t = affine("key")
        # key strength: softplus keeps beta_t >= 0
        beta_t = T.nnet.softplus(affine("beta"))
        # interpolation gate in (0, 1)
        g_t = T.nnet.sigmoid(affine("g"))
        # shift distribution over positions
        shift_t = U.vector_softmax(affine("shift"))
        shift_t.name = "shift_t"
        # sharpening exponent: the +1 keeps gamma_t >= 1
        gamma_t = T.nnet.softplus(affine("gamma")) + 1.
        # erase (gated in (0, 1)) and add vectors
        erase_t = T.nnet.sigmoid(affine("erase"))
        add_t = affine("add")
        return key_t, beta_t, g_t, shift_t, gamma_t, erase_t, add_t

    return head_params