def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):
    """Recurrent clipped-ReLU layer.

    The input projection is time-independent, so it is computed once
    outside the scan; the step function only adds the recurrent term.
    """
    if parameters is None:
        # Fresh randomly-initialised weights.
        self.W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
        self.W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        # Wrap pre-trained parameter arrays.
        self.W_if = theano.shared(parameters['W_if'], name='W_if')
        self.W_ff = theano.shared(parameters['W_ff'], name='W_ff')
        self.b = theano.shared(parameters['b'], name='b')

    initial = T.zeros((output_size,))  # zero initial hidden state
    self.is_backward = is_backward
    # Clipped ReLU: min(max(x, 0), 20), cast to float32.
    self.activation_fn = lambda x: T.cast(T.minimum(x * (x > 0), 20), dtype='float32')  # dtype=theano.config.floatX)

    # Hoisted: input contribution for every time step at once.
    nonrecurrent = T.dot(inputs, self.W_if) + self.b

    def step(in_t, out_tminus1, weights):
        return self.activation_fn(in_t + T.dot(out_tminus1, weights))

    self.output, _ = theano.scan(
        step,
        sequences=[nonrecurrent],
        outputs_info=[initial],
        non_sequences=[self.W_ff],
        go_backwards=self.is_backward
    )
    self.params = [self.W_if, self.W_ff, self.b]
def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):
    """Recurrent clipped-ReLU layer with a (shared-variable) initial state.

    Unlike the hoisted variant, the full affine transform is evaluated
    inside the scan step at every time step.
    """
    if parameters is None:
        W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
        W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
        b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        W_if = theano.shared(parameters['W_if'], name='W_if')
        W_ff = theano.shared(parameters['W_ff'], name='W_ff')
        b = theano.shared(parameters['b'], name='b')

    # NOTE(review): the initial hidden state is drawn from the weight
    # initialiser but is not in self.params, so it is never trained — confirm.
    initial = U.create_shared(U.initial_weights(output_size))
    self.is_backward = is_backward
    # Clipped ReLU: min(max(x, 0), 20).
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

    def recurrence(in_t, out_tminus1):
        # Recurrent + input contributions computed per time step.
        return self.activation_fn(T.dot(out_tminus1, W_ff) + T.dot(in_t, W_if) + b)

    self.output, _ = theano.scan(
        recurrence,
        sequences=[inputs],
        outputs_info=[initial],
        go_backwards=self.is_backward
    )
    self.params = [W_if, W_ff, b]
def build_model(hidden_size, predict_only=False):
    """Build a 2-input / 2-class single-hidden-layer classifier.

    Returns (train, predict, hidden_p, params):
    train(X, Y)   -> [accuracy, W_input_hidden, b_hidden, hidden > 0.5]
    predict(X)    -> probability of class 0
    hidden_p(X,i) -> activations of hidden unit i
    """
    X = T.matrix('X')
    Y = T.ivector('Y')
    # * (0.001 * U.initial_weights(2,hidden_size) + np.array([[0,0,1,1],[1,1,0,0]])))
    W_input_hidden = U.create_shared(U.initial_weights(2, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden_size, 2))
    b_predict = U.create_shared(U.initial_weights(2))
    params = [W_input_hidden, b_hidden, W_hidden_predict, b_predict]

    hidden_lin = T.dot(X, W_input_hidden) + b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_predict) + b_predict)

    # Negative log-likelihood plus a small adjacency penalty on the
    # hidden pre-activations.
    cost = -T.mean(T.log(predict[T.arange(Y.shape[0]), Y])) \
        + 1e-3 * adjacency_constraint(hidden_lin)
    # + 1e-4 * sum(T.sum(p**2) for p in params)
    accuracy = T.mean(T.eq(T.argmax(predict, axis=1), Y))
    grad = T.grad(cost, params)

    train = theano.function(
        inputs=[X, Y],
        # updates = updates.momentum(params,grad,0.9999,0.1) if not predict_only else None,
        # updates = updates.momentum(params,grad,0.999,0.0005),
        updates=updates.adadelta(params, grad),
        outputs=[accuracy, W_input_hidden, b_hidden, (hidden > 0.5)]
    )
    predict = theano.function(inputs=[X], outputs=predict[:, 0])
    i = T.iscalar('i')
    hidden_p = theano.function(inputs=[X, i], outputs=hidden[:, i])
    return train, predict, hidden_p, params
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Softmax output layer; W and b are kept as instance attributes."""
    if parameters is None:
        # Fresh random initialisation.
        make = lambda key, *shape: U.create_shared(U.initial_weights(*shape), name=key)
        self.W = make('W', input_size, output_size)
        self.b = make('b', output_size)
    else:
        # Restore pre-trained arrays.
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    self.output = T.nnet.softmax(T.dot(inputs, self.W) + self.b)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Softmax output layer over a linear transform of `inputs`."""
    if parameters is None:
        W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        W = theano.shared(parameters['W'], name='W')
        b = theano.shared(parameters['b'], name='b')
    linear = T.dot(inputs, W) + b
    self.output = T.nnet.softmax(linear)
    self.params = [W, b]
def __init__(self, forward_in, backward_in, input_size, output_size):
    """Merge forward and backward hidden sequences and apply a
    per-timestep softmax output layer."""
    Wf = U.create_shared(U.initial_weights(input_size, output_size))
    Wb = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    # Sum the projections of both directions; bias broadcasts over time.
    self.activations = T.dot(forward_in, Wf) + T.dot(backward_in, Wb) + b
    # scan applies the softmax one time step at a time.
    self.output, _ = theano.scan(
        T.nnet.softmax,
        sequences=[self.activations]
    )
    self.params = [Wf, Wb, b]
def __init__(self, forward_in, backward_in, input_size, output_size):
    """Bidirectional output layer: combines both directions' hidden
    states, then softmaxes each time step."""
    Wf = U.create_shared(U.initial_weights(input_size, output_size))
    Wb = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    forward_part = T.dot(forward_in, Wf)
    backward_part = T.dot(backward_in, Wb)
    self.activations = forward_part + backward_part + b
    self.output, _ = theano.scan(
        lambda step_acts: T.nnet.softmax(step_acts),
        sequences=[self.activations]
    )
    self.params = [Wf, Wb, b]
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Feedforward layer with a clipped ReLU activation: min(relu(x), 20)."""
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is None:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    pre_activation = T.dot(inputs, self.W) + self.b
    self.output = self.activation_fn(pre_activation)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size):
    """Clipped-ReLU feedforward layer applied per time step via scan.

    output2 keeps the direct (non-scan) whole-tensor computation
    alongside the scan version.
    """
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    W = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    self.output2 = self.activation_fn(T.dot(inputs, W) + b)

    def apply(element):
        return self.activation_fn(T.dot(element, W) + b)

    self.output, _ = theano.scan(apply, sequences=[inputs])
    self.params = [W, b]
def __init__(self, inputs, input_size, output_size):
    """Feedforward clipped-ReLU layer; scan maps the affine transform +
    activation over the leading axis of `inputs`."""
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    W = U.create_shared(U.initial_weights(input_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    # Whole-tensor version, retained for comparison with the scan output.
    self.output2 = self.activation_fn(T.dot(inputs, W) + b)
    self.output, _ = theano.scan(
        lambda element: self.activation_fn(T.dot(element, W) + b),
        sequences=[inputs]
    )
    self.params = [W, b]
def __init__(self, inputs, input_size, output_size, rng, dropout_rate, parameters=None):
    """Hidden layer whose pre-activations are rescaled by
    (1 - dropout_rate) before the clipped ReLU.

    NOTE(review): rng is accepted but unused here — presumably the
    training-time (masked) twin of this layer consumes it; confirm.
    """
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is None:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    scaled = (T.dot(inputs, self.W) + self.b) * (1.0 - dropout_rate)
    self.output = T.cast(self.activation_fn(scaled), dtype=theano.config.floatX)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size, parameters=None):
    """Clipped-ReLU feedforward layer mapped over the leading axis of
    `inputs` with scan; weights may be restored from `parameters`."""
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is None:
        W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        b = U.create_shared(U.initial_weights(output_size), name='b')
    else:
        W = theano.shared(parameters['W'], name='W')
        b = theano.shared(parameters['b'], name='b')

    def apply(element):
        return self.activation_fn(T.dot(element, W) + b)

    self.output, _ = theano.scan(apply, sequences=[inputs])
    self.params = [W, b]
def make_accumulate_update(inputs, outputs, parameters, gradients, update_method=updates.adadelta):
    """Build a pair of functions for gradient accumulation across batches.

    acc_gradient(*inputs): adds the current gradients into per-parameter
        accumulators and increments the batch counter; returns `outputs`.
    train_acc(): applies `update_method` to the averaged accumulated
        gradients, then zeroes the accumulators and the counter.

    Returns (acc_gradient, train_acc).
    """
    # One accumulator per parameter, plus an int32 batch counter.
    acc = [U.create_shared(np.zeros(p.get_value().shape)) for p in parameters]
    count = U.create_shared(np.int32(0))

    acc_update = [(a, a + g) for a, g in zip(acc, gradients)] + [(count, count + 1)]
    acc_gradient = theano.function(
        inputs=inputs,
        outputs=outputs,
        updates=acc_update
    )

    avg_gradient = [a / count for a in acc]
    # Fixed: the original comprehension zipped `acc` with `parameters`
    # and ignored the second element — iterate `acc` directly. The
    # counter is reset with 0 * count so the update expression keeps the
    # shared variable's int32 dtype (a bare Python 0 does not).
    clear_update = [(a, 0. * a) for a in acc] + [(count, 0 * count)]
    train_acc = theano.function(
        inputs=[],
        updates=update_method(parameters, avg_gradient) + clear_update
    )
    return acc_gradient, train_acc
def __init__(self, inputs, input_size, output_size, is_backward=False):
    """Batched recurrent clipped-ReLU layer via nested scans: the outer
    scan walks samples along axis 0, the inner scan runs the recurrence
    over the time axis (axis 1) of each sample."""
    W_if = U.create_shared(U.initial_weights(input_size, output_size))
    W_ff = U.create_shared(U.initial_weights(output_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    initial = U.create_shared(U.initial_weights(output_size))
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

    def per_sample(in_t):
        # Inner recurrence over the time steps of a single sample.
        return theano.scan(
            lambda index, out_tminus1: self.activation_fn(
                T.dot(out_tminus1, W_ff) + T.dot(in_t[index], W_if) + b),
            sequences=[T.arange(inputs.shape[1])],
            outputs_info=[initial],
            go_backwards=is_backward
        )

    self.output, _ = theano.scan(
        per_sample,
        sequences=[inputs]  # for each sample at time "t"
    )
    self.params = [W_if, W_ff, b]
def __init__(self, inputs, input_size, output_size, is_backward=False):
    """Recurrent clipped-ReLU layer over a batch of sequences.

    Outer scan iterates samples (axis 0); inner scan indexes the time
    axis (axis 1) with T.arange and carries the hidden state.
    """
    W_if = U.create_shared(U.initial_weights(input_size, output_size))
    W_ff = U.create_shared(U.initial_weights(output_size, output_size))
    b = U.create_shared(U.initial_weights(output_size))
    initial = U.create_shared(U.initial_weights(output_size))
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)

    def inner_step(index, out_tminus1, in_t):
        recurrent = T.dot(out_tminus1, W_ff)
        projected = T.dot(in_t[index], W_if)
        return self.activation_fn(recurrent + projected + b)

    self.output, _ = theano.scan(
        lambda in_t: theano.scan(
            lambda index, out_tminus1: inner_step(index, out_tminus1, in_t),
            sequences=[T.arange(inputs.shape[1])],
            outputs_info=[initial],
            go_backwards=is_backward),
        sequences=[inputs]  # for each sample at time "t"
    )
    self.params = [W_if, W_ff, b]
def make_accumulate_update(inputs, outputs, parameters, gradients, update_method=updates.adadelta):
    """Return (acc_gradient, train_acc) for mini-batch gradient accumulation.

    acc_gradient: theano function that adds the current gradients into
        per-parameter accumulators and bumps the batch counter.
    train_acc: theano function that applies `update_method` using the
        averaged accumulated gradients, then clears accumulators/counter.
    """
    acc = [U.create_shared(np.zeros(p.get_value().shape)) for p in parameters]
    count = U.create_shared(np.int32(0))
    acc_update = [(a, a + g) for a, g in zip(acc, gradients)] + [(count, count + 1)]
    acc_gradient = theano.function(inputs=inputs, outputs=outputs, updates=acc_update)
    avg_gradient = [a / count for a in acc]
    # Fixed: previously zipped acc with `parameters`, discarding the
    # second element of each pair — iterate acc directly. 0 * count keeps
    # the reset expression at the counter's int32 dtype.
    clear_update = [(a, 0. * a) for a in acc] + [(count, 0 * count)]
    train_acc = theano.function(
        inputs=[],
        updates=update_method(parameters, avg_gradient) + clear_update)
    return acc_gradient, train_acc
def build_network(input_size, hidden_size):
    """Assemble the recurrent autoencoder graph.

    Returns (X, parameters, hidden, hidden1_reproduction,
    input_reproduction, unrolled).
    """
    X = T.dmatrix('X')
    W_input_to_hidden = U.create_shared(U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(U.initial_weights(hidden_size, hidden_size))
    initial_hidden = U.create_shared(U.initial_weights(hidden_size), name='init_hidden')
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))
    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]
    # Encoder pass plus one-step reproductions.
    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X, W_input_to_hidden, W_hidden_to_hidden, b_hidden,
        initial_hidden, b_hidden_reproduction, b_input_reproduction)
    # Decode the final hidden state back into a sequence of the same length.
    unrolled = unroll(hidden[-1], W_input_to_hidden, W_hidden_to_hidden,
                      b_hidden_reproduction, b_input_reproduction,
                      hidden.shape[0])
    return X, parameters, hidden, hidden1_reproduction, input_reproduction, unrolled
def load(model_file):
    """Load a saved feedforward model and return a theano function that
    yields every layer's activations for a float32 input matrix."""
    print("Loading model...")
    connections, biases = load_file(model_file)
    print("Loaded model.")
    # Wrap the raw arrays as shared variables in place.
    for idx in range(len(connections)):
        connections[idx] = U.create_shared(connections[idx])
        biases[idx] = U.create_shared(biases[idx])
    X = T.fmatrix('X')
    layers = [None] * len(connections)
    current = X
    # Sigmoid hidden layers...
    for idx in range(len(connections) - 1):
        current = layers[idx] = T.nnet.sigmoid(
            T.dot(current, connections[idx]) + biases[idx])
    # ...and a softmax output layer on top.
    last = len(connections) - 1
    layers[last] = T.nnet.softmax(T.dot(current, connections[last]) + biases[last])
    return theano.function(inputs=[X], outputs=layers)
def __init__(self, inputs, input_size, output_size, rng, dropout_rate, parameters=None):
    """Dropout-rescaled clipped-ReLU hidden layer: the affine output is
    multiplied by (1 - dropout_rate) before the activation and cast to
    theano.config.floatX.

    NOTE(review): rng is not used in this constructor — confirm whether
    a masked training-time variant is expected to use it.
    """
    self.activation_fn = lambda x: T.minimum(x * (x > 0), 20)
    if parameters is not None:
        self.W = theano.shared(parameters['W'], name='W')
        self.b = theano.shared(parameters['b'], name='b')
    else:
        self.W = U.create_shared(U.initial_weights(input_size, output_size), name='W')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')
    keep_scale = 1.0 - dropout_rate
    activated = self.activation_fn((T.dot(inputs, self.W) + self.b) * keep_scale)
    self.output = T.cast(activated, dtype=theano.config.floatX)
    self.params = [self.W, self.b]
def __init__(self, inputs, input_size, output_size, is_backward=False, parameters=None):
    """Recurrent clipped-ReLU layer.

    T.dot(inputs, W_if) + b does not depend on the recurrence, so it is
    hoisted out of the scan; each step only adds the recurrent term.
    """
    if parameters is not None:
        # Restore pre-trained arrays.
        self.W_if = theano.shared(parameters['W_if'], name='W_if')
        self.W_ff = theano.shared(parameters['W_ff'], name='W_ff')
        self.b = theano.shared(parameters['b'], name='b')
    else:
        # Fresh random initialisation.
        self.W_if = U.create_shared(U.initial_weights(input_size, output_size), name='W_if')
        self.W_ff = U.create_shared(U.initial_weights(output_size, output_size), name='W_ff')
        self.b = U.create_shared(U.initial_weights(output_size), name='b')

    initial = T.zeros((output_size,))  # zero initial state
    self.is_backward = is_backward
    # min(relu(x), 20), cast to float32.
    self.activation_fn = lambda x: T.cast(T.minimum(x * (x > 0), 20), dtype='float32')  # dtype=theano.config.floatX)

    nonrecurrent = T.dot(inputs, self.W_if) + self.b
    self.output, _ = theano.scan(
        lambda in_t, out_tminus1, weights:
            self.activation_fn(in_t + T.dot(out_tminus1, weights)),
        sequences=[nonrecurrent],
        outputs_info=[initial],
        non_sequences=[self.W_ff],
        go_backwards=self.is_backward)
    self.params = [self.W_if, self.W_ff, self.b]
def build_model(hidden_size, predict_only=False):
    """Single-hidden-layer 2-class classifier over 2-d inputs.

    Returns (train, predict, hidden_p, params); training uses adadelta
    on NLL plus a small adjacency penalty on hidden pre-activations.
    """
    X = T.matrix('X')
    Y = T.ivector('Y')
    # * (0.001 * U.initial_weights(2,hidden_size) + np.array([[0,0,1,1],[1,1,0,0]])))
    W_input_hidden = U.create_shared(U.initial_weights(2, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden_size, 2))
    b_predict = U.create_shared(U.initial_weights(2))
    params = [W_input_hidden, b_hidden, W_hidden_predict, b_predict]

    hidden_lin = T.dot(X, W_input_hidden) + b_hidden
    hidden = T.nnet.sigmoid(hidden_lin)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_predict) + b_predict)

    nll = -T.mean(T.log(predict[T.arange(Y.shape[0]), Y]))
    cost = nll + 1e-3 * adjacency_constraint(hidden_lin)
    # + 1e-4 * sum(T.sum(p**2) for p in params)
    accuracy = T.mean(T.eq(T.argmax(predict, axis=1), Y))
    grad = T.grad(cost, params)

    train = theano.function(
        inputs=[X, Y],
        # updates = updates.momentum(params,grad,0.9999,0.1) if not predict_only else None,
        # updates = updates.momentum(params,grad,0.999,0.0005),
        updates=updates.adadelta(params, grad),
        outputs=[accuracy, W_input_hidden, b_hidden, (hidden > 0.5)]
    )
    predict = theano.function(inputs=[X], outputs=predict[:, 0])
    i = T.iscalar('i')
    hidden_p = theano.function(inputs=[X, i], outputs=hidden[:, i])
    return train, predict, hidden_p, params
def load(model_file):
    """Restore a saved model from disk and compile a predictor that
    returns the activations of all layers (sigmoid hiddens + softmax top)."""
    print("Loading model...")
    connections, biases = load_file(model_file)
    print("Loaded model.")
    for layer_idx in range(len(connections)):
        # Promote the loaded arrays to shared variables.
        connections[layer_idx] = U.create_shared(connections[layer_idx])
        biases[layer_idx] = U.create_shared(biases[layer_idx])
    X = T.fmatrix('X')
    layers = [None] * len(connections)
    current = X
    for layer_idx in range(len(connections) - 1):
        current = layers[layer_idx] = T.nnet.sigmoid(
            T.dot(current, connections[layer_idx]) + biases[layer_idx])
    top = len(connections) - 1
    layers[top] = T.nnet.softmax(T.dot(current, connections[top]) + biases[top])
    predict = theano.function(inputs=[X], outputs=layers)
    return predict
def build_model(P, X, input_size, hidden_size, output_size):
    """Construct an RNN classifier graph over the symbolic input X.

    Returns (X, predict), where predict is the per-timestep softmax.
    NOTE(review): P is currently unused — confirm whether parameters
    were meant to be registered on it.
    """
    W_input_hidden = U.create_shared(U.initial_weights(input_size, hidden_size))
    W_hidden_hidden = U.create_shared(U.initial_weights(hidden_size, hidden_size))
    W_hidden_output = U.create_shared(U.initial_weights(hidden_size, output_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    i_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(output_size))

    hidden = build_rnn(T.dot(X, W_input_hidden), W_hidden_hidden, b_hidden, i_hidden)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output)
    return X, predict
def build_model(P, X, input_size, hidden_size, output_size):
    """Build an RNN + softmax classifier graph and return (X, predict).

    NOTE(review): the P argument is accepted but never referenced here.
    """
    # Input-to-hidden, hidden-to-hidden and hidden-to-output projections.
    W_input_hidden = U.create_shared(U.initial_weights(input_size, hidden_size))
    W_hidden_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    W_hidden_output = U.create_shared(
        U.initial_weights(hidden_size, output_size))
    # Biases and the initial hidden state.
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    i_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(output_size))

    projected_input = T.dot(X, W_input_hidden)
    hidden = build_rnn(projected_input, W_hidden_hidden, b_hidden, i_hidden)
    predict = T.nnet.softmax(T.dot(hidden, W_hidden_output) + b_output)
    return X, predict
def build_network(input_size, hidden_size):
    """Build the recurrent autoencoder: encode X into hidden states,
    reproduce the first step, and unroll the final state back into a
    sequence of matching length."""
    X = T.dmatrix('X')
    W_input_to_hidden = U.create_shared(U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(U.initial_weights(hidden_size, hidden_size))
    initial_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))
    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]
    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X,
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction
    )
    unrolled = unroll(
        hidden[-1],
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
        hidden.shape[0]
    )
    return X, parameters, hidden, hidden1_reproduction, input_reproduction, unrolled