Example #1
    def __init__(self, nb_filters, stack_size, filter_height, filter_width,
                 wide, name):
        """
        Construct a convolutional layer
        `wide`:
            False: only apply filter to complete patches of the image.
            Generates output of shape: image_shape - filter_shape + 1
            True: zero-pads image to multiple of filter shape to generate
            output of shape: image_shape + filter_shape - 1
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.wide = wide
        self.name = name
        self.filter_shape = (nb_filters, stack_size, filter_height,
                             filter_width)

        fan_in = stack_size * filter_height * filter_width  # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width  # each unit in the lower layer receives a gradient from this many units
        drange = np.sqrt(6. / (fan_in + fan_out))  # Glorot/Xavier range for the uniform initialization

        self.filters = create_shared(
            drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(np.zeros((nb_filters, )), name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
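The drange above is the Glorot/Xavier uniform bound sqrt(6 / (fan_in + fan_out)). As a minimal numpy sketch of the same initialization, assuming random_weights draws uniformly from [-1, 1):

import numpy as np

def glorot_uniform(shape, fan_in, fan_out, rng=np.random):
    # Sample uniformly from [-drange, drange), the Glorot/Xavier range.
    drange = np.sqrt(6. / (fan_in + fan_out))
    return drange * (2 * rng.random_sample(shape) - 1)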
Example #2
    def __init__(self, nb_filters, stack_size, filter_height, wide, emb_dim,
                 name):
        """
        1D convolutional layer: 1D Row-wise convolution.
        Requires to know the dimension of the embeddings.
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.wide = wide
        self.emb_dim = emb_dim
        self.filter_shape = (emb_dim, nb_filters, stack_size, filter_height, 1)

        # _TODO_ check initialization
        # fan_in = in_fmaps * 1 * width
        # fan_out = out_fmaps * 1 * width
        # W_bound = numpy.sqrt(6./(fan_in+fan_out))
        filters_values = np.asarray(np.random.normal(0,
                                                     0.05,
                                                     size=self.filter_shape),
                                    dtype=theano.config.floatX)
        self.filters = create_shared(filters_values, name + '__filters')
        self.bias = create_shared(np.zeros((nb_filters, emb_dim)),
                                  name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #3
    def __init__(self, nb_filters, stack_size, filter_height, filter_width,
                 border_mode, stride, name):
        """
        Construct a convolutional layer.
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.border_mode = border_mode
        self.filter_shape = (nb_filters, stack_size, filter_height,
                             filter_width)
        self.stride = stride
        self.name = name

        fan_in = stack_size * filter_height * filter_width  # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width  # each unit in the lower layer receives a gradient from this many units
        drange = np.sqrt(6. / (fan_in + fan_out))  # Glorot/Xavier range for the uniform initialization

        self.filters = create_shared(
            drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(
            np.ones((nb_filters, )) * 0.1, name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #4
    def __init__(self, input_dim, output_dim, bias=True, activation='sigmoid',
                 name='hidden_layer'):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.bias = bias
        self.name = name
        if activation is None:
            self.activation = None
        elif activation == 'tanh':
            self.activation = T.tanh
        elif activation == 'sigmoid':
            self.activation = T.nnet.sigmoid
        elif activation == 'softmax':
            self.activation = T.nnet.softmax
        elif activation == 'relu':
            self.activation = T.nnet.relu
        else:
            raise Exception("Unknown activation function: %s" % activation)

        # Initialize weights and bias
        self.weights = create_shared(
            random_weights((input_dim, output_dim)),
            name + '__weights'
        )

        if activation == 'relu':
            self.bias = create_shared(np.ones((output_dim,)) * 0.1, name + '__bias')
        else:
            self.bias = create_shared(np.zeros((output_dim,)), name + '__bias')

        # Define parameters
        if bias:  # test the constructor flag; self.bias now holds the shared variable
            self.params = [self.weights, self.bias]
        else:
            self.params = [self.weights]
Example #5
    def __init__(self, nb_filters, stack_size, filter_height, wide, emb_dim, name):
        """
        1D convolutional layer: 1D Row-wise convolution.
        Requires to know the dimension of the embeddings.
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.wide = wide
        self.emb_dim = emb_dim
        self.filter_shape = (emb_dim, nb_filters, stack_size, filter_height, 1)

        # _TODO_ check initialization
        # fan_in = in_fmaps * 1 * width
        # fan_out = out_fmaps * 1 * width
        # W_bound = numpy.sqrt(6./(fan_in+fan_out))
        filters_values = np.asarray(
            np.random.normal(0, 0.05, size=self.filter_shape),
            dtype=theano.config.floatX
        )
        self.filters = create_shared(filters_values, name + '__filters')
        self.bias = create_shared(np.zeros((nb_filters, emb_dim)), name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #6
def trainer(X,Y,alpha,lr,predictions,updates,data,labels):
	data   = U.create_shared(data,  dtype=np.int8)
	labels = U.create_shared(labels,dtype=np.int8)
	index_start = T.lscalar('start')
	index_end   = T.lscalar('end')
	print "Compiling function..."
	train_model = theano.function(
			inputs  = [index_start,index_end,alpha,lr],
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			updates = updates,
			givens  = {
				X:   data[index_start:index_end],
				Y: labels[index_start:index_end]
			}
		)
	test_model = theano.function(
			inputs  = [index_start,index_end],
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			givens  = {
				X:   data[index_start:index_end],
				Y: labels[index_start:index_end]
			}
		)
	print "Done."
	return train_model,test_model
Example #7
    def __init__(self,
                 input_dim,
                 output_dim,
                 bias=True,
                 activation='sigmoid',
                 name='hidden_layer'):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.bias = bias
        self.name = name
        if activation is None:
            self.activation = None
        elif activation == 'tanh':
            self.activation = T.tanh
        elif activation == 'sigmoid':
            self.activation = T.nnet.sigmoid
        elif activation == 'softmax':
            self.activation = T.nnet.softmax
        elif activation == 'relu':
            self.activation = T.nnet.relu
        else:
            raise Exception("Unknown activation function: %s" % activation)

        # Initialize weights and bias
        self.weights = create_shared(random_weights((input_dim, output_dim)),
                                     name + '__weights')

        self.bias = create_shared(np.zeros((output_dim, )), name + '__bias')

        # Define parameters
        if bias:  # test the constructor flag; self.bias now holds the shared variable
            self.params = [self.weights, self.bias]
        else:
            self.params = [self.weights]
Example #8
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 activation=T.nnet.sigmoid,
                 with_batch=True,
                 name='RNN'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.with_batch = with_batch
        self.name = name

        # Randomly generate weights
        self.w_x = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_x')
        self.w_h = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__w_h')

        # Initialize the bias vector and h_0 to zero vectors
        self.b_h = create_shared(np.zeros((hidden_dim, )), name + '__b_h')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        # Define parameters
        self.params = [self.w_x, self.w_h, self.b_h, self.h_0]
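For reference, the recurrence these parameters define (one unbatched step) is h_t = activation(x_t . w_x + h_{t-1} . w_h + b_h). A minimal numpy sketch, with sigmoid standing in for the default activation:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def rnn_step(x_t, h_tm1, w_x, w_h, b_h):
    # One step of the recurrence defined by the parameters above.
    return sigmoid(np.dot(x_t, w_x) + np.dot(h_tm1, w_h) + b_h)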
Example #9
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Update gate weights and bias
        self.w_z = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_z')
        self.u_z = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_z')
        self.b_z = create_shared(np.zeros((hidden_dim,)), name + '__b_z')

        # Reset gate weights and bias
        self.w_r = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_r')
        self.u_r = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_r')
        self.b_r = create_shared(np.zeros((hidden_dim,)), name + '__b_r')

        # New memory content weights and bias
        self.w_c = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_c')
        self.u_c = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__u_c')
        self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')

        # Initialize the bias vector, h_0, to the zero vector
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        # Define parameters
        self.params = [self.w_z, self.u_z, self.b_z,
                       self.w_r, self.u_r, self.b_r,
                       self.w_c, self.u_c, self.b_c,
                       self.h_0]
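Despite the default name='LSTM', the update/reset/new-memory-content gates above are those of a GRU. One recurrence step in plain numpy, assuming the standard GRU equations:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def gru_step(x_t, h_tm1, w_z, u_z, b_z, w_r, u_r, b_r, w_c, u_c, b_c):
    z = sigmoid(np.dot(x_t, w_z) + np.dot(h_tm1, u_z) + b_z)      # update gate
    r = sigmoid(np.dot(x_t, w_r) + np.dot(h_tm1, u_r) + b_r)      # reset gate
    c = np.tanh(np.dot(x_t, w_c) + np.dot(r * h_tm1, u_c) + b_c)  # candidate state
    return (1 - z) * h_tm1 + z * c                                # new hidden state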
Example #10
    def __init__(self, nb_filters, stack_size, filter_height, filter_width, wide, name):
        """
        Construct a convolutional layer
        `wide`:
            False: only apply filter to complete patches of the image.
            Generates output of shape: image_shape - filter_shape + 1
            True: zero-pads image to multiple of filter shape to generate
            output of shape: image_shape + filter_shape - 1
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.wide = wide
        self.name = name
        self.filter_shape = (nb_filters, stack_size, filter_height, filter_width)

        fan_in = stack_size * filter_height * filter_width    # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width   # each unit in the lower layer receives a gradient from this many units
        drange = np.sqrt(6. / (fan_in + fan_out))             # Glorot/Xavier range for the uniform initialization

        self.filters = create_shared(drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(np.zeros((nb_filters,)), name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #11
def adadelta(parameters, gradients, rho=np.float32(0.95),
             eps=np.float32(1e-6)):
    gradients_sq = [
        U.create_shared(np.zeros(p.get_value().shape, dtype=np.float32))
        for p in parameters
    ]
    deltas_sq = [
        U.create_shared(np.zeros(p.get_value().shape, dtype=np.float32))
        for p in parameters
    ]

    gradients_sq_new = [
        rho * g_sq + (np.float32(1) - rho) * (g**2)
        for g_sq, g in izip(gradients_sq, gradients)
    ]
    deltas = [
        (T.sqrt(d_sq + eps) / T.sqrt(g_sq + eps)) * grad
        for d_sq, g_sq, grad in izip(deltas_sq, gradients_sq_new, gradients)
    ]
    deltas_sq_new = [
        rho * d_sq + (np.float32(1) - rho) * (d**2)
        for d_sq, d in izip(deltas_sq, deltas)
    ]

    gradient_sq_updates = zip(gradients_sq, gradients_sq_new)
    deltas_sq_updates = zip(deltas_sq, deltas_sq_new)
    parameters_updates = [(p, p - d) for p, d in izip(parameters, deltas)]
    return gradient_sq_updates + deltas_sq_updates + parameters_updates
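The same AdaDelta recurrences (Zeiler 2012) written out for a single parameter in plain numpy; a minimal sketch mirroring the symbolic updates above:

import numpy as np

def adadelta_step(p, g, g_sq, d_sq, rho=0.95, eps=1e-6):
    # One update: accumulate E[g^2], compute the scaled step, accumulate E[dx^2].
    g_sq = rho * g_sq + (1 - rho) * g**2
    delta = (np.sqrt(d_sq + eps) / np.sqrt(g_sq + eps)) * g
    d_sq = rho * d_sq + (1 - rho) * delta**2
    return p - delta, g_sq, d_sq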
Example #12
	def __init__(self,layers_in,layer_out):
		self.ins     = layers_in
		self.out     = layer_out
		self.Ws      = [ U.create_shared(U.initial_weights(inp.size,self.out.size))
							for inp in self.ins.layers ]
		self.bias    = U.create_shared(np.zeros(self.out.size))
		self.updates = self.Ws + [self.bias]
Example #13
def momentum(parameters,gradients,mu,eps):
	t = U.create_shared(1)
	m = (1 - 3.0/(t+5) < mu)
	mu = m * (1 - 3.0/(t+5)) + (1-m) * mu
	deltas = [ U.create_shared(np.zeros(p.get_value().shape)) for p in parameters ]
	delta_nexts = [ mu*delta + eps*grad for delta,grad in zip(deltas,gradients) ]
	delta_updates = [ (delta, delta_next) for delta,delta_next in zip(deltas,delta_nexts) ]
	param_updates = [ (param, param - delta_next) for param,delta_next in zip(parameters,delta_nexts) ]
	return delta_updates + param_updates + [ (t,t + 1) ]
Example #14
 def __init__(self, layers_in, layer_out):
     self.ins = layers_in
     self.out = layer_out
     self.Ws = [
         U.create_shared(U.initial_weights(inp.size, self.out.size))
         for inp in self.ins.layers
     ]
     self.bias = U.create_shared(np.zeros(self.out.size))
     self.updates = self.Ws + [self.bias]
Example #15
def adadelta(parameters,gradients,rho=np.float32(0.95),eps=np.float32(1e-6)):
	gradients_sq = [ U.create_shared(np.zeros(p.get_value().shape,dtype=np.float32)) for p in parameters ]
	deltas_sq    = [ U.create_shared(np.zeros(p.get_value().shape,dtype=np.float32)) for p in parameters ]

	gradients_sq_new = [ rho*g_sq + (np.float32(1)-rho)*(g**2)      for g_sq,g         in izip(gradients_sq,gradients) ]
	deltas = [ (T.sqrt(d_sq+eps)/T.sqrt(g_sq+eps))*grad for d_sq,g_sq,grad in izip(deltas_sq,gradients_sq_new,gradients) ]
	deltas_sq_new = [ rho*d_sq + (np.float32(1)-rho)*(d**2)         for d_sq,d         in izip(deltas_sq,deltas) ]

	gradient_sq_updates = zip(gradients_sq,gradients_sq_new)
	deltas_sq_updates = zip(deltas_sq,deltas_sq_new)
	parameters_updates = [ (p,p - d) for p,d in izip(parameters,deltas) ]
	return gradient_sq_updates + deltas_sq_updates + parameters_updates
Example #16
def build_network(input_size,hidden_size):
	X = T.imatrix('X')
	W_input_to_hidden  = U.create_shared(U.initial_weights(input_size,hidden_size))
	W_hidden_to_output = U.create_shared(U.initial_weights(hidden_size,input_size))
	b_output = U.create_shared(U.initial_weights(input_size))

	hidden = T.nnet.sigmoid(T.dot(X,W_input_to_hidden))
	output = T.nnet.softmax(T.dot(hidden,W_input_to_hidden.T) + b_output)
	
	parameters = [W_input_to_hidden,b_output]

	return X,output,parameters
Example #17
def construct_network(context, characters, hidden, mult_hidden):
    print "Setting up memory..."
    X = T.bvector('X')
    Y = T.bvector('Y')
    alpha = T.cast(T.fscalar('alpha'), dtype=theano.config.floatX)
    lr = T.cast(T.fscalar('lr'), dtype=theano.config.floatX)

    print "Initialising weights..."
    W_char_hidden = U.create_shared(U.initial_weights(characters, hidden))
    f_char_hidden = U.create_shared(U.initial_weights(characters, mult_hidden))
    b_hidden = U.create_shared(U.initial_weights(hidden))
    Wf_hidden = U.create_shared(U.initial_weights(hidden, mult_hidden))
    fW_hidden = U.create_shared(U.initial_weights(mult_hidden, hidden))
    W_hidden_predict = U.create_shared(U.initial_weights(hidden, characters))
    b_predict = U.create_shared(U.initial_weights(characters))

    print "Constructing graph..."
    hidden = make_hidden(hidden, W_char_hidden[X], f_char_hidden[X], Wf_hidden,
                         fW_hidden, b_hidden)
    predictions = T.nnet.softmax(T.dot(hidden, W_hidden_predict) + b_predict)
    weights = [
        W_char_hidden, f_char_hidden, b_hidden, Wf_hidden, fW_hidden,
        W_hidden_predict, b_predict
    ]
    cost = -T.mean(T.log(predictions)[T.arange(Y.shape[0]), Y])
    gparams = T.grad(cost, weights)

    deltas = [U.create_shared(np.zeros(w.get_value().shape)) for w in weights]
    updates = [(param, param - (alpha * delta + gparam * lr))
               for param, delta, gparam in zip(weights, deltas, gparams)
               ] + [(delta, alpha * delta + gparam * lr)
                    for delta, gparam in zip(deltas, gparams)]
    return X, Y, alpha, lr, updates, predictions, weights
Example #18
def build_network(input_size, hidden_size):
    X = T.imatrix('X')
    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    W_hidden_to_output = U.create_shared(
        U.initial_weights(hidden_size, input_size))
    b_output = U.create_shared(U.initial_weights(input_size))

    hidden = T.nnet.sigmoid(T.dot(X, W_input_to_hidden))
    output = T.nnet.softmax(T.dot(hidden, W_input_to_hidden.T) + b_output)

    parameters = [W_input_to_hidden, b_output]

    return X, output, parameters
Example #19
def momentum(parameters, gradients, mu, eps):
    t = U.create_shared(1)
    m = (1 - 3.0 / (t + 5) < mu)
    mu = m * (1 - 3.0 / (t + 5)) + (1 - m) * mu
    deltas = [
        U.create_shared(np.zeros(p.get_value().shape)) for p in parameters
    ]
    delta_nexts = [
        mu * delta + eps * grad for delta, grad in zip(deltas, gradients)
    ]
    delta_updates = [(delta, delta_next)
                     for delta, delta_next in zip(deltas, delta_nexts)]
    param_updates = [(param, param - delta_next)
                     for param, delta_next in zip(parameters, delta_nexts)]
    return delta_updates + param_updates + [(t, t + 1)]
Example #20
	def __init__(self, visible, hidden, **kwargs):
		kwargs['lambda_2'] = 0.0
		self.v = visible
		self.h = hidden
		inputs = self.v.size
		outputs = self.h.size
		
		super(RBM,self).__init__(inputs,outputs,**kwargs)
		self.h_bias       = self.bias
		self.h_bias_delta = self.bias_delta

		self.v_bias       = U.create_shared(np.zeros(self.v.size))
		self.v_bias_delta = U.create_shared(np.zeros(self.v.size))

		self.tunables += [self.v_bias]
		self.deltas   += [self.v_bias_delta]
Example #21
    def __init__(self, input_dim, hidden_dim, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.name = name

        self.W = create_shared(random_weights((input_dim, hidden_dim * 4)), name + 'W')
        self.U = create_shared(random_weights((hidden_dim, hidden_dim * 4)), name + 'U')
        self.b = create_shared(random_weights((hidden_dim * 4, )), name + 'b')

        self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        self.params = [self.W, self.U, self.b]
Example #22
    def __init__(self, input_dim, hidden_dim, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.name = name

        self.W = create_shared(random_weights((input_dim, hidden_dim * 4)),
                               name + 'W')
        self.U = create_shared(random_weights((hidden_dim, hidden_dim * 4)),
                               name + 'U')
        self.b = create_shared(random_weights((hidden_dim * 4, )), name + 'b')

        self.c_0 = create_shared(np.zeros((hidden_dim, )), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        self.params = [self.W, self.U, self.b]
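The hidden_dim * 4 width suggests the usual fused-gate trick: one matrix multiply produces all four LSTM gate pre-activations, which the step function then slices apart. A sketch of that slicing, assuming gate order [i, f, c, o] (the actual order depends on the layer's link code, which is not shown):

def split_gates(preact, hidden_dim):
    # Slice a fused (hidden_dim * 4) pre-activation into the four LSTM gates.
    i = preact[0 * hidden_dim:1 * hidden_dim]  # input gate
    f = preact[1 * hidden_dim:2 * hidden_dim]  # forget gate
    c = preact[2 * hidden_dim:3 * hidden_dim]  # candidate cell
    o = preact[3 * hidden_dim:4 * hidden_dim]  # output gate
    return i, f, c, o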
Example #23
def rmsprop(parameters,gradients,discount=0.95,momentum=0.9,learning_rate=1e-4,epsilon=1e-4):
	#gradients = [ (g < -clip)*(-clip) + (g > clip)*(clip) + (abs(g) <= clip) * g for g in gradients ]
	sq_acc    = [ U.create_shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]
	acc       = [ U.create_shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]
	delta_acc = [ U.create_shared(np.zeros(p.get_value().shape, dtype=theano.config.floatX)) for p in parameters ]

	sq_avg = [ discount * sq_a + (1 - discount) * g**2 for sq_a,g in izip(sq_acc,gradients) ]
	avg    = [ discount * a    + (1 - discount) * g    for a,   g in izip(acc,gradients) ]
	scaled_grads = [ g / T.sqrt(sq_a - a**2 + epsilon) for g,a,sq_a in izip(gradients,acc,sq_acc) ]
	deltas = [ momentum * d_a + learning_rate * s_g for d_a,s_g in izip(delta_acc,scaled_grads) ]


	sq_acc_updates = [ (sq_a, sq_aa) for sq_a,sq_aa in izip(sq_acc,sq_avg) ]
	acc_updates    = [ (a,    aa)    for a,   aa    in izip(acc,avg) ]
	delta_updates  = [ (d_a,d) for d_a,d in izip(delta_acc,deltas) ]
	parameters_updates = [ (p, p - d) for p,d in izip(parameters,deltas) ]

	return parameters_updates + acc_updates + sq_acc_updates + delta_updates
Example #24
def rmsprop(parameters,
            gradients,
            discount=0.95,
            momentum=0.9,
            learning_rate=1e-4,
            epsilon=1e-4):
    #gradients = [ (g < -clip)*(-clip) + (g > clip)*(clip) + (abs(g) <= clip) * g for g in gradients ]
    sq_acc = [
        U.create_shared(
            np.zeros(p.get_value().shape, dtype=theano.config.floatX))
        for p in parameters
    ]
    acc = [
        U.create_shared(
            np.zeros(p.get_value().shape, dtype=theano.config.floatX))
        for p in parameters
    ]
    delta_acc = [
        U.create_shared(
            np.zeros(p.get_value().shape, dtype=theano.config.floatX))
        for p in parameters
    ]

    sq_avg = [
        discount * sq_a + (1 - discount) * g**2
        for sq_a, g in izip(sq_acc, gradients)
    ]
    avg = [discount * a + (1 - discount) * g for a, g in izip(acc, gradients)]
    scaled_grads = [
        g / T.sqrt(sq_a - a**2 + epsilon)
        for g, a, sq_a in izip(gradients, acc, sq_acc)
    ]
    deltas = [
        momentum * d_a + learning_rate * s_g
        for d_a, s_g in izip(delta_acc, scaled_grads)
    ]

    sq_acc_updates = [(sq_a, sq_aa) for sq_a, sq_aa in izip(sq_acc, sq_avg)]
    acc_updates = [(a, aa) for a, aa in izip(acc, avg)]
    delta_updates = [(d_a, d) for d_a, d in izip(delta_acc, deltas)]
    parameters_updates = [(p, p - d) for p, d in izip(parameters, deltas)]

    return parameters_updates + acc_updates + sq_acc_updates + delta_updates
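Note that scaled_grads divides by sqrt(E[g^2] - E[g]^2 + eps), a running variance estimate, as in Graves-style centered RMSProp. The same recurrences for one parameter in plain numpy:

import numpy as np

def rmsprop_step(p, g, sq_a, a, d_a, discount=0.95, momentum=0.9,
                 learning_rate=1e-4, epsilon=1e-4):
    # As in the symbolic version, the gradient is scaled using the *previous*
    # running means; the refreshed means take effect on the next step.
    scaled = g / np.sqrt(sq_a - a**2 + epsilon)
    sq_a = discount * sq_a + (1 - discount) * g**2  # running mean of g^2
    a = discount * a + (1 - discount) * g           # running mean of g
    d_a = momentum * d_a + learning_rate * scaled   # momentum on the step
    return p - d_a, sq_a, a, d_a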
Example #25
def make_hidden_outputs(inputs,W):
	h0 = U.create_shared(np.zeros((HIDDEN,)))
	def step(score_t,self_tm1,W):
		return T.nnet.sigmoid(score_t + T.dot(self_tm1,W))
	activation_probs,_ = theano.scan(
			step,
			sequences     = inputs,
			outputs_info  = h0,
			non_sequences = W
		)
	return activation_probs
Example #26
    def __init__(self, input_dim, hidden_dim, activation=T.nnet.sigmoid,
                 with_batch=True, name='RNN'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.activation = activation
        self.with_batch = with_batch
        self.name = name

        # Randomly generate weights
        self.w_x = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_x')
        self.w_h = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_h')

        # Initialize the bias vector and h_0 to zero vectors
        self.b_h = create_shared(np.zeros((hidden_dim,)), name + '__b_h')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')

        # Define parameters
        self.params = [self.w_x, self.w_h, self.b_h, self.h_0]
Example #27
	def __init__(self,inputs,outputs,
				 lr = 0.1,       batch_size = 10,  max_epochs = 100000,
				 momentum = 0.5, validation = 0.1, lambda_2 = 0.001,
				 lr_min = 0.1):
		self.momentum   = momentum
		self.lr         = lr
		self.lr_min     = lr_min
		self.batch_size = batch_size
		self.validation = validation
		self.max_epochs = max_epochs 
		self.lambda_2   = lambda_2


		self.W       = U.create_shared(U.initial_weights(inputs,outputs))
		self.W_delta = U.create_shared(np.zeros((inputs,outputs)))

		self.bias       = U.create_shared(np.zeros(outputs))
		self.bias_delta = U.create_shared(np.zeros(outputs))

		self.tunables = [self.W,       self.bias]
		self.deltas   = [self.W_delta, self.bias_delta]
Example #28
def build_network(input_size, hidden_size):
    X = T.dmatrix('X')
    W_input_to_hidden = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    W_hidden_to_hidden = U.create_shared(
        U.initial_weights(hidden_size, hidden_size))
    b_hidden = U.create_shared(U.initial_weights(hidden_size))
    #	initial_hidden = U.create_shared(U.initial_weights(hidden_size))
    initial_hidden = U.create_shared(U.initial_weights(hidden_size))

    #	W_hidden_to_hidden_reproduction = W_hidden_to_hidden.T#U.create_shared(U.initial_weights(hidden_size,hidden_size))
    b_hidden_reproduction = U.create_shared(U.initial_weights(hidden_size))
    W_hidden_to_input_reproduction = W_input_to_hidden.T  #U.create_shared(U.initial_weights(hidden_size,input_size))
    b_input_reproduction = U.create_shared(U.initial_weights(input_size))
    parameters = [
        W_input_to_hidden,
        W_hidden_to_hidden,
        b_hidden,
        initial_hidden,
        b_hidden_reproduction,
        b_input_reproduction,
    ]

    hidden, hidden1_reproduction, input_reproduction = make_rae(
        X, W_input_to_hidden, W_hidden_to_hidden, b_hidden, initial_hidden,
        b_hidden_reproduction, b_input_reproduction)

    unrolled = unroll(hidden[-1], W_input_to_hidden, W_hidden_to_hidden,
                      b_hidden_reproduction, b_input_reproduction,
                      hidden.shape[0])

    return X, parameters, hidden, hidden1_reproduction, input_reproduction, unrolled
Example #29
	def fit(self,X,Y=None):
		print "Splitting validation and training set..."
		training_count  = int(X.shape[0]*(1-self.validation))
		validate_count  = X.shape[0] - training_count
		n_train_batches = int(math.ceil(training_count/float(self.batch_size)))
		print "Setting up shared training memory..."
		train_x = U.create_shared(X[:training_count])
		valid_x = U.create_shared(X[training_count:])

		if Y is not None:
			train_y = T.cast(U.create_shared(Y[:training_count]),'int32')
			valid_y = T.cast(U.create_shared(Y[training_count:]),'int32')
		else:
			train_y = valid_y = None

		print "Total examples:", X.shape[0]
		print "train examples:", training_count
		print "valid examples:", validate_count 
		print "batches:       ", n_train_batches
		print "batch size:    ", self.batch_size
	
		self.train(*self.prepare_functions(n_train_batches,train_x,valid_x,train_y,valid_y))
Example #30
def make_hidden_predict_outputs(hidden_size,characters_size,
								inputs,gen_mask,
								W_i,b_i,W_o,b_o,W_pred,b_pred,W_back):
	h0 = U.create_shared(np.zeros(hidden_size))
	p0 = U.create_shared(np.zeros(characters_size))
	def step(score_t,gm,hidden_1,predict_1,W_i,b_i,W_o,b_o,W_pred,b_pred,W_back):
		hidden  = T.nnet.sigmoid(
	#			(T.dot(hidden_1,W_i) + b_i ) + \
				(1-gm) * ( T.dot(hidden_1,W_i) + b_i ) + \
				(gm  ) * ( T.dot(hidden_1,W_o) + b_o ) + \
				T.dot(predict_1,W_back) + \
				score_t
			)
		predict = T.nnet.softmax(T.dot(hidden,W_pred) + b_pred)[0]
		return hidden,predict
	[hidden_,predict_],_ = theano.scan(
			step,
			sequences     = [inputs,gen_mask],
			outputs_info  = [h0,p0],
			non_sequences = [W_i,b_i,W_o,b_o,W_pred,b_pred,W_back]
		)
	return hidden_,predict_ 
Example #31
def make_hidden(hidden_size,add_ins,mult_ins,Wf,fW,b):
	h0 = U.create_shared(np.zeros(hidden_size))
	def step(add_in,mult_in,hidden_1,Wf,fW,b):
		mult_W = T.dot(Wf * mult_in,fW)
		hidden_score = add_in + T.dot(hidden_1,mult_W) + b
		return T.nnet.sigmoid(hidden_score)
	hidden,_ = theano.scan(
			step,
			sequences     = [add_ins,mult_ins],
			outputs_info  = [h0],
			non_sequences = [Wf,fW,b]
		)
	return hidden
Example #32
    def __init__(self, nb_filters, stack_size, filter_height, filter_width, border_mode, stride, name):
        """
        Construct a convolutional layer.
        """
        self.nb_filters = nb_filters
        self.stack_size = stack_size
        self.filter_height = filter_height
        self.filter_width = filter_width
        self.border_mode = border_mode
        self.filter_shape = (nb_filters, stack_size, filter_height, filter_width)
        self.stride = stride
        self.name = name

        fan_in = stack_size * filter_height * filter_width    # number of inputs to each hidden unit
        fan_out = nb_filters * filter_height * filter_width   # each unit in the lower layer receives a gradient from this many units
        drange = np.sqrt(6. / (fan_in + fan_out))             # Glorot/Xavier range for the uniform initialization

        self.filters = create_shared(drange * random_weights(self.filter_shape), name + '__filters')
        self.bias = create_shared(np.ones((nb_filters,)) * 0.1, name + '__bias')

        # parameters in the layer
        self.params = [self.filters, self.bias]
Example #33
def make_hidden(hidden_size, add_ins, mult_ins, Wf, fW, b):
    h0 = U.create_shared(np.zeros(hidden_size))

    def step(add_in, mult_in, hidden_1, Wf, fW, b):
        mult_W = T.dot(Wf * mult_in, fW)
        hidden_score = add_in + T.dot(hidden_1, mult_W) + b
        return T.nnet.sigmoid(hidden_score)

    hidden, _ = theano.scan(step,
                            sequences=[add_ins, mult_ins],
                            outputs_info=[h0],
                            non_sequences=[Wf, fW, b])
    return hidden
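Here mult_W = Wf . diag(mult_in) . fW is rebuilt at every step, so the hidden-to-hidden matrix depends on the current input, as in a factored multiplicative RNN (an interpretation, not stated in the snippet). One step in plain numpy:

import numpy as np

def mrnn_step(add_in, mult_in, h_tm1, Wf, fW, b):
    mult_W = np.dot(Wf * mult_in, fW)  # input-dependent recurrent matrix
    score = add_in + np.dot(h_tm1, mult_W) + b
    return 1.0 / (1.0 + np.exp(-score))  # sigmoid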
Example #34
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Update gate weights and bias
        self.w_z = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_z')
        self.u_z = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__u_z')
        self.b_z = create_shared(np.zeros((hidden_dim, )), name + '__b_z')

        # Reset gate weights and bias
        self.w_r = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_r')
        self.u_r = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__u_r')
        self.b_r = create_shared(np.zeros((hidden_dim, )), name + '__b_r')

        # New memory content weights and bias
        self.w_c = create_shared(random_weights((input_dim, hidden_dim)),
                                 name + '__w_c')
        self.u_c = create_shared(random_weights((hidden_dim, hidden_dim)),
                                 name + '__u_c')
        self.b_c = create_shared(np.zeros((hidden_dim, )), name + '__b_c')

        # Initialize the bias vector, h_0, to the zero vector
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        # Define parameters
        self.params = [
            self.w_z, self.u_z, self.b_z, self.w_r, self.u_r, self.b_r,
            self.w_c, self.u_c, self.b_c, self.h_0
        ]
Example #35
def trainer(X, Y, alpha, lr, predictions, updates, data, labels):
    data = U.create_shared(data, dtype=np.int8)
    labels = U.create_shared(labels, dtype=np.int8)
    index_start = T.lscalar('start')
    index_end = T.lscalar('end')
    print "Compiling function..."
    train_model = theano.function(inputs=[index_start, index_end, alpha, lr],
                                  outputs=T.mean(
                                      T.neq(T.argmax(predictions, axis=1), Y)),
                                  updates=updates,
                                  givens={
                                      X: data[index_start:index_end],
                                      Y: labels[index_start:index_end]
                                  })
    test_model = theano.function(inputs=[index_start, index_end],
                                 outputs=T.mean(
                                     T.neq(T.argmax(predictions, axis=1), Y)),
                                 givens={
                                     X: data[index_start:index_end],
                                     Y: labels[index_start:index_end]
                                 })
    print "Done."
    return train_model, test_model
Example #36
def build_network(input_size, hidden_size):
    srng = RandomStreams(seed=12345)

    X = T.fmatrix('X')
    W_input_to_hidden1 = U.create_shared(
        U.initial_weights(input_size, hidden_size))
    b_hidden1 = U.create_shared(U.initial_weights(hidden_size))
    W_hidden1_to_output = U.create_shared(U.initial_weights(hidden_size))
    b_output = U.create_shared(U.initial_weights(1)[0])

    def network(training):
        hidden1 = T.dot(X, W_input_to_hidden1) + b_hidden1
        hidden1 = hidden1 * (hidden1 > 0)
        if training:
            hidden1 = hidden1 * srng.binomial(size=(hidden_size, ), p=0.5)
        else:
            hidden1 = 0.5 * hidden1
        output = T.nnet.sigmoid(T.dot(hidden1, W_hidden1_to_output) + b_output)
        return output

    parameters = [W_input_to_hidden1, b_hidden1, W_hidden1_to_output, b_output]

    return X, network(True), network(False), parameters
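The two network(...) branches implement standard (non-inverted) dropout: hidden units are zeroed with probability 0.5 during training, and activations are halved at test time so their expected value matches. A tiny numpy illustration:

import numpy as np

rng = np.random.RandomState(12345)
h = np.ones(4)
h_train = h * rng.binomial(n=1, p=0.5, size=4)  # randomly dropped units
h_test = 0.5 * h                                # deterministic test-time scaling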
Example #37
def construct_network(context,characters,hidden,mult_hidden):
	print "Setting up memory..."
	X = T.bvector('X')
	Y = T.bvector('Y')
	alpha = T.cast(T.fscalar('alpha'),dtype=theano.config.floatX)
	lr    = T.cast(T.fscalar('lr'),   dtype=theano.config.floatX)
	
	print "Initialising weights..."
	W_char_hidden    = U.create_shared(U.initial_weights(characters,hidden))
	f_char_hidden    = U.create_shared(U.initial_weights(characters,mult_hidden))
	b_hidden         = U.create_shared(U.initial_weights(hidden))
	Wf_hidden        = U.create_shared(U.initial_weights(hidden,mult_hidden))
	fW_hidden        = U.create_shared(U.initial_weights(mult_hidden,hidden))
	W_hidden_predict = U.create_shared(U.initial_weights(hidden,characters))
	b_predict        = U.create_shared(U.initial_weights(characters))

	print "Constructing graph..."
	hidden = make_hidden(
			hidden,
			W_char_hidden[X],
			f_char_hidden[X],
			Wf_hidden,
			fW_hidden,
			b_hidden
		)
	predictions = T.nnet.softmax(T.dot(hidden,W_hidden_predict) + b_predict)
	weights = [
			W_char_hidden,
			f_char_hidden,
			b_hidden,
			Wf_hidden,
			fW_hidden,
			W_hidden_predict,
			b_predict
		]
	cost    = -T.mean(T.log(predictions)[T.arange(Y.shape[0]),Y])
	gparams =  T.grad(cost,weights)

	deltas  = [ U.create_shared(np.zeros(w.get_value().shape)) for w in weights ]
	updates = [
				( param, param - ( alpha * delta + gparam * lr ) )
					for param,delta,gparam in zip(weights,deltas,gparams)
			] + [
				( delta, alpha * delta + gparam * lr)
					for delta,gparam in zip(deltas,gparams)
			]
	return X,Y,alpha,lr,updates,predictions,weights
Example #38
    def __init__(self, input_dim, output_dim, name='embedding_layer'):
        """
        Typically, input_dim is the vocabulary size,
        and output_dim the embedding dimension.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.name = name

        # Randomly generate weights
        self.embeddings = create_shared(
            random_weights((input_dim, output_dim)),
            self.name + '__embeddings')

        # Define parameters
        self.params = [self.embeddings]
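The layer's forward pass is presumably a row lookup: integer indices select rows of the embeddings matrix. A minimal numpy sketch with made-up dimensions:

import numpy as np

embeddings = np.random.uniform(-0.1, 0.1, size=(5, 3))  # vocab of 5, dimension 3
indices = np.array([0, 2, 2, 4])
vectors = embeddings[indices]  # shape (4, 3): one embedding per index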
Example #39
    def __init__(self, input_dim, output_dim, name='embedding_layer'):
        """
        Typically, input_dim is the vocabulary size,
        and output_dim the embedding dimension.
        """
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.name = name

        # Randomly generate weights
        self.embeddings = create_shared(
            random_weights((input_dim, output_dim)),
            self.name + '__embeddings'
        )

        # Define parameters
        self.params = [self.embeddings]
Example #40
def build_network(input_size,hidden_size):
	X = T.dmatrix('X')
	W_input_to_hidden  = U.create_shared(U.initial_weights(input_size,hidden_size))
	W_hidden_to_hidden = U.create_shared(U.initial_weights(hidden_size,hidden_size))
	b_hidden = U.create_shared(U.initial_weights(hidden_size))
#	initial_hidden = U.create_shared(U.initial_weights(hidden_size))
	initial_hidden = U.create_shared(U.initial_weights(hidden_size))

#	W_hidden_to_hidden_reproduction = W_hidden_to_hidden.T#U.create_shared(U.initial_weights(hidden_size,hidden_size))
	b_hidden_reproduction           = U.create_shared(U.initial_weights(hidden_size))
	W_hidden_to_input_reproduction  = W_input_to_hidden.T#U.create_shared(U.initial_weights(hidden_size,input_size))
	b_input_reproduction            = U.create_shared(U.initial_weights(input_size))
	parameters = [
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden,
			initial_hidden,
			b_hidden_reproduction,
			b_input_reproduction,
		]

	hidden, hidden1_reproduction, input_reproduction = make_rae(
			X,
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden,
			initial_hidden,
			b_hidden_reproduction,
			b_input_reproduction
		)

	unrolled = unroll(
			hidden[-1],
			W_input_to_hidden,
			W_hidden_to_hidden,
			b_hidden_reproduction,
			b_input_reproduction,
			hidden.shape[0]
		)

	return X,parameters,hidden,hidden1_reproduction,input_reproduction,unrolled
Example #41
        row += 1
        prev += i
    return dense


def sparse_dot(l, prev, values, W):
    row_data = values[T.arange(prev, prev + l)]
    row_weights = W[row_data[:, 0]]
    sum_weights = T.sum(row_weights * row_data[:, 1].reshape((l, 1)), axis=0)
    return sum_weights, prev + l


if __name__ == "__main__":
    M = [[(1, 2), (5, 3), (10, 1)], [(0, 2), (3, 1)], [(2, 2), (8, 4)]]
    index = T.ivector('index')
    values = T.imatrix('values')
    prev = T.iscalar('prev')
    initial_weights = U.initial_weights(11, 3)
    W = U.create_shared(initial_weights)

    [output, _], updates = theano.scan(sparse_dot,
                                       sequences=index,
                                       outputs_info=[None, prev],
                                       non_sequences=[values, W])

    f = theano.function(inputs=[index, values, prev], outputs=output)

    ind, val = to_sparse_array(M)
    print ind, val
    print f(ind, val, 0)
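to_sparse_array is not shown in the snippet. A plausible reconstruction inferred from how sparse_dot consumes its output, assuming each row of M is a list of (column, value) pairs:

import numpy as np

def to_sparse_array(M):
    # index holds each row's pair count; values stacks all (column, value) pairs.
    index = np.array([len(row) for row in M], dtype=np.int32)
    values = np.array([pair for row in M for pair in row], dtype=np.int32)
    return index, values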
Example #42
import theano
import theano.tensor as T
import numpy as np
import utils as U
from numpy_hinton import print_arr
from theano.printing import Print

W1 = U.create_shared(U.initial_weights(10, 10))
W2 = U.create_shared(U.initial_weights(10, 10))
b = U.create_shared(U.initial_weights(10))
X = T.dmatrix('X')


def pair_combine(X):
    def step(i, inputs):
        length = inputs.shape[0]
        next_level = T.dot(inputs[T.arange(0, length - i - 1)], W1) + T.dot(
            inputs[T.arange(1, length - i)], W2) + b
        next_level = next_level * (next_level > 0)
        #next_level = inputs[T.arange(0,length-i-1)] + inputs[T.arange(1,length-i)]
        #next_level = theano.printing.Print('inputs')(next_level)
        return T.concatenate(
            [next_level,
             T.zeros_like(inputs[:length - next_level.shape[0]])])

    combined, _ = theano.scan(step,
                              sequences=[T.arange(X.shape[0])],
                              outputs_info=[X],
                              n_steps=X.shape[0] - 1)
    return combined[-1, 0], combined[0][:-1]
Example #43
def sparse_dot(l,prev,values,W):
	row_data = values[T.arange(prev,prev+l)]
	row_weights = W[row_data[:,0]]
	sum_weights = T.sum(row_weights*row_data[:,1].reshape((l,1)),axis=0)
	return sum_weights,prev+l



if __name__ == "__main__":
	M = [[(1,2),(5,3),(10,1)],
		 [(0,2),(3,1)],
		 [(2,2),(8,4)]]
	index  = T.ivector('index')
	values = T.imatrix('values')
	prev   = T.iscalar('prev')
	initial_weights = U.initial_weights(11,3)
	W = U.create_shared(initial_weights)

	[output,_],updates = theano.scan(
			sparse_dot,
			sequences     = index,
			outputs_info  = [None,prev],
			non_sequences = [values,W]
		)

	f = theano.function(
			inputs = [index,values,prev],
			outputs = output
		)

	ind,val = to_sparse_array(M)
	print ind,val
Example #44
    def __init__(self, input_dim, hidden_dim, output_emb_dim, output_dim,
                 with_batch=True, name='LSTM'):
        """
        Initialize neural network.
          - input_dim: dimension of input vectors
          - hidden_dim: dimension of hidden vectors
          - output_emb_dim: dimension of output embeddings
          - output_dim: number of possible outputs
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_emb_dim = output_emb_dim
        self.output_dim = output_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hi')
        self.w_yi = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hf')
        self.w_yf = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_ho')
        self.w_yo = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yo')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)), name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)), name + '__w_hc')
        self.w_yc = create_shared(random_weights((output_emb_dim, hidden_dim)), name + '__w_yc')

        # Initialize the bias vectors, c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim,)), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim,)), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim,)), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim,)), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim,)), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim,)), name + '__h_0')
        # self.y_0 = create_shared(np.zeros((output_emb_dim,)), name + '__y_0')

        # Weights for projection to final output, and outputs embeddings
        self.embeddings = create_shared(random_weights((output_dim + 1, output_emb_dim)), name + '__embeddings')
        self.weights = create_shared(random_weights((hidden_dim, output_dim)), name + '__weights')
        self.bias = create_shared(random_weights((output_dim,)), name + '__bias')

        # Define parameters
        self.params = [self.w_xi, self.w_hi, self.w_yi, self.w_ci,
                       self.w_xf, self.w_hf, self.w_yf, self.w_cf,
                       self.w_xo, self.w_ho, self.w_yo, self.w_co,
                       self.w_xc, self.w_hc, self.w_yc,
                       self.b_i, self.b_c, self.b_o, self.b_f,
                       self.c_0, self.h_0,  # self.y_0,
                       self.embeddings, self.weights, self.bias]
Example #45
    def link(self, input):
        """
        Propagate the input through the network and return the last hidden vector.
        The whole sequence is also accessible through self.h
        """
        def recurrence_strength(j, s_tm1_i, current_sum, _, d_t, u_t):
            s_t_i = T.maximum(0, s_tm1_i - T.maximum(0, u_t - current_sum))
            return current_sum + s_tm1_i, T.switch(T.eq(j, 0), d_t, s_t_i)

        def recurrence_read(s_t_i, v_t_i, current_sum, current_read):
            new_read = T.minimum(s_t_i, T.maximum(0, 1 - current_sum)) * v_t_i
            return current_sum + s_t_i, current_read + new_read

        def recurrence(i, x_t, r_tm1, h_tm1, strengths, values):

            updates = {}

            # Controller - compute O'_t'
            controller_input = T.concatenate([x_t, r_tm1, h_tm1])
            controller_output = T.tanh(T.dot(controller_input, self.w_xrh_hop) + self.b_xrh_hop)  # _TODO_ tanh?
            h_t = controller_output[:self.rnn_hidden_dim]
            op_t = controller_output[self.rnn_hidden_dim:]

            # Compute d_t (push signal), u_t (pop signal), v_t (value vector) and o_t (network output)
            d_t = T.nnet.sigmoid(T.dot(op_t, self.w_op_d) + self.b_op_d)[0]
            u_t = T.nnet.sigmoid(T.dot(op_t, self.w_op_u) + self.b_op_u)[0]
            v_t = T.tanh(T.dot(op_t, self.w_op_v) + self.b_op_v)
            o_t = T.tanh(T.dot(op_t, self.w_op_o) + self.b_op_o)

            # Add new value to the stack
            updates[values] = T.set_subtensor(values[i], v_t)

            # Compute new strength
            previous_strength = T.switch(T.eq(i, 0), [np.float32(0)], strengths[i - 1][:i])
            [_, new_strength], _ = theano.scan(
                fn=recurrence_strength,
                outputs_info=[np.float32(0), np.float32(0)],
                sequences=[T.arange(i + 1), T.concatenate([[np.float32(0)], previous_strength[::-1]])],
                non_sequences=[d_t, u_t]
            )
            new_strength = new_strength[::-1]
            updates[strengths] = T.set_subtensor(strengths[i, :i + 1], new_strength)

            # Compute new read vector
            [_, r_t], _ = theano.scan(
                fn=recurrence_read,
                outputs_info=[np.float32(0), np.zeros(self.values_dim).astype(np.float32)],
                sequences=[new_strength[:i + 1][::-1], T.concatenate([values[:i + 1], v_t.reshape((1, self.values_dim))], axis=0)[::-1]]
            )
            r_t = r_t[-1]

            return [r_t, h_t, o_t], updates

        # _TODO_ change the maxsize
        strengths = create_shared(np.zeros((100, 100)), 'strengths')
        values = create_shared(np.zeros((100, self.values_dim)), 'values')

        [r, h, o], updates = theano.scan(
            fn=recurrence,
            sequences=[T.arange(input.shape[0]), input],
            outputs_info=[self.r_0, self.h_0, None],
            non_sequences=[strengths, values]
        )

        return [r, h, o], updates
Example #46
import theano
import math
import pickle
import theano.tensor as T
import numpy as np
import utils as U
from theano import sparse
from scipy.sparse import csr_matrix


def shared_sparse(arr):
    data = arr.data
    indices = arr.indices
    indptr = arr.indptr
    shape = np.array(arr.shape)
    return sparse.CSR(data, indices, indptr, shape)


if __name__ == "__main__":
    training_data = shared_sparse(csr_matrix(np.eye(100)))

    #training_labels = pickle.load(open('tags.train.data','r'))

    # NOTE: theano.dot requires W's first dimension to match training_data's
    # column count (100 in this toy example, not 71165).
    W = U.create_shared(U.initial_weights(71165, 26920))
    out = theano.dot(training_data, W)
    f = theano.function(inputs=[], outputs=out)
    print f()
Example #47
import theano
import theano.tensor as T
import numpy         as np
import utils         as U
from numpy_hinton import print_arr
from theano.printing import Print

W1 = U.create_shared(U.initial_weights(10,10))
W2 = U.create_shared(U.initial_weights(10,10))
b  = U.create_shared(U.initial_weights(10))
X = T.dmatrix('X')
def pair_combine(X):
	def step(i,inputs):
		length = inputs.shape[0]
		next_level = T.dot(inputs[T.arange(0,length-i-1)],W1) + T.dot(inputs[T.arange(1,length-i)],W2) + b
		next_level = next_level*(next_level > 0)
		#next_level = inputs[T.arange(0,length-i-1)] + inputs[T.arange(1,length-i)]
		#next_level = theano.printing.Print('inputs')(next_level)
		return T.concatenate([next_level,T.zeros_like(inputs[:length-next_level.shape[0]])])
	combined,_ = theano.scan(
			step,
			sequences    = [T.arange(X.shape[0])],
			outputs_info = [X],
			n_steps = X.shape[0]-1
		)
	return combined[-1,0], combined[0][:-1]
combined, pairwise = pair_combine(X)
f = theano.function(
		inputs = [X],
		outputs = [combined,pairwise]
	)
Example #48
	def __init__(self,size):
		super(Recurrent,self).__init__(size)
		self.W = U.create_shared(U.initial_weights(size,size))
		self.h0 = U.create_shared(np.zeros((size,)))
		self.updates = [self.W]
Example #49
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 output_emb_dim,
                 output_dim,
                 with_batch=True,
                 name='LSTM'):
        """
        Initialize neural network.
          - input_dim: dimension of input vectors
          - hidden_dim: dimension of hidden vectors
          - output_emb_dim: dimension of output embeddings
          - output_dim: number of possible outputs
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_emb_dim = output_emb_dim
        self.output_dim = output_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hi')
        self.w_yi = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hf')
        self.w_yf = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ho')
        self.w_yo = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yo')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hc')
        self.w_yc = create_shared(random_weights((output_emb_dim, hidden_dim)),
                                  name + '__w_yc')

        # Initialize the bias vectors, c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim, )), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim, )), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim, )), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim, )), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim, )), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')
        # self.y_0 = create_shared(np.zeros((output_emb_dim,)), name + '__y_0')

        # Weights for projection to final output, and outputs embeddings
        self.embeddings = create_shared(
            random_weights((output_dim + 1, output_emb_dim)),
            name + '__embeddings')
        self.weights = create_shared(random_weights((hidden_dim, output_dim)),
                                     name + '__weights')
        self.bias = create_shared(random_weights((output_dim, )),
                                  name + '__bias')

        # Define parameters
        self.params = [
            self.w_xi,
            self.w_hi,
            self.w_yi,
            self.w_ci,
            self.w_xf,
            self.w_hf,
            self.w_yf,
            self.w_cf,
            self.w_xo,
            self.w_ho,
            self.w_yo,
            self.w_co,
            self.w_xc,
            self.w_hc,
            self.w_yc,
            self.b_i,
            self.b_c,
            self.b_o,
            self.b_f,
            self.c_0,
            self.h_0,  # self.y_0,
            self.embeddings,
            self.weights,
            self.bias
        ]
Example #50
    def __init__(self,
                 input_dim,
                 rnn_hidden_dim,
                 rnn_output_dim,
                 values_dim,
                 output_dim,
                 name='stack'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.rnn_hidden_dim = rnn_hidden_dim
        self.rnn_output_dim = rnn_output_dim
        self.values_dim = values_dim
        self.output_dim = output_dim
        self.name = name

        # Generate weights and bias to compute the push scalar (d_t), the pop scalar (u_t),
        # the value vector (v_t), and the network output (o_t)
        # Weights
        self.w_op_d = create_shared(random_weights((rnn_output_dim, 1)),
                                    name + '__w_op_d')
        self.w_op_u = create_shared(random_weights((rnn_output_dim, 1)),
                                    name + '__w_op_u')
        self.w_op_v = create_shared(
            random_weights((rnn_output_dim, values_dim)), name + '__w_op_v')
        self.w_op_o = create_shared(
            random_weights((rnn_output_dim, output_dim)), name + '__w_op_o')
        # Bias
        self.b_op_d = create_shared(np.zeros((1, )), name + '__b_op_d')
        self.b_op_u = create_shared(np.zeros((1, )), name + '__b_op_u')
        self.b_op_v = create_shared(np.zeros((values_dim, )),
                                    name + '__b_op_v')
        self.b_op_o = create_shared(np.zeros((output_dim, )),
                                    name + '__b_op_o')

        # RNN Controller weights
        self.w_xrh_hop = create_shared(
            random_weights((input_dim + values_dim + rnn_hidden_dim,
                            rnn_hidden_dim + rnn_output_dim)),
            name + '__w_xrh_hop')
        self.b_xrh_hop = create_shared(
            np.zeros((rnn_hidden_dim + rnn_output_dim, )),
            name + '__b_xrh_hop')

        # Initial hidden states H_0 - H_t = (h_t, r_t, (v_t, s_t))
        self.h_0 = create_shared(np.zeros((rnn_hidden_dim, )), name + '__h_0')
        self.r_0 = create_shared(np.zeros((values_dim, )), name + '__r_0')
        # self.v_0 = create_shared(np.zeros((values_dim,)), name + '__v_0')
        # self.s_0 = create_shared(np.zeros((1,)), name + '__s_0')

        # Define parameters
        self.params = [
            self.w_op_d, self.w_op_u, self.w_op_v, self.w_op_o, self.b_op_d,
            self.b_op_u, self.b_op_v, self.b_op_o, self.w_xrh_hop,
            self.b_xrh_hop, self.h_0
        ]  # _TODO_ check this (why not put r_0, s_0, v_0)
Example No. 51
    def link(self, input):
        """
        Propagate the input through the stack-augmented network. Return the
        sequences of read vectors, hidden states and outputs, together with
        the scan updates that write the stack buffers.
        """
        def recurrence_strength(j, s_tm1_i, current_sum, _, d_t, u_t):
            # Walk the strengths from the top of the stack down, removing up
            # to u_t of total strength; slot j == 0 receives the pushed d_t.
            s_t_i = T.maximum(0, s_tm1_i - T.maximum(0, u_t - current_sum))
            return current_sum + s_tm1_i, T.switch(T.eq(j, 0), d_t, s_t_i)

        def recurrence_read(s_t_i, v_t_i, current_sum, current_read):
            # Blend values from the top of the stack down, stopping once one
            # full unit of strength has been consumed.
            new_read = T.minimum(s_t_i, T.maximum(0, 1 - current_sum)) * v_t_i
            return current_sum + s_t_i, current_read + new_read

        def recurrence(i, x_t, r_tm1, h_tm1, strengths, values):

            updates = {}

            # Controller - compute O'_t'
            controller_input = T.concatenate([x_t, r_tm1, h_tm1])
            controller_output = T.tanh(
                T.dot(controller_input, self.w_xrh_hop) +
                self.b_xrh_hop)  # _TODO_ tanh?
            h_t = controller_output[:self.rnn_hidden_dim]
            op_t = controller_output[self.rnn_hidden_dim:]

            # Compute d_t (push signal), u_t (pop signal), v_t (value vector) and o_t (network output)
            d_t = T.nnet.sigmoid(T.dot(op_t, self.w_op_d) + self.b_op_d)[0]
            u_t = T.nnet.sigmoid(T.dot(op_t, self.w_op_u) + self.b_op_u)[0]
            v_t = T.tanh(T.dot(op_t, self.w_op_v) + self.b_op_v)
            o_t = T.tanh(T.dot(op_t, self.w_op_o) + self.b_op_o)

            # Add new value to the stack
            updates[values] = T.set_subtensor(values[i], v_t)

            # Compute new strength
            previous_strength = T.switch(T.eq(i, 0), [np.float32(0)],
                                         strengths[i - 1][:i])
            [_, new_strength
             ], _ = theano.scan(fn=recurrence_strength,
                                outputs_info=[np.float32(0),
                                              np.float32(0)],
                                sequences=[
                                    T.arange(i + 1),
                                    T.concatenate([[np.float32(0)],
                                                   previous_strength[::-1]])
                                ],
                                non_sequences=[d_t, u_t])
            new_strength = new_strength[::-1]
            updates[strengths] = T.set_subtensor(strengths[i, :i + 1],
                                                 new_strength)

            # Compute new read vector
            [_, r_t], _ = theano.scan(
                fn=recurrence_read,
                outputs_info=[
                    np.float32(0),
                    np.zeros(self.values_dim).astype(np.float32)
                ],
                sequences=[
                    new_strength[:i + 1][::-1],
                    T.concatenate(
                        [values[:i + 1],
                         v_t.reshape((1, self.values_dim))],
                        axis=0)[::-1]
                ])
            r_t = r_t[-1]

            return [r_t, h_t, o_t], updates

        # _TODO_ change the maxsize
        strengths = create_shared(np.zeros((100, 100)), 'strengths')
        values = create_shared(np.zeros((100, self.values_dim)), 'values')

        [r, h,
         o], updates = theano.scan(fn=recurrence,
                                   sequences=[T.arange(input.shape[0]), input],
                                   outputs_info=[self.r_0, self.h_0, None],
                                   non_sequences=[strengths, values])

        return [r, h, o], updates
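A minimal usage sketch, assuming the enclosing class is named StackRNN and has the constructor shown in the previous example (the class name and all dimensions here are illustrative):

import numpy as np
import theano
import theano.tensor as T

stack_rnn = StackRNN(input_dim=10, rnn_hidden_dim=20, rnn_output_dim=20,
                     values_dim=10, output_dim=5)
x = T.matrix('x')  # one sequence, shape (sequence_length, input_dim)
[r, h, o], updates = stack_rnn.link(x)
# The scan updates must be passed to theano.function, otherwise the shared
# 'strengths' and 'values' buffers are never written between steps.
f = theano.function([x], o, updates=updates)
outputs = f(np.random.rand(30, 10).astype(theano.config.floatX))

Note that the hard-coded buffer size of 100 caps the sequence length this implementation can process.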
Example No. 52
			outputs = T.mean(T.neq(T.argmax(predictions, axis=1), Y)),
			updates = updates,
			givens  = {
				X: data,
				Y: labels,
			}
		)
	return train_model



if __name__ == '__main__':
	print "Setting up memory..."
	X = T.bmatrix('X')
	Y = T.bvector('Y')
	Ws_char_to_hidden   = [ U.create_shared(U.initial_weights(CHARACTERS,HIDDEN),name='yeah%d'%i) for i in xrange(CONTEXT) ]
	b_hidden            = U.create_shared(U.initial_weights(HIDDEN))
	W_hidden_to_hidden  = U.create_shared(U.initial_weights(HIDDEN,HIDDEN))
	W_hidden_to_predict = U.create_shared(U.initial_weights(HIDDEN,CHARACTERS))
	b_predict           = U.create_shared(U.initial_weights(CHARACTERS))
	tunables = Ws_char_to_hidden + [
			b_hidden, 
			W_hidden_to_hidden,
			W_hidden_to_predict,
			b_predict
		]

	print "Constructing graph..."
	hidden_inputs  = make_hidden_inputs(X,Ws_char_to_hidden,b_hidden)
	hidden_outputs = make_hidden_outputs(hidden_inputs,W_hidden_to_hidden)
	predictions    = make_predictions(hidden_outputs,W_hidden_to_predict,b_predict)
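The tunables list above would typically produce the updates consumed by train_model at the top of this snippet; a minimal gradient-descent sketch, where the cross-entropy cost and the 0.01 learning rate are assumptions not shown in the source:

# Assumed continuation: negative log-likelihood cost and plain SGD updates
# over the tunable parameters (cost form and learning rate are guesses).
cost = -T.mean(T.log(predictions[T.arange(Y.shape[0]), Y]))
gradients = T.grad(cost, tunables)
updates = [(p, p - 0.01 * g) for p, g in zip(tunables, gradients)]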
Example No. 53
                    type=int,
                    action="store",
                    help="number of parallel jobs")
args = parser.parse_args()

student_name = args.user
rg_name = utils.get_student_resource_group(student_name)
storage_account = utils.get_student_storage_account(student_name)
region = utils.get_student_region(student_name)
vm_size = "Standard_E4_v3"

RESIZE_OS_DISK = False
OS_DISK_SIZE = 511

if args.create_shared:
    utils.create_shared(rg_name, region)


def create_cluster_node(idx, user_pass):
    IP_NAME = "ip_cluster{0}".format(idx)
    NIC_NAME = "nic_cluster{0}".format(idx)
    INT_DNS_NAME = "cluster{0}".format(idx)
    OS_DISK_NAME = "cluster{0}_os_disk".format(idx)
    VM_NAME = INT_DNS_NAME
    IP = "10.0.1.2{0}".format(idx)

    if idx != 1:
        IP_NAME = None

    if args.create_aux:
        # create public IP
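The integer argument truncated at the top of this snippet carries the help text "number of parallel jobs"; a plausible continuation, with the hypothetical names args.num_jobs and num_nodes, would fan create_cluster_node out over a worker pool:

# Hypothetical continuation: 'args.num_jobs' and 'num_nodes' are
# illustrative names, not taken from the source.
from multiprocessing.pool import ThreadPool

def create_all_nodes(num_nodes, user_pass):
    pool = ThreadPool(args.num_jobs)
    pool.starmap(create_cluster_node,
                 [(idx, user_pass) for idx in range(1, num_nodes + 1)])
    pool.close()
    pool.join()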
Example No. 55
    def __init__(self, input_dim, hidden_dim, with_batch=True, name='LSTM'):
        """
        Initialize neural network.
        """
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.with_batch = with_batch
        self.name = name

        # Input gate weights
        self.w_xi = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xi')
        self.w_hi = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hi')
        self.w_ci = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ci')

        # Forget gate weights
        self.w_xf = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xf')
        self.w_hf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hf')
        self.w_cf = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_cf')

        # Output gate weights
        self.w_xo = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xo')
        self.w_ho = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_ho')
        self.w_co = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_co')

        # Cell weights
        self.w_xc = create_shared(random_weights((input_dim, hidden_dim)),
                                  name + '__w_xc')
        self.w_hc = create_shared(random_weights((hidden_dim, hidden_dim)),
                                  name + '__w_hc')

        # Initialize the bias vectors, c_0 and h_0 to zero vectors
        self.b_i = create_shared(np.zeros((hidden_dim, )), name + '__b_i')
        self.b_f = create_shared(np.zeros((hidden_dim, )), name + '__b_f')
        self.b_c = create_shared(np.zeros((hidden_dim, )), name + '__b_c')
        self.b_o = create_shared(np.zeros((hidden_dim, )), name + '__b_o')
        self.c_0 = create_shared(np.zeros((hidden_dim, )), name + '__c_0')
        self.h_0 = create_shared(np.zeros((hidden_dim, )), name + '__h_0')

        # Define parameters (the peephole weights w_ci, w_cf and w_co are
        # excluded from training here)
        self.params = [
            self.w_xi,
            self.w_hi,  # self.w_ci,
            self.w_xf,
            self.w_hf,  # self.w_cf,
            self.w_xo,
            self.w_ho,  # self.w_co,
            self.w_xc,
            self.w_hc,
            self.b_i,
            self.b_c,
            self.b_o,
            self.b_f,
            self.c_0,
            self.h_0
        ]
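These gate parameters drive the standard LSTM recurrence; a sketch of the step function they imply (illustrative, not part of the original class), with the peephole terms omitted to match the params list:

    def step(self, x_t, c_tm1, h_tm1):
        # One LSTM step with the parameters defined above; the peephole
        # connections (w_ci, w_cf, w_co) are left out, as in self.params.
        i_t = T.nnet.sigmoid(T.dot(x_t, self.w_xi) + T.dot(h_tm1, self.w_hi) + self.b_i)
        f_t = T.nnet.sigmoid(T.dot(x_t, self.w_xf) + T.dot(h_tm1, self.w_hf) + self.b_f)
        o_t = T.nnet.sigmoid(T.dot(x_t, self.w_xo) + T.dot(h_tm1, self.w_ho) + self.b_o)
        c_t = f_t * c_tm1 + i_t * T.tanh(T.dot(x_t, self.w_xc) + T.dot(h_tm1, self.w_hc) + self.b_c)
        h_t = o_t * T.tanh(c_t)
        return c_t, h_t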
Example No. 57
parser = argparse.ArgumentParser()
parser.add_argument("--create_shared",
                    action="store_true",
                    help="create shared resources")
parser.add_argument("--create_aux",
                    action="store_true",
                    help="create aux resources, only once per script run")
args = parser.parse_args()

VM_SIZE = "Standard_NC6"

RESIZE_OS_DISK = False
OS_DISK_SIZE = 1023

if args.create_shared:
    utils.create_shared(RG_NAME, REGION, VNET_NAME, NSG_NAME, SUBNET_NAME)

IP_NAME = "ip_ubuntugpu"
NIC_NAME = "nic_ubuntugpu"
INT_DNS_NAME = UBUNTUGPU_VM
OS_DISK_NAME = "ubuntugpu_os_disk"
IP = "10.0.1.10"

if args.create_aux:
    # create public IP
    utils.create_public_ip(IP_NAME, RG_NAME)

    # Create network card with fixed private IP
    utils.create_nic_with_private_ip(NIC_NAME, RG_NAME, VNET_NAME, SUBNET_NAME,
                                     NSG_NAME, IP_NAME, INT_DNS_NAME, IP)