def linearRegression_2(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    Computes B_hat as B_hat = X^+ . y, with X^+ the pseudoinverse of matrix X.
    http://en.wikipedia.org/wiki/Moore-Penrose_pseudoinverse

    In:
        inputs:  Matrix of inputs (X) (nxp matrix)
                 format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y (nx1 matrix)
                 format: [[y_1], ..., [y_n]]
    Out:
        B_hat:   Column vector (Matrix) of fitted slopes (px1 matrix)
                 format: [[b_0], ..., [b_{p-1}]]
    """
    X = T.dmatrix('X')
    y = T.dcol('y')
    # http://deeplearning.net/software/theano/library/sandbox/linalg.html
    # MatrixPinv is the class.
    # pinv is the method based upon the MatrixPinv class.
    # B_hat = X^+ . y
    B_hat = T.dot(linOps.pinv(X), y)
    lse = function([X, y], B_hat)
    b = lse(inputs, outputs)
    return b
def linearRegression_1(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    Computes B_hat as B_hat = (X.T . X)^-1 . X.T . y -> Ordinary Least Squares (OLS)
    http://en.wikipedia.org/wiki/Ordinary_least_squares

    In:
        inputs:  Matrix of inputs (X) (nxp matrix)
                 format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y (nx1 matrix)
                 format: [[y_1], ..., [y_n]]
    Out:
        B_hat:   Column vector (Matrix) of fitted slopes (px1 matrix)
                 format: [[b_0], ..., [b_{p-1}]]
    """
    X = T.dmatrix('X')
    y = T.dcol('y')
    # B_hat = (X.T . X)^-1 . X.T . y
    # http://deeplearning.net/software/theano/library/sandbox/linalg.html
    # MatrixInverse is the class.
    # matrix_inverse is the method based upon the MatrixInverse class.
    B_hat = T.dot(T.dot(linOps.matrix_inverse(T.dot(X.T, X)), X.T), y)
    lse = function([X, y], B_hat)
    b = lse(inputs, outputs)
    return b
def linearRegression_3(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    Computes B_hat with the use of gradient descent:
    http://en.wikipedia.org/wiki/Gradient_descent

    In:
        inputs:  Matrix of inputs (X) (nxp matrix)
                 format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y (nx1 matrix)
                 format: [[y_1], ..., [y_n]]
    Out:
        B_hat:   Column vector (Matrix) of fitted slopes (px1 matrix)
                 format: [[b_0], ..., [b_{p-1}]]
    """
    X = T.dmatrix('X')           # inputs
    z = T.dcol('y')              # targets
    B_hat = T.dcol('B_hat')      # parameter estimates

    # Define the cost function.
    y = T.dot(X, B_hat)          # outputs
    err = (z - y)                # error function
    cost = (err ** 2).sum()      # the cost to minimise

    # Gradient expression.
    cost_B_grad = T.grad(cost, [B_hat])

    # Compile the cost function and its gradient.
    cost_fun = function([B_hat, X, z], cost)  # can be used for debugging or early stopping
    cost_grad_fun = function([B_hat, X, z], cost_B_grad)

    # Initialise the parameter estimates.
    b = np.random.randn(inputs.shape[1], 1)

    # Gradient descent.
    nb_of_iterations = 500
    step_size = 0.001
    for i in range(nb_of_iterations):
        cost_grad_res = cost_grad_fun(b, inputs, outputs)
        cost_grad_res = cost_grad_res[0]  # cost_grad_fun returns a list of arrays
        # Update the parameter estimates according to the first-order gradients.
        for j in range(b.shape[0]):
            b[j] -= cost_grad_res[j][0] * step_size
    return b
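A minimal usage sketch for the three estimators above. It assumes the imports the functions rely on are already in place (numpy as np, theano.tensor as T, function from theano, and linOps bound to Theano's sandbox linalg module); the data and fitted values are illustrative only.

# Minimal usage sketch (illustrative data; assumed imports as noted above).
if __name__ == '__main__':
    np.random.seed(0)
    X_data = np.random.randn(100, 3)                    # 100 observations, 3 regressors
    true_b = np.array([[1.5], [-2.0], [0.5]])           # illustrative true slopes
    y_data = X_data.dot(true_b) + 0.01 * np.random.randn(100, 1)

    b_ols = linearRegression_1(X_data, y_data)          # normal equations
    b_pinv = linearRegression_2(X_data, y_data)         # pseudoinverse
    b_gd = linearRegression_3(X_data, y_data)           # gradient descent (approximate)
    print(b_ols.ravel(), b_pinv.ravel(), b_gd.ravel())  # all three should be close to true_b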
def __init__(self, numpy_rng, theano_rng=None, first_layer_type='bernoulli',
             mean_doc_size=1, n_ins=784, mid_layer_sizes=[200],
             inner_code_length=10):
    """This class is made to support a variable number of layers.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given, one is
                       generated based on a seed drawn from `numpy_rng`

    :type n_ins: int
    :param n_ins: dimension of the input (and autoencoder output, y) of the SMH

    :type inner_code_length: int
    :param inner_code_length: how many codes to squash down to in the middle layer
    """
    self.first_layer_type = first_layer_type
    self.mean_doc_size = mean_doc_size
    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_ins = n_ins
    self.inner_code_length = inner_code_length
    self.mid_layer_sizes = list(mid_layer_sizes)
    self.numpy_rng = numpy_rng
    # Generate a Theano RNG from numpy_rng only if none was supplied.
    if theano_rng is None:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.theano_rng = theano_rng

    # Allocate symbolic variables for the data.
    if theano.config.floatX == "float32":
        self.x = T.matrix('x')
        # self.x_sums = T.col('x_sums')
        self.y = T.matrix('y')  # the output (after finetuning) should look the same as the input
    elif theano.config.floatX == "float64":
        self.x = T.dmatrix('x')
        # self.x_sums = T.dcol('x_sums')
        self.y = T.dmatrix('y')  # the output (after finetuning) should look the same as the input
    else:
        raise Exception  # unexpected theano.config.floatX value

    # The SMH is an MLP, for which all weights of intermediate layers are shared with a
    # different RBM. We will first construct the SMH as a deep multilayer perceptron, and
    # when constructing each sigmoidal layer we also construct an RBM that shares weights
    # with that layer. During pretraining we will train these RBMs (which will lead
    # to changing the weights of the MLP as well). During finetuning we will finish
    # training the SMH by doing stochastic gradient descent on the MLP.
    self.init_layers()
def test_broadcast_arguments(self):
    rng_R = random_state_type()
    low = tensor.dvector()
    high = tensor.dcol()
    post_r, out = uniform(rng_R, low=low, high=high)
    assert out.ndim == 2
    f = compile.function([rng_R, low, high], [post_r, out],
                         accept_inplace=True)

    rng_state0 = numpy.random.RandomState(utt.fetch_seed())
    numpy_rng = numpy.random.RandomState(utt.fetch_seed())
    post0, val0 = f(rng_state0, [-5, 0.5, 0, 1], [[1.0]])
    post1, val1 = f(post0, [0.9], [[1.0], [1.1], [1.5]])
    post2, val2 = f(post1, [-5, 0.5, 0, 1], [[1.0], [1.1], [1.5]])

    numpy_val0 = numpy_rng.uniform(low=[-5, 0.5, 0, 1], high=[1.0])
    numpy_val1 = numpy_rng.uniform(low=[0.9], high=[[1.0], [1.1], [1.5]])
    numpy_val2 = numpy_rng.uniform(low=[-5, 0.5, 0, 1],
                                   high=[[1.0], [1.1], [1.5]])

    assert numpy.all(val0 == numpy_val0), (val0, numpy_val0)
    assert numpy.all(val1 == numpy_val1)
    assert numpy.all(val2 == numpy_val2)
def test_broadcast_arguments(self):
    random = RandomStreams(utt.fetch_seed())
    low = tensor.dvector()
    high = tensor.dcol()
    out = random.uniform(low=low, high=high)
    assert out.ndim == 2
    f = function([low, high], out)

    rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
    numpy_rng = numpy.random.RandomState(int(rng_seed))
    val0 = f([-5, .5, 0, 1], [[1.]])
    val1 = f([.9], [[1.], [1.1], [1.5]])
    val2 = f([-5, .5, 0, 1], [[1.], [1.1], [1.5]])

    numpy_val0 = numpy_rng.uniform(low=[-5, .5, 0, 1], high=[1.])
    numpy_val1 = numpy_rng.uniform(low=[.9], high=[[1.], [1.1], [1.5]])
    numpy_val2 = numpy_rng.uniform(low=[-5, .5, 0, 1],
                                   high=[[1.], [1.1], [1.5]])

    assert numpy.all(val0 == numpy_val0)
    assert numpy.all(val1 == numpy_val1)
    assert numpy.all(val2 == numpy_val2)
def test_broadcast_arguments(self):
    rng_R = random_state_type()
    low = tensor.dvector()
    high = tensor.dcol()
    post_r, out = uniform(rng_R, low=low, high=high)
    assert out.ndim == 2
    f = compile.function([rng_R, low, high], [post_r, out],
                         accept_inplace=True)

    rng_state0 = np.random.RandomState(utt.fetch_seed())
    numpy_rng = np.random.RandomState(utt.fetch_seed())
    post0, val0 = f(rng_state0, [-5, 0.5, 0, 1], [[1.0]])
    post1, val1 = f(post0, [0.9], [[1.0], [1.1], [1.5]])
    post2, val2 = f(post1, [-5, 0.5, 0, 1], [[1.0], [1.1], [1.5]])

    numpy_val0 = numpy_rng.uniform(low=[-5, 0.5, 0, 1], high=[1.0])
    numpy_val1 = numpy_rng.uniform(low=[0.9], high=[[1.0], [1.1], [1.5]])
    numpy_val2 = numpy_rng.uniform(low=[-5, 0.5, 0, 1],
                                   high=[[1.0], [1.1], [1.5]])

    assert np.all(val0 == numpy_val0), (val0, numpy_val0)
    assert np.all(val1 == numpy_val1)
    assert np.all(val2 == numpy_val2)
import numpy as np
import theano
import theano.tensor as T

# Huber-style loss: quadratic for |error| <= 1, linear beyond that.
error = T.dcol()
q = T.minimum(abs(error), 1.0)
l = abs(error) - q
loss = T.sum(0.5 * q**2 + l)

# Gradient of the loss with respect to the error.
d = theano.grad(loss, error)
f = theano.function([error], d)

e = np.arange(-2, 2.01, 0.25).reshape(-1, 1)
# print(e)
print(f(e))
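As a sanity check (an addition, not part of the original snippet): for this Huber-style loss the gradient has the closed form clip(error, -1, 1), so the Theano result can be verified directly against NumPy.

# Closed-form check (addition): d loss / d error = error where |error| <= 1,
# sign(error) otherwise, i.e. np.clip(error, -1, 1).
expected = np.clip(e, -1.0, 1.0)
assert np.allclose(f(e), expected)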
def test_infer_shape(self):
    rng_R = random_state_type()
    rng_R_val = numpy.random.RandomState(utt.fetch_seed())

    # no shape specified, default args
    post_r, out = uniform(rng_R)
    self._compile_and_check([rng_R], [out], [rng_R_val], RandomFunction)

    post_r, out = uniform(rng_R, size=None, ndim=2)
    self._compile_and_check([rng_R], [out], [rng_R_val], RandomFunction)

    """
    # infer_shape doesn't work for multinomial.
    # The parameter ndim_added is set to 1 and in this case, the infer_shape
    # implementation doesn't know how to infer the shape.
    post_r, out = multinomial(rng_R)
    self._compile_and_check([rng_R], [out], [rng_R_val], RandomFunction)
    """

    # no shape specified, args have to be broadcasted
    low = tensor.TensorType(dtype='float64',
                            broadcastable=(False, True, True))()
    high = tensor.TensorType(dtype='float64',
                             broadcastable=(True, True, True, False))()
    post_r, out = uniform(rng_R, size=None, ndim=2, low=low, high=high)
    low_val = [[[3]], [[4]], [[-5]]]
    high_val = [[[[5, 8]]]]
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    # multinomial, specified shape
    """
    # infer_shape doesn't work for multinomial
    n = iscalar()
    pvals = dvector()
    size_val = (7, 3)
    n_val = 6
    pvals_val = [0.2] * 5
    post_r, out = multinomial(rng_R, size=size_val, n=n, pvals=pvals, ndim=2)
    self._compile_and_check([rng_R, n, pvals], [out],
                            [rng_R_val, n_val, pvals_val], RandomFunction)
    """

    # uniform vector low and high
    low = dvector()
    high = dvector()
    post_r, out = uniform(rng_R, low=low, high=1)
    low_val = [-5, .5, 0, 1]
    self._compile_and_check([rng_R, low], [out], [rng_R_val, low_val],
                            RandomFunction)

    low_val = [.9]
    self._compile_and_check([rng_R, low], [out], [rng_R_val, low_val],
                            RandomFunction)

    post_r, out = uniform(rng_R, low=low, high=high)
    low_val = [-4., -2]
    high_val = [-1, 0]
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    low_val = [-4.]
    high_val = [-1]
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    # uniform broadcasting low and high
    low = dvector()
    high = dcol()
    post_r, out = uniform(rng_R, low=low, high=high)
    low_val = [-5, .5, 0, 1]
    high_val = [[1.]]
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    low_val = [.9]
    high_val = [[1.], [1.1], [1.5]]
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    low_val = [-5, .5, 0, 1]
    high_val = [[1.], [1.1], [1.5]]
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    # uniform with vector slice
    low = dvector()
    high = dvector()
    post_r, out = uniform(rng_R, low=low, high=high)
    low_val = [.1, .2, .3]
    high_val = [1.1, 2.2, 3.3]
    size_val = (3,)
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val[:-1], high_val[:-1]],
                            RandomFunction)

    # uniform with explicit size and size implicit in parameters
    # NOTE 1: Would it be desirable that size could also be supplied
    # as a Theano variable?
    post_r, out = uniform(rng_R, size=size_val, low=low, high=high)
    self._compile_and_check([rng_R, low, high], [out],
                            [rng_R_val, low_val, high_val], RandomFunction)

    # binomial with vector slice
    n = ivector()
    prob = dvector()
    post_r, out = binomial(rng_R, n=n, p=prob)
    n_val = [1, 2, 3]
    prob_val = [.1, .2, .3]
    size_val = (3,)
    self._compile_and_check([rng_R, n, prob], [out],
                            [rng_R_val, n_val[:-1], prob_val[:-1]],
                            RandomFunction)

    # binomial with explicit size and size implicit in parameters
    # cf. NOTE 1
    post_r, out = binomial(rng_R, n=n, p=prob, size=size_val)
    self._compile_and_check([rng_R, n, prob], [out],
                            [rng_R_val, n_val, prob_val], RandomFunction)

    # normal with vector slice
    avg = dvector()
    std = dvector()
    post_r, out = normal(rng_R, avg=avg, std=std)
    avg_val = [1, 2, 3]
    std_val = [.1, .2, .3]
    size_val = (3,)
    self._compile_and_check([rng_R, avg, std], [out],
                            [rng_R_val, avg_val[:-1], std_val[:-1]],
                            RandomFunction)

    # normal with explicit size and size implicit in parameters
    # cf. NOTE 1
    post_r, out = normal(rng_R, avg=avg, std=std, size=size_val)
    self._compile_and_check([rng_R, avg, std], [out],
                            [rng_R_val, avg_val, std_val], RandomFunction)

    # multinomial with tensor-3 probabilities
    """
        theano.shared(numpy.zeros(max_mults[i] * dimensions[i + 1], numpy.float64))
        for i in range(num_conv)
    ]

rs = rng.RandomState(1234)
mask_rng = theano.tensor.shared_randomstreams.RandomStreams(rs.randint(999999))

discrimins = theano.shared((rng.rand(dimensions[num_conv], 1) - 0.5) * 2.0)
step_discrimins = theano.shared(numpy.zeros([dimensions[num_conv], 1], numpy.float64))
bias_discrimins = theano.shared(0.0)
step_bias_discrimins = theano.shared(0.0)

x = T.dmatrix('x')
sources = T.dcol('sources')
batch_size, num = T.shape(x)

y = x.dimshuffle(0, 1, 'x')  # batch_size, k, dim=1
y = T.unbroadcast(y, 2)

'''
filt = filters[0] * 0.0 + 1.0
num = num / window_sizes[0]
source_test = y + sources.dimshuffle(0, 1, 'x') * 1.0 - y
source_test = T.reshape(source_test[:, 0:num * window_sizes[0], :],
                        [batch_size * num, window_sizes[0] * dimensions[0]])
source_test = T.dot(source_test, filt)
source_test = T.reshape(source_test,
                        [batch_size, num, dimensions[1], max_mults[0]])
source_test = source_test.max(3, True)
import numpy as np
import theano
import theano.tensor as T

error = T.dcol()
q = T.minimum(abs(error), 1.0)
l = abs(error) - q
loss = T.sum(0.5 * q ** 2 + l)

d = theano.grad(loss, error)
f = theano.function([error], d)

e = np.arange(-2, 2.01, 0.25).reshape(-1, 1)
# print(e)
print(f(e))
import numpy as np
import theano.tensor as T
from theano import *
import matplotlib.pyplot as plt
import sys
import random

P = T.dcol('P')
Theta = T.drow('Theta')
# p1 = T.dscalar('p1')
# p2 = T.dscalar('p2')
# p3 = T.dscalar('p3')
# P = p1, p2, p3
# theta1 = T.dscalar('theta1')
# theta2 = T.dscalar('theta2')
# theta3 = T.dscalar('theta3')
# Theta = theta1, theta2, theta3

n_s = 3
n_a = 2
phi = range(-90, 100, 10)
n_phi = len(phi)

G = np.zeros((n_a**2, 2 * n_a - 1))
for k in range(n_a):
    G[k * n_a:(k + 1) * n_a, (n_a - 1 - k):(2 * n_a - 1 - k)] = np.eye(n_a)

# B_phi = np.zeros((n_phi * n_a**2, n_s), dtype=complex)
import numpy as np
import theano
import theano.tensor as T

X = T.dmatrix('X')
y = T.dcol('y')

def Layer(X, n_in, n_out):
    w = theano.shared(np.random.randn(n_out, n_in), name='w')
    b = theano.shared(np.random.randn(1, n_out), name='b', broadcastable=(True, False))
    Xw = T.dot(X, T.transpose(w)) + b
    sigmoid = 1 / (1 + T.exp(-Xw))
    return sigmoid, w, b

hid1, w_h, b_h = Layer(X, 2, 2)
out, w_o, b_o = Layer(hid1, 2, 1)

cost = T.sum((y - out)**2)

updates = []
updates.append((w_o, w_o - 0.1 * T.grad(cost, w_o)))
updates.append((b_o, b_o - 0.1 * T.grad(cost, b_o)))
updates.append((w_h, w_h - 0.1 * T.grad(cost, w_h)))
updates.append((b_h, b_h - 0.1 * T.grad(cost, b_h)))

train = theano.function([X, y], [cost],
                        updates=updates, allow_input_downcast=True)
import numpy as np
import theano
import theano.tensor as T

X = T.dmatrix('X')
y = T.dcol('y')

def Layer(X, n_in, n_out):
    w = theano.shared(np.random.randn(n_out, n_in), name='w')
    b = theano.shared(np.random.randn(1, n_out), name='b', broadcastable=(True, False))
    Xw = T.dot(X, T.transpose(w)) + b
    sigmoid = 1 / (1 + T.exp(-Xw))
    return sigmoid, w, b

hid1, w_h, b_h = Layer(X, 2, 2)
out, w_o, b_o = Layer(hid1, 2, 1)

cost = T.sum((y - out) ** 2)

updates = []
updates.append((w_o, w_o - 0.1 * T.grad(cost, w_o)))
updates.append((b_o, b_o - 0.1 * T.grad(cost, b_o)))
updates.append((w_h, w_h - 0.1 * T.grad(cost, w_h)))
updates.append((b_h, b_h - 0.1 * T.grad(cost, b_h)))

train = theano.function([X, y], [cost],
                        updates=updates, allow_input_downcast=True)
pred = theano.function([X], [out], allow_input_downcast=True)
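A minimal training-loop sketch for the 2-2-1 network above. The XOR data and the iteration count are assumptions for illustration, not part of the original snippet, and convergence depends on the random initialisation.

# Illustrative training loop (assumed data: the XOR truth table).
X_data = [[0, 0], [0, 1], [1, 0], [1, 1]]
y_data = [[0], [1], [1], [0]]
for epoch in range(5000):
    (c,) = train(X_data, y_data)  # c is the summed squared error for this epoch
print(c)
print(pred(X_data))               # pred returns a list with one array of outputs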