Example #1
    def grad(self, inputs, output_gradients):
        A, b = inputs
        c = self(A, b)
        c_bar = output_gradients[0]
        # Adjoint of the solve c = A^{-1} b: b_bar = A^{-T} c_bar, A_bar = -b_bar c^T
        trans_solve_op = SpSolve()
        b_bar = trans_solve_op(ts.transpose(A), c_bar)
        A_bar = -ts.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
        return [A_bar, b_bar]
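The rule implemented above is the standard adjoint of a linear solve: for c = A^{-1} b, the gradients are b_bar = A^{-T} c_bar and A_bar = -b_bar c^T. A minimal dense NumPy sketch (illustrative only, not part of the Op above) that checks the b_bar identity against finite differences:

import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4)) + 4 * np.eye(4)   # well-conditioned test system
b = rng.normal(size=4)
c_bar = rng.normal(size=4)                    # upstream gradient dL/dc

c = np.linalg.solve(A, b)
b_bar = np.linalg.solve(A.T, c_bar)           # dL/db = A^{-T} c_bar
A_bar = -np.outer(b_bar, c)                   # dL/dA = -b_bar c^T

# Finite-difference check of dL/db for L = c . c_bar
eps = 1e-6
b_bar_fd = np.array([(np.linalg.solve(A, b + eps * e) - c).dot(c_bar) / eps
                     for e in np.eye(4)])
assert np.allclose(b_bar_fd, b_bar, atol=1e-4)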
Example #2
    def test_transpose_csr(self):
        a = as_sparse_variable(scipy.sparse.csr_matrix(scipy.sparse.eye(5, 3)))
        self.assertTrue(a.data.shape == (5, 3))
        self.assertTrue(a.type.dtype == "float64")
        self.assertTrue(a.type.format == "csr")
        ta = transpose(a)
        self.assertTrue(ta.type.dtype == "float64", ta.type.dtype)
        self.assertTrue(ta.type.format == "csc", ta.type.format)

        vta = eval_outputs([ta])
        self.assertTrue(vta.shape == (3, 5))
Example #4
    def test_transpose_csc(self):
        sp = scipy.sparse.csc_matrix(scipy.sparse.eye(5, 3))
        a = as_sparse_variable(sp)
        self.assertFalse(a.data is sp)
        self.assertTrue(a.data.shape == (5, 3))
        self.assertTrue(a.type.dtype == 'float64', a.type.dtype)
        self.assertTrue(a.type.format == 'csc', a.type.format)
        ta = transpose(a)
        self.assertTrue(ta.type.dtype == 'float64', ta.type.dtype)
        self.assertTrue(ta.type.format == 'csr', ta.type.format)

        vta = eval_outputs([ta])
        self.assertTrue(vta.shape == (3, 5))
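These tests rely on a property that also holds for SciPy sparse matrices: transposing a compressed sparse matrix flips its storage format (CSR becomes CSC and vice versa), since the same compressed buffers can simply be reinterpreted. A quick SciPy-only check of that behavior (illustrative, outside of Theano):

import scipy.sparse

a = scipy.sparse.csr_matrix(scipy.sparse.eye(5, 3))
ta = a.T
print(a.format, ta.format)    # 'csr' 'csc'
print(a.shape, ta.shape)      # (5, 3) (3, 5)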
Example #5
def true_dot(x, y, grad_preserves_dense=True):
    """
    @todo: Maybe the triple-transposition formulation (when x is dense)
    is slow. See if there is a direct way to do this.
    """
    if hasattr(x, 'getnnz'):
        x = as_sparse_variable(x)
    if hasattr(y, 'getnnz'):
        y = as_sparse_variable(y)

    x_is_sparse_variable = _is_sparse_variable(x)
    y_is_sparse_variable = _is_sparse_variable(y)
    if not x_is_sparse_variable and not y_is_sparse_variable:
        raise TypeError('at least one of x and y must be a sparse variable')
    if x_is_sparse_variable:
        return TrueDot(grad_preserves_dense)(x, y)
    else:
        assert y_is_sparse_variable
        return transpose(TrueDot(grad_preserves_dense)(y.T, x.T))
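When x is dense and y is sparse, the fall-back branch above uses the identity x.y = (y^T . x^T)^T so that TrueDot always sees a sparse left operand. A small SciPy sketch of that triple-transposition identity (illustrative only):

import numpy as np
import scipy.sparse

x = np.arange(6, dtype='float64').reshape(2, 3)    # dense left operand
y = scipy.sparse.csr_matrix(np.eye(3, 4))          # sparse right operand

direct = x @ y.toarray()                           # the product we want: x . y
via_transpose = np.asarray(y.T.dot(x.T)).T         # (y^T . x^T)^T

assert np.allclose(direct, via_transpose)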
Example #7
    def test_basicDS(self):
        for mtype in _mtypes:
            x = as_sparse_variable(mtype((500, 3)))
            x.data[(10, 1)] = 1
            x.data[(20, 2)] = 2
            self.assertTrue(_is_sparse_variable(x))

            y = tensor.as_tensor_variable([[1., 2], [3, 4], [2, 1]])
            self.assertTrue(_is_dense_variable(y))

            x.data = x.data.T
            y.data = y.data.T

            zop = transpose(true_dot(y, x))
            self.assertTrue(_is_sparse_variable(zop))
            z = eval_outputs([zop])
            self.assertTrue(_is_sparse(z))
            self.assertTrue(z.shape == (500, 2))
            #            self.assertTrue(type(z) is mtype)

            w = mtype((500, 2))
            w[(10, 0)] = 3.
            w[(20, 0)] = 4
            w[(10, 1)] = 4
            w[(20, 1)] = 2
            self.assertTrue(z.shape == w.shape)
            # Type should switch from csr to csc and vice-versa, so don't perform this test
            #self.assertTrue(type(z) == type(w))
            self.assertTrue(z.dtype == w.dtype)

            # Type should switch from csr to csc and vice-versa, so don't perform this test
            #self.assertTrue(z == w)
            self.assertTrue(abs(z - w).nnz == 0)

            z = z.todense()
            w = w.todense()
            self.assertTrue((z == w).all())
Example #9
	def getUpdateParams(self):
		update = []
		aux = []

		# Update state
		update.append( (self.params[0], input_layer.output) )

		# Update output
		print 'Length: ' + str(len(self.connections))
		for i, c in enumerate(self.connections):
			aux.append(sparse.structured_dot(
						sparse.transpose(c.input), 
						self.params[2][i] * c.inhibition
						))
		aux2 = aux.pop()
		for a in range(len(aux)):
			aux2 = sparse.add(aux2,aux.pop())
			print aux2
		from theano import pp
		print 'out: '
		print pp(aux2)
		update.append((self.params[1],sparse.transpose(sparse.structured_sigmoid(aux2))))
		# Hardcoded!!
		'''update.append((self.params[1],
			sparse.transpose(
				sparse.structured_sigmoid(sparse.structured_dot(
						sparse.transpose(self.connections[0].input), 
						self.params[2][0])))))
		'''
		'''
		update.append((self.params[1], 
		  sparse.transpose(
			sparse.structured_sigmoid(
				sparse.structured_dot(
					sparse.transpose(self.connections[0].input), 	# Input
					self.params[2][0]))))) 							# Weights
		'''
		# Update weights
		''' #Old ones (OJA)
		for i, w in enumerate(self.params[2]):
			update.append( (w,  
				#layer.params[0]))
				sparse.add( 
					w, 
					self.LR[i]*sparse.transpose(
						sparse.structured_dot(self.params[1], self.x_yw[i])
						)
					)
				))
		'''
		for i, w in enumerate(self.params[2]):
			update.append( (w, #w))
				#layer.params[0]))
					sparse.structured_maximum(
						sparse.add(
							w,
							sparse.add(self.xy[i], 
							self.AWW[i])),
					0)
				) )

		return update
Example #10
	def addConnections(self, connections):
		global delta, Wmin, Wmax, awe
		self.connections = self.connections + connections
		i=0
		for i, c in enumerate(connections):
			j = self.i + i
			# Weights
			self.weights.append(
				theano.shared( 
					sp.csc_matrix(
					np.asarray( 
					c.generateConnectionMatrix(self.o_shape, generate), 
					dtype=self.input.dtype) ), name ='Wi_' + str(j)))
			self.Wmax.append(
				theano.shared(
					sp.csc_matrix(
					np.asarray( 
					np.ones((sizeFromShape(c.i_shape),sizeFromShape(self.o_shape)))*Wmax, 
					dtype=self.input.dtype) ), name ='WM_' + str(i)))
			self.Wmin.append(
				theano.shared(
					sp.csc_matrix(
					np.asarray( 
					np.ones((sizeFromShape(c.i_shape),sizeFromShape(self.o_shape)))*Wmin, 
					dtype=self.input.dtype) ), name ='WM_' + str(i)))
			# yw
			# out: nx1
			# Wi: mxn
			# outT x WiT : 1xm
			self.yw.append(
				sparse.structured_dot(
					sparse.transpose(self.output),
					sparse.transpose(self.weights[j])))
			# x_yw
			# in: nx1
			self.x_yw.append(
				sparse.sub(
					sparse.transpose(c.input),
					self.yw[j]))
			
			print len(self.weights)
			print self.weights[i].type
			print self.weights[i].type.ndim
			print 
			if self.weights:
				auxX=sparse.sub(self.Wmax[j], self.weights[i])
				auxY=sparse.sub(self.weights[i], self.Wmin[j])
				self.LR.append(delta*(
					sparse.sub(
						sparse.structured_pow(
							sparse.sub(self.Wmax[j], self.weights[i]),
							1), 
						sparse.structured_pow(
							sparse.sub(self.Wmin[j], self.weights[i]),
							1))))
				self.xy.append(
					self.LR[i]*sparse.structured_dot(
						c.input,
						sparse.transpose(self.output)))
				self.AWW.append(
					awe*delta*sparse.structured_pow(
								sparse.sub(self.Wmax[j], self.weights[i]),
								1)*self.weights[i])
		self.i +=i
		self.params[2] = self.weights
Example #11
#update = [(param_i, param_i + LR)]
index=T.lscalar()

csc_mat = sparse.csc_matrix('cscMat', dtype='float32')
qq,ww,ee,rr = sparse.csm_properties(csc_mat)
csc_trans = sparse.CSR(qq,ww,ee,rr)
#trans = theano.function([csc_mat],csc_trans)


Wis = []
Wrs = []
states = []
outs=[]
a = sp.csc_matrix(np.asarray([[0, 1, 1], [0, 0, 0], [1, 0, 0]],dtype='float32'))
print sparse.transpose(a).toarray()

old_W = sparse.csc_matrix('old_W',dtype='float32') # Old weight matrix
pop_i = sparse.csc_matrix('pop_i',dtype='float32') # Input layer
pop_j = sparse.csc_matrix('pop_j',dtype='float32') # Output layer
alpha = T.scalar('alpha',dtype='float32')
'''
new_W = sparse.add(old_W,
					sparse.sub(
						alpha*sparse.structured_dot(sparse.transpose(pop_j), pop_i), 
						sparse.structured_dot(
							sparse.structured_dot(
								sparse.transpose(pop_j), 
								pop_j),
							old_W)
						)
Example #12
	def __init__(self, input, filter_shape, sigma,i_shape,o_shape, Wi = False, Wr = False):
		global generate
		# Mean neuron density ~80k/mm^3 in V2 (Skoglund 1996)
		# Synapse length follows a power law ()
		# Synapse length for feedback interareal ~10-40mm, feedforward same, but fewer connections
		# Synapse lengths are found by Sholl analysis.
		# Should compare RF data with Van den Bergh 2010

		# Initialize weights as a shared variable
		#n_col=input.shape[1]

		try: 
			if generate:
				np.load('asd')
			else:
				Wi=np.load(i_file)
				print '[info] Weights loaded from file!'
				print 'Shape = ' + str(Wi.shape)
		except IOError:
			print "[info] Weights file wasn't found. Generating new connections"
			kern1 = gkern2(filter_shape,sigma)
			Wi = kernel2connection(kern1, i_shape, o_shape)
			#Wi /= np.sum(Wi,1).reshape((Wi.shape[0],1))*15
			print 'Shape = ' + str(Wi.shape)
			np.save(i_file,Wi)

		try: 
			if generate:
				np.load('asd')
			else:
				Wr=np.load(r_file)
				print 'Weights loaded from file!'
		except IOError:
			print "Weights file wasn't found. Generating new connections"
			kern2 = gkern2(filter_shape,sigma)
			Wr = kernel2connection(kern2, o_shape,o_shape)
			#Wr /= np.sum(Wi,1)
			np.save(r_file,Wr)

		if np.sum(Wi,1)[0] != 1:
			Wi /= np.sum(Wi,1).reshape((Wi.shape[0],1))*5
		if np.sum(Wr,1)[0] != 1:
			Wr /= np.sum(Wr,1).reshape((Wr.shape[0],1))
		print np.sum(Wi,0)
		print np.sum(Wi,1)
		plt.plot(Wi[1,:])
		plt.show()


		self.Wi= theano.shared( 
				sp.csc_matrix(
				np.asarray( 
				Wi, 
				dtype=input.dtype) ), name ='Wi')
		self.Wr = theano.shared( 
				sp.csc_matrix(
				np.asarray( 
				Wr, 
				dtype=input.dtype) ), name ='Wr')
		# Output of the layer is the sigmoid of the convolved network
		self.state = theano.shared( 
			sp.csc_matrix(
			np.asarray( 
			np.zeros((o_shape[0]*o_shape[1],1)), 
			dtype=input.dtype) ), name ='St')

		self.input = input

		# I could do the same with biases if needed
		#print self.input.get_value().shape
		#print self.Wi.get_value().shape
		self.output = theano.shared( 
			sp.csc_matrix(
			np.asarray( 
			np.zeros((o_shape[0]*o_shape[1],1)), 
			dtype=input.dtype) ), name ='Out')
		#sparse.structured_sigmoid(sparse.structured_dot(self.input, self.Wi))  #T.dot(self.input, self.Wi))
		# input = external + recursive (from layer)
		# self.input = T.dot(input, self.Wi) #+ T.sum(T.dot(self.state,self.Wr),1)

		# out: nx1
		# Wi: mxn
		# outT x WiT : 1xm
		self.yw = sparse.structured_dot(
						sparse.transpose(self.output),
						sparse.transpose(self.Wi))
		# in: nx1
		self.x_yw = sparse.sub(
						sparse.transpose(self.input),
						self.yw)


		# optional: self.output = T.nnet.sigmoid(conv_out+self.output)
		self.params = [self.Wi, self.Wr, self.state, self.output]
Example #14
def calc_mixed_mnl_probabilities(beta,
                                 design,
                                 rows_to_obs,
                                 error_components):
    """
    This function will calculate the MNL choice probabilities for each
    alternative of each choice situation in the design matrix. This function
    will be specific to ONLY the MNL model. Note this function is overly
    restrictive because Theano can only do automatic differentiation of
    functions that return scalars. This means the log-likelihood function
    must only return a single value, so this probability function must only
    return a 1D array (or a column vector in 2D).

    Parameters
    ----------
    beta : 1D ndarray of shape `(design.shape[1],)`.
        All elements should be ints, floats, or longs. There should be one
        element per index coefficient.
    design : 2D ndarray.
        There should be one row per observation per available alternative.
        There should be one column per utility coefficient being estimated. All
        elements should be ints, floats, or longs.
    rows_to_obs : 2D ndarray.
        There should be one row per observation per available alternative and
        one column per observation. This matrix maps the rows of the design
        matrix to the unique observations (on the columns).
    error_components : 1D ndarray of shape `(design.shape[0],)`.
        All elements should be floats or longs. These will be the error
        components to be added to the deterministic portion of the systematic
        utility.

    Returns
    -------
    long_probs : 1D ndarray of shape `(design.shape[0],)`.
        There will be one element per observation per available alternative for
        that observation. Each element will be the probability of the
        corresponding observation being associated with that row's corresponding
        alternative.
    """
    # Calculate the systematic utility for each alternative for each individual
    sys_utilities = calc_error_comp_utilities(beta, design, error_components)

    # The following commands are to guard against numeric under/over-flow
    # Note that the strange function calls (e.g. switch) are used because
    # Theano needs special commands to emulate the (clearer) numpy behavior.
    sys_utilities = tt.switch(tt.lt(sys_utilities, min_exponent_val),
                              min_exponent_val,
                              sys_utilities)

    sys_utilities = tt.switch(tt.gt(sys_utilities, max_exponent_val),
                              max_exponent_val,
                              sys_utilities)

    # Exponentiate the transformed utilities
    long_exponentials = np.exp(sys_utilities)

    # Calculate \sum _j exp(V_j) for each individual.
    # Result should be a 1D array.
    individual_denominators = sparse.dot(sparse.transpose(rows_to_obs),
                                         long_exponentials)

    # Get a 1D array of the same number of rows as the design matrix, with each
    # element of each row representing the `individual_denominators` for the
    # given choice situation for the given observation.
    long_denominators = sparse.dot(rows_to_obs,
                                   individual_denominators)

    # long_probs will be of shape (num_rows,). Each element will provide the
    # probability of the observation associated with that row having the
    # alternative associated with that row as the observation's outcome
    long_probs = long_exponentials / long_denominators

    # Guard against underflow by replacing exact zeros with a small positive
    # constant (tt.switch stands in for numpy-style boolean assignment here).
    long_probs = tt.switch(tt.eq(long_probs, 0), min_comp_value, long_probs)

    # Consider using an assert statement that ensures all probabilities add to
    # 1 for each choice situation.
    return long_probs
def theano_mnl_probabilities(beta,
                             design,
                             rows_to_obs):
    """
    This function will calculate the MNL choice probabilities for each
    alternative of each choice situation in the design matrix. This function
    will be specific to ONLY the MNL model. Note this function is overly
    restrictive because Theano can only do automatic differentiation of
    functions that return scalars (so we can only return a 1D array of
    probabilities).

    Parameters
    ----------
    beta : 2D ndarray.
        All elements should be ints, floats, or longs. There should be one
        element per index coefficient.
    design : 2D ndarray.
        There should be one row per observation per available alternative.
        There should be one column per utility coefficient being estimated. All
        elements should be ints, floats, or longs.
    rows_to_obs : 2D ndarray.
        There should be one row per observation per available alternative and
        one column per observation. This matrix maps the rows of the design
        matrix to the unique observations (on the columns).

    Returns
    -------
    long_probs : 2D numpy array.
        There will be one element per observation per available alternative for
        that observation. Each element will be the probability of the
        corresponding observation being associated with that row's corresponding
        alternative.
    """
    # Calculate the systematic utility for each alternative for each individual
    sys_utilities = design.dot(beta)

    # The following commands are to guard against numeric under/over-flow
    # Note that the strange function calls are used because Theano needs
    # special commands to emulate the numpy behavior.
    sys_utilities = tt.switch(tt.lt(sys_utilities, min_exponent_val),
                              min_exponent_val,
                              sys_utilities)

    sys_utilities = tt.switch(tt.gt(sys_utilities, max_exponent_val),
                              max_exponent_val,
                              sys_utilities)

    # Exponentiate the transformed utilities
    long_exponentials = np.exp(sys_utilities)

    # Calculate \sum _j exp(V_j) for each individual.
    # Result should be a 2D array.
    individual_denominators = sparse.dot(sparse.transpose(rows_to_obs),
                                         long_exponentials)

    # Get a 2D array of the same number of rows as the design matrix, with each
    # element of each row representing the `individual_denominators` for the
    # given choice situation for the given observation.
    long_denominators = sparse.dot(rows_to_obs,
                                   individual_denominators)

    # long_probs will be of shape (num_rows, 1). Each element will provide the
    # probability of the observation associated with that row having the
    # alternative associated with that row as the observation's outcome
    long_probs = long_exponentials / long_denominators

    # Prevent negative infinity values when calculating the log-likelihood.
    long_probs = tt.switch(tt.eq(long_probs, 0), min_comp_value, long_probs)

    # Consider using an assert statement that ensures all probabilities add to
    # 1 for each choice situation.
    return long_probs
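The two sparse.dot calls in both functions implement a group-sum-then-broadcast pattern: rows_to_obs^T . exp(V) sums the exponentiated utilities within each choice situation, and rows_to_obs . (those sums) copies each observation's denominator back onto its long-format rows. A minimal NumPy sketch of the same pattern, using a small dense mapping matrix purely for illustration:

import numpy as np

# Two observations: the first has 2 alternatives, the second has 3.
rows_to_obs = np.array([[1, 0],
                        [1, 0],
                        [0, 1],
                        [0, 1],
                        [0, 1]], dtype=float)
long_exponentials = np.exp(np.array([0.5, 1.0, -0.2, 0.3, 0.0]))

individual_denominators = rows_to_obs.T.dot(long_exponentials)  # one sum per observation
long_denominators = rows_to_obs.dot(individual_denominators)    # broadcast back to long format
long_probs = long_exponentials / long_denominators

# Probabilities sum to one within each choice situation.
assert np.allclose(rows_to_obs.T.dot(long_probs), 1.0)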