def grad(self, inputs, output_gradients):
    A, b = inputs
    c = self(A, b)
    c_bar = output_gradients[0]
    trans_solve_op = SpSolve()
    b_bar = trans_solve_op(ts.transpose(A), c_bar)
    A_bar = -ts.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T)
    return [A_bar, b_bar]
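# A quick SciPy-only numeric check (made-up data, not part of the Op above) of the
# adjoint rules the grad method implements for c = A^{-1} b with upstream gradient c_bar:
#     b_bar = A^{-T} c_bar        A_bar = -outer(b_bar, c)
import numpy as np
import scipy.sparse
from scipy.sparse.linalg import spsolve

rng = np.random.RandomState(0)
A = scipy.sparse.eye(4, format='csc') * 4 + scipy.sparse.random(4, 4, density=0.5,
                                                                 random_state=0, format='csc')
b, g = rng.rand(4), rng.rand(4)        # g plays the role of c_bar
c = spsolve(A, b)
b_bar = spsolve(A.T.tocsc(), g)        # adjoint rule for b
A_bar = -np.outer(b_bar, c)            # adjoint rule for A

# Finite-difference check of b_bar on one coordinate
eps, i = 1e-6, 2
b_pert = b.copy()
b_pert[i] += eps
print((g.dot(spsolve(A, b_pert)) - g.dot(c)) / eps, b_bar[i])   # should agree closely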
def test_transpose_csr(self):
    a = as_sparse_variable(scipy.sparse.csr_matrix(scipy.sparse.eye(5, 3)))
    self.assertTrue(a.data.shape == (5, 3))
    self.assertTrue(a.type.dtype == "float64")
    self.assertTrue(a.type.format == "csr")
    ta = transpose(a)
    self.assertTrue(ta.type.dtype == "float64", ta.type.dtype)
    self.assertTrue(ta.type.format == "csc", ta.type.format)
    vta = eval_outputs([ta])
    self.assertTrue(vta.shape == (3, 5))
def test_transpose_csc(self):
    sp = scipy.sparse.csc_matrix(scipy.sparse.eye(5, 3))
    a = as_sparse_variable(sp)
    self.assertFalse(a.data is sp)
    self.assertTrue(a.data.shape == (5, 3))
    self.assertTrue(a.type.dtype == 'float64', a.type.dtype)
    self.assertTrue(a.type.format == 'csc', a.type.format)
    ta = transpose(a)
    self.assertTrue(ta.type.dtype == 'float64', ta.type.dtype)
    self.assertTrue(ta.type.format == 'csr', ta.type.format)
    vta = eval_outputs([ta])
    self.assertTrue(vta.shape == (3, 5))
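# A minimal standalone sketch of the behavior these tests cover: in Theano,
# sparse.transpose flips both the shape and the storage format (csr <-> csc).
# Assumes only that Theano and SciPy are installed.
import scipy.sparse
import theano
from theano import sparse

x = sparse.csr_matrix(name='x', dtype='float64')
y = sparse.transpose(x)
print(y.type.format)                       # 'csc'

f = theano.function([x], y)
a = scipy.sparse.csr_matrix(scipy.sparse.eye(5, 3))
print(f(a).shape)                          # (3, 5)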
def true_dot(x, y, grad_preserves_dense=True):
    """
    @todo: Maybe the triple-transposition formulation (when x is dense)
           is slow. See if there is a direct way to do this.
    """
    if hasattr(x, 'getnnz'):
        x = as_sparse_variable(x)
    if hasattr(y, 'getnnz'):
        y = as_sparse_variable(y)

    x_is_sparse_variable = _is_sparse_variable(x)
    y_is_sparse_variable = _is_sparse_variable(y)

    if not x_is_sparse_variable and not y_is_sparse_variable:
        raise TypeError()

    if x_is_sparse_variable:
        return TrueDot(grad_preserves_dense)(x, y)
    else:
        assert y_is_sparse_variable
        return transpose(TrueDot(grad_preserves_dense)(y.T, x.T))
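# A tiny NumPy/SciPy check (illustration only) of the "triple transposition"
# identity that the dense-first branch above relies on: (y.T @ x.T).T == x @ y,
# which lets a dense-times-sparse product be rewritten as sparse-times-dense.
import numpy as np
import scipy.sparse

x = np.arange(6.0).reshape(2, 3)                 # dense
y = scipy.sparse.csr_matrix(np.eye(3, 4))        # sparse
lhs = np.asarray(y.T.dot(x.T)).T
rhs = x.dot(y.toarray())
print(np.allclose(lhs, rhs))                     # True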
def test_basicDS(self):
    for mtype in _mtypes:
        x = as_sparse_variable(mtype((500, 3)))
        x.data[(10, 1)] = 1
        x.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(x))

        y = tensor.as_tensor_variable([[1., 2], [3, 4], [2, 1]])
        self.assertTrue(_is_dense_variable(y))

        x.data = x.data.T
        y.data = y.data.T

        zop = true_dot(y, x)
        zop = transpose(true_dot(y, x))
        self.assertTrue(_is_sparse_variable(zop))
        z = eval_outputs([zop])
        self.assertTrue(_is_sparse(z))
        self.assertTrue(z.shape == (500, 2))
        # self.assertTrue(type(z) is mtype)

        w = mtype((500, 2))
        w[(10, 0)] = 3.
        w[(20, 0)] = 4
        w[(10, 1)] = 4
        w[(20, 1)] = 2
        self.assertTrue(z.shape == w.shape)
        # Type should switch from csr to csc and vice-versa, so don't perform this test
        # self.assertTrue(type(z) == type(w))
        self.assertTrue(z.dtype == w.dtype)
        # Type should switch from csr to csc and vice-versa, so don't perform this test
        # self.assertTrue(z == w)
        self.assertTrue(abs(z - w).nnz == 0)

        z = z.todense()
        w = w.todense()
        self.assertTrue((z == w).all() == True)
def getUpdateParams(self):
    update = []
    aux = []

    # Update state
    update.append((self.params[0], input_layer.output))

    # Update output
    print 'Length: ' + str(len(self.connections))
    for i, c in enumerate(self.connections):
        aux.append(sparse.structured_dot(
            sparse.transpose(c.input),
            self.params[2][i] * c.inhibition))
    aux2 = aux.pop()
    for a in range(len(aux)):
        aux2 = sparse.add(aux2, aux.pop())
    print aux2
    from theano import pp
    print 'out: '
    print pp(aux2)
    update.append((self.params[1],
                   sparse.transpose(sparse.structured_sigmoid(aux2))))  # Hardcoded!!
    '''update.append((self.params[1],
        sparse.transpose(
            sparse.structured_sigmoid(sparse.structured_dot(
                sparse.transpose(self.connections[0].input),
                self.params[2][0])))))
    '''
    '''
    update.append((self.params[1],
        sparse.transpose(
            sparse.structured_sigmoid(
                sparse.structured_dot(
                    sparse.transpose(self.connections[0].input),   # Input
                    self.params[2][0])))))                          # Weights
    '''

    # Update weights
    '''
    # Old ones (OJA)
    for i, w in enumerate(self.params[2]):
        update.append((w,
            # layer.params[0]))
            sparse.add(
                w,
                self.LR[i] * sparse.transpose(
                    sparse.structured_dot(self.params[1], self.x_yw[i])))))
    '''
    for i, w in enumerate(self.params[2]):
        update.append((w,
            # w))
            # layer.params[0]))
            sparse.structured_maximum(
                sparse.add(
                    w,
                    sparse.add(self.xy[i], self.AWW[i])), 0)))
    return update
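# A minimal usage sketch (the `layer` name is hypothetical, not from the original
# code): the (shared_variable, new_value_expression) pairs returned by
# getUpdateParams are meant to be passed to theano.function, which applies them
# in place on every call of the compiled function.
import theano

step = theano.function(inputs=[], outputs=[], updates=layer.getUpdateParams())
step()   # one update of state, output and weights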
def addConnections(self, connections):
    global delta, Wmin, Wmax, awe

    self.connections = self.connections + connections
    i = 0
    for i, c in enumerate(connections):
        j = self.i + i

        # Weights
        self.weights.append(
            theano.shared(
                sp.csc_matrix(
                    np.asarray(
                        c.generateConnectionMatrix(self.o_shape, generate),
                        dtype=self.input.dtype)),
                name='Wi_' + str(j)))
        self.Wmax.append(
            theano.shared(
                sp.csc_matrix(
                    np.asarray(
                        np.ones((sizeFromShape(c.i_shape), sizeFromShape(self.o_shape))) * Wmax,
                        dtype=self.input.dtype)),
                name='WM_' + str(i)))
        self.Wmin.append(
            theano.shared(
                sp.csc_matrix(
                    np.asarray(
                        np.ones((sizeFromShape(c.i_shape), sizeFromShape(self.o_shape))) * Wmin,
                        dtype=self.input.dtype)),
                name='WM_' + str(i)))

        # yw
        # out: nx1
        # Wi: mxn
        # outT x WiT : 1xm
        self.yw.append(
            sparse.structured_dot(
                sparse.transpose(self.output),
                sparse.transpose(self.weights[j])))

        # x_yw
        # in: nx1
        self.x_yw.append(
            sparse.sub(
                sparse.transpose(c.input),
                self.yw[j]))

        print len(self.weights)
        print self.weights[i].type
        print self.weights[i].type.ndim
        print

        if self.weights:
            auxX = sparse.sub(self.Wmax[j], self.weights[i])
            auxY = sparse.sub(self.weights[i], self.Wmin[j])
            self.LR.append(delta * (
                sparse.sub(
                    sparse.structured_pow(
                        sparse.sub(self.Wmax[j], self.weights[i]), 1),
                    sparse.structured_pow(
                        sparse.sub(self.Wmin[j], self.weights[i]), 1))))
            self.xy.append(
                self.LR[i] * sparse.structured_dot(
                    c.input,
                    sparse.transpose(self.output)))
            self.AWW.append(
                awe * delta * sparse.structured_pow(
                    sparse.sub(self.Wmax[j], self.weights[i]), 1) * self.weights[i])

    self.i += i
    self.params[2] = self.weights
# update = [(param_i, param_i + LR)]
index = T.lscalar()

csc_mat = sparse.csc_matrix('cscMat', dtype='float32')
qq, ww, ee, rr = sparse.csm_properties(csc_mat)
csc_trans = sparse.CSR(qq, ww, ee, rr)
# trans = theano.function([csc_mat], csc_trans)

Wis = []
Wrs = []
states = []
outs = []

a = sp.csc_matrix(np.asarray([[0, 1, 1],
                              [0, 0, 0],
                              [1, 0, 0]], dtype='float32'))
print sparse.transpose(a).toarray()

old_W = sparse.csc_matrix('old_W', dtype='float32')   # Old weight matrix
pop_i = sparse.csc_matrix('pop_i', dtype='float32')   # Input layer
pop_j = sparse.csc_matrix('pop_j', dtype='float32')   # Output layer
alpha = T.scalar('alpha', dtype='float32')

'''
new_W = sparse.add(old_W,
    sparse.sub(
        alpha * sparse.structured_dot(sparse.transpose(pop_j), pop_i),
        sparse.structured_dot(
            sparse.structured_dot(
                sparse.transpose(pop_j),
                pop_j),
            old_W)
    )
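# A short completion of the commented-out `trans` line above (an assumption about
# its intent, not part of the original script): csm_properties pulls the raw
# (data, indices, indptr, shape) buffers out of a csc matrix, and feeding them to
# sparse.CSR reinterprets the same buffers as csr, which is the transpose. Passing
# the shape through unchanged only works for square matrices like `a`; in general
# it should be reversed, e.g. sparse.CSR(qq, ww, ee, rr[::-1]).
trans = theano.function([csc_mat], csc_trans)
print(trans(a).toarray())      # same values as sparse.transpose(a) evaluated above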
def __init__(self, input, filter_shape, sigma, i_shape, o_shape, Wi=False, Wr=False):
    global generate
    # Mean neuron density ~80k/mm^3 in V2 (Skoglund 1996)
    # Synapse length follows a power law ()
    # Synapse length for feedback interareal ~10-40mm, feedforward same, but fewer connections
    # Synapse lengths are found by Sholl analysis.
    # Should compare RF data with Van den Bergh 2010

    # Initialize weights as a shared variable
    # n_col = input.shape[1]
    try:
        if generate:
            np.load('asd')
        else:
            Wi = np.load(i_file)
            print '[info] Weights loaded from file!'
            print 'Shape = ' + str(Wi.shape)
    except IOError:
        print "[info] Weights file wasn't found. Generating new connections"
        kern1 = gkern2(filter_shape, sigma)
        Wi = kernel2connection(kern1, i_shape, o_shape)
        # Wi /= np.sum(Wi, 1).reshape((Wi.shape[0], 1)) * 15
        print 'Shape = ' + str(Wi.shape)
        np.save(i_file, Wi)
    try:
        if generate:
            np.load('asd')
        else:
            Wr = np.load(r_file)
            print 'Weights loaded from file!'
    except IOError:
        print "Weights file wasn't found. Generating new connections"
        kern2 = gkern2(filter_shape, sigma)
        Wr = kernel2connection(kern2, o_shape, o_shape)
        # Wr /= np.sum(Wi, 1)
        np.save(r_file, Wr)

    if np.sum(Wi, 1)[0] != 1:
        Wi /= np.sum(Wi, 1).reshape((Wi.shape[0], 1)) * 5
    if np.sum(Wr, 1)[0] != 1:
        Wr /= np.sum(Wr, 1).reshape((Wr.shape[0], 1))

    print np.sum(Wi, 0)
    print np.sum(Wi, 1)
    plt.plot(Wi[1, :])
    plt.show()

    self.Wi = theano.shared(
        sp.csc_matrix(
            np.asarray(Wi, dtype=input.dtype)),
        name='Wi')
    self.Wr = theano.shared(
        sp.csc_matrix(
            np.asarray(Wr, dtype=input.dtype)),
        name='Wr')

    # Output of the layer is the sigmoid of the convolved network
    self.state = theano.shared(
        sp.csc_matrix(
            np.asarray(
                np.zeros((o_shape[0] * o_shape[1], 1)),
                dtype=input.dtype)),
        name='St')
    self.input = input
    # I could do the same with biases if needed
    # print self.input.get_value().shape
    # print self.Wi.get_value().shape
    self.output = theano.shared(
        sp.csc_matrix(
            np.asarray(
                np.zeros((o_shape[0] * o_shape[1], 1)),
                dtype=input.dtype)),
        name='Out')
    # sparse.structured_sigmoid(sparse.structured_dot(self.input, self.Wi))
    # T.dot(self.input, self.Wi))

    # input = external + recursive (from layer)
    # self.input = T.dot(input, self.Wi)  # + T.sum(T.dot(self.state, self.Wr), 1)

    # out: nx1
    # Wi: mxn
    # outT x WiT : 1xm
    self.yw = sparse.structured_dot(
        sparse.transpose(self.output),
        sparse.transpose(self.Wi))

    # in: nx1
    self.x_yw = sparse.sub(
        sparse.transpose(self.input),
        self.yw)

    # optional: self.output = T.nnet.sigmoid(conv_out + self.output)
    self.params = [self.Wi, self.Wr, self.state, self.output]
def calc_mixed_mnl_probabilities(beta, design, rows_to_obs, error_components):
    """
    Calculate the MNL choice probabilities for each alternative of each choice
    situation in the design matrix. This function is specific to ONLY the MNL
    model. Note this function is overly restrictive because Theano can only do
    automatic differentiation of functions that return scalars. This means the
    log-likelihood function must return a single value, so this probability
    function must only return a 1D array (or a column vector in 2D).

    Parameters
    ----------
    beta : 1D ndarray of shape `(design.shape[1],)`.
        All elements should be ints, floats, or longs. There should be one
        element per index coefficient.
    design : 2D ndarray.
        There should be one row per observation per available alternative and
        one column per utility coefficient being estimated. All elements
        should be ints, floats, or longs.
    rows_to_obs : 2D ndarray.
        There should be one row per observation per available alternative and
        one column per observation. This matrix maps the rows of the design
        matrix to the unique observations (on the columns).
    error_components : 1D ndarray of shape `(design.shape[0],)`.
        All elements should be floats or longs. These are the error components
        to be added to the deterministic portion of the systematic utility.

    Returns
    -------
    long_probs : 1D ndarray of shape `(design.shape[0],)`.
        There will be one element per observation per available alternative
        for that observation. Each element will be the probability of the
        corresponding observation being associated with that row's
        corresponding alternative.
    """
    # Calculate the systematic utility for each alternative for each individual
    sys_utilities = calc_error_comp_utilities(beta, design, error_components)

    # The following commands guard against numeric under/over-flow.
    # Note that the strange function calls (e.g. switch) are used because
    # Theano needs special commands to emulate the (clearer) numpy behavior.
    sys_utilities = tt.switch(tt.lt(sys_utilities, min_exponent_val),
                              min_exponent_val,
                              sys_utilities)
    sys_utilities = tt.switch(tt.gt(sys_utilities, max_exponent_val),
                              max_exponent_val,
                              sys_utilities)

    # Exponentiate the transformed utilities
    long_exponentials = np.exp(sys_utilities)

    # Calculate \sum_j exp(V_j) for each individual. Result should be a 1D array.
    individual_denominators = sparse.dot(sparse.transpose(rows_to_obs),
                                         long_exponentials)

    # Get a 1D array with the same number of rows as the design matrix, where each
    # element gives the `individual_denominators` value for the choice situation
    # of the given observation.
    long_denominators = sparse.dot(rows_to_obs, individual_denominators)

    # long_probs will be of shape (num_rows,). Each element will provide the
    # probability of the observation associated with that row having the
    # alternative associated with that row as the observation's outcome.
    long_probs = long_exponentials / long_denominators

    # Guard against underflow.
    long_probs = tt.switch(tt.eq(long_probs, 0), min_comp_value, long_probs)

    # Consider using an assert statement that ensures all probabilities add to
    # 1 for each choice situation.
    return long_probs
def theano_mnl_probabilities(beta, design, rows_to_obs):
    """
    Calculate the MNL choice probabilities for each alternative of each choice
    situation in the design matrix. This function is specific to ONLY the MNL
    model. Note this function is overly restrictive because Theano can only do
    automatic differentiation of functions that return scalars (so we can only
    return a 1D array of probabilities).

    Parameters
    ----------
    beta : 2D ndarray.
        All elements should be ints, floats, or longs. There should be one
        element per index coefficient.
    design : 2D ndarray.
        There should be one row per observation per available alternative and
        one column per utility coefficient being estimated. All elements
        should be ints, floats, or longs.
    rows_to_obs : 2D ndarray.
        There should be one row per observation per available alternative and
        one column per observation. This matrix maps the rows of the design
        matrix to the unique observations (on the columns).

    Returns
    -------
    long_probs : 2D numpy array.
        There will be one element per observation per available alternative
        for that observation. Each element will be the probability of the
        corresponding observation being associated with that row's
        corresponding alternative.
    """
    # Calculate the systematic utility for each alternative for each individual
    sys_utilities = design.dot(beta)

    # The following commands guard against numeric under/over-flow.
    # Note that the strange function calls are used because Theano needs
    # special commands to emulate the numpy behavior.
    sys_utilities = tt.switch(tt.lt(sys_utilities, min_exponent_val),
                              min_exponent_val,
                              sys_utilities)
    sys_utilities = tt.switch(tt.gt(sys_utilities, max_exponent_val),
                              max_exponent_val,
                              sys_utilities)

    # Exponentiate the transformed utilities
    long_exponentials = np.exp(sys_utilities)

    # Calculate \sum_j exp(V_j) for each individual. Result should be a 2D array.
    individual_denominators = sparse.dot(sparse.transpose(rows_to_obs),
                                         long_exponentials)

    # Get a 2D array with the same number of rows as the design matrix, where each
    # element of each row gives the `individual_denominators` value for the choice
    # situation of the given observation.
    long_denominators = sparse.dot(rows_to_obs, individual_denominators)

    # long_probs will be of shape (num_rows, 1). Each element will provide the
    # probability of the observation associated with that row having the
    # alternative associated with that row as the observation's outcome.
    long_probs = long_exponentials / long_denominators

    # Prevent negative infinity values when calculating the log-likelihood.
    long_probs = tt.switch(tt.eq(long_probs, 0), min_comp_value, long_probs)

    # Consider using an assert statement that ensures all probabilities add to
    # 1 for each choice situation.
    return long_probs
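# A small SciPy-only illustration (made-up numbers) of the rows_to_obs trick used
# in both probability functions above: multiplying by rows_to_obs.T sums the
# exponentiated utilities within each choice situation, and multiplying the result
# by rows_to_obs broadcasts those sums back to the "long" format.
import numpy as np
import scipy.sparse

# Five alternatives: rows 0-1 belong to observation 0, rows 2-4 to observation 1.
rows_to_obs = scipy.sparse.csr_matrix(np.array([[1., 0.],
                                                [1., 0.],
                                                [0., 1.],
                                                [0., 1.],
                                                [0., 1.]]))
long_exponentials = np.exp(np.array([0.5, 1.0, 0.2, 0.1, 0.3]))

individual_denominators = rows_to_obs.T.dot(long_exponentials)   # one sum per observation
long_denominators = rows_to_obs.dot(individual_denominators)     # back to long format
long_probs = long_exponentials / long_denominators
print(long_probs)   # probabilities sum to 1 within each observation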