def _compile_message_factor_(self, _factor, _node): ''' Pseudocode: (treat _node as X and _factor as L) if L is a unary factor: emit v_L,_X = v_L elif X is the output node of L v_i = compilemessage_node(X_o, L) elif X is the output node of L v_i = compilemessage_node(X_i, L) return this ''' if _factor.o.null: #If the factor is unary. return _factor.M #@TODO: See if we need another variable like self.v to represent the value of unary predicates. elif _factor.o == _node: #If the node is the output node for this factor v_i = self._comiple_message_node_(_factor.i, _factor) return sparse.structured_dot(v_i, _factor.M) # return v_i.dot(_factor.M) elif _factor.i == _node: #If the node is the input node for this factor v_i = self._comiple_message_node_(_factor.o, _factor) return sparse.structured_dot(v_i, _factor.M)
def get_output_for(self, input, **kwargs):
    """Apply the layer to a sparse input.

    Computes ``nonlinearity(A . (input . W) + b)``; the bias is skipped
    when ``self.b`` is None.

    :param input: symbolic sparse variable, constant or shared variable
    :raises ValueError: if ``input`` is not one of the sparse types
    :return: the layer's symbolic output
    """
    accepted = (S.SparseVariable,
                S.SparseConstant,
                S.sharedvar.SparseTensorSharedVariable)
    if not isinstance(input, accepted):
        raise ValueError("Input for this layer must be sparse")

    projected = S.structured_dot(input, self.W)
    # do the convolution
    result = S.structured_dot(self.A, projected)
    if self.b is not None:
        # broadcast the bias over the leading axis
        result = result + self.b.dimshuffle('x', 0)
    return self.nonlinearity(result)
def get_output_for(self, input, target_indices=None, **kwargs):
    """Multiply ``self.A`` by the input and optionally keep selected rows.

    :param input: operand multiplied on the left by ``self.A``
    :param target_indices: row indices to keep; only used when
        ``self.use_target_indices`` is set.  ``None`` means "keep all rows".
    :return: ``self.nonlinearity`` applied to the (optionally sliced)
        product
    """
    # do the convolution
    activation = S.structured_dot(self.A, input)
    # Test against None explicitly: truthiness silently skipped the
    # selection for an index of 0 or ``numpy.array([0])`` and raised for
    # index arrays with more than one element.
    if self.use_target_indices and target_indices is not None:
        activation = activation[target_indices, :]
    return self.nonlinearity(activation)
def buildgraphCSC(spdata, sym_mat):
    """Build ``structured_dot(CSC(spdata, ...), sym_mat)`` and check dtypes.

    The sparsity pattern (indices, indptr, shape) is taken from the
    ``spmat`` found in the enclosing scope; only the data vector is
    symbolic.  Both the assembled matrix and the product must be float32.
    """
    used_indices = spmat.indices[:spmat.size]
    sparse_operand = CSC(spdata, used_indices, spmat.indptr, spmat.shape)
    assert sparse_operand.type.dtype == 'float32'

    product = structured_dot(sparse_operand, sym_mat)
    assert product.type.dtype == 'float32'
    return product
def buildgraph(spdata, sym_mat):
    """Build ``structured_dot(CSR(spdata, ...), sym_mat)`` and check dtypes.

    The sparsity pattern (indices, indptr, shape) is taken from the
    ``spmat`` found in the enclosing scope; only the data vector is
    symbolic.  Both the assembled matrix and the product must be float64.
    """
    used_indices = spmat.indices[:spmat.size]
    sparse_operand = CSR(spdata, used_indices, spmat.indptr, spmat.shape)
    assert sparse_operand.type.dtype == 'float64'

    product = structured_dot(sparse_operand, sym_mat)
    assert product.type.dtype == 'float64'
    return product
def test_upcast(self): typenames = ('float32', 'int64', 'int8', 'int32', 'int16', 'float64', 'complex64', 'complex128') for dense_dtype in typenames: for sparse_dtype in typenames: correct_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype) a = SparseType('csc', dtype=sparse_dtype)() b = tensor.matrix(dtype=dense_dtype) d = structured_dot(a, b) assert d.type.dtype == correct_dtype # compile and run a function f = theano.function([a, b], d) M, N, K, nnz = (4, 3, 5, 3) spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz)) # the following madness is necessary to workaround # an intc vs. int32 bug. # The lil makes an intc on my computer when sparse_dtype # is int32. spmat.dtype = numpy.dtype(sparse_dtype) mat = numpy.asarray(numpy.random.randn(N, K) * 9, dtype=dense_dtype) print 'DTYPES', sparse_dtype, dense_dtype print 'sym types', a.type, b.type print 'dtype strings', spmat.dtype, mat.dtype print 'numpy dtype num', mat.dtype.num print 'scipy dtype num', spmat.data.dtype.num theano_result = f(spmat, mat) scipy_result = spmat * mat assert theano_result.shape == scipy_result.shape assert theano_result.dtype == scipy_result.dtype assert _allclose(theano_result, scipy_result)
def __call__(self, inputs):
    """
    Compute and return the PCA transformation of sparse data.

    Precondition: `self.mean` has been subtracted from inputs. As far as
    the original author could tell, theano offers no way to subtract a
    vector from a sparse matrix without building an intermediate dense
    matrix; even the hack used in `train()` does not help, because a
    sparse matrix cannot be built symbolically by repeating a vector.

    Parameters
    ----------
    inputs : scipy.sparse matrix object
        Sparse matrix of shape (n, d) on which to compute PCA

    Returns
    -------
    The product of `inputs` with the first `self.component_cutoff`
    columns of `self.W`, divided by the square root of the matching
    entries of `self.v` when `self.whiten` is set.
    """
    # Update component cutoff, in case min_variance or num_components has
    # changed (or both).
    self._update_cutoff()

    n_kept = self.component_cutoff
    projected = structured_dot(inputs, self.W[:, :n_kept])
    if not self.whiten:
        return projected

    projected /= tensor.sqrt(self.v[:n_kept])
    return projected
def applySparseFilter(kerns, kshp, nkern, images, imgshp, step=(1,1),
                      bias=None, mode='valid'):
    """
    Apply unshared ("sparse") filters via a sparse matrix product.

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order.

    Output feature map will have shape:

    .. code-block:: python

       batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: this means that each feature map is contiguous in
        memory. The memory layout will therefore be:
        [ <feature_map_0> <feature_map_1> ... <feature_map_n>],
        where <feature_map> represents a "feature map" in raster order.

    The concept of a feature map does not really apply to sparse filters
    without weight sharing. Basically, nkern=1 will generate one output
    img/feature map, nkern=2 a second feature map, etc.

    kerns is a 1D tensor, assumed to be of shape:

    .. code-block:: python

       nkern * N.prod(outshp) x N.prod(kshp)

    Each filter is applied separately to consecutive output pixels.

    :param kerns: nkern*outsize*ksize vector containing kernels
    :param kshp: tuple containing actual dimensions of kernel (not
                 symbolic)
    :param nkern: number of kernels to apply at each pixel in the input
                  image. nkern=1 will apply a single unique filter for
                  each input pixel.
    :param images: bsize x imgsize matrix containing images on which to
                   apply filters
    :param imgshp: tuple containing actual image dimensions (not symbolic)
    :param step: determines number of pixels between adjacent receptive
                 fields (tuple containing dx,dy values)
    :param bias: optional term added to the output when not None
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,height,width)
             (after dot product, not of the sparse matrix!)
    """
    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if numpy.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix
    layout = convolution_indices.sparse_eval(imgshp, kshp, nkern, step, mode)
    indices, indptr, spmat_shape, sptype, outshp, kmap = layout

    # build a sparse weight matrix
    assemble = theano.sparse.CSM(sptype, kmap)
    sparsew = assemble(kerns, indices, indptr, spmat_shape)

    filtered = sparse.structured_dot(sparsew, images.T)
    output = filtered.T
    if bias is not None:
        output += bias

    logical_shape = numpy.hstack((nkern, outshp))
    return output, logical_shape
def conv2d(images, kerns, ishp4, kshp4, subsample=(1, 1),
           border_mode='valid'):
    """Convolve via sparse patch extraction followed by a dense dot.

    :param images: symbolic images; flattened to 2D before extraction
    :param kerns: symbolic kernels; flattened to 2D before the dot
    :param ishp4: image shape (batch, channels, rows, cols)
    :param kshp4: kernel shape (n_kernels, channels, rows, cols)
    :param subsample: forwarded to the output-shape and patch helpers
    :param border_mode: forwarded to the output-shape and patch helpers
    :return: (output, oshp) with oshp = (batch, out_rows, out_cols,
        n_kernels)
    """
    # start by computing output dimensions, size, etc
    n_batch, n_chan, img_rows, img_cols = ishp4
    n_kern, kern_chan, kern_rows, kern_cols = kshp4
    assert n_chan == kern_chan  # number of channels must match
    out_rows, out_cols = conv_out_shp(img_rows, img_cols, kern_rows,
                                      kern_cols, border_mode, subsample)
    oshp = (n_batch, out_rows, out_cols, n_kern)

    # construct indices and index pointers for sparse matrix, which, when
    # multiplied with input images will generate a stack of image patches
    extractor = sp_extract_patches(img_rows, img_cols, kern_rows, kern_cols,
                                   kern_chan,
                                   RasterOrders.channel_row_col,
                                   RasterOrders.channel_row_col,
                                   subsample,
                                   border_mode,
                                   flip_patches=True)
    patch_extractor = extractor.tocsc()
    patches = sparse.structured_dot(images.flatten(2), patch_extractor)

    # compute output of linear classifier
    patch_rows = n_batch * out_rows * out_cols
    patch_width = kern_rows * kern_cols * kern_chan
    patch_stack = patches.reshape((patch_rows, patch_width))

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    flat_kerns = kerns.flatten(2)
    response = tensor.dot(patch_stack, flat_kerns.T)
    output = response.reshape((n_batch, out_rows, out_cols, n_kern))
    return output, oshp
def __call__(self, inputs):
    """
    Compute and return the PCA transformation of sparse data.

    Precondition: self.mean has been subtracted from inputs. As far as
    the original author could tell, theano offers no way to subtract a
    vector from a sparse matrix without building an intermediate dense
    matrix; even the hack used in train() does not help, because a sparse
    matrix cannot be built symbolically by repeating a vector.

    :type inputs: scipy.sparse matrix object, shape (n, d)
    :param inputs: sparse matrix on which to compute PCA

    TODO: docstring upgrade. Make it consistent with the numpy/pylearn
    standard.
    """
    # Update component cutoff, in case min_variance or num_components has
    # changed (or both).
    self._update_cutoff()

    n_kept = self.component_cutoff
    projected = structured_dot(inputs, self.W[:, :n_kept])
    if not self.whiten:
        return projected

    projected /= tensor.sqrt(self.v[:n_kept])
    return projected
def sample_v_given_h(self, h0_sample):
    """Sample every visible layer given ``h0_sample`` and merge the results.

    Each layer in ``self.v_layers`` returns a (statistic, sample) pair.
    The statistics and the samples are concatenated along axis 1 and each
    concatenation is multiplied by ``self.big_mask``.

    :return: ``[v_stat, v_sample]``
    """
    per_layer = [layer.sample_v_given_h(h0_sample)
                 for layer in self.v_layers]
    vs_stat = [stat for stat, _ in per_layer]
    vs_sample = [sample for _, sample in per_layer]

    v_stat = sparse.structured_dot(T.concatenate(vs_stat, axis=1),
                                   self.big_mask)
    v_sample = sparse.structured_dot(T.concatenate(vs_sample, axis=1),
                                     self.big_mask)
    return [v_stat, v_sample]
def test_structured_dot_grad(self):
    """Shape-inference check for the gradients of structured_dot.

    Currently always skipped; the code after the ``raise`` is kept for
    when the required support exists.
    """
    # We also need the grad of CSM to be implemented.
    raise SkipTest("infer_shape not implemented for the grad of structured_dot")

    cases = [("csc", StructuredDotGradCSC),
             ("csr", StructuredDotGradCSR)]
    for fmt, op in cases:
        x = SparseType(fmt, dtype=config.floatX)()
        y = SparseType(fmt, dtype=config.floatX)()
        cost = dense_from_sparse(structured_dot(x, y)).sum()
        grads = tensor.grad(cost, [x, y])
        # One check per gradient, each with freshly drawn inputs.
        for grad in (grads[0], grads[1]):
            self._compile_and_check(
                [x, y],
                [grad],
                [
                    as_sparse_format(random_lil((4, 5), config.floatX, 3), fmt),
                    as_sparse_format(random_lil((5, 3), config.floatX, 3), fmt),
                ],
                op,
            )
def test_upcast(self): typenames = ("float32", "int64", "int8", "int32", "int16", "float64", "complex64", "complex128") for dense_dtype in typenames: for sparse_dtype in typenames: correct_dtype = theano.scalar.upcast(sparse_dtype, dense_dtype) a = SparseType("csc", dtype=sparse_dtype)() b = tensor.matrix(dtype=dense_dtype) d = structured_dot(a, b) assert d.type.dtype == correct_dtype # compile and run a function f = theano.function([a, b], d) M, N, K, nnz = (4, 3, 5, 3) spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz)) # the following madness is necessary to workaround # an intc vs. int32 bug. # The lil makes an intc on my computer when sparse_dtype # is int32. spmat.dtype = numpy.dtype(sparse_dtype) mat = numpy.asarray(numpy.random.randn(N, K) * 9, dtype=dense_dtype) print "DTYPES", sparse_dtype, dense_dtype print "sym types", a.type, b.type print "dtype strings", spmat.dtype, mat.dtype print "numpy dtype num", mat.dtype.num print "scipy dtype num", spmat.data.dtype.num theano_result = f(spmat, mat) scipy_result = spmat * mat assert theano_result.shape == scipy_result.shape assert theano_result.dtype == scipy_result.dtype assert _allclose(theano_result, scipy_result)
def conv2d_channel_minor(images, kerns, ishp4, kshp4, subsample=(1,1),
                         border_mode='valid'):
    """Channel-minor convolution via sparse patch extraction and a dense dot.

    :param images: symbolic images; flattened to 2D before extraction
    :param kerns: symbolic kernels; flattened to 2D before the dot
    :param ishp4: image shape (batch, rows, cols, channels)
    :param kshp4: kernel shape (n_kernels, rows, cols, channels)
    :param subsample: forwarded to the output-shape and patch helpers
    :param border_mode: forwarded to the output-shape and patch helpers
    :return: (output, oshp) with oshp = (batch, out_rows, out_cols,
        n_kernels)
    """
    # start by computing output dimensions, size, etc
    n_batch, img_rows, img_cols, n_chan = ishp4
    n_kern, kern_rows, kern_cols, kern_chan = kshp4
    assert n_chan == kern_chan  # number of channels must match
    out_rows, out_cols = conv_out_shp(img_rows, img_cols, kern_rows,
                                      kern_cols, border_mode, subsample)
    oshp = (n_batch, out_rows, out_cols, n_kern)

    # construct indices and index pointers for sparse matrix, which, when
    # multiplied with input images will generate a stack of image patches
    extractor = sp_extract_patches(img_rows, img_cols, kern_rows, kern_cols,
                                   kern_chan,
                                   RasterOrders.row_col_channel,
                                   RasterOrders.row_col_channel,
                                   subsample,
                                   border_mode,
                                   flip_patches=True)
    patch_extractor = extractor.tocsc()
    patches = sparse.structured_dot(images.flatten(2), patch_extractor)

    # compute output of linear classifier
    patch_rows = n_batch * out_rows * out_cols
    patch_width = kern_rows * kern_cols * kern_chan
    patch_stack = patches.reshape((patch_rows, patch_width))

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    flat_kerns = kerns.flatten(2)
    response = tensor.dot(patch_stack, flat_kerns.T)
    output = response.reshape((n_batch, out_rows, out_cols, n_kern))
    return output, oshp
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling: the image is downsampled by taking the max
    value over each area of shape maxpoolshp. Outputs a 2D tensor of
    shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply pooling.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    pool_area = numpy.int64(numpy.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if numpy.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    layout = convolution_indices.conv_eval(imgshp, maxpoolshp,
                                           maxpoolshp, mode='valid')
    indices, indptr, spmat_shape, sptype, outshp = layout

    # build sparse matrix, then generate stack of image patches
    assemble = theano.sparse.CSM(sptype)
    extractor = assemble(numpy.ones(indices.size), indices, indptr,
                         spmat_shape)
    patches = sparse.structured_dot(extractor, images.T).T

    # one row per (image, output position), one slab per feature
    stack_shape = tensor.stack([
        images.shape[0] * tensor.as_tensor(numpy.prod(outshp)),
        tensor.as_tensor(imgshp[0]),
        tensor.as_tensor(pool_area),
    ])
    patch_stack = tensor.reshape(patches, stack_shape, ndim=3)

    pooled = tensor.max(patch_stack, axis=2)

    pooled_shape = tensor.stack([
        images.shape[0],
        tensor.as_tensor(numpy.prod(outshp)),
        tensor.as_tensor(imgshp[0]),
    ])
    per_image = tensor.reshape(pooled, pooled_shape, ndim=3)

    # swap the last two axes before flattening back to 2D
    swap_axes = tensor.DimShuffle(per_image.broadcastable, (0, 2, 1))
    feature_major = swap_axes(per_image)

    return tensor.flatten(feature_major, 2), outshp
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling: the image is downsampled by taking the max
    value over each area of shape maxpoolshp. Outputs a 2D tensor of
    shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply pooling.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    pool_area = numpy.int64(numpy.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if numpy.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    layout = convolution_indices.conv_eval(imgshp, maxpoolshp,
                                           maxpoolshp, mode='valid')
    indices, indptr, spmat_shape, sptype, outshp = layout

    # build sparse matrix, then generate stack of image patches
    assemble = theano.sparse.CSM(sptype)
    extractor = assemble(numpy.ones(indices.size), indices, indptr,
                         spmat_shape)
    patches = sparse.structured_dot(extractor, images.T).T

    # one row per (image, output position), one slab per feature
    stack_shape = tensor.stack([
        images.shape[0] * tensor.as_tensor(numpy.prod(outshp)),
        tensor.as_tensor(imgshp[0]),
        tensor.as_tensor(pool_area),
    ])
    patch_stack = tensor.reshape(patches, stack_shape, ndim=3)

    pooled = tensor.max(patch_stack, axis=2)

    pooled_shape = tensor.stack([
        images.shape[0],
        tensor.as_tensor(numpy.prod(outshp)),
        tensor.as_tensor(imgshp[0]),
    ])
    per_image = tensor.reshape(pooled, pooled_shape, ndim=3)

    # swap the last two axes before flattening back to 2D
    swap_axes = tensor.DimShuffle(per_image.broadcastable, (0, 2, 1))
    feature_major = swap_axes(per_image)

    return tensor.flatten(feature_major, 2), outshp
def test_structured_dot(self):
    """Run the shared compile-and-check helper on a csc x csc product."""
    lhs = SparseType("csc", dtype=config.floatX)()
    rhs = SparseType("csc", dtype=config.floatX)()

    lhs_value = sp.csc_matrix(random_lil((4, 5), config.floatX, 3))
    rhs_value = sp.csc_matrix(random_lil((5, 3), config.floatX, 3))

    self._compile_and_check(
        [lhs, rhs],
        [structured_dot(lhs, rhs)],
        [lhs_value, rhs_value],
        StructuredDot,
    )
def test_infer_shape(self):
    """The shape of a csc x csc structured_dot must be computable on its own.

    The compiled shape function must contain no node that is an instance
    of this test's class, and must return (4, 3) for (4, 5) x (5, 3)
    operands.
    """
    a = SparseType('csc', dtype=config.floatX)()
    b = SparseType('csc', dtype=config.floatX)()
    shape_fn = theano.function([a, b], structured_dot(a, b).shape)

    for node in shape_fn.maker.env.toposort():
        assert not isinstance(node, self.__class__)

    x = sp.csc_matrix((4, 5), dtype=config.floatX)
    y = sp.csc_matrix((5, 3), dtype=config.floatX)
    expected = numpy.array((4, 3))
    assert numpy.all(shape_fn(x, y) == expected)
def get_output_for(self, input, **kwargs):
    """Compute ``nonlinearity(structured_dot(input, W) + b)``.

    Inputs with more than two dimensions are first flattened into a batch
    of feature vectors; the bias is skipped when ``self.b`` is None.
    """
    # if the input has more than two dimensions, flatten it into a
    # batch of feature vectors.
    features = input.flatten(2) if input.ndim > 2 else input

    pre_activation = sp.structured_dot(features, self.W)
    if self.b is not None:
        # broadcast the bias over the leading axis
        pre_activation = pre_activation + self.b.dimshuffle('x', 0)
    return self.nonlinearity(pre_activation)
def get_output_for(self, input, **kwargs):
    """Compute ``nonlinearity(A . (input . W) + b)``.

    The product with ``W`` is dense; the product with ``self.A`` goes
    through ``structured_dot``.  The bias is skipped when ``self.b`` is
    None.
    """
    projected = T.dot(input, self.W)
    # do the convolution
    pre_activation = S.structured_dot(self.A, projected)
    if self.b is not None:
        # broadcast the bias over the leading axis
        pre_activation = pre_activation + self.b.dimshuffle('x', 0)
    return self.nonlinearity(pre_activation)
def __call__(self, inputs):
    """Project sparse ``inputs`` onto the retained components.

    The mean is removed after projection, by subtracting
    ``dot(mean, W)`` from ``structured_dot(inputs, W)``, both restricted
    to the first ``self.component_cutoff`` columns of ``W``.  When
    ``self.whiten`` is set the result is divided by the square root of
    the matching entries of ``self.v``.
    """
    self._update_cutoff()
    n_kept = self.component_cutoff

    projected = structured_dot(inputs, self.W[:, :n_kept])
    centered = projected - tensor.dot(self.mean, self.W[:, :n_kept])
    if not self.whiten:
        return centered

    centered /= tensor.sqrt(self.v[:n_kept])
    return centered
def __call__(self, inputs):
    """Project sparse ``inputs`` onto the retained components.

    The mean is removed after projection, by subtracting
    ``dot(mean, W)`` from ``structured_dot(inputs, W)``, both restricted
    to the first ``self.component_cutoff`` columns of ``W``.  When
    ``self.whiten`` is set the result is divided by the square root of
    the matching entries of ``self.v``.
    """
    self._update_cutoff()
    n_kept = self.component_cutoff

    projected = structured_dot(inputs, self.W[:, :n_kept])
    centered = projected - tensor.dot(self.mean, self.W[:, :n_kept])

    # TODO-- this is inefficient, should work by modifying W not Z
    if not self.whiten:
        return centered

    centered /= tensor.sqrt(self.v[:n_kept])
    return centered
def get_output_for(self, input, A=None, target_indices=None, **kwargs):
    """Compute ``nonlinearity([A .] (input . W) + b)`` with optional row selection.

    :param input: dense operand multiplied by ``self.W``
    :param A: optional left operand applied through ``structured_dot``;
        ``None`` skips that product
    :param target_indices: row indices to keep; only used when
        ``self.use_target_indices`` is set.  ``None`` means "keep all rows".
    :return: ``self.nonlinearity`` applied to the resulting activation
    """
    activation = T.dot(input, self.W)
    # do the convolution
    # Test against None explicitly: the truth value of a matrix-like
    # argument is not a reliable "was it supplied?" signal (array-like
    # objects may raise or evaluate to False).
    if A is not None:
        activation = S.structured_dot(A, activation)
    if self.b is not None:
        # broadcast the bias over the leading axis
        activation = activation + self.b.dimshuffle('x', 0)
    # Same reasoning: an index of 0 or ``numpy.array([0])`` is falsy and
    # was silently ignored by a truthiness test.
    if self.use_target_indices and target_indices is not None:
        activation = activation[target_indices, :]
    return self.nonlinearity(activation)
def __init__(self, input, n_in, n_out, inputIsSparse=True):
    """ Initialize the parameters of the logistic regression

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie

    :type inputIsSparse: bool
    :param inputIsSparse: when true the input is multiplied with W through
                          sparse.structured_dot, otherwise through T.dot
    """
    # start-snippet-1
    # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
    initial_W = np.zeros((n_in, n_out), dtype=theano.config.floatX)
    self.W = theano.shared(value=initial_W, name='W', borrow=True)

    # initialize the biases b as a vector of n_out 0s
    initial_b = np.zeros((n_out, ), dtype=theano.config.floatX)
    self.b = theano.shared(value=initial_b, name='b', borrow=True)

    # symbolic expression for computing the matrix of class-membership
    # probabilities
    # Where:
    # W is a matrix where column-k represent the separation hyperplane for
    # class-k
    # x is a matrix where row-j represents input training sample-j
    # b is a vector where element-k represent the free parameter of
    # hyperplane-k
    if inputIsSparse:
        scores = sparse.structured_dot(input, self.W) + self.b
    else:
        scores = T.dot(input, self.W) + self.b
    self.p_y_given_x = T.nnet.softmax(scores)

    # symbolic description of how to compute prediction as class whose
    # probability is maximal
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)
    # end-snippet-1

    # parameters of the model
    self.params = [self.W, self.b]

    # keep track of model input
    self.input = input
def test_infer_shape_csr_csc_grad(self):
    """Shapes of the structured_dot gradients must equal the input shapes.

    Checked for both csr and csc operands.  The compiled shape function
    must contain no node that is an instance of this test's class.
    """
    for sparsetype in ('csr', 'csc'):
        a = SparseType(sparsetype, dtype=config.floatX)()
        b = SparseType(sparsetype, dtype=config.floatX)()
        cost = dense_from_sparse(structured_dot(a, b)).sum()
        grads = tensor.grad(cost, [a, b])
        shape_fn = theano.function([a, b], [g.shape for g in grads])

        for node in shape_fn.maker.env.toposort():
            assert not isinstance(node, self.__class__)

        # sp.csr_matrix or sp.csc_matrix, depending on the format
        make_matrix = getattr(sp, sparsetype + '_matrix')
        x = make_matrix(random_lil((500, 300), config.floatX, 10))
        y = make_matrix(random_lil((300, 400), config.floatX, 5))

        shape_a, shape_b = shape_fn(x, y)
        assert numpy.all(shape_a == x.shape)
        assert numpy.all(shape_b == y.shape)
def test_opt_unpack(self): # # Test that a graph involving # structured_dot(assembled_csc_matrix) is optimized to be just # a structured_dot_csc Op and no assembly of a csc_matrix. # # The optimization from structured_dot -> structured_dot_csc # is currently disabled, So this test is not expected to pass return # kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns') spmat = sp.lil_matrix((4, 6), dtype='int64') for i in range(5): # set non-zeros in random locations (row x, col y) x = numpy.floor(numpy.random.rand() * spmat.shape[0]) y = numpy.floor(numpy.random.rand() * spmat.shape[1]) spmat[x, y] = numpy.random.rand() * 10 spmat = sp.csc_matrix(spmat) images = tensor.Tensor(dtype='float32', broadcastable=[False, False])( 'images') cscmat = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape) f = theano.function([kerns, images], structured_dot(cscmat, images.T)) sdcscpresent = False for node in f.maker.env.toposort(): print node.op assert not isinstance(node.op, CSM) assert not isinstance(node.op, CSMProperties) if isinstance(f.maker.env.toposort()[1].op, StructuredDotCSC): sdcscpresent = True assert sdcscpresent kernvals = numpy.array(spmat.data[:spmat.size]) #print 'kdtype', kernvals.dtype, kernvals.shape, #print kernvals.ndim, kernvals.dtype.num #print 'type of kernvals = ', kernvals.dtype bsize = 3 imvals = 1.0 * numpy.array(numpy.arange(bsize * spmat.shape[1]).\ reshape(bsize, spmat.shape[1]), dtype='float32') outvals = f(kernvals, imvals) print outvals
def test_opt_unpack(self): # # Test that a graph involving # structured_dot(assembled_csc_matrix) is optimized to be just # a structured_dot_csc Op and no assembly of a csc_matrix. # # The optimization from structured_dot -> structured_dot_csc # is currently disabled, So this test is not expected to pass return # kerns = tensor.Tensor(dtype='int64', broadcastable=[False])('kerns') spmat = sp.lil_matrix((4, 6), dtype='int64') for i in range(5): # set non-zeros in random locations (row x, col y) x = numpy.floor(numpy.random.rand() * spmat.shape[0]) y = numpy.floor(numpy.random.rand() * spmat.shape[1]) spmat[x, y] = numpy.random.rand() * 10 spmat = sp.csc_matrix(spmat) images = tensor.Tensor(dtype='float32', broadcastable=[False, False])('images') cscmat = CSC(kerns, spmat.indices[:spmat.size], spmat.indptr, spmat.shape) f = theano.function([kerns, images], structured_dot(cscmat, images.T)) sdcscpresent = False for node in f.maker.env.toposort(): print node.op assert not isinstance(node.op, CSM) assert not isinstance(node.op, CSMProperties) if isinstance(f.maker.env.toposort()[1].op, StructuredDotCSC): sdcscpresent = True assert sdcscpresent kernvals = numpy.array(spmat.data[:spmat.size]) #print 'kdtype', kernvals.dtype, kernvals.shape, #print kernvals.ndim, kernvals.dtype.num #print 'type of kernvals = ', kernvals.dtype bsize = 3 imvals = 1.0 * numpy.array(numpy.arange(bsize * spmat.shape[1]).\ reshape(bsize, spmat.shape[1]), dtype='float32') outvals = f(kernvals, imvals) print outvals
def __init__(self, input, n_in, n_out, inputIsSparse=True):
    """ Initialize the parameters of the logistic regression

    :type input: theano.tensor.TensorType
    :param input: symbolic variable that describes the input of the
                  architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie

    :type inputIsSparse: bool
    :param inputIsSparse: when true the input is multiplied with W through
                          sparse.structured_dot, otherwise through T.dot
    """
    # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
    initial_W = np.zeros((n_in, n_out), dtype=theano.config.floatX)
    self.W = theano.shared(value=initial_W, name='W', borrow=True)

    # initialize the biases b as a vector of n_out 0s
    initial_b = np.zeros((n_out, ), dtype=theano.config.floatX)
    self.b = theano.shared(value=initial_b, name='b', borrow=True)

    # parameters of the model
    self.params = [self.W, self.b]

    # affine scores; no softmax is applied here
    product = (sparse.structured_dot(input, self.W) if inputIsSparse
               else T.dot(input, self.W))
    self.output = product + self.b

    # predicted class = index of the largest score
    self.y_pred = T.argmax(self.output, axis=1)
def getUpdateParams(self): update = [] aux = [] # Update state update.append( (self.params[0], input_layer.output) ) # Update output print 'Length: ' + str(len(self.connections)) for i, c in enumerate(self.connections): aux.append(sparse.structured_dot( sparse.transpose(c.input), self.params[2][i] * c.inhibition )) aux2 = aux.pop() for a in range(len(aux)): aux2 = sparse.add(aux2,aux.pop()) print aux2 from theano import pp print 'out: ' print pp(aux2) update.append((self.params[1],sparse.transpose(sparse.structured_sigmoid(aux2)))) # Hardcoded!! '''update.append((self.params[1], sparse.transpose( sparse.structured_sigmoid(sparse.structured_dot( sparse.transpose(self.connections[0].input), self.params[2][0]))))) ''' ''' update.append((self.params[1], sparse.transpose( sparse.structured_sigmoid( sparse.structured_dot( sparse.transpose(self.connections[0].input), # Input self.params[2][0]))))) # Weights ''' # Update weights ''' #Old ones (OJA) for i, w in enumerate(self.params[2]): update.append( (w, #layer.params[0])) sparse.add( w, self.LR[i]*sparse.transpose( sparse.structured_dot(self.params[1], self.x_yw[i]) ) ) )) ''' for i, w in enumerate(self.params[2]): update.append( (w, #w)) #layer.params[0])) sparse.structured_maximum( sparse.add( w, sparse.add(self.xy[i], self.AWW[i])), 0) ) ) return update
def addConnections(self, connections):
    """Register new incoming connections and build their symbolic terms.

    For every connection ``c`` this appends, in order: a sparse shared
    weight matrix, sparse shared upper/lower bound matrices filled with
    the globals ``Wmax``/``Wmin``, the ``yw`` and ``x_yw`` terms, a
    learning-rate term ``LR``, the ``xy`` term and the ``AWW`` term.
    Finally ``self.i`` is advanced and ``self.params[2]`` is pointed at
    the weight list.

    Reads the module-level names ``delta``, ``Wmin``, ``Wmax``, ``awe``
    and ``generate``.

    NOTE(review): the block nesting below was reconstructed from a
    flattened listing -- confirm against the original file.
    """
    global delta, Wmin, Wmax, awe
    self.connections = self.connections + connections
    i=0
    for i, c in enumerate(connections):
        # j: offset by self.i; i: position inside this call only.
        # NOTE(review): some lists below are indexed with j and others
        # with i; the two differ once self.i is non-zero -- confirm.
        j = self.i + i
        # Weights
        self.weights.append(
            theano.shared(
                sp.csc_matrix(
                    np.asarray(
                        c.generateConnectionMatrix(self.o_shape, generate),
                        dtype=self.input.dtype) ),
                name ='Wi_' + str(j)))
        # Upper bound matrix: every entry equals the global Wmax.
        self.Wmax.append(
            theano.shared(
                sp.csc_matrix(
                    np.asarray(
                        np.ones((sizeFromShape(c.i_shape),sizeFromShape(self.o_shape)))*Wmax,
                        dtype=self.input.dtype) ),
                name ='WM_' + str(i)))
        # Lower bound matrix: every entry equals the global Wmin.
        # NOTE(review): it gets the same 'WM_' name prefix as the upper
        # bound -- possibly a copy/paste slip.
        self.Wmin.append(
            theano.shared(
                sp.csc_matrix(
                    np.asarray(
                        np.ones((sizeFromShape(c.i_shape),sizeFromShape(self.o_shape)))*Wmin,
                        dtype=self.input.dtype) ),
                name ='WM_' + str(i)))
        # yw
        # out: nx1
        # Wi: mxn
        # outT x WiT : 1xm
        self.yw.append(
            sparse.structured_dot(
                sparse.transpose(self.output),
                sparse.transpose(self.weights[j])))
        # x_yw
        # in: nx1
        self.x_yw.append(
            sparse.sub(
                sparse.transpose(c.input),
                self.yw[j]))
        # Debug output.
        print len(self.weights)
        print self.weights[i].type
        print self.weights[i].type.ndim
        print
        if self.weights:
            # auxX / auxY are built but not used afterwards in this method.
            auxX=sparse.sub(self.Wmax[j], self.weights[i])
            auxY=sparse.sub(self.weights[i], self.Wmin[j])
            # LR = delta * ((Wmax - W)**1 - (Wmin - W)**1)
            self.LR.append(delta*(
                sparse.sub(
                    sparse.structured_pow(
                        sparse.sub(self.Wmax[j], self.weights[i]), 1),
                    sparse.structured_pow(
                        sparse.sub(self.Wmin[j], self.weights[i]), 1))))
            # xy = LR * (input . output^T)
            self.xy.append(
                self.LR[i]*sparse.structured_dot(
                    c.input,
                    sparse.transpose(self.output)))
            # AWW = awe * delta * (Wmax - W)**1 * W
            self.AWW.append(
                awe*delta*sparse.structured_pow(
                    sparse.sub(self.Wmax[j], self.weights[i]), 1)*self.weights[i])
    # NOTE(review): advances by the last loop index (len(connections) - 1,
    # or 0 for an empty list), not by the number of connections -- confirm.
    self.i +=i
    self.params[2] = self.weights
def run2(i, xt, L, W, labels, tsh):
    """Step function: logistic loss for item ``i`` plus an update for ``tgWh``.

    The loss is ``sum(log1p(exp(-labels[i] * ((L[i] . W) . xt))))``.  Its
    gradient with respect to ``W`` is built but not used in the returned
    update, which adds ``W`` itself to ``tgWh`` (a name taken from the
    enclosing scope).  ``tsh`` is not used.

    :return: ``(loss, {tgWh: tgWh + W})``
    """
    projected_W = sparse.structured_dot(L[i], W)
    margin = -labels[i] * T.dot(projected_W, xt)
    loss = T.log1p(T.exp(margin)).sum()
    grad_W = T.grad(loss, W)
    #return ft, {tgWh:tgWh+sparse.true_dot(L[i].T, gt)}
    updates = {tgWh: tgWh + W}
    return loss, updates
def _dot(x, y):
    """Multiply ``x`` by ``y``, using ``structured_dot`` when ``x`` is sparse."""
    multiply = SS.structured_dot if isinstance(x, SS.SparseVariable) else TT.dot
    return multiply(x, y)
def call(self, x, mask=None):
    """Return ``activation(structured_dot(x, W) [+ b])``.

    The bias is added only when ``self.bias`` is truthy.  ``mask`` is
    accepted but not used.
    """
    # sparse replacement for: output = K.dot(x, self.W)
    pre_activation = sparse.structured_dot(x, self.W)
    if self.bias:
        pre_activation += self.b
    return self.activation(pre_activation)
def buildgraph_T(spmat, mat):
    """Build the transposed product ``structured_dot(mat.T, spmat.T)``."""
    dense_t = mat.T
    sparse_t = spmat.T
    return structured_dot(dense_t, sparse_t)
new_W = old_W #hebbianL = theano.function([old_W], new_W) print layer0.input print layer0.output print layer0.Wi print old_W #wi=hebbianL([layer.params[0]]) ''' for layer in layers: Wis.append( (layer.params[0], #layer.params[0])) sparse.add( layer.params[0] , LR*sparse.transpose( sparse.structured_dot(layer.output, layer.x_yw) ) ) )) # (layer.params[0], # sparse.add( # layer.params[0] , # LR*sparse.sub( # sparse.structured_dot(sparse.transpose(layer.output), layer.input) , # sparse.structured_dot( # sparse.structured_dot( # sparse.transpose(layer.output), # layer.output), # layer.params[0])
def __init__(self, input, filter_shape, sigma,i_shape,o_shape, Wi = False, Wr = False): global generate # Mean neuron density ~80k/mm^3 in V2 (skoglund 1996) # Synapse length follow a power law () # Synapse length for feedback interareal ~10-40mm, feedforward same, but less connections # Synapse lengths is found by Sholl analysis. # Ahould compare RF data with Van den Bergh 2010 # Initialize weights as a shared variable #n_col=input.shape[1] try: if generate: np.load('asd') else: Wi=np.load(i_file) print '[info] Weights loaded from file!' print 'Shape = ' + str(Wi.shape) except IOError: print "[info] Weights file wasn't found. Generating new connections" kern1 = gkern2(filter_shape,sigma) Wi = kernel2connection(kern1, i_shape, o_shape) #Wi /= np.sum(Wi,1).reshape((Wi.shape[0],1))*15 print 'Shape = ' + str(Wi.shape) np.save(i_file,Wi) try: if generate: np.load('asd') else: Wr=np.load(r_file) print 'Weights loaded from file!' except IOError: print "Weights file wasn't found. Generating new connections" kern2 = gkern2(filter_shape,sigma) Wr = kernel2connection(kern2, o_shape,o_shape) #Wr /= np.sum(Wi,1) np.save(r_file,Wr) if np.sum(Wi,1)[0] != 1: Wi /= np.sum(Wi,1).reshape((Wi.shape[0],1))*5 if np.sum(Wr,1)[0] != 1: Wr /= np.sum(Wr,1).reshape((Wr.shape[0],1)) print np.sum(Wi,0) print np.sum(Wi,1) plt.plot(Wi[1,:]) plt.show() self.Wi= theano.shared( sp.csc_matrix( np.asarray( Wi, dtype=input.dtype) ), name ='Wi') self.Wr = theano.shared( sp.csc_matrix( np.asarray( Wr, dtype=input.dtype) ), name ='Wr') # Output of the layer is the sigmoid of the convolved network self.state = theano.shared( sp.csc_matrix( np.asarray( np.zeros((o_shape[0]*o_shape[1],1)), dtype=input.dtype) ), name ='St') self.input = input # I could do the same with biases if needed #print self.input.get_value().shape #print self.Wi.get_value().shape self.output = theano.shared( sp.csc_matrix( np.asarray( np.zeros((o_shape[0]*o_shape[1],1)), dtype=input.dtype) ), name ='Out') 
#sparse.structured_sigmoid(sparse.structured_dot(self.input, self.Wi)) #T.dot(self.input, self.Wi)) # input = external + recursive (from layer) # self.input = T.dot(input, self.Wi) #+ T.sum(T.dot(self.state,self.Wr),1) # out: nx1 # Wi: mxn # outT x WiT : 1xm self.yw = sparse.structured_dot( sparse.transpose(self.output), sparse.transpose(self.Wi)) # in: nx1 self.x_yw = sparse.sub( sparse.transpose(self.input), self.yw) # optional: self.output = T.nnet.sigmoid(conv_out+self.output) self.params = [self.Wi, self.Wr, self.state, self.output]
def model(radial_index, white, affine=None, surf_mask=None, min_dist=-2.,
          smooth_weight=0.7, watson=False, idx_vertex=None, dist=None):
    """
    Creates a probabilistic model of the transition across the white/gray
    matter boundary

    :param radial_index: (Nx, Ny, Nz) array with the radial index
    :param white: white/gray matter boundary
    :param affine: (4, 4) array with the transformation from voxel to mm
        space (identity when None)
    :param surf_mask: (Nvertex, ) boolean array, which is True on vertices
        to be included in the fit (all vertices when None)
    :param min_dist: only include voxels within this distance from WM/GM
        boundary
    :param smooth_weight: weighting for the smoothing parameter
    :param watson: assume a Watson distribution for the radial index
        rather than a normal distribution
    :param idx_vertex: (Nx, Ny, Nz) array with index of closest vertex
        (computed from `white` when None)
    :param dist: distance from the WM/GM boundary (computed from `white`
        when None)
    :return: pymc3 model which fits the sigmoidal transition across the
        surface
    """
    if affine is None:
        affine = np.eye(4)
    if surf_mask is None:
        surf_mask = np.ones(white.nvertex, dtype='bool')
    vol_mask = np.isfinite(radial_index) & (radial_index != 0.)

    if dist is None:
        dist = grid.signed_distance(white, radial_index.shape, affine)
    idx_vertex_raw = (grid.closest_surface(white, vol_mask, affine)
                      if idx_vertex is None else idx_vertex)

    # Renumber vertices so that only the masked-in ones get an index;
    # everything else maps to -1.
    nvertex = surf_mask.sum()
    idx_real_vertex = -np.ones(surf_mask.size, dtype='i4')
    idx_real_vertex[surf_mask] = np.arange(nvertex)
    voxel_vertex = idx_real_vertex[idx_vertex_raw]
    voxel_vertex[idx_vertex_raw == -1] = -1
    voxel_use = ((voxel_vertex != -1) & (dist > min_dist) & vol_mask
                 & (dist != 0))

    # Row-normalised neighbourhood graph restricted to the masked vertices.
    gpp = white.graph_point_point()[surf_mask, :][:, surf_mask]
    inverse_degree = np.array(1 / gpp.sum(-1)).ravel()
    smooth = sp_sparse.diags(inverse_degree).dot(gpp)
    assert abs(smooth.dot(np.ones(smooth.shape[0])) - 1).max() < 1e-13

    # Plain-array selections shared by several terms below.
    used_vertex = voxel_vertex[voxel_use]
    used_dist = dist[voxel_use]
    observed_ri = abs(radial_index[voxel_use])

    # radial index model
    with pymc3.Model() as transition_model:
        d0 = pymc3.Flat('d0', testval=0.5, shape=nvertex)
        log_sigma = pymc3.Flat('log_sigma', testval=-0.5, shape=nvertex)
        sigma = tensor.exp(log_sigma)

        # radial index is zero in WM, 1 in GM
        model_ri = 1 / (1 + tensor.exp(-(used_dist - d0[used_vertex])
                                       / sigma[used_vertex]))
        if watson:
            alignment = (model_ri * observed_ri
                         + tensor.sqrt(1 - model_ri ** 2)
                         * np.sqrt(1 - radial_index[voxel_use] ** 2))
        else:
            alignment = -(model_ri - observed_ri) ** 2
        pymc3.Potential('alignment', alignment)

        # Penalise deviation of each vertex from its neighbourhood average.
        d_neigh = sparse.structured_dot(smooth, tensor.stack([d0], -1))[:, 0]
        pymc3.Potential('d0_smooth',
                        -smooth_weight * (d_neigh - d0) ** 2)
        ls_neigh = sparse.structured_dot(
            smooth, tensor.stack([log_sigma], -1))[:, 0]
        pymc3.Potential('ls_smooth',
                        -smooth_weight * (ls_neigh - log_sigma) ** 2)

        # additional output to check
        pymc3.Deterministic('model_ri_1d', model_ri)
        volumes = [
            ('model_ri', model_ri),
            ('observed_ri', observed_ri),
            ('dist', used_dist),
            ('d0', d0[used_vertex]),
            ('sigma', sigma[used_vertex]),
        ]
        for name, arr_1d in volumes:
            # NaN everywhere except at the voxels used in the fit
            vol_nan = tensor.fill(radial_index, np.nan)
            vol_filled = tensor.set_subtensor(vol_nan[voxel_use.nonzero()],
                                              arr_1d)
            pymc3.Deterministic('%s_3d' % name, vol_filled)
    return transition_model
def convolve(kerns, kshp, nkern, images, imgshp, step=(1,1), bias=None,
             mode='valid', flatten=True):
    """Convolution implementation by sparse matrix multiplication.

    @note: For best speed, put the matrix which you expect to be smaller
           as the 'kernel' argument

    === Input / Output conventions ===

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order.

    If flatten is "False", the output feature map will have shape:
        batch_size x number of kernels x output_size
    If flatten is "True", the output feature map will have shape:
        batch_size x number of kernels * output_size

    IMPORTANT: note that this means that each feature map (image generated
    by each kernel) is contiguous in memory. The memory layout will
    therefore be:
        [ <feature_map_0> <feature_map_1> ... <feature_map_n> ],
    where <feature_map> represents a "feature map" in raster order.

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    @param kerns: 2D tensor containing kernels which are applied at every
                  pixel
    @param kshp: tuple containing actual dimensions of kernel (not symbolic)
    @param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    @param images: tensor containing images on which to apply convolution
    @param imgshp: sequence containing image dimensions, either
                   (height, width) or (nfeatures, height, width)
    @param step: determines number of pixels between adjacent receptive
                 fields (tuple containing dx,dy values)
    @param bias: optional term added to the (bsize*outsize x nkern) product
                 before it is rearranged into feature maps; presumably one
                 value per kernel -- confirm against callers
    @param mode: 'full', 'valid' see CSM.evaluate function for details
    @param flatten: flatten the last 2 dimensions of the output. When True
                    (the default) the batchsize x nkern x outsize tensor is
                    flattened to batchsize x nkern*outsize
    @output out1: symbolic result
    @output out2: logical shape of the output img (nkern,height,width)

    @TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc
    kern_size = numpy.int64(numpy.prod(kshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w);
    # in the first case, default nfeatures to 1.
    # tuple() makes the prepend a concatenation for any sequence type: a list
    # would raise TypeError and a numpy array would be added element-wise.
    if numpy.size(imgshp) == 2:
        imgshp = (1,) + tuple(imgshp)

    # construct indices and index pointers for sparse matrix, which, when
    # multiplied with input images will generate a stack of image patches
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(numpy.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = tensor.stack(images.shape[0] * tensor.as_tensor(numpy.prod(outshp)),
                          tensor.as_tensor(imgshp[0] * kern_size))
    patch_stack = tensor.reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = tensor.dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = tensor.stack(images.shape[0],
                          tensor.as_tensor(numpy.prod(outshp)),
                          tensor.as_tensor(nkern))
    tensout = tensor.reshape(output, newshp, ndim=3)
    output = tensor.DimShuffle((False,)*tensout.ndim, (0,2,1))(tensout)
    if flatten:
        output = tensor.flatten(output, 2)

    return output, numpy.hstack((nkern, outshp))
def applySparseFilter(kerns, kshp, nkern, images, imgshp, step=(1,1),
                      bias=None, mode='valid'):
    """
    Apply sparse (non weight-shared) filters via a sparse matrix product.

    Output feature map will have shape
    `batch_size x number of kernels * output_size`.
    Each filter is applied separately to consecutive output pixels.

    Parameters
    ----------
    kerns : 1D tensor_like
        `nkern * outsize * ksize` vector containing kernels
    kshp : tuple
        Tuple containing actual dimensions of kernel (not symbolic)
    nkern : int
        Number of kernels to apply at each pixel in the input image.
        `nkern = 1` will apply a single unique filter for each input pixel.
    images : 2D tensor_like
        `bsize x imgsize` matrix containing images on which to apply
        filters. Second dimension represents each image in raster order.
    imgshp : sequence
        Actual image dimensions (not symbolic), either `(height, width)`
        or `(nfeatures, height, width)`.
    step : tuple
        Determines number of pixels between adjacent receptive fields
        (tuple containing dx,dy values)
    bias : optional
        Term added to the output when not None.
    mode : str
        'full', 'valid' see `CSM.evaluate` function for details

    Returns
    -------
    out1 : symbolic variable
        Symbolic result
    out2 : numpy.ndarray
        Logical shape of the output img (nkern,height,width)
        (after dot product, not of the sparse matrix!)

    Notes
    -----
    Note that this means that each feature map is contiguous in memory.
    The memory layout will therefore be
    `[ <feature_map_0> <feature_map_1> ... <feature_map_n>]`,
    where `<feature_map>` represents a "feature map" in raster order.

    Also, the concept of feature map doesn't really apply to sparse
    filters without weight sharing. Basically, nkern=1 will generate one
    output img/feature map, nkern=2 a second feature map, etc.
    """
    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w);
    # in the first case, default nfeatures to 1.
    # tuple() makes the prepend a concatenation for any sequence type: a list
    # would raise TypeError and a numpy array would be added element-wise.
    if numpy.size(imgshp) == 2:
        imgshp = (1,) + tuple(imgshp)

    # construct indices and index pointers for sparse matrix
    indices, indptr, spmat_shape, sptype, outshp, kmap = \
        convolution_indices.sparse_eval(imgshp, kshp, nkern, step, mode)

    # build a sparse weight matrix whose non-zero entries are taken from kerns
    sparsew = theano.sparse.CSM(sptype, kmap)(kerns, indices, indptr,
                                              spmat_shape)
    output = sparse.structured_dot(sparsew, images.T).T
    if bias is not None:
        output += bias

    return output, numpy.hstack((nkern, outshp))
targety = TT.lvector() #print x, targ #random_weights w1 = TT.dmatrix() b1 = TT.dvector() if HLAYERS == 2: wh = TT.dmatrix() bh = TT.dvector() w2 = TT.dmatrix() b2 = TT.dvector() from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias from theano.compile.function_module import function xw1 = TS.structured_dot(w1.T, x.T).T h = ACTIVATION_FUNCTION(xw1 + b1) if HLAYERS == 2: xwh = theano.dot(wh.T, h.T).T h = ACTIVATION_FUNCTION(xwh + bh) #zero = tensor.zeros_like(x[0,:]) (kl, softmax, argmax) = crossentropy_softmax_argmax_1hot_with_bias(theano.dot(h, w2), b2, targety) if HLAYERS == 2: validatefn = function([x, targety, w1, b1, wh, bh, w2, b2], [kl, softmax, argmax, xw1, xwh], mode=COMPILE_MODE)
targety = TT.lvector() #print x, targ #random_weights w1 = TT.dmatrix() b1 = TT.dvector() if HLAYERS == 2: wh = TT.dmatrix() bh = TT.dvector() w2 = TT.dmatrix() b2 = TT.dvector() from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias from theano.compile.function_module import function xw1 = TS.structured_dot(w1.T, x.T).T h = ACTIVATION_FUNCTION(xw1 + b1) if HLAYERS == 2: xwh = theano.dot(wh.T, h.T).T h = ACTIVATION_FUNCTION(xwh + bh) #zero = tensor.zeros_like(x[0,:]) (kl, softmax, argmax) = crossentropy_softmax_argmax_1hot_with_bias(theano.dot(h, w2), b2, targety) if HLAYERS == 2: validatefn = function([x, targety, w1, b1, wh, bh, w2, b2], [kl, softmax, argmax, xw1, xwh], mode=COMPILE_MODE) (gw1, gb1, gwh, gbh, gw2, gb2) = TT.grad(kl, [w1, b1, wh, bh, w2, b2]) trainfn = function([x, targety, w1, b1, wh, bh, w2, b2], [kl, softmax, argmax, xw1, xwh, theano.compile.io.Out(gw1, borrow = True), gb1, gwh, gbh, gw2, gb2], mode=COMPILE_MODE) else: validatefn = function([x, targety, w1, b1, w2, b2], [kl, softmax, argmax, xw1], mode=COMPILE_MODE)
def run2(i,xt, L, W, labels, tsh):
    """Build the loss and the shared-variable update for index ``i``.

    The loss is ``sum(log1p(exp(-labels[i] * dot(structured_dot(L[i], W), xt))))``.
    Returns the pair ``(loss, updates)`` where ``updates`` maps the
    module-level ``tgWh`` to ``tgWh + W``.

    ``tsh`` is accepted but not used by this function.
    """
    projected = sparse.structured_dot(L[i], W)
    scores = T.dot(projected, xt)
    loss = T.log1p(T.exp(-labels[i] * scores)).sum()

    # The gradient is built but not used by the active return statement; it
    # is only referenced by the disabled variant kept below.
    grad_W = T.grad(loss, W)
    #return loss, {tgWh:tgWh+sparse.true_dot(L[i].T, grad_W)}

    accumulated = tgWh + W
    return loss, {tgWh: accumulated}
# Script fragment: initialises the output-layer parameters, builds the
# symbolic graph of a one-hidden-layer network with a sigmoid output,
# compiles the prediction and training functions, and starts the epoch loop.
# `random_weights`, `HID`, `xR`, `targR`, `ACTIVATION_FUNCTION`, `nnet`,
# `COMPILE_MODE`, `xinstances` and `EPOCHS` are defined outside this fragment.
# NOTE(review): `N` is presumably numpy and `TT` / `TS` theano.tensor /
# theano.sparse -- confirm.
w2 = random_weights(HID, 1)
print "w2", w2, w2.shape, w2.dtype
b2 = N.zeros(1)
print "b2", b2, b2.shape, b2.dtype
#random_weights
# Symbolic placeholders for the parameters; the values are passed in on each
# call rather than stored as shared variables.
w1R = TT.dmatrix('w1')
b1R = TT.dvector('b1')
w2R = TT.dmatrix('w2')
b2R = TT.dvector('b2')
import pylearn.algorithms.cost as cost
from theano.compile.function_module import function
#xw1R = theano.dot(w1R.T, xR.T).T
# First layer goes through structured_dot (the dense-dot variant is kept
# commented out above); the result is transposed back.
xw1R = TS.structured_dot(w1R.T, xR.T).T
#print w1R.type
#print xR.type
hR = ACTIVATION_FUNCTION(xw1R + b1R)
yR = nnet.sigmoid(theano.dot(hR, w2R).T + b2R)
loss = cost.KL_divergence(targR, yR)
# Prediction function: returns the network output and the loss.
fn = function([xR, targR, w1R, b1R, w2R, b2R], [yR, loss], mode=COMPILE_MODE)
(gw1, gb1, gw2, gb2) = TT.grad(loss, [w1R, b1R, w2R, b2R])
# Training function: additionally returns the four gradients and the hidden
# activations; only gw1 is wrapped in Out(..., borrow = True).
trainfn = function([xR, targR, w1R, b1R, w2R, b2R], [yR, loss, theano.compile.io.Out(gw1, borrow = True), gb1, gw2, gb2, hR], mode=COMPILE_MODE)
#print type(hR), type(yR)
print "TRAINING"
nex = xinstances.shape[0]
for epoch in range(EPOCHS):
    print "Epoch #", epoch
def __init__(self, rng, input, n_in, n_out, W=None, b=None,
             activation=T.tanh, inputIsSparse=True):
    """
    Fully-connected hidden layer of an MLP.

    The weight matrix W has shape (n_in, n_out) and the bias vector b has
    shape (n_out,). The layer output is::

        activation(dot(input, W) + b)

    or the bare affine term when ``activation`` is None.

    :type rng: np.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: symbolic matrix
    :param input: a symbolic tensor of shape (n_examples, n_in)

    :type n_in: int
    :param n_in: dimensionality of input

    :type n_out: int
    :param n_out: number of hidden units

    :param W: optional pre-built shared weight matrix; sampled when None

    :param b: optional pre-built shared bias vector; zeros when None

    :type activation: theano.Op or function
    :param activation: non linearity to be applied in the hidden layer

    :type inputIsSparse: bool
    :param inputIsSparse: when True the product with W is computed with
                          sparse.structured_dot, otherwise with T.dot
    """
    self.input = input
    float_type = theano.config.floatX

    if W is None:
        # Weights are drawn uniformly from
        # [-sqrt(6 / (n_in + n_out)), +sqrt(6 / (n_in + n_out))] and stored
        # as floatX so the code is runnable on GPU. Results presented in
        # [Xavier10] suggest 4 times larger initial weights for sigmoid than
        # for tanh; every other activation uses the tanh range.
        drawn = rng.uniform(
            low=-np.sqrt(6. / (n_in + n_out)),
            high=np.sqrt(6. / (n_in + n_out)),
            size=(n_in, n_out))
        W_values = np.asarray(drawn, dtype=float_type)
        if activation == theano.tensor.nnet.sigmoid:
            W_values *= 4
        W = theano.shared(value=W_values, name='W', borrow=True)

    if b is None:
        zero_bias = np.zeros((n_out, ), dtype=float_type)
        b = theano.shared(value=zero_bias, name='b', borrow=True)

    self.W = W
    self.b = b

    # Affine part of the layer; sparse inputs go through structured_dot.
    if inputIsSparse:
        lin_output = sparse.structured_dot(input, self.W) + self.b
    else:
        lin_output = T.dot(input, self.W) + self.b

    if activation is None:
        self.output = lin_output
    else:
        self.output = activation(lin_output)

    # parameters of the model
    self.params = [self.W, self.b]
def __init__(self, d, V, r,
             #nf,
             embeddings=None,
             nc=2,
             nf=0,
             pairwise_constraint=False,
             lambda_w=0.01,
             lambda_e=0.01,
             lambda_f=0.01,
             learning_rate='optimal',
             rnn=True,
             l1_ratio=0.15,
             beta=None,
             fixed_beta=True):
    """Create the model parameters and compile the Theano functions.

    Parameters
    ----------
    d : int
        Dimensionality of embeddings and hidden states.
    V : int
        Size of vocabulary (rows of the embedding matrix).
    r : int
        Number of dependency relations (one d x d matrix each).
    embeddings : array-like, optional
        Initial V x d embedding matrix; sampled uniformly when None.
    nc : int
        Number of classes. For nc > 2 a softmax output is used, otherwise
        a sigmoid output.
    nf : int
        Number of sparse features fed through ``phi``; 0 disables them.
    pairwise_constraint : bool
        When True, subtract ``lambda_e`` times the mean batched dot
        product between even- and odd-indexed rows of ``X_h[-1, -1]``.
    lambda_w, lambda_e, lambda_f : float
        Weights of the L2 penalty on the RNN parameters, of the pairwise
        term, and of the elastic-net penalty on ``beta`` respectively.
    learning_rate : str
        'optimal' compiles SGD updates with step
        ``1 / (lambda_w * (optimal_init + t))``; 'adagrad' creates
        ``self.descender = Adagrad(self.params)`` and compiles no updates;
        any other value compiles no updates.
    rnn : bool
        Whether the RNN parameters are trained and the RNN inputs are part
        of the compiled function inputs.
    l1_ratio : float
        Mix between L1 and L2 in the penalty on ``beta``; must be in [0, 1].
    beta : array-like, optional
        Initial feature weights; used only when nf > 0.
    fixed_beta : bool
        When ``beta`` is given and this is True, ``beta`` is neither
        trained nor regularised.

    Compiled attributes: ``cost_and_grad``, ``sums``, ``states`` and
    ``classify``.
    """
    assert(0 <= l1_ratio <= 1)

    if not rnn:
        print('skipping rnn...')

    floatX = theano.config.floatX

    def shared_floatX(name, value, **kwargs):
        # Cast the numpy value *before* wrapping it. Calling .astype() on the
        # shared variable instead yields a symbolic cast expression whenever
        # floatX differs from the value's dtype; that expression is not a
        # SharedVariable and cannot be the target of a function update.
        return theano.shared(name=name,
                             value=np.asarray(value, dtype=floatX),
                             **kwargs)

    self.learning_rate = learning_rate

    #|V| x d embedding matrix
    if embeddings is None:
        self.We = shared_floatX('embeddings',
                                0.2 * np.random.uniform(-1.0, 1.0, (V, d)))
    else:
        self.We = shared_floatX('embeddings', embeddings, borrow=True)

    #r x d x d tensor (matrix for each dependency relation)
    self.Wr = shared_floatX('dependencies',
                            0.2 * np.random.uniform(-1.0, 1.0, (r, d, d)))

    #d x d map from embedding to hidden vector
    self.Wv = shared_floatX('Wv',
                            0.2 * np.random.uniform(-1.0, 1.0, (d, d)))

    #d long bias vector
    self.b = theano.shared(name='b', value=np.zeros(d, dtype=floatX))

    if nc > 2:
        self.gamma = shared_floatX('gamma',
                                   0.2 * np.random.uniform(-1.0, 1.0, (d, nc)))
        if nf > 0:
            #weights for fine grained features plus bias
            self.beta = shared_floatX('beta',
                                      0.2 * np.random.uniform(-1.0, 1.0, (nf, nc)))
    else:
        self.gamma = shared_floatX('gamma',
                                   0.2 * np.random.uniform(-1.0, 1.0, (d)))
        if nf > 0:
            #weights for fine grained features plus bias
            self.beta = shared_floatX('beta',
                                      0.2 * np.random.uniform(-1.0, 1.0, (nf)))

    # A caller-supplied beta replaces the random initialisation above.
    if nf > 0 and beta is not None:
        self.beta = shared_floatX('beta', beta)

    self.params = []
    if rnn:
        self.params += [self.We, self.Wr, self.Wv, self.b, self.gamma]
    if nf > 0 and (beta is None or not fixed_beta):
        self.params += [self.beta]

    if learning_rate == 'adagrad':
        self.descender = Adagrad(self.params)

    #self.f = T.tanh
    self.f = normalized_tanh

    def recurrence(k, hidden_states, hidden_sums, x, r, p, mask):
        #at each node n in the tree, calculate
        #Wr(p,n) \dot f(W_v \dot We_word(n) + b + sum_n) and add to sum_p
        h_k = self.f((T.dot(self.Wv, x[k].T) + hidden_sums[k].T).T + self.b).T*mask[k] #D x N
        sum_k = T.batched_dot(r[k], h_k.T) #N x D

        return (T.set_subtensor(hidden_states[k], h_k.T),
                T.inc_subtensor(hidden_sums[p[k], T.arange(sum_k.shape[0])], sum_k))

    y = T.ivector('y')

    #all N x K matrices, where N is batch size and K is max sentence length (padded)
    x_idxs = T.imatrix('x')
    x_parents = T.imatrix('x_parents')
    x_rel_idxs = T.imatrix('x_rel')
    x_mask = T.imatrix('x_mask')

    #now these are K x N x D tensors
    X = self.We[x_idxs.T]

    #these are K x N x D x D tensors
    X_rel = self.Wr[x_rel_idxs.T]

    X_hidden_states = T.zeros((x_idxs.shape[1], x_idxs.shape[0], d),
                              dtype=floatX)
    X_hidden_sums = T.zeros((x_idxs.shape[1]+1, x_idxs.shape[0], d),
                            dtype=floatX)

    #these are K(+1) x K x N x D
    # The updates returned by scan are not used: the name is rebound below
    # before any function is compiled.
    [X_h, X_s], updates = theano.scan(fn=recurrence,
                                      sequences=T.arange(x_idxs.shape[1]),
                                      outputs_info=[X_hidden_states, X_hidden_sums],
                                      non_sequences=[X, X_rel, x_parents.T, x_mask.T])

    phi = sp.csc_fmatrix('phi')

    #X_h[-1, -1] is N x D
    base = 0
    if rnn:
        base = base + T.dot(X_h[-1, -1], self.gamma)
    if nf > 0:
        if nc > 2:
            base = base + sp.structured_dot(phi, self.beta)
        else:
            base = base + sp.structured_dot(phi, self.beta.dimshuffle(0, 'x')).flatten()

    if nc > 2:
        p_y_given_x = T.nnet.softmax(base)
        y_pred = T.argmax(p_y_given_x, axis=1)
        costs = -T.log(p_y_given_x)[T.arange(y.shape[0]), y]
    else:
        p_y_given_x = T.nnet.sigmoid(base)
        y_pred = p_y_given_x > 0.5
        costs = -y * T.log(p_y_given_x) - (1-y) * T.log(1-p_y_given_x)

    cost = costs.mean()
    if rnn:
        cost = cost + lambda_w * (self.We ** 2).sum() + lambda_w * (self.Wr ** 2).sum() + lambda_w * (self.Wv ** 2).sum() + lambda_w * (self.b ** 2).sum() + lambda_w * (self.gamma ** 2).sum()
    # NOTE(review): this term is applied regardless of `rnn`; confirm that
    # is the intended nesting.
    if pairwise_constraint:
        cost = cost - lambda_e * T.batched_dot(X_h[-1, -1][::2], X_h[-1, -1][1::2]).mean()
    if nf > 0 and (beta is None or not fixed_beta):
        cost = cost + lambda_f*(l1_ratio*T.abs_(self.beta).sum() + (1-l1_ratio) * (self.beta ** 2).sum())

    grad = T.grad(cost, self.params)

    if learning_rate == 'optimal':
        def dloss(p, y):
            z = p * y
            if z > 18:
                return np.exp(-z) * -y
            if z < -18:
                return -y
            return -y / (np.exp(z) + 1)

        typw = np.sqrt(1.0 / np.sqrt(lambda_w))
        initial_eta0 = typw / max(1.0, dloss(-typw, 1.0))
        optimal_init = 1.0 / (initial_eta0 * lambda_w)
        print(typw, initial_eta0, optimal_init)
        # A floatX numpy scalar keeps `t` a genuine shared variable (see
        # shared_floatX above for why .astype() is avoided).
        self.t = theano.shared(name='t', value=np.dtype(floatX).type(0.))
        eta = 1.0 / (lambda_w * (optimal_init + self.t))
        updates = [(p, p - eta*g) for p,g in zip(self.params, grad)]
    else:
        updates = []

    inputs = []
    if rnn:
        inputs += [x_idxs, x_parents, x_rel_idxs, x_mask]
    if nf > 0:
        inputs += [phi]
    inputs += [y]

    self.cost_and_grad = theano.function(inputs=inputs,
                                         outputs=[cost] + grad,
                                         updates=updates,
                                         allow_input_downcast=True)

    self.sums = theano.function(inputs=[x_idxs, x_parents, x_rel_idxs, x_mask],
                                outputs=X_s,
                                allow_input_downcast=True)
    self.states = theano.function(inputs=[x_idxs, x_parents, x_rel_idxs, x_mask],
                                  outputs=X_h,
                                  allow_input_downcast=True)

    # Classification takes the same inputs as training minus the labels.
    self.classify = theano.function(inputs=inputs[:-1],
                                    outputs=y_pred,
                                    allow_input_downcast=True)
def convolve(kerns, kshp, nkern, images, imgshp, step=(1, 1), bias=None,
             mode='valid', flatten=True):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generated by each kernel) is contiguous in memory. The memory
        layout will therefore be: [ <feature_map_0> <feature_map_1> ...
        <feature_map_n>], where <feature_map> represents a "feature map"
        in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every
                  pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: sequence containing image dimensions, either
                   (height, width) or (nfeatures, height, width)
    :param step: determines number of pixels between adjacent receptive
                 fields (tuple containing dx,dy values)
    :param bias: optional term added to the (bsize*outsize x nkern) product
                 before it is rearranged into feature maps; presumably one
                 value per kernel -- confirm against callers
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param flatten: flatten the last 2 dimensions of the output. When True
                    (the default) the batchsize x nkern x outsize tensor is
                    flattened to batchsize x nkern*outsize

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,height,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc
    kern_size = numpy.int64(numpy.prod(kshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w);
    # in the first case, default nfeatures to 1.
    # tuple() makes the prepend a concatenation for any sequence type: a list
    # would raise TypeError and a numpy array would be added element-wise.
    if numpy.size(imgshp) == 2:
        imgshp = (1, ) + tuple(imgshp)

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = theano.sparse.CSM(sptype)(numpy.ones(indices.size), indices,
                                    indptr, spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = tensor.stack(images.shape[0] * tensor.as_tensor(numpy.prod(outshp)),
                          tensor.as_tensor(imgshp[0] * kern_size))
    patch_stack = tensor.reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = tensor.dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = tensor.stack(images.shape[0],
                          tensor.as_tensor(numpy.prod(outshp)),
                          tensor.as_tensor(nkern))
    tensout = tensor.reshape(output, newshp, ndim=3)
    output = tensor.DimShuffle((False, ) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = tensor.flatten(output, 2)

    return output, numpy.hstack((nkern, outshp))
def v_given_h(self, h):
    """Combine the visible statistics of every visible layer given ``h``.

    Each layer in ``self.v_layers`` is asked for ``v_given_h(h)``; the
    results are concatenated along axis 1 and multiplied with
    ``self.big_mask`` through ``sparse.structured_dot``.
    """
    per_layer = [layer.v_given_h(h) for layer in self.v_layers]
    stacked = T.concatenate(per_layer, axis=1)
    return sparse.structured_dot(stacked, self.big_mask)