class AddSSData(gof.op.Op):
    """Add two sparse matrices assuming they have the same sparsity pattern.

    Both inputs must share the same dtype and sparse format; the output is a
    sparse variable of that same dtype/format.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x, y):
        # Coerce both operands to sparse variables and require matching
        # dtype and format (mixed cases are not implemented).
        x, y = map(as_sparse_variable, [x, y])
        if x.type.dtype != y.type.dtype:
            raise NotImplementedError()
        if x.type.format != y.type.format:
            raise NotImplementedError()
        return gof.Apply(self,
                         [x, y],
                         [SparseType(dtype=x.type.dtype,
                                     format=x.type.format).make_variable()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x, y) = inputs
        (out,) = outputs
        assert _is_sparse(x) and _is_sparse(y)
        assert x.shape == y.shape
        # Copy so the input is not modified, then add the .data vectors
        # directly -- valid only because both operands are assumed to have
        # the same sparsity pattern.
        out[0] = x.copy()
        out[0].data += y.data
class StructuredAddSV(gof.op.Op):
    """Structured addition of a sparse matrix and a dense vector.

    The elements of the vector are only added to the corresponding
    non-zero elements of the sparse matrix. Therefore, this operation
    outputs another sparse matrix.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x, y):
        x = as_sparse_variable(x)
        y = tensor.as_tensor_variable(y)
        # y must be a vector that broadcasts across the rows of x.
        assert y.type.ndim == 1
        if x.type.dtype != y.type.dtype:
            raise NotImplementedError()
        return gof.Apply(self,
                         [x, y],
                         [SparseType(dtype=x.type.dtype,
                                     format=x.type.format).make_variable()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x, y) = inputs
        (out,) = outputs
        assert _is_sparse(x) and not _is_sparse(y)
        assert x.shape[1] == y.shape[0]
        # (x.toarray() != 0) masks y so it is added only where x is non-zero,
        # preserving the sparsity pattern of x.
        out[0] = x.__class__(x + (x.toarray() != 0) * y)
class MulSV(gof.op.Op):
    """Multiplication of sparse matrix by a broadcasted dense vector."""

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x, y):
        x = as_sparse_variable(x)
        y = tensor.as_tensor_variable(y)
        # y must be a vector that broadcasts across the rows of x.
        assert y.type.ndim == 1
        if x.type.dtype != y.type.dtype:
            raise NotImplementedError()
        return gof.Apply(self,
                         [x, y],
                         [SparseType(dtype=x.type.dtype,
                                     format=x.type.format).make_variable()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x, y) = inputs
        (out,) = outputs
        assert _is_sparse(x) and not _is_sparse(y)
        assert x.shape[1] == y.shape[0]
        # Dense elementwise product, converted back to x's sparse class.
        out[0] = x.__class__(x.toarray() * y)
def _testSS(self, op, array1=None, array2=None):
    """Check an elementwise `op` (add or mul) on two sparse operands.

    Bug fix: the defaults were numpy arrays, i.e. mutable default
    arguments shared across calls; they are now built inside the function
    (behaviour is unchanged for all callers).
    """
    if array1 is None:
        array1 = numpy.array([[1.0, 0], [3, 0], [0, 6]])
    if array2 is None:
        array2 = numpy.asarray([[0, 2.0], [0, 4], [5, 0]])
    for mtype in _mtypes:
        a = mtype(array1)
        aR = as_sparse_variable(a)
        self.assertFalse(aR.data is a)
        self.assertTrue(_is_sparse(a))
        self.assertTrue(_is_sparse_variable(aR))
        b = mtype(array2)
        bR = as_sparse_variable(b)
        self.assertFalse(bR.data is b)
        self.assertTrue(_is_sparse(b))
        self.assertTrue(_is_sparse_variable(bR))
        apb = op(aR, bR)
        self.assertTrue(_is_sparse_variable(apb))
        # Result must keep the operands' dtype and sparse format.
        self.assertTrue(apb.type.dtype == aR.type.dtype, apb.type.dtype)
        self.assertTrue(apb.type.dtype == bR.type.dtype, apb.type.dtype)
        self.assertTrue(apb.type.format == aR.type.format, apb.type.format)
        self.assertTrue(apb.type.format == bR.type.format, apb.type.format)
        val = eval_outputs([apb])
        self.assertTrue(val.shape == (3, 2))
        if op is add:
            self.assertTrue(numpy.all(val.todense() == (a + b).todense()))
            ans = numpy.array([[1.0, 2], [3, 4], [5, 6]])
            self.assertTrue(numpy.all(val.todense() == ans))
            verify_grad_sparse(op, [a, b], structured=False)
        elif op is mul:
            self.assertTrue(numpy.all(val.todense()
                                      == (a.multiply(b)).todense()))
            ans = numpy.array([[1, 0], [9, 0], [0, 36]])
            self.assertTrue(numpy.all(val.todense() == ans))
def _testDS(self, op, array1=None, array2=None):
    """Check an elementwise `op` (add or mul) on sparse `op` dense operands.

    Bug fix: the defaults were numpy arrays, i.e. mutable default
    arguments shared across calls; they are now built inside the function
    (behaviour is unchanged for all callers).
    """
    if array1 is None:
        array1 = numpy.array([[1., 0], [3, 0], [0, 6]])
    if array2 is None:
        array2 = numpy.asarray([[0, 2.], [0, 4], [5, 0]])
    for mtype in _mtypes:
        a = mtype(array1)
        aR = as_sparse_variable(a)
        self.assertFalse(aR.data is a)
        self.assertTrue(_is_sparse(a))
        self.assertTrue(_is_sparse_variable(aR))
        b = numpy.asarray(array2)
        bR = tensor.as_tensor_variable(b)
        self.assertFalse(bR.data is b)
        self.assertTrue(_is_dense(b))
        self.assertTrue(_is_dense_variable(bR))
        apb = op(aR, bR)
        self.assertTrue(apb.type.dtype == aR.type.dtype, apb.type.dtype)
        self.assertTrue(apb.type.dtype == bR.type.dtype, apb.type.dtype)
        val = eval_outputs([apb])
        self.assertTrue(val.shape == (3, 2))
        if op is add:
            # Sparse + dense densifies the result.
            self.assertTrue(_is_dense_variable(apb))
            self.assertTrue(numpy.all(val == (a + b)))
            ans = numpy.array([[1., 2], [3, 4], [5, 6]])
            self.assertTrue(numpy.all(val == ans))
        elif op is mul:
            # Sparse * dense stays sparse.
            self.assertTrue(_is_sparse_variable(apb))
            ans = numpy.array([[1, 0], [9, 0], [0, 36]])
            self.assertTrue(numpy.all(val.todense() == (a.multiply(b))))
            self.assertTrue(numpy.all(val.todense() == ans))
class Poisson(gof.op.Op):
    """Return a sparse having random values from a Poisson density
    with mean from the input.

    WARNING: This Op is NOT deterministic, as calling it twice with the
    same inputs will NOT give the same result. This is a violation of
    Theano's contract for Ops

    :param x: Sparse matrix.

    :return: A sparse matrix of random integers of a Poisson density
             with mean of `x` element wise.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x):
        x = as_sparse_variable(x)
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x,) = inputs
        (out,) = outputs
        assert _is_sparse(x)
        assert x.format in ["csr", "csc"]
        # Copy so the input is not modified, then resample every stored
        # value from a Poisson with that value as its mean.
        out[0] = x.copy()
        out[0].data = numpy.asarray(numpy.random.poisson(out[0].data),
                                    dtype=x.dtype)
        # Sampling can produce zeros; drop them from the sparse storage.
        out[0].eliminate_zeros()
def _testSD(self, op, array1=None, array2=None):
    """Check an elementwise `op` (add or mul) on dense `op` sparse operands.

    Bug fix: the defaults were numpy arrays, i.e. mutable default
    arguments shared across calls; they are now built inside the function
    (behaviour is unchanged for all callers).
    """
    if array1 is None:
        array1 = numpy.array([[1.0, 0], [3, 0], [0, 6]])
    if array2 is None:
        array2 = numpy.asarray([[0, 2.0], [0, 4], [5, 0]])
    for mtype in _mtypes:
        a = numpy.array(array1)
        aR = tensor.as_tensor_variable(a)
        self.assertFalse(aR.data is a)  # constants are copied
        self.assertTrue(_is_dense(a))
        self.assertTrue(_is_dense_variable(aR))
        b = mtype(array2)
        bR = as_sparse_variable(b)
        self.assertFalse(bR.data is b)  # constants are copied
        self.assertTrue(_is_sparse(b))
        self.assertTrue(_is_sparse_variable(bR))
        apb = op(aR, bR)
        self.assertTrue(apb.type.dtype == aR.type.dtype, apb.type.dtype)
        self.assertTrue(apb.type.dtype == bR.type.dtype, apb.type.dtype)
        val = eval_outputs([apb])
        self.assertTrue(val.shape == (3, 2))
        if op is add:
            # Dense + sparse densifies the result.
            self.assertTrue(_is_dense_variable(apb))
            self.assertTrue(numpy.all(val == (a + b)))
            ans = numpy.array([[1.0, 2], [3, 4], [5, 6]])
            self.assertTrue(numpy.all(val == ans))
        elif op is mul:
            # Dense * sparse stays sparse.
            self.assertTrue(_is_sparse_variable(apb))
            self.assertTrue(numpy.all(val.todense() == (b.multiply(a))))
            self.assertTrue(numpy.all(val.todense()
                                      == numpy.array([[1, 0],
                                                      [9, 0],
                                                      [0, 36]])))
def test_basicSS(self):
    """true_dot of a sparse matrix with its own transpose stays sparse."""
    for mtype in _mtypes:
        # Build a 500x3 sparse variable with two non-zero entries.
        sp_var = as_sparse_variable(mtype((500, 3)))
        sp_var.data[(10, 1)] = 1
        sp_var.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(sp_var))
        sp_var_t = sp_var.T
        self.assertTrue(_is_sparse_variable(sp_var_t))
        dot_expr = true_dot(sp_var, sp_var_t)
        self.assertTrue(_is_sparse_variable(dot_expr))
        result = eval_outputs([dot_expr])
        self.assertTrue(_is_sparse(result))
        self.assertTrue(result.shape == (500, 500))
        self.assertTrue(type(result) is mtype)
        # Hand-built reference: x . x^T has non-zeros only on the diagonal.
        expected = mtype((500, 500))
        expected[(10, 10)] = 1
        expected[(20, 20)] = 4
        self.assertTrue(result.shape == expected.shape)
        self.assertTrue(type(result) == type(expected))
        self.assertTrue(result.dtype == expected.dtype)
        # Direct == is unsupported on sparse; zero difference means equal.
        self.assertTrue(abs(result - expected).nnz == 0)
        result = result.todense()
        expected = expected.todense()
        self.assertTrue((result == expected).all() == True)
def test_basicSS(self):
    """Check z = x . x^T for every sparse matrix type."""
    for mtype in _mtypes:
        # 500x3 sparse input with two stored entries.
        lhs = as_sparse_variable(mtype((500, 3)))
        lhs.data[(10, 1)] = 1
        lhs.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(lhs))
        rhs = lhs.T
        self.assertTrue(_is_sparse_variable(rhs))
        prod = true_dot(lhs, rhs)
        self.assertTrue(_is_sparse_variable(prod))
        val = eval_outputs([prod])
        self.assertTrue(_is_sparse(val))
        self.assertTrue(val.shape == (500, 500))
        self.assertTrue(type(val) is mtype)
        # Reference result built by hand.
        ref = mtype((500, 500))
        ref[(10, 10)] = 1
        ref[(20, 20)] = 4
        self.assertTrue(val.shape == ref.shape)
        self.assertTrue(type(val) == type(ref))
        self.assertTrue(val.dtype == ref.dtype)
        # Sparse matrices do not support ==; zero difference means equal.
        self.assertTrue(abs(val - ref).nnz == 0)
        val = val.todense()
        ref = ref.todense()
        self.assertTrue((val == ref).all() == True)
def test_basicSD(self):
    """true_dot(sparse, dense) yields a sparse result of the right shape."""
    for mtype in _mtypes:
        sp_in = as_sparse_variable(mtype((500, 3)))
        sp_in.data[(10, 1)] = 1
        sp_in.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(sp_in))
        dense_in = tensor.as_tensor_variable([[1., 2], [3, 4], [2, 1]])
        self.assertTrue(_is_dense_variable(dense_in))
        prod = true_dot(sp_in, dense_in)
        self.assertTrue(_is_sparse_variable(prod))
        result = eval_outputs([prod])
        self.assertTrue(_is_sparse(result))
        self.assertTrue(result.shape == (500, 2))
        self.assertTrue(type(result) is mtype)
        # Hand-built reference result.
        expected = mtype((500, 2))
        expected[(10, 0)] = 3.
        expected[(20, 0)] = 4
        expected[(10, 1)] = 4
        expected[(20, 1)] = 2
        self.assertTrue(result.shape == expected.shape)
        self.assertTrue(type(result) == type(expected))
        self.assertTrue(result.dtype == expected.dtype)
        # Sparse matrices do not support ==; zero difference means equal.
        self.assertTrue(abs(result - expected).nnz == 0)
        result = result.todense()
        expected = expected.todense()
        self.assertTrue((result == expected).all() == True)
def test_basicSD(self):
    """Check z = x . y with x sparse and y dense."""
    for mtype in _mtypes:
        a_sp = as_sparse_variable(mtype((500, 3)))
        a_sp.data[(10, 1)] = 1
        a_sp.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(a_sp))
        y_dense = tensor.as_tensor_variable([[1., 2], [3, 4], [2, 1]])
        self.assertTrue(_is_dense_variable(y_dense))
        dot_expr = true_dot(a_sp, y_dense)
        self.assertTrue(_is_sparse_variable(dot_expr))
        z_val = eval_outputs([dot_expr])
        self.assertTrue(_is_sparse(z_val))
        self.assertTrue(z_val.shape == (500, 2))
        self.assertTrue(type(z_val) is mtype)
        # Expected product, built entry by entry.
        w_ref = mtype((500, 2))
        w_ref[(10, 0)] = 3.
        w_ref[(20, 0)] = 4
        w_ref[(10, 1)] = 4
        w_ref[(20, 1)] = 2
        self.assertTrue(z_val.shape == w_ref.shape)
        self.assertTrue(type(z_val) == type(w_ref))
        self.assertTrue(z_val.dtype == w_ref.dtype)
        # == is unsupported on sparse matrices; compare via subtraction.
        self.assertTrue(abs(z_val - w_ref).nnz == 0)
        z_val = z_val.todense()
        w_ref = w_ref.todense()
        self.assertTrue((z_val == w_ref).all() == True)
def perform(self, node, inputs, outputs):
    """Write a Poisson-resampled copy of the sparse input into outputs[0]."""
    x = inputs[0]
    out = outputs[0]
    assert _is_sparse(x)
    assert x.format in ["csr", "csc"]
    sampled = x.copy()
    # Resample every stored value, keeping the input's dtype.
    sampled.data = np.asarray(np.random.poisson(sampled.data), dtype=x.dtype)
    # Sampling can produce zeros; drop them from the sparse storage.
    sampled.eliminate_zeros()
    out[0] = sampled
def perform(self, node, inputs, outputs):
    """Fill outputs[0] with a Poisson-sampled copy of the sparse input."""
    x = inputs[0]
    out_storage = outputs[0]
    assert _is_sparse(x)
    assert x.format in ["csr", "csc"]
    result = x.copy()
    # Each stored value becomes the mean of an independent Poisson draw.
    result.data = numpy.asarray(numpy.random.poisson(result.data),
                                dtype=x.dtype)
    # Drop any zeros introduced by the sampling.
    result.eliminate_zeros()
    out_storage[0] = result
class Multinomial(gof.op.Op):
    """Return a sparse matrix having random values from a multinomial density
    having number of experiment `n` and probability of success `p`.

    WARNING: This Op is NOT deterministic, as calling it twice with the
    same inputs will NOT give the same result. This is a violation of
    Theano's contract for Ops

    :param n: Tensor type vector or scalar representing the number of
              experiment for each row. If `n` is a scalar, it will be
              used for each row.
    :param p: Sparse matrix of probability where each row is a probability
              vector representing the probability of success. N.B. Each
              row must sum to one.

    :return: A sparse matrix of random integers from a multinomial density
             for each row.

    :note: It will work only if `p` has csr format.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, n, p):
        n = tensor.as_tensor_variable(n)
        p = as_sparse_variable(p)
        assert p.format in ["csr", "csc"]
        return gof.Apply(self, [n, p], [p.type()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (n, p) = inputs
        (out,) = outputs
        assert _is_sparse(p)
        if p.format != 'csr':
            # Bug fix: `NotImplemented` is a comparison sentinel, not an
            # exception class; calling it raised a confusing TypeError
            # instead of the intended NotImplementedError.
            raise NotImplementedError()
        out[0] = p.copy()
        # Each csr row occupies data[indptr[i]:indptr[i + 1]]; sample it
        # as one multinomial draw.
        if n.ndim == 0:
            for i in xrange(p.shape[0]):
                k, l = p.indptr[i], p.indptr[i + 1]
                out[0].data[k:l] = numpy.random.multinomial(n, p.data[k:l])
        elif n.ndim == 1:
            if n.shape[0] != p.shape[0]:
                raise ValueError('The number of element of n must be '
                                 'the same as the number of row of p.')
            for i in xrange(p.shape[0]):
                k, l = p.indptr[i], p.indptr[i + 1]
                out[0].data[k:l] = numpy.random.multinomial(n[i], p.data[k:l])
class SamplingDot(gof.op.Op):
    """
    Operand for calculating the dot product DOT(X, Y) = Z when you only want
    to calculate a subset of Z.

    It is equivalent to P o (X . Y) where o is the element-wise product, X and
    Y operands of the dot product and P is a matrix that contains 1 when the
    corresponding element of Z should be calculated and 0 when it shouldn't.
    Note that SamplingDot has a different interface than DOT because
    SamplingDot requires X to be a MxK matrix while Y is a NxK matrix instead
    of the usual KxN matrix.

    It will work if the pattern is not binary value, but if the pattern
    doesn't have a high sparsity proportion it will be slower than a more
    optimized dot followed by a normal elemwise multiplication.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def __str__(self):
        return 'SamplingDot'

    def make_node(self, x, y, p):
        x = tensor.as_tensor_variable(x)
        y = tensor.as_tensor_variable(y)

        if not _is_sparse_variable(p):
            raise TypeError(p)

        #TODO: use it.
        dtype_out = scalar.upcast(x.type.dtype, y.type.dtype, p.type.dtype)

        return gof.Apply(self, [x, y, p], [p.type()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x, y, p) = inputs
        (out,) = outputs
        # Bug fix: perform() receives concrete values (ndarrays / scipy
        # matrices), not symbolic variables, so the type guards must use
        # _is_sparse; the old _is_sparse_variable checks were always False
        # here, making them dead code.
        if _is_sparse(x):
            raise TypeError(x)
        if _is_sparse(y):
            raise TypeError(y)
        if not _is_sparse(p):
            raise TypeError(p)
        # P o (X . Y^T): compute the dense dot, then mask by the sparse
        # pattern and return a matrix of p's sparse class.
        rval = p.__class__(p.multiply(numpy.dot(x, y.T)))
        out[0] = rval
class EliminateZeros(gof.op.Op):
    """Remove explicitly-stored zero entries from a sparse matrix.

    The output has the same type as the input; only the storage is
    compacted via scipy's `eliminate_zeros`.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x):
        x = as_sparse_variable(x)
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x,) = inputs
        (out,) = outputs
        assert _is_sparse(x)
        # Copy first so the input matrix is not compacted in place.
        out[0] = x.copy()
        out[0].eliminate_zeros()
class Poisson(gof.op.Op):
    """Resample each stored value of a sparse matrix from a Poisson density
    whose mean is that value.

    Uses numpy.random, so two calls with the same input give different
    results (NOT deterministic).
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, x):
        x = as_sparse_variable(x)
        return gof.Apply(self, [x], [x.type()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x,) = inputs
        (out,) = outputs
        assert _is_sparse(x)
        # Copy so the input is not modified in place.
        out[0] = x.copy()
        out[0].data = numpy.asarray(numpy.random.poisson(out[0].data),
                                    dtype=x.dtype)
        # Sampling can produce zeros; drop them from the sparse storage.
        out[0].eliminate_zeros()
class Cast(gof.op.Op):
    """Cast the data of a sparse matrix to `out_type`, keeping its format."""

    def __init__(self, out_type):
        self.out_type = out_type

    def __eq__(self, other):
        return (type(self) == type(other)) and self.out_type == other.out_type

    def __hash__(self):
        return hash(type(self)) ^ hash(self.out_type)

    def make_node(self, x):
        x = as_sparse_variable(x)
        return gof.Apply(
            self, [x],
            [SparseType(dtype=self.out_type,
                        format=x.format).make_variable()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (x,) = inputs
        (out,) = outputs
        assert _is_sparse(x)
        # Bug fix: the previous code did `out[0] = x` and then replaced
        # `out[0].data`, which mutated the *input* matrix in place -- a
        # violation of the Op contract. Copy before casting.
        out[0] = x.copy()
        out[0].data = numpy.asarray(out[0].data, dtype=self.out_type)
def perform(self, node, inputs, outputs):
    """Sample each row of `p` as one multinomial draw with `n` trials.

    `n` may be a scalar (same trial count for every row) or a vector with
    one entry per row of `p`; only csr matrices are supported.
    """
    (n, p) = inputs
    (out,) = outputs
    assert _is_sparse(p)
    if p.format != 'csr':
        # Bug fix: `NotImplemented` is a comparison sentinel, not an
        # exception class; calling it raised a confusing TypeError instead
        # of the intended NotImplementedError.
        raise NotImplementedError()
    out[0] = p.copy()
    # Each csr row occupies data[indptr[i]:indptr[i + 1]].
    if n.ndim == 0:
        for i in xrange(p.shape[0]):
            k, l = p.indptr[i], p.indptr[i + 1]
            out[0].data[k:l] = numpy.random.multinomial(n, p.data[k:l])
    elif n.ndim == 1:
        if n.shape[0] != p.shape[0]:
            raise ValueError('The number of element of n must be '
                             'the same as the number of row of p.')
        for i in xrange(p.shape[0]):
            k, l = p.indptr[i], p.indptr[i + 1]
            out[0].data[k:l] = numpy.random.multinomial(n[i], p.data[k:l])
def perform(self, node, inputs, outputs):
    """Sample each row of `p` as one multinomial draw with `n` trials.

    `n` may be a scalar (same trial count for every row) or a vector with
    one entry per row of `p`; only csr matrices are supported.
    """
    (n, p) = inputs
    (out,) = outputs
    assert _is_sparse(p)
    if p.format != 'csr':
        # Bug fix: `NotImplemented` is a comparison sentinel, not an
        # exception class; calling it raised a confusing TypeError instead
        # of the intended NotImplementedError.
        raise NotImplementedError()
    out[0] = p.copy()
    # Each csr row occupies data[indptr[i]:indptr[i + 1]].
    if n.ndim == 0:
        for i in xrange(p.shape[0]):
            k, l = p.indptr[i], p.indptr[i + 1]
            out[0].data[k:l] = np.random.multinomial(n, p.data[k:l])
    elif n.ndim == 1:
        if n.shape[0] != p.shape[0]:
            raise ValueError('The number of element of n must be '
                             'the same as the number of row of p.')
        for i in xrange(p.shape[0]):
            k, l = p.indptr[i], p.indptr[i + 1]
            out[0].data[k:l] = np.random.multinomial(n[i], p.data[k:l])
def test_basicDS(self):
    """Check transpose(true_dot(dense, sparse)); format may flip csr<->csc."""
    for mtype in _mtypes:
        x = as_sparse_variable(mtype((500, 3)))
        x.data[(10, 1)] = 1
        x.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(x))
        y = tensor.as_tensor_variable([[1., 2], [3, 4], [2, 1]])
        self.assertTrue(_is_dense_variable(y))
        x.data = x.data.T
        y.data = y.data.T
        # Dead-code fix: the original built `true_dot(y, x)` once, then
        # immediately rebuilt it inside transpose(), discarding the first
        # graph; only the transposed product is needed.
        zop = transpose(true_dot(y, x))
        self.assertTrue(_is_sparse_variable(zop))
        z = eval_outputs([zop])
        self.assertTrue(_is_sparse(z))
        self.assertTrue(z.shape == (500, 2))
        # Expected product, built entry by entry.
        w = mtype((500, 2))
        w[(10, 0)] = 3.
        w[(20, 0)] = 4
        w[(10, 1)] = 4
        w[(20, 1)] = 2
        self.assertTrue(z.shape == w.shape)
        # Type should switch from csr to csc and vice-versa, so the
        # concrete sparse class is deliberately not compared.
        self.assertTrue(z.dtype == w.dtype)
        self.assertTrue(abs(z - w).nnz == 0)
        z = z.todense()
        w = w.todense()
        self.assertTrue((z == w).all() == True)
def test_basicDS(self):
    """Check transpose(true_dot(dense, sparse)); format may flip csr<->csc."""
    for mtype in _mtypes:
        x = as_sparse_variable(mtype((500, 3)))
        x.data[(10, 1)] = 1
        x.data[(20, 2)] = 2
        self.assertTrue(_is_sparse_variable(x))
        y = tensor.as_tensor_variable([[1., 2], [3, 4], [2, 1]])
        self.assertTrue(_is_dense_variable(y))
        x.data = x.data.T
        y.data = y.data.T
        # Dead-code fix: the original built `true_dot(y, x)` once, then
        # immediately rebuilt it inside transpose(), discarding the first
        # graph; only the transposed product is needed.
        zop = transpose(true_dot(y, x))
        self.assertTrue(_is_sparse_variable(zop))
        z = eval_outputs([zop])
        self.assertTrue(_is_sparse(z))
        self.assertTrue(z.shape == (500, 2))
        # Expected product, built entry by entry.
        w = mtype((500, 2))
        w[(10, 0)] = 3.
        w[(20, 0)] = 4
        w[(10, 1)] = 4
        w[(20, 1)] = 2
        self.assertTrue(z.shape == w.shape)
        # Type should switch from csr to csc and vice-versa, so the
        # concrete sparse class is deliberately not compared.
        self.assertTrue(z.dtype == w.dtype)
        self.assertTrue(abs(z - w).nnz == 0)
        z = z.todense()
        w = w.todense()
        self.assertTrue((z == w).all() == True)
class Multinomial(gof.op.Op):
    """Return a sparse matrix whose rows are multinomial samples drawn from
    the corresponding rows of `p`.

    Uses numpy.random, so two calls with the same inputs give different
    results (NOT deterministic).

    NOTE(review): `n` is always indexed per row here (`n[i]`), so it is
    presumably expected to be a vector with one entry per row of `p` --
    confirm against callers.
    """

    def __eq__(self, other):
        return type(self) == type(other)

    def __hash__(self):
        return hash(type(self))

    def make_node(self, n, p):
        n = tensor.as_tensor_variable(n)
        p = as_sparse_variable(p)
        return gof.Apply(self, [n, p], [p.type()])

    def perform(self, node, inputs, outputs):
        # Modernized signature: tuple parameter unpacking is a syntax error
        # in Python 3 (PEP 3113), so unpack inside the body instead.
        (n, p) = inputs
        (out,) = outputs
        assert _is_sparse(p)
        if p.format != 'csr':
            # Bug fix: `NotImplemented` is a comparison sentinel, not an
            # exception class; calling it raised a confusing TypeError
            # instead of the intended NotImplementedError.
            raise NotImplementedError()
        out[0] = p.copy()
        # Each csr row occupies data[indptr[i]:indptr[i + 1]]; sample it as
        # one multinomial draw with n[i] trials.
        for i in xrange(p.shape[0]):
            k, l = p.indptr[i], p.indptr[i + 1]
            out[0].data[k:l] = numpy.random.multinomial(n[i], p.data[k:l])