class TestMatMul(unittest.TestCase):
    """Shape checks for the ``forward`` and ``backward`` methods of ``MatMul``."""

    def setUp(self):
        """Create a layer around a random (4, 2) weight and a random (2, 4) input."""
        weight = np.random.rand(4, 2)
        self.matmul = MatMul(weight)
        self.x = np.random.rand(2, 4)

    def test_forward(self):
        """``forward`` on the (2, 4) input must yield a (2, 2) result."""
        result = self.matmul.forward(self.x)
        self.assertEqual((2, 2), result.shape)

    def test_backward(self):
        """``backward`` must return a gradient with the input's (2, 4) shape."""
        # The forward output is reused as the upstream gradient; only the
        # shape of the result is checked.
        upstream = self.matmul.forward(self.x)
        grad_x = self.matmul.backward(upstream)
        self.assertEqual((2, 4), grad_x.shape)
def __new__(cls, mat, **kwargs):
    """Construct the inverse of ``mat``, preferring simpler equivalent forms.

    The checks run in this order and the first one that applies wins:

    1. A non-matrix operand is returned as ``mat**(-1)``.
    2. ``mat.eval_inverse(**kwargs)`` is tried; ``AttributeError`` and
       ``NotImplementedError`` are ignored.
    3. If ``mat`` has an ``inv`` attribute, ``mat.inv()`` is returned.
    4. The inverse of an inverse is its argument; an identity is returned
       unchanged.
    5. A non-square operand raises ``ShapeError``.
    6. For a product, the reversed product of the factors' inverses is
       tried; a ``ShapeError`` while building it is ignored.
    7. Otherwise the object is created through ``MatPow.__new__`` with
       exponent ``-1``.

    Raises:
        ShapeError: if ``mat`` is not square and no earlier rule applied.
    """
    if not mat.is_Matrix:
        return mat**(-1)

    # Let the operand compute its own inverse if it knows how to.
    try:
        evaluated = mat.eval_inverse(**kwargs)
    except (AttributeError, NotImplementedError):
        pass
    else:
        return evaluated

    if hasattr(mat, 'inv'):
        return mat.inv()

    if mat.is_Inverse:
        return mat.arg
    if mat.is_Identity:
        return mat

    if not mat.is_square:
        raise ShapeError("Inverse of non-square matrix %s" % mat)

    if mat.is_Mul:
        # Invert each factor and reverse the order of the product.
        try:
            inverted_factors = [Inverse(factor) for factor in mat.args[::-1]]
            product = MatMul(*inverted_factors)
        except ShapeError:
            pass
        else:
            return product

    return MatPow.__new__(cls, mat, -1)
def __new__(cls, *args):
    """Build a matrix sum from ``args`` after validating and simplifying them.

    Each argument is passed through ``matrixify`` and arguments equal to
    ``0`` are dropped. The remaining arguments must all be matrices with
    the same shape as the first one.

    Returns:
        A ``ZeroMatrix`` when the sum is ``S.Zero`` or only zero matrices
        remain, a ``MatMul`` when the sum collapses to a product, a new
        ``MatAdd`` of the non-zero terms when zero matrices were present,
        or the constructed expression otherwise.

    Raises:
        ValueError: if any remaining argument is not a matrix.
        ShapeError: if an argument's shape differs from the first one's.
    """
    args = [term for term in map(matrixify, args) if term != 0]

    if not all(term.is_Matrix for term in args):
        raise ValueError("Mix of Matrix and Scalar symbols")

    # Every summand has to share the shape of the first one.
    first = args[0]
    for other in args[1:]:
        if first.shape != other.shape:
            raise ShapeError(
                "Matrices %s and %s are not aligned" % (first, other))

    expr = Add.__new__(cls, *args)
    if expr == S.Zero:
        return ZeroMatrix(*args[0].shape)
    expr = matrixify(expr)

    if expr.is_Mul:
        return MatMul(*expr.args)

    # Strip zero matrices out of the sum, if there are any.
    if expr.is_Add and any(term.is_ZeroMatrix for term in expr.args):
        newargs = [term for term in expr.args if not term.is_ZeroMatrix]
        if not newargs:
            # Nothing but zero matrices was left.
            return ZeroMatrix(*args[0].shape)
        if expr.args != newargs:
            # Rebuild from the surviving terms so they are simplified again.
            return MatAdd(*newargs)

    return expr
def bc_matmul(expr):
    """Multiply out adjacent block-matrix factors of a matrix product.

    ``expr.as_coeff_matrices()`` supplies a coefficient and a list of matrix
    factors. Whenever two neighbouring factors are both block matrices they
    are replaced, in that list, by ``left._blockmul(right)``; the merged
    factor is then compared with its new right-hand neighbour.

    Returns:
        ``MatMul(factor, *matrices)`` built from the coefficient and the
        reduced factor list.
    """
    factor, matrices = expr.as_coeff_matrices()

    pos = 0
    while pos + 1 < len(matrices):
        left, right = matrices[pos:pos + 2]
        if not (left.is_BlockMatrix and right.is_BlockMatrix):
            pos += 1
            continue
        # Replace the pair by its block product without advancing, so the
        # merged factor can combine with the next one as well.
        matrices[pos:pos + 2] = [left._blockmul(right)]

    return MatMul(factor, *matrices)
def transpose(self):
    """Return the transpose of this expression.

    * A ``Transpose`` instance returns its argument.
    * A product returns the ``MatMul`` of the transposed factors in
      reverse order.
    * A sum returns the ``MatAdd`` of the transposed terms.
    * Otherwise ``self._eval_transpose()`` is used; if that raises
      ``AttributeError`` or ``NotImplementedError`` a ``Transpose`` object
      wrapping ``self`` is created via ``Basic.__new__``.
    """
    if isinstance(self, Transpose):
        return self.arg

    if self.is_Mul:
        reversed_factors = self.args[::-1]
        return MatMul(*map(Transpose, reversed_factors))

    if self.is_Add:
        return MatAdd(*map(Transpose, self.args))

    try:
        result = self._eval_transpose()
    except (AttributeError, NotImplementedError):
        result = Basic.__new__(Transpose, self)
    return result
def __new__(cls, mat):
    """Construct the transpose of ``mat``, simplifying where possible.

    The first matching rule is applied:

    * a non-matrix operand is returned unchanged;
    * a ``Transpose`` instance returns its argument;
    * an operand with a ``transpose`` attribute returns ``mat.transpose()``;
    * a product returns the ``MatMul`` of the transposed factors in
      reverse order;
    * a sum returns the ``MatAdd`` of the transposed terms;
    * anything else is wrapped through ``Basic.__new__(cls, mat)``.
    """
    if not mat.is_Matrix:
        return mat

    if isinstance(mat, Transpose):
        return mat.arg

    if hasattr(mat, 'transpose'):
        return mat.transpose()

    if mat.is_Mul:
        reversed_factors = mat.args[::-1]
        return MatMul(*map(Transpose, reversed_factors))

    if mat.is_Add:
        return MatAdd(*map(Transpose, mat.args))

    return Basic.__new__(cls, mat)
class SimpleCBOW:
    """Model with two input ``MatMul`` layers, one output ``MatMul`` layer and
    a ``SoftMaxWithLoss`` layer.

    Both input layers are built from the same ``W_in`` array. ``params`` and
    ``grads`` collect the layers' parameters and gradients, and ``word_vecs``
    refers to the input weight array.
    """

    def __init__(self, vocab_size, hidden_size):
        """Create the weights and layers.

        Args:
            vocab_size: first dimension of the input weight and second
                dimension of the output weight.
            hidden_size: second dimension of the input weight and first
                dimension of the output weight.
        """
        # Random weights scaled by 0.01 and cast with astype('f'); the input
        # weight is drawn before the output weight.
        w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
        w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

        # Both input layers receive the very same array object.
        self.in_layer_0 = MatMul(w_in)
        self.in_layer_1 = MatMul(w_in)
        self.out_layer = MatMul(w_out)
        self.loss_layer = SoftMaxWithLoss()

        # Gather every layer's parameters and gradients into flat lists.
        self.params = []
        self.grads = []
        for layer in (self.in_layer_0, self.in_layer_1, self.out_layer):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

        # Expose the input weight under the name ``word_vecs``.
        self.word_vecs = w_in

    def forward(self, contexts, target):
        """Run the forward pass and return the loss layer's output.

        Columns 0 and 1 of ``contexts`` go through the two input layers,
        their outputs are averaged, and the result is scored by the output
        layer before being handed to the loss layer together with ``target``.
        """
        hidden_0 = self.in_layer_0.forward(contexts[:, 0])
        hidden_1 = self.in_layer_1.forward(contexts[:, 1])
        hidden = (hidden_0 + hidden_1) * 0.5
        score = self.out_layer.forward(hidden)
        return self.loss_layer.forward(score, target)

    def backward(self, dout=1):
        """Propagate ``dout`` back through the loss, output and input layers.

        Always returns ``None``.
        """
        dscore = self.loss_layer.backward(dout)
        dhidden = self.out_layer.backward(dscore)
        # Halve in place: the forward pass averaged the two hidden outputs.
        dhidden *= 0.5
        self.in_layer_1.backward(dhidden)
        self.in_layer_0.backward(dhidden)
        return None
def __init__(self, vocab_size, hidden_size):
    """Create the weights and layers.

    Args:
        vocab_size: first dimension of the input weight and second
            dimension of the output weight.
        hidden_size: second dimension of the input weight and first
            dimension of the output weight.
    """
    # Random weights scaled by 0.01 and cast with astype('f'); the input
    # weight is drawn before the output weight.
    w_in = 0.01 * np.random.randn(vocab_size, hidden_size).astype('f')
    w_out = 0.01 * np.random.randn(hidden_size, vocab_size).astype('f')

    # Both input layers receive the very same array object.
    self.in_layer_0 = MatMul(w_in)
    self.in_layer_1 = MatMul(w_in)
    self.out_layer = MatMul(w_out)
    self.loss_layer = SoftMaxWithLoss()

    # Gather every layer's parameters and gradients into flat lists.
    self.params = []
    self.grads = []
    for layer in (self.in_layer_0, self.in_layer_1, self.out_layer):
        self.params.extend(layer.params)
        self.grads.extend(layer.grads)

    # Expose the input weight under the name ``word_vecs``.
    self.word_vecs = w_in
def __rmul__(self, other):
    """Handle ``other * self``: build ``MatMul(other, self)`` and return its ``doit()`` result."""
    product = MatMul(other, self)
    return product.doit()
def __mul__(self, other):
    """Handle ``self * other``: build ``MatMul(self, other)`` and return its ``doit()`` result."""
    product = MatMul(self, other)
    return product.doit()
def __neg__(self):
    """Handle ``-self``: build ``MatMul(S.NegativeOne, self)`` and return its ``doit()`` result."""
    negated = MatMul(S.NegativeOne, self)
    return negated.doit()
def as_coeff_mmul(self):
    """Return the pair ``(1, MatMul(self))``: a coefficient of 1 and ``self`` wrapped in a ``MatMul``."""
    coefficient = 1
    wrapped = MatMul(self)
    return coefficient, wrapped
def setUp(self):
    """Create a layer around a random (4, 2) weight and a random (2, 4) input."""
    weight = np.random.rand(4, 2)
    self.matmul = MatMul(weight)
    self.x = np.random.rand(2, 4)
def block_collapse(expr):
    """Evaluates a block matrix expression

    Builtin containers (``tuple``, ``list``, ``set``, ``frozenset``) and
    ``Tuple``/``FiniteSet`` are rebuilt with every element collapsed.
    Anything that is not a matrix sum, product, transpose, power or inverse
    is returned unchanged. Otherwise the arguments are collapsed
    recursively until they stop changing, and the resulting expression is
    combined block-wise where its operands are block matrices.

    >>> from sympy import MatrixSymbol, BlockMatrix, symbols, Identity, Matrix, ZeroMatrix, block_collapse
    >>> n,m,l = symbols('n m l')
    >>> X = MatrixSymbol('X', n, n)
    >>> Y = MatrixSymbol('Y', m ,m)
    >>> Z = MatrixSymbol('Z', n, m)
    >>> B = BlockMatrix([[X, Z], [ZeroMatrix(m, n), Y]])
    >>> print(B)
    [X, Z]
    [0, Y]

    >>> C = BlockMatrix([[Identity(n), Z]])
    >>> print(C)
    [I, Z]

    >>> print(block_collapse(C*B))
    [X, Z + Z*Y]
    """
    # Containers: collapse each element and rebuild the same container type.
    if expr.__class__ in [tuple, list, set, frozenset]:
        return expr.__class__([block_collapse(arg) for arg in expr])
    if expr.__class__ in [Tuple, FiniteSet]:
        return expr.__class__(*[block_collapse(arg) for arg in expr])

    # Only compound matrix expressions can be collapsed any further.
    if not expr.is_Matrix or (not expr.is_Add and not expr.is_Mul
            and not expr.is_Transpose and not expr.is_Pow
            and not expr.is_Inverse):
        return expr

    if expr.is_Transpose:
        expr = Transpose(block_collapse(expr.arg))
        # Transpose(...) may already have simplified itself, so re-check.
        if expr.is_Transpose and expr.arg.is_BlockMatrix:
            expr = expr.arg.eval_transpose()
        return expr

    if expr.is_Inverse:
        return Inverse(block_collapse(expr.arg))

    # Recurse on the subargs
    args = list(expr.args)
    for i, arg in enumerate(args):
        newarg = block_collapse(arg)
        while newarg != arg:  # Repeat until no new changes
            arg = newarg
            newarg = block_collapse(arg)
        args[i] = newarg

    if tuple(args) != expr.args:
        expr = expr.__class__(*args)

    # Turn  -[X, Y] into [-X, -Y]
    if (expr.is_Mul and len(expr.args) == 2 and not expr.args[0].is_Matrix
            and expr.args[1].is_BlockMatrix):
        if expr.args[1].is_BlockDiagMatrix:
            return BlockDiagMatrix(
                *[expr.args[0]*arg for arg in expr.args[1].diag])
        else:
            return BlockMatrix(expr.args[0]*expr.args[1].mat)

    if expr.is_Add:
        nonblocks = [arg for arg in expr.args if not arg.is_BlockMatrix]
        blocks = [arg for arg in expr.args if arg.is_BlockMatrix]
        if not blocks:
            return MatAdd(*nonblocks)
        block = blocks[0]
        for b in blocks[1:]:
            block = block._blockadd(b)
        if block.blockshape == (1, 1):
            # Bring all the non-blocks into the block_matrix
            mat = Matrix(1, 1, (block.blocks[0, 0] + MatAdd(*nonblocks), ))
            return BlockMatrix(mat)
        # Add identities to the blocks as block identities.  Terms that are
        # not absorbed are gathered in a separate list: removing entries
        # from ``nonblocks`` while iterating over it would skip the term
        # following every absorbed identity.
        remaining = []
        for mat in nonblocks:
            c, M = mat.as_coeff_Mul()
            if M.is_Identity and block.is_structurally_symmetric:
                block_id = BlockDiagMatrix(
                    *[c*Identity(k) for k in block.rowblocksizes])
                block = block._blockadd(block_id)
            else:
                remaining.append(mat)

        return MatAdd(*(remaining + [block]))

    if expr.is_Mul:
        nonmatrices = [arg for arg in expr.args if not arg.is_Matrix]
        matrices = [arg for arg in expr.args if arg.is_Matrix]
        i = 0
        while i + 1 < len(matrices):
            A, B = matrices[i:i + 2]
            if A.is_BlockMatrix and B.is_BlockMatrix:
                # Merge the pair and stay put so the product can combine
                # with its next neighbour too.
                matrices[i] = A._blockmul(B)
                matrices.pop(i + 1)
            else:
                i += 1
        return MatMul(*(nonmatrices + matrices))

    if expr.is_Pow:
        # Repeated block multiplication of the base by itself.
        rv = expr.base
        for _ in range(1, expr.exp):
            rv = rv._blockmul(expr.base)
        return rv
def __div__(self, other):
    """Handle ``self / other``: return ``MatMul(self, other**S.NegativeOne)``."""
    reciprocal = other**S.NegativeOne
    return MatMul(self, reciprocal)
def _entry(self, i, j):
    """Return entry ``(i, j)`` of the power when the exponent is an Integer.

    The power is expanded into a ``MatMul`` of ``self.exp`` copies of the
    base and the lookup is delegated to that product. Returns ``None``
    when ``self.exp.is_Integer`` is false.
    """
    if not self.exp.is_Integer:
        return None
    # Make an explicit MatMul out of the MatPow
    repeated_base = [self.base for _ in range(self.exp)]
    return MatMul(*repeated_base)._entry(i, j)
def conjugate(self):
    """Return the ``MatMul`` of every argument's ``conjugate()``, in the original order."""
    conjugated = (factor.conjugate() for factor in self.args)
    return MatMul(*conjugated)
def adjoint(self):
    """Return the ``MatMul`` of every argument's ``adjoint()``, with the arguments in reverse order."""
    reversed_factors = self.args[::-1]
    adjoints = (factor.adjoint() for factor in reversed_factors)
    return MatMul(*adjoints)
def bc_matpow(expr):
    """Expand a matrix power with a positive integer exponent into a product.

    Args:
        expr: an object exposing ``base`` and ``exp``.

    Returns:
        ``MatMul`` of ``exp`` copies of ``expr.base`` when ``expr.exp`` is a
        numeric integer greater than zero; otherwise ``expr`` unchanged.
    """
    exp = expr.exp
    # Repeating the base zero or a negative number of times would give an
    # empty factor list and silently turn the power into an empty product,
    # so only strictly positive integer exponents are expanded.
    if exp.is_number and exp.is_integer and exp > 0:
        return MatMul(*([expr.base] * int(exp)))
    return expr