def compound_dot(self, A, B, C, alpha=1.0, beta=0.0, relu=False, bsum=None):
    """
    Perform one of the following operations (* is the dot product):

        C = alpha * A * B   + beta * C
        C = alpha * A.T * B + beta * C
        C = alpha * A * B.T + beta * C

    If relu is True, ReLU is applied to the alpha * A * B term before the
    beta * C addition.

    The operation is short-circuited to:

        C <- alpha * A * B

    if beta is 0 (the default).

    Arguments:
        A, B (CPUTensor): input operands
        C (CPUTensor): output
        alpha (float): scale on the A * B term
        beta (float): scale on C before the sum
        relu (bool): whether to apply ReLU prior to the beta addition
        bsum (CPUTensor, optional): if provided, receives the row sums of C

    Returns:
        CPUTensor: the output tensor C
    """
    # check types and shapes
    assert A.dtype == B.dtype == C.dtype
    assert A.shape[0] == C.shape[0]
    assert B.shape[1] == C.shape[1]
    assert A.shape[1] == B.shape[0]

    # cleaner implementation, equivalent to the one below
    # if relu:
    #     C[:] = self.maximum(alpha * self.dot(A, B), 0) + beta * C
    # else:
    #     C[:] = alpha * self.dot(A, B) + beta * C

    if not relu:
        if not C._tensor.flags['C_CONTIGUOUS']:
            # BLAS needs a contiguous output buffer: stage the result in a
            # temporary, then copy it back into C
            tmp = np.empty(C.shape, dtype=C.dtype)
            if beta != 0:
                tmp[:] = C._tensor
            math_cpu.blas_dot(A._tensor, B._tensor, tmp, alpha, beta)
            C._tensor[:] = tmp
        else:
            math_cpu.blas_dot(A._tensor, B._tensor, C._tensor, alpha, beta)
    else:
        # scale C in place by beta before accumulating
        # mfma: change np.multiply to mul
        if beta != 1:
            np.multiply(C._tensor, beta, C._tensor)
        tmp = np.empty(C.shape, dtype=C.dtype)
        np.dot(A._tensor, B._tensor, tmp)
        # mfma: change np.multiply to mul
        if alpha != 1:
            np.multiply(tmp, alpha, tmp)
        # ReLU on the scaled product, prior to the beta addition
        self.Relu(tmp, tmp)
        np.add(C._tensor, tmp, C._tensor)

    if bsum is not None:
        bsum[:] = self.sum(C, 1)

    return C
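

# To make the fused semantics above concrete, here is a minimal pure-NumPy
# sketch for illustration: `compound_dot_reference` is a hypothetical helper,
# not part of this backend. It computes the same
# C = relu(alpha * A * B) + beta * C result on plain ndarrays, which is handy
# as a known-good baseline when unit-testing compound_dot.
import numpy as np


def compound_dot_reference(A, B, C=None, alpha=1.0, beta=0.0, relu=False):
    """Pure-NumPy reference for C = relu(alpha * A * B) + beta * C."""
    out = alpha * np.dot(A, B)
    if relu:
        np.maximum(out, 0, out)      # in-place ReLU on the scaled product
    if C is not None and beta != 0:
        out += beta * C              # beta addition happens after the ReLU
    return out


# Example check (shapes chosen arbitrarily):
#   A = np.random.rand(4, 3); B = np.random.rand(3, 5); C = np.zeros((4, 5))
#   out = compound_dot_reference(A, B, C, alpha=0.5, beta=0.25, relu=True)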