コード例 #1
0
ファイル: nervanamkl.py プロジェクト: NervanaSystems/neon
    def compound_dot(self, A, B, C, alpha=1.0, beta=0.0, relu=False, bsum=None):
        """
        Compute a scaled matrix product and accumulate it into C in place.

        Doing following operations (* is dot product)
        C = alpha * A * B   + beta * C
        C = alpha * A.T * B + beta * C
        C = alpha * A * B.T + beta * C.

        The shape assertions are applied to A and B exactly as passed, so the
        transposed forms presumably arrive as already-transposed views
        (TODO confirm against callers).

        relu: if true, applied to alpha * A * B before output (and prior to
        the beta addition).

        When beta is 0 (the default) the previous contents of C are ignored
        entirely: out <- alpha * left * right. This holds for the relu path
        too, so an uninitialized output buffer holding NaN/Inf cannot leak
        into the result.

        Arguments:
            A, B (CPUTensor): input operands
            C (MCPUTensor): output, updated in place
            alpha (float): scale A*B term
            beta (float): scale C term before sum
            relu (bool): whether to apply ReLu before output
            bsum (optional): if not None, overwritten in place with
                self.sum(C, 1) evaluated on the final C

        Returns:
            C, the same object that was passed in.
        """

        # checking type and shape
        assert A.dtype == B.dtype == C.dtype
        assert A.shape[0] == C.shape[0]
        assert B.shape[1] == C.shape[1]
        assert A.shape[1] == B.shape[0]

        if not relu:
            if not C._tensor.flags['C_CONTIGUOUS']:
                # blas_dot is handed a freshly allocated (C-contiguous)
                # scratch buffer here, and the result is copied back into C.
                tmp = np.empty(C.shape, dtype=C.dtype)
                if beta != 0:
                    # the old contents only matter when they are accumulated
                    tmp[:] = C._tensor
                math_cpu.blas_dot(A._tensor, B._tensor, tmp, alpha, beta)
                C._tensor[:] = tmp
            else:
                math_cpu.blas_dot(A._tensor, B._tensor, C._tensor, alpha, beta)
        else:
            # beta == 0 means "overwrite": C must not be read at all, since
            # 0 * NaN is NaN and would otherwise poison the output.
            overwrite = beta == 0
            # mfma: change np.multiply to mul
            if not overwrite and beta != 1:
                np.multiply(C._tensor, beta, C._tensor)
            tmp = np.empty(C.shape, dtype=C.dtype)
            np.dot(A._tensor, B._tensor, tmp)
            # mfma: change np.multiply to mul
            if alpha != 1:
                np.multiply(tmp, alpha, tmp)
            # presumably Relu(x, out) writes the activation into `out`;
            # its return value is not used here.
            self.Relu(tmp, tmp)
            if overwrite:
                C._tensor[:] = tmp
            else:
                np.add(C._tensor, tmp, C._tensor)
        if bsum is not None:
            bsum[:] = self.sum(C, 1)

        return C
コード例 #2
0
ファイル: nervanamkl.py プロジェクト: alexleethinker/neon
    def compound_dot(self, A, B, C, alpha=1.0, beta=0.0, relu=False, bsum=None):
        """
        Compute a scaled matrix product and accumulate it into C in place.

        Doing following operations (* is dot product)
        C = alpha * A * B   + beta * C
        C = alpha * A.T * B + beta * C
        C = alpha * A * B.T + beta * C.

        The shape assertions are applied to A and B exactly as passed, so the
        transposed forms presumably arrive as already-transposed views
        (TODO confirm against callers).

        relu: if true, applied to alpha * A * B before output (and prior to
        the beta addition).

        When beta is 0 (the default) the previous contents of C are ignored
        entirely: out <- alpha * left * right. This holds for the relu path
        too, so an uninitialized output buffer holding NaN/Inf cannot leak
        into the result.

        Arguments:
            A, B (CPUTensor): input operands
            C (MCPUTensor): output, updated in place
            alpha (float): scale A*B term
            beta (float): scale C term before sum
            relu (bool): whether to apply ReLu before output
            bsum (optional): if not None, overwritten in place with
                self.sum(C, 1) evaluated on the final C

        Returns:
            C, the same object that was passed in.
        """

        # checking type and shape
        assert A.dtype == B.dtype == C.dtype
        assert A.shape[0] == C.shape[0]
        assert B.shape[1] == C.shape[1]
        assert A.shape[1] == B.shape[0]

        if not relu:
            if not C._tensor.flags['C_CONTIGUOUS']:
                # blas_dot is handed a freshly allocated (C-contiguous)
                # scratch buffer here, and the result is copied back into C.
                tmp = np.empty(C.shape, dtype=C.dtype)
                if beta != 0:
                    # the old contents only matter when they are accumulated
                    tmp[:] = C._tensor
                math_cpu.blas_dot(A._tensor, B._tensor, tmp, alpha, beta)
                C._tensor[:] = tmp
            else:
                math_cpu.blas_dot(A._tensor, B._tensor, C._tensor, alpha, beta)
        else:
            # beta == 0 means "overwrite": C must not be read at all, since
            # 0 * NaN is NaN and would otherwise poison the output.
            overwrite = beta == 0
            # mfma: change np.multiply to mul
            if not overwrite and beta != 1:
                np.multiply(C._tensor, beta, C._tensor)
            tmp = np.empty(C.shape, dtype=C.dtype)
            np.dot(A._tensor, B._tensor, tmp)
            # mfma: change np.multiply to mul
            if alpha != 1:
                np.multiply(tmp, alpha, tmp)
            # presumably Relu(x, out) writes the activation into `out`;
            # its return value is not used here.
            self.Relu(tmp, tmp)
            if overwrite:
                C._tensor[:] = tmp
            else:
                np.add(C._tensor, tmp, C._tensor)
        if bsum is not None:
            bsum[:] = self.sum(C, 1)

        return C