Python Matmul.update Examples

Programming Language: Python

Namespace/Package Name: layer

Class/Type: Matmul

Method/Function: update

Examples at hotexamples.com: 3

Python Matmul.update - 3 examples found. These are the top-rated real-world Python examples of layer.Matmul.update, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Matmul(7)

objective(6)

build(4)

specification(4)

function(3)

gradient(3)

update(3)

T(2)

X(2)

_Y(2)

_dY(2)

gradient_numerical(2)

_D(1)

_dX(1)

class_id(1)

lr(1)

Example #1

Show file

File: test_020_binary_classifier.py Project: oonisim/python-programs

def train_binary_classifier(N: int,
                            D: int,
                            M: int,
                            X: np.ndarray,
                            T: np.ndarray,
                            W: np.ndarray,
                            log_loss_function: Callable,
                            optimizer: Optimizer,
                            num_epochs: int = 100,
                            test_numerical_gradient: bool = False,
                            log_level: int = logging.ERROR,
                            callback: Callable = None):
    """Test case for binary classification with matmul + log loss.

    Each epoch runs the forward path (matmul -> loss), computes the expected
    gradients dL/dX and dL/dW from the closed form (P - T) / N, runs the
    backward path and one gradient descent step, then compares the analytical
    gradients with the expected ones (and optionally the numerical ones).
    Gradient mismatches and lack of progress are logged, not raised.

    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. 1 for sigmoid and 2 or more for softmax
        X: train data of shape (N, D) and dtype TYPE_FLOAT
        T: labels, an integer array of shape (N,)
        W: weight of shape (M, D+1) and dtype TYPE_FLOAT
        log_loss_function: cross entropy log loss function. Either
            sigmoid_cross_entropy_log_loss or softmax_cross_entropy_log_loss.
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag if test the analytical gradient with the numerical one.
        log_level: logging level
        callback: callback function to invoke at the end of each epoch.
            It is called as callback(W=matmul.W[0]).

    Raises:
        AssertionError: if an argument violates the shape/dtype constraints,
            or if W has not been changed by the gradient descent.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(
        T.dtype, np.integer) and T.ndim == 1 and T.shape[0] == N
    assert isinstance(
        X, np.ndarray) and X.dtype == TYPE_FLOAT and X.ndim == 2 and X.shape[
            0] == N and X.shape[1] == D
    assert isinstance(
        W, np.ndarray) and W.dtype == TYPE_FLOAT and W.ndim == 2 and W.shape[
            0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0

    assert ((log_loss_function == sigmoid_cross_entropy_log_loss and M == 1) or
            (log_loss_function == softmax_cross_entropy_log_loss and M >= 2))

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss = CrossEntropyLogLoss(name="loss",
                               num_nodes=M,
                               log_loss_function=log_loss_function,
                               log_level=log_level)

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul = Matmul(name="matmul",
                    num_nodes=M,
                    W=W,
                    optimizer=optimizer,
                    log_level=log_level)
    matmul.objective = loss.function

    num_no_progress: int = 0  # how many successive epochs the loss L has not decreased.
    loss.T = T
    # Seed the history with the loss before any update so that history[-1] always exists.
    history: List[np.ndarray] = [loss.function(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # Calculate the matmul output Y=f(X), and get the loss L = objective(Y)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        L = loss.function(Y)

        if not (i % 50): print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # --------------------------------------------------------------------------------
        # Constraint: 1. Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # Every non-improving epoch is counted. Previously the counter was only
        # incremented when (i % 20) == 1 and reset on every other epoch, so it
        # could never exceed 1 and the error below was unreachable.
        # Epoch 0 is skipped because its L is computed from the same W as the
        # seed value in history, hence can never be an improvement.
        # --------------------------------------------------------------------------------
        if i > 0 and L >= history[-1]:
            num_no_progress += 1
            # Throttle the warning to one in 20 epochs to keep the log readable.
            if (i % 20) == 1:
                Logger.warning(
                    "Iteration [%i]: Loss[%s] has not improved from the previous [%s].",
                    i, L, history[-1])
            if num_no_progress > 20:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress)
                # Deliberately not aborting the training here.
                # break
        else:
            num_no_progress = 0

        history.append(L)

        # --------------------------------------------------------------------------------
        # Expected dL/dW.T = X.T @ dL/dY = X.T @ (P-T) / N, and dL/dX = dL/dY @ W
        # P = sigmoid(X @ W.T) or softmax(X @ W.T)
        # dL/dX = dL/dY @ W is to use W BEFORE updating W.
        # --------------------------------------------------------------------------------
        P = None
        if log_loss_function == sigmoid_cross_entropy_log_loss:
            # matmul.X is used instead of X as the expected gradients include the bias.
            P = sigmoid(np.matmul(matmul.X, matmul.W.T))
            P = P - T.reshape(-1, 1)  # T(N,) -> T(N,1) to align with P(N,1)
            assert P.shape == (
                N, 1), "P.shape is %s T.shape is %s" % (P.shape, T.shape)

        elif log_loss_function == softmax_cross_entropy_log_loss:
            # matmul.X.shape is (N, D+1), matmul.W.T.shape is (D+1, M)
            P = softmax(np.matmul(matmul.X, matmul.W.T))  # (N, M)
            P[np.arange(N), T] -= 1  # P - T where T is the index label

        EDX = np.matmul(P / N, matmul.W)  # (N,M) @ (M, D+1) -> (N, D+1)
        EDX = EDX[::, 1:]  # Hide the bias    -> (N, D)
        EDW = np.matmul(matmul.X.T,
                        P / N).T  # ((D+1,N) @ (N, M)).T -> (M, D+1)

        # --------------------------------------------------------------------------------
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # --------------------------------------------------------------------------------
        before = copy.deepcopy(matmul.W)
        dY = loss.gradient(TYPE_FLOAT(1))
        dX = matmul.gradient(dY)

        # Gradient descent. The first element of the returned list is taken as
        # the analytical dL/dW.
        # dL/dX.shape = (N, D)
        # dL/dW.shape = (M, D+1)
        dS = matmul.update()
        dW = dS[0]
        # --------------------------------------------------------------------------------
        #  Constraint 1. W in the matmul has been updated by the gradient descent.
        # --------------------------------------------------------------------------------
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        # Gradient mismatches are reported as warnings only; they do not fail the run.
        if not validate_against_expected_gradient(EDX, dX):
            Logger.warning("Expected dL/dX \n%s\nDiff\n%s", EDX, EDX - dX)
        if not validate_against_expected_gradient(EDW, dW):
            Logger.warning("Expected dL/dW \n%s\nDiff\n%s", EDW, EDW - dW)

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradients gn=[dL/dX, dL/dW]
            # dL/dX.shape = (N, D)
            # dL/dW.shape = (M, D+1)
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn, Logger)

        if callback:
            # Only the first row of W is handed to the callback.
            callback(W=matmul.W[0])

Example #2

Show file

File: test_020_matmul.py Project: oonisim/python-programs

def disabled_test_020_matmul_round_trip():
    """
    TODO: Disabled as need to re-design numerical_jacobian for 32 bit float e.g TF.

    Objective:
        Verify the forward and backward paths at matmul.

    Expected:
        Forward path:
        1. Matmul function(X) == X @ W.T
        2. Numerical gradient should be the same with numerical Jacobian

        Backward path:
        3. Analytical gradient dL/dX == dY @ W
        4. Analytical dL/dW == X.T @ dY
        5. Analytical gradients are similar to the numerical gradient ones

        Gradient descent
        6. W is updated via the gradient descent.
        7. Objective L is decreasing via the gradient descent.

    The whole run is profiled with cProfile and the statistics are printed
    when all the iterations have passed.
    """
    name = "test_020_matmul_methods"

    def objective(X: np.ndarray) -> Union[float, np.ndarray]:
        """Dummy objective function to calculate the loss L"""
        return np.sum(X)

    profiler = cProfile.Profile()
    profiler.enable()
    try:
        for _ in range(NUM_MAX_TEST_TIMES):
            # --------------------------------------------------------------------------------
            # Instantiate a Matmul layer
            # --------------------------------------------------------------------------------
            N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
            M: int = np.random.randint(1, NUM_MAX_NODES)
            D: int = np.random.randint(1, NUM_MAX_FEATURES)
            W = weights.he(M, D + 1)

            # Test both static instantiation and build()
            if TYPE_FLOAT(np.random.uniform()) < 0.5:
                matmul = Matmul(name=name,
                                num_nodes=M,
                                W=W,
                                log_level=logging.DEBUG)
            else:
                matmul_spec = {
                    _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                    _NUM_NODES: M,
                    _NUM_FEATURES: D,
                    _WEIGHTS: {
                        _SCHEME: "he",
                    },
                    _OPTIMIZER: {
                        _SCHEME: "sGd"
                    }
                }
                matmul = Matmul.build(matmul_spec)

            matmul.objective = objective

            # ================================================================================
            # Layer forward path
            # Calculate the layer output Y=f(X), and get the loss L = objective(Y)
            # Test the numerical gradient dL/dX=matmul.gradient_numerical().
            #
            # Note that bias columns are added inside the matmul layer instance, hence
            # matmul.X.shape is (N, 1+D), matmul.W.shape is (M, 1+D)
            # ================================================================================
            X = np.random.randn(N, D).astype(TYPE_FLOAT)
            Logger.debug("%s: X is \n%s", name, X)

            # pylint: disable=not-callable
            Y = matmul.function(X)
            # pylint: disable=not-callable
            L = matmul.objective(Y)

            # Constraint 1 : Matmul outputs Y should be X @ W.T
            assert np.array_equal(Y, np.matmul(matmul.X, matmul.W.T))

            # Constraint 2: Numerical gradient should be the same with numerical Jacobian
            GN = matmul.gradient_numerical()  # [dL/dX, dL/dW]

            # DO NOT use matmul.function() as the objective function for numerical_jacobian().
            # The state of the layer will be modified.
            # LX = lambda x: matmul.objective(matmul.function(x))
            def LX(x):
                y = np.matmul(x, matmul.W.T)
                # pylint: disable=not-callable
                return matmul.objective(y)

            EGNX = numerical_jacobian(LX,
                                      matmul.X)  # Numerical dL/dX including bias
            EGNX = EGNX[::, 1::]  # Remove bias for dL/dX
            assert np.array_equal(GN[0], EGNX), \
                "GN[0]\n%s\nEGNX=\n%s\n" % (GN[0], EGNX)

            # DO NOT use matmul.function() as the objective function for numerical_jacobian().
            # The state of the layer will be modified.
            # LW = lambda w: matmul.objective(np.matmul(X, w.T))
            def LW(w):
                Y = np.matmul(matmul.X, w.T)
                # pylint: disable=not-callable
                return matmul.objective(Y)

            EGNW = numerical_jacobian(LW,
                                      matmul.W)  # Numerical dL/dW including bias
            assert np.array_equal(GN[1], EGNW)  # No need to remove bias

            # ================================================================================
            # Layer backward path
            # Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dummy dL/dY.
            # ================================================================================
            dY = np.ones_like(Y)
            dX = matmul.gradient(dY)

            # Constraint 3: Matmul gradient dL/dX should be dL/dY @ W. Use a dummy dL/dY = 1.0.
            expected_dX = np.matmul(dY, matmul.W)
            expected_dX = expected_dX[::, 1::]  # Omit bias
            assert np.array_equal(dX, expected_dX)

            # Constraint 5: Analytical gradient dL/dX close to the numerical gradient GN.
            assert np.all(np.abs(dX - GN[0]) < GRADIENT_DIFF_ACCEPTANCE_VALUE), \
                "dX need close to GN[0]. dX:\n%s\ndiff \n%s\n" % (dX, dX-GN[0])

            # --------------------------------------------------------------------------------
            # Gradient update.
            # Run the gradient descent to update Wn+1 = Wn - lr * dL/dW.
            # --------------------------------------------------------------------------------
            # Back up the weights held by the layer, not the local W. When the layer
            # was created via Matmul.build() it does not use the local W, so comparing
            # against the local W would pass even if the layer never updated its weights.
            backup = copy.deepcopy(matmul.W)

            # Gradient descent. The first element of the returned list is taken as
            # the analytical dL/dW.
            dS = matmul.update()
            dW = dS[0]

            # Constraint 6.: W has been updated by the gradient descent.
            assert not np.array_equal(backup, matmul.W), "W has not been updated "

            # Constraint 5: the numerical gradient (dL/dX, dL/dW) are closer to the analytical ones.
            assert validate_against_expected_gradient(GN[0], dX), \
                "dX=\n%s\nGN[0]=\n%sdiff=\n%s\n" % (dX, GN[0], (dX-GN[0]))
            assert validate_against_expected_gradient(GN[1], dW), \
                "dW=\n%s\nGN[1]=\n%sdiff=\n%s\n" % (dW, GN[1], (dW-GN[1]))

            # Constraint 7: gradient descent progressing with the new objective L(Yn+1) < L(Yn)
            # The objective is a plain sum and can be negative, hence compare the
            # signed values directly rather than their magnitudes.
            # pylint: disable=not-callable
            assert objective(matmul.function(X)) < L, \
                "Objective has not decreased from L=%s" % L
    finally:
        # Always stop profiling, even when an assertion fails, so that the
        # profiler does not stay active for whatever runs next.
        profiler.disable()

    profiler.print_stats(sort="cumtime")

Example #3

Show file

File: base.py Project: oonisim/python-programs

def validate_relu_neuron_training(matmul: Matmul,
                                  activation: ReLU,
                                  loss: CrossEntropyLogLoss,
                                  X: np.ndarray,
                                  T: np.ndarray,
                                  num_epochs: int = 100,
                                  test_numerical_gradient: bool = False,
                                  callback: Callable = None):
    """Train a matmul -> ReLU -> log loss neuron and validate each round trip.

    The layers are chained via their objective functions, then for each epoch
    the loss is calculated, the round trip is validated against the expected
    gradient from the log loss, and a gradient descent step is run on matmul.
    The learning rate of matmul is decayed once the loss has failed to improve
    more than 5 successive times, and the training stops after more than 50.

    Args:
        matmul: Matmul layer to train
        activation: ReLU layer placed after the matmul
        loss: CrossEntropyLogLoss layer placed after the activation
        X: train data
        T: labels
        num_epochs: Number of epochs to run
        test_numerical_gradient: Not used in this function.
        callback: callback function invoked as callback(W=matmul.W) at the
            end of each epoch.
    Returns:
        List of the losses, one per completed epoch.
    """
    # Wire the layers: each objective maps a layer output to the loss L.
    activation.objective = loss.function
    matmul.objective = compose(activation.function, loss.function)
    forward_to_loss = compose(matmul.function, matmul.objective)

    losses: List[np.ndarray] = []
    stall_count: int = 0  # successive epochs without a decrease of the loss

    loss.T = T
    for epoch in range(num_epochs):
        # Forward path first, as it sets the state (matmul.X) used below.
        current_loss = forward_to_loss(X)
        probabilities = softmax(relu(np.matmul(matmul.X, matmul.W.T)))
        expected_dA = expected_gradient_from_log_loss(
            P=probabilities, T=T, N=X.shape[0]
        )

        # ********************************************************************************
        # Constraint: Expected gradients must match actual
        # ********************************************************************************
        validate_relu_neuron_round_trip(
            matmul=matmul, activation=activation, X=X, dA=expected_dA
        )

        # --------------------------------------------------------------------------------
        # Gradient descent step on the matmul weights.
        # --------------------------------------------------------------------------------
        W_before = copy.deepcopy(matmul.W)
        matmul.update()

        # ********************************************************************************
        #  Constraint. W in the matmul has been updated by the gradient descent.
        # ********************************************************************************
        Logger.debug("W after is \n%s", matmul.W)
        if np.array_equal(W_before, matmul.W):
            Logger.warning("W has not been updated")

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        stalled = epoch > 0 and current_loss >= losses[-1]
        if not stalled:
            stall_count = 0
        else:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                epoch, current_loss, losses[-1], stall_count + 1)
            stall_count += 1

            # The learning rate is left alone for the first few stalls and only
            # decayed once they keep coming in succession.
            if stall_count > 5:
                matmul.lr = matmul.lr * 0.95

            # Give up; the loss of this epoch is not recorded in that case.
            if stall_count > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    stall_count)
                break

        losses.append(current_loss)

        if callback:
            callback(W=matmul.W)

    return losses