def test_020_matmul_build_specification():
    name = "matmul01"
    num_nodes = 8
    num_features = 2
    weights_initialization_scheme = "he"
    expected_spec = {
        _SCHEME: Matmul.class_id(),
        _PARAMETERS: {
            _NAME: name,
            _NUM_NODES: num_nodes,
            _NUM_FEATURES: num_features,  # NOT including bias
            _WEIGHTS: {
                _SCHEME: weights_initialization_scheme
            },
            _OPTIMIZER: SGD.specification(name="sgd")
        }
    }
    actual_spec = Matmul.specification(
        name=name,
        num_nodes=num_nodes,
        num_features=num_features,
        weights_initialization_scheme=weights_initialization_scheme,
    )
    assert expected_spec == actual_spec, \
        "expected\n%s\nactual\n%s\n" % (expected_spec, actual_spec)
def _instantiate(name: str, num_nodes: int, num_features: int, objective=None):
    category = TYPE_FLOAT(np.random.uniform())
    if category < 0.3:
        W = weights.he(num_nodes, num_features + 1)
    elif category < 0.7:
        W = weights.xavier(num_nodes, num_features + 1)
    else:
        W = weights.uniform(num_nodes, num_features + 1)

    matmul = Matmul(name=name, num_nodes=num_nodes, W=W)
    if objective is not None:
        matmul.objective = objective
    return matmul
def test_020_matmul_instantiation():
    """
    Objective:
        Verify the initialized layer instance provides its properties.
    Expected:
        * name, num_nodes, M, log_level are the same as initialized.
        * X, T, dX, objective return what is set.
        * N, M properties are provided after X is set.
        * Y, dY properties are provided after they are set.
    """
    def objective(X: np.ndarray) -> Union[float, np.ndarray]:
        """Dummy objective function"""
        return np.sum(X)

    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        name = "test_020_matmul_instantiation"
        matmul = Matmul(name=name,
                        num_nodes=M,
                        W=weights.he(M, D + 1),
                        log_level=logging.DEBUG)
        matmul.objective = objective

        assert matmul.name == name
        assert matmul.num_nodes == matmul.M == M

        matmul._D = D
        assert matmul.D == D

        X = np.random.randn(N, D).astype(TYPE_FLOAT)
        matmul.X = X
        assert np.array_equal(matmul.X, X)
        assert matmul.N == N == X.shape[0]

        matmul._dX = X
        assert np.array_equal(matmul.dX, X)

        T = np.random.randint(0, M, N).astype(TYPE_LABEL)
        matmul.T = T
        assert np.array_equal(matmul.T, T)

        matmul._Y = np.dot(X, X.T)
        assert np.array_equal(matmul.Y, np.dot(X, X.T))

        matmul._dY = np.array(0.9)
        assert matmul.dY == np.array(0.9)

        matmul.logger.debug("This is a pytest")

        assert matmul.objective == objective
def test_020_matmul_instantiation_to_fail():
    """
    Objective:
        Verify the layer class validates the initialization parameter constraints.
    Expected:
        Initialization detects parameter constraints that are not met and fails.
    """
    name = "test_020_matmul_instantiation_to_fail"
    for _ in range(NUM_MAX_TEST_TIMES):
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D = 1
        # Constraint: Name is string with length > 0.
        try:
            Matmul(name="", num_nodes=1, W=weights.xavier(M, D + 1))
            raise RuntimeError(
                "Matmul initialization with invalid name must fail")
        except AssertionError:
            pass

        # Constraint: num_nodes > 1
        try:
            Matmul(name="test_020_matmul",
                   num_nodes=0,
                   W=weights.xavier(M, D + 1))
            raise RuntimeError("Matmul(num_nodes<1) must fail.")
        except AssertionError:
            pass

        # Constraint: logging level is correct.
        try:
            Matmul(name="test_020_matmul",
                   num_nodes=M,
                   W=weights.xavier(M, D + 1),
                   log_level=-1)
            raise RuntimeError(
                "Matmul initialization with invalid log level must fail")
        except (AssertionError, KeyError) as e:
            pass

        # Constraint: Matmul instance creation fails as W.shape[0] != num_nodes
        try:
            Matmul(name="test_020_matmul", num_nodes=1, W=weights.xavier(2, D + 1))
            raise RuntimeError(
                "Matmul initialization with W.shape[0] != num_nodes must fail")
        except AssertionError:
            pass
def test_020_matmul_builder_to_fail_weight_spec():
    """
    Objective:
        Verify the Matmul.build()
    Expected:
        build() parses the spec and fails with invalid weight configurations
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(1, 100)
        D = np.random.randint(1, 100)  # NOT including bias

        # ----------------------------------------------------------------------
        # Validate the correct specification.
        # NOTE: Invalidate one parameter at a time from the correct spec.
        # Otherwise it is unclear which parameter caused the failure.
        # ----------------------------------------------------------------------
        valid_matmul_spec = {
            _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
            _NUM_NODES: M,
            _NUM_FEATURES: D,
            _WEIGHTS: {
                _SCHEME: "he"
            }
        }
        try:
            Matmul.build(valid_matmul_spec)
        except Exception as e:
            raise RuntimeError("Matmul.build() must succeed with %s" %
                               valid_matmul_spec)

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_WEIGHTS][_SCHEME] = "invalid_scheme"
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with invalid weight scheme")
        except AssertionError:
            pass

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def output(m, d):
    return {
        "matmul":
        Matmul.specification(
            name="matmul",
            num_nodes=m,
            num_features=d,
            weights_initialization_scheme="he",
            weights_optimizer_specification=optimizer.SGD.specification(
                lr=0.05, l2=1e-3)),
        "loss":
        CrossEntropyLogLoss.specification(name="loss", num_nodes=m)
    }
def test():
    M = 1
    D = 2
    N = 100

    X, T, V = linear_separable(d=D, n=N)
    x_min, x_max = X[:, 0].min(), X[:, 0].max()
    y_min, y_max = X[:, 1].min(), X[:, 1].max()

    sigmoid_classifier_specification = {
        _NAME: "softmax_classifier",
        _NUM_NODES: M,
        _LOG_LEVEL: logging.ERROR,
        _COMPOSITE_LAYER_SPEC: {
            "matmul01":
            Matmul.specification(
                name="matmul",
                num_nodes=M,
                num_features=D,
                weights_initialization_scheme="he",
                weights_optimizer_specification=SGD.specification(
                    lr=TYPE_FLOAT(0.2), l2=TYPE_FLOAT(1e-3))),
            "loss":
            CrossEntropyLogLoss.specification(
                name="loss",
                num_nodes=M,
                loss_function=sigmoid_cross_entropy_log_loss.__qualname__)
        }
    }
    logistic_classifier = SequentialNetwork.build(
        specification=sigmoid_classifier_specification)

    for i in range(50):
        logistic_classifier.train(X=X, T=T)

    prediction = logistic_classifier.predict(
        np.array([-1., -1.], dtype=TYPE_FLOAT))
    assert np.all(np.isin(prediction, [0, 1]))
    print(prediction)
def inference(index: int, m: int, d: int) -> Dict[str, dict]:
    """Build matmul-bn-activation specifications
    Args:
        index: stack position in the network
        m: number of outputs (== number of nodes)
        d: number of features in the input
    """
    return {
        f"matmul{index:03d}":
        Matmul.specification(
            name=f"matmul{index:03d}",
            num_nodes=m,
            num_features=d,
            weights_initialization_scheme="he",
            weights_optimizer_specification=optimizer.SGD.specification(
                lr=0.05, l2=1e-3)),
        f"bn{index:03d}":
        BatchNormalization.specification(
            name=f"bn{index:03d}",
            num_nodes=m,
            gamma_optimizer_specification=optimizer.SGD.specification(
                lr=0.05, l2=1e-3),
            beta_optimizer_specification=optimizer.SGD.specification(
                lr=0.05,
                l2=1e-3,
            ),
            momentum=0.9),
        f"activation{index:03d}":
        ReLU.specification(
            name=f"relu{index:03d}",
            num_nodes=m,
        ) if activation == ReLU.class_id() else Sigmoid.specification(
            name=f"sigmoid{index:03d}",
            num_nodes=m,
        )
    }
def train_matmul_bn_relu_classifier(N: int,
                                    D: int,
                                    M: int,
                                    X: np.ndarray,
                                    T: np.ndarray,
                                    W: np.ndarray,
                                    log_loss_function: Callable,
                                    optimizer: Optimizer,
                                    num_epochs: int = 100,
                                    test_numerical_gradient: bool = False,
                                    log_level: int = logging.ERROR,
                                    callback: Callable = None):
    """Test case for binary classification with matmul + log loss.
    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. 1 for sigmoid and 2 for softmax
        X: train data
        T: labels
        W: weight
        log_loss_function: cross entropy log loss function
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag to test the analytical gradient against the numerical one.
        log_level: logging level
        callback: callback function to invoke at the end of each epoch.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(
        T.dtype, np.integer) and T.ndim == 1 and T.shape[0] == N
    assert isinstance(
        X, np.ndarray) and X.dtype == TYPE_FLOAT and X.ndim == 2 and X.shape[
            0] == N and X.shape[1] == D
    assert isinstance(
        W, np.ndarray) and W.dtype == TYPE_FLOAT and W.ndim == 2 and W.shape[
            0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0

    assert (log_loss_function == softmax_cross_entropy_log_loss and M >= 2)

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss: CrossEntropyLogLoss = CrossEntropyLogLoss(
        name="loss",
        num_nodes=M,
        log_loss_function=log_loss_function,
        log_level=log_level)

    # --------------------------------------------------------------------------------
    # Instantiate a ReLU layer
    # --------------------------------------------------------------------------------
    activation: ReLU = ReLU(name="relu", num_nodes=M, log_level=log_level)
    activation.objective = loss.function

    # --------------------------------------------------------------------------------
    # Instantiate a BatchNormalization layer
    # --------------------------------------------------------------------------------
    bn: BatchNormalization = BatchNormalization(name=name,
                                                num_nodes=M,
                                                log_level=logging.WARNING)
    bn.objective = compose(activation.function, activation.objective)

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul: Matmul = Matmul(name="matmul",
                            num_nodes=M,
                            W=W,
                            optimizer=optimizer,
                            log_level=log_level)
    matmul.objective = compose(bn.function, bn.objective)

    # --------------------------------------------------------------------------------
    # Instantiate a Normalization layer
    # Need to apply the same mean and std to the non-training data set.
    # --------------------------------------------------------------------------------
    # norm = Standardization(
    #     name="standardization",
    #     num_nodes=M,
    #     log_level=log_level
    # )
    # X = np.copy(X)
    # X = norm.function(X)

    # Network objective function f: L=f(X)
    objective = compose(matmul.function, matmul.objective)
    prediction = compose(matmul.predict, bn.predict, activation.predict)

    num_no_progress: int = 0  # how many times the loss L has not decreased.
    loss.T = T
    # pylint: disable=not-callable
    history: List[np.ndarray] = [matmul.objective(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # 1. Calculate the matmul output Y=matmul.f(X)
        # 2. Calculate the ReLU output A=activation.f(Y)
        # 3. Calculate the loss L = loss(A)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        BN = bn.function(Y)
        A = activation.function(BN)
        L = loss.function(A)

        # ********************************************************************************
        # Constraint: Network objective L must match layer-by-layer output
        # ********************************************************************************
        # pylint: disable=not-callable
        assert L == objective(X) and L.shape == (), \
            f"Network objective L(X) %s must match layer-by-layer output %s." \
            % (objective(X), L)

        if not (i % 10): print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        if L >= history[-1] and i > 0:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                i, L, history[-1], num_no_progress + 1)
            # --------------------------------------------------------------------------------
            # Reducing the learning rate can make the situation worse.
            # When the lr was reduced every time L >= history[-1], the non-improvements
            # became successive and eventually exceeded 50 in a row, ending in failure.
            # Keeping the learning rate makes L >= history[-1] more frequent, but with at
            # most about 3 successive occurrences, and the training keeps progressing.
            # --------------------------------------------------------------------------------
            num_no_progress += 1
            if num_no_progress > 5:
                matmul.lr = matmul.lr * TYPE_FLOAT(0.95)

            if num_no_progress > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress)
                break
        else:
            num_no_progress = 0

        history.append(L)

        # ================================================================================
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # ================================================================================
        before = copy.deepcopy(matmul.W)
        dA = loss.gradient(TYPE_FLOAT(1))  # dL/dA
        dBN = activation.gradient(dA)  # dL/dBN
        dY = bn.gradient(dBN)  # dL/dY
        dX = matmul.gradient(dY)  # dL/dX

        # gradient descent and get the analytical gradients
        bn.update()

        dS = matmul.update()  # dL/dX, dL/dW
        # ********************************************************************************
        #  Constraint. W in the matmul has been updated by the gradient descent.
        # ********************************************************************************
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradient
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn,
                                                Logger)  # prepend dL/dX

        if callback:
            # if W.shape[1] == 1 else callback(W=np.average(matmul.W, axis=0))
            callback(W=matmul.W)

    return matmul.W, objective, prediction
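# --------------------------------------------------------------------------------
# A minimal sketch (an assumption, not the framework's implementation) of the
# `compose` helper used above to chain per-layer callables, so that
# compose(f, g)(x) behaves as g(f(x)). It is only meant to illustrate how the
# layer objectives are wired together (matmul -> bn -> relu -> loss).
# --------------------------------------------------------------------------------
def _compose_sketch(*functions: Callable) -> Callable:
    def composed(x):
        # Apply the callables left to right, feeding each output into the next.
        for f in functions:
            x = f(x)
        return x
    return composed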
def disabled_test_020_matmul_round_trip():
    """
    TODO: Disabled as numerical_jacobian needs to be re-designed for 32-bit float, e.g. TF.

    Objective:
        Verify the forward and backward paths at matmul.

    Expected:
        Forward path:
        1. Matmul function(X) == X @ W.T
        2. Numerical gradient should be the same with numerical Jacobian

        Backward path:
        3. Analytical gradient dL/dX == dY @ W
        4. Analytical dL/dW == X.T @ dY
        5. Analytical gradients are similar to the numerical gradient ones

        Gradient descent
        6. W is updated via the gradient descent.
        7. Objective L is decreasing via the gradient descent.

    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # --------------------------------------------------------------------------------
        # Instantiate a Matmul layer
        # --------------------------------------------------------------------------------
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        W = weights.he(M, D + 1)
        name = "test_020_matmul_methods"

        def objective(X: np.ndarray) -> Union[float, np.ndarray]:
            """Dummy objective function to calculate the loss L"""
            return np.sum(X)

        # Test both static instantiation and build()
        if TYPE_FLOAT(np.random.uniform()) < 0.5:
            matmul = Matmul(name=name,
                            num_nodes=M,
                            W=W,
                            log_level=logging.DEBUG)
        else:
            matmul_spec = {
                _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                _NUM_NODES: M,
                _NUM_FEATURES: D,
                _WEIGHTS: {
                    _SCHEME: "he",
                },
                _OPTIMIZER: {
                    _SCHEME: "sGd"
                }
            }
            matmul = Matmul.build(matmul_spec)

        matmul.objective = objective

        # ================================================================================
        # Layer forward path
        # Calculate the layer output Y=f(X), and get the loss L = objective(Y)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        #
        # Note that bias columns are added inside the matmul layer instance, hence
        # matmul.X.shape is (N, 1+D), matmul.W.shape is (M, 1+D)
        # ================================================================================
        X = np.random.randn(N, D).astype(TYPE_FLOAT)
        Logger.debug("%s: X is \n%s", name, X)

        # pylint: disable=not-callable
        Y = matmul.function(X)
        # pylint: disable=not-callable
        L = matmul.objective(Y)

        # Constraint 1: Matmul outputs Y should be X @ W.T
        assert np.array_equal(Y, np.matmul(matmul.X, matmul.W.T))

        # Constraint 2: Numerical gradient should be the same with numerical Jacobian
        GN = matmul.gradient_numerical()  # [dL/dX, dL/dW]

        # DO NOT use matmul.function() as the objective function for numerical_jacobian().
        # The state of the layer will be modified.
        # LX = lambda x: matmul.objective(matmul.function(x))
        def LX(x):
            y = np.matmul(x, matmul.W.T)
            # pylint: disable=not-callable
            return matmul.objective(y)

        EGNX = numerical_jacobian(LX,
                                  matmul.X)  # Numerical dL/dX including bias
        EGNX = EGNX[::, 1::]  # Remove bias for dL/dX
        assert np.array_equal(GN[0], EGNX), \
            "GN[0]\n%s\nEGNX=\n%s\n" % (GN[0], EGNX)

        # DO NOT use matmul.function() as the objective function for numerical_jacobian().
        # The state of the layer will be modified.
        # LW = lambda w: matmul.objective(np.matmul(X, w.T))
        def LW(w):
            Y = np.matmul(matmul.X, w.T)
            # pylint: disable=not-callable
            return matmul.objective(Y)

        EGNW = numerical_jacobian(LW,
                                  matmul.W)  # Numerical dL/dW including bias
        assert np.array_equal(GN[1], EGNW)  # No need to remove bias

        # ================================================================================
        # Layer backward path
        # Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dummy dL/dY.
        # ================================================================================
        dY = np.ones_like(Y)
        dX = matmul.gradient(dY)

        # Constraint 3: Matmul gradient dL/dX should be dL/dY @ W. Use a dummy dL/dY = 1.0.
        expected_dX = np.matmul(dY, matmul.W)
        expected_dX = expected_dX[::, 1::]  # Omit bias
        assert np.array_equal(dX, expected_dX)

        # Constraint 5: Analytical gradient dL/dX close to the numerical gradient GN.
        assert np.all(np.abs(dX - GN[0]) < GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            "dX need close to GN[0]. dX:\n%s\ndiff \n%s\n" % (dX, dX-GN[0])

        # --------------------------------------------------------------------------------
        # Gradient update.
        # Run the gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # --------------------------------------------------------------------------------
        # Python passes W by reference, hence the gradient descent updates it in place
        # (no temporary copy). Back up W beforehand to compare before/after.
        backup = copy.deepcopy(W)

        # Gradient descent and returns analytical dL/dX, dL/dW
        dS = matmul.update()
        dW = dS[0]

        # Constraint 6: W has been updated by the gradient descent.
        assert np.any(backup != matmul.W), "W has not been updated."

        # Constraint 5: The analytical gradients (dL/dX, dL/dW) are close to the numerical ones.
        assert validate_against_expected_gradient(GN[0], dX), \
            "dX=\n%s\nGN[0]=\n%sdiff=\n%s\n" % (dX, GN[0], (dX-GN[0]))
        assert validate_against_expected_gradient(GN[1], dW), \
            "dW=\n%s\nGN[1]=\n%sdiff=\n%s\n" % (dW, GN[1], (dW-GN[1]))

        # Constraint 7: gradient descent progressing with the new objective L(Yn+1) < L(Yn)
        # pylint: disable=not-callable
        assert np.all(objective(matmul.function(X)) < L)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def test_020_matmul_builder_to_succeed():
    """
    Objective:
        Verify the Matmul.build()
    Expected:
        build() parses the spec and succeeds
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(1, 100)
        D = np.random.randint(1, 100)  # NOT including bias

        # ----------------------------------------------------------------------
        # Validate the correct specification.
        # NOTE: Invalidate one parameter at a time from the correct spec.
        # Otherwise it is unclear which parameter caused the failure.
        # ----------------------------------------------------------------------
        lr = TYPE_FLOAT(np.random.uniform())
        l2 = TYPE_FLOAT(np.random.uniform())
        valid_matmul_spec = {
            _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
            _NUM_NODES: M,
            _NUM_FEATURES: D,
            _WEIGHTS: {
                _SCHEME: "he",
            },
            _OPTIMIZER: {
                _SCHEME: "sGd",
                _PARAMETERS: {
                    "lr": lr,
                    "l2": l2
                }
            }
        }
        try:
            matmul: Matmul = Matmul.build(valid_matmul_spec)
            assert matmul.optimizer.lr == lr
            assert matmul.optimizer.l2 == l2
        except Exception as e:
            raise RuntimeError("Matmul.build() must succeed with %s" %
                               valid_matmul_spec)

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_SCHEME] = "sgd"
        try:
            Matmul.build(valid_matmul_spec)
        except Exception as e:
            raise RuntimeError("Matmul.build() must succeed with %s" %
                               valid_matmul_spec)

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_SCHEME] = "SGD"
        try:
            Matmul.build(valid_matmul_spec)
        except Exception as e:
            raise RuntimeError("Matmul.build() must succeed with %s" %
                               valid_matmul_spec)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def test_020_matmul_builder_to_fail_optimizer_spec():
    """
    Objective:
        Verify the Matmul.build()
    Expected:
        build() parses the spec and fails with invalid configurations
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(1, 100)
        D = np.random.randint(1, 100)  # NOT including bias

        # ----------------------------------------------------------------------
        # Validate the correct specification.
        # NOTE: Invalidate one parameter at a time from the correct spec.
        # Otherwise it is unclear which parameter caused the failure.
        # ----------------------------------------------------------------------
        valid_matmul_spec = {
            _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
            _NUM_NODES: M,
            _NUM_FEATURES: D,
            _WEIGHTS: {
                _SCHEME: "he"
            },
            _OPTIMIZER: {
                _SCHEME: "sGd",
                _PARAMETERS: {
                    "lr": TYPE_FLOAT(np.random.uniform()),
                    "l2": TYPE_FLOAT(np.random.uniform())
                }
            },
            "log_level": logging.ERROR
        }
        try:
            Matmul.build(valid_matmul_spec)
        except Exception as e:
            raise RuntimeError("Matmul.build() must succeed with %s" %
                               valid_matmul_spec)

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER] = ""
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with invalid optimizer spec")
        except AssertionError:
            pass

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_SCHEME] = "invalid"
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with invalid optimizer spec")
        except AssertionError:
            pass

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_PARAMETERS]["lr"] = np.random.uniform(-1, 0)
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with invalid lr value")
        except AssertionError:
            pass

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_PARAMETERS]["l2"] = np.random.uniform(-1, 0)
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with invalid l2 value")
        except AssertionError:
            pass

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def test_020_matmul_instance_properties():
    """
    Objective:
        Verify the layer class validates that its parameters have been initialized before access.
    Expected:
        Access to non-initialized parameters is detected and fails.
    """
    msg = "Accessing uninitialized property of the layer must fail."

    for _ in range(NUM_MAX_TEST_TIMES):
        name = random_string(np.random.randint(1, 10))
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        matmul = Matmul(name=name,
                        num_nodes=M,
                        W=weights.uniform(M, D + 1),
                        log_level=logging.DEBUG)

        # --------------------------------------------------------------------------------
        # To pass
        # --------------------------------------------------------------------------------
        try:
            if not matmul.name == name:
                raise RuntimeError("matmul.name == name should be true")
        except AssertionError as e:
            raise RuntimeError(
                "Access to name should be allowed as already initialized."
            ) from e

        try:
            if not matmul.M == M:
                raise RuntimeError("matmul.M == M should be true")
        except AssertionError as e:
            raise RuntimeError(
                "Access to M should be allowed as already initialized.") from e

        try:
            if not isinstance(matmul.logger, logging.Logger):
                raise RuntimeError(
                    "isinstance(matmul.logger, logging.Logger) should be true")
        except AssertionError as e:
            raise RuntimeError(
                "Access to logger should be allowed as already initialized."
            ) from e

        try:
            a = matmul.D
        except AssertionError:
            raise RuntimeError(
                "Access to D should be allowed as already initialized.")

        try:
            assert matmul.W is not None
        except AssertionError:
            raise RuntimeError(
                "Access to W should be allowed as already initialized.")

        try:
            assert matmul.optimizer is not None
        except AssertionError:
            raise RuntimeError(
                "Access to optimizer should be allowed as already initialized."
            )

        # --------------------------------------------------------------------------------
        # To fail
        # --------------------------------------------------------------------------------
        try:
            print(matmul.X)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            matmul.X = int(1)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(matmul.dX)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(matmul.dW)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(matmul.Y)
            raise RuntimeError(msg)
        except AssertionError:
            pass
        try:
            matmul._Y = int(1)
            print(matmul.Y)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(matmul.dY)
            raise RuntimeError(msg)
        except AssertionError:
            pass
        try:
            matmul._dY = int(1)
            print(matmul.dY)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(matmul.T)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            matmul.T = float(1)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            # pylint: disable=not-callable
            matmul.objective(np.array(1.0, dtype=TYPE_FLOAT))
            raise RuntimeError(msg)
        except AssertionError:
            pass

        try:
            print(matmul.N)
            raise RuntimeError(msg)
        except AssertionError:
            pass

        assert matmul.name == name
        assert matmul.num_nodes == M

        try:
            matmul = Matmul(name=name,
                            num_nodes=M,
                            W=weights.xavier(M, D + 1),
                            log_level=logging.DEBUG)
            matmul.function(int(1))
            raise RuntimeError("Invoke matmul.function(int(1)) must fail.")
        except AssertionError:
            pass

        try:
            matmul = Matmul(name=name,
                            num_nodes=M,
                            W=weights.xavier(M, D + 1),
                            log_level=logging.DEBUG)
            matmul.gradient(int(1))
            raise RuntimeError("Invoke matmul.gradient(int(1)) must fail.")
        except AssertionError:
            pass
def train_binary_classifier(N: int,
                            D: int,
                            M: int,
                            X: np.ndarray,
                            T: np.ndarray,
                            W: np.ndarray,
                            log_loss_function: Callable,
                            optimizer: Optimizer,
                            num_epochs: int = 100,
                            test_numerical_gradient: bool = False,
                            log_level: int = logging.ERROR,
                            callback: Callable = None):
    """Test case for binary classification with matmul + log loss.
    Args:
        N: Batch size
        D: Number of features
        M: Number of nodes. 1 for sigmoid and 2 for softmax
        X: train data
        T: labels
        W: weight
        log_loss_function: cross entropy log loss function
        optimizer: Optimizer
        num_epochs: Number of epochs to run
        test_numerical_gradient: Flag to test the analytical gradient against the numerical one.
        log_level: logging level
        callback: callback function to invoke at the end of each epoch.
    """
    name = __name__
    assert isinstance(T, np.ndarray) and np.issubdtype(
        T.dtype, np.integer) and T.ndim == 1 and T.shape[0] == N
    assert isinstance(
        X, np.ndarray) and X.dtype == TYPE_FLOAT and X.ndim == 2 and X.shape[
            0] == N and X.shape[1] == D
    assert isinstance(
        W, np.ndarray) and W.dtype == TYPE_FLOAT and W.ndim == 2 and W.shape[
            0] == M and W.shape[1] == D + 1
    assert num_epochs > 0 and N > 0 and D > 0

    assert ((log_loss_function == sigmoid_cross_entropy_log_loss and M == 1) or
            (log_loss_function == softmax_cross_entropy_log_loss and M >= 2))

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    loss = CrossEntropyLogLoss(name="loss",
                               num_nodes=M,
                               log_loss_function=log_loss_function,
                               log_level=log_level)

    # --------------------------------------------------------------------------------
    # Instantiate a Matmul layer
    # --------------------------------------------------------------------------------
    matmul = Matmul(name="matmul",
                    num_nodes=M,
                    W=W,
                    optimizer=optimizer,
                    log_level=log_level)
    matmul.objective = loss.function

    num_no_progress: int = 0  # how many times the loss L has not decreased.
    loss.T = T
    history: List[np.ndarray] = [loss.function(matmul.function(X))]

    for i in range(num_epochs):
        # --------------------------------------------------------------------------------
        # Layer forward path
        # Calculate the matmul output Y=f(X), and get the loss L = objective(Y)
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        # --------------------------------------------------------------------------------
        Y = matmul.function(X)
        L = loss.function(Y)

        if not (i % 50): print(f"iteration {i} Loss {L}")
        Logger.info("%s: iteration[%s]. Loss is [%s]", name, i, L)

        # --------------------------------------------------------------------------------
        # Constraint 1: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # --------------------------------------------------------------------------------
        if L >= history[-1] and (i % 20) == 1:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s].",
                i, L, history[-1])
            if (num_no_progress := num_no_progress + 1) > 20:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress)
                # break
        else:
            num_no_progress = 0

        history.append(L)

        # --------------------------------------------------------------------------------
        # Expected dL/dW.T = X.T @ dL/dY = X.T @ (P-T) / N, and dL/dX = dL/dY @ W
        # where P = sigmoid(X @ W.T) or softmax(X @ W.T).
        # dL/dX = dL/dY @ W must use W BEFORE W is updated.
        # --------------------------------------------------------------------------------
        P = None
        if log_loss_function == sigmoid_cross_entropy_log_loss:
            # P = sigmoid(np.matmul(X, W.T))
            P = sigmoid(np.matmul(matmul.X, matmul.W.T))
            P = P - T.reshape(-1, 1)  # T(N,) -> T(N,1) to align with P(N,1)
            assert P.shape == (
                N, 1), "P.shape is %s T.shape is %s" % (P.shape, T.shape)

        elif log_loss_function == softmax_cross_entropy_log_loss:
            # matmul.X.shape is (N, D+1), matmul.W.T.shape is (D+1, M)
            P = softmax(np.matmul(matmul.X, matmul.W.T))  # (N, M)
            P[np.arange(N), T] -= 1

        EDX = np.matmul(P / N, matmul.W)  # (N,M) @ (M, D+1) -> (N, D+1)
        EDX = EDX[::, 1:]  # Hide the bias    -> (N, D)
        EDW = np.matmul(matmul.X.T,
                        P / N).T  # ((D+1,N) @ (N, M)).T -> (M, D+1)

        # --------------------------------------------------------------------------------
        # Layer backward path
        # 1. Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dL/dY.
        # 2. Gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # --------------------------------------------------------------------------------
        before = copy.deepcopy(matmul.W)
        dY = loss.gradient(TYPE_FLOAT(1))
        dX = matmul.gradient(dY)

        # gradient descent and get the analytical gradients dS=[dL/dX, dL/dW]
        # dL/dX.shape = (N, D)
        # dL/dW.shape = (M, D+1)
        dS = matmul.update()
        dW = dS[0]
        # --------------------------------------------------------------------------------
        #  Constraint 1. W in the matmul has been updated by the gradient descent.
        # --------------------------------------------------------------------------------
        Logger.debug("W after is \n%s", matmul.W)
        assert not np.array_equal(before, matmul.W), "W has not been updated."

        if not validate_against_expected_gradient(EDX, dX):
            Logger.warning("Expected dL/dX \n%s\nDiff\n%s", EDX, EDX - dX)
        if not validate_against_expected_gradient(EDW, dW):
            Logger.warning("Expected dL/dW \n%s\nDiff\n%s", EDW, EDW - dW)

        if test_numerical_gradient:
            # --------------------------------------------------------------------------------
            # Numerical gradients gn=[dL/dX, dL/dW]
            # dL/dX.shape = (N, D)
            # dL/dW.shape = (M, D+1)
            # --------------------------------------------------------------------------------
            gn = matmul.gradient_numerical()
            validate_against_numerical_gradient([dX] + dS, gn, Logger)

        if callback:
            # if W.shape[1] == 1 else callback(W=np.average(matmul.W, axis=0))
            callback(W=matmul.W[0])
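# --------------------------------------------------------------------------------
# A standalone sketch (not part of the tests) of the identity relied on above for
# the sigmoid case: with p = sigmoid(z) and
#   L = -mean(T*log(p) + (1-T)*log(1-p)),
# the gradient is dL/dz = (p - T) / N. Only NumPy is assumed; sigmoid is written
# inline here rather than using the framework's implementation.
# --------------------------------------------------------------------------------
def _sigmoid_cross_entropy_gradient_sketch(N=5, h=1e-6):
    rng = np.random.default_rng(0)
    z = rng.standard_normal((N, 1))
    T = rng.integers(0, 2, size=(N, 1)).astype(float)

    def L(zz):
        p = 1.0 / (1.0 + np.exp(-zz))
        return -np.mean(T * np.log(p) + (1 - T) * np.log(1 - p))

    analytical = (1.0 / (1.0 + np.exp(-z)) - T) / N     # (p - T) / N

    # Numerical gradient via central differences, element by element.
    numerical = np.zeros_like(z)
    for i in range(N):
        e = np.zeros_like(z)
        e[i, 0] = h
        numerical[i, 0] = (L(z + e) - L(z - e)) / (2 * h)

    assert np.allclose(analytical, numerical, atol=1e-5)
    return analytical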
def validate_relu_neuron_training(matmul: Matmul,
                                  activation: ReLU,
                                  loss: CrossEntropyLogLoss,
                                  X: np.ndarray,
                                  T: np.ndarray,
                                  num_epochs: int = 100,
                                  test_numerical_gradient: bool = False,
                                  callback: Callable = None):
    activation.objective = loss.function
    matmul.objective = compose(activation.function, loss.function)
    objective = compose(matmul.function, matmul.objective)

    num_no_progress: int = 0  # how many times the loss L has not decreased.
    history: List[np.ndarray] = []

    loss.T = T
    for i in range(num_epochs):
        L = objective(X)
        N = X.shape[0]
        P = softmax(relu(np.matmul(matmul.X, matmul.W.T)))
        EDA = expected_gradient_from_log_loss(P=P, T=T, N=N)

        # ********************************************************************************
        # Constraint: Expected gradients must match actual
        # ********************************************************************************
        validate_relu_neuron_round_trip(matmul=matmul,
                                        activation=activation,
                                        X=X,
                                        dA=EDA)

        # --------------------------------------------------------------------------------
        # gradient descent and get the analytical dL/dX, dL/dW
        # --------------------------------------------------------------------------------
        previous_W = copy.deepcopy(matmul.W)
        matmul.update()  # dL/dX, dL/dW

        # ********************************************************************************
        #  Constraint. W in the matmul has been updated by the gradient descent.
        # ********************************************************************************
        Logger.debug("W after is \n%s", matmul.W)
        if np.array_equal(previous_W, matmul.W):
            Logger.warning("W has not been updated")

        # ********************************************************************************
        # Constraint: Objective/Loss L(Yn+1) after gradient descent < L(Yn)
        # ********************************************************************************
        if i > 0 and L >= history[-1]:
            Logger.warning(
                "Iteration [%i]: Loss[%s] has not improved from the previous [%s] for %s times.",
                i, L, history[-1], num_no_progress + 1)
            # --------------------------------------------------------------------------------
            # Reducing the learning rate can make the situation worse.
            # When the lr was reduced every time L >= history[-1], the non-improvements
            # became successive and eventually exceeded 50 in a row, ending in failure.
            # Keeping the learning rate makes L >= history[-1] more frequent, but with at
            # most about 3 successive occurrences, and the training keeps progressing.
            # --------------------------------------------------------------------------------
            num_no_progress += 1
            if num_no_progress > 5:
                matmul.lr = matmul.lr * TYPE_FLOAT(0.95)

            if num_no_progress > 50:
                Logger.error(
                    "The training has no progress more than %s times.",
                    num_no_progress)
                break
        else:
            num_no_progress = 0

        history.append(L)

        if callback:
            callback(W=matmul.W)

    return history
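# --------------------------------------------------------------------------------
# A hypothetical sketch of what expected_gradient_from_log_loss presumably
# computes: the softmax cross-entropy gradient (P - one_hot(T)) / N, matching
# how P is adjusted in train_binary_classifier above (P[np.arange(N), T] -= 1,
# then divided by N). This is an assumption about the helper, not its source.
# --------------------------------------------------------------------------------
def _expected_gradient_from_log_loss_sketch(P: np.ndarray, T: np.ndarray, N: int) -> np.ndarray:
    G = np.copy(P)                  # P: (N, M) softmax probabilities
    G[np.arange(N), T] -= 1.0       # subtract 1 at the true-label positions
    return G / N                    # average over the batch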