Code Example #1
def test_010_sigmoid_cross_entropy_log_loss_2d(caplog):
    """
    Objective:
        Test case for sigmoid_cross_entropy_log_loss(X, T) =
        -( T * log(sigmoid(X)) + (1 - T) * log(1 - sigmoid(X)) )

        For the input X of shape (N,1) and T in index format of shape (N,1),
        calculate the sigmoid log loss and verify the values are as expected.

    Expected:
        For  Z = sigmoid(X) = 1 / (1 + exp(-X)) and T=[[1]]
        Then -log(Z) should be almost the same as sigmoid_cross_entropy_log_loss(X, T),
        "almost" because finite floating point precision always introduces rounding errors.
    """
    # caplog.set_level(logging.DEBUG, logger=Logger.name)
    u = REFORMULA_DIFF_ACCEPTANCE_VALUE

    # --------------------------------------------------------------------------------
    # [Test case 01]
    # X:(N,M)=(1, 1). X=(x0) where x0=0 by which sigmoid(X) generates 0.5.
    # Expected:
    #   sigmoid_cross_entropy_log_loss(X, T) == -log(0.5)
    # --------------------------------------------------------------------------------
    X = np.array([[TYPE_FLOAT(0.0)]])
    T = np.array([TYPE_LABEL(1)])
    X, T = transform_X_T(X, T)
    E = -logarithm(np.array([TYPE_FLOAT(0.5)]))

    J, P = sigmoid_cross_entropy_log_loss(X, T)
    assert E.shape == J.shape
    assert np.all(E == J), \
        "Expected (E==J) but \n%s\nE=\n%s\nT=%s\nX=\n%s\nJ=\n%s\n" \
        % (np.abs(E - J), E, T, X, J)
    assert P == 0.5

    # --------------------------------------------------------------------------------
    # [Test case 02]
    # For X:(N,1)
    # --------------------------------------------------------------------------------
    for _ in range(NUM_MAX_TEST_TIMES):
        # X(N, M), and T(N,) in index label format
        N = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M = 1   # always 1 for binary classification 0 or 1.

        X = np.random.randn(N, M).astype(TYPE_FLOAT)
        T = np.random.randint(0, 2, N).astype(TYPE_LABEL)  # binary labels 0 or 1
        X, T = transform_X_T(X, T)
        Logger.debug("T is %s\nX is \n%s\n", T, X)

        # ----------------------------------------------------------------------
        # Expected value EJ for J and Z for P
        # Note:
        #   To handle both index label format and OHE label format in the
        #   Loss layer(s), X and T are transformed into (N,1) shapes in
        #   transform_X_T(X, T) for logistic log loss.
        # DO NOT squeeze Z nor P.
        # ----------------------------------------------------------------------
        Z = sigmoid(X)
        EJ = np.squeeze(-(T * logarithm(Z) + TYPE_FLOAT(1-T) * logarithm(TYPE_FLOAT(1-Z))), axis=-1)

        # **********************************************************************
        # Constraint: Actual J should be close to EJ.
        # **********************************************************************
        J, P = sigmoid_cross_entropy_log_loss(X, T)
        assert EJ.shape == J.shape
        assert np.all(np.abs(EJ-J) < u), \
            "Expected abs(EJ-J) < %s but \n%s\nEJ=\n%s\nT=%s\nX=\n%s\nJ=\n%s\n" \
            % (u, np.abs(EJ-J), EJ, T, X, J)
        
        # **********************************************************************
        # Constraint: Actual P should be close to Z.
        # **********************************************************************
        assert np.all(np.abs(Z-P) < u), \
            "Expected P (Z)\n%s\nP\n%s\nZ-P\n%s\n" % (Z, P, Z-P)

        # ----------------------------------------------------------------------
        # L = cross_entropy_log_loss(P, T) should be close to J
        # ----------------------------------------------------------------------
        L = cross_entropy_log_loss(P=Z, T=T, f=logistic_log_loss)
        assert L.shape == J.shape
        assert np.all(np.abs(L-J) < u), \
            "Expected abs(L-J) < %s but \n%s\nL=\n%s\nT=%s\nX=\n%s\nJ=\n%s\n" \
            % (u, np.abs(L-J), L, T, X, J)
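
As a standalone check of the formula in the docstring above, here is a minimal sketch of what sigmoid_cross_entropy_log_loss(X, T) is assumed to compute; the project's logarithm, TYPE_FLOAT, and any offset/clipping details are not reproduced.

import numpy as np

def _sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def _sigmoid_cross_entropy_log_loss(X, T):
    # J = -(T * log(sigmoid(X)) + (1-T) * log(1-sigmoid(X))), one loss value per row.
    # P is the sigmoid activation, returned alongside J as in the tested function.
    P = _sigmoid(X)
    J = -(T * np.log(P) + (1 - T) * np.log(1 - P))
    return np.squeeze(J, axis=-1), P

# For X=0 and T=1, sigmoid(0)=0.5, so the loss is -log(0.5) ~= 0.6931.
J, P = _sigmoid_cross_entropy_log_loss(np.array([[0.0]]), np.array([[1]]))
assert np.isclose(J, -np.log(0.5)).all() and np.isclose(P, 0.5).all()
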
Code Example #2
def test_030_objective_methods_1d_ohe():
    """
    Objective:
        Verify the forward path constraints:
        1. Layer output L/loss is np.sum(cross_entropy_log_loss(softmax(X), T)) / N.
        2. gradient_numerical() == numerical Jacobian numerical_jacobian(O, X).

        Verify the backward path constraints:
        1. Analytical gradient G: gradient() == (P-T)/N
        2. Analytical gradient G is close to GN: gradient_numerical().
    Expected:
        Initialization detects access to non-initialized parameters and fails.

        For X.ndim > 0, the layer transforms X into 2D so as to use numpy
        tuple-like indexing:
        P[
            (0,3),
            (2,4)
        ]
        Hence, the shapes of GN and G are 2D.
    """
    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    name = "test_030_objective_methods_1d_ohe"
    N = 1

    for _ in range(NUM_MAX_TEST_TIMES):
        M: int = np.random.randint(2, NUM_MAX_NODES)
        assert M >= 2, "Softmax is for multi-class classification. " \
                       "Use Sigmoid for binary classification."

        _layer = layer.CrossEntropyLogLoss(name=name,
                                           num_nodes=M,
                                           log_level=logging.DEBUG)

        # ================================================================================
        # Layer forward path
        # ================================================================================
        X = np.random.randn(M).astype(TYPE_FLOAT)
        T = np.zeros_like(X, dtype=TYPE_LABEL)  # OHE labels.
        T[np.random.randint(0, M)] = TYPE_LABEL(1)
        _layer.T = T

        P = softmax(X)
        EG = ((P - T) / N).reshape(1, -1).astype(
            TYPE_FLOAT)  # Expected analytical gradient dL/dX = (P-T)/N

        Logger.debug("%s: X is \n%s\nT is %s\nP is %s\nEG is %s\n", name, X, T,
                     P, EG)

        # --------------------------------------------------------------------------------
        # constraint: L/loss == np.sum(cross_entropy_log_loss(softmax(X), T)) / N.
        # --------------------------------------------------------------------------------
        L = _layer.function(X)
        Z = np.array(np.sum(cross_entropy_log_loss(softmax(X), T)),
                     dtype=TYPE_FLOAT) / TYPE_FLOAT(N)
        assert np.array_equal(
            L, Z), f"SoftmaxLogLoss output should be {Z} but is {L}."

        # --------------------------------------------------------------------------------
        # constraint: gradient_numerical() == numerical Jacobian numerical_jacobian(O, X)
        # Use a dummy _layer for the objective function because using the "_layer"
        # updates the X, Y which can interfere with the independence of the _layer.
        # --------------------------------------------------------------------------------
        GN = _layer.gradient_numerical()  # [dL/dX] from the _layer

        # --------------------------------------------------------------------------------
        # Cannot use CrossEntropyLogLoss.function() to simulate the objective function L
        # because it would apply transform_X_T multiple times.
        # Internally, transform_X_T(X, T) has already transformed T into the index label
        # format in 1D with length 1 by "T = T.reshape(-1)".
        # Feeding X in 1D into "dummy.function(x)" would re-run "transform_X_T(X, T)".
        # For the combination (X.ndim == T.ndim == 1) as input, T must be an OHE label
        # and T.shape == P.shape must hold for OHE labels.
        # However, T has already been converted into the index format by transform_X_T
        # (applying transform_X_T multiple times), giving T.shape=(1,1), X.shape=(1, >1),
        # which violates the (X.shape == T.shape) constraint.
        # --------------------------------------------------------------------------------
        # dummy = CrossEntropyLogLoss(
        #     name="dummy",
        #     num_nodes=M,
        #     log_level=logging.DEBUG
        # )
        # dummy.T = T
        # dummy.objective = objective
        # dummy.function(X)
        # --------------------------------------------------------------------------------
        # O = lambda x: dummy.objective(dummy.function(x))    # Objective function
        O = lambda x: np.sum(cross_entropy_log_loss(softmax(x), T),
                             dtype=TYPE_FLOAT) / TYPE_FLOAT(N)
        # --------------------------------------------------------------------------------
        EGN = numerical_jacobian(O, X).reshape(1,
                                               -1)  # Expected numerical dL/dX
        assert np.array_equal(GN[0], EGN), \
            f"Layer gradient_numerical GN \n{GN} \nneeds to be \n{EGN}."

        # ================================================================================
        # Layer backward path
        # ================================================================================
        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G: gradient() == (P-T)/N.
        # --------------------------------------------------------------------------------
        dY = TYPE_FLOAT(1)
        G = _layer.gradient(dY)
        assert np.all(np.abs(G-EG) <= GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            f"Layer gradient dL/dX \n{G} \nneeds to be \n{EG} but G-EG \n{np.abs(G-EG)}\n"

        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G is close to GN: gradient_numerical().
        # --------------------------------------------------------------------------------
        assert \
            np.all(np.abs(G - GN[0]) <= GRADIENT_DIFF_ACCEPTANCE_VALUE) or \
            np.all(np.abs(G-GN[0]) <= np.abs(GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0])), \
            f"dX is \n{G}\nGN[0] is \n{GN[0]}\nRatio * GN[0] is \n{GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0]}.\n"
Code Example #3
def disabled_test_030_objective_methods_2d_ohe():
    """
    TODO: Disabled as need to redesign numerical_jacobian for 32 bit floating.

    Objective:
        Verify the forward path constraints:
        1. Layer output L/loss is np.sum(cross_entropy_log_loss(softmax(X), T)) / N.
        2. gradient_numerical() == numerical Jacobian numerical_jacobian(O, X).

        Verify the backward path constraints:
        1. Analytical gradient G: gradient() == (P-T)/N
        2. Analytical gradient G is close to GN: gradient_numerical().
    Expected:
        Initialization detects access to non-initialized parameters and fails.
    """
    def objective(X: np.ndarray) -> Union[float, np.ndarray]:
        """Dummy objective function to calculate the loss L"""
        assert X.ndim == 0, "The output of the log loss should be of shape ()"
        return X

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    name = "test_030_objective_methods_2d_ohe"
    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(2, NUM_MAX_NODES)
        assert M >= 2, "Softmax is for multi-class classification. " \
                       "Use Sigmoid for binary classification."

        _layer = layer.CrossEntropyLogLoss(name=name,
                                           num_nodes=M,
                                           log_level=logging.DEBUG)
        _layer.objective = objective

        # ================================================================================
        # Layer forward path
        # ================================================================================
        X = np.random.randn(N, M).astype(TYPE_FLOAT)
        T = np.zeros_like(X, dtype=TYPE_LABEL)  # OHE labels.
        T[np.arange(N), np.random.randint(0, M, N)] = TYPE_LABEL(1)
        _layer.T = T

        Logger.debug("%s: X is \n%s\nT is \n%s", name, X, T)

        P = softmax(X)
        EG = (P - T) / N  # Expected analytical gradient dL/dX = (P-T)/N

        # --------------------------------------------------------------------------------
        # constraint: L/loss == np.sum(cross_entropy_log_loss(softmax(X), T)) / N.
        # --------------------------------------------------------------------------------
        L = _layer.function(X)
        Z = np.array(np.sum(cross_entropy_log_loss(softmax(X), T))) / N
        assert np.array_equal(
            L, Z), f"SoftmaxLogLoss output should be {Z} but is {L}."

        # --------------------------------------------------------------------------------
        # constraint: gradient_numerical() == numerical Jacobian numerical_jacobian(O, X)
        # --------------------------------------------------------------------------------
        GN = _layer.gradient_numerical()  # [dL/dX] from the _layer

        # --------------------------------------------------------------------------------
        # DO not use CrossEntropyLogLoss.function() to simulate the objective function for
        # the expected GN. See the same part in test_030_objective_methods_1d_ohe().
        # --------------------------------------------------------------------------------
        # dummy= CrossEntropyLogLoss(
        #     name=name,
        #     num_nodes=M,
        #     log_level=logging.DEBUG
        # )
        # dummy.T = T
        # dummy.objective = objective
        # O = lambda x: dummy.objective(dummy.function(x))    # Objective function
        O = lambda x: np.sum(cross_entropy_log_loss(softmax(x), T)) / N
        # --------------------------------------------------------------------------------

        EGN = numerical_jacobian(O, X)  # Expected numerical dL/dX
        assert np.array_equal(GN[0], EGN), \
            "GN[0]==EGN expected but GN[0] is \n%s\n EGN is \n%s\n" % (GN[0], EGN)

        # ================================================================================
        # Layer backward path
        # ================================================================================
        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G: gradient() == (P-T)/N.
        # --------------------------------------------------------------------------------
        dY = TYPE_FLOAT(1)
        G = _layer.gradient(dY)
        assert np.all(abs(G-EG) <= GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            f"Layer gradient dL/dX \n{G} \nneeds to be \n{EG}."

        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G is close to GN: gradient_numerical().
        # --------------------------------------------------------------------------------
        assert \
            np.all(np.abs(G - GN[0]) <= GRADIENT_DIFF_ACCEPTANCE_VALUE) or \
            np.all(np.abs(G - GN[0]) <= np.abs(GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0])), \
            f"dX is \n{G}\nGN[0] is \n{GN[0]}\nRatio * GN[0] is \n{GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0]}.\n"
Code Example #4
        # P:[0, 0, ..., 1, 0, ...] where Pi = 1
        # T:[0, 0, ..., 1, 0, ...] is OHE label where Ti=1
        # sum(-t * log(p+k)) -> log(1+k)
        # dlog(P+k)/dP -> -1 / (1+k)
        # --------------------------------------------------------------------------------
        M = np.random.randint(2, NUM_MAX_NODES)  # M > 1
        index = np.random.randint(0, M)  # location of the truth
        while not (x := TYPE_FLOAT(
                np.random.uniform(low=-BOUNDARY_SIGMOID,
                                  high=BOUNDARY_SIGMOID))):
            pass
        p = softmax(x)
        P3 = np.zeros(M, dtype=TYPE_FLOAT)
        P3[index] = p
        T3 = np.zeros(M).astype(TYPE_LABEL)  # OHE index
        T3[index] = TYPE_LABEL(1)

        # --------------------------------------------------------------------------------
        # The Jacobian G shape is the same with P.shape.
        # --------------------------------------------------------------------------------
        EN3 = np.zeros_like(P3, dtype=TYPE_FLOAT)   # Expected numerical gradient
        EN3[index] = TYPE_FLOAT(-1 * logarithm(p + h) +
                                1 * logarithm(p - h)) / TYPE_FLOAT(2 * h)
        N3 = numerical_jacobian(partial(f, T=T3), P3)
        assert EN3.shape == N3.shape
        assert np.all(np.abs(EN3 - N3) < u), \
            f"Delta expected to be < {u} but \n{np.abs(EN3 - N3)}"

        G3 = np.zeros_like(P3, dtype=TYPE_FLOAT)
        G3[index] = -1 / p
        check.equal(np.all(np.abs(G3 - N3) < u), True,
                    "G3-N3 %s\n" % np.abs(G3 - N3))
Code Example #5
def test_020_cross_entropy_log_loss_1d(caplog):
    """
    Objective:
        Test the categorical log loss values for P in 1 dimension.

    Constraints:
        1. The numerical gradient gn = (-t * logarithm(p+h) + t * logarithm(p-h)) / 2h.
        2. The numerical gradient gn is within +/- u within the analytical g = -T/P.

    P: Probabilities from softmax of shape (M,)
    M: Number of nodes in the cross_entropy_log_loss layer.
    T: Labels

    Note:
        log(P=1) -> 0
        dlog(x)/dx = 1/x
    """
    def f(P: np.ndarray, T: np.ndarray):
        return np.sum(cross_entropy_log_loss(P, T))

    # caplog.set_level(logging.DEBUG, logger=Logger.name)

    h: TYPE_FLOAT = OFFSET_DELTA
    u: TYPE_FLOAT = GRADIENT_DIFF_ACCEPTANCE_VALUE

    # --------------------------------------------------------------------------------
    # For (P, T): P[index] = True/1, OHE label T[index] = 1 where
    # P=[0,0,0,...,1,...0], T = [0,0,0,...1,...0]. T[i] == 1
    #
    # Do not forget the Jacobian shape is (M,) and calculate each element.
    # 1. For T=1, loss L = -log(Pi) = 0 and dL/dP = -(1/Pi) = -1 is expected.
    # 2. For T=0, the numerical gradient gn = (-0 * log(p+h) + 0 * log(p-h)) / 2h = 0 is expected.
    # --------------------------------------------------------------------------------
    M: TYPE_INT = np.random.randint(2, NUM_MAX_NODES)
    index: TYPE_INT = TYPE_INT(np.random.randint(
        0, M))  # Position of the true label in P
    P1 = np.zeros(M, dtype=TYPE_FLOAT)
    P1[index] = TYPE_FLOAT(1.0)
    T1 = np.zeros(M, dtype=TYPE_LABEL)
    T1[index] = TYPE_LABEL(1)

    # Analytically correct gradient for P=1, T=1
    AG = np.zeros_like(P1, dtype=TYPE_FLOAT)
    AG[index] = TYPE_FLOAT(-1)  # dL/dP = -1

    EGN1 = np.zeros_like(P1, dtype=TYPE_FLOAT)  # Expected numerical gradient
    EGN1[index] = (-1 * logarithm(TYPE_FLOAT(1.0 + h)) + TYPE_FLOAT(1) *
                   logarithm(TYPE_FLOAT(1.0 - h))) / TYPE_FLOAT(2 * h)
    assert np.all(np.abs(EGN1-AG) < u), \
        "Expected abs(EGN1-AG) < %s but %s\nEGN1=\n%s" % (u, (EGN1-AG), EGN1)

    GN1 = numerical_jacobian(partial(f, T=T1), P1)
    assert np.all(np.abs(GN1-AG) < u), \
        "Expected abs(GN1-AG) < %s but %s\nGN1=\n%s" % (u, (GN1-AG), GN1)

    # The numerical gradient gn = (-t * logarithm(p+h) + t * logarithm(p-h)) / 2h
    assert GN1.shape == EGN1.shape
    assert np.all(np.abs(EGN1-GN1) < u), \
        "Expected GN1==EGN1 but GN1-EGN1=\n%sP=\n%s\nT=%s\nEGN=\n%s\nGN=\n%s\n" \
        % (np.abs(GN1-EGN1), P1, T1, EGN1, GN1)

    # The numerical gradient gn is within +/- u within the analytical g = -T/P
    G1 = np.zeros_like(P1, dtype=TYPE_FLOAT)
    G1[T1 == 1] = -1 * (T1[index] / P1[index])
    # G1[T1 != 0] = 0
    check.equal(np.all(np.abs(G1 - GN1) < u), True,
                "G1-GN1 %s\n" % np.abs(G1 - GN1))

    # --------------------------------------------------------------------------------
    # For (P, T): P[index] = np uniform(), index label T=index
    # --------------------------------------------------------------------------------
    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(2, NUM_MAX_NODES)  # M > 1
        T2 = TYPE_LABEL(np.random.randint(0, M))  # location of the truth
        P2 = np.zeros(M, dtype=TYPE_FLOAT)
        while not (x := TYPE_FLOAT(
                np.random.uniform(low=-BOUNDARY_SIGMOID,
                                  high=BOUNDARY_SIGMOID))):
            pass
        p = softmax(x)
        P2[T2] = p

        # --------------------------------------------------------------------------------
        # The Jacobian G shape is the same with P.shape.
        # G:[0, 0, ...,g, 0, ...] where Gi is numerical gradient close to -1/(1+k).
        # --------------------------------------------------------------------------------
        EN2 = np.zeros_like(P2, dtype=TYPE_FLOAT)   # Expected numerical gradient
        EN2[T2] = TYPE_FLOAT(-1) * (logarithm(p + h) -
                                    logarithm(p - h)) / TYPE_FLOAT(2 * h)
        N2 = numerical_jacobian(partial(f, T=T2), P2)

        # The numerical gradient gn = (-t * logarithm(p+h) + t * logarithm(p-h)) / 2h
        assert EN2.shape == N2.shape
        assert np.all(np.abs(EN2 - N2) < u), \
            f"Delta expected to be < {u} but \n{np.abs(EN2 - N2)}"

        G2 = np.zeros_like(P2, dtype=TYPE_FLOAT)
        G2[T2] = -1 / p

        # The numerical gradient gn is within +/- u within the analytical g = -T/P
        check.equal(np.all(np.abs(G2 - N2) < u), True,
                    "G2-N2 %s\n" % np.abs(G2 - N2))
Code Example #6
def test_010_base_instance_properties():
    """
    Objective:
        Verify the layer class validates that the parameters have been initialized before being accessed.
    Expected:
        Initialization detects the access to the non-initialized parameters and fails.
    """
    msg = "Accessing uninitialized property of the _layer must fail."
    M: int = np.random.randint(1, NUM_MAX_NODES)
    name = "test_010_base"
    _layer = Layer(name=name, num_nodes=M, log_level=logging.DEBUG)
    # --------------------------------------------------------------------------------
    # To pass
    # --------------------------------------------------------------------------------
    try:
        if not _layer.name == name:
            raise RuntimeError("layer.name == name should be true")
    except AssertionError:
        raise RuntimeError(
            "Access to name should be allowed as already initialized.")

    try:
        if not _layer.M == M:
            raise RuntimeError("layer.M == M should be true")
    except AssertionError:
        raise RuntimeError(
            "Access to M should be allowed as already initialized.")

    try:
        if not isinstance(_layer.logger, logging.Logger):
            raise RuntimeError(
                "isinstance(layer.logger, logging.Logger) should be true")
    except AssertionError:
        raise RuntimeError(
            "Access to logger should be allowed as already initialized.")

    # --------------------------------------------------------------------------------
    # To fail
    # --------------------------------------------------------------------------------

    try:
        print(_layer.D)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        print(_layer.X)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        _layer.X = int(1)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        print(_layer.dX)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        print(_layer.Y)
        raise RuntimeError(msg)
    except AssertionError:
        pass
    try:
        _layer._Y = int(1)
        print(_layer.Y)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        print(_layer.dY)
        raise RuntimeError(msg)
    except AssertionError:
        pass
    try:
        _layer._dY = int(1)
        print(_layer.dY)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        print(_layer.T)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        _layer.T = TYPE_LABEL(1)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        # pylint: disable=not-callable
        _layer.objective(np.array(1.0, dtype=TYPE_FLOAT))
        raise RuntimeError(msg)
    except AssertionError:
        pass

    try:
        print(_layer.N)
        raise RuntimeError(msg)
    except AssertionError:
        pass

    assert _layer.name == name
    assert _layer.num_nodes == M
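
The try/except AssertionError pattern above relies on properties of the Layer base class raising AssertionError when their backing fields are not yet set. That class is not shown in this file; the following is only a sketch of the guard pattern it presumably uses, with illustrative names.

import logging
import numpy as np

class GuardedLayer:
    """Illustrative base layer: properties assert their backing field is set."""
    def __init__(self, name: str, num_nodes: int, log_level=logging.ERROR):
        self.name = name
        self.num_nodes = num_nodes
        self.logger = logging.getLogger(name)
        self.logger.setLevel(log_level)
        self._X = None                      # not set until function(X) is called

    @property
    def X(self) -> np.ndarray:
        assert self._X is not None, "X is not initialized"
        return self._X

    @X.setter
    def X(self, X: np.ndarray):
        assert isinstance(X, np.ndarray) and X.size > 0, "X must be a non-empty ndarray"
        self._X = X

_l = GuardedLayer(name="demo", num_nodes=3)
try:
    _ = _l.X                                # uninitialized access
    raise RuntimeError("access to X should have failed")
except AssertionError:
    pass                                    # expected, as asserted in the test above
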
Code Example #7
def disabled_test_040_objective_methods_2d_ohe(caplog):
    """
    TODO: Disabled as need to redesign numerical_jacobian for 32 bit floating.

    Objective:
        Verify the forward path constraints:
        1. Layer output L/loss is np.sum(sigmoid_cross_entropy_log_loss(X, T)) / N.
        2. gradient_numerical() == numerical Jacobian numerical_jacobian(O, X).

        Verify the backward path constraints:
        1. Analytical gradient G: gradient() == (P-T)/N
        2. Analytical gradient G is close to GN: gradient_numerical().
    """
    caplog.set_level(logging.DEBUG)

    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    name = "test_040_objective_methods_2d_ohe"

    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = 1  # node number is 1 for 0/1 binary classification.
        layer = CrossEntropyLogLoss(
            name=name,
            num_nodes=M,
            log_loss_function=sigmoid_cross_entropy_log_loss,
            log_level=logging.DEBUG)

        # ================================================================================
        # Layer forward path
        # ================================================================================
        X = np.random.randn(N, M).astype(TYPE_FLOAT)
        T = np.random.randint(0, 2, size=(N, M)).astype(TYPE_LABEL)  # Binary labels 0/1.

        # The log_loss function requires (X, T) in X:(N, M) and T:(N, M) OHE label format.
        X, T = transform_X_T(X, T)
        layer.T = T
        Logger.debug("%s: X is \n%s\nT is \n%s", name, X, T)

        # --------------------------------------------------------------------------------
        # Expected analytical gradient EG = dL/dX = (A-T)/N
        # --------------------------------------------------------------------------------
        A = sigmoid(X)
        EG = ((A - T).astype(TYPE_FLOAT) / TYPE_FLOAT(N))

        # --------------------------------------------------------------------------------
        # Total loss Z = np.sum(J)/N
        # Expected loss EL = sum((1-T)X + log(1 + exp(-X))) / N
        # (J, P) = sigmoid_cross_entropy_log_loss(X, T) where J:shape(N,) is the loss
        # for each input and P is the activation sigmoid(X).
        # --------------------------------------------------------------------------------
        L = layer.function(X)
        J, P = sigmoid_cross_entropy_log_loss(X, T)
        EL = np.array(np.sum((1 - T) * X + logarithm(1 + np.exp(-X))) / N,
                      dtype=TYPE_FLOAT)

        # Constraint: A == P as they are sigmoid(X)
        assert np.all(np.abs(A-P) < ACTIVATION_DIFF_ACCEPTANCE_VALUE), \
            f"Need A==P==sigmoid(X) but A=\n{A}\n P=\n{P}\n(A-P)=\n{(A-P)}\n"

        # Constraint: Log loss layer output L == sum(J) from the log loss function
        Z = np.array(np.sum(J) / N, dtype=TYPE_FLOAT)
        assert np.array_equal(L, Z), \
            f"Need log loss layer output L == sum(J) but L=\n{L}\nZ=\n{Z}."

        # Constraint: L/loss is close to expected loss EL.
        assert np.all(np.abs(EL-L) < LOSS_DIFF_ACCEPTANCE_VALUE), \
            f"Need EL close to L but \nEL=\n{EL}\nL=\n{L}\n"

        # --------------------------------------------------------------------------------
        # constraint: gradient_numerical() == numerical_jacobian(objective, X)
        # TODO: compare the diff to accommodate numerical errors.
        # --------------------------------------------------------------------------------
        GN = layer.gradient_numerical()  # [dL/dX] from the layer

        def objective(x):
            """Function to calculate the scalar loss L for cross entropy log loss"""
            j, p = sigmoid_cross_entropy_log_loss(x, T)
            return np.array(np.sum(j) / N, dtype=TYPE_FLOAT)

        EGN = numerical_jacobian(objective, X)  # Expected numerical dL/dX
        assert np.array_equal(GN[0], EGN), \
            "GN[0]==EGN expected but GN[0] is \n%s\n EGN is \n%s\n" % (GN[0], EGN)

        # ================================================================================
        # Layer backward path
        # ================================================================================
        # constraint: Analytical gradient G: gradient() == (P-1)/N.
        dY = TYPE_FLOAT(1)
        G = layer.gradient(dY)
        assert np.all(np.abs(G-EG) <= GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            f"Layer gradient dL/dX \n{G} \nneeds to be \n{EG}."

        # constraint: Analytical gradient G is close to GN: gradient_numerical().
        assert \
            np.allclose(GN[0], G, atol=GRADIENT_DIFF_ACCEPTANCE_VALUE, rtol=GRADIENT_DIFF_ACCEPTANCE_RATIO), \
            f"dX is \n{G}\nGN[0] is \n{GN[0]}\nRDiff is \n{G-GN[0]}.\n"

        # constraint: Gradient g of the log loss layer needs -1 < g < 1
        # abs(P-T) = abs(sigmoid(X)-T) cannot be > 1.
        assert np.all(np.abs(G) < 1), \
            f"Log loss layer gradient cannot be < -1 nor > 1 but\n{G}"
        assert np.all(np.abs(GN[0]) < (1+GRADIENT_DIFF_ACCEPTANCE_RATIO)), \
            f"Log loss layer gradient cannot be < -1 nor > 1 but\n{GN[0]}"

    profiler.disable()
    profiler.print_stats(sort="cumtime")
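
The expected loss EL above uses the algebraically equivalent form (1-T)*X + log(1 + exp(-X)) instead of -(T*log(sigmoid(X)) + (1-T)*log(1-sigmoid(X))). A short sketch of that equivalence in plain NumPy with illustrative values; the project's logarithm and loss constants are not used.

import numpy as np

def loss_naive(x, t):
    s = 1.0 / (1.0 + np.exp(-x))
    return -(t * np.log(s) + (1 - t) * np.log(1 - s))

def loss_reformulated(x, t):
    # (1-t)*x + log(1 + exp(-x)); avoids log(sigmoid(x)) collapsing to log(0)
    # for large positive x.
    return (1 - t) * x + np.log1p(np.exp(-x))

x = np.array([-3.0, -0.5, 0.0, 2.0, 7.5])
t = np.array([ 0.0,  1.0, 1.0, 0.0, 1.0])
assert np.allclose(loss_naive(x, t), loss_reformulated(x, t))
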
Code Example #8
def disabled_test_040_objective_methods_1d_ohe():
    """
    TODO: Disabled as need to redesign numerical_jacobian for 32 bit floating.

    Objective:
        Verify the forward path constraints:
        1. Layer output L/loss is np.sum(cross_entropy_log_loss(sigmoid(X), T, f=logistic_log_loss)) / N.
        2. gradient_numerical() == numerical Jacobian numerical_jacobian(O, X).

        Verify the backward path constraints:
        1. Analytical gradient G: gradient() == (P-T)/N
        2. Analytical gradient G is close to GN: gradient_numerical().
    Expected:
        Initialization detects access to non-initialized parameters and fails.

        For X.ndim > 0, the layer transforms X into 2D so as to use numpy
        tuple-like indexing:
        P[
            (0,3),
            (2,4)
        ]
        Hence, the shapes of GN and G are 2D.
    """
    # --------------------------------------------------------------------------------
    # Instantiate a CrossEntropyLogLoss layer
    # --------------------------------------------------------------------------------
    name = "test_040_objective_methods_1d_ohe"
    N = 1

    for _ in range(NUM_MAX_TEST_TIMES):
        layer = CrossEntropyLogLoss(
            name=name,
            num_nodes=1,
            log_loss_function=sigmoid_cross_entropy_log_loss,
            log_level=logging.DEBUG)

        # ================================================================================
        # Layer forward path
        # ================================================================================
        X = TYPE_FLOAT(
            np.random.uniform(low=-BOUNDARY_SIGMOID, high=BOUNDARY_SIGMOID))
        T = TYPE_LABEL(np.random.randint(0, 2))  # Binary label 0 or 1.

        # The log_loss function requires (X, T) in X:(N, M) and T:(N, M) OHE label format.
        X, T = transform_X_T(X, T)
        layer.T = T

        # Expected analytical gradient dL/dX = (P-T)/N of shape (N,M)
        A = sigmoid(X)
        EG = ((A - T) / N).reshape(1, -1).astype(TYPE_FLOAT)

        Logger.debug("%s: X is \n%s\nT is %s\nP is %s\nEG is %s\n", name, X, T,
                     A, EG)

        # --------------------------------------------------------------------------------
        # constraint: L/loss == np.sum(J) / N.
        # J, P = sigmoid_cross_entropy_log_loss(X, T)
        # --------------------------------------------------------------------------------
        L = layer.function(X)  # L is shape ()
        J, P = sigmoid_cross_entropy_log_loss(X, T)
        Z = np.array(np.sum(J), dtype=TYPE_FLOAT) / TYPE_FLOAT(N)
        assert np.array_equal(L, Z), f"LogLoss output should be {L} but {Z}."

        # --------------------------------------------------------------------------------
        # constraint: gradient_numerical() == numerical Jacobian numerical_jacobian(O, X)
        # Use a dummy layer for the objective function because using the "layer"
        # updates the X, Y which can interfere the independence of the layer.
        # --------------------------------------------------------------------------------
        GN = layer.gradient_numerical()  # [dL/dX] from the layer

        # --------------------------------------------------------------------------------
        # Cannot use CrossEntropyLogLoss.function() to simulate the objective function L
        # because it would apply transform_X_T multiple times.
        # Internally, transform_X_T(X, T) has already transformed T into the index label
        # format in 1D with length 1 by "T = T.reshape(-1)".
        # Feeding X in 1D into "dummy.function(x)" would re-run "transform_X_T(X, T)".
        # For the combination (X.ndim == T.ndim == 1) as input, T must be an OHE label
        # and T.shape == P.shape must hold for OHE labels.
        # However, T has already been converted into the index format by transform_X_T
        # (applying transform_X_T multiple times), giving T.shape=(1,1), X.shape=(1, >1),
        # which violates the (X.shape == T.shape) constraint.
        # --------------------------------------------------------------------------------
        # dummy = CrossEntropyLogLoss(
        #     name="dummy",
        #     num_nodes=M,
        #     log_level=logging.DEBUG
        # )
        # dummy.T = T
        # dummy.objective = objective
        # dummy.function(X)
        # --------------------------------------------------------------------------------
        def objective(x):
            j, p = sigmoid_cross_entropy_log_loss(x, T)
            return np.array(np.sum(j) / N, dtype=TYPE_FLOAT)

        EGN = numerical_jacobian(objective,
                                 X).reshape(1, -1)  # Expected numerical dL/dX
        assert np.array_equal(GN[0], EGN), \
            f"Layer gradient_numerical GN \n{GN} \nneeds to be \n{EGN}."

        # ================================================================================
        # Layer backward path
        # ================================================================================
        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G: gradient() == (P-1)/N.
        # --------------------------------------------------------------------------------
        dY = TYPE_FLOAT(1)
        G = layer.gradient(dY)
        assert np.all(np.abs(G-EG) <= GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            f"Layer gradient dL/dX \n{G} \nneeds to be \n{EG}."

        # --------------------------------------------------------------------------------
        # constraint: Analytical gradient G is close to GN: gradient_numerical().
        # --------------------------------------------------------------------------------
        assert \
            np.all(np.abs(G-GN[0]) <= GRADIENT_DIFF_ACCEPTANCE_VALUE) or \
            np.all(np.abs(G-GN[0]) <= np.abs(GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0])), \
            "dX is \n%s\nGN is \n%s\nG-GN is \n%s\n Ratio * GN[0] is \n%s.\n" \
            % (G, GN[0], G-GN[0], GRADIENT_DIFF_ACCEPTANCE_RATIO * GN[0])
Code Example #9
def test_020_adapt_embedding_loss_adapter_gradient_to_succeed(caplog):
    """
    Objective:
        Verify the Adapter gradient method handles dY in shape (N, 1+SL).

        Adapter.function(Y) returns
        - For Y:(N, 1+SL), the return is in shape (N*(1+SL),1).
          Log loss T is set to the same shape.

    Expected:
        Adapter.gradient(loss.gradient(1)) returns dY in shape (N, 1+SL) that is
        close to (sigmoid(Y) - T) / (N*(1+SL)).
    """
    caplog.set_level(logging.DEBUG)
    name = "test_020_adapt_embedding_logistic_loss_function_multi_lines"

    sentences = """
    Verify the EventIndexing function can handle multi line sentences
    the asbestos fiber <unk> is unusually <unk> once it enters the <unk> 
    with even brief exposures to it causing symptoms that show up decades later researchers said
    """

    dictionary: EventIndexing = _instantiate_event_indexing()

    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # First validate the correct configuration, then change parameter one by one.
        E = target_size = TYPE_INT(np.random.randint(1, 3))
        C = context_size = TYPE_INT(2 * np.random.randint(1, 5))
        SL = negative_sample_size = TYPE_INT(np.random.randint(1, 5))
        event_vector_size: TYPE_INT = TYPE_INT(np.random.randint(5, 20))
        W: TYPE_TENSOR = np.random.rand(dictionary.vocabulary_size,
                                        event_vector_size)

        loss, adapter, embedding, event_context = _instantiate(
            name=name,
            num_nodes=TYPE_INT(1),
            target_size=target_size,
            context_size=context_size,
            negative_sample_size=negative_sample_size,
            event_vector_size=event_vector_size,
            dictionary=dictionary,
            W=W,
            log_level=logging.DEBUG,
        )

        # ================================================================================
        # Forward path
        # ================================================================================
        # --------------------------------------------------------------------------------
        # Event indexing
        # --------------------------------------------------------------------------------
        sequences = dictionary.function(sentences)

        # --------------------------------------------------------------------------------
        # Event context pairs
        # --------------------------------------------------------------------------------
        target_context_pairs = event_context.function(sequences)

        # --------------------------------------------------------------------------------
        # Embedding
        # --------------------------------------------------------------------------------
        Y = embedding.function(target_context_pairs)
        N, _ = embedding.tensor_shape(Y)
        batch_size = TYPE_FLOAT(N * (1 + SL))

        # --------------------------------------------------------------------------------
        # Adapter
        # --------------------------------------------------------------------------------
        Z = adapter.function(Y)

        # --------------------------------------------------------------------------------
        # Loss
        # --------------------------------------------------------------------------------
        L = loss.function(Z)

        # ********************************************************************************
        # Constraint:
        #   loss.T is set to the T by adapter.function()
        # ********************************************************************************
        T = np.zeros(shape=(N, (1 + SL)), dtype=TYPE_LABEL)
        T[::, 0] = TYPE_LABEL(1)
        assert embedding.all_equal(T.reshape(-1, 1), loss.T), \
            "Expected T must equals loss.T. Expected\n%s\nLoss.T\n%s\n" % (T, loss.T)

        # ********************************************************************************
        # Constraint:
        #   Expected loss is sum(sigmoid_cross_entropy_log_loss(Y, T)) / (N*(1+SL))
        #   The batch size for the Log Loss is (N*(1+SL))
        # ********************************************************************************
        EJ, EP = sigmoid_cross_entropy_log_loss(X=Z, T=T.reshape(-1, 1))
        EL = np.sum(EJ, dtype=TYPE_FLOAT) / batch_size

        assert embedding.all_close(EL, L), \
            "Expected EL=L but EL=\n%s\nL=\n%s\nDiff=\n%s\n" % (EL, L, (EL-L))

        # ================================================================================
        # Backward path
        # ================================================================================
        # ********************************************************************************
        # Constraint:
        #   Expected dL/dY from the Log Loss is (sigmoid(Y) - T) / (N*(1+SL))
        # ********************************************************************************
        EDY = (sigmoid(Y) - T.astype(TYPE_FLOAT)) / batch_size
        assert EDY.shape == Y.shape

        dY = adapter.gradient(loss.gradient(TYPE_FLOAT(1)))
        assert dY.shape == Y.shape
        assert embedding.all_close(EDY, dY), \
            "Expected EDY==dY. EDY=\n%s\nDiff\n%s\n" % (EDY, (EDY-dY))

    profiler.disable()
    profiler.print_stats(sort="cumtime")
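
The constraints above hinge on the shapes: each row of Y holds one positive sample followed by SL negative samples, the adapter flattens this to (N*(1+SL), 1) for the log loss, and the expected gradient divides by that flattened batch size. A minimal shape sketch with illustrative N and SL; the project's adapter and embedding classes are not used.

import numpy as np

N, SL = 4, 3                               # batch rows, negative samples per row
Y = np.random.randn(N, 1 + SL)             # scores: [positive | SL negatives] per row

# Labels: the first column (positive sample) is 1, the SL negative columns are 0.
T = np.zeros((N, 1 + SL), dtype=np.int32)
T[:, 0] = 1

# Adapter view: every score becomes its own binary-classification row.
Z = Y.reshape(N * (1 + SL), 1)
T_flat = T.reshape(-1, 1)
assert Z.shape == (N * (1 + SL), 1) and T_flat.shape == Z.shape

# Expected dL/dY from the log loss layer, averaged over the flattened batch size.
batch_size = float(N * (1 + SL))
EDY = (1.0 / (1.0 + np.exp(-Y)) - T) / batch_size
assert EDY.shape == Y.shape
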
Code Example #10
def test_020_adapt_embedding_loss_adapter_function_Y_to_succeed(caplog):
    """
    Objective:
        Verify the Adapter function handles Y in shape
        - Y:(N, 1+SL)
        - ys:(N,SL)
        - ye:(N,1)
    Expected:
        Adapter.function(Y) returns
        - For Y:(N, 1+SL), the return is in shape (N*(1+SL),1).
          Log loss T is set to the same shape

        - For Y:(N, SL), the return is in shape (N*SL,1).
          Log loss T is set to the same shape

        - For Y:(N,), the return is in shape (N,1).
          Log loss T is set to the same shape
    """
    caplog.set_level(logging.DEBUG)
    name = "test_020_adapt_embedding_logistic_loss_function_multi_lines"

    sentences = """
    Verify the EventIndexing function can handle multi line sentences
    the asbestos fiber <unk> is unusually <unk> once it enters the <unk> 
    with even brief exposures to it causing symptoms that show up decades later researchers said
    """

    dictionary: EventIndexing = _instantiate_event_indexing()

    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # First validate the correct configuration, then change parameter one by one.
        E = target_size = TYPE_INT(np.random.randint(1, 3))
        C = context_size = TYPE_INT(2 * np.random.randint(1, 5))
        SL = negative_sample_size = TYPE_INT(np.random.randint(1, 5))
        event_vector_size: TYPE_INT = TYPE_INT(np.random.randint(5, 20))
        W: TYPE_TENSOR = np.random.randn(dictionary.vocabulary_size,
                                         event_vector_size)

        loss, adapter, embedding, event_context = _instantiate(
            name=name,
            num_nodes=TYPE_INT(1),
            target_size=target_size,
            context_size=context_size,
            negative_sample_size=negative_sample_size,
            event_vector_size=event_vector_size,
            dictionary=dictionary,
            W=W,
            log_level=logging.DEBUG,
        )

        sequences = dictionary.function(sentences)
        target_context_pairs = event_context.function(sequences)
        Y = embedding.function(target_context_pairs)
        N, _ = embedding.tensor_shape(Y)

        # ********************************************************************************
        # Constraint:
        # - Adapter function returns (N*(SL+1),1) with the same values of Y
        # - Adapter function has set T:(N*(SL+1),1) in the loss layer
        # ********************************************************************************
        msg = "Y must succeed"
        EZ = expected_Z = embedding.reshape(Y, shape=(N * (SL + 1), 1))
        Z = _function_must_succeed(adapter=adapter, Y=Y, msg=msg)
        assert embedding.all_close(Z, EZ), \
            "Z must be close to EZ. Z:\n%s\nEZ\n%s\nDiff\n%s\n" % (Z, EZ, (EZ - Z))
        T = np.zeros(shape=(N, (1 + SL)), dtype=TYPE_LABEL)
        T[::, 0] = TYPE_LABEL(1)
        T = embedding.reshape(T, shape=(-1, 1))
        assert embedding.all_equal(T, loss.T), \
            "Expected T must equals loss.T. Expected\n%s\nLoss.T\n%s\n" % (T, loss.T)

    profiler.disable()
    profiler.print_stats(sort="cumtime")