def test_matmul_bn_relu_classifier(M: int = 3):
    """Test case for the matmul-BN-ReLU classifier"""
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.5))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """Dummy callback"""
        return W

    profiler = cProfile.Profile()
    profiler.enable()

    train_matmul_bn_relu_classifier(
        N=N,
        D=D,
        M=M,
        X=X,
        T=T,
        W=W,
        log_loss_function=softmax_cross_entropy_log_loss,
        optimizer=optimizer,
        test_numerical_gradient=True,
        callback=callback
    )

    profiler.disable()
    profiler.print_stats(sort="cumtime")
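# --------------------------------------------------------------------------------
# Illustrative sketch (not part of the test suite): the SGD optimizer used above
# is assumed to apply the plain update W <- W - lr * dW. This is a minimal numpy
# rendition of that rule; the actual SGD class in this repository may differ
# (e.g. dtype handling or regularization).
# --------------------------------------------------------------------------------
def _sketch_sgd_update(W, dW, lr=0.5):
    """Minimal SGD step: W <- W - lr * dW (assumption about the SGD optimizer)."""
    return W - lr * dW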
def test_categorical_classifier(
        M: int = 3,
        log_loss_function: Callable = softmax_cross_entropy_log_loss
):
    """Test case for the categorical (multi-class) classifier"""
    N = 10
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable_sectors(n=N, d=D, m=M)
    assert X.shape == (N, D)
    X, T = transform_X_T(X, T)

    def callback(W):
        """Dummy callback"""
        return W

    profiler = cProfile.Profile()
    profiler.enable()

    train_binary_classifier(
        N=N,
        D=D,
        M=M,
        X=X,
        T=T,
        W=W,
        log_loss_function=log_loss_function,
        optimizer=optimizer,
        test_numerical_gradient=True,
        log_level=logging.WARNING,
        callback=callback
    )

    profiler.disable()
    profiler.print_stats(sort="cumtime")
def _test_binary_classifier(
        M: int = 2,
        log_loss_function: Callable = softmax_cross_entropy_log_loss,
        num_epochs: int = 100
):
    """Test case for the binary classifier"""
    N = 50
    D = 2
    W = weights.he(M, D + 1)
    optimizer = SGD(lr=TYPE_FLOAT(0.1))
    X, T, V = linear_separable(d=D, n=N)
    # X, T = transform_X_T(X, T)

    def callback(W):
        return W

    train_binary_classifier(
        N=N,
        D=D,
        M=M,
        X=X,
        T=T,
        W=W,
        log_loss_function=log_loss_function,
        optimizer=optimizer,
        num_epochs=num_epochs,
        test_numerical_gradient=True,
        callback=callback
    )
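# --------------------------------------------------------------------------------
# Illustrative sketch (not part of the test suite): linear_separable(d, n) is
# assumed to return n d-dimensional points X with binary labels T determined by a
# random hyperplane, so that a linear classifier can separate them. A minimal
# numpy version under that assumption:
# --------------------------------------------------------------------------------
def _sketch_linear_separable(n: int = 50, d: int = 2):
    """Generate n points in d dimensions labelled by the sign of a random hyperplane."""
    import numpy as np
    X = np.random.randn(n, d)
    normal = np.random.randn(d)          # random hyperplane through the origin
    T = (X @ normal > 0).astype(int)     # label 1 on one side, 0 on the other
    return X, T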
def test_020_matmul_instantiation():
    """
    Objective:
        Verify the initialized layer instance provides its properties.
    Expected:
        * name, num_nodes, M, log_level are the same as initialized.
        * X, T, dX, objective return what is set.
        * N, M properties are provided after X is set.
        * Y, dY properties are provided after they are set.
    """
    def objective(X: np.ndarray) -> Union[float, np.ndarray]:
        """Dummy objective function"""
        return np.sum(X)

    for _ in range(NUM_MAX_TEST_TIMES):
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        name = "test_020_matmul_instantiation"
        matmul = Matmul(
            name=name,
            num_nodes=M,
            W=weights.he(M, D + 1),
            log_level=logging.DEBUG
        )
        matmul.objective = objective

        assert matmul.name == name
        assert matmul.num_nodes == matmul.M == M

        matmul._D = D
        assert matmul.D == D

        X = np.random.randn(N, D).astype(TYPE_FLOAT)
        matmul.X = X
        assert np.array_equal(matmul.X, X)
        assert matmul.N == N == X.shape[0]

        matmul._dX = X
        assert np.array_equal(matmul.dX, X)

        T = np.random.randint(0, M, N).astype(TYPE_LABEL)
        matmul.T = T
        assert np.array_equal(matmul.T, T)

        matmul._Y = np.dot(X, X.T)
        assert np.array_equal(matmul.Y, np.dot(X, X.T))

        matmul._dY = np.array(0.9)
        assert matmul._dY == np.array(0.9)

        matmul.logger.debug("This is a pytest")
        assert matmul.objective == objective
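# --------------------------------------------------------------------------------
# Illustrative sketch (not part of the test suite): the Matmul layer is assumed to
# prepend a bias column of ones to X internally, which is why W is created with
# shape (M, D+1) via weights.he(M, D + 1). A minimal numpy forward pass under that
# assumption:
# --------------------------------------------------------------------------------
def _sketch_matmul_forward(X, W):
    """Y = [1, X] @ W.T where X:(N,D), W:(M,D+1) -> Y:(N,M)."""
    import numpy as np
    Xb = np.c_[np.ones(X.shape[0]), X]   # add the bias column x0 = 1
    return Xb @ W.T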
def _instantiate(name: str, num_nodes: int, num_features: int, objective=None):
    category = TYPE_FLOAT(np.random.uniform())
    if category < 0.3:
        W = weights.he(num_nodes, num_features + 1)
    elif category < 0.7:
        W = weights.xavier(num_nodes, num_features + 1)
    else:
        W = weights.uniform(num_nodes, num_features + 1)

    matmul = Matmul(name=name, num_nodes=num_nodes, W=W)
    if objective is not None:
        matmul.objective = objective
    return matmul
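# --------------------------------------------------------------------------------
# Illustrative sketch (not part of the test suite): weights.he / weights.xavier /
# weights.uniform are assumed to follow the standard initialization formulas
# below. The actual implementations in this repository may scale or seed
# differently.
# --------------------------------------------------------------------------------
def _sketch_weight_init(num_nodes: int, num_features: int, scheme: str = "he"):
    """He: N(0, 2/fan_in), Xavier: N(0, 1/fan_in), uniform: U(-1, 1)."""
    import numpy as np
    fan_in = num_features
    if scheme == "he":
        return np.random.randn(num_nodes, num_features) * np.sqrt(2.0 / fan_in)
    if scheme == "xavier":
        return np.random.randn(num_nodes, num_features) * np.sqrt(1.0 / fan_in)
    return np.random.uniform(-1.0, 1.0, (num_nodes, num_features))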
def disabled_test_020_matmul_round_trip():
    """
    TODO: Disabled as numerical_jacobian needs to be re-designed for 32-bit float, e.g. TF.

    Objective:
        Verify the forward and backward paths at matmul.
    Expected:
        Forward path:
        1. Matmul function(X) == X @ W.T
        2. Numerical gradient should be the same as the numerical Jacobian.
        Backward path:
        3. Analytical gradient dL/dX == dY @ W
        4. Analytical dL/dW == X.T @ dY
        5. Analytical gradients are similar to the numerical gradient ones.
        Gradient descent:
        6. W is updated via the gradient descent.
        7. Objective L is decreasing via the gradient descent.
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # --------------------------------------------------------------------------------
        # Instantiate a Matmul layer
        # --------------------------------------------------------------------------------
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        W = weights.he(M, D + 1)
        name = "test_020_matmul_methods"

        def objective(X: np.ndarray) -> Union[float, np.ndarray]:
            """Dummy objective function to calculate the loss L"""
            return np.sum(X)

        # Test both static instantiation and build()
        if TYPE_FLOAT(np.random.uniform()) < 0.5:
            matmul = Matmul(name=name, num_nodes=M, W=W, log_level=logging.DEBUG)
        else:
            matmul_spec = {
                _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                _NUM_NODES: M,
                _NUM_FEATURES: D,
                _WEIGHTS: {
                    _SCHEME: "he",
                },
                _OPTIMIZER: {
                    _SCHEME: "sGd"
                }
            }
            matmul = Matmul.build(matmul_spec)
        matmul.objective = objective

        # ================================================================================
        # Layer forward path
        # Calculate the layer output Y=f(X), and get the loss L = objective(Y).
        # Test the numerical gradient dL/dX=matmul.gradient_numerical().
        #
        # Note that bias columns are added inside the matmul layer instance, hence
        # matmul.X.shape is (N, 1+D), matmul.W.shape is (M, 1+D).
        # ================================================================================
        X = np.random.randn(N, D).astype(TYPE_FLOAT)
        Logger.debug("%s: X is \n%s", name, X)

        # pylint: disable=not-callable
        Y = matmul.function(X)
        # pylint: disable=not-callable
        L = matmul.objective(Y)

        # Constraint 1: Matmul output Y should be X @ W.T
        assert np.array_equal(Y, np.matmul(matmul.X, matmul.W.T))

        # Constraint 2: Numerical gradient should be the same as the numerical Jacobian
        GN = matmul.gradient_numerical()         # [dL/dX, dL/dW]

        # DO NOT use matmul.function() as the objective function for numerical_jacobian().
        # The state of the layer will be modified.
        # LX = lambda x: matmul.objective(matmul.function(x))
        def LX(x):
            y = np.matmul(x, matmul.W.T)
            # pylint: disable=not-callable
            return matmul.objective(y)

        EGNX = numerical_jacobian(LX, matmul.X)  # Numerical dL/dX including bias
        EGNX = EGNX[::, 1::]                     # Remove bias for dL/dX
        assert np.array_equal(GN[0], EGNX), \
            "GN[0]\n%s\nEGNX=\n%s\n" % (GN[0], EGNX)

        # DO NOT use matmul.function() as the objective function for numerical_jacobian().
        # The state of the layer will be modified.
        # LW = lambda w: matmul.objective(np.matmul(X, w.T))
        def LW(w):
            Y = np.matmul(matmul.X, w.T)
            # pylint: disable=not-callable
            return matmul.objective(Y)

        EGNW = numerical_jacobian(LW, matmul.W)  # Numerical dL/dW including bias
        assert np.array_equal(GN[1], EGNW)       # No need to remove bias

        # ================================================================================
        # Layer backward path
        # Calculate the analytical gradient dL/dX=matmul.gradient(dL/dY) with a dummy dL/dY.
        # ================================================================================
        dY = np.ones_like(Y)
        dX = matmul.gradient(dY)

        # Constraint 3: Matmul gradient dL/dX should be dL/dY @ W. Use a dummy dL/dY = 1.0.
        expected_dX = np.matmul(dY, matmul.W)
        expected_dX = expected_dX[::, 1::]       # Omit bias
        assert np.array_equal(dX, expected_dX)

        # Constraint 5: Analytical gradient dL/dX is close to the numerical gradient GN.
        assert np.all(np.abs(dX - GN[0]) < GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            "dX needs to be close to GN[0]. dX:\n%s\ndiff \n%s\n" % (dX, dX - GN[0])

        # --------------------------------------------------------------------------------
        # Gradient update.
        # Run the gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # --------------------------------------------------------------------------------
        # Python passes the reference to W, hence it is directly updated by the gradient
        # descent to avoid a temporary copy. Back up W beforehand to compare before/after.
        backup = copy.deepcopy(W)

        # Gradient descent, which returns the analytical dL/dX, dL/dW
        dS = matmul.update()
        dW = dS[0]

        # Constraint 6: W has been updated by the gradient descent.
        assert np.any(backup != matmul.W), "W has not been updated"

        # Constraint 5: the analytical gradients (dL/dX, dL/dW) are close to the numerical ones.
        assert validate_against_expected_gradient(GN[0], dX), \
            "dX=\n%s\nGN[0]=\n%sdiff=\n%s\n" % (dX, GN[0], (dX - GN[0]))
        assert validate_against_expected_gradient(GN[1], dW), \
            "dW=\n%s\nGN[1]=\n%sdiff=\n%s\n" % (dW, GN[1], (dW - GN[1]))

        # Constraint 7: gradient descent is progressing with the new objective L(Yn+1) < L(Yn)
        # pylint: disable=not-callable
        assert np.all(objective(matmul.function(X)) < L)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
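# --------------------------------------------------------------------------------
# Illustrative sketch (not part of the test suite): a self-contained check of
# constraints 3 and 4 above for a plain matmul Y = X @ W.T with L = sum(Y), using
# a central-difference numerical gradient instead of the repository's
# numerical_jacobian. With this loss, dL/dY is all ones, so dL/dX = dY @ W and
# dL/dW = dY.T @ X.
# --------------------------------------------------------------------------------
def _sketch_matmul_gradient_check(N: int = 4, D: int = 3, M: int = 2, h: float = 1e-5):
    """Compare analytical dL/dX, dL/dW against central-difference numerical gradients."""
    import numpy as np
    X = np.random.randn(N, D)
    W = np.random.randn(M, D)
    L = lambda: np.sum(X @ W.T)

    dY = np.ones((N, M))                 # dL/dY for L = sum(Y)
    dX = dY @ W                          # analytical dL/dX, shape (N, D)
    dW = dY.T @ X                        # analytical dL/dW, shape (M, D)

    def numerical(f, A):
        """Central-difference gradient of scalar f() w.r.t. array A, perturbed in place."""
        G = np.zeros_like(A)
        it = np.nditer(A, flags=['multi_index'])
        while not it.finished:
            i = it.multi_index
            original = A[i]
            A[i] = original + h
            fp = f()
            A[i] = original - h
            fm = f()
            A[i] = original
            G[i] = (fp - fm) / (2 * h)
            it.iternext()
        return G

    assert np.allclose(dX, numerical(L, X), atol=1e-5)
    assert np.allclose(dW, numerical(L, W), atol=1e-5)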