def test_020_matmul_builder_to_fail_weight_spec():
    """
    Objective: Verify Matmul.build().
    Expected: build() parses the spec and fails with invalid weight configurations.
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(1, 100)
        D = np.random.randint(1, 100)   # NOT including bias

        # ----------------------------------------------------------------------
        # Validate the correct specification first.
        # NOTE: Invalidate one parameter at a time from the correct one.
        # Otherwise it is not clear what is being tested.
        # ----------------------------------------------------------------------
        valid_matmul_spec = {
            _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
            _NUM_NODES: M,
            _NUM_FEATURES: D,
            _WEIGHTS: {
                _SCHEME: "he"
            }
        }
        try:
            Matmul.build(valid_matmul_spec)
        except Exception as e:
            raise RuntimeError(
                "Matmul.build() must succeed with %s" % valid_matmul_spec
            ) from e

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_WEIGHTS][_SCHEME] = "invalid_scheme"
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with an invalid weight scheme")
        except AssertionError:
            pass

    profiler.disable()
    profiler.print_stats(sort="cumtime")
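
# The test above expects Matmul.build() to surface an invalid weight scheme as an
# AssertionError. Below is a minimal sketch of the kind of spec check this implies,
# assuming a hypothetical set of supported scheme names; it is illustrative only and
# not the actual Matmul implementation.
def _sketch_validate_weight_scheme(weight_spec: dict) -> None:
    """Illustrative only: assert that the weight initialization scheme is supported."""
    _supported_weight_schemes = {"he", "xavier", "uniform", "normal"}   # assumed names
    scheme = str(weight_spec.get(_SCHEME, "")).lower()
    assert scheme in _supported_weight_schemes, \
        "Unsupported weight initialization scheme [%s]" % scheme
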
def disabled_test_020_matmul_round_trip():
    """
    TODO: Disabled because numerical_jacobian needs to be re-designed for 32-bit float, e.g. TF.

    Objective:
        Verify the forward and backward paths of the matmul layer.
    Expected:
        Forward path:
        1. Matmul function(X) == X @ W.T
        2. The numerical gradient equals the numerical Jacobian.
        Backward path:
        3. Analytical gradient dL/dX == dY @ W
        4. Analytical gradient dL/dW == X.T @ dY
        5. Analytical gradients are close to the numerical ones.
        Gradient descent:
        6. W is updated via gradient descent.
        7. The objective L decreases via gradient descent.
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        # --------------------------------------------------------------------------------
        # Instantiate a Matmul layer
        # --------------------------------------------------------------------------------
        N: int = np.random.randint(1, NUM_MAX_BATCH_SIZE)
        M: int = np.random.randint(1, NUM_MAX_NODES)
        D: int = np.random.randint(1, NUM_MAX_FEATURES)
        W = weights.he(M, D + 1)
        name = "test_020_matmul_methods"

        def objective(X: np.ndarray) -> Union[float, np.ndarray]:
            """Dummy objective function to calculate the loss L"""
            return np.sum(X)

        # Test both direct instantiation and build()
        if TYPE_FLOAT(np.random.uniform()) < 0.5:
            matmul = Matmul(
                name=name,
                num_nodes=M,
                W=W,
                log_level=logging.DEBUG
            )
        else:
            matmul_spec = {
                _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
                _NUM_NODES: M,
                _NUM_FEATURES: D,
                _WEIGHTS: {
                    _SCHEME: "he",
                },
                _OPTIMIZER: {
                    _SCHEME: "sGd"
                }
            }
            matmul = Matmul.build(matmul_spec)

        matmul.objective = objective

        # ================================================================================
        # Layer forward path
        # Calculate the layer output Y=f(X), and get the loss L = objective(Y).
        # Test the numerical gradient dL/dX = matmul.gradient_numerical().
        #
        # Note that the bias column is added inside the matmul layer instance, hence
        # matmul.X.shape is (N, 1+D) and matmul.W.shape is (M, 1+D).
        # ================================================================================
        X = np.random.randn(N, D).astype(TYPE_FLOAT)
        Logger.debug("%s: X is \n%s", name, X)

        # pylint: disable=not-callable
        Y = matmul.function(X)
        # pylint: disable=not-callable
        L = matmul.objective(Y)

        # Constraint 1: Matmul output Y should be matmul.X @ matmul.W.T
        assert np.array_equal(Y, np.matmul(matmul.X, matmul.W.T))

        # Constraint 2: The numerical gradient should equal the numerical Jacobian.
        GN = matmul.gradient_numerical()        # [dL/dX, dL/dW]

        # DO NOT use matmul.function() as the objective function for numerical_jacobian()
        # because it modifies the state of the layer.
        # LX = lambda x: matmul.objective(matmul.function(x))
        def LX(x):
            y = np.matmul(x, matmul.W.T)
            # pylint: disable=not-callable
            return matmul.objective(y)

        EGNX = numerical_jacobian(LX, matmul.X)     # Numerical dL/dX including bias
        EGNX = EGNX[::, 1::]                        # Remove bias for dL/dX
        assert np.array_equal(GN[0], EGNX), \
            "GN[0]\n%s\nEGNX=\n%s\n" % (GN[0], EGNX)

        # DO NOT use matmul.function() as the objective function for numerical_jacobian()
        # because it modifies the state of the layer.
        # LW = lambda w: matmul.objective(np.matmul(X, w.T))
        def LW(w):
            Y = np.matmul(matmul.X, w.T)
            # pylint: disable=not-callable
            return matmul.objective(Y)

        EGNW = numerical_jacobian(LW, matmul.W)     # Numerical dL/dW including bias
        assert np.array_equal(GN[1], EGNW)          # No need to remove bias

        # ================================================================================
        # Layer backward path
        # Calculate the analytical gradient dL/dX = matmul.gradient(dL/dY) with a dummy dL/dY.
        # ================================================================================
        dY = np.ones_like(Y)
        dX = matmul.gradient(dY)

        # Constraint 3: Matmul gradient dL/dX should be dL/dY @ W. Use a dummy dL/dY = 1.0.
        expected_dX = np.matmul(dY, matmul.W)
        expected_dX = expected_dX[::, 1::]          # Omit bias
        assert np.array_equal(dX, expected_dX)

        # Constraint 5: Analytical gradient dL/dX is close to the numerical gradient GN.
        assert np.all(np.abs(dX - GN[0]) < GRADIENT_DIFF_ACCEPTANCE_VALUE), \
            "dX needs to be close to GN[0]. dX:\n%s\ndiff \n%s\n" % (dX, dX - GN[0])

        # --------------------------------------------------------------------------------
        # Gradient update.
        # Run the gradient descent to update Wn+1 = Wn - lr * dL/dW.
        # --------------------------------------------------------------------------------
        # Python passes the reference to W, which is updated in place by the gradient
        # descent to avoid a temporary copy. Back up W to compare before/after.
        backup = copy.deepcopy(W)

        # Gradient descent, returning the analytical dL/dX, dL/dW
        dS = matmul.update()
        dW = dS[0]

        # Constraint 6: W has been updated by the gradient descent.
        assert np.any(backup != matmul.W), "W has not been updated"

        # Constraint 5: The analytical gradients (dL/dX, dL/dW) are close to the numerical ones.
        assert validate_against_expected_gradient(GN[0], dX), \
            "dX=\n%s\nGN[0]=\n%sdiff=\n%s\n" % (dX, GN[0], (dX - GN[0]))
        assert validate_against_expected_gradient(GN[1], dW), \
            "dW=\n%s\nGN[1]=\n%sdiff=\n%s\n" % (dW, GN[1], (dW - GN[1]))

        # Constraint 7: Gradient descent progresses with the new objective L(Yn+1) < L(Yn).
        # pylint: disable=not-callable
        assert np.all(objective(matmul.function(X)) < L)

    profiler.disable()
    profiler.print_stats(sort="cumtime")
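
# The constraints above follow from the calculus of Y = X @ W.T with L = sum(Y):
# dL/dX = dL/dY @ W and dL/dW = (dL/dY).T @ X (the transpose of X.T @ dY, matching
# W's (M, D) shape). Below is a minimal, self-contained NumPy sketch, independent of
# the Matmul layer and numerical_jacobian, that checks these identities against central
# finite differences on a tiny example; it is illustrative only, not part of the suite.
def _sketch_matmul_gradient_identities(h: float = 1e-6) -> None:
    """Illustrative only: verify dL/dX = dY @ W and dL/dW = dY.T @ X numerically."""
    rng = np.random.default_rng(0)
    X = rng.standard_normal((3, 4))     # (N, D)
    W = rng.standard_normal((2, 4))     # (M, D)

    def loss(x, w):
        # L = sum(Y) with Y = x @ w.T, hence dL/dY is a matrix of ones
        return np.sum(np.matmul(x, w.T))

    dY = np.ones((3, 2))                # dL/dY for L = sum(Y)
    dX = np.matmul(dY, W)               # analytical dL/dX
    dW = np.matmul(dY.T, X)             # analytical dL/dW

    # Central finite differences for dL/dX
    num_dX = np.zeros_like(X)
    for i in np.ndindex(X.shape):
        e = np.zeros_like(X)
        e[i] = h
        num_dX[i] = (loss(X + e, W) - loss(X - e, W)) / (2 * h)
    assert np.allclose(dX, num_dX, atol=1e-4)

    # Central finite differences for dL/dW
    num_dW = np.zeros_like(W)
    for i in np.ndindex(W.shape):
        e = np.zeros_like(W)
        e[i] = h
        num_dW[i] = (loss(X, W + e) - loss(X, W - e)) / (2 * h)
    assert np.allclose(dW, num_dW, atol=1e-4)
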
def test_020_matmul_builder_to_succeed():
    """
    Objective: Verify Matmul.build().
    Expected: build() parses the spec and succeeds.
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(1, 100)
        D = np.random.randint(1, 100)   # NOT including bias

        # ----------------------------------------------------------------------
        # Validate the correct specification first.
        # NOTE: Invalidate one parameter at a time from the correct one.
        # Otherwise it is not clear what is being tested.
        # ----------------------------------------------------------------------
        lr = TYPE_FLOAT(np.random.uniform())
        l2 = TYPE_FLOAT(np.random.uniform())
        valid_matmul_spec = {
            _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
            _NUM_NODES: M,
            _NUM_FEATURES: D,
            _WEIGHTS: {
                _SCHEME: "he",
            },
            _OPTIMIZER: {
                _SCHEME: "sGd",
                _PARAMETERS: {
                    "lr": lr,
                    "l2": l2
                }
            }
        }
        try:
            matmul: Matmul = Matmul.build(valid_matmul_spec)
            assert matmul.optimizer.lr == lr
            assert matmul.optimizer.l2 == l2
        except Exception as e:
            raise RuntimeError(
                "Matmul.build() must succeed with %s" % valid_matmul_spec
            ) from e

        # The optimizer scheme should be accepted regardless of case.
        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_SCHEME] = "sgd"
        try:
            Matmul.build(matmul_spec)
        except Exception as e:
            raise RuntimeError(
                "Matmul.build() must succeed with %s" % matmul_spec
            ) from e

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_SCHEME] = "SGD"
        try:
            Matmul.build(matmul_spec)
        except Exception as e:
            raise RuntimeError(
                "Matmul.build() must succeed with %s" % matmul_spec
            ) from e

    profiler.disable()
    profiler.print_stats(sort="cumtime")
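
# The test above feeds the optimizer scheme as "sGd", "sgd" and "SGD" and expects all of
# them to build. Below is a minimal sketch of the kind of case-insensitive scheme lookup
# this implies; the registry contents and helper name are hypothetical, not the actual
# builder code.
def _sketch_resolve_optimizer_scheme(optimizer_spec: dict):
    """Illustrative only: resolve an optimizer scheme name regardless of case."""
    _optimizer_registry = {"sgd": "SGD optimizer class"}    # assumed registry contents
    scheme = str(optimizer_spec[_SCHEME]).lower()           # normalize the case
    assert scheme in _optimizer_registry, \
        "Unsupported optimizer scheme [%s]" % optimizer_spec[_SCHEME]
    return _optimizer_registry[scheme]
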
def test_020_matmul_builder_to_fail_optimizer_spec():
    """
    Objective: Verify Matmul.build().
    Expected: build() parses the spec and fails with invalid optimizer configurations.
    """
    profiler = cProfile.Profile()
    profiler.enable()

    for _ in range(NUM_MAX_TEST_TIMES):
        M = np.random.randint(1, 100)
        D = np.random.randint(1, 100)   # NOT including bias

        # ----------------------------------------------------------------------
        # Validate the correct specification first.
        # NOTE: Invalidate one parameter at a time from the correct one.
        # Otherwise it is not clear what is being tested.
        # ----------------------------------------------------------------------
        valid_matmul_spec = {
            _NAME: "test_020_matmul_builder_to_fail_matmul_spec",
            _NUM_NODES: M,
            _NUM_FEATURES: D,
            _WEIGHTS: {
                _SCHEME: "he"
            },
            _OPTIMIZER: {
                _SCHEME: "sGd",
                _PARAMETERS: {
                    "lr": TYPE_FLOAT(np.random.uniform()),
                    "l2": TYPE_FLOAT(np.random.uniform())
                }
            },
            "log_level": logging.ERROR
        }
        try:
            Matmul.build(valid_matmul_spec)
        except Exception as e:
            raise RuntimeError(
                "Matmul.build() must succeed with %s" % valid_matmul_spec
            ) from e

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER] = ""
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with an invalid optimizer spec")
        except AssertionError:
            pass

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_SCHEME] = "invalid"
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with an invalid optimizer scheme")
        except AssertionError:
            pass

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_PARAMETERS]["lr"] = np.random.uniform(-1, 0)
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with an invalid lr value")
        except AssertionError:
            pass

        matmul_spec = copy.deepcopy(valid_matmul_spec)
        matmul_spec[_OPTIMIZER][_PARAMETERS]["l2"] = np.random.uniform(-1, 0)
        try:
            Matmul.build(matmul_spec)
            raise RuntimeError(
                "Matmul.build() must fail with an invalid l2 value")
        except AssertionError:
            pass

    profiler.disable()
    profiler.print_stats(sort="cumtime")
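
# The last two cases above expect a negative lr / l2 to be rejected with an AssertionError.
# Below is a minimal sketch of that parameter check, assuming SGD takes a positive learning
# rate and a non-negative L2 coefficient; the helper and its defaults are hypothetical,
# not the actual optimizer code.
def _sketch_validate_sgd_parameters(parameters: dict) -> None:
    """Illustrative only: assert that the SGD hyperparameters are in range."""
    lr = parameters.get("lr", TYPE_FLOAT(0.01))     # assumed default
    l2 = parameters.get("l2", TYPE_FLOAT(0.0))      # assumed default
    assert lr > 0, "Learning rate lr must be positive, got %s" % lr
    assert l2 >= 0, "L2 regularization l2 must be non-negative, got %s" % l2
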