def _is_epoch_terminal(self, epoch, max_epoches, err): """SGD Terminality Check. Judges if an epoch is final. The given epoch is terminal if and only if at least one of the following conditions are met: 1. Maximum number of epoches has been reached. 2. Difference in loss between the last and second to last epoches is smaller than `MIN_DELTA_EPOCH`. 3. Loss increased in the last epoch rather than decreased. Args: epoch (int): Current iteration number. max_epoches (int): Maximum number of epoches allowed. err (list of float): Model loss across all epoches. Returns: bool: `True` if SGD should be interrupted, `False` otherwise. """ # Maximum number of epoches reached. if epoch >= max_epoches: return True # Not enough epoches to judge. if len(err) < 2: return False delta_epoch = compose(abs, np.subtract)(err[-1], err[-2]) """float: Difference in loss between the last and second to last epoches.""" return delta_epoch < MIN_DELTA_EPOCH or err[-1] > err[-2]
def test_random_model_regularization(self):
    """`Model.regularization`: Randomized Validator.

    Calls `regularization` with freshly generated random parameters, once
    passed as an argument and once stored on the model, and checks that a
    float comes back both times.

    Raises:
        AssertionError: If `regularization` needs debugging.

    """
    for _ in range(self.n_tests):
        # One random matrix per entry of `self.shapes`.
        candidate = tuple(map(random_matrix, self.shapes))

        # Parameters supplied as a method argument.
        penalty = self.model.regularization(candidate)
        self.assertIsInstance(penalty, float)

        # Parameters supplied through the model attribute.
        self.model.params = candidate
        penalty = self.model.regularization()
        self.assertIsInstance(penalty, float)
def test_edge_cases_model_evaluate(self):
    """`Model.evaluate`: Edge Case Validator.

    Replaces each input of `evaluate` in turn with an empty matrix and checks
    that the matching validation error is raised.

    Raises:
        AssertionError: If an expected exception is not raised.
        Exception: Any exception of an unexpected kind propagates unchanged.

    """
    X = random_matrix(self.data_shape)
    Y = random_matrix((self.data_shape[0], 1))
    params = tuple(map(random_matrix, self.shapes))

    # Empty matrix in place of the feature set `X`.
    self.assertRaises(_InvalidFeatureSetError, self.model.evaluate,
                      np.matrix([[]]), Y, params=params)

    # Empty matrix in place of the observation set `Y`.
    self.assertRaises(_InvalidObservationSetError, self.model.evaluate,
                      X, np.matrix([[]]), params=params)

    # Empty matrix as the only model parameter.
    self.assertRaises(_InvalidModelParametersError, self.model.evaluate,
                      X, Y, params=(np.matrix([[]]), ))
def action():
    """Initialize Parameters Update Action.

    Routine to run once the feature set has been validated. Draws one random
    matrix for every shape produced by `shape_fn(X)` and stores the resulting
    tuple in `self.params`. `self`, `shape_fn` and `X` are taken from the
    enclosing scope.

    """
    shapes = shape_fn(X)
    self.params = tuple(map(random_matrix, shapes))
def _best_fit_lines(self, raw_x, values, subplot): """Best-Fit Line Plotter. Plots the lines of best fit according to the given x- and y-values. Args: raw_x (list of float): X-values with possible duplicates. values (:obj:`list of float`): Contains all y-values. subplot (int): Index of suplot to use for plotting. Return: list of matplotlib.lines.Line2D: Best-fit lines. """ x = compose(list, np.unique)(raw_x) """list of float: X-values.""" ax = self._ax[subplot] """matplotlib.axes.Axes: Subplot.""" lines = [] """list of matplotlib.lines.Line2D: Best-fit lines.""" try: if len(values) == 0: raise ValueError("No y-values provided.") for i, (label, y) in enumerate(values.items()): color = self._generate_color() """(float, float, float): RGB color.""" linestyle = "-" if label == "observations" else "--" """str: Plot linestyle. See `matplotlib.plot.pyplot`.""" l, = ax.plot(x, compose(np.poly1d, np.polyfit)(raw_x, y, 1)(x), color=color, linestyle=linestyle) """matplotlib.lines.Line2D: Best-fit model plotted last.""" lines.append(l) except AttributeError: raise TypeError("Expected 'dict', saw '%s' instead." % type(values).__name__) return lines
def __str__(self):
    """Summarize the instance as its class name plus parameter shapes.

    Every name in `self._param_names` is listed without its first character,
    followed by the `shape` of the attribute of that name, or "not set" when
    the attribute is missing or `None`.

    Returns:
        str: "<ClassName>: `<param>`: <shape>, ..." summary.

    """
    def describe(name):
        # Stringifies a single parameter.
        if not hasattr(self, name) or getattr(self, name) is None:
            shape = "not set"
        else:
            shape = getattr(self, name).shape

        return "`%s`: %s" % (name[1:], shape)

    summary = ", ".join(map(describe, self._param_names))

    return "%s: %s" % (self.__class__.__name__, summary)
def _plot_feature(self, X, Y, feature, model):
    """Feature Plotter.

    Collects the data needed for a scatter plot of one feature: the feature's
    values as x, and both the given observations and the model's predictions
    as y. Only the first column of `Y` and of the predictions is used.

    Args:
        X (np.matrix): Feature set.
        Y (np.matrix): Observation set.
        feature (int): Column number of target feature.
        model (ModelWrapper): Model to use for predictions.

    Returns:
        (list of float, dict, object): The feature's x-values, a dictionary
        with the keys "observations" and "predictions" holding the y-values,
        and whatever `model.evaluate(X, Y)` returns.

    Raises:
        IndexError: If `feature` is out of range.
        TypeError: If `feature` is not an integer or if `model` is not an
            instance of `ModelWrapper`.
        ValueError: If `feature` is negative.

    """
    validate_datasets(X, Y)

    # Type checks come first so the range checks below only see integers.
    if not isinstance(feature, int):
        raise TypeError("Expected 'int', saw '%s' instead." %
                        type(feature).__name__)

    if not isinstance(model, ModelWrapper):
        raise TypeError("Expected 'ModelWrapper', saw '%s' instead." %
                        type(model).__name__)

    if feature < 0:
        raise ValueError("Negative feature indices are not allowed.")

    if feature >= X.shape[1]:
        raise IndexError("Feature index '%d' is out of range." % feature)

    def first_column(A):
        # Flattens the first column of a matrix into a plain list.
        return A.T[0, :].tolist()[0]

    x = first_column(X[:, feature])
    Y_hat = model.predict(X)

    values = {
        "observations": first_column(Y),
        "predictions": first_column(Y_hat),
    }

    return x, values, model.evaluate(X, Y)
def action():
    """Regularization Update Action.

    Routine to run once the feature sets and parameters have been validated.
    For every parameter `p` in `self.params` it computes
    `p.T * diagonal(p.shape[0], r) * p` as a float, with `r` being
    `self._regularization`, and adds the results up.

    Returns:
        float: Sum of the penalties of all parameters.

    """
    strength = self._regularization

    def penalty(p):
        # `diagonal` is presumably an n x n matrix with `strength` on its
        # diagonal -- confirm against its definition.
        weights = diagonal(p.shape[0], strength)
        return float(p.T.dot(weights).dot(p))

    return sum(penalty(p) for p in self.params)
def test_random_model_predict(self):
    """`Model.predict`: Randomized Validator.

    Calls `predict` on random data with random parameters, once passed as an
    argument and once stored on the model, and checks that both calls yield
    an `n x 1` matrix with the same norm.

    Raises:
        AssertionError: If `predict` needs debugging.

    """
    for _ in range(self.n_tests):
        X = random_matrix(self.data_shape)
        params = tuple(map(random_matrix, self.shapes))

        # One prediction per data point.
        expected_shape = (X.shape[0], 1)

        # Parameters supplied as a method argument.
        from_argument = self.model.predict(X, params=params)
        self.assertIsInstance(from_argument, np.matrix)
        self.assertEqual(from_argument.shape, expected_shape)

        # Passing `params` explicitly must not store them on the model.
        self.assertIsNone(self.model.params)

        # Parameters supplied through the model attribute.
        self.model.params = params
        from_attribute = self.model.predict(X)
        self.assertIsInstance(from_attribute, np.matrix)
        self.assertEqual(from_attribute.shape, expected_shape)
        self.assertIsNotNone(self.model.params)

        # Both ways of supplying the parameters should agree.
        self.assertEqual(np.linalg.norm(from_argument),
                         np.linalg.norm(from_attribute))

        del self.model.params
def _generate_color():
    """Color Generator.

    Draws each RGB channel independently from the levels 32, 64, ..., 224
    and divides it by 255.

    NOTE(review): takes no `self` although it is invoked through instances
    elsewhere -- presumably declared as a static method; confirm.

    Returns:
        (float, float, float): Color in RGB format with scale [0, 1].

    """
    channel_levels = range(32, 256, 32)
    red, green, blue = (choice(channel_levels) for _ in range(3))

    return (red / 255.0, green / 255.0, blue / 255.0)
def test_random_model_params(self):
    """`Model.params`: Randomized Validator.

    Assigns random parameters to the model, reads them back and checks their
    container type, count, element type and norms. Finally checks that
    deleting the attribute leaves it as `None`.

    Raises:
        AssertionError: If `params` needs debugging.

    """
    for _ in range(self.n_tests):
        random_params = tuple(map(random_matrix, self.shapes))

        self.model.params = random_params
        params = self.model.params  # Read back through the getter.

        # Parameters should be a tuple with one entry per shape.
        self.assertIsInstance(params, tuple)
        self.assertEqual(len(params), len(self.shapes))

        for stored, given in zip(params, random_params):
            # Each parameter should be a matrix matching its input.
            self.assertIsInstance(stored, np.matrix)
            self.assertEqual(np.linalg.norm(stored), np.linalg.norm(given))

        # Model parameters should be initialized at this point.
        self.assertIsNotNone(self.model.params)

        del self.model.params

        # Model parameters should be uninitialized after deletion.
        self.assertIsNone(self.model.params)
def test_edge_cases_model_predict(self):
    """`Model.predict`: Edge Case Validator.

    Replaces each input of `predict` in turn with an empty matrix and checks
    that the matching validation error is raised.

    Raises:
        AssertionError: If an expected exception is not raised.
        Exception: Any exception of an unexpected kind propagates unchanged.

    """
    X = random_matrix(self.data_shape)
    params = tuple(map(random_matrix, self.shapes))

    # Empty feature set.
    self.assertRaises(_InvalidFeatureSetError, self.model.predict,
                      np.matrix([[]]), params=params)

    # Empty matrix as the only model parameter.
    self.assertRaises(_InvalidModelParametersError, self.model.predict,
                      X, params=(np.matrix([[]]), ))
def test_invalid_args_model_regularization(self):
    """`Model.regularization`: Argument Validator.

    Calls `regularization` with a series of invalid argument counts and
    values and checks that each call raises the expected exception.

    Raises:
        AssertionError: If an expected exception is not raised.
        Exception: Any exception of an unexpected kind propagates unchanged.

    """
    params = tuple(map(random_matrix, self.shapes))

    # (expected exception, positional args, keyword args), checked in order.
    cases = (
        # No arguments with no parameters set.
        (_IncompleteModelError, (), {}),
        # Too many positional arguments.
        (TypeError, (params, params), {}),
        # `params` given both positionally and by keyword.
        (TypeError, (params, ), {"params": params}),
        # Invalid kwarg.
        (TypeError, (), {"params": params, "key": "value"}),
        # List instead of parameter tuple `params`.
        (_InvalidModelParametersError, (list(params), ), {}),
        # Tuple of empty lists instead of a tuple of matrices.
        (_InvalidModelParametersError, (), {"params": ([], [])}),
    )

    for expected, args, kwargs in cases:
        with self.assertRaises(expected):
            self.model.regularization(*args, **kwargs)
def _train_helper(self, model, train_buckets, exact=False, **kwargs):
    """Model Trainer Helper.

    Trains the model parameters either analytically or numerically. The
    parameters are first initialized from the features of the first bucket;
    every bucket holds the observations in its last column and the features
    in all columns before it.

    Args:
        model (Model): Model to be trained.
        train_buckets (list of np.matrix): Stochastic and randomized
            representation of dataset.
        exact (bool, optional): `True` if training should be done
            analytically, `False` if it should be done with SGD. Defaults to
            `False`.
        **kwargs: Options that define SGD's behavior, forwarded to
            `self._sgd`. Ignored when `exact` is `True`.

    Returns:
        tuple of np.matrix: Trained model parameters.

    """
    # `reduce` is not a builtin on Python 3; `functools` provides it on both
    # major versions.
    from functools import reduce

    def split(dataset):
        # Separates a dataset into feature and observation sets.
        return dataset[:, :-1], dataset[:, -1]

    datasets = shuffle_batches(train_buckets)

    model.init_params(train_buckets[0][:, :-1])

    if exact:
        # Stack all batches into a single dataset before solving.
        train_X, train_Y = split(reduce(append_bottom, datasets))
        model.train(train_X, train_Y)
    else:
        # A real list rather than a lazy `map`: on Python 3 a `map` object
        # can only be traversed once, which would leave nothing to iterate
        # over if SGD passes over the batches more than once.
        batches_xy = [split(dataset) for dataset in datasets]
        model.params = self._sgd(model, batches_xy, **kwargs)

    return model.params
def _subplot(self, i, x, values, title=None, xlabel=None, ylabel=None,
             legend=True):
    """Subplotter.

    Plots the given data to the provided subplot as unconnected markers, one
    series per entry of `values`.

    Args:
        i (int): Subplot index.
        x (list of float): X values.
        values (dict): Dictionary where keys represent plot labels and values
            represent y values. Labels are displayed with underscores turned
            into spaces and each word capitalized. The series labelled
            "observations" is drawn with circles, all others with stars.
        title (str, optional): Subplot display name or `None` if no title
            should be displayed. Defaults to `None`.
        xlabel (str, optional): X-axis display title or `None` if no title
            should be displayed. Defaults to `None`.
        ylabel (str, optional): Y-axis display title or `None` if no title
            should be displayed. Defaults to `None`.
        legend (bool, optional): Whether a legend should be drawn. Defaults
            to `True`.

    Returns:
        list of matplotlib.lines.Line2D: Plotted lines.

    """
    ax = self._ax[i]
    lines = []

    # `items()` instead of `iteritems()`: the latter does not exist on
    # Python 3, while `items()` iterates the same pairs on both versions.
    for label_raw, y in values.items():
        # "some_label" -> "Some Label"
        label = " ".join(word.capitalize() for word in label_raw.split("_"))

        color = self._generate_color()
        marker = "o" if label_raw == "observations" else "*"

        line, = ax.plot(x, y, color=color, marker=marker, linestyle="None",
                        label=label,
                        markeredgewidth=DEFAULT_MARKER_EDGE_WIDTH)
        lines.append(line)

    # Display title (if provided).
    if title is not None:
        ax.set_title(title)

    # Display axis labels (if provided).
    if xlabel is not None:
        ax.set_xlabel(xlabel)

    if ylabel is not None:
        ax.set_ylabel(ylabel)

    # Remove ticks from both the right y-axis and the top x-axis.
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    ax.margins(DEFAULT_MARGINS)

    if legend:
        ax.legend(loc="best", numpoints=DEFAULT_NUMPOINTS)

    return lines
def _cross_validate(self, X, Y, model, k=DEFAULT_SGD_K, **kwargs):
    """K-Fold Cross Validation.

    Splits the given datasets into buckets with `batches(X, Y, k)`. Each
    bucket is held out once for testing while the model is trained on all the
    others. The parameters of the fold with the lowest testing error are
    stored on the model.

    Args:
        X (np.matrix): Training feature set.
        Y (np.matrix): Training observation set.
        model (Model): Mathematical model to train.
        k (int, optional): Bucket size handed to `batches`. Defaults to
            `DEFAULT_SGD_K`.
        **kwargs: Forwarded unchanged to `self._train_helper`.

    Returns:
        float: Training error (i.e. average testing error across all folds).

    """
    buckets = batches(X, Y, k)

    fold_errors = []
    best_params = None
    lowest_err = float("inf")

    for held_out in range(len(buckets)):
        # Work on a copy so the original buckets survive every fold.
        train = deepcopy(buckets)

        # The held-out bucket carries the observations in its last column.
        test_set = train.pop(held_out)
        test_X, test_Y = test_set[:, :-1], test_set[:, -1]

        params = self._train_helper(model, train, **kwargs)

        # First element of the evaluation result is the loss.
        fold_err = model.evaluate(test_X, test_Y, params=params,
                                  regularize=False)[0]
        fold_errors.append(fold_err)

        # Remember the parameters with the smallest error seen so far.
        if fold_err < lowest_err:
            lowest_err, best_params = fold_err, params

    # Only update model parameters if a good match is found.
    if best_params is not None:
        model.params = best_params

    return np.mean(fold_errors)
def test_random_model_numerical_gradient(self):
    """`Model.numerical_gradient`: Randomized Validator.

    Calls `numerical_gradient` on random data with random parameters, once
    passed as an argument and once stored on the model, and checks that both
    calls yield one matrix per parameter with matching total norms.

    Raises:
        AssertionError: If `numerical_gradient` needs debugging.

    """
    def check_gradients(gradients):
        # Gradients should be a tuple holding one matrix per parameter.
        self.assertIsInstance(gradients, tuple)
        self.assertEqual(len(gradients), len(self.shapes))

        for gradient in gradients:
            self.assertIsInstance(gradient, np.matrix)

    for _ in range(self.n_tests):
        X = random_matrix(self.data_shape)
        Y = random_matrix((self.data_shape[0], 1))
        random_params = tuple(map(random_matrix, self.shapes))

        # Parameters supplied as a method argument.
        from_argument = self.model.numerical_gradient(X, Y,
                                                      params=random_params)
        check_gradients(from_argument)

        # Passing `params` explicitly must not store them on the model.
        self.assertIsNone(self.model.params)

        # Parameters supplied through the model attribute.
        self.model.params = random_params
        from_attribute = self.model.numerical_gradient(X, Y)
        check_gradients(from_attribute)
        self.assertIsNotNone(self.model.params)

        # Sums of gradient norms of both results should match.
        norm_argument = sum(map(np.linalg.norm, from_argument))
        norm_attribute = sum(map(np.linalg.norm, from_attribute))
        self.assertEqual(norm_argument, norm_attribute)

        del self.model.params
def test_random_model_evaluate(self):
    """`Model.evaluate`: Randomized Validator.

    Calls `evaluate` on random data with random parameters, once passed as an
    argument and once stored on the model, and checks that both calls yield
    the same `(error, predictions)` pair. Also checks that the regularized
    error equals the unregularized error plus the regularization term.

    Raises:
        AssertionError: If `evaluate` needs debugging.

    """
    def check_result(result):
        # Result should be an (error, predictions) pair.
        self.assertIsInstance(result, tuple)
        self.assertEqual(len(result), 2)

        err, Y_hat = result

        # Evaluation error should be exactly a `np.float64`.
        self.assertEqual(type(err), np.float64)

        # Prediction set should be a matrix.
        self.assertIsInstance(Y_hat, np.matrix)

        return err, Y_hat

    for _ in range(self.n_tests):
        X = random_matrix(self.data_shape)
        Y = random_matrix((self.data_shape[0], 1))
        random_params = tuple(map(random_matrix, self.shapes))

        # Parameters supplied as a method argument.
        err1, Y_hat1 = check_result(
            self.model.evaluate(X, Y, params=random_params))

        # Passing `params` explicitly must not store them on the model.
        self.assertIsNone(self.model.params)

        # Parameters supplied through the model attribute.
        self.model.params = random_params
        err2, Y_hat2 = check_result(self.model.evaluate(X, Y))
        self.assertIsNotNone(self.model.params)

        # Both ways of supplying the parameters should agree.
        self.assertEqual(err1, err2)
        self.assertEqual(np.linalg.norm(Y_hat1), np.linalg.norm(Y_hat2))

        penalty = self.model.regularization()
        err_no_reg, Y_hat_no_reg = self.model.evaluate(X, Y,
                                                       regularize=False)

        # Regularized error = unregularized error + regularization term.
        self.assertEqual(err1, err_no_reg + penalty)

        # Predicted observations should still be identical, though.
        self.assertEqual(np.linalg.norm(Y_hat1),
                         np.linalg.norm(Y_hat_no_reg))

        del self.model.params
def test_invalid_args_model_predict(self):
    """`Model.predict`: Argument Validator.

    Calls `predict` with a series of invalid argument counts and values and
    checks that each call raises the expected exception.

    Raises:
        AssertionError: If an expected exception is not raised.
        Exception: Any exception of an unexpected kind propagates unchanged.

    """
    n, d = self.data_shape  # Number of data points and of features.
    X = random_matrix(self.data_shape)
    params = tuple(map(random_matrix, self.shapes))

    # No arguments.
    self.assertRaises(TypeError, self.model.predict)

    # Too many arguments.
    self.assertRaises(TypeError, self.model.predict, X, X, params=params)

    # Params not set.
    self.assertRaises(_IncompleteModelError, self.model.predict, X)

    # Invalid kwarg.
    self.assertRaises(TypeError, self.model.predict, X, params=params,
                      key="value")

    # `None` instead of feature set `X`.
    self.assertRaises(_InvalidFeatureSetError, self.model.predict, None,
                      params=params)

    # ndarray instead of feature set `X`.
    self.assertRaises(_InvalidFeatureSetError, self.model.predict,
                      np.zeros((n, d)), params=params)

    # Single `(n + 1) x 1` matrix as model parameters.
    self.assertRaises(_InvalidModelParametersError, self.model.predict, X,
                      params=(random_matrix((n + 1, 1)), ))

    # `None` instead of model parameters `params`.
    self.assertRaises(_IncompleteModelError, self.model.predict, X,
                      params=None)

    # List instead of model parameters tuple `params`.
    self.assertRaises(_InvalidModelParametersError, self.model.predict, X,
                      params=list(params))

    # Tuple of ndarray instead of np.matrix tuple `params`.
    self.assertRaises(_InvalidModelParametersError, self.model.predict, X,
                      params=tuple(map(np.zeros, self.shapes)))
def test_invalid_args_model_evaluate(self):
    """`Model.evaluate`: Argument Validator.

    Calls `evaluate` with a series of invalid argument counts and values and
    checks that each call raises the expected exception.

    Raises:
        AssertionError: If an expected exception is not raised.
        Exception: Any exception of an unexpected kind propagates unchanged.

    """
    n, d = self.data_shape  # Number of data points and of features.
    X = random_matrix(self.data_shape)
    Y = random_matrix((n, 1))
    params = tuple(map(random_matrix, self.shapes))

    # No arguments.
    self.assertRaises(TypeError, self.model.evaluate)

    # Too many arguments.
    self.assertRaises(TypeError, self.model.evaluate, X, Y, Y,
                      params=params)

    # Params not set.
    self.assertRaises(_IncompleteModelError, self.model.evaluate, X, Y)

    # Invalid kwarg.
    self.assertRaises(TypeError, self.model.evaluate, X, Y, params=params,
                      key="value")

    # `None` instead of feature set `X`.
    self.assertRaises(_InvalidFeatureSetError, self.model.evaluate, None, Y,
                      params=params)

    # ndarray instead of feature set `X`.
    self.assertRaises(_InvalidFeatureSetError, self.model.evaluate,
                      np.zeros((n, d)), Y, params=params)

    # `None` instead of observation set `Y`.
    self.assertRaises(_InvalidObservationSetError, self.model.evaluate, X,
                      None, params=params)

    # ndarray instead of observation set `Y`.
    self.assertRaises(_InvalidObservationSetError, self.model.evaluate, X,
                      np.zeros((n, 1)), params=params)

    # Incompatible feature set.
    self.assertRaises(_IncompatibleDataSetsError, self.model.evaluate,
                      random_matrix((d, n)), Y, params=params)

    # Incompatible observation set.
    self.assertRaises(_IncompatibleDataSetsError, self.model.evaluate, X,
                      random_matrix((n + 1, 1)), params=params)

    # `None` instead of model parameters `params`.
    self.assertRaises(_IncompleteModelError, self.model.evaluate, X, Y,
                      params=None)

    # List instead of model parameters tuple `params`.
    self.assertRaises(_InvalidModelParametersError, self.model.evaluate, X,
                      Y, params=list(params))

    # Tuple of ndarray instead of np.matrix tuple `params`.
    self.assertRaises(_InvalidModelParametersError, self.model.evaluate, X,
                      Y, params=tuple(map(np.zeros, self.shapes)))

    # `None` instead of string `loss_fn`.
    self.assertRaises(TypeError, self.model.evaluate, X, Y, params=params,
                      loss_fn=None)

    # Integer instead of string `loss_fn`.
    self.assertRaises(TypeError, self.model.evaluate, X, Y, params=params,
                      loss_fn=123)

    # Non-existent loss function.
    self.assertRaises(AttributeError, self.model.evaluate, X, Y,
                      params=params, loss_fn="non-existent")