def test_random_appendargs(self):
    """`general.appendargs`: Randomized Validator.

    Feeds `appendargs` randomly generated string arguments and checks that
    the callable it builds passes the stored argument after the one supplied
    at call time.

    Raises:
        AssertionError: If `appendargs` needs debugging.

    """
    def join_strings(head, tail):
        # Reference two-argument function: plain string concatenation.
        return head + tail

    # Produces the string form of a freshly drawn uniform sample.
    random_text = compose(str, _np.random.uniform)

    # Given `None` instead of a function, `appendargs` should return `None`.
    self.assertIsNone(appendargs(None))

    for _ in range(self.n_tests):
        # The stored argument is drawn before the call-time argument, so a
        # seeded run consumes the random stream in a fixed order.
        stored = random_text(0.0, 100.0, size=self.n)
        supplied = random_text(0.0, 100.0, size=self.n)

        # Manual concatenation the wrapped function has to reproduce.
        expected = supplied + stored

        wrapped = appendargs(join_strings, stored)

        # The wrapper has to be a plain function, like `compose` itself.
        self.assertIsInstance(wrapped, type(compose))

        # Programmatic result should match the manual concatenation.
        self.assertEqual(wrapped(supplied), expected)
def factors(n):
    """Factor Finder.

    Computes all the factors of the given positive integer.

    See:
        https://stackoverflow.com/questions/6800193/what-is-the-most-efficient-way-of-finding-all-the-factors-of-a-number-in-python

    Args:
        n (int): Number whose factors want to be found.

    Returns:
        list of int: `n`'s factors, without duplicates and in no particular
        order.

    Raises:
        TypeError: If `n` is not exactly an `int` (booleans and floats are
            rejected).
        ValueError: If `n` is smaller than 1, since no factors can be
            computed for it.

    """
    if type(n) is not int:
        raise TypeError("Expected 'int', saw '%s' instead." % type(n).__name__)

    # Zero and negative numbers are rejected explicitly instead of relying on
    # a failure somewhere inside the arithmetic below.
    if n < 1:
        raise ValueError("Factors of input '%s' are not computable." % str(n))

    # A set keeps perfect squares from listing their root twice.
    found = set()

    # Only candidates up to the square root are tried; each hit also yields
    # its cofactor. The bound is checked with integer arithmetic so that it
    # stays exact for numbers too large for a float square root.
    candidate = 1
    while candidate * candidate <= n:
        if n % candidate == 0:
            found.add(candidate)
            found.add(n // candidate)
        candidate += 1

    return list(found)
def test_random_compose(self):
    """`general.compose`: Randomized Validator.

    Tests the behavior of `compose` by feeding it randomly generated
    positional and keyword arguments and comparing the composed result with
    the same chain of calls written out by hand.

    Raises:
        AssertionError: If `compose` needs debugging.

    """
    def special_sum(*args, **kwargs):
        # Adds every positional and keyword value. The values view is turned
        # into a list so the concatenation works whether `dict.values()`
        # returns a list or a view.
        return sum(list(args) + list(kwargs.values()))

    def negate(x):
        # Negates numeric input.
        return -1.0 * x

    # Functions composed during each iteration; the last one is applied
    # first and receives every argument.
    functions = str, negate, _sqrt, abs, special_sum

    # Compose with no arguments should return `None`.
    self.assertIsNone(compose())

    for _ in range(self.n_tests):
        args = _np.random.uniform(0.0, 100.0, size=self.n)
        kwargs = {
            str(k): k
            for k in _np.random.uniform(0.0, 100.0, size=self.n)
        }

        # Expected output: the chain applied by hand. The innermost step
        # reuses `special_sum` with the same arguments so both sides add the
        # numbers the same way and the exact string comparison below is not
        # at the mercy of floating-point summation order.
        target = str(negate(_sqrt(abs(special_sum(*args, **kwargs)))))

        composed_fn = compose(*functions)

        # Composition should be a function.
        self.assertEqual(type(composed_fn), type(compose))

        result = composed_fn(*args, **kwargs)

        # Programmatic composition result should match the manual
        # composition output.
        self.assertEqual(result, target)
def test_random_append_bottom(self):
    """`linalg.append_bottom`: Randomized Validator.

    Tests the behavior of `append_bottom` by feeding it randomly generated
    arguments and comparing row norms before and after the append.

    Raises:
        AssertionError: If `append_bottom` needs debugging.

    """
    for _ in range(self.n_tests):
        # Random-valued feature set.
        X = random_matrix(self.data_shape)

        # Random-valued row vector with as many columns as `X`.
        v = random_matrix((1, self.data_shape[1]))

        result = append_bottom(X, v)

        # Result should be a matrix.
        self.assertIsInstance(result, _np.matrix)

        # Row norms of the input, norm of the appended vector, and row norms
        # of the augmented matrix. They are computed one by one instead of
        # through a tuple-unpacking lambda and an indexed `map` result,
        # neither of which works on Python 3.
        input_row_norms = _np.linalg.norm(X, axis=1)
        vector_norm = _np.linalg.norm(v, axis=None)
        output_row_norms = _np.linalg.norm(result, axis=1)

        # Absolute difference between the norms that went in and the norms
        # that came out.
        delta = abs(sum(input_row_norms) + vector_norm
                    - sum(output_row_norms))

        # The row norms of input should match those of the augmented matrix.
        self.assertLessEqual(delta, self.zero_cutoff)

        # The vector norm of `v` should match that of the bottommost row
        # vector in the augmented matrix.
        self.assertAlmostEqual(_np.linalg.norm(v),
                               _np.linalg.norm(result[-1, :]))
def _append_helper(X, v, position):
    """Matrix Concatenator.

    Appends the given row/column vector(s) at the specified position of a
    matrix.

    Args:
        X (np.matrix): Feature set to be augmented.
        v (np.matrix): Vector(s) (as a matrix).
        position (str): 'bottom', 'left', 'right', or 'top'.

    Returns:
        np.matrix: The augmented feature set.

    Raises:
        IncompatibleDataSetsError: If the feature set's and vector's
            dimensions do not match.
        InvalidFeatureSetError: If the given feature set or vector are
            invalid (presumably raised by `validate_feature_set`; confirm
            against its definition).
        ValueError: If `position` is not one of the supported values.

    """
    # Validate with an explicit loop: `map` is lazy on Python 3, so calling
    # it only for its side effect would skip the validation entirely.
    for dataset in (X, v):
        validate_feature_set(dataset)

    # For each position: the operands in concatenation order, followed by
    # the axis handed to `np.concatenate` (0 stacks rows, 1 stacks columns).
    layouts = {
        'bottom': ((X, v), 0),
        'left': ((v, X), 1),
        'right': ((X, v), 1),
        'top': ((v, X), 0),
    }

    if position not in layouts:
        raise ValueError("Invalid append position.")

    operands, axis = layouts[position]

    # Index into both `shape` tuples of the dimension that has to agree:
    # column counts when stacking vertically, row counts otherwise.
    index = int(position in ('bottom', 'top'))

    if X.shape[index] != v.shape[index]:
        raise _IncompatibleDataSetsError(X, v, "concatenation")

    return _np.matrix(_np.concatenate(operands, axis))
def test_invalid_args_compose(self):
    """`general.compose`: Argument Validator.

    Checks that `compose` fails with a `TypeError` when it is used with
    invalid argument counts and values.

    Raises:
        Exception: If at least one `Exception` raised is not of the expected
            kind.

    """
    # No functions at all: using the result as a function has to fail.
    self.assertRaises(TypeError, lambda: compose()("arg"))

    # A non-callable mixed in with real functions.
    self.assertRaises(TypeError,
                      lambda: compose(map, reduce, filter, 123)("arg"))

    # Keyword arguments are not accepted.
    self.assertRaises(
        TypeError,
        lambda: compose(map, reduce, key1="key1", key2="key2")("arg"))
def shuffle_batches(batches):
    """Batch Shuffler.

    Re-orders the datapoints in the given batch set into a new set of
    batches. The new batches have the same sizes as the given ones, listed
    in reverse order.

    Args:
        batches: Batch set to re-order.

    Returns:
        list of np.matrix: Shuffled batches.

    Raises:
        InvalidFeatureSetError: If the first batch has fewer than two columns
            or if a data point does not fit into its new batch.
        TypeError: If `batches` is not an iterable of np.matrix instances.

    """
    if hasattr(batches, 'shape'):
        raise TypeError("Expected 'list' of 'np.matrix', "
                        "saw 'np.matrix' instead.")

    try:
        # Row count of every batch. This is built eagerly so that a missing
        # `shape` attribute is detected here, and so that the result is a
        # real list on every Python version.
        lengths = [b.shape[0] for b in batches]
    except AttributeError:
        raise TypeError("Expected iterable of np.matrix instances.")

    # Number of columns every data point is expected to have.
    d_hat = batches[0].shape[1]

    # Flatten all batches into one list of single-row matrices and shuffle
    # it in place.
    datapoints = [datapoint for b in batches for datapoint in b]
    shuffle(datapoints)

    if d_hat < 2:
        reason = ("No features found in given dataset of shape '(%d, %d)'."
                  % batches[0].shape)
        raise InvalidFeatureSetError(batches[0], reason=reason)

    shuffled_batches = []

    # Batch sizes are consumed from last to first, and data points are taken
    # from the end of the shuffled list.
    for length in reversed(lengths):
        batch = np.matrix(np.zeros((length, d_hat)))

        for k in range(length):
            datapoint = datapoints.pop()

            try:
                batch[k, :] = datapoint[0, :]
            except ValueError:
                # The data point's width differs from the first batch's.
                reason = ("No features found in given dataset of shape "
                          "'(%d, %d)'." % (batch.shape[0], datapoint.shape[1]))
                raise InvalidFeatureSetError(batch, reason=reason)

        shuffled_batches.append(batch)

    return shuffled_batches
def reduce_dimensions(X, Y, min=DEFAULT_MIN_FEATURE_CORRELATION, names=None):
    """Data Point Dimensionality Reducer.

    Keeps only the features whose absolute correlation with the observations
    is at least `min`.

    Args:
        X (np.matrix): Feature set. Shape: n x d.
        Y (np.matrix): Observation set. Shape: n x 1.
        min (float, optional): Determines the minimum correlation value for a
            feature to be considered relevant. Has to lie between 0.0 and
            1.0, both included. Defaults to
            `DEFAULT_MIN_FEATURE_CORRELATION`.
        names (list of str): Feature names. When given, the entries of
            dropped features are removed from this list in place. Defaults
            to `None`.

    Returns:
        np.matrix: Reduced feature set. The feature names are not returned;
        the `names` list passed in is pruned instead.

    Raises:
        TypeError: If `min` is neither an `int` nor a `float`.
        ValueError: If `min` lies outside the accepted range, if no features
            have a correlation to `Y` greater than or equal to `min`, or if
            `names` runs out of entries while dropped features are removed.

    Todo:
        Get rid of `pandas`.

    """
    validate_datasets(X, Y)

    if type(min) not in (int, float):
        raise TypeError(
            "Expected 'float' or 'int' for `min`, saw '%s' instead."
            % type(min).__name__)

    if min < 0.0 or min > 1.0:
        raise ValueError("Minimum correlation has to be a float in the range "
                         "(0.0, 1.0), not '%f'." % min)

    # Pandas feature snapshot; observations go under the special key '_Y'.
    df = pd.DataFrame(X)
    df["_Y"] = np.asarray(Y)

    # Column labels of the features that correlate strongly enough with the
    # observations. `Series.items` is used because `iteritems` no longer
    # exists in current pandas releases.
    relevant = [label for label, correlation in df.corr()["_Y"].items()
                if label != "_Y" and abs(correlation) >= min]

    if not relevant:
        raise ValueError("No features satisfy the given minimum correlation.")

    # Reduced feature set, filled one column at a time.
    X_hat = np.matrix(np.zeros((X.shape[0], len(relevant))))

    # Number of columns copied so far. It is also the position inside
    # `names` of the next entry that has not been examined yet.
    kept = 0

    for i in range(X.shape[1]):
        if kept < len(relevant) and relevant[kept] == i:
            X_hat[:, kept] = X[:, i]
            kept += 1
        elif names is not None:
            try:
                names.pop(kept)
            except IndexError:
                # Same exception type as before, with a message that names
                # the actual problem.
                raise ValueError("Not enough feature names for the columns "
                                 "of the given feature set.")

    return X_hat
def batches(X, Y, k):
    """K-Batch Creator.

    Partitions the given sets of features and observations into batches of
    `k` randomly chosen data points each. Data points left over once no more
    full batches fit are handed out one by one to the existing batches, so
    batch sizes differ by at most one. The exception is when only one full
    batch fits: the leftovers then form a second, smaller batch.

    Args:
        X (np.matrix): Feature set. Shape: n x d.
        Y (np.matrix): Observation set. Shape: n x 1.
        k (int): Number of data points per full batch.

    Returns:
        list of np.matrix: The batches. Each row holds one data point's
        features followed by its observation (d + 1 columns).

    Raises:
        TypeError: If `k` is not an int.
        ValueError: If `k` is not greater than 0, or if the data cannot be
            split into at least two batches (`n` not greater than `k`).

        Whatever `validate_datasets` raises for invalid `X` or `Y` is
        propagated unchanged (its exact exception types are not visible
        here).

    """
    validate_datasets(X, Y)

    if type(k) != int:
        raise TypeError("Expected an 'int' for `k`, saw '%s' instead."
                        % type(k).__name__)

    if k <= 0:
        raise ValueError("Value of `k` not greater than 0: %d." % k)

    # Number of data points and number of features.
    n, d = X.shape

    def labelled_row(i):
        # Data point `i`: its features with the observation appended as the
        # last column.
        return np.concatenate((X[i, :], Y[i, :]), 1)

    # Shuffled indices of data points; consumed from the end.
    indices = list(range(n))
    shuffle(indices)

    partitions = []

    # Number of full batches that fit. Integer division replaces the float
    # division followed by a floor, which gives the same count without
    # rounding concerns.
    n_full = n // min(n, k)

    for _ in range(n_full):
        size = min(len(indices), k)
        batch = np.zeros((size, d + 1))

        for row in range(size):
            batch[row, :] = labelled_row(indices.pop())

        partitions.append(np.matrix(batch))

    if len(partitions) == 1:
        # A single full batch: the remaining points become the second batch.
        # With nothing left there is no way to produce two batches.
        if not indices:
            raise ValueError("Unable to partition %d data points into length "
                             "%d batches." % (n, k))

        leftovers = np.concatenate([labelled_row(i) for i in indices], 0)
        partitions.append(np.matrix(leftovers))
    else:
        # Several full batches: hand the remaining points out round-robin.
        offset = 0

        while indices:
            extra = np.matrix(labelled_row(indices.pop()))
            target = offset % len(partitions)
            partitions[target] = np.matrix(
                np.concatenate((partitions[target], extra)))
            offset += 1

    return partitions