def test_random_appendargs(self):
        """`general.appendargs`: Randomized Validator.

        Binds a random string to a concatenating function through
        `appendargs`, then checks that calling the bound function with a
        second random string returns both strings joined, bound string last.

        Raises:
            AssertionError: If `appendargs` needs debugging.

        """
        def join(head, tail):
            # Plain `+`; concatenation for the string operands used below.
            return head + tail

        # `appendargs` called with `None` should return `None`.
        self.assertIsNone(appendargs(None))

        # Produces the string form of a freshly drawn random sample.
        random_text = compose(str, _np.random.uniform)

        for _ in range(self.n_tests):
            # str: Operand handed to `appendargs` (drawn first).
            bound = random_text(0.0, 100.0, size=self.n)
            # str: Operand handed to the function `appendargs` returns.
            free = random_text(0.0, 100.0, size=self.n)

            # str: Expected output, i.e. the free operand followed by the
            # bound one.
            expected = free + bound

            # callable: Test input.
            joiner = appendargs(join, bound)

            # The value returned by `appendargs` should be a function.
            self.assertIsInstance(joiner, type(compose))

            # Programmatic result should match the manual concatenation.
            self.assertEqual(joiner(free), expected)
def factors(n):
    """Factor Finder.

    Computes all the positive factors of the given number by trial division
    up to its square root, pairing every divisor `i` with `n // i`.

    See:
        https://stackoverflow.com/questions/6800193/what-is-the-most-efficient-way-of-finding-all-the-factors-of-a-number-in-python

    Args:
        n (int): Number whose factors want to be found.

    Returns:
        list of int: `n`'s factors, without duplicates and in no particular
            order.

    Raises:
        TypeError: If `n` is not an integer.
        ValueError: If `n` is smaller than 1, in which case no factors can be
            computed.

    """
    # Exact type check kept on purpose: `bool` and other `int` subclasses are
    # rejected, as before.
    if type(n) is not int:
        raise TypeError("Expected 'int', saw '%s' instead." % type(n).__name__)

    # Guard explicitly instead of relying on an exception raised further
    # down: a negative `n` would otherwise reach `n ** 0.5`, whose failure
    # mode differs between interpreter versions.
    if n < 1:
        raise ValueError("Factors of input '%s' are not computable." % str(n))

    # A set absorbs the duplicate produced by perfect squares (i == n // i).
    found = set()

    for i in range(1, int(n ** 0.5) + 1):
        if n % i == 0:
            found.add(i)
            found.add(n // i)

    return list(found)
    def test_random_compose(self):
        """`general.compose`: Randomized Validator.

        Composes a fixed chain of functions with `compose` and checks that,
        for randomly generated positional and keyword arguments, the composed
        function returns the same value as applying the chain by hand.

        Raises:
            AssertionError: If `compose` needs debugging.

        """
        def special_sum(*args, **kwargs):
            # `list(...)` around `values()` keeps the concatenation valid
            # where `dict.values()` is a view instead of a list.
            return sum(list(args) + list(kwargs.values()))

        def negate(x):
            # Negates numeric input.
            return -1.0 * x

        # tuple of callable: Functions that get composed during each random
        # iteration of the test; the last one receives the raw arguments.
        functions = str, negate, _sqrt, abs, special_sum

        # Compose with no arguments should return `None`.
        self.assertIsNone(compose())

        for _ in range(self.n_tests):
            args = _np.random.uniform(0.0, 100.0, size=self.n)
            kwargs = {str(k): k
                      for k in _np.random.uniform(0.0, 100.0, size=self.n)}

            # str: Expected output from the composed function. The operands
            # are summed as one flat list, in the same order `special_sum`
            # uses, so the comparison below does not depend on array
            # broadcasting or on floating-point summation order.
            total = sum(list(args) + list(kwargs.values()))
            target = str(negate(_sqrt(abs(total))))

            # callable: Test input.
            composed_fn = compose(*functions)

            # Composition should be a function.
            self.assertEqual(type(composed_fn), type(compose))

            # str: Composition output.
            result = composed_fn(*args, **kwargs)

            # Programmatic composition result should match manual
            # composition output.
            self.assertEqual(result, target)
# Example #4
# 0
    def test_random_append_bottom(self):
        """`linalg.append_bottom`: Randomized Validator.

        Appends a random row vector below a random matrix with
        `append_bottom` and checks that the row norms of the inputs add up to
        the row norms of the augmented matrix, and that the last row of the
        result has the same norm as the appended vector.

        Raises:
            AssertionError: If `append_bottom` needs debugging.

        """
        def norm_along(A, axis):
            # Norm of `A` along the given axis (`None` for the whole input).
            return _np.linalg.norm(A, axis=axis)

        def as_sequence(n):
            # A lone `np.float64` norm is wrapped in a list so that every
            # entry can be summed the same way.
            return [n] if type(n) == _np.float64 else n

        for _ in range(self.n_tests):
            # Random matrix of shape `self.data_shape`.
            X = random_matrix(self.data_shape)
            # Random row vector with as many columns as `X`.
            v = random_matrix((1, self.data_shape[1]))

            # Test input.
            result = append_bottom(X, v)

            # Result should be a matrix.
            self.assertIsInstance(result, _np.matrix)

            # Row norms of both inputs, plus the negated row norms of the
            # augmented result, so that everything sums to (almost) zero when
            # no row was altered. Built as a plain list so it does not depend
            # on `map` returning one.
            norms = [norm_along(X, 1),
                     norm_along(v, None),
                     -1.0 * norm_along(result, 1)]

            # float: Absolute difference between the row norms of the inputs
            # and those of the augmented matrix.
            delta = abs(sum(sum(as_sequence(n)) for n in norms))

            # The row norms of input should match those of the augmented
            # matrix.
            self.assertLessEqual(delta, self.zero_cutoff)

            # The vector norm of `v` should match that of the bottommost row
            # vector in the augmented matrix.
            self.assertAlmostEqual(_np.linalg.norm(v),
                                   _np.linalg.norm(result[-1, :]))
def _append_helper(X, v, position):
    """Matrix Concatenator.

    Appends the given row/column vector(s) at the specified position of a
    matrix.

    Args:
        X (np.matrix): Feature set to be augmented.
        v (np.matrix): Vector(s) (as a matrix).
        position (str): 'bottom', 'left', 'right', or 'top'.

    Returns:
        np.matrix: The augmented feature set.

    Raises:
        IncompatibleDataSetsError: If the feature set's and vector's dimensions
            do not match.
        InvalidFeatureSetError: If the given feature set or vector are invalid.
        ValueError: If `position` is not one of the supported values.

    """
    # Explicit loop rather than `map`: a lazy `map` would never call the
    # validator.
    for dataset in (X, v):
        validate_feature_set(dataset)

    # Operand order and concatenation axis for every supported position. See
    # `np.concatenate`.
    layouts = dict(bottom=((X, v), 0), left=((v, X), 1), right=((X, v), 1),
                   top=((v, X), 0))

    layout = layouts.get(position)

    if not layout:
        raise ValueError("Invalid append position.")

    operands, axis = layout

    # Stacking along axis 0 ('bottom'/'top') requires equal column counts
    # (`shape[1]`); stacking along axis 1 ('left'/'right') requires equal row
    # counts (`shape[0]`).
    index = 1 - axis

    if X.shape[index] != v.shape[index]:
        raise _IncompatibleDataSetsError(X, v, "concatenation")

    return _np.matrix(_np.concatenate(operands, axis))
    def test_invalid_args_compose(self):
        """`general.compose`: Argument Validator.

        Checks that building and calling a composition from invalid
        arguments raises `TypeError` in every listed scenario.

        Raises:
            Exception: If at least one `Exception` raised is not of the expected
                kind.

        """
        # Each entry defers both the `compose` call and the call to its
        # result, so either step may be the one that raises.
        invalid_calls = (
            # No arguments.
            lambda: compose()("arg"),
            # Non-function arguments.
            lambda: compose(map, reduce, filter, 123)("arg"),
            # With **kwargs.
            lambda: compose(map, reduce, key1="key1", key2="key2")("arg"),
        )

        for invalid_call in invalid_calls:
            with self.assertRaises(TypeError):
                invalid_call()
def shuffle_batches(batches):
    """Batch Shuffler.

    Re-orders the datapoints in the given batch set into a new set of batches.
    The new batches reuse the row counts of the given ones, in reverse order.

    Args:
        batches: Batch set to re-order.

    Returns:
        list of np.matrix: Shuffled batches.

    Raises:
        IndexError: If `batches` is empty.
        InvalidFeatureSetError: If not all the batches are compatible and valid.
        TypeError: If `batches` is not an iterable of np.matrix instances.

    """
    if hasattr(batches, 'shape'):
        raise TypeError("Expected 'list' of 'np.matrix', "
                        "saw 'np.matrix' instead.")

    try:
        # Built eagerly so that an element without `shape` is reported right
        # here, and so that the result can be traversed in reverse below.
        lengths = [b.shape[0] for b in batches]
    except AttributeError:
        raise TypeError("Expected iterable of np.matrix instances.")

    # int: Number of columns every new batch is created with, taken from the
    # first batch.
    d_hat = batches[0].shape[1]

    if d_hat < 2:
        reason = ("No features found in given dataset of shape '(%d, %d)'." %
                  batches[0].shape)
        raise InvalidFeatureSetError(batches[0], reason=reason)

    # list of np.matrix: Every row of every batch, as its own item.
    datapoints = []

    for b in batches:
        datapoints.extend(b)

    shuffle(datapoints)

    # list of np.matrix: Newly re-ordered batches.
    shuffled_batches = []

    for length in reversed(lengths):
        batch = np.matrix(np.zeros((length, d_hat)))

        for k in range(length):
            datapoint = datapoints.pop()

            try:
                batch[k, :] = datapoint[0, :]
            except ValueError:
                # The row does not fit into a batch that is `d_hat` columns
                # wide.
                reason = ("No features found in given dataset of shape "
                          "'(%d, %d)'." % (batch.shape[0], datapoint.shape[1]))
                raise InvalidFeatureSetError(batch, reason=reason)

        shuffled_batches.append(batch)

    return shuffled_batches
def reduce_dimensions(X, Y, min=DEFAULT_MIN_FEATURE_CORRELATION, names=None):
    """Data Point Dimensionality Reducer.

    Keeps only the features (columns of `X`) whose absolute correlation with
    the observations `Y` is at least `min`.

    Args:
        X (np.matrix): Feature set. Shape: n x d.
        Y (np.matrix): Observation set. Shape: n x 1.
        min (float, optional): Determines the minimum correlation value for a
            feature to be considered relevant. Defaults to
            `DEFAULT_MIN_FEATURE_CORRELATION`.
        names (list of str, optional): Feature names. When given, the names
            of the discarded features are removed from this list in place.
            Defaults to `None`.

    Returns:
        np.matrix: Reduced feature set, with one column per relevant feature.

    Raises:
        TypeError: If `min` is neither an `int` nor a `float`.
        ValueError: If `min` lies outside the closed range from 0.0 to 1.0, if
            no features have a correlation to `Y` greater than or equal to
            `min`, or if `names` runs out of entries while the names of
            discarded features are being removed.

    Todo:
        Get rid of `pandas`.

    """
    validate_datasets(X, Y)

    if type(min) not in (int, float):
        raise TypeError(
            "Expected 'float' or 'int' for `min`, saw '%s' instead." %
            type(min).__name__)

    if min < 0.0 or min > 1.0:
        raise ValueError("Minimum correlation has to be a float in the range "
                         "(0.0, 1.0), not '%f'." % min)

    # DataFrame: Pandas feature snapshot.
    df = pd.DataFrame(X)

    # Set observations to special key '_Y'.
    df["_Y"] = np.asarray(Y)

    # list: Column labels of the features that are correlated strongly
    # enough with the observations; the '_Y' column itself is skipped.
    relevant = [i for i, c in df.corr()["_Y"].items()
                if i != '_Y' and abs(c) >= min]

    if not relevant:
        raise ValueError("No features satisfy the given minimum correlation.")

    # np.matrix: Reduced feature set.
    X_hat = np.matrix(np.zeros((X.shape[0], len(relevant))))

    # int: Next column to fill in the reduced matrix; `relevant[k]` is the
    # next feature to keep.
    k = 0

    for i in range(X.shape[1]):
        if k < len(relevant) and i == relevant[k]:
            X_hat[:, k] = X[:, i]
            k += 1
        elif names is not None:
            # Names of kept features have piled up in front, so the name of
            # the discarded feature currently sits at position `k`.
            try:
                names.pop(k)
            except IndexError:
                raise ValueError("Not enough feature names for the given "
                                 "feature set.")

    return X_hat
def batches(X, Y, k):
    """K-Batch Creator.

    Partitions the given sets of features and observations into batches of
    `k` randomly chosen data points each. Every row of a batch holds the
    features of one data point followed by its observation.

    When two or more full batches fit, the data points left over are handed
    out to the batches one at a time, so batch sizes differ by at most one.
    When only one full batch fits, the left over data points form a second
    batch, which then holds fewer than `k` data points.

    Args:
        X (np.matrix): Feature set. Shape: n x d.
        Y (np.matrix): Observation set. Shape: n x 1.
        k (int): Minimum number of data points per batch.

    Raises:
        AttributeError: If `X` is not a valid matrix.
        ValueError: If `X` or `Y` are empty matrices, if `k` is not a natural
            number, or if `k` is not smaller than the number of data points
            (nothing would be left for a second batch).
        TypeError: If `X` or `Y` are not valid matrices or if `k` is not an int.

    Returns:
        list of np.matrix: Partitions of the data points, one matrix of shape
            (batch size) x (d + 1) per batch.

    """
    validate_datasets(X, Y)

    if type(k) is not int:
        raise TypeError("Expected an 'int' for `k`, saw '%s' instead." %
                        type(k).__name__)

    if k <= 0:
        raise ValueError("Value of `k` not greater than 0: %d." % k)

    # (int, int): Number of data points and number of features.
    n, d = X.shape

    # list of int: Shuffled indices of data points.
    indices = list(range(n))
    shuffle(indices)

    def datapoint_at(i):
        # Features of data point `i` followed by its observation, as one row.
        return np.concatenate((X[i, :], Y[i, :]), 1)

    # int: Rows per full batch; never more than the data points available.
    size = min(n, k)
    # int: Number of full batches that fit into the `n` data points.
    n_batches = n // size

    # list of np.matrix: All batches.
    partitions = []

    for _ in range(n_batches):
        batch = np.zeros((size, d + 1))

        for j in range(size):
            batch[j, :] = datapoint_at(indices.pop())

        partitions.append(np.matrix(batch))

    if len(partitions) == 1:
        if not indices:
            raise ValueError("Unable to partition %d data points into length "
                             "%d batches." % (n, k))

        # Everything left over becomes the second batch.
        leftover = np.concatenate([datapoint_at(i) for i in indices], 0)
        partitions.append(np.matrix(leftover))
    else:
        # int: Running count of left over data points already handed out.
        j = 0

        while indices:
            datapoint = np.matrix(datapoint_at(indices.pop()))
            # int: Batch receiving the current data point (round-robin).
            m = j % len(partitions)

            partitions[m] = np.matrix(
                np.concatenate((partitions[m], datapoint)))
            j += 1

    return partitions