def test_get_error():
    """validation.get_error matches hand-computed mean squared errors."""
    # NOTE(review): SetOutputModel([1]) presumably always outputs [1];
    # the expected values below are consistent with that.
    constant_model = helpers.SetOutputModel([1])

    # Each case: (input rows, target rows, expected error)
    cases = (
        ([[1]], [[0]], 1.0),
        ([[1]], [[1]], 0.0),
        ([[1]], [[0.5]], 0.25),
        ([[1], [1]], [[1], [0]], 0.5),
        ([[1], [1]], [[0.5], [0.5]], 0.25),
    )

    for input_rows, target_rows, expected in cases:
        measured = validation.get_error(
            constant_model,
            numpy.array(input_rows),
            numpy.array(target_rows),
            error_func=MeanSquaredError())
        assert measured == expected
Beispiel #2
0
def test_dropout_mlp():
    """A short training run should lower a DropoutMLP's error."""
    network = mlp.DropoutMLP((2, 8, 2))
    patterns = datasets.get_and()

    # Error of the untrained network is the baseline to beat
    initial_error = validation.get_error(network, *patterns)

    network.train(*patterns, iterations=20)
    trained_error = validation.get_error(network, *patterns)

    assert trained_error < initial_error
Beispiel #3
0
def test_rbf():
    """A few training iterations should reduce an RBF model's error."""
    network = rbf.RBF(2, 4, 2, scale_by_similarity=True)
    xor_patterns = datasets.get_xor()

    # Measure before and after a short training run
    error_before = validation.get_error(network, *xor_patterns)
    network.train(*xor_patterns, iterations=10)
    error_after = validation.get_error(network, *xor_patterns)

    assert error_after < error_before
Beispiel #4
0
def test_mlp_classifier():
    """An MLP with softmax transfer and cross entropy error improves briefly."""
    classifier = mlp.MLP(
        (2, 2, 2),
        transfers=SoftmaxTransfer(),
        error_func=CrossEntropyError())
    xor_patterns = datasets.get_xor()

    # Baseline error before any training
    starting_error = validation.get_error(classifier, *xor_patterns)

    classifier.train(*xor_patterns, iterations=20)
    finishing_error = validation.get_error(classifier, *xor_patterns)

    assert finishing_error < starting_error
Beispiel #5
0
def test_LinearRegressionModel():
    """A short training run should lower a linear regression model's error."""
    regressor = LinearRegressionModel(2, 2)
    # NOTE: The AND dataset is used rather than XOR, because XOR is non-linear
    and_patterns = datasets.get_and()

    # Compare error before and after training
    untrained_error = validation.get_error(regressor, *and_patterns)
    regressor.train(*and_patterns, iterations=10)
    trained_error = validation.get_error(regressor, *and_patterns)

    assert trained_error < untrained_error
Beispiel #6
0
def test_neuralfield_convergence():
    """A neural field trained until convergence should reach low XOR error."""
    dataset = datasets.get_xor()

    model = ill.make_neuralfield(2, grid_spacing=XOR_SPACING)

    # No iteration limit is passed; training runs with error_break=0.002.
    # The pre-training error is not needed here, since the assertion
    # compares against a fixed cutoff rather than the starting error.
    model.train(*dataset, error_break=0.002)
    assert validation.get_error(
        model, *dataset) < 0.02, "Training should reach low error"
Beispiel #7
0
def test_ill_mlp_exact_target():
    """An ILL-wrapped MLP with learn_exact=True improves after brief training."""
    xor_patterns = datasets.get_xor()

    wrapped = ill.ILL(
        MLP((2, 2, 2)), grid_spacing=XOR_SPACING, learn_exact=True)

    # Error before training is the baseline to beat
    baseline_error = validation.get_error(wrapped, *xor_patterns)

    wrapped.train(*xor_patterns, iterations=10)
    trained_error = validation.get_error(wrapped, *xor_patterns)

    assert trained_error < baseline_error, "Training decreases error"
Beispiel #8
0
def test_ill_mlp_convergence_exact_target():
    """An ILL-wrapped MLP with learn_exact=True should converge on XOR."""
    dataset = datasets.get_xor()

    model = ill.ILL(MLP((2, 2, 2)), grid_spacing=XOR_SPACING, learn_exact=True)

    # Train with retries. The pre-training error is not needed here, since
    # the assertion compares against a fixed cutoff rather than the
    # starting error.
    model.train(*dataset, retries=5, error_break=0.002)
    assert validation.get_error(
        model, *dataset) < 0.02, "Training should reach low error"
def test_get_error_unusual_targets_shape():
    """get_error handles targets that are lists of differently sized lists.

    The model combines two fixed-output models (one output and three
    outputs), so each target row holds one list per sub-model.
    """
    model = multioutputs.MultiOutputs([
        helpers.SetOutputModel([1.0]),
        helpers.SetOutputModel([1.0, 1.0, 1.0])
    ])

    # Targets equal to the configured outputs: no error
    assert validation.get_error(model, [[]], [[[1.0], [1.0, 1.0, 1.0]]],
                                error_func=MeanSquaredError()) == 0.0
    # Only the second sub-model's targets differ from its configured output
    assert validation.get_error(model, [[]], [[[1.0], [0.0, 0.0, 0.0]]],
                                error_func=MeanSquaredError()) == 0.5
Beispiel #10
0
def test_neuralfield():
    """A neural field should have lower error after a few iterations."""
    xor_patterns = datasets.get_xor()

    field = ill.make_neuralfield(2, grid_spacing=XOR_SPACING)

    # Compare error before and after a short training run
    error_before = validation.get_error(field, *xor_patterns)
    field.train(*xor_patterns, iterations=10)
    error_after = validation.get_error(field, *xor_patterns)

    assert error_after < error_before, "Training decreases error"
Beispiel #11
0
def test_rbf_convergence():
    """An RBF model trained with retries should reach low error on XOR."""
    network = rbf.RBF(2, 4, 2, scale_by_similarity=True)
    xor_patterns = datasets.get_xor()

    # Train until the error break is reached, retrying up to 5 times
    network.train(*xor_patterns, retries=5, error_break=0.002)

    final_error = validation.get_error(network, *xor_patterns)
    assert final_error <= 0.02
Beispiel #12
0
def test_pbnn_convergence():
    """A PBNN trained with default settings should reach low error on XOR."""
    network = PBNN()
    xor_patterns = datasets.get_xor()

    # Default training arguments are used
    network.train(*xor_patterns)

    final_error = validation.get_error(network, *xor_patterns)
    assert final_error <= 0.02
Beispiel #13
0
def test_mlp_convergence():
    """An MLP trained with retries should reach low error on XOR."""
    network = mlp.MLP((2, 4, 2))
    xor_patterns = datasets.get_xor()

    # Train until the error break is reached, retrying up to 5 times
    network.train(*xor_patterns, retries=5, error_break=0.002)

    final_error = validation.get_error(network, *xor_patterns)
    assert final_error <= 0.02
Beispiel #14
0
def test_mlp_classifier_convergence():
    """A softmax / cross entropy MLP should reach low error on AND."""
    classifier = mlp.MLP(
        (2, 3, 2),
        transfers=SoftmaxTransfer(),
        error_func=CrossEntropyError())
    and_patterns = datasets.get_and()

    # Train until the error break is reached, retrying up to 5 times
    classifier.train(*and_patterns, retries=5, error_break=0.002)

    final_error = validation.get_error(classifier, *and_patterns)
    assert final_error <= 0.02
Beispiel #15
0
def test_LinearRegressionModel_convergence():
    """A linear regression model should reach acceptably low error on AND."""
    regressor = LinearRegressionModel(2, 2)
    # NOTE: The AND dataset is used rather than XOR, because XOR is non-linear
    and_patterns = datasets.get_and()

    regressor.train(*and_patterns)

    # NOTE: This linear model cannot achieve 0 MSE, hence the looser cutoff
    final_error = validation.get_error(regressor, *and_patterns)
    assert final_error <= 0.1
Beispiel #16
0
def test_ill_mlp_dim_reduction_tuple(monkeypatch):
    """ILL accepts dim_reduction as a tuple, reduces points, and still trains."""
    expected_dims = 1

    xor_patterns = datasets.get_xor()

    wrapped = ill.ILL(
        MLP((2, 2, 2)),
        grid_spacing=XOR_SPACING,
        dim_reduction=(2, expected_dims))

    # Every neighborhood point should have the reduced number of dimensions
    neighborhood = _get_neighborhood_points(wrapped, xor_patterns, monkeypatch)
    for neighbor in neighborhood:
        assert len(neighbor) == expected_dims

    # The model should still be trainable
    error_before = validation.get_error(wrapped, *xor_patterns)
    wrapped.train(*xor_patterns, iterations=10)
    error_after = validation.get_error(wrapped, *xor_patterns)

    assert error_after < error_before, "Training decreases error"
Beispiel #17
0
def test_dropout_mlp_convergence():
    """A DropoutMLP with high active probabilities should reach low error."""
    # The dataset does not really need dropout, so high active
    # probabilities are used
    network = mlp.DropoutMLP(
        (2, 8, 2),
        input_active_probability=1.0,
        hidden_active_probability=0.9)
    # The AND dataset is the easier choice for a linear output
    and_patterns = datasets.get_and()

    # The error break is lower than the asserted cutoff, since dropout
    # may give a different error after training
    network.train(
        *and_patterns,
        retries=5,
        error_break=0.002,
        error_improve_iters=50)

    # Dropout sacrifices training accuracy for better generalization,
    # so the convergence cutoff is looser here
    final_error = validation.get_error(network, *and_patterns)
    assert final_error <= 0.1
Beispiel #18
0
def test_Model_stochastic_train():
    """Train with stochastic gradient descent."""
    from learning import transfer, error, validation, MLP

    iris = datasets.get_iris()

    # Layer sizes follow the dataset: first input row and first target row
    num_inputs = len(iris[0][0])
    num_outputs = len(iris[1][0])
    classifier = MLP(
        (num_inputs, 2, num_outputs),
        transfers=transfer.SoftmaxTransfer(),
        error_func=error.CrossEntropyError())

    def pick_batch(X, Y):
        # Each call to Model.train sees a sample of 30 patterns
        return base.select_sample(X, Y, size=30)

    inner_train_kwargs = {'iterations': 100, 'error_break': 0.02}

    # Model should be able to converge with mini-batches
    classifier.stochastic_train(
        *iris,
        error_break=0.02,
        pattern_selection_func=pick_batch,
        train_kwargs=inner_train_kwargs)

    final_error = validation.get_error(classifier, *iris)
    assert final_error <= 0.03
Beispiel #19
0
    def stochastic_train(self,
                         input_matrix,
                         target_matrix,
                         max_iterations=100,
                         error_break=0.002,
                         pattern_selection_func=select_sample,
                         train_kwargs=None):
        """Train model on multiple subsets of the given dataset.

        Use for stochastic gradient descent.

        Args:
            input_matrix: A matrix with samples in rows and attributes in columns.
            target_matrix: A matrix with samples in rows and target values in columns.
            max_iterations: Maximum number of times that Model.train is called.
            error_break: Training will end once error is less than this, on entire dataset.
            pattern_selection_func: Function that takes (input_matrix, target_matrix),
                and returns a selection of rows. Use partial function to embed arguments.
            train_kwargs: Keyword arguments passed to every Model.train call.
                Defaults to {'iterations': 100} when None.

        Returns:
            True if both the error reported by Model.train and the error on
            the entire dataset reached error_break; False if max_iterations
            calls were made without that happening.
        """
        if train_kwargs is None:
            # Built per call, instead of sharing one mutable default dict
            train_kwargs = {'iterations': 100}

        # Defined before the loop, so self.iteration can still be set
        # when max_iterations < 1 and the loop body never runs
        iteration = 0
        for iteration in range(1, max_iterations + 1):
            # Model.train is expected to return an (error, converged) pair;
            # only the error is used here
            train_error, _ = self.train(
                *pattern_selection_func(input_matrix, target_matrix),
                **train_kwargs)

            if train_error is None:
                continue

            # Break early to prevent overtraining
            if (train_error <= error_break
                    # Perform a second test on whole dataset
                    # TODO: Use user provided error function
                    and validation.get_error(
                        self, input_matrix, target_matrix) <= error_break):
                # NOTE(review): self.iteration is not overridden on this
                # path; it keeps whatever value the last Model.train call
                # left. Confirm whether that is intended.
                return True

        # Override iteration from inner loop, with iteration number from outer loop
        self.iteration = iteration

        return False
Beispiel #20
0
    def _train_attempt(self, input_matrix, target_matrix, iterations,
                       error_break, error_stagnant_distance,
                       error_stagnant_threshold, error_improve_iters,
                       pattern_select_func, post_pattern_callback):
        """Attempt to train this model.

        Args:
            input_matrix: A matrix with samples in rows and attributes in columns.
            target_matrix: A matrix with samples in rows and target values in columns.
            iterations: Max iterations to train model.
            error_break: Training ends once error is at most this, both for
                the training step and for the entire dataset.
            error_stagnant_distance: Number of most recent errors that are
                compared to the current error, to detect stagnation.
            error_stagnant_threshold: Threshold passed to _all_close when
                comparing recent errors to the current error.
            error_improve_iters: Training ends once the best error has not
                improved for this many iterations.
            pattern_select_func: Function that takes (input_matrix, target_matrix),
                and returns a selection of rows.
            post_pattern_callback: Not referenced in the body of this method.

        Returns:
            True if model converged (error <= error_break), False otherwise.
        """
        # Initialize error history with errors that are
        # unlikely to be close in reality
        error_history = [1e10] * error_stagnant_distance

        # Initialize best error for error_improve_iters
        best_error = float('inf')
        iters_since_improvement = 0

        # The loop variable is stored on self, so the iteration count
        # remains available on the model after this method returns
        for self.iteration in range(1, iterations + 1):
            selected_patterns = pattern_select_func(input_matrix,
                                                    target_matrix)

            # Learn each selected pattern
            error = self.train_step(*selected_patterns)

            # Logging and breaking
            if self.logging:
                # Parenthesized single argument prints identically with the
                # Python 2 print statement and the Python 3 print function
                print("Iteration {}, Error: {}".format(self.iteration, error))

            if error is None:
                # Without an error, none of the stopping criteria apply
                continue

            # Break early to prevent overtraining
            if (error <= error_break
                    # Perform a second test on whole dataset
                    # in case model is training on mini-batches
                    # TODO: Should use user provided error function?
                    and validation.get_error(
                        self, input_matrix, target_matrix) <= error_break):
                return True

            # Skip the rest if we're already out of iterations (optimization)
            # Useful for situations where we only run 1 iteration
            if self.iteration == iterations:
                return False

            # Break if no progress is made
            if _all_close(error_history, error, error_stagnant_threshold):
                # Break if not enough difference between all recent errors
                # and current error
                return False
            error_history.append(error)
            error_history.pop(0)

            # Break if best error has not improved within n iterations
            # Keep track of best error, and iterations since best error has improved
            if error < best_error:
                best_error = error
                iters_since_improvement = 0
            else:
                iters_since_improvement += 1
            # If it has been too many iterations since improvement, break
            if iters_since_improvement >= error_improve_iters:
                return False

        return False
Beispiel #21
0
    def train(self,
              input_matrix,
              target_matrix,
              iterations=1000,
              retries=0,
              error_break=0.002,
              error_stagnant_distance=5,
              error_stagnant_threshold=0.00001,
              error_improve_iters=20,
              pattern_select_func=select_iterative,
              post_pattern_callback=None):
        """Train model to converge on a dataset.

        Note: Override this method for batch learning models.

        Args:
            input_matrix: A matrix with samples in rows and attributes in columns.
            target_matrix: A matrix with samples in rows and target values in columns.
            iterations: Max iterations to train model.
            retries: Number of times to reset model and retry if it does not converge.
                Convergence is defined as reaching error_break.
            error_break: Training will end once error is less than this.
            error_stagnant_distance: Number of iterations during which error must change by at least
                error_stagnant_threshold, or training ends.
            error_stagnant_threshold: Threshold by which error must change within
                error_stagnant_distance iterations, or training ends.
            error_improve_iters: Best error must decrease within this many iterations,
                or training ends.
            pattern_select_func: Function that takes (input_matrix, target_matrix),
                and returns a selection of rows. Use partial function to embed arguments.
            post_pattern_callback: Stored on self._post_pattern_callback,
                and passed on to each training attempt.

        Returns:
            None. When retries are used and no attempt converges, the model
            is left in the state of the attempt with the lowest error.
        """
        self._reset_bookkeeping()
        self._post_pattern_callback = post_pattern_callback  # For calling in other method

        # Initialize variables for retries
        best_try = (float('inf'), None)  # (error, serialized_model)

        # Learn on each pattern for each iteration
        for attempt in range(retries + 1):
            success = self._train_attempt(
                input_matrix, target_matrix, iterations, error_break,
                error_stagnant_distance, error_stagnant_threshold,
                error_improve_iters, pattern_select_func,
                post_pattern_callback)

            # Skip all the tracking and whatnot if there are no retries (optimization)
            if retries == 0:
                return

            # End if model converged
            # No need to use best attempt (since this is the first to reach best error)
            if success:
                return

            # TODO: Should use user provided error function
            attempt_error = validation.get_error(self, input_matrix,
                                                 target_matrix)

            # End when out of retries, use best attempt so far
            if attempt >= retries:
                # best_try[1] is still None when no earlier attempt had an
                # error below infinity (for example, NaN errors never
                # compare as smaller). There is then nothing to restore,
                # so the last attempt is kept.
                if attempt_error < best_try[0] or best_try[1] is None:
                    # Last attempt was our best
                    return

                # Use best attempt
                best_model = self.unserialize(best_try[1])
                self.__dict__ = best_model.__dict__
                return

            # Keep track of best attempt
            if attempt_error < best_try[0]:
                best_try = (attempt_error, self.serialize())

            # Reset for next attempt
            self.reset()