Code Example #1
import numpy as np
import progressbar

# SquareLoss, CrossEntropy, RegressionTree and bar_widgets are assumed to be
# importable from the surrounding project's loss, tree and utility modules.

class GradientBoosting(object):
    """Superclass of GradientBoostingClassifier and GradientBoostingRegressor.
    Uses a collection of regression trees that are trained to predict the
    gradient of the loss function.

    Parameters:
    -----------
    n_estimators: int
        The number of regression trees that are used.
    learning_rate: float
        The step length that will be taken when following the negative gradient during
        training.
    min_samples_split: int
        The minimum number of samples needed to make a split when building a tree.
    min_impurity: float
        The minimum impurity required to split the tree further. 
    max_depth: int
        The maximum depth of a tree.
    regression: boolean
        True if we are doing regression, false if classification.
    debug: boolean
        Whether to display the training progress.
    """
    def __init__(self, n_estimators, learning_rate, min_samples_split,
                 min_impurity, max_depth, regression, debug):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.min_samples_split = min_samples_split
        self.min_impurity = min_impurity
        self.max_depth = max_depth
        self.init_estimate = None
        self.regression = regression
        self.debug = debug
        self.multipliers = []
        self.bar = progressbar.ProgressBar(widgets=bar_widgets)

        # Square loss for regression
        # Log loss for classification
        self.loss = SquareLoss()
        if not self.regression:
            self.loss = CrossEntropy()

        # Initialize regression trees
        self.trees = []
        for _ in range(n_estimators):
            tree = RegressionTree(min_samples_split=self.min_samples_split,
                                  min_impurity=min_impurity,
                                  max_depth=self.max_depth)
            self.trees.append(tree)

    def fit(self, X, y):
        # Store the initial estimate (the mean of y) so that predict() can
        # start from the same baseline as training
        self.init_estimate = np.mean(y, axis=0)
        y_pred = np.full(np.shape(y), self.init_estimate)

        for i in self.bar(range(self.n_estimators)):
            tree = self.trees[i]
            # Fit each tree to the gradient of the loss w.r.t. the current prediction
            gradient = self.loss.gradient(y, y_pred)
            tree.fit(X, gradient)
            update = tree.predict(X)
            # Take a step along the negative gradient
            y_pred -= np.multiply(self.learning_rate, update)

    def predict(self, X):
        # Start from the same initial estimate that training started from
        y_pred = np.full((np.shape(X)[0],) + np.shape(self.init_estimate),
                         self.init_estimate)
        # Accumulate the scaled correction of every tree
        for tree in self.trees:
            update = np.multiply(self.learning_rate, tree.predict(X))
            y_pred -= update

        if not self.regression:
            # Turn the scores into a probability distribution (softmax)
            y_pred = np.exp(y_pred) / np.expand_dims(
                np.sum(np.exp(y_pred), axis=1), axis=1)
            # Set label to the value that maximizes probability
            y_pred = np.argmax(y_pred, axis=1)
        return y_pred
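
The class above treats SquareLoss, CrossEntropy and RegressionTree as given, and is written as a superclass: a concrete model fixes the regression flag and forwards the remaining hyperparameters. The sketch below illustrates both points, assuming the class above is in scope; the SquareLoss body, the subclass name and its default values are illustrative assumptions, not taken from the source.

import numpy as np

# Illustrative sketch of the loss interface GradientBoosting relies on:
# fit() only ever calls self.loss.gradient(y, y_pred).
class SquareLoss:
    def loss(self, y, y_pred):
        return 0.5 * np.power(y - y_pred, 2)

    def gradient(self, y, y_pred):
        # Derivative of 0.5 * (y - y_pred)^2 w.r.t. y_pred
        return -(y - y_pred)

# A hypothetical concrete regressor; the name and defaults are assumptions.
class GradientBoostingRegressor(GradientBoosting):
    def __init__(self, n_estimators=200, learning_rate=0.5,
                 min_samples_split=2, min_impurity=1e-7,
                 max_depth=4, debug=False):
        super().__init__(n_estimators, learning_rate, min_samples_split,
                         min_impurity, max_depth, regression=True, debug=debug)

# model = GradientBoostingRegressor()
# model.fit(X, y)
# y_pred = model.predict(X)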
Code Example #2
import numpy as np
import progressbar
import matplotlib.pyplot as plt

# CrossEntropy, train_test_split, categorical_to_binary, shuffle_data and
# bar_widgets are assumed to be importable from the surrounding project's
# utility modules.

class MultilayerPerceptron():
    """Multilayer Perceptron classifier.

    Parameters:
    -----------
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    batch_size: int
        The number of samples per gradient update.
    optimizer: class
        The weight optimizer that will be used to tune the weights in order to
        minimize the loss.
    val_error: boolean
        Whether to hold out part of the training data as a validation set in
        order to evaluate how the model generalizes as training progresses.
    """
    def __init__(self, n_iterations, batch_size, optimizer, val_error=False):
        self.n_iterations = n_iterations
        self.optimizer = optimizer
        self.val_error = val_error
        self.layers = []
        self.errors = {"training": [], "validation": []}
        self.cross_ent = CrossEntropy()
        self.batch_size = batch_size

    def add(self, layer):
        layer.set_optimizer(self.optimizer)
        self.layers.append(layer)

    def fit(self, X, y):
        X_train = X
        y_train = y

        if self.val_error:
            # Split the data into training and validation sets
            X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.1)
            y_validate = categorical_to_binary(y_validate)

        # Convert the nominal y values to binary
        y_train = categorical_to_binary(y_train)

        n_samples, n_features = np.shape(X_train)
        # Guard against batch_size > n_samples, which would yield zero batches
        n_batches = max(1, n_samples // self.batch_size)

        bar = progressbar.ProgressBar(widgets=bar_widgets)

        for i in bar(range(self.n_iterations)):
            X_, y_ = shuffle_data(X_train, y_train)

            batch_t_error = 0   # Mean batch training error
            batch_v_error = 0   # Mean batch validation error
            for idx in np.array_split(np.arange(n_samples), n_batches):
                X_batch, y_batch = X_[idx], y_[idx]

                # Calculate output
                y_pred = self._forward_pass(X_batch)

                # Calculate the cross entropy training loss
                loss = np.mean(self.cross_ent.loss(y_batch, y_pred))
                batch_t_error += loss

                loss_grad = self.cross_ent.gradient(y_batch, y_pred)

                # Update the NN weights
                self._backward_pass(loss_grad=loss_grad)

                if self.val_error:
                    # Calculate the validation error
                    y_val_pred = self._forward_pass(X_validate)
                    loss = np.mean(self.cross_ent.loss(y_validate, y_val_pred))
                    batch_v_error += loss

            batch_t_error /= n_batches
            self.errors["training"].append(batch_t_error)
            if self.val_error:
                batch_v_error /= n_batches
                self.errors["validation"].append(batch_v_error)

    def _forward_pass(self, X):
        # Calculate the output of the NN. The output of layer l1 becomes the
        # input of the following layer l2
        layer_output = X
        for layer in self.layers:
            layer_output = layer.forward_pass(layer_output)

        return layer_output

    def _backward_pass(self, loss_grad):
        # Propagate the gradient 'backwards' and update the weights
        # in each layer
        acc_grad = loss_grad
        for layer in reversed(self.layers):
            acc_grad = layer.backward_pass(acc_grad)

    def plot_errors(self):
        if self.errors["training"]:
            n = len(self.errors["training"])
            # Training error plot, plus validation error if it was recorded
            training, = plt.plot(range(n), self.errors["training"], label="Training Error")
            handles = [training]
            if self.errors["validation"]:
                validation, = plt.plot(range(n), self.errors["validation"], label="Validation Error")
                handles.append(validation)
            plt.legend(handles=handles)
            plt.title("Error Plot")
            plt.ylabel('Error')
            plt.xlabel('Iterations')
            plt.show()

    # Use the trained model to predict labels of X
    def predict(self, X):
        output = self._forward_pass(X)
        # Predict as the indices of the highest valued outputs
        y_pred = np.argmax(output, axis=1)
        return y_pred
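
MultilayerPerceptron only assumes that each layer object exposes set_optimizer, forward_pass and backward_pass; the layer API itself never appears in the source. The sketch below shows a minimal fully-connected sigmoid layer satisfying that contract. Everything in it (the class name, the weight initialization, the plain gradient-descent update that stands in for the real optimizer) is an illustrative assumption.

import numpy as np

# Minimal sketch of a layer matching the interface the network expects.
class DenseSigmoid:
    def __init__(self, n_inputs, n_units, learning_rate=0.01):
        # Small random weights and one bias per unit
        self.W = 0.01 * np.random.randn(n_inputs, n_units)
        self.b = np.zeros((1, n_units))
        self.learning_rate = learning_rate

    def set_optimizer(self, optimizer):
        # This sketch ignores the optimizer and uses plain gradient descent
        self.optimizer = optimizer

    def forward_pass(self, X):
        # Cache the input and activation for the backward pass
        self.X = X
        self.out = 1 / (1 + np.exp(-(X.dot(self.W) + self.b)))
        return self.out

    def backward_pass(self, acc_grad):
        # Gradient through the sigmoid, then through the linear map
        grad = acc_grad * self.out * (1 - self.out)
        grad_input = grad.dot(self.W.T)
        # Plain gradient-descent parameter update
        self.W -= self.learning_rate * self.X.T.dot(grad)
        self.b -= self.learning_rate * np.sum(grad, axis=0, keepdims=True)
        return grad_input

# clf = MultilayerPerceptron(n_iterations=1000, batch_size=32,
#                            optimizer=None, val_error=True)
# clf.add(DenseSigmoid(n_inputs=n_features, n_units=n_classes))
# clf.fit(X_train, y_train)
# y_pred = clf.predict(X_test)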