Example #1
    def test(self,
             dataset,
             repetitions=1,
             preliminary_search=False,
             to_fix=[]):
        """
        This function implements the testing procedure for the monks' datasets.
        It permits two kinds of search for the best hyperparameters. The first
        kind simply performs a search using one HyperGrid and selects the set
        of hyperparameters which returns the best result. The second kind
        performs a deeper search, by first searching for the best values of
        some hyperparameters, fixing them, and then searching again for the
        values of the remaining ones.

        Parameters
        ----------
        dataset: int or list
            either a single index or a list of indexes, each one representing
            a dataset in self.monks

        repetitions: int
            cross validation's repetitions
            (Default value = 1)

        preliminary_search: bool
            whether or not to execute a preliminary search for the best value
            for some hyperparameters, fix them, and search again for the
            remaining hyperparameters
            (Default value = False)

        to_fix: list
            a list of hyperparameters that must be fixed
            (Default value = [])

        size: int
            the new hypergrid's size for the new search for the best
            hyperparameters in the preliminary_search function
            (Default value = 0)

        Returns
        -------
        """
        if type(dataset) == int:
            assert dataset >= 0 and dataset <= 2
            dataset = [dataset]
        else:
            assert len(dataset) > 0 and len(dataset) <= 3

        for ds in dataset:
            print('TESTING MONK DATASET {}\n'.format(ds + 1))

            self.train_set = pd.\
                read_csv(self.monks[ds][0], names=['class'] +
                         ['x{}'.format(j) for j in range(17)]).values
            self.test_set = pd.\
                read_csv(self.monks[ds][1], names=['class'] +
                         ['x{}'.format(j) for j in range(17)]).values

            self.grid = val.HyperGrid(self.param_ranges,
                                      size=self.grid_size,
                                      seed=datetime.now())
            self.selection = val.ModelSelectionCV(self.grid,
                                                  repetitions=repetitions)

            # PRELIMINARY SEARCH FOR SOME OF THE PARAMETERS ###################

            assert len(to_fix) != 0
            self.preliminary_search(ds, to_fix, repetitions)

            # SEARCHING FOR THE OTHER PARAMETERS ##############################

            self.selection.search(
                self.train_set[:, 1:],
                self.train_set[:, 0].reshape(-1, 1),
                save_results=True,
                fname='../data/model_selection_results_monk_{}.json'.format(
                    ds + 1),
                par_name='REMAINING PARAMETERS')

            best_model = self.selection.\
                select_best_model(
                    self.train_set[:, 1:],
                    self.train_set[:, 0].reshape(-1, 1),
                    fname='../data/model_selection_results_monk_{}.json'.
                    format(ds + 1))

            y_pred = best_model.predict(self.test_set[:, 1:])
            y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1, 1,
                                         y_pred).reshape(-1, 1)
            print('\n\n\n')
            bca = metrics.BinaryClassifierAssessment(
                self.test_set[:, 0].reshape(-1, 1), y_pred)

            self.save_best_result(ds, best_model, bca)
            self.plot_best_result(ds, best_model)

            self.param_ranges = self.param_ranges_backup.copy()
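A minimal, self-contained sketch of the 0/1 thresholding step used above (the same np.apply_along_axis pattern recurs in the other snippets); the vectorized comparison at the end is an equivalent alternative, not what the original code uses:

import numpy as np

# toy continuous network outputs for four test patterns
y_pred = np.array([[0.1], [0.7], [0.49], [0.95]])

# thresholding as done in the snippet: row-wise lambda via apply_along_axis
y_bin = np.apply_along_axis(lambda x: 0 if x < .5 else 1, 1,
                            y_pred).reshape(-1, 1)

# equivalent vectorized form
y_bin_vec = (y_pred >= .5).astype(int)

assert np.array_equal(y_bin, y_bin_vec)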
Example #2
    def validate(self, X, y, neural_net, nfolds, plot_curves=False, **kwargs):
        """
        This function implements the core of the k-fold cross validation
        algorithm. For each fold, the neural network is trained using the
        training set created for that fold, and is tested on the respective
        test set. Finally, the error between the test targets and the
        predictions is collected.

        Parameters
        ----------
        X: numpy.ndarray
            the design matrix

        y: numpy.ndarray
            the target column vector

        neural_net: nn.NeuralNetwork
            the neural network that has to be cross validated

        nfolds: int
            the number of folds to be applied in the algorithm

        plot_curves: bool
            whether or not to plot the learning curve for each one of the
            cross validation's iterations
            (Default value = False)

        kwargs: dict
            a dictionary which contains the parameters for the neural
            network's initialization

        Returns
        -------
        """
        for i in tqdm(np.arange(nfolds),
                      desc='{}-FOLD CROSS VALIDATION PROGRESS'.format(nfolds)):

            train_set = np.vstack(
                [self.folds[j] for j in np.arange(len(self.folds)) if j != i])

            X_train, y_train = np.hsplit(train_set, [X.shape[1]])
            X_va, y_va = np.hsplit(self.folds[i], [X.shape[1]])

            neural_net.train(X_train, y_train, X_va, y_va)

            # assessment = self.model_assessment(X_va, y_va, model=neural_net)
            assessment = {'mse': neural_net.error_per_epochs_va[-1]}
            self.results.append(assessment)
            # self.results.append(loss)

            fold_results = {
                'id_fold': i + 1,
                'mse_tr': neural_net.error_per_epochs[-1],
                'mse_va': neural_net.error_per_epochs_va[-1],
                'mee_tr': neural_net.mee_per_epochs[-1],
                'mee_va': neural_net.mee_per_epochs_va[-1],
                'error_per_epochs': neural_net.error_per_epochs,
                'error_per_epochs_va': neural_net.error_per_epochs_va,
                'mee_per_epochs': neural_net.mee_per_epochs,
                'mee_per_epochs_va': neural_net.mee_per_epochs_va,

                # 'accuracy_per_epochs': neural_net.accuracy_per_epochs,
                # 'accuracy_per_epochs_va': neural_net.accuracy_per_epochs_va,
                'hyperparams': neural_net.get_params()
            }
            if neural_net.task == 'classifier':
                y_pred = neural_net.predict(X_va)
                y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1, 1,
                                             y_pred).reshape(-1, 1)

                # y_pred = np.round(y_pred)
                bca = metrics.BinaryClassifierAssessment(y_pred,
                                                         y_va,
                                                         printing=False)
                fold_results['accuracy'] = bca.accuracy
                fold_results['f1_score'] = bca.f1_score

            if neural_net.task == 'regression':
                # add mean euclidean error
                pass

            self.fold_results.append(fold_results)
            neural_net.reset()

            if plot_curves:
                plt.plot(range(len(neural_net.error_per_epochs)),
                         neural_net.error_per_epochs,
                         label='FOLD {}, VALIDATION ERROR: {}'.format(
                             i, round(assessment['mse'], 2)))

        self.aggregated_results = self.aggregate_assessments()

        if plot_curves:
            plt.title('LEARNING CURVES FOR A {}-FOLD CROSS VALIDATION.\nMEAN '
                      'VALIDATION ERROR {}, VARIANCE {}.'.format(
                          nfolds,
                          round(self.aggregated_results['mse']['mean'], 2),
                          round(self.aggregated_results['mse']['std'], 2)),
                      fontsize=8)
            plt.ylabel('ERROR PER EPOCH')
            plt.xlabel('EPOCHS')
            plt.grid()
            plt.legend(fontsize=8)
            plt.savefig('../images/{}_fold_cross_val_lcs.pdf'.format(nfolds),
                        bbox_inches='tight')
            plt.close()

        return self.aggregated_results
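A small, self-contained sketch of the fold bookkeeping that validate relies on: the targets are glued to the design matrix, the data is split into folds, each training set is rebuilt by stacking every fold except the held-out one, and np.hsplit recovers X and y. Building the folds with np.array_split is an assumption here; the original class constructs self.folds elsewhere.

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(10, 3)                    # toy design matrix
y = rng.randint(0, 2, (10, 1))         # toy binary targets

nfolds = 5
folds = np.array_split(np.hstack((X, y)), nfolds)  # assumed fold construction

i = 2                                  # index of the held-out fold
train_set = np.vstack([folds[j] for j in range(len(folds)) if j != i])

X_train, y_train = np.hsplit(train_set, [X.shape[1]])
X_va, y_va = np.hsplit(folds[i], [X.shape[1]])

assert X_train.shape == (8, 3) and y_train.shape == (8, 1)
assert X_va.shape == (2, 3) and y_va.shape == (2, 1)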
Example #3
    def optimize(self,
                 nn,
                 X,
                 y,
                 X_va,
                 y_va,
                 max_epochs,
                 error_goal,
                 plus=True,
                 strong=False,
                 **kwargs):
        """
        This function implements the optimization procedure following the
        conjugate gradient method, as described in the paper 'A new conjugate
        gradient algorithm for training neural networks based on a modified
        secant equation'.

        Parameters
        ----------
        nn: nn.NeuralNetwork
            the neural network which has to be optimized

        X: numpy.ndarray
            the design matrix

        y: numpy.ndarray
            the target column vector

        X_va: numpy.ndarray
            the design matrix used for the validation

        y_va: numpy.ndarray
            the target column vector used for the validation

        max_epochs: int
            the maximum number of iterations for optimizing the network

        error_goal: float
            the stopping criterion, based on a threshold for the maximum
            error allowed

        plus: bool
            whether or not to use the modified HS formula
            (Default value = True)

        strong: bool
            whether or not to use the strong Armijo-Wolfe condition
            (Default value = False)

        kwargs: dict
            additional parameters

        Returns
        -------
        """
        start_time = dt.datetime.now()
        k = 0
        g_prev = 0

        y_pred, y_pred_va = None, None
        bin_assess, bin_assess_va = None, None

        while True:
            start_iteration = dt.datetime.now()
            dataset = np.hstack((X, y))
            np.random.shuffle(dataset)
            X, y = np.hsplit(dataset, [X.shape[1]])

            # BACK-PROPAGATION ALGORITHM ######################################

            self.error = self.forward_propagation(nn, X, y)
            self.back_propagation(nn, X, y)
            self.error_per_epochs.append(self.error)

            y_pred = self.h[-1].reshape(-1, 1)

            g = self.flat_weights(self.delta_W, self.delta_b)

            if self.error < error_goal:  # mod
                self.statistics['epochs'] = (k + 1)
                self.statistics['time_train'] = \
                    (dt.datetime.now() - start_time).total_seconds() * 1000
                self.time_per_epochs.append(
                    (dt.datetime.now() - start_time).total_seconds() * 1000)
                self.error_per_epochs_va.append(self.error_per_epochs_va[-1])
                return 1

            elif np.all(g <= 1e-10):
                self.statistics['epochs'] = (k + 1)
                self.statistics['time_train'] = \
                    (dt.datetime.now() - start_time).total_seconds() * 1000
                self.time_per_epochs.append(
                    (dt.datetime.now() - start_time).total_seconds() * 1000)

                return None

            flatted_weights = self.flat_weights(nn.W, nn.b)
            flatted_copies = self.flat_weights(nn.W_copy, nn.b_copy)

            if k == 0:
                self.error_prev = self.error
                g_prev, d_prev, w_prev = 0, -g, 0
                if self.beta_m == 'cd':
                    g_prev = g

            # TODO: refactor the call that computes beta
            if self.beta_m in ('fr', 'pr', 'cd'):
                beta = self.get_beta(g, g_prev, self.beta_m, plus=plus)
            elif self.beta_m in ('hs', 'dy'):
                beta = self.get_beta(g,
                                     g_prev,
                                     self.beta_m,
                                     plus=plus,
                                     d_prev=d_prev)
            elif self.beta_m == 'dl':
                beta = self.get_beta(g,
                                     g_prev,
                                     self.beta_m,
                                     plus=plus,
                                     w=flatted_weights,
                                     w_prev=w_prev,
                                     d_prev=d_prev,
                                     t=self.t)
            else:
                beta = self.get_beta(g,
                                     g_prev,
                                     self.beta_m,
                                     plus=plus,
                                     d_prev=d_prev,
                                     error=self.error,
                                     error_prev=self.error_prev,
                                     w=flatted_weights,
                                     w_prev=w_prev,
                                     rho=self.rho)
            d = self.get_direction(k, g, beta, d_prev=d_prev, method=self.d_m)

            eta = self.line_search(nn, X, y, flatted_weights, d,
                                   np.asscalar(g.T.dot(d)), self.error)

            # WEIGHTS' UPDATE #################################################

            new_W = flatted_copies + (eta * d)
            nn.W, nn.b = self.unflat_weights(new_W, nn.n_layers, nn.topology)
            nn.update_copies()

            g_prev, d_prev, w_prev = g, d, flatted_copies
            self.error_prev = self.error

            # IN LOCO VALIDATION ##############################################

            if X_va is not None:
                error_va = self.forward_propagation(nn, X_va, y_va)
                self.error_per_epochs_va.append(error_va)
                y_pred_va = self.h[-1].reshape(-1, 1)

            # ACCURACY ESTIMATION #############################################

            if nn.task == 'classifier':
                y_pred_bin = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                                 1, y_pred).reshape(-1, 1)

                y_pred_bin_va = np.apply_along_axis(
                    lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)

                bin_assess = metrics.BinaryClassifierAssessment(y,
                                                                y_pred_bin,
                                                                printing=False)
                bin_assess_va = metrics.BinaryClassifierAssessment(
                    y_va, y_pred_bin_va, printing=False)

                self.accuracy_per_epochs.append(bin_assess.accuracy)
                self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)
                self.f1_score_per_epochs.append(bin_assess.f1_score)
                self.f1_score_per_epochs_va.append(bin_assess_va.f1_score)

                if (bin_assess_va.accuracy > self.max_accuracy):
                    self.max_accuracy = bin_assess_va.accuracy
                    self.statistics['acc_epoch'] = k

            norm_gradient = np.linalg.norm(self.g)
            self.gradient_norm_per_epochs.append(norm_gradient)

            if (k > 0 and (norm_gradient <= self.convergence_goal)) or\
                    (max_epochs is not None and k == max_epochs):
                self.statistics['epochs'] = (k + 1)
                self.statistics['ls'] = self.ls_it / (k + 1)
                self.statistics['time_train'] = \
                    (dt.datetime.now() - start_time).total_seconds() * 1000
                self.time_per_epochs.append(
                    (dt.datetime.now() - start_time).total_seconds() * 1000)
                return 1

            self.statistics['epochs'] = (k + 1)
            self.statistics['ls'] = self.ls_it / (k + 1)
            self.time_per_epochs.append(
                (dt.datetime.now() - start_time).total_seconds() * 1000)
            self.statistics['time_train'] = \
                (dt.datetime.now() - start_time).total_seconds() * 1000

            k += 1

        return 0
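For reference, a self-contained sketch of the standard nonlinear conjugate gradient pieces the snippet chooses between ('fr', 'pr', 'hs', each with an optional '+' restart) and of the direction update. These are the textbook formulas; the snippet's own get_beta and get_direction, in particular the modified secant variant from the cited paper, may differ in detail.

import numpy as np


def beta_cg(g, g_prev, d_prev, method='hs', plus=True):
    """Textbook conjugate gradient beta formulas (sketch only)."""
    if method == 'fr':    # Fletcher-Reeves
        beta = g.T.dot(g) / g_prev.T.dot(g_prev)
    elif method == 'pr':  # Polak-Ribiere
        beta = g.T.dot(g - g_prev) / g_prev.T.dot(g_prev)
    elif method == 'hs':  # Hestenes-Stiefel
        beta = g.T.dot(g - g_prev) / d_prev.T.dot(g - g_prev)
    else:
        raise ValueError(method)
    beta = beta.item()                       # (1, 1) array -> Python float
    return max(beta, 0.) if plus else beta   # '+' variant truncates at zero


def direction(k, g, beta, d_prev):
    """Steepest descent at the first iteration, CG direction afterwards."""
    return -g if k == 0 else -g + beta * d_prev


# toy column-vector gradients
g_prev = np.array([[1.0], [2.0]])
g = np.array([[0.5], [-1.0]])
d_prev = -g_prev                             # first direction is -g_prev

beta = beta_cg(g, g_prev, d_prev, method='hs', plus=True)
d = direction(1, g, beta, d_prev)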
Example #4
    def optimize(self, nn, X, y, X_va, y_va, epochs=None):
        """
        This function implements the optimization routine.

        Parameters
        ----------
        nn: nn.NeuralNetwork
            a reference to the network

        X : numpy.ndarray
            the design matrix

        y : numpy.ndarray
            the target column vector

        X_va: numpy.ndarray
            the design matrix used for the validation

        y_va: numpy.ndarray
            the target column vector used for the validation

        epochs: int
            the optimization routine's maximum number of epochs
            (Default value = None)
        """

        bin_assess, bin_assess_va = None, None
        start_time = dt.datetime.now()
        e = 0
        while True:
            start_iteration = dt.datetime.now()
            error_per_batch = []
            y_pred, y_pred_va = None, None

            dataset = np.hstack((X, y))
            np.random.shuffle(dataset)
            X, y = np.hsplit(dataset, [X.shape[1]])

            for b_start in np.arange(0, X.shape[0], X.shape[0]):
                # BACK-PROPAGATION ALGORITHM ##################################

                x_batch = X[b_start:b_start + X.shape[0], :]
                y_batch = y[b_start:b_start + X.shape[0], :]

                # MOMENTUM CHECK ##############################################

                if self.momentum['type'] == 'nesterov':
                    for layer in range(nn.n_layers):
                        nn.W[layer] += self.momentum['alpha'] * \
                                       self.velocity_W[layer]

                error = self.forward_propagation(nn, x_batch, y_batch)
                self.error_per_batch.append(error)
                error_per_batch.append(error)
                y_pred = self.h[-1].reshape(-1, 1)

                if error < self.convergence_goal or e >= 10000:
                    self.statistics['epochs'] = e
                    self.statistics['time_train'] = \
                        (dt.datetime.now() - start_time).total_seconds() * 1000
                    return 1

                self.back_propagation(nn, x_batch, y_batch)

                # WEIGHTS' UPDATE #############################################

                for layer in range(nn.n_layers):
                    weight_decay = reg.regularization(nn.W[layer],
                                                      self.reg_lambda,
                                                      self.reg_method)

                    self.velocity_b[layer] = (self.momentum['alpha'] *
                                              self.velocity_b[layer]) \
                        - (self.eta / x_batch.shape[0]) * \
                        self.delta_b[layer]
                    nn.b[layer] += self.velocity_b[layer]

                    self.velocity_W[layer] = (self.momentum['alpha'] *
                                              self.velocity_W[layer]) \
                        - (self.eta / x_batch.shape[0]) * \
                        self.delta_W[layer]

                    nn.W[layer] += self.velocity_W[layer] - weight_decay

            self.error_per_epochs.append(np.sum(error_per_batch))

            # IN LOCO VALIDATION ##############################################

            if X_va is not None:
                error_va = self.forward_propagation(nn, X_va, y_va)
                self.error_per_epochs_va.append(error_va)
                y_pred_va = self.h[-1].reshape(-1, 1)

            # PERFORMANCE ESTIMATION ##########################################

            if nn.task == 'classifier':
                y_pred_bin = np.apply_along_axis(lambda x: 0 if x < .5 else 1,
                                                 1, y_pred).reshape(-1, 1)

                y_pred_bin_va = np.apply_along_axis(
                    lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)

                bin_assess = metrics.BinaryClassifierAssessment(y,
                                                                y_pred_bin,
                                                                printing=False)
                bin_assess_va = metrics.BinaryClassifierAssessment(
                    y_va, y_pred_bin_va, printing=False)

                self.accuracy_per_epochs.append(bin_assess.accuracy)
                self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)
                self.f1_score_per_epochs.append(bin_assess.f1_score)
                self.f1_score_per_epochs_va.append(bin_assess_va.f1_score)

                if (bin_assess_va.accuracy > self.max_accuracy):
                    self.max_accuracy = bin_assess_va.accuracy
                    self.statistics['acc_epoch'] = e

            # GRADIENT'S NORM STORING #########################################
            norm_gradient = np.linalg.norm(self.g)
            self.gradient_norm_per_epochs.append(norm_gradient)
            self.time_per_epochs.append(
                (dt.datetime.now() - start_time).total_seconds() * 1000)
            e += 1

            if (norm_gradient <= self.convergence_goal) or \
                    (epochs is not None and e == epochs):
                self.statistics['epochs'] = e
                self.statistics['time_train'] = \
                    (dt.datetime.now() - start_time).total_seconds() * 1000
                return 0

        self.statistics['epochs'] = e
        self.statistics['time_train'] = (dt.datetime.now() -
                                         start_time).total_seconds() * 1000
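The update above is classical momentum with an optional Nesterov look-ahead and a weight-decay term. A minimal, self-contained sketch of the same rule on a single toy weight matrix; the quadratic loss, its gradient and the explicit reg_lambda * W term are stand-ins for forward/back-propagation and for reg.regularization:

import numpy as np

rng = np.random.RandomState(0)
W = rng.rand(3, 2)                   # toy weight matrix
velocity = np.zeros_like(W)

eta, alpha, reg_lambda = 0.1, 0.9, 1e-3
nesterov = True

for epoch in range(200):
    # Nesterov: evaluate the gradient at the look-ahead point W + alpha * v
    W_eval = W + alpha * velocity if nesterov else W

    grad = W_eval                    # gradient of the toy loss 0.5 * ||W||^2
    weight_decay = reg_lambda * W    # stand-in for reg.regularization

    velocity = alpha * velocity - eta * grad   # same form as velocity_W above
    W = W + velocity - weight_decay

print(np.linalg.norm(W))             # the norm shrinks toward zero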
Example #5
    def train(self, X, y, X_va=None, y_va=None):
        """
        This function implements the neural network's training routine.

        Parameters
        ----------
        X : numpy.ndarray
            the design matrix

        y : numpy.ndarray
            the target column vector

        X_va: numpy.ndarray
            the design matrix used for the validation
            (Default value = None)

        y_va: numpy.ndarray
            the target column vector used for the validation
            (Default value = None)

        Returns
        -------
        """
        velocity_W = [0 for i in range(self.n_layers)]
        velocity_b = [0 for i in range(self.n_layers)]

        self.error_per_epochs = []
        self.error_per_epochs_old = []
        self.error_per_batch = []
        self.mee_per_epochs = []
        if X_va is not None:
            self.error_per_epochs_va = []
            self.mee_per_epochs_va = []
        else:
            self.error_per_epochs_va = None
            self.mee_per_epochs_va = None

        if self.task == 'classifier':
            self.accuracy_per_epochs = []
            self.accuracy_per_epochs_va = []

        self.stop_GL = None
        self.stop_PQ = None
        stop_GL = False
        stop_PQ = False

        # for e in tqdm(range(self.epochs), desc='TRAINING'):
        for e in range(self.epochs):
            error_per_batch = []

            dataset = np.hstack((X, y))
            np.random.shuffle(dataset)
            X, y = np.hsplit(dataset, [X.shape[1]])

            for b_start in np.arange(0, X.shape[0], self.batch_size):
                # BACK-PROPAGATION ALGORITHM ##################################

                x_batch = X[b_start:b_start + self.batch_size, :]
                y_batch = y[b_start:b_start + self.batch_size, :]

                error = self.forward_propagation(x_batch, y_batch)
                self.error_per_batch.append(error)
                error_per_batch.append(error)

                self.back_propagation(x_batch, y_batch)

                # WEIGHTS' UPDATE #############################################

                for layer in range(self.n_layers):
                    weight_decay = reg.regularization(self.W[layer],
                                                      self.reg_lambda,
                                                      self.reg_method)

                    velocity_b[layer] = (self.alpha * velocity_b[layer]) \
                        - (self.eta / x_batch.shape[0]) * self.delta_b[layer]
                    self.b[layer] += velocity_b[layer]

                    velocity_W[layer] = (self.alpha * velocity_W[layer]) \
                        - (self.eta / x_batch.shape[0]) * self.delta_W[layer]

                    self.W[layer] += velocity_W[layer] - weight_decay

                ###############################################################

            # COMPUTING OVERALL MSE ###########################################

            self.error_per_epochs_old.append(
                np.sum(error_per_batch)/X.shape[0])

            y_pred = self.predict(X)
            self.error_per_epochs.append(metrics.mse(y, y_pred))
            self.mee_per_epochs.append(metrics.mee(y, y_pred))
            if X_va is not None:
                y_pred_va = self.predict(X_va)
                self.error_per_epochs_va.append(
                    metrics.mse(y_va, y_pred_va))
                self.mee_per_epochs_va.append(
                    metrics.mee(y_va, y_pred_va))

            if self.task == 'classifier':
                y_pred_bin = np.apply_along_axis(
                    lambda x: 0 if x < .5 else 1, 1, y_pred).reshape(-1, 1)

                y_pred_bin_va = np.apply_along_axis(
                    lambda x: 0 if x < .5 else 1, 1, y_pred_va).reshape(-1, 1)

                bin_assess = metrics.BinaryClassifierAssessment(
                    y, y_pred_bin, printing=False)
                bin_assess_va = metrics.BinaryClassifierAssessment(
                    y_va, y_pred_bin_va, printing=False)

                self.accuracy_per_epochs.append(bin_assess.accuracy)
                self.accuracy_per_epochs_va.append(bin_assess_va.accuracy)

            # CHECKING FOR EARLY STOPPING #####################################

            if self.early_stop is not None \
               and e > self.early_stop_min_epochs \
               and (e + 1) % 5 == 0:

                generalization_loss = 100 \
                    * ((self.error_per_epochs_va[e] /
                        min(self.error_per_epochs_va))
                       - 1)

                # GL method
                if generalization_loss > self.epsilon:
                    stop_GL = True

                # PQ method
                if self.early_stop != 'GL':  # PQ or 'testing'

                    min_e_per_strip = min(
                        self.error_per_epochs_va[e - 4:e + 1])
                    sum_per_strip = sum(self.error_per_epochs_va[e - 4:e + 1])
                    progress = 1000 * \
                               ((sum_per_strip / (5 * min_e_per_strip)) - 1)

                    progress_quotient = generalization_loss / progress

                    if progress_quotient > self.epsilon:
                        stop_PQ = True

                # stopping
                if stop_GL and self.stop_GL is None:
                    self.stop_GL = e
                if stop_PQ and self.stop_PQ is None:
                    self.stop_PQ = e

                if self.early_stop != 'testing' and (stop_GL or stop_PQ):
                    break
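The early-stopping block above computes Prechelt-style criteria: a generalization loss GL(t) = 100 * (E_va(t) / min E_va - 1) and, for the PQ variant, a strip progress P(t) = 1000 * (sum of the last five errors / (5 * their minimum) - 1), stopping when GL or GL/P exceeds epsilon. A self-contained sketch mirroring that computation (the strip is taken from the validation series, as in the snippet; the scheduling conditions, checking only every fifth epoch after a minimum number of epochs, are left out):

def early_stop_check(errors_va, epsilon, method='PQ'):
    """Sketch of the GL / PQ early-stopping test used above."""
    e = len(errors_va) - 1                       # current epoch index
    if e < 4:
        return False

    generalization_loss = 100 * (errors_va[e] / min(errors_va) - 1)
    if method == 'GL':
        return generalization_loss > epsilon

    strip = errors_va[e - 4:e + 1]               # last five epochs
    progress = 1000 * (sum(strip) / (5 * min(strip)) - 1)
    return generalization_loss / progress > epsilon


# toy validation curve that starts to overfit
errors = [1.0, 0.6, 0.4, 0.35, 0.34, 0.36, 0.40, 0.47]
print(early_stop_check(errors, epsilon=30, method='GL'))    # True
print(early_stop_check(errors, epsilon=0.25, method='PQ'))  # True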
Example #6
    nn.error_per_epochs_va[:epochs_plot],
    nn.get_params(),
    task='validation',
    accuracy_h_plot=True,
    accuracy_per_epochs=nn.accuracy_per_epochs[:epochs_plot],
    accuracy_per_epochs_va=nn.accuracy_per_epochs_va[:epochs_plot],
    fname=preliminary_name)

# u.plot_learning_curve(nn, fname='../images/monks_learning_curve.pdf')
# u.plot_learning_curve(nn, fname='../images/monks_{}_{}_{}.pdf'.format(dataset, 'stochastic', 'notearly', 'relu'))

y_pred = nn.predict(X_test)
y_pred = np.apply_along_axis(lambda x: 0 if x < .5 else 1, 1,
                             y_pred).reshape(-1, 1)
# y_pred = np.round(y_pred)
bca = metrics.BinaryClassifierAssessment(y_pred, y_test, printing=True)

y_pred_test = np.round(nn.predict(X_test))
metrics.BinaryClassifierAssessment(y_test, y_pred_test)

###########################################################

nn.h[0].shape
nn.h[1]

nn.W[0]

np.round(nn.h[0], 2)

###########################################################
# EXPERIMENT GRID SEARCH