import datetime

import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV, learning_curve, validation_curve

# NOTE: model_name, format_run_time, get_model_scores and print_score_results
# are helper functions assumed to be defined elsewhere in this package.


def plot_validation_curve(model, x_train, y_train, h_param, h_range, k=10,
                          log_scale=False, scorer=None):
    """Plot mean cross-validated train/test scores over a hyperparameter range."""
    # param_name / param_range are keyword-only in recent scikit-learn releases
    train_score, val_score = validation_curve(model, x_train, y_train,
                                              param_name=h_param,
                                              param_range=h_range,
                                              cv=k, scoring=scorer)
    validation_curve_data = {'train': train_score, 'test': val_score}
    for legend, scores in validation_curve_data.items():
        # Average across folds; abs() turns sklearn's negated losses positive
        if log_scale:
            plt.semilogx(h_range, np.abs(scores.mean(axis=1)), label=legend)
        else:
            plt.plot(h_range, np.abs(scores.mean(axis=1)), label=legend)
    plt.xlabel(h_param, labelpad=20)
    plt.ylabel('score', labelpad=20)
    plt.legend()
    plt.show()
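# Usage sketch for plot_validation_curve (illustrative only; the diabetes
# dataset and Ridge estimator below are assumptions, not part of this module):
def _demo_validation_curve():
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge

    X, y = load_diabetes(return_X_y=True)
    # Sweep the regularisation strength on a log scale
    plot_validation_curve(Ridge(), X, y, 'alpha', np.logspace(-3, 3, 7),
                          k=5, log_scale=True, scorer='neg_mean_squared_error')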
def plot_learning_curve(model, x_train, y_train,
                        train_sizes_ratio=np.linspace(0.1, 1.0, 10), k=10,
                        scorer=None):
    """Plot mean cross-validated train/test scores against training-set size.

    :param model: estimator implementing fit/predict
    :param x_train: training features
    :param y_train: training target
    :param train_sizes_ratio: fractions of the training set to evaluate
    :param k: number of cross-validation folds
    :param scorer: scoring strategy passed to learning_curve
    :return: None (displays the plot)
    """
    N, train_score, val_score = learning_curve(model, x_train, y_train,
                                               train_sizes=train_sizes_ratio,
                                               cv=k, scoring=scorer)
    learning_curve_data = {'train': train_score, 'test': val_score}
    for legend, scores in learning_curve_data.items():
        # Average across folds; abs() turns sklearn's negated losses positive
        plt.plot(N, np.abs(scores.mean(axis=1)), label=legend)
    plt.xlabel('train_sizes', labelpad=20)
    plt.ylabel('score', labelpad=20)
    plt.legend()
    plt.show()
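# Usage sketch for plot_learning_curve (illustrative only; the diabetes
# dataset and Ridge estimator below are assumptions, not part of this module):
def _demo_learning_curve():
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge

    X, y = load_diabetes(return_X_y=True)
    plot_learning_curve(Ridge(), X, y, k=5, scorer='r2')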
def train_gridsearch(data, model, param_grid, metric, k=10, p=3, v=True):
    # Model name
    model_label = model_name(model)

    # Get training & testing data
    x_train, y_train = data['train']
    x_test, y_test = data['test']

    # Define refit condition (first metric if evaluating multiple metrics, else True)
    refit_cond = metric[0] if isinstance(metric, list) else True

    # Build grid search
    gridsearch = GridSearchCV(model, param_grid, cv=k, scoring=metric,
                              refit=refit_cond)

    # Time the model training
    start_training = datetime.datetime.now()

    # Train model with grid search
    gridsearch.fit(x_train, y_train)
    end_training = datetime.datetime.now()

    # Compute & format training time
    training_time = end_training - start_training
    training_time_str = format_run_time(training_time)

    # Trained model
    trained_model = gridsearch.best_estimator_

    # Get cross-validation scores of the best parameter combination
    # (cv_results_ arrays are indexed by candidate, so use best_index_)
    best_index = gridsearch.best_index_
    cv_scores = {}
    if isinstance(metric, list):
        for scorer_label in metric:
            if scorer_label.startswith('neg'):
                # Abbreviate negated metrics, e.g. 'neg_mean_squared_error' -> 'mse'
                formatted_label = "".join(
                    w[0] for w in scorer_label.replace('neg_', '').split('_'))
                formatted_score = round(
                    np.abs(gridsearch.cv_results_[
                        f'mean_test_{scorer_label}'])[best_index], p)
                cv_scores[formatted_label] = formatted_score
            else:
                cv_scores[scorer_label] = round(
                    gridsearch.cv_results_[f'mean_test_{scorer_label}'][best_index], p)
    else:
        cv_scores[metric] = round(
            gridsearch.cv_results_['mean_test_score'][best_index], p)

    # Get scores from testing set
    testing_set_scores = get_model_scores(trained_model, x_test, y_test,
                                          list(cv_scores.keys()), p, v)

    # Display cross-validation mean scores
    if v:
        print_score_results(cv_scores, set_type='train')

    # Build model dictionary which contains GridSearchCV & model instances (with model name)
    model_data = {
        'gs': gridsearch,           # GridSearchCV trained instance
        'model': trained_model,     # Model trained instance
        'model_name': model_label,  # Model name
    }

    # Build additional evaluation data dictionary
    additional_evaluation_data = {
        'time': training_time_str,       # Training time
        'n_features': x_train.shape[1],  # Selected features
        'learning_potential': None,      # Learning potential
    }

    # Build results dictionary (merge dictionaries)
    results = dict(**model_data, **testing_set_scores,
                   **additional_evaluation_data)

    return results
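# Usage sketch for train_gridsearch (illustrative only; the dataset, split and
# estimator are assumptions, and the helpers model_name / format_run_time /
# get_model_scores / print_score_results must be importable for the call to run):
def _demo_train_gridsearch():
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import train_test_split

    X, y = load_diabetes(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)
    data = {'train': (x_train, y_train), 'test': (x_test, y_test)}
    results = train_gridsearch(data, Ridge(),
                               param_grid={'alpha': [0.1, 1.0, 10.0]},
                               metric=['neg_mean_squared_error', 'r2'], k=5)
    print(results['model_name'], results['time'])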
def plot_factorial_planes(self, n_plan=None, X_projected=None, labels=None,
                          alpha=1, illustrative_var=None,
                          illustrative_var_title=None, save_as_img=False,
                          plot_size=(10, 8)):
    """Plot the projected individuals on successive factorial planes.

    :param n_plan: number of axes to pair into factorial planes
                   (defaults to self.default_factorial_plan_nb)
    :param X_projected: projected data (defaults to self.X_projected)
    :param labels: optional text labels for the individuals
    :param alpha: scatter-point transparency
    :param illustrative_var: optional categorical variable used to colour points
    :param illustrative_var_title: legend title for illustrative_var
    :param save_as_img: if True, save each plane as a .jpg file
    :param plot_size: figure size in inches
    """
    X_projected = self.X_projected if X_projected is None else X_projected
    factorial_plan_nb = (self.default_factorial_plan_nb
                         if n_plan is None else n_plan)
    # Pair consecutive axes into planes: (F1, F2), (F3, F4), ...
    axis_ranks = [(x, x + 1) for x in range(0, factorial_plan_nb, 2)]
    for d1, d2 in axis_ranks:
        if d2 < self.n_comp:
            plt.figure(figsize=plot_size)

            # Display data points
            if illustrative_var is None:
                plt.scatter(X_projected[:, d1], X_projected[:, d2], alpha=alpha)
            else:
                # One scatter per category so each gets its own colour & legend entry
                illustrative_var = np.array(illustrative_var)
                for value in np.unique(illustrative_var):
                    selected = np.where(illustrative_var == value)
                    plt.scatter(X_projected[selected, d1],
                                X_projected[selected, d2],
                                alpha=alpha, label=value)
                plt.legend(title=illustrative_var_title)

            # Display data point labels
            if labels is not None:
                for i, (x, y) in enumerate(X_projected[:, [d1, d2]]):
                    plt.text(x, y, labels[i], fontsize=12,
                             ha='center', va='bottom')

            # Fix factorial plane limits
            boundary = np.max(np.abs(X_projected[:, [d1, d2]])) * 1.1
            plt.xlim([-boundary, boundary])
            plt.ylim([-boundary, boundary])

            # Display horizontal & vertical lines
            plt.plot([-100, 100], [0, 0], color='grey', ls='--')
            plt.plot([0, 0], [-100, 100], color='grey', ls='--')

            # Axis labels with % of explained variance
            plt.xlabel('F{} ({}%)'.format(d1 + 1, round(100 * self.evr[d1], 1)),
                       labelpad=20)
            plt.ylabel('F{} ({}%)'.format(d2 + 1, round(100 * self.evr[d2], 1)),
                       labelpad=20)
            plt.title('Projection of individuals (on F{} and F{})'.format(
                d1 + 1, d2 + 1), pad=20)

            if save_as_img:
                plt.tight_layout()
                # Plane index: axes (0, 1) -> plane 1, (2, 3) -> plane 2, ...
                plt.savefig('factorial_plan_{}.jpg'.format(d1 // 2 + 1))
            plt.show(block=False)
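# Usage sketch for plot_factorial_planes (illustrative only; `PCAHandler` is a
# hypothetical name for the class this method belongs to, and the iris data
# plus the fit() call are assumptions — neither comes from the original module):
def _demo_factorial_planes():
    from sklearn.datasets import load_iris

    iris = load_iris()
    handler = PCAHandler(n_comp=4)  # hypothetical wrapper class
    handler.fit(iris.data)          # assumed to set X_projected & evr
    handler.plot_factorial_planes(n_plan=2,
                                  illustrative_var=iris.target,
                                  illustrative_var_title='species')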