def _plot_train_val_score(model, title=None, figsize=(12,4)):
    """Plots training and validation score by epoch on a single plot.

    One line is drawn per dataset and the epoch holding the best score of
    each dataset is highlighted with a marker. "Best" is the maximum score
    when the metric's ``mode`` is 'max' and the minimum score otherwise.

    Parameters
    ----------
    model : object
        Must expose ``history.params``, ``history.epoch_log`` and ``metric``.

    title : str, optional
        Plot title. When None, a three-line title is built from the 'name'
        entry of ``model.history.params``, the caption
        "Training and Validation Scores" and ``proper(model.metric)``.

    figsize : tuple, default (12, 4)
        Figure size forwarded to ``_init_image``.

    Returns
    -------
    tuple
        ``(fig, ax, title)``
    """
    # Format plot title
    if title is None:
        title = model.history.params.get('name') + "\n" + \
            "Training and Validation Scores" + \
            '\n' + proper(model.metric)
    # Extract training and validation score
    epoch_log = model.history.epoch_log
    d = {'Epoch': epoch_log['epoch'],
         'Training': epoch_log['train_score']}
    # Validation scores are optional in the log; leaving them out avoids an
    # all-None column that cannot be ranked by idxmax/idxmin below.
    val_score = epoch_log.get('val_score')
    if val_score is not None and len(val_score) > 0:
        d['Validation'] = val_score
    df = pd.DataFrame(data=d)
    datasets = [name for name in ('Training', 'Validation')
                if name in df.columns]
    # var_name is documented as a scalar; a list is only meaningful for
    # MultiIndex columns.
    df = pd.melt(df, id_vars='Epoch', value_vars=datasets,
                 var_name='Dataset', value_name='Score')
    # Extract row with best score by dataset for scatterplot
    score_by_dataset = df.groupby('Dataset').Score
    if RegressionMetricFactory()(model.metric).mode == 'max':
        best_score = df.loc[score_by_dataset.idxmax()]
    else:
        best_score = df.loc[score_by_dataset.idxmin()]
    # Initialize figure and axes with appropriate figsize and title
    fig, ax = _init_image(x='Epoch', y='Score', figsize=figsize,
                          title=title)
    # Render score lineplot
    ax = sns.lineplot(x='Epoch', y='Score', hue='Dataset', data=df,
                      legend='full', ax=ax)
    # Render scatterplot marking the best score of each dataset
    ax = sns.scatterplot(x='Epoch', y='Score', hue='Dataset',
                         data=best_score, legend=False, ax=ax)
    return fig, ax, title
    def score(self, X, y):
        """Scores the model's predictions for X against the targets y.

        When ``self.metric`` is truthy the instance's ``scorer`` is used;
        otherwise a scorer for ``self.DEFAULT_METRIC`` is obtained from
        ``RegressionMetricFactory``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix for which predictions will be rendered.

        y : numpy array, shape (n_samples,)
            Target values

        Returns
        -------
        float
            Returns the score for the designated metric.
        """
        self._validate_data(X, y)
        predictions = self.predict(X)
        # No metric configured: fall back to the class-level default.
        if not self.metric:
            default_scorer = RegressionMetricFactory()(
                metric=self.DEFAULT_METRIC)
            return default_scorer(y=y, y_pred=predictions)
        return self.scorer(y=y, y_pred=predictions)
Exemple #3
0
def model_higher_is_better(request):
    """Builds an early-stopping LinearRegression scored by ``request.param``.

    The model is created with validation size 0.3, precision 0.1 and
    patience 2, then given a quadratic cost function and a scorer for the
    requested metric.
    NOTE(review): presumably a parametrized pytest fixture; the decorator is
    not visible here - confirm.
    """
    settings = dict(metric=request.param, early_stop=True, val_size=0.3,
                    precision=0.1, patience=2)
    estimator = LinearRegression(**settings)
    estimator.cost_function = RegressionCostFactory()(cost='quadratic')
    estimator.scorer = RegressionMetricFactory()(metric=request.param)
    return estimator
def _plot_train_score(model, title=None, figsize=(12,4)):
    """Plots the training score by epoch and marks the best epoch.

    The best epoch is the one with the maximum score when the metric's
    ``mode`` is 'max', otherwise the one with the minimum score.

    Returns
    -------
    tuple
        ``(fig, ax, title)``
    """
    # Default title: model name, caption and formatted metric on three lines
    if title is None:
        name = model.history.params.get('name')
        title = name + "\nTraining Scores\n" + proper(model.metric)

    # Training score per epoch
    log = model.history.epoch_log
    scores = pd.DataFrame(data={'Epoch': log['epoch'],
                                'Score': log['train_score']})

    # Locate the best row and rebuild it as a one-row frame for the marker
    higher_is_better = RegressionMetricFactory()(model.metric).mode == 'max'
    best_idx = (scores.Score.idxmax() if higher_is_better
                else scores.Score.idxmin())
    best_row = scores.loc[best_idx]
    best_point = pd.DataFrame({"Epoch": [best_row['Epoch']],
                               "Score": [best_row['Score']]})

    # Figure, score line, then the best-score marker on top
    fig, ax = _init_image(x='Epoch', y='Score', figsize=figsize, title=title)
    ax = sns.lineplot(x='Epoch', y='Score', data=scores, ax=ax)
    ax = sns.scatterplot(x='Epoch', y='Score', data=best_point, ax=ax)
    return fig, ax, title
Exemple #5
0
def _plot_train_score(model, title=None, figsize=(12, 4)):
    """Plots training score by epoch with the learning rate on a twin y-axis.

    The epoch with the best score (maximum when the metric's ``mode`` is
    'max', minimum otherwise) is marked on the score line. ``title`` is
    forwarded unchanged to ``_init_image``.

    Returns
    -------
    tuple
        ``(fig, ax)`` where ``ax`` is the primary (score) axes.
    """
    log = model.history.epoch_log

    # Training score per epoch
    scores = pd.DataFrame(data={'Epoch': log['epoch'],
                                'Score': log['train_score']})

    # Single best row, rebuilt as a one-row frame for the marker
    wants_max = RegressionMetricFactory()(model.metric).mode == 'max'
    best_idx = scores.Score.idxmax() if wants_max else scores.Score.idxmin()
    best_row = scores.loc[best_idx]
    best_point = pd.DataFrame({"Epoch": [best_row['Epoch']],
                               "Score": [best_row['Score']]})

    # Learning rate per epoch for the secondary y-axis
    rates = pd.DataFrame(data={'Epoch': log['epoch'],
                               'Learning Rate': log['learning_rate']})

    # Primary axes plus a twin sharing the x-axis
    fig, ax = _init_image(x='Epoch', y='Score', figsize=figsize, title=title)
    twin = ax.twinx()

    # Score line, best-score marker, then learning rate on the twin
    ax = sns.lineplot(x='Epoch', y='Score', data=scores, ax=ax)
    ax = sns.scatterplot(x='Epoch', y='Score', data=best_point, ax=ax)
    twin = sns.lineplot(x='Epoch', y='Learning Rate', data=rates, ax=twin)
    return fig, ax
 def _get_scorer(self):
     """Obtains the scoring function associated with the metric parameter."""
     if self.metric is not None:
         scorer = RegressionMetricFactory()(metric=self.metric)
         if not isinstance(scorer, RegressionMetric):
             msg = str(
                 self.metric) + ' is not a supported regression metric.'
             raise ValueError(msg)
         else:
             self.metric_name = scorer.label
             return scorer
Exemple #7
0
def _plot_train_val_score(model, title=None, figsize=(12, 4)):
    """Plots training and validation score by epoch with the learning rate.

    Scores are drawn on the primary y-axis, one line per dataset, with the
    best score of each dataset marked (maximum when the metric's ``mode`` is
    'max', minimum otherwise). The learning rate is drawn on a twin y-axis.

    Parameters
    ----------
    model : object
        Must expose ``history.epoch_log`` and ``metric``.

    title : str, optional
        Forwarded unchanged to ``_init_image``.

    figsize : tuple, default (12, 4)
        Figure size forwarded to ``_init_image``.

    Returns
    -------
    tuple
        ``(fig, ax)`` where ``ax`` is the primary (score) axes.
    """
    epoch_log = model.history.epoch_log
    # Extract training and validation score
    d = {
        'Epoch': epoch_log['epoch'],
        'Training': epoch_log['train_score'],
    }
    # Validation scores are optional in the log; leaving them out avoids an
    # all-None column that cannot be ranked by idxmax/idxmin below.
    val_score = epoch_log.get('val_score')
    if val_score is not None and len(val_score) > 0:
        d['Validation'] = val_score
    df = pd.DataFrame(data=d)
    datasets = [name for name in ('Training', 'Validation')
                if name in df.columns]
    # var_name is documented as a scalar; a list is only meaningful for
    # MultiIndex columns.
    df = pd.melt(df,
                 id_vars='Epoch',
                 value_vars=datasets,
                 var_name='Dataset',
                 value_name='Score')
    # Extract row with best score by dataset for scatterplot
    score_by_dataset = df.groupby('Dataset').Score
    if RegressionMetricFactory()(model.metric).mode == 'max':
        best_score = df.loc[score_by_dataset.idxmax()]
    else:
        best_score = df.loc[score_by_dataset.idxmin()]
    # Extract learning rate data for plotting along secondary y-axis
    lr = pd.DataFrame(data={
        'Epoch': epoch_log['epoch'],
        'Learning Rate': epoch_log['learning_rate']
    })
    # Initialize figure and axes with appropriate figsize and title
    fig, ax = _init_image(x='Epoch', y='Score', figsize=figsize, title=title)
    ax2 = ax.twinx()
    # Render score lineplot
    ax = sns.lineplot(x='Epoch',
                      y='Score',
                      hue='Dataset',
                      data=df,
                      legend='full',
                      ax=ax)
    # Render scatterplot marking the best score of each dataset
    ax = sns.scatterplot(x='Epoch',
                         y='Score',
                         hue='Dataset',
                         data=best_score,
                         legend=False,
                         ax=ax)
    # Show learning rate along secondary y-axis
    ax2 = sns.lineplot(x='Epoch', y='Learning Rate', data=lr, ax=ax2)
    return fig, ax
Exemple #8
0
def models_by_metric(request):
    """Builds a LinearRegression scored by the metric in ``request.param``.

    The model is given a quadratic cost function and a scorer for the
    requested metric before being returned.
    NOTE(review): presumably a parametrized pytest fixture; the decorator is
    not visible here - confirm.
    """
    estimator = LinearRegression(metric=request.param)
    cost_factory = RegressionCostFactory()
    estimator.cost_function = cost_factory(cost='quadratic')
    metric_factory = RegressionMetricFactory()
    estimator.scorer = metric_factory(metric=request.param)
    return estimator
Exemple #9
0
def residuals(model, X, y, type='standardized', hist=True, title=None,
              figsize=(12,6), directory=None, filename=None):
    """Plots studentized residuals against predicted values.

    Predictions, residuals, leverage and the R2 shown in the legend are all
    computed from the training data stored on the model (``model.X`` and
    ``model.y``).

    Parameters
    ----------
    model : Estimator
        Fitted estimator exposing ``X``, ``y``, ``predict`` and
        ``history.params``.

    X, y :
        Accepted for interface compatibility; not used, because the plot is
        always built from ``model.X`` and ``model.y``.

    type : str, default 'standardized'
        Accepted for interface compatibility; currently ignored. Studentized
        residuals are always plotted.

    hist : bool, default True
        When True, a horizontal histogram of the residuals is added on the
        axes returned by ``histogram_ax``.

    title : str, optional
        Plot title. Defaults to the model name followed by "Residuals Plot".

    figsize : tuple, default (12, 6)
        Figure size forwarded to ``_init_image``.

    directory : str, optional
        When not None, the figure is saved there through ``save_plot``.

    filename : str, optional
        File name for the saved figure. Defaults to the title, with newlines
        replaced by spaces, plus '.png'.

    Raises
    ------
    ValueError
        If model is not an Estimator.
    TypeError
        If figsize is not a tuple.
    """
    # Validate request
    if not isinstance(model, Estimator):
        raise ValueError("Model is not a valid Estimator or subclass object.")
    if not isinstance(figsize, tuple):
        raise TypeError("figsize is not a valid tuple.")

    # Format title
    if title is None:
        title = model.history.params.get('name') + "\n" + \
            "Residuals Plot"

    # Compute training predictions and residuals
    X_train, y_train = model.X, model.y
    y_pred = model.predict(X_train)
    resid = y_train - y_pred

    # Compute R2
    r2 = RegressionMetricFactory()(metric='r2')(y_train, y_pred)

    # Compute leverage: row sums of X * pinv(X).T, i.e. diag(X pinv(X))
    leverage = (X_train * np.linalg.pinv(X_train).T).sum(1)

    # Degrees of freedom and MSE. The sample count must come from the same
    # data the residuals and rank were computed on.
    rank = np.linalg.matrix_rank(X_train)
    dof = X_train.shape[0] - rank
    mse = np.dot(resid, resid) / dof

    # Studentized residuals: residual scaled by its estimated std. deviation
    studentized_residuals = resid / np.sqrt(mse * (1 - leverage))

    # Initialize figure and axes with appropriate figsize and title
    fig, ax = _init_image(x='$\\hat{y}$', y='Studentized Residuals',
                          figsize=figsize, title=title)
    # Set labels
    ax.set_xlabel('$\\hat{y}$')
    ax.set_ylabel('Studentized Residuals')

    # Render scatterplot of residuals vs predicted. Data is passed by
    # keyword and the target axes explicitly; the label feeds the legend.
    label = "Train $R^2 = {:0.3f}$".format(r2)
    ax = sns.residplot(x=y_pred, y=studentized_residuals, lowess=True,
                       label=label,
                       line_kws={'color': 'red', 'lw': 1, 'alpha': 0.8},
                       ax=ax)
    ax.legend()

    # Add residuals histogram
    if hist:
        hax = histogram_ax(ax)
        hax.hist(studentized_residuals, bins=50, orientation='horizontal')

    # Save figure if directory is not None
    if directory is not None:
        # Resolve the file name here so the extension is appended only once.
        if filename is None:
            filename = title.replace('\n', ' ') + '.png'
        save_plot(fig, directory, filename, title)

    # Show plot
    fig.tight_layout()
    plt.show()
        
# --------------------------------------------------------------------------- #
#                           PREDICTION ERROR PLOT                             #
# --------------------------------------------------------------------------- #
def prediction_error(model, X, y, shared_limits=True, title=None,
                     bestfit=True, identity=True, figsize=(8,8),
                     directory=None, filename=None):
    """Plots predicted values against actual values.

    Draws a scatter of ``model.predict(X)`` versus ``y``, optionally with an
    identity line and a best-fit line, and annotates the plot with the R2
    score.

    Parameters
    ----------
    model : Estimator
        Fitted estimator exposing ``predict`` and ``history.params``.

    X : array-like
        Feature matrix passed to ``model.predict``.

    y : array-like
        Actual target values; must have as many rows as X.

    shared_limits : bool, default True
        Accepted for interface compatibility; currently unused. Both axes
        are always limited to the combined range of y and the predictions.

    title : str, optional
        Plot title. Defaults to the model name followed by
        "Prediction Error Plot".

    bestfit : bool, default True
        When True, a LinearRegression of predictions on actuals is fitted
        and drawn.

    identity : bool, default True
        When True, the line y = x is drawn across the plotted range.

    figsize : tuple, default (8, 8)
        Figure size forwarded to ``_init_image``.

    directory : str, optional
        When not None, the figure is saved there through ``save_plot``.

    filename : str, optional
        File name for the saved figure. Defaults to the title, with newlines
        replaced by spaces, plus '.png'.

    Raises
    ------
    ValueError
        If X and y have different numbers of rows or model is not an
        Estimator.
    TypeError
        If figsize is not a tuple.
    """
    # Validate request
    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y have incompatible shapes. X.shape = %s "
                         " y.shape = %s" %(str(X.shape), str(y.shape)))
    if not isinstance(model, Estimator):
        raise ValueError("Model is not a valid Estimator or subclass object.")
    if not isinstance(figsize, tuple):
        raise TypeError("figsize is not a valid tuple.")
    # Format title
    if title is None:
        title = model.history.params.get('name') + "\n" + \
            "Prediction Error Plot"

    # Compute predictions and R2
    y_pred = model.predict(X)
    r2 = RegressionMetricFactory()(metric='r2')(y, y_pred)

    # Common range covering both actual and predicted values
    y_min = min(min(y), min(y_pred))
    y_max = max(max(y), max(y_pred))

    # Initialize figure and axes with appropriate figsize and title
    limits = (y_min, y_max)
    fig, ax = _init_image(x='$y$', y='$\\hat{y}$', figsize=figsize,
                          xlim=limits, ylim=limits, title=title)
    ax.set_xlabel('y')
    ax.set_ylabel('$\\hat{y}$')

    # Render prediction versus actual
    ax = sns.scatterplot(x=y, y=y_pred, ax=ax)

    # Render identity line on the same axes as the scatter
    if identity:
        ax = sns.lineplot(x=[y_min, y_max], y=[y_min, y_max],
                          dashes=[(2,2)], ax=ax, legend='full',
                          label='Identity Line')

    # Render best fit line. Dedicated local names keep the X and y
    # arguments intact.
    if bestfit:
        actual = np.array(y).reshape(-1, 1)
        predicted = np.asarray(y_pred).reshape(-1, 1)
        lr = LinearRegression()
        lr.fit(actual, predicted)
        x_fit = np.linspace(y_min, y_max, 100)
        y_fit = (lr.intercept_ + lr.coef_ * x_fit).flatten()
        ax = sns.lineplot(x=x_fit, y=y_fit, dashes=True, ax=ax,
                          legend='full', label='Best Fit Line')

    # Add R2 Score to plot
    r2_text = "Coefficient of Determination (R2): " + str(round(r2,4))
    ax.text(0.3, 0.96, r2_text, fontsize=12, transform=ax.transAxes)

    # Fix axis limits
    ax.set_xlim(y_min, y_max)
    ax.set_ylim(y_min, y_max)

    # Save figure if directory is not None
    if directory is not None:
        # Resolve the file name here so the extension is appended only once.
        if filename is None:
            filename = title.replace('\n', ' ') + '.png'
        save_plot(fig, directory, filename, title)

    # Show plot
    fig.tight_layout()
    plt.show()
        
# --------------------------------------------------------------------------- #
#                            VARIOUS ROUTINES                                 #
# --------------------------------------------------------------------------- #
def save_plot(fig, directory, filename=None, title=None):
    """Saves a figure to a directory, deriving the file name from the title.

    Parameters
    ----------
    fig : figure object
        Passed through to ``save_fig``.

    directory : str
        Target directory, passed through to ``save_fig``.

    filename : str, optional
        File name to use. When None, it is derived from ``title``: newlines
        become spaces, surrounding whitespace is removed and '.png' is
        appended unless the title already ends with it.

    title : str, optional
        Plot title, used only when ``filename`` is None.

    Returns
    -------
    tuple
        ``(directory, filename)`` as used for saving.

    Raises
    ------
    ValueError
        If both filename and title are None.
    """
    if filename is None and title is None:
        raise ValueError("Must provide filename or title to save plot.")
    if filename is None:
        stem = title.replace('\n', ' ').strip()
        # Callers may pass a title that already carries the extension;
        # avoid producing names such as "Title.png .png".
        if stem.lower().endswith('.png'):
            filename = stem
        else:
            filename = stem + '.png'
    save_fig(fig, directory, filename)
    return directory, filename

def _init_image(x, y, figsize=(12, 4), xlim=None, ylim=None, title=None, log=False):
    """Creates a figure and axes with the shared look, labels, scale and limits.

    Parameters
    ----------
    x, y : str
        Axis label texts; each is passed through ``proper`` before use.
    figsize : tuple, default (12, 4)
        Figure size for ``plt.subplots``.
    xlim, ylim : optional
        Axis limits; applied only when truthy.
    title : str, optional
        Axes title.
    log : bool, default False
        When truthy, the x-axis is switched to log scale.

    Returns
    -------
    tuple
        ``(fig, ax)``
    """
    fig, ax = plt.subplots(figsize=figsize)

    # Aesthetics: whitegrid theme, white panel, black ticks, labels and title
    sns.set(style="whitegrid", font_scale=1)
    ax.set_facecolor('w')
    ax.tick_params(colors='k')
    for axis in (ax.xaxis, ax.yaxis):
        axis.label.set_color('k')
    ax.set_title(title, color='k', fontsize=16)

    # Axis labels
    for set_label, text in ((ax.set_xlabel, x), (ax.set_ylabel, y)):
        set_label(proper(text))

    # Optional log scale on the x-axis
    if log:
        ax.set_xscale('log')

    # Optional axis limits
    for limits, set_limits in ((xlim, ax.set_xlim), (ylim, ax.set_ylim)):
        if limits:
            set_limits(limits)

    return fig, ax

        
         
Exemple #10
0
def prediction_error(model, X, y, shared_limits=True, title=None,
                     bestfit=True, identity=True, figsize=(8,8),
                     directory=None, filename=None):
    """Plots predicted values against actual values.

    Draws a scatter of ``model.predict(X)`` versus ``y``, optionally with an
    identity line and a best-fit line, and annotates the plot with the R2
    score.

    Parameters
    ----------
    model : Estimator
        Fitted estimator exposing ``predict`` and ``history.params``.

    X : array-like
        Feature matrix passed to ``model.predict``.

    y : array-like
        Actual target values; must have as many rows as X.

    shared_limits : bool, default True
        Accepted for interface compatibility; currently unused. Both axes
        are always limited to the combined range of y and the predictions.

    title : str, optional
        Plot title. Defaults to the model name followed by
        "Prediction Error Plot".

    bestfit : bool, default True
        When True, a LinearRegression of predictions on actuals is fitted
        and drawn.

    identity : bool, default True
        When True, the line y = x is drawn across the plotted range.

    figsize : tuple, default (8, 8)
        Figure size forwarded to ``_init_image``.

    directory : str, optional
        When not None, the figure is saved there through ``save_plot``.

    filename : str, optional
        File name for the saved figure. Defaults to the title, with newlines
        replaced by spaces, plus '.png'.

    Raises
    ------
    ValueError
        If X and y have different numbers of rows or model is not an
        Estimator.
    TypeError
        If figsize is not a tuple.
    """
    # Validate request
    if X.shape[0] != y.shape[0]:
        raise ValueError("X and y have incompatible shapes. X.shape = %s "
                         " y.shape = %s" %(str(X.shape), str(y.shape)))
    if not isinstance(model, Estimator):
        raise ValueError("Model is not a valid Estimator or subclass object.")
    if not isinstance(figsize, tuple):
        raise TypeError("figsize is not a valid tuple.")
    # Format title
    if title is None:
        title = model.history.params.get('name') + "\n" + \
            "Prediction Error Plot"

    # Compute predictions and R2
    y_pred = model.predict(X)
    r2 = RegressionMetricFactory()(metric='r2')(y, y_pred)

    # Common range covering both actual and predicted values
    y_min = min(min(y), min(y_pred))
    y_max = max(max(y), max(y_pred))

    # Initialize figure and axes with appropriate figsize and title
    limits = (y_min, y_max)
    fig, ax = _init_image(x='$y$', y='$\\hat{y}$', figsize=figsize,
                          xlim=limits, ylim=limits, title=title)
    ax.set_xlabel('y')
    ax.set_ylabel('$\\hat{y}$')

    # Render prediction versus actual
    ax = sns.scatterplot(x=y, y=y_pred, ax=ax)

    # Render identity line on the same axes as the scatter
    if identity:
        ax = sns.lineplot(x=[y_min, y_max], y=[y_min, y_max],
                          dashes=[(2,2)], ax=ax, legend='full',
                          label='Identity Line')

    # Render best fit line. Dedicated local names keep the X and y
    # arguments intact.
    if bestfit:
        actual = np.array(y).reshape(-1, 1)
        predicted = np.asarray(y_pred).reshape(-1, 1)
        lr = LinearRegression()
        lr.fit(actual, predicted)
        x_fit = np.linspace(y_min, y_max, 100)
        y_fit = (lr.intercept_ + lr.coef_ * x_fit).flatten()
        ax = sns.lineplot(x=x_fit, y=y_fit, dashes=True, ax=ax,
                          legend='full', label='Best Fit Line')

    # Add R2 Score to plot
    r2_text = "Coefficient of Determination (R2): " + str(round(r2,4))
    ax.text(0.3, 0.96, r2_text, fontsize=12, transform=ax.transAxes)

    # Fix axis limits
    ax.set_xlim(y_min, y_max)
    ax.set_ylim(y_min, y_max)

    # Save figure if directory is not None
    if directory is not None:
        # Resolve the file name here so the extension is appended only once.
        if filename is None:
            filename = title.replace('\n', ' ') + '.png'
        save_plot(fig, directory, filename, title)

    # Show plot
    fig.tight_layout()
    plt.show()