def eval_model_ensemble(models,x,y_ref=None,is_class=False,verbose=False):
    """Evaluate ensemble of models.
    
    Parameters
    ----------
    models : single or iterable set of scikit-learn model instances
        model(s) to be evaluated
        
    x : numpy.array
        model inputs with m-row observations and n-column features
        
    y_ref : numpy.array (Default value = None)
        reference target output for observations in X
        
    is_class: bool (Default value = False)
        indication if classification problem (only needed when Y!=None)
        
    Returns
    -------
    model output : numpy array of len(X)
        if Y!=None: list of length 2 [mean error to reference Y, model outputs]
    
    """
    
    # model evaluation
    x = np.asarray(x)
    if x.ndim==1: # single observation input
        x = x.reshape(1,-1) # scikit-learn predict expects 2-D input
        if data_func.is_iterable(models)==False: # single model
            y_pred = models.predict(x)
        else: # multiple models
            y_pred = np.zeros(len(models))
            for m,mo in enumerate(models):
                y_pred[m] = mo.predict(x)
    elif x.ndim==2: # multiple observations
        if data_func.is_iterable(models)==False: # single model
            y_pred = models.predict(x)
        else: # multiple models
            y_pred = np.zeros((len(x),len(models)))
            for m,mo in enumerate(models):
                y_pred[:,m] = mo.predict(x)
    else:
        raise ValueError('Feature input dimension greater than 2.')
            
    # error evaluation
    if y_ref is None: # 'is', since '==' broadcasts over arrays
        return y_pred
    else:
        if y_pred.ndim==2: # align reference with per-model columns
            y_ref = np.asarray(y_ref).reshape(-1,1)
        if is_class==False: # regression problem
            y_err = np.mean(np.abs(y_pred-y_ref))
        else: # classification problem
            y_err = np.mean(y_pred!=y_ref) # misclassification rate
        if verbose==True:
            print '\nMean model error: {0}.'.format(np.round(y_err,2))
        return [y_pred, y_err]
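
# --- usage sketch (not part of the original module) ----------------------
# A minimal, hypothetical example of eval_model_ensemble with a small
# bootstrap ensemble; assumes numpy and scikit-learn are installed and that
# data_func.is_iterable recognises a list of models as iterable.
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(200,3)                       # m=200 observations, n=3 features
y = X[:,0] + 0.1*rng.randn(200)           # noisy linear target

models = []                               # bootstrap ensemble of three trees
for seed in range(3):
    idx = rng.randint(0,len(X),len(X))    # resample rows with replacement
    models.append(DecisionTreeRegressor(random_state=seed).fit(X[idx],y[idx]))

y_pred, y_err = eval_model_ensemble(models,X,y_ref=y,verbose=True)
print(np.round(np.mean(y_pred,1)[:5],2)) # ensemble mean, first 5 observations
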
if config.do_model_fit == True:
    start_T = time.time()  # for time taking

    # initialisations for results
    shape = (len(config.time_shifts), len(config.features))
    feat_imp, feat_imp_sd, target_feat_corr = np.zeros(shape), np.zeros(shape), np.zeros(shape)

    # loop over horizon lengths (shift values)
    for t, hor in enumerate(config.time_shifts):

        df_train = data_func.data_framer(data.raw_data.copy(),config.target,config.features,config.time_var,\
                                         config.start_time,config.end_time,shift=hor,\
                                         trafos=data.trafos,name_trafo=False)
        m_test_2 = int(np.round(len(df_train) *
                                config.test_fraction))  # test set size

        # model fit
        out_dict = ml_func.ML_train_tester(df_train,config.target,config.features,config.method,\
                                           is_class=config.is_class,m_test=m_test_2,n_boot=config.n_boot,\
                                           to_norm=config.to_norm,counter_fact=config.counter_fact,\
                                           verbose=config.verbose)

        # print model specification and progress
        if t == 0:
            print '\tModel specs:\n\n\t', out_dict['models'][0], '\n'
            print '\tHorizon ({0}):'.format(config.unit),
        print hor, '..',

        # feature importance & target feature correlations
        p = ml_func.get_feat_importance(out_dict['feat_weights'])
        feat_imp[t,:], feat_imp_sd[t,:] = p[0], p[1]
def bias_var_sd_R2(target,prediction,use_df=False,df=None,\
                    df_start=0,df_end=1,digits=3,ID='',verbose=True):
    """calculate Bias, Variance, Std dev and R-squared for prediction.

    Parameters
    ----------
    target : numpy.array or pandas.DataFrame
        reference values
        
    prediction : numpy.array or pandas.DataFrame
        modelled values
        
    use_df : bool, optional (Default value = False)
        if True, expect pandas.DataFrame as inputs (target and predictions)
        
    df : pandas.DataFrame, optional (Default value = None)
        input data (target and predictions)
        
    df_start : value, optional (Default value = 0)
        index start value for evaluation
        
    df_end : value, optional (Default value = 1)
        index end value for evaluation
        
    digits : int, optional (Default value = 3)
        number of digits to which results are rounded
        
    ID : str, optional (Default value = '')
        identifier
        
    verbose : bool, optional (Default value = True)
        if True, print results to screen.

    Returns
    -------
    Bias : float
        mean absolute error
        
    Var : float
        sample variance of the absolute error
        
    Std : float
        sample standard deviation of the absolute error
        
    R2 : float
        R-squared score

    """    
    
    if use_df==True:
        target     = df[df_start:df_end][target].values
        prediction = df[df_start:df_end][prediction].values
    
    # remove nan's
    is_OK       = ~np.isnan(target) & ~np.isnan(prediction)
    target     = target[is_OK]
    prediction = prediction[is_OK]
    
    # get values
    Bias = np.mean(np.abs(prediction-target))
    Var  = np.var(np.abs(prediction-target),ddof=1)
    Std  = np.std(np.abs(prediction-target),ddof=1)
    Corr = st.pearsonr(prediction,target)[0]
    R2   = skl_metrics.r2_score(target,prediction)
    
    # print results
    if verbose==True:
        print '\nBias-Var-Std-R2 Stats'
        print 'ID:', ID
        print '\tBias      : {0}'.format(np.round(Bias,  digits))
        print '\tVariance  : {0}'.format(np.round(Var,   digits))
        print '\tStd. dev. : {0}'.format(np.round(Std,   digits))
        print '\tPearson-r : {0}'.format(np.round(Corr,  digits))
        print '\tR-squared : {0}.\n'.format(np.round(R2, digits))
    return Bias, Var, Std, R2
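
# --- usage sketch (not part of the original module) ----------------------
# Hypothetical check of bias_var_sd_R2 on synthetic data; assumes numpy,
# scipy.stats (st) and sklearn.metrics (skl_metrics) are imported as in
# this module.
import numpy as np

rng    = np.random.RandomState(1)
y_true = rng.randn(100)
y_hat  = y_true + 0.2*rng.randn(100)      # predictions with small noise

bias, var, std, r2 = bias_var_sd_R2(y_true,y_hat,ID='toy-example')
# bias is the mean absolute error; r2 should be close to one here
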
def prec_rec_F1(target,prediction,use_df=False,df=None,df_start=0,df_end=1,\
                T=True,digits=3,ID='',verbose=True):
    """Precision, recall and F1 scores for binary classification results.

    Parameters
    ----------
    target : numpy.array or pandas.DataFrame
        reference values
        
    prediction : numpy.array or pandas.DataFrame
        model values
        
    use_df : bool, optional (Default value = False)
        if True, expect pandas.DataFrame as inputs (target and predictions)
        
    df : pandas.DataFrame, optional (Default value = None)
        input data (target and predictions)
        
    df_start : value, optional (Default value = 0)
        index start value for evaluation
        
    df_end : value, optional (Default value = 1)
        index end value for evaluation
        
    T : bool or value, optional (Default value = True)
        label treated as the positive class
        
    digits : int, optional (Default value = 3)
        number of digits to which results are rounded
        
    ID : str, optional (Default value = '')
        identifier
        
    verbose : bool, optional (Default value = True)
        if True, print results to screen.

    Returns
    -------
    prec : float
        precision score
        
    rec : float
        recall score
        
    F1 : float
        F1-score

    """
    
    if use_df==True:
        target     = df[df_start:df_end][target].values
        prediction = df[df_start:df_end][prediction].values
    
    # remove nan's
    is_OK      = ~np.isnan(target) & ~np.isnan(prediction)
    target     = target[is_OK]
    prediction = prediction[is_OK]
    
    # get values
    acc     = 100*np.sum(prediction==target)/float(len(target))
    true_pos  = float(np.sum((prediction==T) & (target==T)))
    false_pos = np.sum((prediction==T) & (target!=T))
    false_neg = np.sum((prediction!=T) & (target==T))
    if true_pos>0:
        prec    = 100*true_pos/(true_pos+false_pos)
        rec     = 100*true_pos/(true_pos+false_neg)
        F1      = 2*prec*rec/(prec+rec)
    else:
        prec,rec,F1 = 0.,0.,0.
    
    # print results
    if verbose==True:
        print '\nBinary Classification Stats (%)'
        print 'ID: {0}\n'.format(ID)
        print '\taccuracy  : {0}'.format(np.round(acc,  digits))
        print '\tprecision : {0}'.format(np.round(prec, digits))
        print '\trecall    : {0}'.format(np.round(rec,  digits))
        print '\tF-1 score : {0}.\n'.format(np.round(F1,   digits))
    return prec,rec,F1
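
# --- usage sketch (not part of the original module) ----------------------
# Hypothetical example for prec_rec_F1: with TP=2, FP=1 and FN=1 below,
# precision = 100*2/3, recall = 100*2/3 and F1 = 2*prec*rec/(prec+rec),
# all approximately 66.7%. Float arrays are used because the function
# filters NaNs with np.isnan.
import numpy as np

target     = np.array([1., 1., 0., 0., 1., 0.])
prediction = np.array([1., 0., 0., 1., 1., 0.])

prec, rec, F1 = prec_rec_F1(target,prediction,T=1,ID='toy-example')
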
def ML_train_tester(df,target,features,method,m_test=1,n_boot=500,is_class=False,is_zero_one=False,\
                   to_norm=None,CV_name=None,CV_value=None,counter_fact=False,\
                   horizon=None,save_out=False,save_models=True,file_name='',verbose=False):
    """Machine learning wrapper for bootstrapped training and testing.

    Parameters
    ----------
    df : pandas.DataFrame (input data)
        
    target : str
        LHS variable
        
    features : list of str
        RHS variable(s)
        
    method : str
        model
        
    m_test : int or index mask, optional (Default value = 1, "jackknife")
        size of the test set, or a Boolean index mask selecting it; if a mask
        is given, n_boot is set to 1
        
    n_boot : int, optional (Default value = 500)
        number of bootstraps
        
    is_class : bool, optional (Default value = False)
        if True, maps to integer output
        
    is_zero_one : bool, optional (Default value = False)
        if True, maps to Boolean output
        
    to_norm : list, optional (Default value = None)
        variables to norm (z-scores)
        
    CV_name : str, optional (Default value = None)
        name of cross-validation parameter
        
    CV_value : float, optional (Default value = None)
        value for cross-validation parameter
        
    counter_fact : bool, optional (Default value = False)
        if True, variable importance by leaving one feature out at a time
        
    horizon : int, optional (Default value = None)
        lead-lag size for projection model (only used for VAR)
        
    save_out : bool, optional (Default value = False)
        if True save output to file
        
    save_models : bool, optional (Default value = True)
        if True, include models in the output file (may use a lot of space)

    file_name : str, optional (Default value = '')
        name of output file
        
    verbose : bool, optional (Default value = False)
        if True, print basic fit results to screen

    Returns
    -------
    dict, keyed by
        ID : str
            identifier
            
        mean_train_err : float
            mean in-sample error over all bootstraps
            
        mean_test_err : float
            mean out-of-sample error over all bootstraps
            
        train_pred_Y : numpy.array
            in-sample predictions over all bootstraps
            
        test_pred_Y : numpy.array
            out-of-sample predictions over all bootstraps
            
        train_ref_Y : numpy.array
            training target values over all bootstraps
            
        test_ref_Y : numpy.array
            test target values over all bootstraps
            
        feat_weights : numpy.array
            feature importance weights over all bootstraps
            
        test_ind : numpy.array
            index mask of test samples for each bootstrap
            
        models : numpy.array
            fitted models (kept in the returned dict; written to file only
            if save_models is True)
        
    """
    
    # definitions and initialisations
    m, n_col = len(df), len(features)+1
    if data_func.is_iterable(m_test)==True:
        n_boot=1
    elif m_test==1: 
        n_boot=m # one fit for each observation
    if method=='VAR': 
        n_boot=m_test=1
    # empty fields for bootstrapped model output
    test_ref_Y,   test_pred_Y  = np.array([]), np.array([]) # test target values and out-of-sample predictions
    train_ref_Y,  train_pred_Y = np.array([]), np.array([]) # training target values and in-sample predictions
    train_error,  test_error   = np.array([]), np.array([]) # in and out-of-sample errors
    boot_errors,  models       = np.array([]), np.array([]) # mean bootstrap error and bootstrap models
    feat_weights, test_indices = np.zeros((n_boot,n_col-1)), np.zeros((n_boot,m)) # weights for feature importance, test_index over bootstraps
    
    # input data
    inputs = df.copy()
    if not to_norm==None: # normalise data (z-scores)
        for var in to_norm:
            if var in inputs.columns:
                vals        = inputs[var].values
                inputs[var] = (vals-vals.mean(0))/vals.std(0,ddof=1)
            else:
                raise ValueError("Norm error: Variable '{0}' not in dataframe.".format(var))
    
    # loop over bootstrapped samples
    for t in range(n_boot):
        # get training and testing data
        if data_func.is_iterable(m_test)==True:
            df_train, df_test = inputs[~m_test], inputs[m_test]
            test_indices[t,:] = m_test
        else:
            df_train, df_test, is_train = train_test_split(inputs,m_test=m_test,t=t) # random split
            test_indices[t,:]           = ~is_train
        # get values
        x_train, y_train = df_train[features].values, df_train[target].values
        x_test,  y_test  = df_test[features].values,  df_test[target].values
        
        # set learning methods
        if not method=='VAR': # VAR part of statsmodels library (treated differently)
            ML = model_selection(method,n_HN=n_col-1,CV_name=CV_name,CV_value=CV_value) # n_HN only used for neural network
                                                                            # (nNeurons=nFeatures in each layer)
        else: # can only be used with m_test==1
            input_data = inputs[[target]+features].values
            ML         = model_selection(method,input_data)
            y_train    = y_test = input_data[:,0]
            if CV_name is None:
                model = ML.fit(maxlags=1) # model fit, defaults to VAR with one lag
            else:
                model = ML.fit(**{CV_name: CV_value}) # pass CV parameter by keyword
        
        # fit model and train/test predictions
        if method=='VAR': # fit at method selection step (CV_name needed)
            in_pred  = np.full(m,np.nan) # NaN container for forecasts
            for r in range(m):
                start_values = input_data[r,:]
                fcast        = model.forecast(start_values.reshape((1,len(features)+1)),horizon)[-1,0]
                if r+horizon<m:
                    in_pred[r+horizon]  = fcast
            out_pred = in_pred
        else:
            model_clone  = skl_base.clone(ML)
            model        = ML.fit(x_train,y_train) # model fit
            out_pred     = model.predict(x_test)
            in_pred      = model.predict(x_train)
        
        # get discrete class output & get bootstrap error
        if is_class==True: # target should be an integer
            if is_zero_one==True: # map to Boolean
                in_pred  = data_func.to_zero_one(in_pred).astype(bool)
                out_pred = data_func.to_zero_one(out_pred).astype(bool)
            else: # map to integer
                in_pred  = np.round(in_pred).astype(int)
                out_pred = np.round(out_pred).astype(int)
            boot_errors = np.hstack((boot_errors,np.mean(out_pred!=y_test)))
        else:
            if method=='VAR':
                boot_errors = np.hstack((boot_errors,np.nanmean(np.abs(out_pred-y_test))))
            else:
                boot_errors = np.hstack((boot_errors,np.mean(np.abs(out_pred-y_test))))
        models = np.hstack((models,model)) # store model
        
        # feature importance
        if counter_fact==False:
            if method in ['Tree-rgr','Tree-clf','Forest-rgr','Forest-clf']:
                feat_weights[t] = model.feature_importances_
        # feature importance through "counter_factual" analysis (leave one variable out and compare)
        elif counter_fact==True: # may slow things down
            for f,feat in enumerate(features):
                model_clone_II = skl_base.clone(model_clone)
                temp_features = list(features)
                temp_features.remove(feat)
                # get training and testing data
                x_train, x_test = df_train[temp_features].values, df_test[temp_features].values
                temp_model      = model_clone_II.fit(x_train,y_train)
                temp_pred       = temp_model.predict(x_test)
                if is_class==True:
                    feat_weights[t,f] = np.mean(temp_pred!=y_test)
                else:
                    feat_weights[t,f] = np.mean(np.abs(temp_pred-y_test))
        # train Ys
        train_pred_Y = np.hstack((train_pred_Y, in_pred))
        train_ref_Y  = np.hstack((train_ref_Y,  y_train))
        # test Ys
        test_pred_Y  = np.hstack((test_pred_Y,  out_pred))
        test_ref_Y   = np.hstack((test_ref_Y,   y_test))
    
    # get errors    
    if is_class==True:
        train_error  = np.mean(train_pred_Y!=train_ref_Y)
        test_error   = np.mean(test_pred_Y!=test_ref_Y)
    else:
        train_error  = np.mean(np.abs(train_pred_Y-train_ref_Y))
        test_error   = np.mean(np.abs(test_pred_Y-test_ref_Y))
    
    # verbose
    ID = target+'-'+method+'-'+str(m_test)+'-'+str(n_boot)
    if verbose==True:
        print '\nTraining Summary'
        print 'ID:',ID
        print '\tin-sample error:',round(train_error,3)
        print '\tout-of-sample error:',round(test_error,3)
        print '\terror std. dev.:',round(np.std(boot_errors,ddof=1),3)
        print '\terror signal-to-noise:',
        print round(test_error/np.std(boot_errors,ddof=1),3)
    
    # package output
    out_dict = {'ID' : ID,\
                'mean_train_err' : train_error,  'mean_test_err' : test_error,\
                'train_pred_Y'   : train_pred_Y, 'test_pred_Y'   : test_pred_Y,\
                'train_ref_Y'    : train_ref_Y,  'test_ref_Y'    : test_ref_Y,\
                'feat_weights'   : feat_weights, 'test_ind'      : test_indices}
    if save_models==True:
        out_dict['models']=np.array(models)
    if save_out==True:
        pk.dump(out_dict,open(file_name,'wb'))
    if save_models==False: # if not saved, keep models in temp (full) output
        out_dict['models']=np.array(models)
    
    # return output dictionary
    return out_dict
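
# --- usage sketch (not part of the original module) ----------------------
# Hypothetical call of ML_train_tester on synthetic data; 'Forest-rgr' is
# assumed to be a method label understood by this module's model_selection
# helper, and pandas is assumed to be installed.
import numpy as np
import pandas as pd

rng = np.random.RandomState(2)
df  = pd.DataFrame({'x1' : rng.randn(300), 'x2' : rng.randn(300)})
df['y'] = df['x1'] - 0.5*df['x2'] + 0.1*rng.randn(300) # synthetic target

out = ML_train_tester(df,'y',['x1','x2'],'Forest-rgr',m_test=60,n_boot=50,
                      to_norm=['x1','x2'],verbose=True)
print(out['mean_test_err']) # mean out-of-sample error over all bootstraps
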
        
        # TRAINING
        # --------

        # start time
        if config.fixed_start==False and t>0:
            i_s   += config.time_step_size
            start  = data.data_shifted.index[i_s]
        else:
            i_s    = 0
            start  = data.data_shifted.index[i_s]
        
        # training data
        df_train = data_func.data_framer(data.data_shifted, config.target, config.features,\
                                         index=config.time_var, start_i=start, end_i=end, name_trafo=False)
        m_test_2 = int(np.round(len(df_train)*config.test_fraction)) # test set size (fraction of total)
        
        # model fit
        out_dict = ml_func.ML_train_tester(df_train, config.target, config.features, config.method,\
                                           is_class=config.is_class, m_test=m_test_2, n_boot=config.n_boot,\
                                           to_norm=config.to_norm, counter_fact=config.counter_fact,\
                                           verbose=config.verbose)
        # return model specification
        if t==0:
            print '\n\tModel specs:\n\n\t',out_dict['models'][0],'\n'
        
        # get variable importance
        p = ml_func.get_feat_importance(out_dict['feat_weights'])
        feat_imp[t,:], feat_imp_sd[t,:] = p[0], p[1]
        
        # VAR reference model
def ML_heatmap(f1,f2,df,features,target,models=None,model_outputs=None,condition='median',\
               N=30,ranges=None,to_norm=None,color_norms=None,title='',\
               color_map='rainbow',save=False,save_name='ml_heatmap.png'):
    """Heatmap of conditional 2-D model prediction.

    Parameters
    ----------
    f1 : str
        name of first variable feature
        
    f2 : str
        name of second variable feature
        
    df : pandas.DataFrame
        input data
        
    features : list of str
        names of model features (RHS)
        
    target : str
        name of target variable (LHS)
        
    models : list-like, optional (Default value = None)
        models to be evaluated. If None, needs pre-computed model_outputs

    model_outputs : 2-d numpy.array (NxN), optional (Default value = None)
        pre-computed model_outputs for f1-f2 feature ranges and condition
        
    condition : str, int or list, optional (Default value = 'median')
        condition for non-variable features: 'median', 'mean', 'last', an
        integer row index, or custom values [target value] + feature values
            
    N : int, optional (Default value = 30)
        raster density within ranges
        
    ranges : [f1_min,f1_max,f2_min,f2_max], optional (Default value = None)
        ranges of variable features
        
    to_norm : list of str, optional (Default value = None)
        variable names to be normalised (z-scores)
     
    color_norms : [vmin,vmax], optional (Default value = None)
        range to norm color scale
        
    title : str, optional (Default value = '')
        plot title
        
    color_map : str, optional (Default value = 'rainbow')
        colormap, see also https://matplotlib.org/examples/color/colormaps_reference.html
        
    save : bool, optional (Default value = False)
        if True, save plot
        
    save_name : str, optional (Default value = 'ml_heatmap.png')
        file name under which to save plot (incl directory)
        
        
    Note: plot can be further adjusted by modifying code below.
    
    Returns
    -------
    output : 2-d numpy.array (NxN)
        heatmap values

    """

    data = df.copy()
    # normalise input data
    if not to_norm == None:
        for var in to_norm:
            vals = data[var].values
            data[var] = (vals - vals.mean(0)) / vals.std(0, ddof=1)
    df1f2 = [min(data[f1]), max(data[f1]), min(data[f2]), max(data[f2])]
    if condition == 'median':
        inputs = data[features].median().values.reshape(1, -1)
        z = data[target].median()
    elif condition == 'mean':
        inputs = data[features].mean().values.reshape(1, -1)
        z = data[target].mean()
    elif condition == 'last':
        inputs = data[features].values[-1, :].reshape(1, -1)
        z = data[target].values[-1]
    elif type(condition) == int:
        inputs = data[features].values[condition, :].reshape(1, -1)
        z = data[target].values[condition]
    elif len(condition) == len(features) + 1:  # [target value] + feature values
        inputs = np.array(condition[1:]).reshape(1, -1)
        z = condition[0]
    else:
        raise (ValueError('No valid modelling condition given.'))
    if ranges == None:
        ranges = df1f2
    elif not len(ranges) == 4:
        raise (ValueError('Invalid feature ranges.'))
    # model prediction for models and feature ranges
    i1, i2 = features.index(f1), features.index(f2)
    y0, x0 = inputs[0][i1], inputs[0][i2]
    range1 = np.linspace(ranges[0], ranges[1], N)
    range2 = np.linspace(ranges[2], ranges[3], N)
    if model_outputs is None:  # 'is', since '==' broadcasts over arrays
        output = np.zeros((len(models), N, N))
        for m, model in enumerate(models):
            for i, val1 in enumerate(range1):
                inputs[0, i1] = val1
                for j, val2 in enumerate(range2):
                    inputs[0, i2] = val2
                    output[m, i, j] = model.predict(inputs)
        output = np.mean(output, 0)  # mean over models
    else:
        output = model_outputs
    # figure parameters
    if color_norms == None:
        vals = output.flatten()
        vmin = min(vals)
        vmax = max(vals)
    elif len(color_norms) == 2:
        vmin, vmax = color_norms
    else:
        raise (ValueError('Invalid color norm.'))
    # plot
    fig, ax = plt.subplots(figsize=(8, 6))
    # color map
    CMAP = plt.get_cmap(color_map)
    cNorm = colors.Normalize(vmin=vmin, vmax=vmax)
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=CMAP)
    im = ax.imshow(output,
                   origin='lower',
                   cmap=color_map,
                   vmin=vmin,
                   vmax=vmax,
                   interpolation='hermite')
    ax.autoscale(False)

    # conditioning reference point
    x1 = (x0 - ranges[2]) * N / (ranges[3] - ranges[2]) - .5
    y1 = (y0 - ranges[0]) * N / (ranges[1] - ranges[0]) - .5
    ax.plot(x1, y1, 'wo', ms=20)
    # condition point
    COL = scalarMap.to_rgba(z)
    ax.plot(x1, y1, 'o', c=COL, ms=20, markeredgecolor='w', mew=3)

    fsize = 15  # figure base fontsize
    plt.title(title, fontsize=fsize)
    plt.xlabel(f2, fontsize=fsize)
    plt.ylabel(f1, fontsize=fsize)
    tix = [0, int((N - 1) / 4), int((N - 1) / 2), int(3 * (N - 1) / 4), N - 1]
    plt.xticks(tix, np.round(range2[tix], 1), fontsize=fsize - 2)
    plt.yticks(tix, np.round(range1[tix], 1), fontsize=fsize - 2)
    cbar = plt.colorbar(im)
    cbar.set_label(target, fontsize=fsize)
    if save == True:
        plt.savefig(save_name, dpi=200, bbox_inches='tight')
    plt.draw()

    return output
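
# --- usage sketch (not part of the original module) ----------------------
# Hypothetical ML_heatmap call: the average prediction surface of two
# random forests over features 'x1' and 'x2', with 'x3' held at its
# median; assumes pandas, scikit-learn and matplotlib are installed.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

rng = np.random.RandomState(3)
df  = pd.DataFrame(rng.rand(200,3),columns=['x1','x2','x3'])
df['y'] = df['x1']*df['x2'] + 0.1*rng.randn(200)

feats  = ['x1','x2','x3']
models = [RandomForestRegressor(n_estimators=20,random_state=s).fit(df[feats],df['y'])
          for s in range(2)]
grid   = ML_heatmap('x1','x2',df,feats,'y',models=models,N=20,
                    title='mean model surface')
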
def ML_projection_plot(df,fit_col,target,test_start=None,test_end=None,ref_col=None,ref_time=None,ref_level=None,\
                       pred_band=None,idx=None,two_class=False,y_lim=None,x_label='',y_label='',title='',\
                       save=True,save_name='ML_projections_plot.png'):
    """Plot machine learning projections for lead-lag model.

    Parameters
    ----------
    df : pandas.DataFrame
        output from projection exercise
        
    fit_col : str
        name of model output column
        
    target : str
        name of target column
        
    test_start : value (Default value = None)
        (time) index value where training period ends and test period starts
        
    test_end : value (Default value = None)
        (time) index value where test period ends
        
    ref_col : str, optional (Default value = None)
        name of reference data column
        
    ref_time : [value,label], optional (Default value = None)
        (time) index value to mark special point and its label
        
    ref_level : [float,label], optional (Default value = None)
        y-value for horizontal reference line and its label
        
    pred_band : [upper_name,lower_name,label], optional (Default value = None)
        column names for upper and lower values of prediction intervals and label 
        
    idx : str, optional (Default value = None)
        name of index variable
        
    two_class : bool, optional (Default value = False)
        if True, draw 0.5 decision line for (0,1)-classification problem
        
    y_lim : [min_value,max_value], optional (Default value = None)
        y-boundaries of plot
        
    title : str, optional (Default value = '')
        plot title
        
    x_label : str, optional (Default value = '')
        plot x-axis label
        
    y_label : str, optional (Default value = '')
        plot y-axis label
        
    save : bool, optional (Default value = True)
        if True, save plot
        
    save_name : str, optional (Default value = 'ML_projections_plot.png')
        file name under which to save plot (incl directory)
        
    Note: plot can be further adjusted by modifying code below.
    
    """

    # set index if not given
    if not idx == None:
        df.set_index(idx, inplace=True)

    # lines plots and confidence intervals
    p=df[[target,fit_col]].plot(figsize=(11,6),linewidth=2.5,\
           style=['bo-','gs-'],ms=3.5,rot=0) # main model output

    x0 = int(p.get_xlim()[0])  # left boundary of x-axis (plot reference point)

    # prediction intervals
    if not pred_band == None:
        p.fill_between(range(len(df)),
                       df[pred_band[0]].values,
                       df[pred_band[1]].values,
                       color='r',
                       alpha=.4)
        pi_fill = patch.Patch(color='r', alpha=.4)

    # reference fit
    if not ref_col == None:
        df[ref_col].plot(linewidth=2.5,
                         style='kd-',
                         ms=3.5,
                         rot=0,
                         alpha=.35,
                         label=ref_col)

    # plot target and decision line
    if not ref_level == None:
        p.axhline(ref_level[0],
                  ls='--',
                  c='k',
                  lw=5,
                  alpha=.4,
                  label=ref_level[1])
    if two_class == True:
        p.axhline(.5, ls='--', c='k', lw=2, alpha=.6, label='decision line')
        p.set_yticks([0, 1])

    # indicate training and test periods
    if not test_start == None:
        t_s = x0 + list(df.index.values).index(test_start)
        p.axvline(t_s, ls='--', c='k', lw=3, label='test start')
    else:
        t_s = x0

    if not test_end == None:
        t_e = x0 + list(df.index.values).index(test_end)
        p.axvline(t_e, ls='-.', c='k', lw=3, label='test end')
    else:
        t_e = len(df) - 1

    # highlight special point in time
    if not ref_time == None:
        t_ref = x0 + list(df.index.values).index(ref_time[0])
        p.axvline(t_ref, ls='-.', c='r', lw=3, label=ref_time[1])

    # error summaries
    box, fsize = {'facecolor': 'black', 'alpha': 0.1, 'pad': 12}, 15
    abs_fit_err = np.abs(df[target].values - df[fit_col].values)
    if not ref_col == None:
        abs_ref_err = np.abs(df[target].values - df[ref_col].values)

    # training period
    if t_s > x0:
        fit_train_err = np.round(np.nanmean(abs_fit_err[:t_s]), 2)
        p.text(.20,
               .67,
               'out-of-bag error\n\n          ' + str(fit_train_err),
               bbox=box,
               transform=p.transAxes,
               fontsize=fsize - 3)
        if not ref_col == None:
            ref_train_err = np.round(np.nanmean(abs_ref_err[:t_s]), 2)
            p.text(.20,
                   .06,
                   ref_col + ' out-of-bag\n\n        ' + str(ref_train_err),
                   bbox=box,
                   transform=p.transAxes,
                   fontsize=fsize - 5)

    # test period
    if not ref_time == None:  # split error stats: before and after t_ref
        fit_err_1 = np.round(np.nanmean(abs_fit_err[t_s:t_ref]), 2)
        fit_err_2 = np.round(np.nanmean(abs_fit_err[t_ref:t_e]), 2)
        p.text(.45,
               .67,
               'test error (I)\n\n      ' + str(fit_err_1),
               bbox=box,
               transform=p.transAxes,
               fontsize=fsize - 3)
        p.text(.67,
               .67,
               'test error (II)\n\n      ' + str(fit_err_2),
               bbox=box,
               transform=p.transAxes,
               fontsize=fsize - 3)
        if not ref_col == None:
            ref_err_1 = np.round(np.nanmean(abs_ref_err[t_s:t_ref]), 2)
            ref_err_2 = np.round(np.nanmean(abs_ref_err[t_ref:t_e]), 2)
            p.text(.45,
                   .06,
                   ref_col + ' error (I)\n\n      ' + str(ref_err_1),
                   bbox=box,
                   transform=p.transAxes,
                   fontsize=fsize - 5)
            p.text(.67,
                   .06,
                   ref_col + ' error (II)\n\n      ' + str(ref_err_2),
                   bbox=box,
                   transform=p.transAxes,
                   fontsize=fsize - 5)
    else:  # single error stats for test period
        fit_err = np.round(np.nanmean(abs_fit_err), 2)
        p.text(.45,
               .67,
               'test error\n\n  ' + str(fit_err),
               bbox=box,
               transform=p.transAxes,
               fontsize=fsize - 3)
        if not ref_col == None:
            ref_err = np.round(np.nanmean(abs_ref_err), 2)
            p.text(.45,
                   .06,
                   ref_col + ' error\n\n  ' + str(ref_err),
                   bbox=box,
                   transform=p.transAxes,
                   fontsize=fsize - 5)

    # labels, axes, legend
    p.set_xlabel(x_label, fontsize=fsize)
    p.set_ylabel(y_label, fontsize=fsize)
    p.set_title(title, fontsize=fsize)
    p.tick_params(axis='x', labelsize=fsize - 3)
    p.tick_params(axis='y', labelsize=fsize - 3)
    if not y_lim == None:
        p.set_ylim(y_lim)
    handles, labels = p.get_legend_handles_labels()
    if not pred_band == None:
        handles += [pi_fill]
        labels += [pred_band[2]]
    p.legend(handles,
             labels,
             loc='upper right',
             ncol=4,
             prop={'size': fsize - 2})

    # save figure
    if save == True:
        plt.savefig(save_name, dpi=200, bbox_inches='tight')
    plt.draw()
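
# --- usage sketch (not part of the original module) ----------------------
# Hypothetical ML_projection_plot call on a synthetic projection frame;
# column and index names are illustrative only.
import numpy as np
import pandas as pd

rng = np.random.RandomState(4)
t   = ['t{0:02d}'.format(i) for i in range(40)] # toy time axis
y   = np.cumsum(rng.randn(40))                  # random-walk target

df = pd.DataFrame({'time' : t, 'target' : y, 'fit' : y+0.3*rng.randn(40)})
ML_projection_plot(df,'fit','target',test_start=t[30],idx='time',
                   y_label='level',title='toy projection',save=False)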