예제 #1
0
def plot_validation_curve(estimator, param_name, param_range, x, y, cv, x_train, y_train, cv_train, title, svm=0):
    """Plot training, testing and cross-validation error against one parameter.

    Error is ``1 - mean score`` over the folds. Two validation curves are
    computed: one on ``(x, y)`` split with ``cv`` (its training scores give
    the "Training error" line and its validation scores the "Testing error"
    line) and one on ``(x_train, y_train)`` split with ``cv_train`` (its
    validation scores give the "Cross-validation error" line).

    A vertical line marks the value of ``param_name`` currently set on
    ``estimator``. The figure is saved under the name ``title`` and closed.

    :param estimator: estimator exposing ``get_params()``.
    :param param_name: name of the parameter to vary.
    :param param_range: values of the parameter to evaluate.
    :param svm: ``0`` draws on a linear x axis, anything else on a log x axis.
    :return: None
    """
    plt.figure()
    plt.grid()
    # Mark the parameter value the estimator is configured with right now.
    configured_value = estimator.get_params()[param_name]
    plt.axvline(x=configured_value)

    fit_scores, held_out_scores = validation_curve(
        estimator, x, y,
        param_name=param_name, param_range=param_range,
        cv=cv, n_jobs=1)
    train_error = 1 - np.mean(fit_scores, axis=1)
    test_error = 1 - np.mean(held_out_scores, axis=1)

    # Only the validation half of the second curve is plotted.
    _, fold_scores = validation_curve(
        estimator, x_train, y_train,
        param_name=param_name, param_range=param_range,
        cv=cv_train, n_jobs=1)
    cv_error = 1 - np.mean(fold_scores, axis=1)

    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel("Error")

    draw = plt.plot if svm == 0 else plt.semilogx
    series = (
        (train_error, "Training error", "r"),
        (test_error, "Testing error", "g"),
        (cv_error, "Cross-validation error", "b"),
    )
    for errors, label, colour in series:
        draw(param_range, errors, label=label, color=colour)

    plt.legend(loc="best")
    plt.savefig(title)
    plt.close()
예제 #2
0
def test_validation_curve():
    '''
    Show the validation curve of LinearSVC over a range of C values.

    Loads the digits dataset, evaluates accuracy with 10-fold
    cross-validation for every C in np.logspace(-2, 2), then plots the mean
    training and testing accuracy on a log-scaled x axis, each with a band
    of +/- one standard deviation across folds.

    :return:  None
    '''
    dataset = load_digits()
    features, targets = dataset.data, dataset.target
    c_values = np.logspace(-2, 2)
    train_scores, test_scores = validation_curve(
        LinearSVC(), features, targets,
        param_name="C", param_range=c_values, cv=10, scoring="accuracy")

    # Fold statistics are computed up front, before any figure is created.
    curves_to_draw = [
        (np.mean(fold_scores, axis=1), np.std(fold_scores, axis=1), label, colour)
        for fold_scores, label, colour in (
            (train_scores, "Training Accuracy", "r"),
            (test_scores, "Testing Accuracy", "g"),
        )
    ]

    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    for centre, spread, label, colour in curves_to_draw:
        axes.semilogx(c_values, centre, label=label, color=colour)
        axes.fill_between(c_values, centre - spread, centre + spread,
                          alpha=0.2, color=colour)

    axes.set_title("Validation Curve with LinearSVC")
    axes.set_xlabel("C")
    axes.set_ylabel("Score")
    axes.set_ylim(0, 1.1)
    axes.legend(loc='best')
    plt.show()
def plot_validation_curve(clf, X_train, Y_train):
    """Plot the accuracy validation curve of a KNN classifier over n_neighbors.

    Evaluates ``KNeighborsClassifier(algorithm='ball_tree', weights='uniform')``
    with 7-fold cross-validation for ``n_neighbors`` = 1..20, plots mean
    training and cross-validation accuracy with a +/- one standard deviation
    band, saves the figure as 'plot_validation_curve_rf_asis' and shows it.

    :param clf: not used; a fresh KNeighborsClassifier is always built here.
    :param X_train: training features passed to ``validation_curve``.
    :param Y_train: training targets passed to ``validation_curve``.
    :return: None
    """
    param_range = list(range(1, 21))
    train_scores, test_scores = validation_curve(
        KNeighborsClassifier(algorithm='ball_tree', weights='uniform'),
        X_train, Y_train, param_name="n_neighbors",
        param_range=param_range, cv=7,
        scoring="accuracy", n_jobs=-1,
        verbose=True)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    # Spread across folds must be the standard deviation; using the mean here
    # made the shaded band span 0 .. 2 * mean.
    test_scores_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve with KNN", size=15)
    plt.xlabel("n_neighbors", size=15)
    plt.ylabel("Score", size=15)
    plt.xticks(size=12)
    plt.yticks(size=12)
    plt.ylim(0.5, 1.0)
    plt.plot(param_range, train_scores_mean, label="Training Score", color="r")
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.2, color="r")
    plt.plot(param_range, test_scores_mean, label="Cross-Validation Score",
             color="g")
    plt.fill_between(param_range, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.2, color="g")
    plt.legend(loc="best")
    plt.savefig('plot_validation_curve_rf_asis')
    plt.show()
    def plot_validation_curve(self, estimator, params, param_name, X, y, title,
                              xtricks=None, x_label=None, ylim=None, cv=None, n_jobs=1):
        """Plot and save an accuracy validation curve for one parameter.

        Draws mean training and cross-validation accuracy (with a +/- one
        standard deviation band) for every value in ``params`` and saves the
        figure to ``../Figures/<title>.png``.

        :param estimator: estimator passed to ``validation_curve``.
        :param params: values of ``param_name`` to evaluate.
        :param param_name: name of the parameter to vary.
        :param X: features passed to ``validation_curve``.
        :param y: targets passed to ``validation_curve``.
        :param title: figure title, also used as the output file name.
        :param xtricks: x positions for the plotted points; when None the
            values in ``params`` are used.
        :param x_label: label for the x axis.
        :param ylim: optional ``(low, high)`` limits for the y axis.
        :param cv: cross-validation setting forwarded to ``validation_curve``.
        :param n_jobs: parallel job count forwarded to ``validation_curve``.
        :return: the ``plt`` module, so the caller can show or adjust the figure.
        """
        plt.figure(figsize=(6, 8))
        plt.title(title)
        if ylim is not None:
            plt.ylim(*ylim)
        plt.xlabel(x_label)
        plt.ylabel("Accuracy")

        train_scores, test_scores = validation_curve(
            estimator, X, y, param_name=param_name, param_range=params,
            cv=cv, scoring="accuracy", n_jobs=n_jobs)
        train_scores_mean = np.mean(train_scores, axis=1)
        train_scores_std = np.std(train_scores, axis=1)
        test_scores_mean = np.mean(test_scores, axis=1)
        test_scores_std = np.std(test_scores, axis=1)

        # The default xtricks=None cannot be plotted against; fall back to
        # the parameter values themselves.
        x_values = params if xtricks is None else xtricks

        plt.plot(x_values, train_scores_mean, "o-", label="Training accuracy", color="r")
        plt.fill_between(x_values, train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.2, color="r")
        plt.plot(x_values, test_scores_mean, "o-", label="Cross-validation accuracy",
                 color="g")
        plt.fill_between(x_values, test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.2, color="g")
        plt.grid()
        plt.legend(loc="best")
        plt.savefig("../Figures/" + title + ".png", bbox_inches="tight")
        return plt
예제 #5
0
def plot_validation_curve(model, X, y, scorer, param_name, param_range=np.linspace(0.1, 1, 5), cv=None, n_jobs=5,
    ylim=None, title="Xval. validation curve"):
    """Plot the error validation curve of ``model`` and return its statistics.

    For every value in ``param_range`` the mean error (``1 - mean score``)
    and the standard deviation of the scores across folds are collected for
    both the training and the validation folds. They are stored in a
    DataFrame with columns ``param_range``, ``train_mean``, ``train_std``,
    ``test_mean`` and ``test_std``, and drawn on a log-scaled x axis.

    Returns a ``(DataFrame, plt)`` tuple.
    """
    df = pd.DataFrame()
    df['param_range'] = param_range
    train_scores, test_scores = validation_curve(
        model, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring=scorer, n_jobs=n_jobs)
    for prefix, fold_scores in (('train', train_scores), ('test', test_scores)):
        df[prefix + '_mean'] = 1 - np.mean(fold_scores, axis=1)
        df[prefix + '_std'] = np.std(fold_scores, axis=1)

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Parameter value")
    plt.ylabel("Error (1-score)")
    plt.grid()
    for prefix, colour, label in (('train', "r", "Training"), ('test', "g", "Test")):
        centre = df[prefix + '_mean']
        spread = df[prefix + '_std']
        plt.semilogx(param_range, centre, color=colour, label=label)
        plt.fill_between(param_range, centre - spread, centre + spread, alpha=0.1, color=colour)
    plt.legend(loc="best")
    plt.show()
    return df, plt
def plot_validation_curve(estimator, X, y, param_name, param_range):
    """Show training and validation scores of ``estimator`` over ``param_range``.

    Plots the mean score per parameter value with a +/- one standard
    deviation band for both the training and the validation folds, on a
    log-scaled x axis with the y axis fixed to [0, 1].

    :param estimator: estimator passed to ``validation_curve``.
    :param X: features.
    :param y: targets.
    :param param_name: name of the parameter to vary.
    :param param_range: values of the parameter to evaluate.
    :return: None
    """
    # param_name / param_range are keyword-only in current scikit-learn;
    # passing them by keyword works on old and new releases alike.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range)
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.figure()
    plt.plot(param_range, train_mean,
             color='blue', marker='o',
             markersize=5, label='training accuracy')
    plt.fill_between(param_range, train_mean + train_std,
                     train_mean - train_std, alpha=0.15,
                     color='blue')

    plt.plot(param_range, test_mean,
             color='green', linestyle='--',
             marker='s', markersize=5,
             label='validation accuracy')
    plt.fill_between(param_range,
                     test_mean + test_std,
                     test_mean - test_std,
                     alpha=0.15, color='green')

    plt.grid()
    plt.xscale('log')
    plt.legend(loc='lower right')
    plt.xlabel('Parameter C')
    plt.ylabel('Accuracy')
    plt.ylim([0., 1.0])
    plt.tight_layout()
    plt.show()
예제 #7
0
def RR_validationcurve(sspacing, tspacing, RR_lambda_opt, lambdas_range):
    """
    Compute the ridge-regression validation curve and save it to a .mat file

    Runs a 10-fold validation curve of Ridge over the ``alpha`` values in
    ``lambdas_range`` on the training data returned by ``data_preprocess``,
    and stores the resulting mean-squared errors in
    '/data/PhDworks/isotropic/regerssion/RR_crossvalidation.mat'.

    Parameters
    ----------
    sspacing : 2D subsampling ratio in space (in one direction)

    tspacing : 1D subsampling ratio in time

    RR_lambda_opt : not used by this function

    lambdas_range : values of the Ridge regularization parameter to evaluate

    Returns
    -------
    (train_MSE, test_MSE) : per-fold mean-squared errors on the training and
        validation folds, one row per value in ``lambdas_range``
    """
    import scipy.io as sio
    from sklearn.linear_model import Ridge
    try:
        # Current scikit-learn location and scorer name.
        from sklearn.model_selection import validation_curve
        mse_scoring = "neg_mean_squared_error"
    except ImportError:
        # Releases that predate sklearn.model_selection.
        from sklearn.learning_curve import validation_curve
        mse_scoring = "mean_squared_error"

    # Load all training data
    (Xl_tr, mea_l, sig_l, Xh_tr, mea_h, sig_h) = data_preprocess(sspacing, tspacing)

    train_MSE, test_MSE = validation_curve(Ridge(), Xl_tr, Xh_tr,
                                           param_name="alpha",
                                           param_range=lambdas_range,
                                           scoring=mse_scoring, cv=10)

    # Scorers follow a "greater is better" convention, so the MSE comes back
    # negated; flip the sign to get the actual error.
    train_MSE = -train_MSE
    test_MSE = -test_MSE

    # save to .mat file
    sio.savemat('/data/PhDworks/isotropic/regerssion/RR_crossvalidation.mat',
                dict(lambdas_range=lambdas_range, train_MSE=train_MSE, test_MSE=test_MSE))

    return (train_MSE, test_MSE)
예제 #8
0
def plot_validation_curve(classifier, X, y, param_name="gamma", param_range=None):
    """Show the validation curve of ``classifier`` on a log-scaled x axis.

    Plots mean training and cross-validation scores with a +/- one standard
    deviation band; the legend is placed to the right of the axes.

    :param classifier: estimator passed to ``validation_curve``.
    :param X: features.
    :param y: targets.
    :param param_name: name of the parameter to vary.
    :param param_range: values to evaluate; defaults to
        ``np.logspace(-6, 0, 5)`` when None.
    :return: None
    """
    # Honour a caller-supplied range; it used to be overwritten unconditionally.
    if param_range is None:
        param_range = np.logspace(-6, 0, 5)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.title("Validation Curve")
    plt.ylim((0, 1))
    # Label the axis with the parameter actually being varied.
    plt.xlabel(param_name)
    plt.ylabel("Score")

    # param_name / param_range are keyword-only in current scikit-learn.
    train_scores, validation_scores = validation_curve(
        classifier, X, y, param_name=param_name, param_range=param_range)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    validation_scores_mean = np.mean(validation_scores, axis=1)
    validation_scores_std = np.std(validation_scores, axis=1)

    plt.semilogx(param_range, train_scores_mean, label="Training Score", color="g")
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.2, color="g")

    plt.semilogx(param_range, validation_scores_mean, label="Cross-validation Score", color="r")
    plt.fill_between(param_range, validation_scores_mean - validation_scores_std,
                     validation_scores_mean + validation_scores_std,
                     alpha=0.2, color="r")

    # Shrink the axes horizontally to make room for the legend beside them.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.65, box.height])
    plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    plt.grid()

    plt.show()
예제 #9
0
파일: viz.py 프로젝트: veksev/spring16
def plot_validation_curve(clf, X, y, param, name=None):
    """Save the average-precision validation curve of ``clf`` as a PNG.

    ``param`` is a dict with the keys 'name' (parameter to vary) and 'range'
    (values to evaluate); nothing is done when it is None. The curve is
    computed with 5-fold cross-validation, written to
    ``<name>_<param name>_validationcurve.png`` and the current figure is
    cleared afterwards. ``name`` defaults to the class name of ``clf``.

    Any exception raised along the way is reported on stdout instead of
    being propagated.
    """
    try:
        if name is None:
            name = clf.__class__.__name__
        if param is None:
            return
        scorer = metrics.make_scorer(metrics.average_precision_score)
        param_name = param['name']
        train_scores, test_scores = validation_curve(
            clf, X, y, cv=5, scoring=scorer, n_jobs=-1,
            param_name=param_name, param_range=param['range'])
        curves_to_draw = [
            (np.mean(fold_scores, axis=1), np.std(fold_scores, axis=1), label, colour)
            for fold_scores, label, colour in (
                (train_scores, 'Training score', 'r'),
                (test_scores, 'Cross-validation score', 'g'),
            )
        ]

        plt.title('Validation Curve of {} varying {}'.format(name, param_name))
        plt.xlabel(param_name)
        plt.ylabel("Score")
        plt.ylim(-0.05, 1.05)
        plt.xlim(min(param['range']), max(param['range']))
        for centre, spread, label, colour in curves_to_draw:
            plt.plot(param['range'], centre, label=label, color=colour)
            plt.fill_between(param['range'], centre - spread, centre + spread,
                             alpha=0.2, color=colour)
        plt.legend(loc='lower right')
        plt.savefig(name + '_' + param_name + '_validationcurve.png')
        plt.clf()
    except Exception as e:
        print('ERROR: {}, {}'.format(name, str((e))))
예제 #10
0
def plot_validation_curve(estimator, title, X, y, param_name, param_range,
                          cv=10, scoring='accuracy', n_jobs=2):
    """Show the validation curve of ``estimator`` on a log-scaled x axis.

    Plots mean training and cross-validation scores with a +/- one standard
    deviation band for every value in ``param_range``.

    :param estimator: estimator passed to ``validation_curve``.
    :param title: figure title.
    :param X: features.
    :param y: targets.
    :param param_name: name of the parameter to vary; also the x label.
    :param param_range: values of the parameter to evaluate.
    :param cv: cross-validation setting forwarded to ``validation_curve``.
    :param scoring: scorer forwarded to ``validation_curve``.
    :param n_jobs: parallel job count forwarded to ``validation_curve``.
    :return: None
    """
    try:
        from sklearn.model_selection import validation_curve
    except ImportError:
        # Releases that predate sklearn.model_selection.
        from sklearn.learning_curve import validation_curve

    # param_name / param_range are keyword-only in current scikit-learn.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, scoring=scoring, n_jobs=n_jobs)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.figure()
    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    plt.semilogx(param_range, train_scores_mean, label="Training score", color="r")
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.2, color="r")
    plt.semilogx(param_range, test_scores_mean, label="Cross-validation score",
                 color="g")
    plt.fill_between(param_range, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.2, color="g")
    plt.legend(loc="best")
    plt.show()
예제 #11
0
def plot_validation_curve(clf, cv, X, y, param_name, param_range):
    """Save the deviance validation curve of ``clf`` over ``param_range``.

    Scores are computed by ``validation_curve`` using
    ``my_pipeline_deviance_function`` as the scorer, with 14 parallel jobs.
    The mean training and CV scores are drawn with a +/- one standard
    deviation band, and the figure is written to
    ``validation_curve<param_name>.png``.
    """
    train_scores, valid_scores = validation_curve(
        clf, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring=my_pipeline_deviance_function,
        n_jobs=14, verbose=2)

    curves_to_draw = [
        (np.mean(fold_scores, axis=1), np.std(fold_scores, axis=1), label, colour)
        for fold_scores, label, colour in (
            (train_scores, "Training", "red"),
            (valid_scores, "CV", "green"),
        )
    ]

    # Parameter value on x, estimated loss on y.
    fig, ax = plt.subplots()
    ax.set_title('Validation curve')
    ax.set_xlabel(param_name, fontsize=14)
    ax.set_ylabel("Loss (deviance)", fontsize=14)
    ax.set_xlim(min(param_range), max(param_range))
    for centre, spread, label, colour in curves_to_draw:
        ax.plot(param_range, centre, color=colour, label=label)
        ax.fill_between(param_range, centre - spread, centre + spread,
                        alpha=0.1, color=colour)
    ax.legend(loc="best")
    plt.savefig('validation_curve' + param_name + '.png')
예제 #12
0
def plot_validation_curve(estimator, X, y, param_name, param_range, 
                          title=None, ylim=None, cv=10, n_jobs=1,
                          scoring="accuracy"):
    """Show the validation curve of ``estimator`` and return ``plt``.

    Mean training and cross-validation scores are drawn on a log-scaled
    x axis, each with a +/- one standard deviation band. The y axis uses
    ``ylim`` when given and (0.0, 1.1) otherwise; the title is
    "Validation Curve" unless a non-empty ``title`` is supplied.
    """
    train_scores, test_scores = validation_curve(
        estimator, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring=scoring, n_jobs=n_jobs)
    curves_to_draw = [
        (np.mean(fold_scores, axis=1), np.std(fold_scores, axis=1), label, colour)
        for fold_scores, label, colour in (
            (train_scores, "Training score", "r"),
            (test_scores, "Cross-validation score", "g"),
        )
    ]

    plt.title("Validation Curve")
    plt.xlabel(param_name)
    plt.ylabel("Score")
    plt.ylim(*(ylim if ylim is not None else (0.0, 1.1)))

    for centre, spread, label, colour in curves_to_draw:
        plt.semilogx(param_range, centre, label=label, color=colour)
        plt.fill_between(param_range, centre - spread, centre + spread,
                         alpha=0.2, color=colour)

    # A caller-supplied title replaces the generic one set above.
    if title:
        plt.title(title)
    plt.legend(loc="best")
    plt.show()
    return plt
예제 #13
0
def ModelComplexity(X, y):
    """ Plots the r2 score of Ridge as its alpha parameter varies.
    Training and validation scores are computed over 10 shuffled splits
    (20% held out each) and shown with a band of one standard deviation. """

    # Ten shuffled train/test splits over all rows of X
    n_rows = X.shape[0]
    cv = ShuffleSplit(n_rows, n_iter = 10, test_size = 0.2, random_state = 0)

    # Score every alpha in 0.1, 0.2, ..., 0.9
    alpha_range = np.arange(0.1, 1, 0.1)
    train_scores, test_scores = curves.validation_curve(
        Ridge(), X, y,
        param_name = "alpha", param_range = alpha_range,
        cv = cv, scoring = 'r2')

    # Mean and standard deviation across the splits
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)
    stats = (
        (train_mean, train_std, 'r', 'Training Score'),
        (test_mean, test_std, 'g', 'Validation Score'),
    )

    # Draw both mean lines first, then both bands
    pl.figure(3)
    pl.title('LinearRegression Complexity Performance')
    for centre, _, colour, label in stats:
        pl.plot(alpha_range, centre, 'o-', color = colour, label = label)
    for centre, spread, colour, _ in stats:
        pl.fill_between(alpha_range, centre - spread, centre + spread,
                        alpha = 0.15, color = colour)

    # Legend, axis labels and limits
    pl.legend(loc = 'lower right')
    pl.xlabel('alpha_range')
    pl.ylabel('Score')
    pl.ylim([0.5, 1.0])
    pl.show()
예제 #14
0
def ModelComplexity(X, y):
    """ Plots the r2 score of a DecisionTreeRegressor as max_depth grows.
        Training and validation scores are computed over 10 shuffled splits
        (20% held out each) and shown with a band of one standard deviation. """

    # Ten shuffled train/test splits over all rows of X
    n_rows = X.shape[0]
    cv = ShuffleSplit(n_rows, n_iter=10, test_size=0.2, random_state=0)

    # Depths 1 through 10
    max_depth = np.arange(1, 11)
    train_scores, test_scores = curves.validation_curve(
        DecisionTreeRegressor(), X, y,
        param_name = "max_depth", param_range = max_depth,
        cv = cv, scoring = 'r2')

    # Mean and standard deviation across the splits
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)
    stats = (
        (train_mean, train_std, 'r', 'Training Score'),
        (test_mean, test_std, 'g', 'Validation Score'),
    )

    # Draw both mean lines first, then both bands
    pl.figure(figsize=(7, 5))
    pl.title('Decision Tree Regressor Complexity Performance')
    for centre, _, colour, label in stats:
        pl.plot(max_depth, centre, 'o-', color=colour, label=label)
    for centre, spread, colour, _ in stats:
        pl.fill_between(max_depth, centre - spread, centre + spread,
                        alpha = 0.15, color = colour)

    # Legend, axis labels and limits
    pl.legend(loc='lower right')
    pl.xlabel('Maximum Depth')
    pl.ylabel('Score')
    pl.ylim([-0.05, 1.05])
    pl.show()
def plot_validation_curve(logreg, X_train, Y_train):
    """Plot the accuracy validation curve of logistic regression over C.

    Evaluates ``LogisticRegression(solver='newton-cg', fit_intercept=True,
    class_weight=None)`` with 7-fold cross-validation for a fixed list of C
    values from 0.05 to 10000, plots mean training and cross-validation
    accuracy with a +/- one standard deviation band, saves the figure as
    'plot_validation_curve_rf_asis' and shows it.

    :param logreg: not used; a fresh LogisticRegression is always built here.
    :param X_train: training features passed to ``validation_curve``.
    :param Y_train: training targets passed to ``validation_curve``.
    :return: None
    """
    param_range = [0.05, 0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 2500, 5000, 7500, 10000]
    train_scores, test_scores = validation_curve(
        LogisticRegression(solver='newton-cg',
                           fit_intercept=True,
                           class_weight=None),
        X_train, Y_train, param_name="C",
        param_range=param_range, cv=7,
        scoring="accuracy", n_jobs=-1,
        verbose=True)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    # Spread across folds must be the standard deviation; using the mean here
    # made the shaded band span 0 .. 2 * mean.
    test_scores_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve with Logistic Rgression", size=15)
    plt.xlabel("C", size=15)
    plt.ylabel("Score", size=15)
    plt.xticks(size=12)
    plt.yticks(size=12)
    plt.ylim(0.94, 0.946)
    plt.plot(param_range, train_scores_mean, label="Training Score", color="r")
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.2, color="r")
    plt.plot(param_range, test_scores_mean, label="Cross-Validation Score",
             color="g")
    plt.fill_between(param_range, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.2, color="g")
    plt.legend(loc="best")
    plt.savefig('plot_validation_curve_rf_asis')
    plt.show()
예제 #16
0
def plot_val_curve(features, labels, model):
    """Show the accuracy validation curve of ``model`` over its gamma parameter.

    Evaluates ``np.logspace(-5, 5, 5)`` values of ``gamma`` with 6-fold
    cross-validation and plots the mean training and cross-validation
    accuracy on a log-scaled x axis.

    :param features: feature matrix passed to ``validation_curve``.
    :param labels: targets passed to ``validation_curve``.
    :param model: estimator exposing a ``gamma`` parameter.
    :return: None
    """
    p_range = np.logspace(-5, 5, 5)

    train_scores, test_scores = validation_curve(model,
                                                 features,
                                                 labels,
                                                 param_name="gamma",
                                                 param_range=p_range,
                                                 cv=6,
                                                 scoring="accuracy",
                                                 n_jobs=1)

    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.title("Validation Curve")
    # Raw string: "\g" is not a valid escape sequence in a normal literal.
    plt.xlabel(r"$\gamma$")
    plt.ylabel("Score")
    plt.semilogx(p_range,
                 train_scores_mean,
                 label="Training score",
                 color="#E29539")
    plt.semilogx(p_range,
                 test_scores_mean,
                 label="Cross-validation score",
                 color="#94BA65")
    plt.legend(loc="best")
    plt.show()
예제 #17
0
def plot_training_curve(model, X, y):
    """Show mean-absolute-error validation curves for two leaf/split parameters.

    For each of ``min_samples_leaf`` and ``min_samples_split`` the values
    2, 4, 8, 10, ..., 20 are evaluated with 3-fold cross-validation and the
    absolute mean training and cross-validation error is plotted and shown.

    :param model: estimator exposing both parameters.
    :param X: features passed to ``validation_curve``.
    :param y: targets passed to ``validation_curve``.
    :return: None
    """
    params = ["min_samples_leaf", "min_samples_split"]
    p_range = [2, 4, 8, 10, 12, 14, 16, 18, 20]

    for param in params:
        print("plotting validation curve...")
        # The un-prefixed 'mean_absolute_error' scorer name is no longer
        # accepted by scikit-learn; the 'neg_' form is its replacement.
        train_scores, valid_scores = validation_curve(
            model,
            X,
            y,
            param_name=param,
            param_range=p_range,
            cv=3,
            scoring='neg_mean_absolute_error')
        # The scorer reports negated errors; take the magnitude for plotting.
        train_scores_mean = np.absolute(np.mean(train_scores, axis=1))
        valid_scores_mean = np.absolute(np.mean(valid_scores, axis=1))
        plt.title("Validation Curve with GBM")
        plt.xlabel(param)
        plt.ylabel("MAE")

        plt.plot(p_range,
                 train_scores_mean,
                 label="Training Error",
                 color="r",
                 marker='o')
        plt.plot(p_range,
                 valid_scores_mean,
                 label="Cross-validation Error",
                 color="g",
                 marker='s')
        plt.legend(loc="best")
        plt.show()
예제 #18
0
def plot_validation_curve(estimator, X, y, title, param_name, param_range, cv = 10):
    """Draw the accuracy validation curve of ``estimator`` and return ``plt``.

    Both +/- one standard deviation bands are drawn first, then the mean
    training and cross-validation accuracy as marked lines on a linear
    x axis. The figure is neither shown nor saved here.
    """
    train_scores, test_scores = validation_curve(
        estimator, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring="accuracy", n_jobs=1)
    curves_to_draw = [
        (np.mean(fold_scores, axis=1), np.std(fold_scores, axis=1), label, colour)
        for fold_scores, label, colour in (
            (train_scores, "Training score", "r"),
            (test_scores, "Cross-validation score", "g"),
        )
    ]

    plt.figure()
    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    for centre, spread, _, colour in curves_to_draw:
        plt.fill_between(param_range, centre - spread, centre + spread,
                         alpha=0.2, color=colour)
    for centre, _, label, colour in curves_to_draw:
        plt.plot(param_range, centre, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    return plt
def plot_validation_curve(estimator, X, y, param_name, param_range):
    """Show training and validation scores of ``estimator`` over ``param_range``.

    Plots the mean score per parameter value with a +/- one standard
    deviation band for both the training and the validation folds, on a
    log-scaled x axis with the y axis fixed to [0, 1].

    :param estimator: estimator passed to ``validation_curve``.
    :param X: features.
    :param y: targets.
    :param param_name: name of the parameter to vary.
    :param param_range: values of the parameter to evaluate.
    :return: None
    """
    # param_name / param_range are keyword-only in current scikit-learn;
    # passing them by keyword works on old and new releases alike.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range)
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.figure()
    plt.plot(param_range, train_mean,
             color='blue', marker='o',
             markersize=5, label='training accuracy')
    plt.fill_between(param_range, train_mean + train_std,
                     train_mean - train_std, alpha=0.15,
                     color='blue')

    plt.plot(param_range, test_mean,
             color='green', linestyle='--',
             marker='s', markersize=5,
             label='validation accuracy')
    plt.fill_between(param_range,
                     test_mean + test_std,
                     test_mean - test_std,
                     alpha=0.15, color='green')

    plt.grid()
    plt.xscale('log')
    plt.legend(loc='lower right')
    plt.xlabel('Parameter C')
    plt.ylabel('Accuracy')
    plt.ylim([0., 1.0])
    plt.tight_layout()
    plt.show()
def plot_validation_curve(clf, X_train, Y_train):
    """Plot the accuracy validation curve of a KNN classifier over n_neighbors.

    Evaluates ``KNeighborsClassifier(algorithm='ball_tree', weights='uniform')``
    with 7-fold cross-validation for ``n_neighbors`` = 1..20, plots mean
    training and cross-validation accuracy with a +/- one standard deviation
    band, saves the figure as 'plot_validation_curve_rf_asis' and shows it.

    :param clf: not used; a fresh KNeighborsClassifier is always built here.
    :param X_train: training features passed to ``validation_curve``.
    :param Y_train: training targets passed to ``validation_curve``.
    :return: None
    """
    param_range = list(range(1, 21))
    train_scores, test_scores = validation_curve(
        KNeighborsClassifier(algorithm='ball_tree', weights='uniform'),
        X_train, Y_train, param_name="n_neighbors",
        param_range=param_range, cv=7,
        scoring="accuracy", n_jobs=-1,
        verbose=True)
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    # Spread across folds must be the standard deviation; using the mean here
    # made the shaded band span 0 .. 2 * mean.
    test_scores_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve with KNN", size=15)
    plt.xlabel("n_neighbors", size=15)
    plt.ylabel("Score", size=15)
    plt.xticks(size=12)
    plt.yticks(size=12)
    plt.ylim(0.8, 1.0)
    plt.plot(param_range, train_scores_mean, label="Training Score", color="r")
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.2, color="r")
    plt.plot(param_range, test_scores_mean, label="Cross-Validation Score",
             color="g")
    plt.fill_between(param_range, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.2, color="g")
    plt.legend(loc="best")
    plt.savefig('plot_validation_curve_rf_asis')
    plt.show()
예제 #21
0
파일: utils.py 프로젝트: Pazitos10/DSS
def plot_validation_curve(estimator, X, y, param_name, param_range,
                      ylim=(0, 1.1), cv=None, n_jobs=-1, scoring=None, 
                      filename=None):
    """Plot, report and save the validation curve of ``estimator``.

    Clears the current figure, draws the mean training and cross-validation
    scores on a log-scaled x axis, prints the best mean cross-validation
    score and hands ``validation_curve_<filename>.png`` to ``save_plot``.

    :param estimator: estimator passed to ``validation_curve``; its class
        name appears in the title.
    :param X: features.
    :param y: targets.
    :param param_name: name of the parameter to vary.
    :param param_range: values of the parameter to evaluate.
    :param ylim: ``(low, high)`` limits for the y axis.
    :param cv: cross-validation setting forwarded to ``validation_curve``.
    :param n_jobs: parallel job count forwarded to ``validation_curve``.
    :param scoring: scorer forwarded to ``validation_curve``.
    :param filename: suffix used in the output file name.
    :return: None
    """
    plt.clf()
    estimator_name = type(estimator).__name__
    plt.title("Validation curves for %s on %s"
              % (param_name, estimator_name))
    plt.grid()
    plt.xlabel(param_name)
    plt.ylabel("Score")
    plt.xlim(min(param_range), max(param_range))
    plt.ylim(*ylim)

    # param_name / param_range are keyword-only in current scikit-learn.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, n_jobs=n_jobs, scoring=scoring)

    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    plt.semilogx(param_range, train_scores_mean, 'o-', color="r",
                 label="Training score")
    plt.semilogx(param_range, test_scores_mean, 'o-', color="g",
                 label="Cross-validation score")
    plt.legend(loc="best")
    # Report the maximum, not whichever value happens to be last in the range.
    print("Best test score: {:.4f}".format(np.max(test_scores_mean)))
    save_plot("validation_curve_"+str(filename)+".png")
예제 #22
0
def main():
    """Report feature importances and show a random-forest validation curve.

    Loads the training data, fits a single-tree random forest to print its
    feature importances and the most important feature, then plots training
    and test scores of an entropy-criterion random forest for several values
    of ``n_estimators``.
    """
    S, col_names_S = load_data(config.paths.training_data,
                               config.paths.cache_folder)
    Xs, Ys, col_names_S = extract_xy(S, col_names_S)

    a = RandomForestClassifier(n_estimators=1)
    a.fit(Xs.toarray(), Ys.toarray().ravel())
    best_features = a.feature_importances_
    max_ind, max_val = max(enumerate(best_features), key=operator.itemgetter(1))
    # Single-argument print() calls behave the same on Python 2 and 3.
    print(best_features)
    print("{} {}".format(max_ind, max_val))

    print(Xs.shape)
    print(Ys.shape)
    param_range = [1, 3, 5, 7, 10, 15, 20, 30, 60, 80]
    # param_name / param_range are keyword-only in current scikit-learn.
    train_scores, test_scores = validation_curve(
        RandomForestClassifier(criterion='entropy'), Xs, Ys.toarray().ravel(),
        param_name='n_estimators', param_range=param_range)

    print(train_scores)
    print(test_scores)
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve for Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Score")
    plt.plot(param_range, train_mean, label="Training Score", color='r')
    plt.fill_between(param_range, train_mean - train_std, train_mean + train_std, alpha=0.2, color='r')
    plt.plot(param_range, test_mean, label="Test Score", color='b')
    plt.fill_between(param_range, test_mean - test_std, test_mean + test_std, alpha=0.2, color='b')
    plt.legend(loc="best")
    plt.show()
예제 #23
0
def validation_curve_analysis(estimator=None, param_name=None, param_range=None, issues_train=None,
                              priority_train=None):
    """
    Shows the 10-fold validation curve of an estimator on a log-scaled x axis.
    :param estimator: Estimator to evaluate.
    :param param_name: Name of the parameter that is varied.
    :param param_range: Values of the parameter to evaluate.
    :param issues_train: Training issues, used as X.
    :param priority_train: Training priorities, used as y.
    :return: None.
    """
    train_scores, test_scores = validation_curve(
        estimator=estimator, X=issues_train, y=priority_train,
        param_name=param_name, param_range=param_range, cv=10)
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.subplots(figsize=(2.5, 2.5))

    # Per-curve line styling; the band reuses the line colour.
    train_style = dict(color='blue', marker='o', markersize=5, label='training accuracy')
    test_style = dict(color='green', linestyle='--', marker='s', markersize=5,
                      label='validation accuracy')
    for centre, spread, style in ((train_mean, train_std, train_style),
                                  (test_mean, test_std, test_style)):
        plt.plot(param_range, centre, **style)
        plt.fill_between(param_range, centre + spread, centre - spread,
                         alpha=0.15, color=style['color'])

    plt.grid()
    plt.xscale('log')
    plt.xlabel('Parameter ' + param_name)
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.show()
예제 #24
0
def deviance_curve(classifier, features, labels, metaparameter_name, param_range, metric='Accuracy',
                   n_folds=4, njobs=-1, fig_size=(16, 9)):
    """Plot training and validation scores of a classifier over a parameter range.

    Each curve is the mean score over the cross-validation folds, with a shaded band of
    one standard deviation. The figure is shown with ``plt.show()``; nothing is returned.

    :param classifier: estimator handed to ``validation_curve``.
    :param features: samples passed as ``X``.
    :param labels: targets passed as ``y``.
    :param metaparameter_name: name of the estimator parameter that is varied.
    :param param_range: values of that parameter to evaluate.
    :param metric: scoring name; used as written for the axis and legend labels and
        lower-cased for the scorer lookup.
    :param n_folds: value passed as ``cv``.
    :param njobs: value passed as ``n_jobs``.
    :param fig_size: figure size forwarded to ``plt.figure``.
    """
    # scikit-learn scorer names are lower-case ('accuracy', 'f1', ...), so the default
    # 'Accuracy' must be lower-cased before the lookup. Non-string scorers pass through.
    scoring = metric.lower() if isinstance(metric, str) else metric
    metric_label = str(metric)

    # param_name / param_range are passed by keyword so the call does not depend on the
    # positional layout of validation_curve.
    training_scores, validation_scores = validation_curve(classifier,
                                                      features, labels,
                                                      param_name=metaparameter_name,
                                                      param_range=param_range,
                                                      n_jobs=njobs,
                                                      cv=n_folds, scoring=scoring)

    training_scores_mean = np.mean(training_scores, axis=1)
    training_scores_std = np.std(training_scores, axis=1)
    validation_scores_mean = np.mean(validation_scores, axis=1)
    validation_scores_std = np.std(validation_scores, axis=1)
    sns.set_style("darkgrid")
    sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})
    plt.figure(num=None, figsize=fig_size, dpi=600, facecolor='w', edgecolor='k')
    plt.title("Validation Curve")
    plt.xlabel(metaparameter_name)
    plt.ylabel(metric_label)
    plt.xlim(np.min(param_range), np.max(param_range))
    plt.plot(param_range, training_scores_mean, label="Training " + metric_label, color="mediumblue")
    plt.fill_between(param_range, training_scores_mean - training_scores_std,
                     training_scores_mean + training_scores_std, alpha=0.2, color="lightskyblue")
    plt.plot(param_range, validation_scores_mean, label="validation " + metric_label,
             color="coral")
    plt.fill_between(param_range, validation_scores_mean - validation_scores_std,
                     validation_scores_mean + validation_scores_std, alpha=0.2, color="lightcoral")
    plt.legend(loc="best")
    plt.show()
예제 #25
0
    def plot_validation_curve(self, estimator, X_train, y_train, param_range, param_name, cv=None, ylim=None, figure_title="Validation Curve"):
        """
        Plot mean training and validation scores of an estimator over a parameter range.

        Each curve is drawn with a shaded band of one standard deviation over the folds.

        :param estimator: estimator handed to ``validation_curve``
        :param X_train: inputs
        :param y_train: class labels
        :param param_range: list , e.g. [1, 2, 3, 4]
        :param param_name: String, name of the estimator parameter that is varied
        :param cv: integer, number of folds (forwarded as ``cv``)
        :param ylim: list for the y-axis limits , e.g. [0.8, 1.0]; left automatic when None
        :param figure_title: String, title of the figure
        :return: None, the figure is displayed with ``plt.show()``
        """
        # Pass by keyword: in sklearn.model_selection the parameter that follows
        # param_range is ``groups``, so a positional ``cv`` would be taken as groups.
        train_scores, test_scores = validation_curve(estimator, X_train, y_train,
                                                     param_name=param_name,
                                                     param_range=param_range,
                                                     cv=cv)
        train_mean = np.mean(train_scores, axis=1)
        train_std = np.std(train_scores, axis=1)
        test_mean = np.mean(test_scores, axis=1)
        test_std = np.std(test_scores, axis=1)
        plt.plot(param_range, train_mean,
                 color='blue', marker='o',
                 markersize=5,
                 label='training accuracy')
        plt.fill_between(param_range, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
        plt.plot(param_range, test_mean, color='green', linestyle='--', marker='s', markersize=5, label='validation accuracy')
        plt.fill_between(param_range, test_mean + test_std, test_mean - test_std, alpha=0.15, color='green')
        plt.title(figure_title)
        plt.grid()
        plt.legend(loc='lower right')
        plt.xlabel(param_name)
        plt.ylabel('Accuracy')
        if ylim is not None:
            plt.ylim(ylim)
        plt.show()
예제 #26
0
def plot_validation_curve(classifier, xTrain, yTrain, paramName, paramRange):
    """Plot training and cross-validation scores of a classifier over a parameter range.

    Scores are computed by ``validation_curve`` with ``cv=3``, the "log_loss" scorer and
    ``n_jobs=-1``. Each curve is the fold mean with a band of one standard deviation.
    The figure is shown with ``plt.show()``; nothing is returned.

    NOTE(review): "log_loss" is the scorer name as written in the original; newer
    scikit-learn releases appear to name it "neg_log_loss". Confirm against the
    installed version.
    """
    fit_scores, cv_scores = validation_curve(
        classifier, xTrain, yTrain,
        param_name=paramName, param_range=paramRange,
        cv=3, scoring="log_loss", n_jobs=-1, verbose=1)

    # (mean, std) over the folds, training curve first.
    fit_center, fit_spread = np.mean(fit_scores, axis=1), np.std(fit_scores, axis=1)
    cv_center, cv_spread = np.mean(cv_scores, axis=1), np.std(cv_scores, axis=1)

    plt.title("Validation Curve")
    plt.xlabel(paramName)
    plt.ylabel("Score")

    curves = (
        (fit_center, fit_spread, "Training score", "r"),
        (cv_center, cv_spread, "Cross-validation score", "g"),
    )
    for center, spread, legend_text, shade in curves:
        plt.plot(paramRange, center, 'o-', label=legend_text, color=shade)
        plt.fill_between(paramRange, center - spread, center + spread,
                         alpha=0.2, color=shade)

    plt.legend(loc="best")
    plt.show()
예제 #27
0
파일: HW4_wei.py 프로젝트: wjiang16/CMM
def tune_parameter():
    """Plot a validation curve for the C parameter of a scaled logistic regression.

    Builds a StandardScaler + L2 LogisticRegression pipeline and evaluates ``clf__C`` from
    0.0001 up to (but excluding) 0.1 in steps of 0.001, using ``cv=10``. It then plots the
    mean training and validation scores with one-standard-deviation bands.

    Reads ``df_train`` and ``y_train`` from the enclosing (module) scope. Shows the figure
    with ``plt.show()`` and returns None.
    """
    pipe_lr = Pipeline([('scl', StandardScaler()), ('clf', LogisticRegression(penalty='l2'))])
    param_range = np.arange(0.0001, 0.1, 0.001)

    train_scores, test_scores = validation_curve(
        estimator=pipe_lr,
        X=df_train,
        y=y_train,
        param_name='clf__C',
        param_range=param_range,
        cv=10
    )
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    # Legend and axis labels previously read "accuray" / "Accurary".
    plt.plot(param_range, train_mean, color='blue', marker='o', markersize=5, label='training accuracy')

    plt.fill_between(param_range, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')

    plt.plot(param_range, test_mean, color='red', marker='s', markersize=5, label='validation accuracy')

    plt.fill_between(param_range, test_mean + test_std, test_mean - test_std, alpha=0.15, color='red')

    plt.grid()

    plt.legend(loc='lower right')
    plt.xlabel('Parameter C')
    plt.ylabel('Accuracy')
    plt.ylim([0, 1])
    plt.show()
예제 #28
0
파일: cv.py 프로젝트: hideki1234/kaggle101
def main():
    """Plot a validation curve for the ``lambda_`` parameter of ``digit_recognizer``.

    Loads ``../input/train.csv``, taking column 0 as the targets and the remaining columns
    (cast to float and passed through ``normalizeX``) as the features. It evaluates 14
    values of ``lambda_`` between 0.001 and 20 with ``cv=3``, accuracy scoring and two
    jobs, then plots mean training and cross-validation scores on a logarithmic x axis
    with one-standard-deviation bands.
    """
    # loading training data
    data = pd.read_csv('../input/train.csv')
    X_tr = data.values[:, 1:].astype(float)
    X_tr = normalizeX(X_tr)
    y_tr = data.values[:, 0]

    param_range = np.array(
            [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0])
    train_scores, test_scores = validation_curve(
            digit_recognizer(maxiter=100), X_tr, y_tr,
            param_name='lambda_', param_range=param_range,
            cv=3, scoring='accuracy', n_jobs=2)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve with Neural Network")
    # Raw string: "\l" is not a valid escape sequence and newer Python versions warn
    # about it. The resulting text is unchanged.
    plt.xlabel(r"$\lambda$")
    plt.ylabel("Score")
    plt.ylim(0.85, 1.05)
    plt.semilogx(param_range, train_scores_mean, label="Training score", color="r")
    plt.fill_between(param_range, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std, alpha=0.2, color="r")
    plt.semilogx(param_range, test_scores_mean, label="Cross-validation score",
                 color="g")
    plt.fill_between(param_range, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std, alpha=0.2, color="g")
    plt.legend(loc="best")
    plt.show()
예제 #29
0
def plot_validation_curve(estimator, X, y, param_name, param_range, addition_graph_points, graph_title, graph_xlabel, graph_ylabel, ylim, cv=5, scoring="accuracy"):
    """Save a validation-curve plot, optionally overlaid with extra series.

    The cross-validated training and test scores from ``validation_curve`` are drawn as
    fold means with one-standard-deviation bands. Every entry of ``addition_graph_points``
    is a mapping with the keys 'data', 'color' and 'label' and is drawn as one more line
    over ``param_range``. The figure is written to ``plots/<graph_title>.png`` and closed.
    """
    fit_scores, held_out_scores = validation_curve(
        estimator, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring=scoring)

    fit_center = np.mean(fit_scores, axis=1)
    fit_spread = np.std(fit_scores, axis=1)
    held_out_center = np.mean(held_out_scores, axis=1)
    held_out_spread = np.std(held_out_scores, axis=1)

    plt.title(graph_title)
    plt.xlabel(graph_xlabel)
    plt.ylabel(graph_ylabel)
    plt.ylim(*ylim)

    # Bands first, then the lines on top of them.
    for center, spread, shade in ((fit_center, fit_spread, "r"),
                                  (held_out_center, held_out_spread, "b")):
        plt.fill_between(param_range, center - spread, center + spread, alpha=0.1, color=shade)
    plt.plot(param_range, fit_center, 'o-', color="r", label="Cross Validation Training score")
    plt.plot(param_range, held_out_center, 'o-', color="b", label="Cross Validation Test Score")

    for extra in addition_graph_points:
        plt.plot(param_range, extra['data'], 'o-', color=extra['color'], label=extra['label'])

    plt.legend(loc="best")
    plt.savefig('plots/' + graph_title + '.png')
    plt.close()
예제 #30
0
def plot_validation_curve(estimator, X, y, param_name, param_range=np.logspace(-6, -1, 5), title=None, ylim=None, cv=None, n_jobs=1, scoring=None):
    """Show a validation curve on a logarithmic x axis and print the mean scores.

    Draws the fold-mean training and cross-validation scores from ``validation_curve``
    with one-standard-deviation bands on a 20x10 figure. It shows the figure, prints both
    mean-score arrays and returns the ``plt`` module.

    NOTE(review): the printed header always says "Stratified 3-Fold" regardless of the
    ``cv`` argument.
    """
    plt.figure(figsize=(20, 10))
    fit_scores, cv_scores = validation_curve(
        estimator, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring=scoring, n_jobs=n_jobs)

    fit_center = np.mean(fit_scores, axis=1)
    fit_spread = np.std(fit_scores, axis=1)
    cv_center = np.mean(cv_scores, axis=1)
    cv_spread = np.std(cv_scores, axis=1)

    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel("Score")
    if ylim is not None:
        plt.ylim(*ylim)

    curves = (
        (fit_center, fit_spread, "Training score", "r"),
        (cv_center, cv_spread, "Cross-validation score", "g"),
    )
    for center, spread, legend_text, shade in curves:
        plt.semilogx(param_range, center, label=legend_text, color=shade)
        plt.fill_between(param_range, center - spread, center + spread,
                         alpha=0.2, color=shade)

    plt.legend(loc="best")
    plt.show()

    print("Stratified 3-Fold cross-validation \ntrain_scores_mean:")
    print(fit_center)
    print("test_scores_mean:")
    print(cv_center)
    return plt
def plot_val_curve(features, labels, model):
    """Save a validation curve of ``model`` over its ``gamma`` parameter.

    Evaluates five log-spaced gamma values from 1e-5 to 1e5 with ``cv=6`` and accuracy
    scoring. It plots the fold-mean training and cross-validation scores on a logarithmic
    x axis and writes the figure to ``figures/val_curve.png`` with a transparent
    background. Returns None.

    :param features: samples passed as ``X``.
    :param labels: targets passed as ``y``.
    :param model: estimator exposing a ``gamma`` parameter.
    """
    p_range = np.logspace(-5, 5, 5)

    train_scores, test_scores = validation_curve(model,
                                                 features,
                                                 labels,
                                                 param_name='gamma',
                                                 param_range=p_range,
                                                 cv=6,
                                                 scoring='accuracy',
                                                 n_jobs=1)

    # Only the means are plotted; the previously computed standard deviations were unused.
    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)

    plt.title('Validation Curve')
    # Raw string: "\g" is not a valid escape sequence and newer Python versions warn
    # about it. The resulting text is unchanged.
    plt.xlabel(r'$\gamma$')
    plt.ylabel('Score')
    plt.semilogx(p_range,
                 train_scores_mean,
                 label='Training score',
                 color='#E29539')
    plt.semilogx(p_range,
                 test_scores_mean,
                 label='Cross-validation score',
                 color='#94BA65')
    plt.legend(loc='best')
    plt.savefig('figures/val_curve.png', transparent=True)
예제 #32
0
def addressing_over_under_fitting():
    """Save a validation curve of logistic-regression C on the WDBC data set.

    Downloads ``wdbc.data`` from the UCI repository, label-encodes column 1 as the target
    and uses columns 2 onward as features. After an 80/20 split it evaluates ``clf__C`` of
    a StandardScaler + L2 LogisticRegression pipeline on the training part with ``cv=10``.
    The fold-mean training and validation scores are plotted with one-standard-deviation
    bands on a logarithmic x axis and saved to ``PL6 + 'validation_curve.png'`` at 300 dpi.
    """
    frame = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
        header=None)
    X = frame.loc[:, 2:].values
    encoder = LabelEncoder()
    y = encoder.fit_transform(frame.loc[:, 1].values)
    # Result is discarded; the call only succeeds if both class labels are known.
    encoder.transform(['M', 'B'])
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.20, random_state=1)

    pipeline = Pipeline([
        ('scl', StandardScaler()),
        ('clf', LogisticRegression(penalty='l2', random_state=0)),
    ])

    c_values = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
    fit_scores, held_out_scores = validation_curve(estimator=pipeline,
                                                   X=X_train,
                                                   y=y_train,
                                                   param_name='clf__C',
                                                   param_range=c_values,
                                                   cv=10)

    # (mean, std) over the folds, training curve first.
    summaries = [(np.mean(scores, axis=1), np.std(scores, axis=1))
                 for scores in (fit_scores, held_out_scores)]
    line_styles = [
        {'color': 'blue', 'marker': 'o', 'markersize': 5, 'label': 'training accuracy'},
        {'color': 'green', 'linestyle': '--', 'marker': 's', 'markersize': 5,
         'label': 'validation accuracy'},
    ]
    for (center, spread), line_style in zip(summaries, line_styles):
        plt.plot(c_values, center, **line_style)
        plt.fill_between(c_values, center + spread, center - spread,
                         alpha=0.15, color=line_style['color'])

    plt.grid()
    plt.xscale('log')
    plt.legend(loc='lower right')
    plt.xlabel('Parameter C')
    plt.ylabel('Accuracy')
    plt.ylim([0.8, 1.0])
    plt.tight_layout()
    plt.savefig(PL6 + 'validation_curve.png', dpi=300)
예제 #33
0
def validation_curves(model, X, y, n_iter, test_size):
    """Compute validation-curve scores of ``model`` over its ``C`` parameter.

    Ten log-spaced C values from 1e-5 to 1e5 are evaluated on shuffle-split
    cross-validation built from ``n_iter``, ``test_size`` and a fixed ``random_state=0``.

    :return: tuple ``(Cs, train_scores, test_scores)``.
    """
    c_grid = np.logspace(-5, 5, 10)
    splitter = ShuffleSplit(X.shape[0], n_iter=n_iter, test_size=test_size, random_state=0)
    fit_scores, held_out_scores = validation_curve(model, X, y, "C", c_grid, cv=splitter)
    return (c_grid, fit_scores, held_out_scores)
예제 #34
0
def model_complexity(features, targets, cv_data):
    '''Draws model complexity curves of an AdaBoostRegressor for three parameters.

    One subplot each for base_estimator (decision trees of increasing max_depth),
    n_estimators and learning_rate. Every subplot shows the fold mean of the absolute
    training and validation scores with one-standard-deviation bands. The figure is
    saved to img/col_plots.png and returned together with its axes.
    '''
    from predictor import performance_metric

    tree_depths = [2, 4, 8, 16, 32]
    sweeps = [
        ('base_estimator', [DecisionTreeRegressor(max_depth=depth) for depth in tree_depths]),
        ('n_estimators', np.arange(25, 250, 25)),
        ('learning_rate', np.arange(0.5, 3, 0.5)),
    ]
    scoring_fnc = make_scorer(performance_metric, greater_is_better=False)
    com_fig, com_ax = plt.subplots(1, 3, figsize=(12, 4), dpi=50)

    for position, (pname, prange) in enumerate(sweeps):
        panel = com_ax[position]
        fit_scores, held_out_scores = curves.validation_curve(
            AdaBoostRegressor(),
            features,
            targets,
            param_name=pname,
            param_range=prange,
            cv=cv_data,
            scoring=scoring_fnc)
        fit_center = np.mean(abs(fit_scores), axis=1)
        held_out_center = np.mean(abs(held_out_scores), axis=1)
        fit_spread = np.std(abs(fit_scores), axis=1)
        held_out_spread = np.std(abs(held_out_scores), axis=1)

        # The first sweep varies estimator objects, so it is plotted against tree depth.
        x_values = tree_depths if position == 0 else prange

        panel.plot(x_values, fit_center, 'o-', color='r', label='Training Score')
        panel.plot(x_values, held_out_center, 'o-', color='g', label='Validation Score')
        panel.set_ylim((0, 14))
        panel.fill_between(x_values, fit_center - fit_spread, fit_center + fit_spread,
                           alpha=0.15, color='r')
        panel.fill_between(x_values, held_out_center - held_out_spread,
                           held_out_center + held_out_spread, alpha=0.15, color='g')

        if position == 0:
            panel.set_xlabel('{} max_depth'.format(pname))
            panel.legend(['Training Score', 'Testing Score'],
                         bbox_to_anchor=(-.16, 1.05))
        else:
            panel.set_xlabel(pname)
        panel.set_ylabel('Adjusted Close Price Relative Difference (%)')

    com_fig.savefig('img/' + 'col_plots.png')
    return com_fig, com_ax
예제 #35
0
def my_visual_parameter_tuning(x_train, y_train, pipe, param_name, n_fold,
                               param_range):
    """Plot training and validation scores of a pipeline over a parameter range.

    Args:
        x_train (DataSeries): Samples passed to validation_curve as X
        y_train (DataSeries): Targets passed to validation_curve as y
        pipe (Pipeline): Estimator to evaluate
        param_name (String): The name of the parameter that is varied
        n_fold (int): Value passed to validation_curve as cv
        param_range (List): Parameter values to evaluate
    Returns:
        bool: Always True once the plot has been shown
    """
    fit_scores, held_out_scores = validation_curve(estimator=pipe,
                                                   X=x_train,
                                                   y=y_train,
                                                   param_name=param_name,
                                                   param_range=param_range,
                                                   cv=n_fold,
                                                   n_jobs=1,
                                                   verbose=1)

    # Fold mean and standard deviation for each curve, training first.
    summaries = [(np.mean(scores, axis=1), np.std(scores, axis=1))
                 for scores in (fit_scores, held_out_scores)]
    line_styles = [
        {'color': 'blue', 'marker': 'o', 'markersize': 5, 'label': 'training accuracy'},
        {'color': 'green', 'linestyle': '--', 'marker': 's', 'markersize': 5,
         'label': 'validation accuracy'},
    ]

    for (center, spread), line_style in zip(summaries, line_styles):
        plt.plot(param_range, center, **line_style)
        plt.fill_between(param_range, center + spread, center - spread,
                         alpha=0.15, color=line_style['color'])

    plt.grid()
    plt.legend(loc='best')
    plt.xlabel('Parameter')
    plt.ylabel('Score')
    plt.show()
    return True
예제 #36
0
    def valid_curve(self, col2fit, score='accuracy', verbose=0):
        """
        Plots the validation curve of a random forest over ``n_estimators``.

        Prepares ``self.df_full`` via ``self.prepare_data``, then uses the ``col2fit``
        columns as features and the 'rain' column as target. The number of trees is
        swept with ``max_depth`` fixed at 15 and ``cv=10``. Fold-mean training and
        cross-validation scores are plotted with one-standard-deviation bands, and the
        method blocks until the user presses enter.

        :param col2fit: columns of ``self.df_full`` used as features
        :param score: scoring name forwarded to ``validation_curve``, also the y label
        :param verbose: verbosity forwarded to ``validation_curve``
        """
        self.prepare_data(self.df_full, True, col2fit)
        train_values = self.df_full[col2fit].values
        target_values = self.df_full['rain'].values

        ## Number of cpu to use: about three quarters of the cores, never fewer than one
        njobs = max(1, int(0.75*multiprocessing.cpu_count()))
        # Single-argument print(...) calls behave the same on Python 2 and Python 3.
        print('\n\nValidating with njobs = {}\n...\n'.format(njobs))

        ## Parameter info is hard-coded for now, should be improved...
        ## (to sweep "max_depth" instead, fix n_estimators and swap the range below)
        param_name = "n_estimators"
        maxdepth = 15
        param_range = [10, 50, 100, 150, 200, 250, 300, 400, 600, 800, 1000, 1500]

        print('\nValidating on {} with ranges:'.format(param_name))
        print(param_range)

        print('validating...')
        train_scores, test_scores = validation_curve(
            RandomForestClassifier(max_depth=maxdepth), train_values, target_values,
            param_name=param_name, param_range=param_range, cv=10,
            scoring=score, verbose=verbose, n_jobs=njobs)

        ## plotting
        train_scores_mean = N.mean(train_scores, axis=1)
        train_scores_std = N.std(train_scores, axis=1)
        test_scores_mean = N.mean(test_scores, axis=1)
        test_scores_std = N.std(test_scores, axis=1)
        fig = plt.figure()
        plt.title("Validation Curve")
        plt.xlabel(param_name)
        plt.ylabel(score)
        plt.plot(param_range, train_scores_mean, label="Training score", color="r")
        plt.fill_between(param_range, train_scores_mean - train_scores_std,
                         train_scores_mean + train_scores_std, alpha=0.2, color="r")
        plt.plot(param_range, test_scores_mean, label="Cross-validation score",
                 color="g")
        plt.fill_between(param_range, test_scores_mean - test_scores_std,
                         test_scores_mean + test_scores_std, alpha=0.2, color="g")
        plt.grid()
        plt.legend(loc='best')
        fig.show()

        # raw_input only exists on Python 2; input is its Python 3 equivalent.
        try:
            wait_for_enter = raw_input
        except NameError:
            wait_for_enter = input
        wait_for_enter('press enter when finished...')
예제 #37
0
파일: visualize.py 프로젝트: phani111/sml
def validationCurves(model, X_test, y_test, web):
    """Plot a validation curve for the first parameter of ``model`` that can be swept.

    Each parameter name from ``model.get_params()`` is tried in turn over the values
    0..4. The first one for which ``validation_curve`` succeeds is plotted as fold-mean
    training and cross-validation scores with one-standard-deviation bands.

    :param model: estimator exposing ``get_params()``.
    :param X_test: samples passed to ``validation_curve`` as ``X``.
    :param y_test: targets passed to ``validation_curve`` as ``y``.
    :param web: when truthy, return the figure instead of showing it.
    :return: the matplotlib figure when ``web`` is truthy, otherwise None.
    :raises ValueError: when no parameter of the model can be evaluated over the range.
    """
    try:
        from sklearn.model_selection import validation_curve
    except ImportError:
        # Older scikit-learn releases only ship the function in learning_curve.
        from sklearn.learning_curve import validation_curve
    import numpy as np

    param_range = np.arange(0, 5)

    # Best effort: most parameters do not accept the values 0..4, so failures are
    # expected and the next parameter is tried. The parameter name must be passed;
    # without it every call failed and the scores stayed None.
    last_error = None
    for candidate in model.get_params():
        try:
            v_train_scores, v_test_scores = validation_curve(
                model, X_test, y_test,
                param_name=candidate, param_range=param_range)
        except Exception as err:
            last_error = err
        else:
            break
    else:
        raise ValueError(
            "validation_curve failed for every parameter of {!r}; last error: {!r}".format(
                model, last_error))

    fig, ax = plt.subplots()

    ax.set_xlabel("Validation examples")
    ax.set_ylabel("Score")
    print(v_train_scores)
    v_train_scores_mean = np.mean(v_train_scores, axis=1)
    v_train_scores_std = np.std(v_train_scores, axis=1)
    v_test_scores_mean = np.mean(v_test_scores, axis=1)
    v_test_scores_std = np.std(v_test_scores, axis=1)

    ax.fill_between(param_range,
                    v_train_scores_mean - v_train_scores_std,
                    v_train_scores_mean + v_train_scores_std,
                    alpha=0.1,
                    color="orange")
    ax.fill_between(param_range,
                    v_test_scores_mean - v_test_scores_std,
                    v_test_scores_mean + v_test_scores_std,
                    alpha=0.1,
                    color="purple")

    ax.plot(param_range,
            v_train_scores_mean,
            'o-',
            color="orange",
            label="Training score")

    ax.plot(param_range,
            v_test_scores_mean,
            'o-',
            color="purple",
            label="Cross-validation score")

    ax.legend(loc="best")
    if web:
        return fig

    plt.show()
    plt.close()
예제 #38
0
파일: ch9util.py 프로젝트: JASON9620/python
def validate(est, X, y, pname, prange):
    """Run ``validation_curve`` on a deep copy of ``est`` and return its result.

    The copy keeps the caller's estimator object untouched. ``pname`` and ``prange``
    are forwarded as ``param_name`` and ``param_range``; a single job is used.
    """
    sweep_options = {'param_name': pname, 'param_range': prange, 'n_jobs': 1}
    return validation_curve(deepcopy(est), X, y, **sweep_options)
예제 #39
0
    def drawValidationCurve(self):
        """
        Plot training and cross-validation error of ``self.regr`` over ``n_neighbors``.

        Even neighbour counts from 2 to 98 are evaluated on ``self.X_train`` and the
        flattened ``self.y_train`` with ``cv=5`` and the 'mean_squared_error' scorer.
        :return:NA
        """
        y = self.y_train.ravel()
        # Identity ordering: the rows are re-indexed but not shuffled.
        order = np.arange(y.shape[0])
        X, y = self.X_train[order], y[order]

        neighbor_counts = range(2, 100, 2)
        fit_scores, cv_scores = validation_curve(
            self.regr, X, y, "n_neighbors", neighbor_counts,
            cv=5, scoring='mean_squared_error')
        # NOTE(review): the scores are multiplied by -1/5. The sign flip presumably undoes
        # a negated-MSE scorer; the reason for the division by 5 is not evident here.
        fit_scores = -1.0 / 5 * fit_scores
        cv_scores = -1.0 / 5 * cv_scores

        fit_center = np.mean(fit_scores, axis=1)
        fit_spread = np.std(fit_scores, axis=1)
        cv_center = np.mean(cv_scores, axis=1)
        cv_spread = np.std(cv_scores, axis=1)

        # Bands first, then the lines on top of them.
        for center, spread, shade in ((fit_center, fit_spread, "r"),
                                      (cv_center, cv_spread, "g")):
            plt.fill_between(neighbor_counts, center - spread, center + spread,
                             alpha=0.1, color=shade)
        plt.plot(neighbor_counts, fit_center, 'o-', color="r", label="Training MSE")
        plt.plot(neighbor_counts, cv_center, '*-', color="g", label="Cross-validation MSE")

        plt.legend(loc="best")

        plt.xlabel('K Neighbors')
        plt.ylabel('MSE')
        plt.title(
            'Validation Curve with KNN Regression on the parameter of K Neighbors'
        )
        plt.grid(True)
        plt.show()
예제 #40
0
def validation_curve_analysis(estimator=None,
                              param_name=None,
                              param_range=None,
                              issues_train=None,
                              priority_train=None):
    """
    Shows how training and validation scores of an estimator change with one parameter.

    ``validation_curve`` is run with ``cv=10``. The fold means are plotted with a band
    of one standard deviation on a logarithmic x axis.
    :param estimator: Estimator to evaluate.
    :param param_name: Name of the parameter that is varied (also used in the x label).
    :param param_range: Values of the parameter to evaluate.
    :param issues_train: Samples passed to ``validation_curve`` as ``X``.
    :param priority_train: Targets passed to ``validation_curve`` as ``y``.
    :return: None.
    """
    def draw_curve(center, spread, shade, **line_options):
        # One mean line plus its +/- one standard deviation band.
        plt.plot(param_range, center, color=shade, **line_options)
        plt.fill_between(param_range, center + spread, center - spread,
                         alpha=0.15, color=shade)

    fit_scores, held_out_scores = validation_curve(estimator=estimator,
                                                   X=issues_train,
                                                   y=priority_train,
                                                   param_name=param_name,
                                                   param_range=param_range,
                                                   cv=10)
    fit_center = np.mean(fit_scores, axis=1)
    fit_spread = np.std(fit_scores, axis=1)
    held_out_center = np.mean(held_out_scores, axis=1)
    held_out_spread = np.std(held_out_scores, axis=1)

    plt.subplots(figsize=(2.5, 2.5))
    draw_curve(fit_center, fit_spread, 'blue',
               marker='o', markersize=5, label='training accuracy')
    draw_curve(held_out_center, held_out_spread, 'green',
               linestyle='--', marker='s', markersize=5, label='validation accuracy')

    plt.grid()
    plt.xscale('log')
    plt.xlabel('Parameter ' + param_name)
    plt.ylabel('Accuracy')
    plt.legend(loc='lower right')
    plt.show()
    def drawValidationCurve_maxdepth(self):
        """
        Plot training and cross-validation error of ``self.model`` over ``max_depth``.

        Depths 1 to 59 are evaluated on ``self.X_train`` and the flattened
        ``self.y_train`` with ``cv=5`` and the 'mean_squared_error' scorer.
        :return:NA
        """
        y = self.y_train.ravel()
        # Identity ordering: the rows are re-indexed but not shuffled.
        order = np.arange(y.shape[0])
        X, y = self.X_train[order], y[order]

        depths = range(1, 60)
        fit_scores, cv_scores = validation_curve(
            self.model, X, y, "max_depth", depths,
            cv=5, scoring='mean_squared_error')
        # NOTE(review): the scores are multiplied by -1/5. The sign flip presumably undoes
        # a negated-MSE scorer; the reason for the division by 5 is not evident here.
        fit_scores = -1.0 / 5 * fit_scores
        cv_scores = -1.0 / 5 * cv_scores

        fit_center = np.mean(fit_scores, axis=1)
        fit_spread = np.std(fit_scores, axis=1)
        cv_center = np.mean(cv_scores, axis=1)
        cv_spread = np.std(cv_scores, axis=1)

        # Bands first, then the lines on top of them.
        for center, spread, shade in ((fit_center, fit_spread, "r"),
                                      (cv_center, cv_spread, "g")):
            plt.fill_between(depths, center - spread, center + spread,
                             alpha=0.1, color=shade)
        plt.plot(depths, fit_center, 'o-', color="r", label="Training MSE")
        plt.plot(depths, cv_center, '*-', color="g", label="Cross-validation MSE")

        plt.legend(loc="best")

        plt.xlabel('Max Depth')
        plt.ylabel('MSE')
        plt.title(
            'Validation Curve with Random Forest Regression \non the parameter of Max Depth when n_estimators=32'
        )
        plt.grid(True)
        plt.show()
예제 #42
0
def test_validation_curve_clone_estimator():
    """Run validation_curve over ten parameter values with a single-fit mock.

    Presumably the mock rejects a second fit on the same instance, so the test passes
    only if validation_curve works on clones. Confirm against the mock's definition.
    """
    dataset_options = dict(n_samples=2, n_features=1, n_informative=1,
                           n_redundant=0, n_classes=2,
                           n_clusters_per_class=1, random_state=0)
    X, y = make_classification(**dataset_options)

    sweep = np.linspace(1, 0, 10)
    estimator = MockEstimatorWithSingleFitCallAllowed()
    _, _ = validation_curve(estimator, X, y,
                            param_name="param", param_range=sweep, cv=2)
예제 #43
0
def test_validation_curve_clone_estimator():
    """Run validation_curve over ten parameter values with a single-fit mock.

    Presumably the mock rejects a second fit on the same instance, so the test passes
    only if validation_curve works on clones. Confirm against the mock's definition.
    """
    features, targets = make_classification(
        n_samples=2, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)

    descending_values = np.linspace(1, 0, 10)
    _, _ = validation_curve(MockEstimatorWithSingleFitCallAllowed(),
                            features, targets,
                            param_name="param",
                            param_range=descending_values,
                            cv=2)
예제 #44
0
def test_validation_curve():
    """Check the fold-mean scores validation_curve returns for a parameterised mock.

    The mean training score must equal each parameter value and the mean test score
    must equal one minus it, over ten values between 0 and 1 with two folds.
    """
    features, targets = make_classification(
        n_samples=2, n_features=1, n_informative=1, n_redundant=0,
        n_classes=2, n_clusters_per_class=1, random_state=0)
    values = np.linspace(0, 1, 10)

    fit_scores, held_out_scores = validation_curve(
        MockEstimatorWithParameter(), features, targets,
        param_name="param", param_range=values, cv=2)

    assert_array_almost_equal(fit_scores.mean(axis=1), values)
    assert_array_almost_equal(held_out_scores.mean(axis=1), 1 - values)
예제 #45
0
def validation_curve_plot(clf_class, x, y, parameter_range, parameter_name):
    """
    Show a validation curve of an estimator over one of its parameters.

    The fold-mean training and validation scores (``cv=10``) are plotted with
    one-standard-deviation bands on a logarithmic x axis, with the y axis fixed to
    [0.8, 1.0]. The figure is displayed with ``plt.show()``; nothing is returned.

    clf_class = estimator instance to evaluate. Example: RandomForestClassifier()
    x = samples passed to validation_curve as X
    y = targets passed to validation_curve as y
    parameter_range = values of the parameter to evaluate
    parameter_name = name of the estimator parameter that is varied; also used in the
        x-axis label
    """
    train_scores, test_scores = validation_curve(estimator=clf_class,
                                                 X=x,
                                                 y=y,
                                                 param_name=parameter_name,
                                                 param_range=parameter_range,
                                                 cv=10)

    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.plot(parameter_range,
             train_mean,
             color='blue',
             marker='o',
             markersize=5,
             label='training accuracy')

    plt.fill_between(parameter_range,
                     train_mean + train_std,
                     train_mean - train_std,
                     alpha=0.15,
                     color='blue')

    plt.plot(parameter_range,
             test_mean,
             color='green',
             linestyle='--',
             marker='s',
             markersize=5,
             label='validation accuracy')

    plt.fill_between(parameter_range,
                     test_mean + test_std,
                     test_mean - test_std,
                     alpha=0.15,
                     color='green')

    plt.grid()
    plt.xscale('log')
    plt.legend(loc='lower right')
    # Label the axis with the parameter actually swept; the label used to be fixed to
    # 'Parameter C' whatever parameter_name was.
    plt.xlabel('Parameter ' + parameter_name)
    plt.ylabel('Accuracy')
    plt.ylim([0.8, 1.0])
    plt.tight_layout()
    plt.show()
예제 #46
0
def plot_validation_curve(estimator,
                          title,
                          X,
                          y,
                          param_name,
                          param_range,
                          ylim=None):
    """
    Plot training and cross-validation scores of an estimator while one of
    its parameters is varied, using 5-fold cross-validation.

    :param estimator: sklearn regressor object
    :param title: Title of the curve
    :param X: predictors
    :param y: response
    :param param_name: parameter of the regression obj to do cross validation on, ex:Number of trees for RF
    :param param_range: range of values of the parameter
    :param ylim: optional (low, high) pair unpacked into ``plt.ylim``; the
        axis limits are left automatic when None
    :return: None; plots the validation curve for the parameter and shows it
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel(param_name)
    plt.ylabel("RMSLE")
    # rsmle_ is defined elsewhere in the project; presumably it computes the
    # root mean squared logarithmic error (the y-axis label says so) -- confirm.
    rsmle_score = make_scorer(rsmle_, greater_is_better=True)
    train_scores, test_scores = validation_curve(estimator,
                                                 X,
                                                 y,
                                                 param_name=param_name,
                                                 param_range=param_range,
                                                 cv=5,
                                                 scoring=rsmle_score,
                                                 n_jobs=1)
    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2 (the original print statement is a SyntaxError on Python 3).
    print("cross validation done...plotting the graph")
    # Scores have one row per parameter value and one column per fold.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    plt.grid()
    plt.plot(param_range, train_scores_mean, label="Training score", color="r")
    plt.fill_between(param_range,
                     train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std,
                     alpha=0.2,
                     color="r")
    plt.plot(param_range,
             test_scores_mean,
             label="Cross-validation score",
             color="g")
    plt.fill_between(param_range,
                     test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std,
                     alpha=0.2,
                     color="g")
    plt.legend(loc="best")
    plt.show()
예제 #47
0
def validation_curves(model, X, y, n_iter, test_size):
    """
    Score ``model`` over a logarithmic grid of ``C`` values.

    Ten values of ``C`` between 1e-5 and 1e5 are evaluated with
    ``validation_curve``, using a ``ShuffleSplit`` over ``X.shape[0]``
    samples with a fixed ``random_state`` of 0.

    :param model: estimator exposing a ``C`` parameter
    :param X: feature data; only ``X.shape[0]`` is read here
    :param y: target values
    :param n_iter: forwarded to ``ShuffleSplit`` as ``n_iter``
    :param test_size: forwarded to ``ShuffleSplit`` as ``test_size``
    :return: tuple ``(Cs, train_scores, test_scores)``
    """
    grid_points = 10
    c_grid = np.logspace(-5, 5, grid_points)

    splitter = ShuffleSplit(
        X.shape[0],
        n_iter=n_iter,
        test_size=test_size,
        random_state=0,
    )

    fit_scores, held_out_scores = validation_curve(
        model, X, y, 'C', c_grid, cv=splitter)

    return c_grid, fit_scores, held_out_scores
예제 #48
0
def test_validation_curve():
    '''
    Demonstrate validation_curve: show how the C parameter of a LinearSVC
    classifier affects prediction accuracy on the digits data set.

    :return:  None
    '''
    # Load the data.
    digits = load_digits()
    X, y = digits.data, digits.target

    # Compute the validation curve over a log-spaced range of C.
    param_name = "C"
    param_range = np.logspace(-2, 2)
    train_scores, test_scores = validation_curve(
        LinearSVC(), X, y,
        param_name=param_name,
        param_range=param_range,
        cv=10,
        scoring="accuracy")

    # Plot: for every C, the mean over the 10 folds as a line and
    # +/- one standard deviation as a shaded band.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    curves = (
        (train_scores, "Training Accuracy", "r"),
        (test_scores, "Testing Accuracy", "g"),
    )
    for fold_scores, legend_text, colour in curves:
        centre = np.mean(fold_scores, axis=1)
        spread = np.std(fold_scores, axis=1)
        ax.semilogx(param_range, centre, label=legend_text, color=colour)
        ax.fill_between(param_range,
                        centre - spread,
                        centre + spread,
                        alpha=0.2,
                        color=colour)

    ax.set_title("Validation Curve with LinearSVC")
    ax.set_xlabel("C")
    ax.set_ylabel("Score")
    ax.set_ylim(0, 1.1)
    ax.legend(loc='best')
    plt.show()
    def drawValidationCurve(self):
        """
        Draw the validation curve of ``self.clf`` over ``max_depth``.

        The training data is shuffled, ``max_depth`` is swept over 2..59 with
        5-fold cross-validation, and the mean scores are plotted with a band
        of one standard deviation either side.
        :return:NA
        """
        X, y = self.X_train, self.y_train.ravel()
        # Shuffle rows of X and y with the same random order.
        order = np.arange(y.shape[0])
        np.random.shuffle(order)
        X, y = X[order], y[order]

        depths = range(2, 60)
        train_scores, valid_scores = validation_curve(
            self.clf, X, y, "max_depth", depths, cv=5)

        # (mean, std, colour, line format, legend label) per curve.
        curves = [
            (np.mean(scores, axis=1), np.std(scores, axis=1),
             colour, fmt, label)
            for scores, colour, fmt, label in (
                (train_scores, "r", 'o-', "Training Precision"),
                (valid_scores, "g", '*-', "Cross-validation Precision"),
            )
        ]

        # Bands first, then lines, so the lines are drawn on top.
        for centre, spread, colour, _fmt, _label in curves:
            plt.fill_between(depths,
                             centre - spread,
                             centre + spread,
                             alpha=0.1,
                             color=colour)
        for centre, _spread, colour, fmt, label in curves:
            plt.plot(depths, centre, fmt, color=colour, label=label)

        plt.legend(loc="best")

        plt.xlabel('Max Depth(log2(all features) to be considered) ')
        plt.ylabel('Precision')
        plt.title(
            'Validation Curve with Decision Tree on the parameter of Max Depth'
        )
        plt.grid(True)
        plt.show()
    def drawValidationCurve(self):
        """
        Draw the validation curve of ``self.ada`` over ``n_estimators``.

        The training data is shuffled, ``n_estimators`` is swept over 2..74
        with 5-fold cross-validation, and the mean scores are plotted with a
        band of one standard deviation either side.
        :return:NA
        """
        X, y = self.X_train, self.y_train.ravel()
        # Shuffle rows of X and y with the same random order.
        order = np.arange(y.shape[0])
        np.random.shuffle(order)
        X, y = X[order], y[order]

        estimator_counts = range(2, 75)
        train_scores, valid_scores = validation_curve(
            self.ada, X, y, "n_estimators", estimator_counts, cv=5)

        # (mean, std, colour, line format, legend label) per curve.
        curves = [
            (np.mean(scores, axis=1), np.std(scores, axis=1),
             colour, fmt, label)
            for scores, colour, fmt, label in (
                (train_scores, "r", 'o-', "Training Precision"),
                (valid_scores, "g", '*-', "Cross-validation Precision"),
            )
        ]

        # Bands first, then lines, so the lines are drawn on top.
        for centre, spread, colour, _fmt, _label in curves:
            plt.fill_between(estimator_counts,
                             centre - spread,
                             centre + spread,
                             alpha=0.1,
                             color=colour)
        for centre, _spread, colour, fmt, label in curves:
            plt.plot(estimator_counts, centre, fmt, color=colour, label=label)

        plt.legend(loc="best")

        plt.xlabel('Estimators')
        plt.ylabel('Precision')
        plt.title(
            'Validation Curve with AdaBoost-DecisionTree on the parameter of Estimators'
        )
        plt.grid(True)
        plt.show()
예제 #51
0
def test_validation_curve():
    """
    Check validation_curve against MockEstimatorWithParameter.

    The run must emit no warnings, the mean training score must match each
    parameter value, and the mean test score must match one minus it.
    """
    X, y = make_classification(
        n_samples=2,
        n_features=1,
        n_informative=1,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1,
        random_state=0,
    )
    param_range = np.linspace(0, 1, 10)

    # Record every warning raised during the call so it can be rejected below.
    with warnings.catch_warnings(record=True) as caught:
        scores = validation_curve(
            MockEstimatorWithParameter(),
            X,
            y,
            param_name="param",
            param_range=param_range,
            cv=2,
        )
        train_scores, test_scores = scores
    if caught:
        raise RuntimeError("Unexpected warning: %r" % caught[0].message)

    assert_array_almost_equal(train_scores.mean(axis=1), param_range)
    assert_array_almost_equal(test_scores.mean(axis=1), 1 - param_range)
예제 #52
0
    def test_validation_curve(self):
        """
        The ModelFrame ``learning_curve.validation_curve`` accessor must give
        the same two score arrays as calling ``lc.validation_curve`` directly
        on the digits data with an equivalent SVC.
        """
        digits = datasets.load_digits()
        frame = pdml.ModelFrame(digits)

        gammas = np.logspace(-2, -1, 2)

        wrapped_svc = frame.svm.SVC(random_state=self.random_state)
        actual = frame.learning_curve.validation_curve(
            wrapped_svc, 'gamma', gammas)
        reference = lc.validation_curve(
            svm.SVC(random_state=self.random_state),
            digits.data,
            digits.target,
            'gamma',
            gammas)

        self.assertEqual(len(actual), 2)
        # Index 0 holds the training scores, index 1 the test scores.
        for position in (0, 1):
            self.assert_numpy_array_almost_equal(actual[position],
                                                 reference[position])
예제 #53
0
def validation_curve_plot(clf_class, x, y, parameter_range, parameter_name):
    """
    Plot training and validation scores against one estimator parameter.

    Calls ``validation_curve`` with 10-fold cross-validation, then draws the
    per-value mean score of the training and validation folds, each with a
    shaded band of one standard deviation either side.

    :param clf_class: estimator instance handed to ``validation_curve`` as
        ``estimator``. Example: RandomForestClassifier()
    :param x: feature data, forwarded as ``X``
    :param y: target values, forwarded as ``y``
    :param parameter_range: values to try for the parameter; also used as the
        x coordinates of the plot
    :param parameter_name: name of the estimator parameter being varied
    :return: None. The figure is shown with ``plt.show()``.

    The x axis is logarithmic and the y axis is fixed to [0.8, 1.0].
    NOTE(review): the curves are labelled "accuracy", but no ``scoring`` is
    passed, so the metric is whatever the estimator scores by default --
    confirm for non-classifiers.
    """
    clf = clf_class
    param_range = parameter_range
    train_scores, test_scores = validation_curve(
                    estimator=clf,
                    X=x,
                    y=y,
                    param_name=parameter_name,
                    param_range=param_range,
                    cv=10)

    # One row per parameter value, one column per CV fold: reduce over folds.
    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.plot(param_range, train_mean,
             color='blue', marker='o',
             markersize=5, label='training accuracy')

    plt.fill_between(param_range, train_mean + train_std,
                     train_mean - train_std, alpha=0.15,
                     color='blue')

    plt.plot(param_range, test_mean,
             color='green', linestyle='--',
             marker='s', markersize=5,
             label='validation accuracy')

    plt.fill_between(param_range,
                     test_mean + test_std,
                     test_mean - test_std,
                     alpha=0.15, color='green')

    plt.grid()
    plt.xscale('log')
    plt.legend(loc='lower right')
    # Label the axis with the parameter actually swept; for
    # parameter_name == 'C' this is still exactly 'Parameter C'.
    plt.xlabel('Parameter {}'.format(parameter_name))
    plt.ylabel('Accuracy')
    plt.ylim([0.8, 1.0])
    plt.tight_layout()
    plt.show()
예제 #54
0
def plot_validation_curve(
        estimator, X, y, param_name, param_range, title="Validation Curve",
        ylim=None, semilog=False,
        cv=None, n_jobs=1, scoring=None, ax=None):
    """
    Draw training and cross-validation score curves for one parameter.

    ``cv``, ``scoring`` and ``n_jobs`` are forwarded to ``validation_curve``.
    Each curve is the mean over folds, with a shaded band of one standard
    deviation either side.

    :param estimator: estimator passed to ``validation_curve``
    :param X: feature data
    :param y: target values
    :param param_name: name of the parameter to vary; also the x-axis label
    :param param_range: values tried for the parameter
    :param title: axes title
    :param ylim: optional y-axis limits, applied only when not None
    :param semilog: draw the curves with ``semilogx`` instead of ``plot``
    :param ax: axes to draw on; a new figure and axes are created when None
    :return: the axes that were drawn on
    """
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, scoring=scoring, n_jobs=n_jobs)
    train_centre = np.mean(train_scores, axis=1)
    train_spread = np.std(train_scores, axis=1)
    test_centre = np.mean(test_scores, axis=1)
    test_spread = np.std(test_scores, axis=1)

    ax1 = plt.subplots()[1] if ax is None else ax

    ax1.set_title(title)
    ax1.set_xlabel(param_name)
    ax1.set_ylabel("Score")
    ax1.grid()
    if ylim is not None:
        ax1.set_ylim(ylim)

    # Choose the line-drawing method once instead of branching per curve.
    draw_line = ax1.semilogx if semilog else ax1.plot

    curves = (
        (train_centre, train_spread, "Training score", "r"),
        (test_centre, test_spread, "Cross-validation score", "g"),
    )
    for centre, spread, legend_text, colour in curves:
        draw_line(param_range, centre, 'o-', label=legend_text, color=colour)
        ax1.fill_between(param_range, centre - spread, centre + spread,
                         alpha=0.2, color=colour)

    ax1.legend(loc="best")
    return ax1
예제 #55
0
def plotValidationCurve(estimator, title, X, y, param_name, param_range, cv=5):
    """
    Draw accuracy validation curves for one parameter on a log x axis.

    ``validation_curve`` is run with ``scoring="accuracy"``; the training and
    cross-validation means are drawn with a shaded band of one standard
    deviation either side. Nothing is shown here.

    :return: the ``sns.plt`` object that was drawn on
    """
    trainScores, testScores = validation_curve(
        estimator, X, y,
        param_name=param_name,
        param_range=param_range,
        cv=cv,
        scoring="accuracy",
    )

    canvas = sns.plt
    canvas.title(title)
    canvas.xlabel(param_name)
    canvas.ylabel("Accuracy Score")
    canvas.ylim(0.0, 1.1)

    curves = (
        (trainScores, "Training score", "r"),
        (testScores, "Cross-validation score", "b"),
    )
    for foldScores, legendText, colour in curves:
        centre = np.mean(foldScores, axis=1)
        spread = np.std(foldScores, axis=1)
        canvas.semilogx(param_range, centre, label=legendText, color=colour)
        canvas.fill_between(param_range, centre - spread, centre + spread,
                            alpha=0.2, color=colour)

    canvas.legend(loc="best")
    return canvas
예제 #56
0
	def clf_validation_curve(self):
		"""
		Plot accuracy against the regularisation parameter C.

		A StandardScaler + L2 LogisticRegression pipeline is scored with
		10-fold cross-validation on self.X_train / self.y_train for six
		values of C, and the mean scores are drawn with a band of one
		standard deviation either side on a log x axis.
		"""
		c_values = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
		# A PCA(n_components=2) step between scaling and the classifier is
		# disabled in this pipeline.
		steps = [
			('scl', StandardScaler()),
			('clf', LogisticRegression(penalty='l2', random_state=0)),
		]
		pipe_lr = Pipeline(steps)
		train_scores, test_scores = validation_curve(
			estimator=pipe_lr,
			X=self.X_train,
			y=self.y_train,
			param_name='clf__C',
			param_range=c_values,
			cv=10)

		curves = (
			(train_scores,
			 dict(color='blue', marker='o',
				  markersize=5, label='training accuracy')),
			(test_scores,
			 dict(color='green', linestyle='--', marker='s',
				  markersize=5, label='validation accuracy')),
		)
		for fold_scores, line_style in curves:
			centre = np.mean(fold_scores, axis=1)
			spread = np.std(fold_scores, axis=1)
			plt.plot(c_values, centre, **line_style)
			plt.fill_between(c_values,
				centre + spread,
				centre - spread,
				alpha=0.15, color=line_style['color'])

		plt.grid()
		plt.xscale('log')
		plt.legend(loc='lower right')
		plt.xlabel('Parameter C')
		plt.ylabel('Accuracy')
		plt.ylim([0.8, 1.0])
		plt.show()