Example #1
def evaluate(df, modelv="gnb", race="W", census=False, report=True, roc=True, pr=True):
    """ Run model evaluations for a specified model and race class """
    # NOTE: DIR, MODEL_COLS, model_string and prep_data are module-level names defined
    # elsewhere in the source file; joblib, numpy (np) and the yellowbrick classifier
    # visualizers are likewise assumed to be imported at module scope.

    # get model
    models = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" % (modelv, model_string))
    model = models[race]

    # get data
    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    # transform data
    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    # binary target: True where the row's race_code matches the requested race class
    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    # run specified evaluation visualizer
    if report:
        visualizer = ClassificationReport(model, classes=model.classes_, support=True)
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show() 

    if roc:
        visualizer = ROCAUC(model, classes=["W", "not-W"])
        visualizer.score(df[MODEL_COLS], tmpa)
        visualizer.show()

    if pr:
        viz = PrecisionRecallCurve(model, is_fitted=True, classes=["W", "not-W"])
        viz.score(df[MODEL_COLS], tmpa)
        viz.show()
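A minimal usage sketch for evaluate(), assuming the caller loads a DataFrame that already contains the name columns and a race_code column; the CSV path and its contents below are illustrative, not part of the original example.

# hypothetical usage -- file path and DataFrame contents are illustrative only
import pandas as pd

voters = pd.read_csv("voters_sample.csv")   # needs first_name, middle_name, last_name, race_code
evaluate(voters, modelv="rf", race="W", report=True, roc=True, pr=False)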

# imports needed by this example (assumed from scikit-learn, yellowbrick, matplotlib, pandas)
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import svm
from sklearn.feature_selection import RFE
from sklearn.metrics import f1_score
from yellowbrick.classifier import (ClassificationReport, ClassPredictionError,
                                    ConfusionMatrix, PrecisionRecallCurve, ROCAUC)
from yellowbrick.model_selection import LearningCurve


def eva_model(c, n, X, y, X_test, y_test, class_names, outdir):
    model = svm.LinearSVC(class_weight='balanced', dual=False, max_iter=10000, C=c)
    rfe = RFE(model, n_features_to_select=n)

    ## learning curve
    plt.clf()
    viz_LC = LearningCurve(
        rfe, scoring='f1_weighted', n_jobs=4
    )
    viz_LC.fit(X, y)
    viz_LC.show(outpath=outdir + '/LC.png')

    ## classification report
    plt.clf()
    viz_CR = ClassificationReport(rfe, classes=class_names, support=True)
    viz_CR.fit(X, y)
    viz_CR.score(X_test, y_test)
    viz_CR.show(outpath=outdir + '/CR.png')

    ## confusion matrix
    plt.clf()
    viz_CM = ConfusionMatrix(rfe, classes=class_names)
    viz_CM.fit(X, y)
    viz_CM.score(X_test, y_test)
    viz_CM.show(outpath=outdir + '/CM.png')

    ## precision recall curve
    plt.clf()
    viz_PRC = PrecisionRecallCurve(rfe, per_class=True, iso_f1_curves=True,
                                   fill_area=False, micro=False, classes=class_names,
                                   size=(1080, 720))
    viz_PRC.fit(X, y)
    viz_PRC.score(X_test, y_test)
    viz_PRC.show(outpath=outdir + '/PRC.png')

    ## class prediction error
    plt.clf()
    viz_CPE = ClassPredictionError(
        rfe, classes=class_names
    )
    viz_CPE.fit(X, y)
    viz_CPE.score(X_test, y_test)
    viz_CPE.show(outpath=outdir + '/CPE.png')

    ## ROCAUC
    plt.clf()
    viz_RA = ROCAUC(rfe, classes=class_names, size=(1080,720))
    viz_RA.fit(X, y)
    viz_RA.score(X_test, y_test)   # score on the held-out test set, like the other visualizers
    viz_RA.show(outpath=outdir + '/RA.png')

    fit = rfe.fit(X, y)
    y_predict = fit.predict(X_test)
    f1 = f1_score(y_test, y_predict, average='weighted')

    # record which feature columns RFE retained
    features_retained_RFE = X.columns[rfe.get_support()].values
    feature_df = pd.DataFrame(features_retained_RFE.tolist())
    feature_df.to_csv(outdir + '/features.csv', sep='\t', index=False)

    return f1
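A runnable sketch of calling eva_model, assuming X is a pandas DataFrame (the feature listing at the end indexes X.columns); the breast-cancer dataset and the C / n_features values here are illustrative only.

# hypothetical usage -- dataset, C and n_features values are illustrative only
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

bunch = load_breast_cancer(as_frame=True)
X_tr, X_te, y_tr, y_te = train_test_split(bunch.data, bunch.target,
                                          test_size=0.2, random_state=0)
f1 = eva_model(c=1.0, n=10, X=X_tr, y=y_tr, X_test=X_te, y_test=y_te,
               class_names=list(bunch.target_names), outdir=".")
print("weighted F1 on the test set:", f1)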
Example #3
def eval_models(df,
                race="W",
                models=["gnb", "rf", "xgb"],
                census=False,
                report=False,
                roc=False,
                pr=False,
                cpe=False):
    """ Run evaluation on a set of models and a single race class """

    df = prep_data(df)
    tes = joblib.load(DIR + "/data/models/transformers_binary.joblib")

    for col in ["first_name", "last_name", "middle_name"]:
        te = tes[race][col]
        df[col] = te.transform(df[col])
        df[col] = df[col].fillna(0)

    tmpa = np.where(df.race_code == race, True, False)
    df = df.fillna(0)

    for modelv in models:

        # load the per-race model dictionary for this model type
        # (model_string is assumed to be defined at module scope, as in evaluate above)
        model_dict = joblib.load(DIR + "/data/models/models_binary_%s%s.joblib" %
                                 (modelv, model_string))
        model = model_dict[race]

        model.target_type_ = "binary"

        if report:
            visualizer = ClassificationReport(model,
                                              classes=model.classes_,
                                              support=True)
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()

        if roc:
            visualizer = ROCAUC(model, classes=["W", "not-W"])
            visualizer.score(df[MODEL_COLS], tmpa)
            visualizer.show()

        if pr:
            viz = PrecisionRecallCurve(model,
                                       is_fitted=True,
                                       classes=["W", "not-W"])
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()

        if cpe:
            viz = ClassPredictionError(model)
            viz.score(df[MODEL_COLS], tmpa)
            viz.show()
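As with evaluate() in the first example, a hypothetical invocation, assuming a DataFrame with the name columns and race_code; the path is illustrative only.

# hypothetical usage -- path and column contents are illustrative only
import pandas as pd

voters = pd.read_csv("voters_sample.csv")
eval_models(voters, race="W", models=["gnb", "xgb"], report=True, roc=True, pr=True)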
def plot_precision_recall_curve_1(X_train, y_train, X_test, y_test, model):
    """
    Function to plot precision recall curve

    :param X_train: training set
    :param y_train: training set target
    :param X_test: test set
    :param y_test: test set target
    :param model: model to analyze performance for
    :return: None (the precision-recall plot is rendered via viz.show())
    """
    viz = PrecisionRecallCurve(model)
    viz.fit(X_train, y_train)
    viz.score(X_test, y_test)
    viz.show()
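A self-contained sketch of calling the helper above, assuming a synthetic binary classification dataset; everything below is illustrative.

# hypothetical usage -- synthetic data for illustration only
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
plot_precision_recall_curve_1(X_tr, y_tr, X_te, y_te, LogisticRegression(max_iter=1000))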
# df_abalone is assumed to be a pandas DataFrame loaded earlier in the script
# (the UCI abalone dataset, with a categorical 'sex' column and an integer 'rings' target)
# print(df_abalone.head())

sns.countplot(data=df_abalone, x='sex', hue='rings', palette='gist_heat')

# plt.show()

# print(df_abalone.describe())
# df_abalone.info()

le = preprocessing.LabelEncoder()
df_abalone['sex'] = le.fit_transform(df_abalone['sex'])
# print(df_abalone.head())

cols = [col for col in df_abalone.columns if col != "rings"]  # feature columns: everything except the target
# print(cols)
data = df_abalone[cols]
target = df_abalone['rings']
data_train, data_test, target_train, target_test = train_test_split(data, target, test_size=0.20, random_state=10)

data_train.info()

logReg = LogisticRegression()
pred = logReg.fit(data_train, target_train).predict(data_test)
# print(pred)
print("Logistic Regression accuracy: ", accuracy_score(target_test, pred, normalize=True))

visualizer = PrecisionRecallCurve(logReg)
visualizer.fit(data_train, target_train)
visualizer.score(data_test, target_test) 
visualizer.show()
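Because rings is a multiclass target, yellowbrick draws a micro-averaged curve by default; a per-class variant (the same options used in eva_model above) might look like the sketch below, reusing the same train/test split.

# optional variant (illustrative): per-class precision-recall curves for 'rings'
viz_pc = PrecisionRecallCurve(LogisticRegression(), per_class=True, micro=False)
viz_pc.fit(data_train, target_train)
viz_pc.score(data_test, target_test)
viz_pc.show()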