Exemple #1
0
def print_tables(x, score):
    for title, value, d in x:
        table = _df_to_simpletable(d, float_format="%.2f", index=False)
        s = table.as_latex_tabular()
        s = single_tabular(s, title, score)
        print(s)
        print('\n')
Exemple #2
0
def _linear_regression_train(table,
                             feature_cols,
                             label_col,
                             fit_intercept=True,
                             is_vif=False,
                             vif_threshold=10):
    feature_names, features = check_col_type(table, feature_cols)
    label = table[label_col]

    if fit_intercept == True:
        features = sm.add_constant(features, has_constant='add')
        lr_model_fit = sm.OLS(label, features).fit()
    else:
        lr_model_fit = sm.OLS(label, features).fit()

    predict = lr_model_fit.predict(features)
    residual = label - predict

    summary = lr_model_fit.summary()
    summary_tables = simple_tables2df_list(summary.tables, drop_index=True)
    summary0 = summary_tables[0]
    summary1 = summary_tables[1]

    if type(features) != type(table):
        features = pd.DataFrame(features)

    if is_vif:
        summary1['VIF'] = [
            variance_inflation_factor(features.values, i)
            for i in range(features.shape[1])
        ]
        summary1['VIF>{}'.format(vif_threshold)] = summary1['VIF'].apply(
            lambda _: 'true' if _ > vif_threshold else 'false')
    summary.tables[1] = _df_to_simpletable(summary1)
    summary2 = summary_tables[2]

    html_result = summary.as_html()

    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)
    x = predict
    p1x = np.min(x)
    p2x = np.max(x)
    plt.plot([p1x, p2x], [p1x, p2x], 'r--')
    fig_actual_predict = plt2MD(plt)

    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    rb = BrtcReprBuilder()
    rb.addMD(
        strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """))
    rb.addHTML(html_result)
    rb.addMD(
        strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3)))

    model = _model_dict('linear_regression_model')
    model['features'] = feature_cols
    model['label'] = label_col
    model['coefficients'] = lr_model_fit.params
    model['fit_intercept'] = fit_intercept
    model['r2'] = lr_model_fit.rsquared
    model['adjusted_r2'] = lr_model_fit.rsquared_adj
    model['aic'] = lr_model_fit.aic
    model['bic'] = lr_model_fit.bic
    model['f_static'] = lr_model_fit.fvalue
    model['tvalues'] = lr_model_fit.tvalues
    model['pvalues'] = lr_model_fit.pvalues
    model['_repr_brtc_'] = rb.get()

    model['summary0'] = summary0
    model['summary1'] = summary1
    model['summary2'] = summary2
    lr_model_fit.remove_data()
    model['lr_model'] = lr_model_fit
    return {'model': model}
Exemple #3
0
def _linear_regression_train(table,
                             feature_cols,
                             label_col,
                             fit_intercept=True,
                             is_vif=True,
                             vif_threshold=10):
    features = table[feature_cols]
    label = table[label_col]
    lr_model = LinearRegression(fit_intercept)
    lr_model.fit(features, label)

    predict = lr_model.predict(features)
    residual = label - predict

    if fit_intercept == True:
        features = sm.add_constant(features)
        lr_model_fit = sm.OLS(label, features).fit()
    else:
        lr_model_fit = sm.OLS(label, features).fit()

    summary = lr_model_fit.summary()
    summary_tables = simple_tables2df_list(summary.tables, drop_index=True)
    summary0 = summary_tables[0]
    summary1 = summary_tables[1]
    if is_vif:
        summary1['VIF'] = [
            variance_inflation_factor(features.values, i)
            for i in range(features.shape[1])
        ]
        summary1['VIF>{}'.format(vif_threshold)] = summary1['VIF'].apply(
            lambda _: 'true' if _ > vif_threshold else 'false')
    summary.tables[1] = _df_to_simpletable(summary1)
    summary2 = summary_tables[2]

    html_result = summary.as_html()

    plt.figure()
    plt.scatter(predict, label)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Actual values for ' + label_col)
    x = predict
    y = np.array(label)
    a = x.size
    b = np.sum(x)
    c = b
    d = 0
    for i in x:
        d += +i * i
    e = np.sum(y)
    f = 0
    for i in range(0, x.size - 1):
        f += x[i] * y[i]
    det = a * d - b * c
    aa = (d * e - b * f) / det
    bb = (a * f - c * e) / det
    p1x = np.min(x)
    p1y = aa + bb * p1x
    p2x = np.max(x)
    p2y = aa + bb * p2x
    plt.plot([p1x, p2x], [p1y, p2y], 'r--')
    fig_actual_predict = plt2MD(plt)

    plt.figure()
    plt.scatter(predict, residual)
    plt.xlabel('Predicted values for ' + label_col)
    plt.ylabel('Residuals')
    plt.axhline(y=0, color='r', linestyle='--')
    fig_residual_1 = plt2MD(plt)

    plt.figure()
    sm.qqplot(residual, line='s')
    plt.ylabel('Residuals')
    fig_residual_2 = plt2MD(plt)

    plt.figure()
    sns.distplot(residual)
    plt.xlabel('Residuals')
    fig_residual_3 = plt2MD(plt)

    rb = BrtcReprBuilder()
    rb.addMD(
        strip_margin("""
    | ## Linear Regression Result
    | ### Summary
    |
    """))
    rb.addHTML(html_result)
    rb.addMD(
        strip_margin("""
    |
    | ### Predicted vs Actual
    | {image1}
    |
    | ### Fit Diagnostics
    | {image2}
    | {image3}
    | {image4}
    """.format(image1=fig_actual_predict,
               image2=fig_residual_1,
               image3=fig_residual_2,
               image4=fig_residual_3)))

    model = _model_dict('linear_regression_model')
    model['features'] = feature_cols
    model['label'] = label_col
    model['coefficients'] = lr_model_fit.params
    model['r2'] = lr_model_fit.rsquared
    model['adjusted_r2'] = lr_model_fit.rsquared_adj
    model['aic'] = lr_model_fit.aic
    model['bic'] = lr_model_fit.bic
    model['f_static'] = lr_model_fit.fvalue
    model['tvalues'] = lr_model_fit.tvalues
    model['pvalues'] = lr_model_fit.pvalues
    model['lr_model'] = lr_model
    model['_repr_brtc_'] = rb.get()

    model['summary0'] = summary0
    model['summary1'] = summary1
    model['summary2'] = summary2

    return {'model': model}