Example #1
def test_show_prediction():
    clf = LogisticRegression(C=100)
    X = [[0, 0], [1, 1], [0, 1]]
    y = ['a', 'b', 'a']
    clf.fit(X, y)

    doc = np.array([0, 1])

    html = eli5.show_prediction(clf, doc)
    write_html(clf, html.data, '')
    assert isinstance(html, HTML)
    assert 'y=b' in html.data
    assert 'BIAS' in html.data
    assert 'x1' in html.data

    # explain_prediction arguments are supported
    html = eli5.show_prediction(clf, doc, feature_names=['foo', 'bar'])
    write_html(clf, html.data, '')
    assert 'x1' not in html.data
    assert 'bar' in html.data

    # format_as_html arguments are supported
    html = eli5.show_prediction(clf, doc, show=['method'])
    write_html(clf, html.data, '')
    assert 'y=b' not in html.data
    assert 'BIAS' not in html.data
    assert 'Explained as' in html.data
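For reference, a minimal sketch of the imports this test relies on; the `write_html` helper below is a hypothetical stand-in for the one used in eli5's test suite.

import numpy as np
import eli5
from IPython.display import HTML  # eli5.show_prediction returns an IPython HTML object
from sklearn.linear_model import LogisticRegression


def write_html(clf, html_data, postfix=''):
    # hypothetical stand-in: dump the rendered explanation to disk for manual inspection
    fname = 'show_prediction_{}{}.html'.format(type(clf).__name__, postfix)
    with open(fname, 'w') as f:
        f.write(html_data)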
Example #2
def show_local_interpretation_eli5(dataset, clf, pred, target_labels, features,
                                   dim_model, slider_idx):
    """show the interpretation of individual decision points"""
    info_local = st.button("How this works")
    if info_local:
        st.info("""
        **What's included**  
        Input data is split 80/20 into training and testing.
        Each individual test datapoint can be inspected by index.

        **How to read the table**  
        The table describes how an individual datapoint is classified.
        Contribution refers to the extent and direction of a feature's influence on the outcome.
        Value refers to the value of the feature in the dataset. Bias is the model intercept.
        """)

    if dim_model == "XGBoost":
        local_interpretation = eli5.show_prediction(
            clf,
            doc=dataset.iloc[slider_idx, :],
            show_feature_values=True,
            top=5)
    else:
        local_interpretation = eli5.show_prediction(
            clf,
            doc=dataset.iloc[slider_idx, :],
            target_names=target_labels,
            show_feature_values=True,
            top=5,
            targets=[True],
        )
    st.markdown(
        local_interpretation.data.replace("\n", ""),
        unsafe_allow_html=True,
    )
Example #3
def explain_pred_contrib(id,
                         clf,
                         X,
                         features,
                         cats=None,
                         waterfall={
                             'rotation_value': 60,
                             'threshold': None
                         }):
    try:
        p = clf.predict_proba(X.loc[X.index == id])[:, 1]
    except:
        p = clf.predict_proba(X.loc[X.index == id].values)[:, 1]
    print(
        f'Prediction explanation for ID: {id}; Probability of event (y=1): {np.round(p[0], 3)}\nModel used: {type(clf)}'
    )
    try:
        df = eli5.show_prediction(clf,
                                  X.loc[id],
                                  show_feature_values=True,
                                  feature_names=features)
        exp = eli5.explain_prediction_df(clf,
                                         X.loc[id],
                                         feature_names=features)
    except:
        df = eli5.show_prediction(clf,
                                  X.loc[id].values,
                                  show_feature_values=True,
                                  feature_names=features)
        exp = eli5.explain_prediction_df(clf,
                                         X.loc[id].values,
                                         feature_names=features)

    if cats is not None:
        c = id2class(exp, cats)
        for k, v in c.items():
            df.data = df.data.replace(k, v)

    if waterfall is not None:
        rot = waterfall['rotation_value']
        threshold = waterfall['threshold']
        waterfall_chart.plot(exp.feature,
                             exp.weight,
                             rotation_value=rot,
                             net_label="Final Score/Proba",
                             other_label="Minor Features",
                             formatting="{:,.2f}",
                             threshold=threshold,
                             Title='Waterfall of features contributions')
    return df
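A hedged usage sketch for the function above; the fitted classifier, the ID-indexed DataFrame, and the threshold value are all assumptions, not part of the original project.

# assumed names: `model` is a fitted binary classifier and `X` is a DataFrame indexed by customer ID
df_html = explain_pred_contrib(10001,
                               clf=model,
                               X=X,
                               features=X.columns.tolist(),
                               cats=None,
                               waterfall={'rotation_value': 60, 'threshold': 0.01})
df_html  # displays the eli5 HTML table in a notebook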
Example #4
    def explain(self, img_fpath):
        """
        Highlights image to explain prediction
        """
        #if U.is_tf_keras():
        #warnings.warn("currently_unsupported: explain() method is not available because tf.keras is "+\
        #"not yet adequately supported by the eli5 library. You can switch to " +\
        #"stand-alone Keras by setting os.environ['TF_KERAS']='0'" )
        #return

        try:
            import eli5
            from eli5.lime import TextExplainer
        except:
            msg = 'ktrain requires a forked version of eli5 to support tf.keras. '+\
                  'Install with: pip3 install git+https://github.com/amaiya/eli5@tfkeras_0_10_1'
            warnings.warn(msg)
            return

        if not hasattr(eli5, 'KTRAIN'):
            warnings.warn("Since eli5 does not yet support tf.keras, ktrain uses a forked version of eli5.  " +\
                           "We do not detect this forked version, so predictor.explain will not work.  " +\
                           "It will work if you uninstall the current version of eli5 and install "+\
                           "the forked version:  " +\
                           "pip3 install git+https://github.com/amaiya/eli5@tfkeras_0_10_1")
            return

        img = image.load_img(img_fpath,
                             target_size=self.preproc.target_size,
                             color_mode=self.preproc.color_mode)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        return eli5.show_prediction(self.model, x)
Example #5
    def explain(self, img_fpath):
        """
        ```
        Highlights image to explain prediction
        ```
        """
        # if U.is_tf_keras():
        # warnings.warn("currently_unsupported: explain() method is not available because tf.keras is "+\
        # "not yet adequately supported by the eli5 library. You can switch to " +\
        # "stand-alone Keras by setting os.environ['TF_KERAS']='0'" )
        # return

        try:
            import eli5
        except:
            msg = (
                "ktrain requires a forked version of eli5 to support tf.keras. "
                +
                "Install with: pip install https://github.com/amaiya/eli5/archive/refs/heads/tfkeras_0_10_1.zip"
            )
            warnings.warn(msg)
            return

        # if not hasattr(eli5, 'KTRAIN'):
        if (not hasattr(eli5, "KTRAIN_ELI5_TAG")
                or eli5.KTRAIN_ELI5_TAG != KTRAIN_ELI5_TAG):
            warnings.warn(
                "Since eli5 does not yet support tf.keras, ktrain uses a forked version of eli5.  "
                +
                "We do not detect this forked version (or it is out-of-date), so predictor.explain may not work.  "
                +
                "It will work if you uninstall the current version of eli5 and install "
                + "the forked version:  " +
                "pip install https://github.com/amaiya/eli5/archive/refs/heads/tfkeras_0_10_1.zip"
            )
            return

        if not DISABLE_V2_BEHAVIOR:
            warnings.warn(
                "Please add os.environ['DISABLE_V2_BEHAVIOR'] = '1' at top of your script or notebook."
            )
            msg = (
                "\nFor image classification, the explain method currently requires disabling V2 behavior in TensorFlow 2.\n"
                +
                "Please add the following to the top of your script or notebook BEFORE you import ktrain and restart Colab runtime or Jupyter kernel:\n\n"
                + "import os\n" + "os.environ['DISABLE_V2_BEHAVIOR'] = '1'\n")
            print(msg)
            return

        img = keras.preprocessing.image.load_img(
            img_fpath,
            target_size=self.preproc.target_size,
            color_mode=self.preproc.color_mode,
        )
        x = keras.preprocessing.image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        return eli5.show_prediction(self.model, x)
Example #6
def explainability(X_train, y_train, X_test, y_val):

    rf_model = RandomForestClassifier(n_estimators=200,
                                      min_samples_split=10,
                                      min_samples_leaf=1,
                                      max_features='auto',
                                      max_depth=20,
                                      bootstrap=False,
                                      random_state=0).fit(X_train, y_train)

    # ======================== Permutation ===============================

    import eli5
    from eli5.sklearn import PermutationImportance
    from eli5 import explain_prediction

    perm = PermutationImportance(rf_model, random_state=123).fit(X_test, y_val)

    display(
        eli5.show_weights(perm, feature_names=X_train.columns.tolist(),
                          top=24))

    eli5.show_prediction(rf_model,
                         X_test.iloc[50],
                         feature_names=X_test.columns.tolist(),
                         show_feature_values=True)
    y_pred = rf_model.predict(X_test)
    print('Random Forest loss:', cost_scores(y_pred, y_val))

    # ========================= SHAP ===================================

    import shap

    explainer = shap.TreeExplainer(rf_model)
    shap_values = explainer.shap_values(X_test)
    shap.summary_plot(shap_values, X_test)

    shap.summary_plot(shap_values[0], X_test)
    shap.summary_plot(shap_values[1], X_test)
    shap.summary_plot(shap_values[2], X_test)

    return
Example #7
    def show_prediction(self, **kwargs):
        """
        Call :func:`eli5.show_prediction` for the locally-fit
        classification pipeline. Keyword arguments are passed
        to :func:`eli5.show_prediction`.

        :func:`fit` must be called before using this method.
        """
        self._fix_target_names(kwargs)
        return eli5.show_prediction(self.clf_, self.doc_, vec=self.vec_,
                                    **kwargs)
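The method above appears to belong to a LIME-style wrapper in the spirit of eli5.lime.TextExplainer; a hedged usage sketch under that assumption, with `doc` and `pipe` standing in for a text to explain and a fitted text-classification pipeline.

from eli5.lime import TextExplainer

te = TextExplainer(random_state=42)
te.fit(doc, pipe.predict_proba)  # `doc` and `pipe` are assumed names, not from the original example
te.show_prediction(target_names=['negative', 'positive'])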
Example #8
def get_explain(string):
    prediction = predict_character(string)
    string = get_text(string)
    character = prediction['prediction']
    html = eli5.show_prediction(models[character],
                                string,
                                targets=[1],
                                vec=tfidf).data
    html = ('<h2>Predicted ' + character + ' (' +
            str(int(round(prediction['probability'] * 100))) + '%)</h2>' +
            html)
    return html
Example #9
    def describe(self, model_name, model, data_dict):
        feature_names = list(data_dict['x_train'].columns)
        test_observation = data_dict['x_test'].iloc[0]
        explained_weights = eli5.show_weights(
            model,
            feature_names=feature_names,
            show=['targets', 'transition_features', 'feature_importances'])
        explained_prediction = eli5.show_prediction(model, test_observation)

        return {
            "Weights explanation": explained_weights,
            "Predictions explanation": explained_prediction
        }
Example #10
def eli5_features(test, pipeline):

    clf = pipeline.named_steps["clf"]
    vec = pipeline.named_steps["vec"]
    transformer = Pipeline(pipeline.steps[:-1])

    with open("eli5_weights.html", "w") as f:
        f.write(eli5.show_weights(clf, vec=vec, top=50).data)

    with open("eli5_prediction.html", "w") as f:
        f.write(
            eli5.show_prediction(clf,
                                 transformer.transform(test),
                                 feature_names=vec.get_feature_names()).data)
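A hedged sketch of a pipeline with the "vec"/"clf" step names that eli5_features expects; the estimator choice and the data names are assumptions.

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipeline = Pipeline([
    ("vec", TfidfVectorizer()),     # looked up via pipeline.named_steps["vec"]
    ("clf", LogisticRegression()),  # looked up via pipeline.named_steps["clf"]
])
pipeline.fit(train_texts, train_labels)  # assumed training data names
eli5_features(test_texts, pipeline)      # writes eli5_weights.html and eli5_prediction.html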
Example #11
    def predict(algo, zscoreX, zscoreY):
        # print algo, accuracy, r-squared
        print(algo)
        print('with zScore on X: ' + str(zscoreX) + ' and Y: ' + str(zscoreY))

        # different ZScore options
        if zscoreX:
            x = np.copy(scaled_x)
        else:
            x = np.copy(original_x)

        if zscoreY:
            y = np.copy(scaled_y)
        else:
            y = np.copy(original_y)

        model = algo()
        if algo == RandomForestRegressor:
            model = algo(n_estimators=150)
        model.fit(x[:dividing_line], y[:dividing_line])

        y_predict = model.predict(x[dividing_line:])

        # Alibi (Explainable AI) only has classifier support,
        # so we are going to use ELI5 instead.
        # ELI5 is typically used in a notebook, but we can export its output as HTML.

        swdoc = open('swdoc_' + grade + '.html', 'w')
        swdoc.write(show_weights(model).data)
        swdoc.close()
        spdoc = open('spdoc_' + grade + '.html', 'w')

        # remember our prediction for every school in the test data
        myi = 0
        for school in y_predict:
            schools_to_predict[order_of_schools[myi]] = int(1000 *
                                                            float(school))
            myi += 1

            if myi % 60 == 0:
                # write out an explanation for every 60th prediction
                spdoc.write(
                    show_prediction(model,
                                    x[dividing_line + myi],
                                    show_feature_values=True).data)
        spdoc.close()

        # evaluation of test data
        print(explained_variance_score(y[dividing_line:], y_predict))
        print(r2_score(y[dividing_line:], y_predict))
Example #12
def test_image_classification(keras_clf, cat_dog_image, area, targets):
    # check explanation
    res = eli5.explain_prediction(keras_clf, cat_dog_image, targets=targets)
    assert_attention_over_area(res, area)

    # check formatting
    overlay = format_as_image(res)
    assert_good_external_format(res, overlay)

    # check show function
    show_overlay = eli5.show_prediction(keras_clf,
                                        cat_dog_image,
                                        targets=targets)
    assert_pixel_by_pixel_equal(overlay, show_overlay)
Example #13
def test_image_classification(keras_clf, cat_dog_image, area, targets):
    doc, image = cat_dog_image
    # check explanation
    res = eli5.explain_prediction(keras_clf, doc, image=image, targets=targets)
    assert_attention_over_area(res, area)

    # check formatting
    res.image = res.image.convert('RGBA')  # explicitly normalize
    overlay = format_as_image(res)
    assert_good_external_format(res, overlay)

    # check show function with image auto-conversion
    show_overlay = eli5.show_prediction(keras_clf, doc, targets=targets)

    assert_pixel_by_pixel_equal(overlay, show_overlay)
Example #14
def run_model(newdata):
    st.write(newdata)
    y_pred = model.predict(newdata)
    if y_pred[0] > 0:
        st.text("Risk performance is Bad")
    else:
        st.text("Risk performance is Good")
    st.subheader("Intepretation:")
    html_object = eli5.show_prediction(model,
                                       newdata.iloc[0],
                                       feature_names=list(newdata.columns),
                                       show_feature_values=True)
    raw_html = html_object._repr_html_()
    components.v1.html(raw_html, height=500, scrolling=True)
    find_similar_record(newdata)
Example #15
    def explain(self, img_fpath):
        """
        Highlights image to explain prediction
        """
        # if U.is_tf_keras():
        # warnings.warn("currently_unsupported: explain() method is not available because tf.keras is "+\
        # "not yet adequately supported by the eli5 library. You can switch to " +\
        # "stand-alone Keras by setting os.environ['TF_KERAS']='0'" )
        # return

        try:
            import eli5
        except:
            msg = 'deepwrap requires a forked version of eli5 to support tf.keras. ' + \
                  'Install with: pip install git+https://github.com/amaiya/eli5@tfkeras_0_10_1'
            warnings.warn(msg)
            return

        # if not hasattr(eli5, 'DEEPWRAP'):
        if not hasattr(eli5, 'DEEPWRAP_ELI5_TAG'
                       ) or eli5.DEEPWRAP_ELI5_TAG != DEEPWRAP_ELI5_TAG:
            warnings.warn("Since eli5 does not yet support tf.keras, deepwrap uses a forked version of eli5.  " + \
                          "We do not detect this forked version (or it is out-of-date), so predictor.explain may not "
                          "work.  " + \
                          "It will work if you uninstall the current version of eli5 and install " + \
                          "the forked version:  " + \
                          "pip install git+https://github.com/amaiya/eli5@tfkeras_0_10_1")
            return

        if not DISABLE_V2_BEHAVIOR:
            warnings.warn(
                "Please add os.environ['DISABLE_V2_BEHAVIOR'] = '1' at top of your script or notebook."
            )
            msg = "\nFor image classification, the explain method currently requires disabling V2 behavior in " \
                  "TensorFlow 2.\n" + \
                  "Please add the following to the top of your script or notebook BEFORE you import deepwrap and " \
                  "restart Colab runtime or Jupyter kernel:\n\n" + \
                  "import os\n" + \
                  "os.environ['DISABLE_V2_BEHAVIOR'] = '1'\n"
            print(msg)
            return

        img = image.load_img(img_fpath,
                             target_size=self.preproc.target_size,
                             color_mode=self.preproc.color_mode)
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        return eli5.show_prediction(self.model, x)
Example #16
def explain_message(pos, fn_messages, pipe, dataset, top=10):
    """
    explain_message(pos,X_test[y_fn],pipe,bow1k_bal)
    """

    message = fn_messages.loc[pos]
    transform_sentence(pipe.steps[0][1], message)

    print('Predicted class:', pipe.predict([message])[0])
    display(
        eli5.show_prediction(
            pipe.steps[2][1],
            dataset['Xtest'][pos, :],
            target_names=[0, 1],
            feature_names=pipe.steps[0][1].get_feature_names(),
            top=top))
Example #17
def save_html(classifier, document, transformer):

    import eli5
    from bs4 import BeautifulSoup, Tag
    v = transformer.get_feature_names()
    html = eli5.show_prediction(classifier,
                                document,
                                vec=transformer,
                                top=len(v)).data
    soup = BeautifulSoup(html, "lxml")
    for tag in soup.find_all(['table', 'b']):
        tag.replaceWith('')
    html = str(soup)
    html = html.replace('(probability )', '')
    html = html.replace('top features', '')
    return html
Example #18
    def explain_plan_choice(self):
        names_features = [
            'min_accuracy', 'min_fairness', 'max_complexity_rel',
            'max_complexity_abs', 'min_safety', 'min_privacy',
            'max_search_time', 'accuracy_distance_to_landmark',
            'fairness_distance_to_landmark',
            'complexity_distance_to_landmark_rel',
            'complexity_distance_to_landmark_abs',
            'safety_distance_to_landmark', 'landmark_computation_time', 'rows',
            'columns', 'decision_tree', 'naive_bayes', 'logistic_regression'
        ]

        display(
            show_prediction(self.best_model,
                            self.features[0],
                            feature_names=names_features,
                            show_feature_values=True))
Example #19
def classify(clas, tfidf, vect, dataset, api):
    while True:
        dt = None
        print("\n\nSCEGLI COME PREFERISCI FORNIRE IL TWEET DA CLASSIFICARE")
        print("1) Inserimento Manuale")
        print("2) Download da Tweet indicando un utente")
        print("3) Tweet presente all'interno al Dataframe")
        print("4) Ritornare al menù principale")
        print("[Default: 3]")
        answer = input("La tua scelta: ")

        if answer == '1':
            dt = manualTweets()
        elif answer == '2':
            if api != None:
                dt = apiTweets(api)
            else:
                print("Non hai inizializzato le API. Opzione non disponibile!")
                continue
        elif answer == '4':
            return
        else:
            dt = datasetTweets(dataset)

        clean_tweets = cleanTweet(dt['text'].tolist())
        count_tweets = vect.transform(clean_tweets)
        tf_tweets = tfidf.transform(count_tweets)

        Probas_x = pd.DataFrame(clas.predict_proba(tf_tweets),
                                columns=clas.classes_)
        joined_x = dt
        joined_x[clas.classes_] = Probas_x
        joined_x = joined_x.reset_index(drop=True)
        print(joined_x)

        for x in dt['text'].tolist():
            display(
                eli5.show_prediction(clas,
                                     doc=x,
                                     vec=vect,
                                     top=40,
                                     target_names=clas.classes_))
Example #20
def Acha_Casos_Falsos():
    # find the cases the classifier got wrong
    res = clf_pred_test == y_test_tree

    fp_idx = []
    fn_idx = []
    # find the indices of the FN and FP cases
    i = -1
    for j in res:
        i = i + 1
        if j == False:
            if y_test_tree[i] == 0:
                fp_idx.append(i)
            else:
                fn_idx.append(i)

    html = show_prediction(xgb,
                           X_test_tree[28],
                           show_feature_values=True,
                           feature_names=feature_names)
    with open('falso_negativo.html', 'w') as f:
        f.write(html.data)
Example #21
feature_names = ['Num_of_Preg', 'Glucose_Conc', 'BP', 'Skin_Thickness',
                 'TwoHour_Insulin', 'BMI', 'DM_Pedigree', 'Age']
# Create our explainer, a tabular explainer since this is tabular data
explainer1 = lime.lime_tabular.LimeTabularExplainer(
    X_train.values, feature_names=feature_names, class_names=class_names, discretize_continuous=True)


# Sample We Predicted
X_test.iloc[0]
# The Explainer Instance
exp1 = explainer1.explain_instance(
    X_test.iloc[0], logreg.predict_proba, num_features=8, top_labels=1)

# Show in notebook
exp1.show_in_notebook(show_table=True, show_all=False)

# Using ELI5

# Showing the Weight for our model
eli5.show_weights(logreg, top=10)

# Clearly Define Feature Names
eli5.show_weights(logreg, feature_names=feature_names,
                  target_names=class_names)


# Show explanation for a single prediction
eli5.show_prediction(
    logreg, X_test.iloc[0], feature_names=feature_names, target_names=class_names)
Example #22
                             ngram_range=(1, 2),
                             min_df=5)

overview_text = vectorizer.fit_transform(train['overview'].fillna(''))
linreg = LinearRegression()
linreg.fit(overview_text, train['log_revenue'])
eli5.show_weights(linreg,
                  vec=vectorizer,
                  top=20,
                  feature_filter=lambda x: x != '<BIAS>')

# In[16]:

print('Target value:', train['log_revenue'][1000])
eli5.show_prediction(linreg,
                     doc=train['overview'].values[1000],
                     vec=vectorizer)

# In[17]:

test.loc[test['release_date'].isnull() == False, 'release_date'].head()
Example #23
print('Test Accuracy :', best_svm_test_score)

# ## ELI5 evaluation

svm_best = gs_svm.best_estimator_

svm_best[1]

import eli5
# see top predictors in each class 
eli5.show_weights(svm_best[1], vec=svm_best[0], top=40)

test_corpus[8]

# test for a given comment's best features 
eli5.show_prediction(svm_best[1], test_corpus[8], vec=svm_best[0], top=10)

# ### model performance evaluation with Linear SVM

# +
import model_evaluation_utils as meu

svm_predictions = gs_svm.predict(test_corpus)
unique_classes = list(set(test_label_nums))
meu.get_metrics(true_labels=test_label_nums, predicted_labels=svm_predictions)
# -

meu.display_classification_report(true_labels=test_label_nums,
                                  predicted_labels=svm_predictions,
                                  classes=unique_classes)
Example #24
# In[73]:

tokenizer = TweetTokenizer()
vectorizer = TfidfVectorizer(ngram_range=(1, 2), tokenizer=tokenizer.tokenize)
vectorizer.fit(all_data['Description'].fillna('').values)
X_train = vectorizer.transform(train['Description'].fillna(''))
rf = RandomForestClassifier(n_estimators=20)
rf.fit(X_train, train['AdoptionSpeed'])

# In[74]:

for i in range(5):
    print('Example of Adoption speed {}'.format(i))
    text = train.loc[train['AdoptionSpeed'] == i, 'Description'].values[0]
    print(text)
    display(eli5.show_prediction(rf, doc=text, vec=vectorizer, top=10))

# In[75]:

train['Description'] = train['Description'].fillna('')
test['Description'] = test['Description'].fillna('')
all_data['Description'] = all_data['Description'].fillna('')

train['desc_length'] = train['Description'].apply(lambda x: len(x))
train['desc_words'] = train['Description'].apply(lambda x: len(x.split()))

test['desc_length'] = test['Description'].apply(lambda x: len(x))
test['desc_words'] = test['Description'].apply(lambda x: len(x.split()))

all_data['desc_length'] = all_data['Description'].apply(lambda x: len(x))
all_data['desc_words'] = all_data['Description'].apply(lambda x: len(x.split()))
Example #25
message = sys.argv[1]
type_mes = sys.argv[2]

filename = '../WEB-DATA/MODELS/' + sys.argv[2] + '.sav'
clas = joblib.load(filename)
filename = '../WEB-DATA/VECTORS/' + sys.argv[2] + '.sav'
vect = joblib.load(filename)

mess_vect = vect.transform([message])

Probas_x = pd.DataFrame(clas.predict_proba(mess_vect), columns=clas.classes_)
result = pd.DataFrame([message], columns=['Messaggio'])
result[clas.classes_] = Probas_x
df = pd.DataFrame()
df['Max'] = result[clas.classes_.tolist()].idxmax(axis=1)

output = []

s = eli5.show_weights(clas, vec=vect, target_names=clas.classes_).data
s = s.replace("\n","")

output.append(df.loc[0].tolist())
output.append(result[df.loc[0].tolist()[0]].values)
output.append([s])

s = eli5.show_prediction(clas, message, vec=vect, target_names=clas.classes_).data
s = s.replace("\n","")

output.append([s])

print(output)
Example #26
### VALIDATION
# test performance
test_feat = vecspc.transform(test_X)
pred = clf.predict(test_feat)

## performance metrics
# confusion matrix
conf_mat = metrics.confusion_matrix(test_y, pred)
print(conf_mat)
perf_acc = metrics.accuracy_score(test_y, pred)
print(perf_acc)

# standard performance metric
perf_f1 = metrics.f1_score(test_y, pred, pos_label='REAL')
print(perf_f1)

print(metrics.classification_report(test_y, pred))

### exploring features and document classification
import eli5

# most informative features
eli5.show_weights(clf, vec=vecspc, top=25)

# explore documents
i = 2
text = data['text_clean'][i]
print(data.label[i])

obj = eli5.show_prediction(clf, text, vec=vecspc)
Example #27
    predict = classifier.decision_function(X_train[test_index])
    cv_score = roc_auc_score(y[test_index], predict)
    print(cv_score)
    break

# In[ ]:

eli5.show_weights(classifier, vec=vectorizer)

# In[ ]:

train[COMMENT].values[6]

# In[ ]:

eli5.show_prediction(classifier, doc=train.values[6], vec=vectorizer)

# In[ ]:

eli5.show_weights(classifier,
                  vec=vectorizer,
                  top=100,
                  feature_filter=lambda x: x != '<BIAS>')

# Now, look at identity_hate

# In[ ]:

classifier = RidgeClassifier(solver='sag')

y = ys['identity_hate'].values
Example #28
bad_rows = []                           # rows with too little text
for row in range(len(df)):              # loop through all rows in the df
    if len(df.loc[row, 'text']) < 100:  # if the text is shorter than 100 characters
        bad_rows.append(row)            # save the row index in the bad_rows list

df = df.drop(df.index[bad_rows])  # remove all the bad rows


# show most prominent words
eli5.show_weights(clf2, vec=vecspc, top=30)

# highlight words in text
i = 73
i = 201
text = df['text_clean'][i]
print(df['label'][i])
eli5.show_prediction(clf2, text, vec=vecspc)



# clf4
eli5.show_weights(clf4, vec=vecspc2, top=30)

eli5.show_prediction(clf4, df['title_clean'][99], vec=vecspc2)  # 99 is pretty good

# clf5
eli5.show_weights(clf5, vec=vecspc3, top=30)

# In[12]:

eli5.show_weights(model, vec=vectorizer)

# The part before the double underscore is the vectorizer name, and the feature name goes after that. Let's show more features and get rid of the bias term:

# In[ ]:

eli5.show_weights(model,
                  vec=vectorizer,
                  top=100,
                  feature_filter=lambda x: x != '<BIAS>')

# Another handy feature is analyzing individual predictions. Let's check some predictions from the validation set. You see a summary of each vectorizer's contribution at the top, and below that the features are highlighted in the text itself.

# In[13]:

eli5.show_prediction(model, doc=train.values[100], vec=vectorizer)

# In[ ]:

eli5.show_prediction(model, doc=train.values[1], vec=vectorizer)

# In[ ]:

eli5.show_prediction(model, doc=train.values[2], vec=vectorizer)

# What can we do with this?
# First, you can examine features to see if they are what you expect - maybe you are missing some important information due to bad tokenization, or have a lot of noise features due to insufficient regularization.
# You can also check the most erroneous predictions and try to understand why the model fails on them.
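# A hedged sketch of the "most erroneous predictions" idea above, assuming hypothetical
# valid_texts / valid_labels arrays and a model that exposes predict_proba.
import numpy as np

proba = model.predict_proba(vectorizer.transform(valid_texts))[:, 1]
errors = np.abs(valid_labels - proba)      # how far each prediction is from the true label
worst_idx = np.argsort(errors)[::-1][:3]   # the three most erroneous examples

for i in worst_idx:
    display(eli5.show_prediction(model, doc=valid_texts[i], vec=vectorizer))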
# ### Interpretation
# * ##### The features at the top are the most important; those at the bottom, the least.
# * ##### The number after the ± measures how performance varied from one reshuffling to the next.
# * ##### Some weights are negative: in those cases, predictions on the shuffled data were found to be more accurate than on the real data.
#
# Here are the top 5 important features (a sketch of the call that produces this table follows the list below):
# * chest_pain_type_typical angina
# * thalassemia_reversable defect
# * rest_ecg_ST-T wave abnormality
# * major vessels
# * thalassemia_fixed defect
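
# A minimal, hedged sketch of the call that typically produces the permutation-importance table
# interpreted above; `random_forest` and `X_test` appear later in this excerpt, while `y_test` is
# an assumed name for the matching labels.
import eli5
from eli5.sklearn import PermutationImportance

perm = PermutationImportance(random_forest, random_state=42).fit(X_test, y_test)
eli5.show_weights(perm, feature_names=X_test.columns.tolist())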

# Next, to explain an individual prediction of the random forest model, the eli5 library provides show_prediction(). As shown in the code below, we explain the 10th record of the test set.

eli5.show_prediction(random_forest,
                     X_test.iloc[10],
                     feature_names=X_test.columns.tolist(),
                     show_feature_values=True)

# ##### Interpretations
# To make random forest predictions more interpretable, every prediction of the model can be presented as a sum of feature contributions (plus the bias), showing how the features lead to a particular prediction. ELI5 does this by showing a weight for each feature, together with its actual value, indicating how much it contributed to the final prediction across all trees.
#
# In the individual prediction above, the top 3 influential features (after the bias) appear to be
# the major vessels, thalassemia_fixed defect and max_heart_rate_achieved.
#
# * The 42nd record of the test set is explained below:

eli5.show_prediction(random_forest,
                     X_test.iloc[42],
                     feature_names=X_test.columns.tolist(),
                     show_feature_values=True)
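
# A hedged sketch showing how the same decomposition can be read programmatically with
# eli5.explain_prediction_df (also used in Example #3 above); the weight column, including the
# <BIAS> row, should sum approximately to the score that show_prediction displays.
exp_df = eli5.explain_prediction_df(random_forest,
                                    X_test.iloc[42],
                                    feature_names=X_test.columns.tolist())
print(exp_df[['feature', 'weight']])
print('sum of contributions (incl. <BIAS>):', exp_df['weight'].sum())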