def explain_pred_contrib(id, clf, X, features, cats=None,
                         waterfall={'rotation_value': 60, 'threshold': None}):
    """Explain a single prediction with eli5 and optionally draw a waterfall.

    Prints the predicted probability taken from column 1 of
    ``clf.predict_proba`` for the row of ``X`` whose index equals ``id``,
    builds an eli5 explanation for that row and, unless ``waterfall`` is
    ``None``, plots the per-feature weights with ``waterfall_chart.plot``.

    Args:
        id: Index label of the row of ``X`` to explain. (The name shadows
            the ``id`` builtin; it is kept so keyword callers do not break.)
        clf: Fitted classifier exposing ``predict_proba``.
        X: pandas object indexed by the identifiers; both
            ``X.loc[X.index == id]`` and ``X.loc[id]`` are evaluated.
        features: Feature names forwarded to eli5 as ``feature_names``.
        cats: Optional mapping data handed to ``id2class`` together with the
            explanation frame. Each ``key -> value`` pair it returns is
            substituted into the rendered output via ``str.replace``.
        waterfall: Options for the chart, or ``None`` to skip plotting.
            Recognised keys are ``'rotation_value'`` (default 60) and
            ``'threshold'`` (default ``None``); missing keys fall back to
            those defaults. The default dict is shared between calls and
            must never be mutated here.

    Returns:
        The object returned by ``eli5.show_prediction`` (its ``data``
        attribute is rewritten when ``cats`` is given).
    """
    row_frame = X.loc[X.index == id]
    try:
        p = clf.predict_proba(row_frame)[:, 1]
    except Exception:
        # Retry with the bare array for models that reject the pandas
        # object. Only ``Exception`` is caught so Ctrl-C still propagates.
        p = clf.predict_proba(row_frame.values)[:, 1]

    print(
        f'Prediction explanation for ID: {id}; Probability of event (y=1): {np.round(p[0], 3)}\nModel used: {type(clf)}'
    )

    row = X.loc[id]
    try:
        df = eli5.show_prediction(clf, row, show_feature_values=True,
                                  feature_names=features)
        exp = eli5.explain_prediction_df(clf, row, feature_names=features)
    except Exception:
        # Same fallback as above: hand eli5 the underlying array instead.
        df = eli5.show_prediction(clf, row.values, show_feature_values=True,
                                  feature_names=features)
        exp = eli5.explain_prediction_df(clf, row.values,
                                         feature_names=features)

    if cats is not None:
        for k, v in id2class(exp, cats).items():
            df.data = df.data.replace(k, v)

    if waterfall is not None:
        waterfall_chart.plot(exp.feature, exp.weight,
                             rotation_value=waterfall.get('rotation_value', 60),
                             net_label="Final Score/Proba",
                             other_label="Minor Features",
                             formatting="{:,.2f}",
                             threshold=waterfall.get('threshold'),
                             Title='Waterfall of features contributions')
    return df
def ff_display(df, index_cols, waterfall_cols=None, monthly=False):
    """Display factor weights and return contributions, then chart them.

    Shows the result of ``ff_weights(df, index_cols)`` followed by the
    result of ``ff_importances`` (multiplied by 12 when ``monthly`` is
    true), and finally draws one waterfall chart per requested column.

    Args:
        df: Data passed through to ``ff_weights`` and ``ff_importances``.
        index_cols: Columns passed to ``ff_weights``; also the columns
            charted when ``waterfall_cols`` is not given.
        waterfall_cols: Optional columns to chart instead of ``index_cols``.
        monthly: When true, contributions are scaled by 12.
    """
    scale = 12. if monthly else 1.

    display(HTML("<b>Fama French factors:</b>"))
    weights = ff_weights(df, index_cols)
    display(weights)
    print("")

    display(HTML("<b>Contributions to return:</b>"))
    contributions = ff_importances(df, weights) * scale
    display(contributions)

    columns_to_plot = index_cols if waterfall_cols is None else waterfall_cols
    for column in columns_to_plot:
        # Values are multiplied by 100 to match the "%" in the label format.
        waterfall_chart.plot(contributions.index,
                             contributions[column] * 100,
                             formatting="{:,.2f}%",
                             Title=column)
def ShapWaterFall(Model, X_tng, X_sc, ref1, ref2, num_feature):
    """Plot a SHAP waterfall explaining the gap between two scored records.

    The two rows of ``X_sc`` whose ``Reference`` column equals ``ref1`` and
    ``ref2`` are ordered so that the one with the lower
    ``Model.predict_proba(...)[:, 1]`` comes first. The chart starts at that
    record's prediction, adds the row-to-row difference in SHAP value for
    the ``num_feature`` largest differences (descending), lumps the rest
    into an "Other N Features" bar, and labels the net bar with the other
    record's reference.

    Args:
        Model: Fitted model accepted by ``shap.TreeExplainer`` and exposing
            ``predict_proba``.
        X_tng: Training data (DataFrame or anything ``pd.DataFrame`` can
            wrap); 100 rows are drawn with ``shap.sample`` as background.
        X_sc: Scoring data containing a ``Reference`` column.
        ref1: ``Reference`` value of the first record.
        ref2: ``Reference`` value of the second record.
        num_feature: Number of individual feature bars to show.

    Returns:
        None. The chart is displayed via ``plot.show()``.
    """
    import pandas as pd
    import numpy as np
    import shap
    import matplotlib.pyplot as plt
    import waterfall_chart

    # label names until we figure out how sql alchemy can fully work on Linux
    clients_to_show = [ref1, ref2]

    # Accept either DataFrames or raw array-likes.
    X_v = X_sc if isinstance(X_sc, pd.DataFrame) else pd.DataFrame(X_sc)
    X_t = X_tng if isinstance(X_tng, pd.DataFrame) else pd.DataFrame(X_tng)

    explainer = shap.TreeExplainer(Model, shap.sample(X_t, 100))

    # ``columns=`` replaces the positional axis argument, which newer pandas
    # releases no longer accept.
    data_for_prediction1 = X_v[X_v.Reference == clients_to_show[0]].drop(
        columns='Reference')
    data_for_prediction2 = X_v[X_v.Reference == clients_to_show[1]].drop(
        columns='Reference')

    # Order the pair from lower to higher propensity.
    # NOTE(review): this comparison assumes each reference matches exactly
    # one row; several matches would make the truth value ambiguous.
    if Model.predict_proba(data_for_prediction1)[:, 1] <= Model.predict_proba(
            data_for_prediction2)[:, 1]:
        frames = [data_for_prediction1, data_for_prediction2]
    else:
        frames = [data_for_prediction2, data_for_prediction1]
        clients_to_show = [ref2, ref1]

    # Computations for Waterfall Chart
    data_for_prediction = pd.concat(frames)
    feature_names = data_for_prediction.columns.values
    shap_values = explainer.shap_values(data_for_prediction)
    # NOTE(review): index 1 presumably selects the positive class of a
    # binary classifier; the layout of ``shap_values`` depends on the shap
    # version and model type, so confirm before upgrading shap.
    Feat_contrib = pd.DataFrame(list(map(np.ravel, shap_values[1])),
                                columns=feature_names)
    counter1 = len(Feat_contrib.columns)
    Feat_contrib['base_line_diff'] = Feat_contrib.sum(axis=1)
    Feat_contrib['prediction'] = Model.predict_proba(data_for_prediction)[:, 1]
    Feat_contrib['baseline'] = (Feat_contrib.prediction
                                - Feat_contrib.base_line_diff)

    # Second row minus first row; only the first ``counter1`` entries are
    # real features (the helper columns added above are sliced off).
    row_delta = Feat_contrib.diff().iloc[1, :]
    diff_df = pd.DataFrame({
        'features': row_delta.index,
        'contrib': row_delta.values
    })[:counter1].sort_values(by='contrib',
                              ascending=False).reset_index(drop=True)

    # Waterfall Chart
    plt.rcParams.update({'figure.figsize': (16, 12), 'figure.dpi': 100})

    xlist = [
        clients_to_show[0],
        'Other {a} Features'.format(a=counter1 - num_feature)
    ]
    xlist.extend(diff_df.features.tolist()[:num_feature])

    ylist = [
        np.round(Feat_contrib.prediction[0], 6),
        np.round(diff_df.contrib[num_feature:].sum(), 6)
    ]
    ylist.extend(np.round(diff_df.contrib.tolist(), 6)[:num_feature])

    plot = waterfall_chart.plot(xlist,
                                ylist,
                                net_label=str(clients_to_show[1]),
                                rotation_value=90,
                                formatting='{:,.3f}')
    plot.show()
def _savefig_to_uri(target):
    """Save ``target`` (anything with ``savefig``) as PNG; return it base64-encoded and URL-quoted."""
    buf = io.BytesIO()
    target.savefig(buf, format="png", bbox_inches='tight')
    buf.seek(0)
    return urllib.parse.quote(base64.b64encode(buf.read()))


def _wordcloud_uri(text, color_func=None):
    """Render a word cloud of ``text`` (optionally recoloured) and return its PNG URI string."""
    cloud = WordCloud(background_color='white',
                      max_words=200,
                      max_font_size=80,
                      random_state=42).generate(text)
    if color_func is not None:
        cloud.recolor(color_func=color_func)
    plt.figure()
    plt.tight_layout()
    plt.imshow(cloud)
    plt.axis('off')
    uri = _savefig_to_uri(plt.gcf())
    plt.close()
    return uri


def printstudentreport(uid):
    """Build the four report images for the user with id ``uid``.

    The images are:

    * a word cloud of this year's answer tags whose sentiment compound
      score is above 0.3,
    * a word cloud (recoloured with ``grey_color_func``) of the tags whose
      compound score is below 0.0,
    * a waterfall chart of the per-year sentiment totals,
    * a line chart, one line per skill, of the average answer sentiment in
      each half-year.

    Args:
        uid: Primary key of the ``User`` to report on.

    Returns:
        Tuple ``(uri2, uri3, uri4, uri5)``; each item is the URL-quoted
        base64 encoding of one PNG, in the order listed above.
    """
    student = User.objects.get(id=uid)

    # Collect the tags of every answer given in the current calendar year.
    current_year = datetime.date.today().year
    tagged_words = []
    for ans in SkillAnswer.objects.filter(student=student):
        if ans.date.year == current_year:
            tagged_words.extend(ans.tags.split(','))

    # Split tags by sentiment; scores in [0.0, 0.3] land in neither list.
    analyzer = SentimentIntensityAnalyzer()
    pos = []
    neg = []
    for word in tagged_words:
        compound = analyzer.polarity_scores(word)['compound']
        if compound > 0.3:
            pos.append(word)
        elif compound < 0.0:
            neg.append(word)

    uri2 = _wordcloud_uri(' '.join(pos))
    uri3 = _wordcloud_uri(' '.join(neg), color_func=grey_color_func)

    # Sentiment total per year, in chronological order of first appearance.
    yearly_total = {}
    for ans in SkillAnswer.objects.filter(student=student).order_by('date'):
        year = ans.date.year
        if year in yearly_total:
            yearly_total[year] += ans.sentiment
        else:
            yearly_total[year] = ans.sentiment

    # The waterfall needs the first total followed by year-over-year
    # changes. Differences are taken against the ORIGINAL totals; updating
    # the list in place would subtract an already-modified neighbour.
    totals = list(yearly_total.values())
    deltas = totals[:1] + [
        current - previous for previous, current in zip(totals, totals[1:])
    ]
    years = [float(year) for year in yearly_total]
    print(deltas)
    print(years)
    uri4 = _savefig_to_uri(waterfall_chart.plot(years, deltas))
    plt.close()

    # Line chart: one series per skill, two points (June/Dec) per year.
    plotted_skills = []
    for skill in Skill.objects.all():
        # year -> [first-half sum, first-half count,
        #          second-half sum, second-half count]
        half_year_stats = {}
        print(skill.skill_name)
        for question in SkillQuestion.objects.filter(skill=skill):
            answers = SkillAnswer.objects.filter(student=student,
                                                 question=question)
            for answer in answers:
                print(answer.answer)
                print(answer.date)
                stats = half_year_stats.setdefault(answer.date.year,
                                                   [0.0, 0.0, 0.0, 0.0])
                offset = 0 if answer.date.month <= 6 else 2
                stats[offset] += answer.sentiment
                stats[offset + 1] += 1

        averages = {}
        for year in sorted(half_year_stats):
            sum_h1, count_h1, sum_h2, count_h2 = half_year_stats[year]
            # A half-year without answers is plotted as 0.0.
            averages["June" + str(year)] = (sum_h1 / count_h1
                                            if count_h1 else 0.0)
            averages["Dec" + str(year)] = (sum_h2 / count_h2
                                           if count_h2 else 0.0)
        plt.plot(list(averages.keys()), list(averages.values()))
        plotted_skills.append(skill.skill_name)

    plt.legend(plotted_skills, loc="lower right")
    plt.tight_layout()
    plt.xlabel('Time')
    plt.ylabel('Sentiment score')
    uri5 = _savefig_to_uri(plt.gcf())
    plt.close()

    return uri2, uri3, uri4, uri5
def plot_feature_contribution(self):
    """Draw a waterfall chart of ``self.metric_score`` against ``self.features``.

    Returns:
        Whatever ``waterfall_chart.plot`` returns for those two sequences.
    """
    return waterfall_chart.plot(self.features, self.metric_score)
def plot_waterfall(Column, contributions, rotation_value=90, threshold=0.2,
                   sorted_value=True, **kargs):
    """Call ``waterfall_chart.plot`` with this module's preferred defaults.

    Args:
        Column: Labels passed as the first positional argument.
        contributions: Values passed as the second positional argument.
        rotation_value: Forwarded as ``rotation_value`` (default 90).
        threshold: Forwarded as ``threshold`` (default 0.2).
        sorted_value: Forwarded as ``sorted_value`` (default True).
        **kargs: Any further keyword arguments, forwarded unchanged.

    Returns:
        The return value of ``waterfall_chart.plot``.
    """
    options = {
        'rotation_value': rotation_value,
        'threshold': threshold,
        'sorted_value': sorted_value,
    }
    options.update(kargs)
    return waterfall_chart.plot(Column, contributions, **options)
import waterfall_chart

# Inputs for the chart: the starting level, the 2050 goal and the named
# reductions (negative numbers lower the running total).
reference_level = 433
goal_level = 86.2
named_reductions = {
    'Renewables': -103,
    'LDV\nElectrification': -57,
    'Heat\nPumps': -27,
}

# Whatever gap remains between the reference and the goal after the named
# reductions is shown as a single "other" bar.
other_reductions = goal_level - reference_level - sum(named_reductions.values())

measure_names = ['Reference', *named_reductions, 'Other GHG\nReductions']
measure_quantities = [
    reference_level, *named_reductions.values(), other_reductions
]

chart_options = dict(
    rotation_value=0,
    figsize=(7, 4),
    net_label='2050 Goal',
    Title='CA GHG Emissions in 2050',
    y_lab=r'MMT CO$_2$e',
    # Colours are swapped on purpose relative to the library's parameter
    # names, so the two bar colours are used the other way round.
    green_color='red',
    red_color='green',
    formatting='{:,.0f}',
)

plot = waterfall_chart.plot(measure_names, measure_quantities, **chart_options)
plot.ylim([0, 500])
plot.savefig('waterfall.pdf')