def confusion_matrix_fig(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` and return its test-set confusion matrix as a pie chart.

    The styling is inspired by the plotly-dash sample plots.

    The function previously read ``X_train_scaled`` / ``X_test_scaled`` from
    the enclosing scope and ignored its own ``X_train`` / ``X_test``
    arguments. It now uses the arguments, so callers that want scaled
    features must pass the scaled arrays in.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features handed to ``model.fit``.
        X_test: Test features handed to ``model.predict``.
        y_train: Training labels.
        y_test: True test labels. The four-way unpacking below requires a
            2x2 confusion matrix, i.e. a binary problem.

    Returns:
        A ``go.Figure`` holding a single pie trace with the TP / FN / FP / TN
        counts.
    """
    # Fit on the training split and predict the held-out split.
    model.fit(X_train, y_train)
    y_pred_test = model.predict(X_test)

    # ravel() flattens the 2x2 matrix row by row: tn, fp, fn, tp.
    matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_test)
    tn, fp, fn, tp = matrix.ravel()

    # Slice order is fixed (sort=False below) so colours line up with labels.
    values = [tp, fn, fp, tn]
    label_text = [
        "True Positive", "False Negative", "False Positive", "True Negative"
    ]
    labels = ["TP", "FN", "FP", "TN"]
    blue = cl.flipper()["seq"]["9"]["Blues"]
    colors = ["#13c6e9", blue[1], "#ff916d", "#ff744c"]

    trace0 = go.Pie(
        labels=label_text,
        values=values,
        hoverinfo="label+value+percent",
        textinfo="text+value",
        text=labels,
        sort=False,
        marker=dict(colors=colors),
        insidetextfont={"color": "white"},
        rotation=90,
    )

    # No title is set; title_font only matters if a caller adds one later.
    layout = go.Layout(
        margin=dict(l=50, r=50, t=100, b=10),
        legend=dict(bgcolor="#282b38", font={"color": "#a5b1cd"},
                    orientation="h"),
        plot_bgcolor="#282b38",
        paper_bgcolor="#282b38",
        font=dict(color="#a5b1cd", size=18),
        title_font=dict(size=22),
        width=500,
        height=500,
    )

    return go.Figure(data=[trace0], layout=layout)
def serve_pie_confusion_matrix(model, X_test, y_test, Z, threshold):
    """Build a pie chart of the confusion matrix at a given decision threshold.

    ``threshold`` is rescaled onto the ``[Z.min(), Z.max()]`` interval and
    compared with ``model.decision_function(X_test)`` to obtain 0/1
    predictions.

    Returns:
        A ``go.Figure`` with one pie trace (TP, FN, FP, TN in that order).
    """
    # Map the relative threshold onto the range spanned by Z.
    z_low, z_high = Z.min(), Z.max()
    cutoff = threshold * (z_high - z_low) + z_low

    scores = model.decision_function(X_test)
    predicted = (scores > cutoff).astype(int)

    # A 2x2 matrix flattens to tn, fp, fn, tp.
    tn, fp, fn, tp = metrics.confusion_matrix(
        y_true=y_test, y_pred=predicted).ravel()

    palettes = cl.flipper()['seq']['9']
    blues, reds = palettes['Blues'], palettes['Reds']

    pie = go.Pie(
        labels=[
            "True Positive", "False Negative",
            "False Positive", "True Negative",
        ],
        values=[tp, fn, fp, tn],
        hoverinfo='label+value+percent',
        textinfo='text+value',
        text=["TP", "FN", "FP", "TN"],
        sort=False,
        marker=dict(colors=[blues[4], blues[1], reds[1], reds[4]]),
    )
    page = go.Layout(
        title='Confusion Matrix',
        margin=dict(l=10, r=10, t=60, b=10),
        legend=dict(bgcolor='rgba(255,255,255,0)', orientation='h'),
    )
    return go.Figure(data=[pie], layout=page)
def plotAnswerIDCount(submodel):
    """Plot a stacked bar chart of mean AnswerID counts per class and year.

    For every distinct value in ``submodel['class']`` the mean of
    ``AnswerID_count`` is taken per ``YearsElectrified``, rounded up with
    ``ceil``, and reindexed onto years 1..15 (missing years become 0).

    Args:
        submodel: DataFrame with the columns ``class``, ``YearsElectrified``
            and ``AnswerID_count``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.
    """
    yrs = list(range(1, 16))
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    palettes = cl.flipper()['seq']['3']

    data = []
    # Mean AnswerID count per number of years electrified, one trace per class.
    for i, c in enumerate(submodel['class'].unique()):
        selectdata = (
            submodel[submodel['class'] == c][['YearsElectrified',
                                              'AnswerID_count']]
            .groupby('YearsElectrified')
            .mean()
            .applymap(lambda x: ceil(x))
        )
        t = selectdata.reindex(yrs, fill_value=0).reset_index()

        # Wrap around the palette list so more than len(clrs) classes reuse
        # colours instead of raising IndexError.
        palette = palettes[clrs[i % len(clrs)]]
        data.append(go.Bar(
            x=yrs,
            y=t['AnswerID_count'],
            name=c,
            marker=dict(color=palette[1]),
        ))

    layout = go.Layout(
        barmode='stack',
        title='Number of AnswerIDs inferred for each customer class for 1 - 15+ years after electrification',
        xaxis=dict(title='Years Electrified', tickvals=yrs),
        yaxis=dict(title='AnswerID count'),
        margin=dict(t=100, r=150, b=50, l=150))

    fig = go.Figure(data=data, layout=layout)
    return offline.iplot(
        fig, filename=os.path.join(image_dir, 'answer_id_count' + '.png'))
def serve_pie_confusion_matrix(model, X_test, y_test, Z, threshold):
    """Build a dark-themed pie chart of the confusion matrix at a threshold.

    ``threshold`` is rescaled onto ``[Z.min(), Z.max()]`` and compared with
    ``model.predict_proba(X_test)``. When the result is two-dimensional,
    column 1 is used as the prediction.

    Args:
        model: Estimator exposing ``predict_proba``.
        X_test: Test features.
        y_test: True labels. The unpacking below needs a 2x2 matrix.
        Z: Object exposing ``min()`` and ``max()``. Presumably the model
            scores over the plotted grid; confirm against the caller.
        threshold: Relative threshold applied to the range of ``Z``.

    Returns:
        A ``go.Figure`` with one pie trace (TP, FN, FP, TN in that order).
    """
    # Map the relative threshold onto the range spanned by Z.
    scaled_threshold = threshold * (Z.max() - Z.min()) + Z.min()

    y_pred_test = (model.predict_proba(X_test) > scaled_threshold).astype(int)
    if len(y_pred_test.shape) == 2:
        # predict_proba gave one column per class; keep column index 1.
        y_pred_test = y_pred_test[:, 1]

    matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_test)
    tn, fp, fn, tp = matrix.ravel()

    values = [tp, fn, fp, tn]
    label_text = [
        "True Positive", "False Negative", "False Positive", "True Negative"
    ]
    labels = ["TP", "FN", "FP", "TN"]

    # Only the Blues ramp is needed; the unused Reds lookup was removed.
    blue = cl.flipper()["seq"]["9"]["Blues"]
    colors = ["#13c6e9", blue[1], "#ff916d", "#ff744c"]

    trace0 = go.Pie(
        labels=label_text,
        values=values,
        hoverinfo="label+value+percent",
        textinfo="text+value",
        text=labels,
        sort=False,  # keep slice order aligned with the colour list
        marker=dict(colors=colors),
        insidetextfont={"color": "white"},
        rotation=90,
    )

    layout = go.Layout(
        title="Confusion Matrix",
        margin=dict(l=50, r=50, t=100, b=10),
        legend=dict(bgcolor="#282b38", font={"color": "#a5b1cd"},
                    orientation="h"),
        plot_bgcolor="#282b38",
        paper_bgcolor="#282b38",
        font={"color": "#a5b1cd"},
    )

    return go.Figure(data=[trace0], layout=layout)
def serve_pie_confusion_matrix(model, X_test, y_test, Z, threshold):
    """Build the "Existence Ratio" pie chart from thresholded decision scores.

    ``threshold`` is rescaled onto ``[Z.min(), Z.max()]`` and compared with
    ``model.decision_function(X_test)``. The four confusion-matrix cells are
    then shown with application-specific labels.

    Args:
        model: Estimator exposing ``decision_function``.
        X_test: Test features.
        y_test: True labels. The unpacking below needs a 2x2 matrix.
        Z: Object exposing ``min()`` and ``max()``. Presumably the decision
            scores over the plotted grid; confirm against the caller.
        threshold: Relative threshold applied to the range of ``Z``.

    Returns:
        A ``go.Figure`` with one pie trace whose slices are, in order, the
        tp, fn, fp and tn counts.
    """
    # Map the relative threshold onto the range spanned by Z.
    scaled_threshold = threshold * (Z.max() - Z.min()) + Z.min()
    y_pred_test = (
        model.decision_function(X_test) > scaled_threshold).astype(int)

    matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred_test)
    tn, fp, fn, tp = matrix.ravel()

    values = [tp, fn, fp, tn]
    # NOTE(review): labels are paired positionally with tp, fn, fp, tn.
    # Confirm the wording matches the intended cells.
    label_text = [
        "Low probability",
        "Low probability points in red area",
        "High probability points in blue area",
        "High probability",
    ]
    labels = ["LP", "BinH", "HinB", "HP"]

    # Only the Blues ramp is needed; the unused Reds lookup was removed.
    blue = cl.flipper()["seq"]["9"]["Blues"]
    colors = ["#13c6e9", blue[1], "#ff916d", "#ff744c"]

    trace0 = go.Pie(
        labels=label_text,
        values=values,
        hoverinfo="label+value+percent",
        textinfo="text+value",
        text=labels,
        sort=False,  # keep slice order aligned with the colour list
        marker=dict(colors=colors),
        insidetextfont={"color": "white"},
        rotation=90,
    )

    layout = go.Layout(
        title="Existence Ratio",
        margin=dict(l=50, r=50, t=100, b=10),
        legend=dict(bgcolor="#282b38", font={"color": "#a5b1cd"},
                    orientation="h"),
        plot_bgcolor="#282b38",
        paper_bgcolor="#282b38",
        font={"color": "#a5b1cd"},
    )

    return go.Figure(data=[trace0], layout=layout)
def plot_pie(self):
    """Return a pie chart of TP / FN / FP / TN totals summed over classes.

    The confusion matrix of ``self.y_test`` against ``self.y_pred`` is
    reduced per class (diagonal = TP, column remainder = FP, row remainder =
    FN, everything else = TN) and the per-class counts are added up.
    """
    cm = confusion_matrix(self.y_test, self.y_pred)

    per_class_fp = cm.sum(axis=0) - np.diag(cm)
    per_class_fn = cm.sum(axis=1) - np.diag(cm)
    per_class_tp = np.diag(cm)
    per_class_tn = cm.sum() - (per_class_fp + per_class_fn + per_class_tp)

    # Collapse the per-class vectors into four float totals.
    tp, fn, fp, tn = (
        counts.astype(float).sum()
        for counts in (per_class_tp, per_class_fn, per_class_fp, per_class_tn)
    )

    palettes = cl.flipper()['seq']['9']
    blues, reds = palettes['Blues'], palettes['Reds']

    pie = go.Pie(
        labels=[
            "True Positive", "False Negative",
            "False Positive", "True Negative",
        ],
        values=[tp, fn, fp, tn],
        hoverinfo='label+value+percent',
        textinfo='text+value',
        text=["TP", "FN", "FP", "TN"],
        sort=False,
        marker=dict(colors=[blues[4], blues[1], reds[1], reds[4]]),
    )
    page = go.Layout(
        title='TP, TN, FP, FN',
        margin=dict(l=10, r=10, t=60, b=10),
        legend=dict(bgcolor='rgba(255,255,255,0)', orientation='h'),
    )
    return go.Figure(data=[pie], layout=page)
def plotMaxDemandSpread(md):
    """Plot a month-by-hour heatmap of how many ``Unitsread_kw`` rows exist.

    ``md`` is pivoted on (``month``, ``hour``) with a count aggregation and
    the counts are drawn with a two-stop Oranges colour scale.
    """
    counts = pd.pivot_table(md,
                            values='Unitsread_kw',
                            index=['month', 'hour'],
                            aggfunc='count').reset_index()

    oranges = cl.flipper()['seq']['3']['Oranges']
    heat = go.Heatmap(
        x=counts['month'],
        y=counts['hour'],
        z=counts['Unitsread_kw'],
        colorscale=[[0.0, oranges[0]], [1.0, oranges[-1]]],
    )

    months = list(range(1, 13))
    hours = list(range(1, 25))
    page = go.Layout(
        title='Spread of occurence of maximum demand for all households',
        xaxis=dict(title='month', tickvals=months),
        yaxis=dict(title='hour', tickvals=hours),
    )

    figure = go.Figure(data=[heat], layout=page)
    target = os.path.join(image_dir, 'max-demand-spread' + '.png')
    return offline.iplot(figure, filename=target)
def plot15YearBmDemandSummary(model_dir=dpet_dir):
    """Plot monthly energy consumption per customer class over the years.

    The data comes from ``bmDemandSummary(model_dir)``. One filled line is
    drawn per distinct ``class`` value, using ``YearsElectrified`` on the x
    axis and ``Energy [kWh]`` on the y axis.

    Args:
        model_dir: Directory handed to ``bmDemandSummary``. Defaults to the
            module-level ``dpet_dir``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.
    """
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    palettes = cl.flipper()['seq']['3']

    summary = bmDemandSummary(model_dir)
    df = summary[['class', 'YearsElectrified',
                  'Energy [kWh]']].sort_values(by='Energy [kWh]')

    data = []
    for count, c in enumerate(df['class'].unique()):
        in_class = df['class'] == c
        # Wrap around the palette list so more than len(clrs) classes reuse
        # colours instead of raising IndexError.
        palette = palettes[clrs[count % len(clrs)]]
        data.append(go.Scatter(
            x=df.loc[in_class, 'YearsElectrified'],
            y=df.loc[in_class, 'Energy [kWh]'],
            name=c,
            fill='tonexty',
            mode='lines',
            line=dict(color=palette[1], width=3),
        ))

    layout = go.Layout(
        title='Annualised Monthly Energy Consumption for Domestic Energy Consumers',
        xaxis=dict(title='years since electrification',
                   tickfont=dict(size=14, color='rgb(107, 107, 107)')),
        yaxis=dict(title='average annual kWh/month',
                   titlefont=dict(size=16, color='rgb(107, 107, 107)')),
    )

    return offline.iplot(
        {"data": data, "layout": layout},
        filename=os.path.join(image_dir, '15year_demand_summary' + '.png'))
def plotClassDist(year, class_dir):
    """Plot the class-likelihood heatmap for every AnswerID of one year.

    The frame returned by ``readClasses(year, class_dir)`` is melted to long
    form (one row per AnswerID/class pair) and drawn as a heatmap with a
    hover label per cell.
    """
    ramp = cl.flipper()['div']['5']['RdGy']
    # Colour stops paired with the ramp index used at each stop.
    scl = [[stop, ramp[pick]]
           for stop, pick in zip((0, 0.25, 0.5, 0.75, 1), (2, 3, 4, 1, 0))]

    def hover_label(row):
        likelihood = "{0:.3f}".format(row['value'])
        return ('AnswerID: ' + str(row['AnswerID']) + '<br />class: ' +
                row['variable'] + '<br />likelihood: ' + likelihood)

    long_form = readClasses(year, class_dir).reset_index().melt(
        id_vars='AnswerID')
    long_form['tixnames'] = long_form.apply(hover_label, axis=1)

    heat = go.Heatmap(
        z=long_form['value'],
        x=long_form['AnswerID'],
        y=long_form['variable'],
        colorscale=scl,
        colorbar=dict(title='likelihood'),
        text=long_form['tixnames'],
        hoverinfo='text',
    )

    x_axis = dict(
        title='household IDs',
        type='category',
        showticklabels=False,
        ticks='',
        showline=True,
    )
    y_axis = dict(
        type='category',
        showline=True,
    )
    page = go.Layout(
        title='Probability Distribution of Customer Classes for ' + str(year),
        xaxis=x_axis,
        yaxis=y_axis,
        margin=go.Margin(l=175, r=75, b=50, t=100),
    )

    return offline.iplot({"data": [heat], "layout": page})
def bar_chart(series: pd.Series, colors: str = 'Blues', **kwargs):
    """Build a single-colour bar chart from a pandas Series.

    The index supplies the x positions and the values supply the bar heights.

    Args:
        series: Data to plot.
        colors: Name of a sequential colorlover scale, i.e. a key of
            ``cl.flipper()['seq']['3']``. The last colour of the 3-step scale
            is used for every bar.
        **kwargs: Forwarded unchanged to ``go.Layout``.

    Returns:
        A ``go.Figure`` that can be displayed, saved, etc.
    """
    bar_color = cl.flipper()['seq']['3'][colors][-1]
    bars = go.Bar(
        x=series.index,
        y=series.values,
        marker=dict(color=bar_color),
    )
    return go.Figure([bars], go.Layout(**kwargs))
def test_flipper(self):
    """flipper() exposes scales as [type][count][name]; check RdYlBu/3."""
    expected = ['rgb(252,141,89)', 'rgb(255,255,191)', 'rgb(145,191,219)']
    by_type = cl.flipper()
    self.assertEqual(by_type['div']['3']['RdYlBu'], expected)
def plotClassYearRange(yearstart, yearend, class_dir):
    """Plot one class-likelihood heatmap per year in a 3-column subplot grid.

    For every year in ``[yearstart, yearend]`` the frame from
    ``readClasses(year, class_dir)`` is melted to long form and drawn as a
    heatmap. Years whose data cannot be loaded or plotted are skipped, with a
    warning. Only the first subplot shows the colour bar.

    Args:
        yearstart: First year (inclusive).
        yearend: Last year (inclusive).
        class_dir: Directory handed to ``readClasses``.

    Returns:
        Whatever ``offline.iplot`` returns for the assembled figure.
    """
    import warnings

    colors = cl.flipper()['div']['5']['RdGy']
    scl = [[0, colors[2]], [0.25, colors[3]], [0.5, colors[4]],
           [0.75, colors[1]], [1, colors[0]]]

    ncol = 3
    nplots = yearend - yearstart + 1
    nrow = int(ceil(nplots / ncol))

    fig = tools.make_subplots(
        rows=nrow,
        cols=int(ncol),
        subplot_titles=list(range(yearstart, yearend + 1)),
        print_grid=False)

    for idx, y in enumerate(range(yearstart, yearend + 1)):
        # Fill the grid row by row; rows and columns are 1-based.
        row, col = divmod(idx, ncol)
        row += 1
        col += 1
        # Only the first subplot carries the shared colour bar.
        scl_switch = idx == 0

        try:
            df = readClasses(y, class_dir)
            melt = df.reset_index().melt(id_vars='AnswerID')
            melt['tixnames'] = melt.apply(
                lambda x: 'AnswerID: ' + str(x['AnswerID']) +
                '<br />class: ' + x['variable'] + '<br />likelihood: ' +
                "{0:.3f}".format(x['value']),
                axis=1)
            trace = go.Heatmap(z=melt.value,
                               x=melt.AnswerID,
                               y=melt.variable,
                               text=melt['tixnames'],
                               hoverinfo='text',
                               colorscale=scl,
                               showscale=scl_switch,
                               colorbar=dict(title='likelihood',
                                             len=0.5,
                                             yanchor='bottom'))
            fig.append_trace(trace, row, col)
        except Exception as err:
            # Best effort: a year without usable data leaves its cell empty.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            warnings.warn(
                'Skipping year {}: {!r}'.format(y, err))

    fig['layout'].update(
        showlegend=False,
        title='Probability Distribution of Customer Classes from ' +
        str(yearstart) + '-' + str(yearend),
        height=350 + 300 * (nrow - 1),
        margin=dict(l=140))

    # Axes are numbered 1..nrow*ncol row by row. The bounds used to be the
    # calendar year `yearend` and a hard-coded 7, which touched non-existent
    # axes and missed real ones on larger grids.
    n_axes = nrow * ncol
    for k in range(1, n_axes + 1):
        if (k - 1) % ncol == 0:
            # First column: keep the categorical y tick labels.
            y_axis = go.YAxis(type='category', showline=True)
        else:
            # Other columns: hide y tick labels to avoid repetition.
            y_axis = go.YAxis(showticklabels=False, ticks='', showline=True)
        fig['layout'].update({
            'yaxis{}'.format(k): y_axis,
            'xaxis{}'.format(k): go.XAxis(
                type='category',
                showticklabels=False,
                ticks='',
                showline=True)
        })

    return offline.iplot(fig)
def set_fontcolor(self, by='scores', colorscale='YlOrRd', custom_colors=None):
    """
    Pick the metric that decides the font color of each extracted keyword.

    By default, the font color is assigned based on the score of each
    keyword. Colors can be picked by: 'random', 'scores', 'pos_tag',
    'clustering_criteria'

    You can also choose custom font colors by passing in a list of
    (R,G,B) triples with values for each component falling in [0,255],
    or a list of ready-made color strings.

    Parameters
    ----------
    by : str or None, optional
        The metric used to assign font colors. Can be None if custom
        colors are being used

    colorscale: str or None, optional
        One of [Greys, YlGnBu, Greens, YlOrRd, Bluered, RdBu, Reds, Blues].
        When by=='scores', this will be used to determine the colorscale.

    custom_colors : list of 3-tuple or list of str, optional
        One entry per keyword. Takes precedence over ``by``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``by`` is not one of the supported metrics and no custom colors
        were given.
    """
    if custom_colors is not None:
        assert len(custom_colors) == len(self.keywords)
        if len(custom_colors) == 0:
            # Nothing to color; avoids IndexError on custom_colors[0].
            self.fontcolors = []
        elif isinstance(custom_colors[0], str):
            self.fontcolors = custom_colors
        else:
            self.fontcolors = []
            for rgb in custom_colors:
                assert len(rgb) == 3
                # tuple() so that list triples also render as 'rgb(r, g, b)'
                # rather than 'rgb[r, g, b]'.
                self.fontcolors.append('rgb' + str(tuple(rgb)))

    elif by == 'random':
        palettes = cl.flipper()['seq']['3']
        tone = np.random.choice(list(palettes.keys()))
        self.fontcolors = np.random.choice(list(palettes[tone]),
                                           len(self.keywords))

    elif by == 'scores':
        scales = {**cl.scales['8']['div'], **cl.scales['8']['seq']}
        # Even though currently all colorscales in 'scales.keys()' can be
        # used, only the ones listed in the doc can be used for creating a
        # colorbar in the plotly plot.
        # NOTE(review): a name that passes this check but is missing from
        # `scales` will raise KeyError on the next line.
        assert colorscale in [
            'Greys', 'YlGnBu', 'Greens', 'YlOrRd', 'Bluered', 'RdBu', 'Reds',
            'Blues'
        ]

        colors = scales[colorscale].copy()
        colors.reverse()

        # The keywords are binned based on their scores: 8 bin edges give
        # indices 0..7 into the 8-color scale (the maximum lands in bin 7).
        mn, mx = self.scores.min(), self.scores.max()
        bins = np.linspace(mn, mx, 8)
        indices = np.digitize(self.scores, bins) - 1

        self.fontcolors = [colors[i] for i in indices]

    elif by == 'pos_tag':
        # Five Set2 colors plus two near-white fillers: one per tag below.
        c = cl.scales['5']['qual']['Set2'] + [
            'rgb(254,254,254)', 'rgb(254,254,254)'
        ]
        tags = ['NOUN', 'PROPN', 'ADJ', 'VERB', 'ADV', 'SYM', 'ADP']
        mapping = {tag: c[i] for i, tag in enumerate(tags)}
        # Tags outside `tags` map to None.
        self.fontcolors = list(map(mapping.get, self.pos_tags))

    elif by == 'clustering_criteria':
        mds = MDS(3, dissimilarity='precomputed').\
                             fit_transform(self.similarity_matrix)
        # Rescale the three embedding coordinates into [50, 255].
        mds = mds - mds.min()
        mds = mds * 205 / mds.max() + 50
        # tolist() converts NumPy scalars to Python floats, so the string is
        # 'rgb(r, g, b)' on every NumPy version (NumPy 2 scalar reprs would
        # otherwise leak 'np.float64(...)' into the color).
        self.fontcolors = ['rgb' + str(tuple(rgb.tolist())) for rgb in mds]

    else:
        raise ValueError(
            "Unknown value for 'by': {!r}; expected one of 'random', "
            "'scores', 'pos_tag', 'clustering_criteria'".format(by))

    # raise flag to indicate that the fontcolors have been modified
    self._flag_fontcolors = True
# Change COMPLNT_FR_TM to datetime.time type data.CMPLNT_FR_TM = pd.to_datetime(data.CMPLNT_FR_TM, format='%H:%M:%S').dt.time type(data.CMPLNT_FR_TM[0]) data = pd.read_csv('/content/gdrive/My Drive/Colab Notebooks/Criminal_Prediction/NYC_Crime_sampling.csv') data = data.drop(columns=['Unnamed: 0']) print('We have total {} observations and {} variables.'.format(data.shape[0], data.shape[1])) print("\nHere are first 5 row of our sample data.\n") data.head() import colorlover as cl from IPython.display import HTML HTML(cl.to_html( cl.flipper()['seq']['3'] )) colors = cl.scales['9']['seq']['GnBu'] print('Color we chose in this notebook:\n') HTML(cl.to_html(colors)) # Visulize number of crimes in NYC Boroughs data.BORO_NM.value_counts().sort_index() import plotly.graph_objs as go import plotly.offline as py trace1 = go.Bar(x = data.BORO_NM.value_counts().sort_index().index, y = data.BORO_NM[data.LAW_CAT_CD == 'MISDEMEANOR'].value_counts().sort_index(), name = 'MISDEMEANOR', text = (data.BORO_NM[data.LAW_CAT_CD == 'MISDEMEANOR'].value_counts()/data.BORO_NM.value_counts()).apply(lambda x: format(x, '.2%')).sort_index(),
def dic(c, small_location):
    """Assemble the dashboard figure dictionaries for firm ``c``.

    Reads ``../input_fields.csv`` and the per-firm CSVs under
    ``../data/ratings/`` (relative to this module) as well as the search CSVs
    under ``data/search/``. It builds plotly traces for employee, management,
    stock, customer (Yelp) and search series.

    Args:
        c: Firm code used as the file-name prefix of the ratings CSVs. It is
            also looked up in the ``code_or_ticker`` column of
            ``input_fields.csv`` to find the ticker of the search file.
        small_location: Name of the Yelp location column to plot next to the
            overall customer series (compared case-insensitively).

    Returns:
        dict with the keys ``fig_overall``, ``fig_search``, ``fig_emp``,
        ``fig_mgm``, ``fig_sha``, ``fig_cus`` and ``fig_ben``. Each value is a
        ``dict(data=..., layout=...)``, and all of them share one layout
        object.
    """
    my_path = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(my_path, "../input_fields.csv")
    path_in = os.path.join(my_path, "../data/ratings/")

    input_fields = pd.read_csv(path)
    code = input_fields["code_or_ticker"]

    glassdoor = pd.read_csv(path_in + c + "_gdoor_employee_rate.csv")
    glassdoor_m = pd.read_csv(path_in + c + "_gdoor_mgmt_rate.csv")
    df_tick = pd.read_csv(path_in + c + "_stock_rate.csv")
    yelp = pd.read_csv(path_in + "all_yelps_rates_" + c + ".csv")

    # Ticker of the requested firm. The previous filter compared the column
    # with itself (always true), so the first row's ticker was used for every
    # firm.
    c_corr = input_fields[input_fields["code_or_ticker"] ==
                          c]["ticker"].reset_index(drop=True)[0]

    def _faded(frame, x_col, y_col, name):
        """Return a low-opacity sub-metric trace in the 'Employees' group."""
        return go.Scatter(x=frame[x_col],
                          y=frame[y_col],
                          name=name,
                          line=dict(color='#17BECF'),
                          legendgroup='Employees',
                          opacity=0.2)

    # Employee review series.
    trace_emp = go.Scatter(x=glassdoor["Review Date"],
                           y=glassdoor["Final_Rating"],
                           name="Employees Sentiment",
                           line=dict(color='#17BECF'),
                           opacity=0.8)
    trace_wlb = _faded(glassdoor, "Review Date", "Final_Work Life Balance",
                       "Work Life Balance")
    # NOTE(review): trace_cva is built but not part of any returned figure.
    trace_cva = _faded(glassdoor, "Review Date", "Final_Culture Values",
                       "Culture Values")
    trace_cop = _faded(glassdoor, "Review Date", "Final_Career Opportunities",
                       "Career Opportunities")
    trace_cbe = _faded(glassdoor, "Review Date", "Final_Comp Benefits",
                       "Comp Benefits")
    trace_sma = _faded(glassdoor, "Review Date", "Final_Senior Management",
                       "Management Competence")

    # Management review series.
    trace_mse = go.Scatter(x=glassdoor_m["date"],
                           y=glassdoor_m["trace_mse"],
                           name="Management Sentiment",
                           line=dict(color='green'),
                           opacity=0.8)
    trace_mwlb = _faded(glassdoor_m, "date", "trace_mwlb",
                        "Work Life Balance")
    # NOTE(review): trace_mcva is built but not part of any returned figure.
    trace_mcva = _faded(glassdoor_m, "date", "trace_mcva", "Culture Values")
    trace_mcop = _faded(glassdoor_m, "date", "trace_mcop",
                        "Career Opportunities")
    trace_mcbe = _faded(glassdoor_m, "date", "trace_mcbe", "Comp Benefits")
    trace_msma = _faded(glassdoor_m, "date", "trace_msma",
                        "Upper Management Competence")

    trace_sto = go.Scatter(x=df_tick["date"],
                           y=df_tick["close"],
                           name="Stock Price",
                           line=dict(color='#7F7F7F'),
                           opacity=1)
    # NOTE(review): trace_cus is built but not part of any returned figure.
    trace_cus = go.Scatter(x=yelp["date"],
                           y=yelp["all"],
                           name="Customer Sentiment",
                           line=dict(color="orange"),
                           opacity=0.8)

    # Google search data. The slice drops the last 7 characters of this
    # module's directory; presumably the name of the sub-folder. TODO confirm.
    my_path = os.path.abspath(os.path.dirname(__file__))
    path_in_search = os.path.join(my_path[:-7] + "/data/search/")
    print(path_in_search)

    search_df = pd.read_csv(path_in_search + "correlate-" + c_corr + ".csv")
    rat = pd.read_csv(path_in_search + "rat_search.csv")

    search = []
    import colorlover as cl
    daf = ["red", "green", "blue", "violet", "purple", "grey"]

    # This has to be changed for new categories.
    search_dandas = pd.read_csv(path_in_search + "searches_BRJI_dandas.csv")

    # Row-wise average of the search columns. numeric_only=True keeps the
    # string "date" column out of the sum (pandas >= 2 no longer drops it
    # silently); the divisor still excludes that one column.
    # NOTE(review): trace_search_all is not part of any returned figure here.
    trace_search_all = go.Scatter(
        x=search_dandas["date"],
        y=search_dandas.sum(axis=1, numeric_only=True) /
        (len(search_dandas.columns) - 1),
        name="Search Sentiment",
        opacity=0.8)

    for rit, col in enumerate(search_dandas.drop(["date"], axis=1).columns):
        trace = go.Scatter(x=search_dandas["date"],
                           y=search_dandas[col],
                           line=dict(color=daf[rit]),
                           name=col,
                           legendgroup=col,
                           opacity=0.8)
        search.append(trace)

    # NOTE(review): color_dict is filled but never read afterwards.
    color_dict = {}
    for sam, i in enumerate(
            ["Reds", "Greens", "Blues", "PuRd", "Purples", "Greys"]):
        dan = cl.flipper()['seq'][str(
            rat.groupby("type").count().max()[0] + 1)][i]
        color_dict[sam] = dan

    # One faint trace per correlated search term, colored by its type in rat.
    for col in search_df.drop(["Date"], axis=1).columns:
        for tio, g in enumerate(rat["type"].unique()):
            ban = daf[tio]
            if col in rat[rat["type"] == g]["0"].values:
                trace = go.Scatter(x=search_df["Date"],
                                   y=search_df[col],
                                   line=dict(color=ban),
                                   name=col,
                                   legendgroup=g,
                                   opacity=0.05)
                search.append(trace)

    # Rescale the overall Yelp series so that its last value equals the last
    # management-sentiment value.
    # NOTE(review): the exponent uses iloc[1] (second row), not iloc[0];
    # confirm this is intended.
    yelp["new"] = yelp["all"]**(1 * (np.sqrt(
        np.abs(np.log(np.abs(yelp["all"].iloc[-1] - yelp["all"].iloc[1]))))**
                                     3.5)) / 10000
    yelp["new"] = (yelp["all"].iloc[-1] / yelp["new"].iloc[-1]) * yelp["new"]
    multiplier = (glassdoor_m["trace_mse"].tail(1)
                  ).values[0] / yelp["new"].tail(1).values[0]
    yelp["new"] = yelp["new"] * multiplier

    dat = yelp
    yep = []
    trace_all_yelp = go.Scatter(x=dat["date"],
                                y=dat["new"],
                                line=dict(color='orange'),
                                name="Customer Sentiment Avg.",
                                legendgroup="yelps",
                                opacity=0.8)
    yep.append(trace_all_yelp)

    # Add only the location column matching small_location.
    for col in dat.drop(["date", "all"], axis=1).columns:
        if col.lower() == small_location.lower():
            trace = go.Scatter(x=dat["date"],
                               y=dat[col],
                               line=dict(color='orange'),
                               name=col,
                               legendgroup="yelps",
                               opacity=0.50)
            yep.append(trace)

    df_rick = df_tick
    trace_stock = go.Scatter(x=df_rick["date"],
                             y=df_rick["close"],
                             name="Stock",
                             line=dict(color='#7F7F7F'),
                             opacity=1)
    search.append(trace_stock)
    yep.append(trace_sto)

    data = [trace_sto, trace_emp, trace_mse, trace_all_yelp]

    layout = dict(
        margin=dict(
            t=20,
            b=15,
        ),
        xaxis=dict(rangeselector=dict(buttons=list([
            dict(count=1, label='1m', step='month', stepmode='backward'),
            dict(count=6, label='6m', step='month', stepmode='backward'),
            dict(step='all')
        ])),
                   rangeslider=dict(),
                   type='date'),
        hovermode="closest")

    fig_overall = dict(data=data, layout=layout)
    fig_search = dict(data=search, layout=layout)

    emp_data = [
        trace_sto, trace_emp, trace_wlb, trace_cop, trace_cbe, trace_sma
    ]
    fig_emp = dict(data=emp_data, layout=layout)

    mgm_data = [
        trace_sto, trace_mse, trace_mwlb, trace_mcop, trace_mcbe, trace_msma
    ]
    fig_mgm = dict(data=mgm_data, layout=layout)

    share_data = [trace_sto]
    fig_sha = dict(data=share_data, layout=layout)

    fig_cus = dict(data=yep, layout=layout)

    # Benefits figure: raw series, smoothed series and a cross-firm benchmark.
    from scipy import signal
    glassdoor["ben_smooth"] = signal.savgol_filter(
        glassdoor["Final_Comp Benefits"], 199, 3)

    trace_cbe_smoothed = go.Scatter(x=glassdoor["Review Date"],
                                    y=glassdoor["ben_smooth"],
                                    name="Smoothed",
                                    showlegend=False,
                                    legendgroup='Employees',
                                    opacity=0.8)
    trace_cbe_new = go.Scatter(x=glassdoor["Review Date"],
                               y=glassdoor["Final_Comp Benefits"],
                               name="Benefits",
                               legendgroup='Employees',
                               showlegend=False,
                               opacity=0.8)

    # Outer-join the smoothed benefits series of every firm on review date.
    # The loop uses its own names so parameter `c` is not overwritten.
    for tal, firm in enumerate(code):
        firm_df = pd.read_csv(path_in + firm + "_gdoor_employee_rate.csv")
        firm_df[firm] = signal.savgol_filter(firm_df["Final_Comp Benefits"],
                                             199, 3)
        firm_df["Review Date"] = pd.to_datetime(firm_df["Review Date"],
                                                infer_datetime_format=True)
        firm_series = firm_df[[firm, "Review Date"]].set_index("Review Date")
        if tal == 0:
            full = firm_series
        else:
            full = pd.merge(full,
                            firm_series,
                            left_index=True,
                            right_index=True,
                            how="outer")

    # Fill gaps backwards first, then forwards for trailing gaps.
    full = full.bfill()
    full = full.ffill().reset_index()
    # Average over the firm columns only; the datetime column is excluded.
    full["Inds"] = full.mean(axis=1, numeric_only=True)

    trace_cbe_smoothed_all = go.Scatter(x=full["Review Date"],
                                        y=full["Inds"],
                                        name="Smoothed Bench",
                                        showlegend=False,
                                        legendgroup='Employees',
                                        opacity=0.8)

    fig_ben = dict(
        data=[trace_cbe_new, trace_cbe_smoothed, trace_cbe_smoothed_all],
        layout=layout)

    d = {}
    d["fig_overall"] = fig_overall
    d["fig_search"] = fig_search
    d["fig_emp"] = fig_emp
    d["fig_mgm"] = fig_mgm
    d["fig_sha"] = fig_sha
    d["fig_cus"] = fig_cus
    d["fig_ben"] = fig_ben

    return d
results_dir, f'mppdc_price_change_deviation_case_transition_year_{transition_year}.pickle', 'YEAR_CUMULATIVE_SCHEME_REVENUE', stage='stage_3_price_targeting', iteration='max') v_heuristic = analysis.extract_results( results_dir, f'heuristic_price_change_deviation_case_transition_year_{transition_year}.pickle', 'YEAR_CUMULATIVE_SCHEME_REVENUE', stage='stage_3_price_targeting', iteration='max', model='primal') # Create figures c = cl.to_numeric( cl.flipper()['qual']['5']['Set1'] ) # ['Accent', 'Dark2', 'Paired', 'Pastel1', 'Pastel2', 'Set1', 'Set2', 'Set3']) fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2) # Average prices # -------------- # Dash spacing p_y = 10 p_z = 0.05 p_dash_length = p_y / (3 + (3 * p_z)) p_dash_spacer = p_z * p_dash_length p_dash_spacing = (2 * p_dash_length) + (3 * p_dash_spacer) # p_t1 = (p_dash_spacer*6.5, (p_dash_length, p_dash_spacing)) # p_t2 = (p_dash_length + (2 * p_dash_spacer), (p_dash_length, p_dash_spacing)) # p_t3 = ((2 * p_dash_length) + (3 * p_dash_spacer), (p_dash_length, p_dash_spacing))
def test_flipper(self):
    """The flipped lookup [type][count][name] returns the 3-step RdYlBu."""
    rd_yl_bu = ['rgb(252,141,89)', 'rgb(255,255,191)', 'rgb(145,191,219)']
    diverging = cl.flipper()['div']
    self.assertEqual(diverging['3']['RdYlBu'], rd_yl_bu)
def plot_average_prices(results_dir, output_dir):
    """Plot average real prices for each scheme and save as PNG and PDF.

    Five price series are loaded through ``analysis.get_average_prices`` and
    drawn on one axis. The figure is written to ``average_prices.png`` and
    ``average_prices.pdf`` in ``output_dir`` and then shown.
    """
    load = analysis.get_average_prices

    # Prices from the different models, in legend order.
    bau = load(results_dir, 'bau_case.pickle', None, 'PRICES', -1)
    rep = load(results_dir, 'rep_case.pickle', 'stage_2_rep', 'PRICES', -1)
    tax = load(results_dir, 'rep_case.pickle', 'stage_1_carbon_tax', 'PRICES',
               -1)
    mppdc = load(results_dir, 'mppdc_price_change_deviation_case.pickle',
                 'stage_3_price_targeting', 'lamb', 1)
    heuristic = load(results_dir,
                     'heuristic_price_change_deviation_case.pickle',
                     'stage_3_price_targeting', 'PRICES', -1)

    # Five-colour qualitative palette converted to numeric RGB.
    palette = cl.to_numeric(cl.flipper()['qual']['5']['Set1'])

    fig, ax = plt.subplots()

    # (series, palette index, alpha), drawn in the order of the legend below.
    curves = (
        (bau, 1, 0.7),
        (tax, 0, 0.7),
        (rep, 2, 0.7),
        (mppdc, 3, 0.7),
        (heuristic, 4, 0.6),
    )
    for prices, shade, alpha in curves:
        ax.plot(prices.index.tolist(),
                prices['average_price_real'].tolist(),
                color=scale_rgb(palette[shade]),
                alpha=alpha,
                linewidth=0.9)

    fig.set_size_inches(3, 2.3)

    ax.set_ylabel('Average price ($/MWh)', fontsize=9, labelpad=-0.1)
    ax.set_xlabel('Year', fontsize=9)
    ax.tick_params(labelsize=8)

    ax.xaxis.set_major_locator(MultipleLocator(5))
    ax.xaxis.set_minor_locator(MultipleLocator(1))
    ax.yaxis.set_major_locator(MultipleLocator(20))
    ax.yaxis.set_minor_locator(MultipleLocator(5))

    ax.legend(['BAU', 'Tax', 'REP', 'MPPDC', 'Heuristic'],
              fontsize=7,
              ncol=2,
              frameon=False)

    fig.subplots_adjust(left=0.16, bottom=0.18, top=0.98, right=0.98)

    for extension in ('png', 'pdf'):
        fig.savefig(os.path.join(output_dir, 'average_prices.' + extension))
    plt.show()
def dic(c):
    """Assemble the dashboard figure dictionaries for firm ``c``.

    Reads ``../input_fields.csv`` and the per-firm CSVs under
    ``../data/ratings/`` (relative to this module) as well as the search CSVs
    under ``data/search/``. It builds plotly traces for employee, management,
    stock, customer (Yelp) and search series.

    Args:
        c: Firm code used as the file-name prefix of the ratings CSVs and in
            the name of the ``correlate-<c>.csv`` search file.

    Returns:
        dict with the keys ``fig_overall``, ``fig_search``, ``fig_emp``,
        ``fig_mgm``, ``fig_sha``, ``fig_cus`` and ``fig_ben``. Each value is a
        ``dict(data=..., layout=...)``, and all of them share one layout
        object.
    """
    my_path = os.path.abspath(os.path.dirname(__file__))
    path = os.path.join(my_path, "../input_fields.csv")
    path_in = os.path.join(my_path, "../data/ratings/")

    input_fields = pd.read_csv(path)
    # NOTE(review): `code` is read but not used further in this function.
    code = input_fields["code_or_ticker"]

    glassdoor = pd.read_csv(path_in + c + "_gdoor_employee_rate.csv")
    glassdoor_m = pd.read_csv(path_in + c + "_gdoor_mgmt_rate.csv")
    df_tick = pd.read_csv(path_in + c + "_stock_rate.csv")
    yelp = pd.read_csv(path_in + "all_yelps_rates_" + c + ".csv")

    def _faded(frame, x_col, y_col, name):
        """Return a low-opacity sub-metric trace in the 'Employees' group."""
        return go.Scatter(x=frame[x_col],
                          y=frame[y_col],
                          name=name,
                          line=dict(color='#17BECF'),
                          legendgroup='Employees',
                          opacity=0.2)

    # Employee review series.
    trace_emp = go.Scatter(x=glassdoor["Review Date"],
                           y=glassdoor["Final_Rating"],
                           name="Employees Sentiment",
                           line=dict(color='#17BECF'),
                           opacity=0.8)
    trace_wlb = _faded(glassdoor, "Review Date", "Final_Work Life Balance",
                       "Work Life Balance")
    # NOTE(review): trace_cva is built but not part of any returned figure.
    trace_cva = _faded(glassdoor, "Review Date", "Final_Culture Values",
                       "Culture Values")
    trace_cop = _faded(glassdoor, "Review Date", "Final_Career Opportunities",
                       "Career Opportunities")
    trace_cbe = _faded(glassdoor, "Review Date", "Final_Comp Benefits",
                       "Comp Benefits")
    trace_sma = _faded(glassdoor, "Review Date", "Final_Senior Management",
                       "Management Likability")

    # Management review series.
    trace_mse = go.Scatter(x=glassdoor_m["date"],
                           y=glassdoor_m["trace_mse"],
                           name="Management Sentiment",
                           line=dict(color='green'),
                           opacity=0.8)
    trace_mwlb = _faded(glassdoor_m, "date", "trace_mwlb",
                        "Work Life Balance")
    # NOTE(review): trace_mcva is built but not part of any returned figure.
    trace_mcva = _faded(glassdoor_m, "date", "trace_mcva", "Culture Values")
    trace_mcop = _faded(glassdoor_m, "date", "trace_mcop",
                        "Career Opportunities")
    trace_mcbe = _faded(glassdoor_m, "date", "trace_mcbe", "Comp Benefits")
    trace_msma = _faded(glassdoor_m, "date", "trace_msma",
                        "Management Likability")

    trace_sto = go.Scatter(x=df_tick["date"],
                           y=df_tick["close"],
                           name="Stock Price",
                           line=dict(color='#7F7F7F'),
                           opacity=1)
    # NOTE(review): trace_cus is built but not part of any returned figure.
    trace_cus = go.Scatter(x=yelp["date"],
                           y=yelp["all"],
                           name="Customer Sentiment",
                           line=dict(color="orange"),
                           opacity=0.8)

    # Google search data. The slice drops the last 7 characters of this
    # module's directory; presumably the name of the sub-folder. TODO confirm.
    my_path = os.path.abspath(os.path.dirname(__file__))
    path_in_search = os.path.join(my_path[:-7] + "/data/search/")
    print(path_in_search)

    search_df = pd.read_csv(path_in_search + "correlate-" + c + ".csv")
    rat = pd.read_csv(path_in_search + "rat_search.csv")

    search = []
    import colorlover as cl
    daf = ["red", "green", "blue", "violet", "purple", "grey"]

    # This has to be changed for new categories.
    search_dandas = pd.read_csv(path_in_search + "searches_BRJI_dandas.csv")

    # Row-wise average of the search columns. numeric_only=True keeps the
    # string "date" column out of the sum (pandas >= 2 no longer drops it
    # silently); the divisor still excludes that one column.
    trace_search_all = go.Scatter(
        x=search_dandas["date"],
        y=search_dandas.sum(axis=1, numeric_only=True) /
        (len(search_dandas.columns) - 1),
        name="Search Sentiment",
        opacity=0.8)

    for rit, col in enumerate(search_dandas.drop(["date"], axis=1).columns):
        trace = go.Scatter(x=search_dandas["date"],
                           y=search_dandas[col],
                           line=dict(color=daf[rit]),
                           name=col,
                           legendgroup=col,
                           opacity=0.8)
        search.append(trace)

    # NOTE(review): color_dict is filled but never read afterwards.
    color_dict = {}
    for sam, i in enumerate(
            ["Reds", "Greens", "Blues", "PuRd", "Purples", "Greys"]):
        dan = cl.flipper()['seq'][str(
            rat.groupby("type").count().max()[0] + 1)][i]
        color_dict[sam] = dan

    # One faint trace per correlated search term, colored by its type in rat.
    for col in search_df.drop(["Date"], axis=1).columns:
        for tio, g in enumerate(rat["type"].unique()):
            ban = daf[tio]
            if col in rat[rat["type"] == g]["0"].values:
                trace = go.Scatter(x=search_df["Date"],
                                   y=search_df[col],
                                   line=dict(color=ban),
                                   name=col,
                                   legendgroup=g,
                                   opacity=0.05)
                search.append(trace)

    dat = yelp
    yep = []
    trace_all_yelp = go.Scatter(x=dat["date"],
                                y=dat["all"],
                                line=dict(color='orange'),
                                name="Customer Sentiment",
                                legendgroup="yelps",
                                opacity=0.8)
    yep.append(trace_all_yelp)

    # One faint trace per individual Yelp location column.
    for col in dat.drop(["date", "all"], axis=1).columns:
        trace = go.Scatter(x=dat["date"],
                           y=dat[col],
                           line=dict(color='orange'),
                           name=col,
                           legendgroup="yelps",
                           opacity=0.10)
        yep.append(trace)

    # Stock rows dated before the latest search date (the "date" values are
    # compared as read from the CSVs).
    df_rick = df_tick[df_tick["date"] < search_dandas["date"].max()]
    trace_stock = go.Scatter(x=df_rick["date"],
                             y=df_rick["close"],
                             name="Stock",
                             line=dict(color='#7F7F7F'),
                             opacity=1)
    search.append(trace_stock)
    yep.append(trace_sto)

    data = [trace_sto, trace_emp, trace_mse, trace_all_yelp, trace_search_all]

    layout = dict(xaxis=dict(rangeselector=dict(buttons=list([
        dict(count=1, label='1m', step='month', stepmode='backward'),
        dict(count=6, label='6m', step='month', stepmode='backward'),
        dict(step='all')
    ])),
                             rangeslider=dict(),
                             type='date'),
                  hovermode="closest")

    fig_overall = dict(data=data, layout=layout)
    fig_search = dict(data=search, layout=layout)

    emp_data = [
        trace_sto, trace_emp, trace_wlb, trace_cop, trace_cbe, trace_sma
    ]
    fig_emp = dict(data=emp_data, layout=layout)

    mgm_data = [
        trace_sto, trace_mse, trace_mwlb, trace_mcop, trace_mcbe, trace_msma
    ]
    fig_mgm = dict(data=mgm_data, layout=layout)

    share_data = [trace_sto]
    fig_sha = dict(data=share_data, layout=layout)

    fig_cus = dict(data=yep, layout=layout)

    # Benefits figure: raw series plus a Savitzky-Golay smoothed version.
    from scipy import signal
    glassdoor["ben_smooth"] = signal.savgol_filter(
        glassdoor["Final_Comp Benefits"], 199, 3)

    trace_cbe_smoothed = go.Scatter(x=glassdoor["Review Date"],
                                    y=glassdoor["ben_smooth"],
                                    name="Smoothed",
                                    showlegend=False,
                                    legendgroup='Employees',
                                    opacity=0.8)
    trace_cbe_new = go.Scatter(x=glassdoor["Review Date"],
                               y=glassdoor["Final_Comp Benefits"],
                               name="Benefits",
                               legendgroup='Employees',
                               showlegend=False,
                               opacity=0.8)

    fig_ben = dict(data=[trace_cbe_new, trace_cbe_smoothed], layout=layout)

    d = {}
    d["fig_overall"] = fig_overall
    d["fig_search"] = fig_search
    d["fig_emp"] = fig_emp
    d["fig_mgm"] = fig_mgm
    d["fig_sha"] = fig_sha
    d["fig_cus"] = fig_cus
    d["fig_ben"] = fig_ben

    return d
def show_forecast(X,
                  y,
                  button_name_prefix,
                  graph_name_prefix,
                  chart_name,
                  anomaly=None,
                  timeinterval=None,
                  date_range=None):
    """Build a plotly figure dict with one selector button per group of series.

    Args:
        X: Nested mapping ``button -> graph -> x values``.
        y: Nested mapping ``button -> graph -> y values``. Its keys define
            the buttons and, per button, the graphs that are drawn.
        button_name_prefix: Text prepended to ``str(button)`` for the label.
        graph_name_prefix: Text prepended to ``str(graph)`` for trace names.
        chart_name: Figure title.
        anomaly: Optional mapping ``button -> indexable of anomaly starts``.
            Each start is drawn as a filled band from ``start`` to
            ``(start + timeinterval)[0]``.
        timeinterval: Value added to every anomaly start; only read when
            ``anomaly`` is given.
        date_range: Passed through as the x-axis ``range``.

    Returns:
        ``dict(data=[...traces...], layout={...})``.

    Line colours are drawn with ``random``, so they differ between calls
    unless the caller seeds the generator.
    """
    palettes = list(cl.flipper()['seq']['9'].values())
    data = defaultdict(list)

    for button_idx, (button, dict_graphs) in enumerate(y.items()):
        # Only the first button's traces are shown initially.
        visible = button_idx == 0

        # Anomalous values: one filled band per anomaly start.  Built before
        # the line traces, so `visible` is still a plain True/False here.
        if anomaly is None:
            anomaly_data = []
        else:
            anomaly_data = [
                go.Scatter(x=[
                    anomaly[button][k],
                    (anomaly[button][k] + timeinterval)[0]
                ],
                           y=[1, 1],
                           fill='tozeroy',
                           fillcolor='rgba(190,127,188,0.5)',
                           line=dict(width=0),
                           mode='none',
                           legendgroup='anomaly',
                           name='anomaly',
                           visible=visible,
                           # Single legend entry for the whole group.
                           showlegend=k == 0)
                for k in range(len(anomaly[button]))
            ]

        # Actual values: one line trace per graph.
        fact_data = []
        for j, (graph, _) in enumerate(dict_graphs.items()):
            if j % 2 == 0:
                dash = 'solid'
            elif j % 3 == 0:
                dash = 'dash'
            elif j % 5 == 0:
                dash = 'dot'
            else:
                dash = 'longdash'

            # Within the visible button only the first graph is fully shown;
            # the others start as legend-only.
            if visible is True and j != 0:
                visible = 'legendonly'

            colorpal = random.randint(0, len(palettes) - 1)
            colorintensity = random.randint(2, 8)
            fact_data.append(
                go.Scatter(name=graph_name_prefix + str(graph),
                           x=X[button][graph],
                           y=y[button][graph],
                           mode='lines',
                           line=dict(color=palettes[colorpal][colorintensity],
                                     dash=dash,
                                     width=2),
                           visible=visible))

        # Explicit None check: the former `filter(None.__ne__, ...)` relied on
        # the truthiness of NotImplemented, which is deprecated.
        data[button] = [
            trace for trace in (*fact_data, *anomaly_data)
            if trace is not None
        ]

    def _visibility_for(active_button):
        """Return the per-trace 'visible' flags used when a button is clicked."""
        flags = []
        for key, traces in data.items():
            if key == active_button:
                n_graphs = len(y[key])
                flags.extend([True] + (n_graphs - 1) * ['legendonly'] +
                             (len(traces) - n_graphs) * [True])
            else:
                flags.extend(len(traces) * [False])
        return flags

    updatemenus = [
        dict(type="buttons",
             x=-0.07,
             buttons=[
                 dict(label=button_name_prefix + str(button),
                      method='update',
                      args=[{
                          'visible': _visibility_for(button)
                      }]) for button in y.keys()
             ])
    ]

    layout = dict(
        title=chart_name,
        showlegend=True,
        updatemenus=updatemenus,
        xaxis=dict(range=date_range,
                   rangeselector=dict(buttons=[
                       dict(count=1,
                            label='1d',
                            step='day',
                            stepmode='backward'),
                       dict(count=7,
                            label='1w',
                            step='day',
                            stepmode='backward'),
                       dict(count=1,
                            label='1m',
                            step='month',
                            stepmode='backward'),
                       dict(step='all', stepmode='backward')
                   ]),
                   rangeslider=dict(visible=True),
                   type='date'),
        yaxis=dict(ticks='outside', zeroline=False),
    )

    return dict(data=list(itertools.chain.from_iterable(data.values())),
                layout=layout)
def drawTilePlot(pangenome, output, nocloud=False):
    """Write the presence/absence tile plot to ``output + "/tile_plot.html"``.

    Organisms (columns) are ordered by the leaves of a single-linkage
    dendrogram computed from the presence/absence matrix. Gene families
    (rows) are grouped as "P", then each partition starting with "S", then
    "C", and sorted inside each group by decreasing number of organisms.

    :param pangenome: partitioned pangenome to draw
    :param output: directory that receives ``tile_plot.html``
    :param nocloud: when true, families whose partition starts with "C" are
        left out of the plot
    :raises Exception: if the pangenome status says it is not partitioned
    """
    checkPangenomeInfo(pangenome,
                       needAnnotations=True,
                       needFamilies=True,
                       needGraph=True)
    if pangenome.status["partitionned"] == "No":
        raise Exception(
            "Cannot draw the tile plot as your pangenome has not been partitionned"
        )

    log = logging.getLogger()
    nb_organisms = len(pangenome.organisms)
    if nb_organisms > 500 and nocloud is False:
        log.warning(
            "You asked to draw a tile plot for a lot of organisms (>500). Your browser will probably not be able to open it."
        )
    log.info("Drawing the tile plot...")

    if nocloud:
        families = {
            fam
            for fam in pangenome.geneFamilies
            if not fam.partition.startswith("C")
        }
    else:
        families = set(pangenome.geneFamilies)

    org_index = pangenome.getIndex()
    index2org = {index: org for org, index in org_index.items()}

    COLORS = {
        "pangenome": "black",
        "exact_accessory": "#EB37ED",
        "exact_core": "#FF2828",
        "soft_core": "#c7c938",
        "soft_accessory": "#996633",
        "shell": "#00D860",
        "persistent": "#F7A507",
        "cloud": "#79DEFF",
        "undefined": "#828282"
    }

    # Sparse families x organisms matrix: 1.0 wherever a family is present.
    log.info("start with matrice")
    row_ids = []
    col_ids = []
    for row, fam in enumerate(families):
        fam_cols = [org_index[org] for org in fam.organisms]
        row_ids += [row] * len(fam_cols)
        col_ids += fam_cols
    presence = [1.0] * len(row_ids)
    mat_p_a = csc_matrix((presence, (row_ids, col_ids)),
                         shape=(len(families), nb_organisms),
                         dtype='float')

    # Cluster on 1 - Jaccard similarity; the leaf order drives the x axis.
    distances = pdist(1 - jaccard_similarities(mat_p_a, 0).todense())
    dendro = dendrogram(linkage(distances, 'single'), no_plot=True)
    log.info(
        "done with making the dendrogram to order the organisms on the plot")
    order_organisms = [index2org[leaf] for leaf in dendro["leaves"]]

    partitions_dict = defaultdict(list)
    shell_subs = set()  # its size gives the number of shell subpartitions
    for fam in families:
        partitions_dict[fam.partition].append(fam)
        if fam.partition.startswith("S"):
            shell_subs.add(fam.partition)

    def by_decreasing_size(nodes):
        return sorted(nodes, key=lambda n: len(n.organisms), reverse=True)

    persistent_nodes = by_decreasing_size(partitions_dict["P"])
    cloud_nodes = by_decreasing_size(partitions_dict["C"])

    # Each separator is the y coordinate at which a partition band ends.
    separators = [len(persistent_nodes) - 0.5]
    shell_NA = None
    if len(shell_subs) == 1:
        # pop() empties shell_subs, so the colouring code further down falls
        # back to the single generic shell colour.
        shell_nodes = by_decreasing_size(partitions_dict[shell_subs.pop()])
        ordered_nodes = persistent_nodes + shell_nodes + cloud_nodes
        separators.append(separators[-1] + len(shell_nodes))
    else:
        ordered_nodes = list(persistent_nodes)
        for subpartition in sorted(shell_subs):
            if subpartition == "S_":
                # NOTE(review): this records the index of the separator
                # appended just before the "S_" one - confirm it is intended.
                shell_NA = len(separators) - 1
            shell_nodes = by_decreasing_size(partitions_dict[subpartition])
            ordered_nodes.extend(shell_nodes)
            separators.append(separators[-1] + len(shell_nodes))
        ordered_nodes.extend(cloud_nodes)
    separators.append(separators[-1] + len(cloud_nodes))

    log.info(
        "Getting the gene name(s) and the number for each tile of the plot ..."
    )
    fam_order = []
    binary_data = []
    text_data = []
    for node in ordered_nodes:
        fam_order.append('\u200c' + node.name)
        node_orgs = node.organisms
        counts_row = []
        names_row = []
        for org in order_organisms:
            if org in node_orgs:
                genes = node.getGenesPerOrg(org)
                counts_row.append(len(genes))
                names_row.append("\n".join(map(str, genes)))
            else:
                counts_row.append(numpy.nan)
                names_row.append(numpy.nan)
        binary_data.append(counts_row)
        text_data.append(names_row)

    xaxis_values = ['\u200c' + org.name for org in order_organisms]

    log.info("Done extracting names and numbers. Making the heatmap ...")
    heatmap = go.Heatmap(z=binary_data,
                         x=xaxis_values,
                         y=fam_order,
                         text=text_data,
                         zauto=False,
                         zmin=1,
                         zmax=2,
                         autocolorscale=False,
                         colorscale=[[0.50, 'rgb(100, 15, 78)'],
                                     [1, 'rgb(59, 157, 50)']],
                         colorbar=dict(title='Presence/Absence',
                                       titleside='top',
                                       tickmode='array',
                                       tickvals=[1, 2],
                                       ticktext=['Presence', 'Multicopy'],
                                       ticks='outside'))

    # One green shade per shell subpartition, "S_" excluded.
    shell_color = None
    if len(shell_subs) > 1:
        nb_shades = len(shell_subs)
        if "S_" in shell_subs:
            nb_shades -= 1
        shell_color = cl.interp(cl.flipper()['seq']['9']['Greens'][1:7],
                                nb_shades)

    shapes = []
    sep_prec = 0
    last_band = len(separators) - 1
    for nb, sep in enumerate(separators):
        if nb == 0:
            color = COLORS["persistent"]
        elif nb == last_band:
            color = COLORS["cloud"]
        elif len(shell_subs) > 1 and nb != shell_NA:
            color = shell_color.pop()
        else:
            color = COLORS["shell"]

        # Thick vertical bars on both sides of the band, thin line below it.
        for x_pos in (-1, nb_organisms):
            shapes.append({
                "type": 'line',
                "x0": x_pos,
                "x1": x_pos,
                "y0": sep_prec,
                "y1": sep,
                "line": {"width": 10, "color": color},
            })
        shapes.append({
            "type": 'line',
            "x0": -1,
            "x1": nb_organisms,
            "y0": sep,
            "y1": sep,
            "line": {"width": 1, "color": color},
        })
        sep_prec = sep

    layout = go.Layout(title="presence/absence matrix",
                       xaxis=go.layout.XAxis(ticktext=xaxis_values,
                                             title='organisms',
                                             tickvals=xaxis_values,
                                             automargin=True,
                                             tickfont=dict(size=10)),
                       yaxis=go.layout.YAxis(ticktext=fam_order,
                                             tickvals=fam_order,
                                             title='gene families',
                                             automargin=True,
                                             tickfont=dict(size=10)),
                       shapes=shapes,
                       plot_bgcolor='#ffffff')

    log.info("Drawing the figure itself...")
    out_plotly.plot(go.Figure(data=[heatmap], layout=layout),
                    filename=output + "/tile_plot.html",
                    auto_open=False)
    log.info(f"Done with the tile plot : '{output+'/tile_plot.html'}' ")
# One clearly visible trace per column of `search_dandas`.
# `rit` is initialised before this fragment and indexes the `daf` colour list,
# so the manual counter is kept as-is.
for col in search_dandas.drop(["date"], axis=1).columns:
    rit = rit + 1
    trace = go.Scatter(x=search_dandas["date"],
                       y=search_dandas[col],
                       line=dict(color=daf[rit]),
                       name=col,
                       legendgroup=col,
                       opacity=0.8)
    search.append(trace)

# Sequential palettes keyed 0..5.  The palette size is the first value of the
# per-type row-count maximum, plus one.  `.iloc[0]` makes the positional
# lookup explicit; integer `[0]` on a label-indexed Series is deprecated.
color_dict = {}
palette_size = str(rat.groupby("type").count().max().iloc[0] + 1)
seq_palettes = cl.flipper()['seq'][palette_size]
for sam, i in enumerate(
        ["Reds", "Greens", "Blues", "PuRd", "Purples", "Greys"]):
    dan = seq_palettes[i]
    color_dict[sam] = dan

# Faint background traces: each `search_df` column is drawn once for every
# type whose "0" column lists that column name, coloured by the type position.
for col in search_df.drop(["date"], axis=1).columns:
    for tio, g in enumerate(rat["type"].unique()):
        ban = daf[tio]
        if col in rat[rat["type"] == g]["0"].values:
            trace = go.Scatter(x=search_df["date"],
                               y=search_df[col],
                               line=dict(color=ban),
                               name=col,
                               legendgroup=g,
                               opacity=0.05)
            search.append(trace)
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 23 23:49:44 2017

@author: nmishra
"""
import numpy as np
import pandas as pd
from tabulate import tabulate
import matplotlib.pyplot as plt
from collections import defaultdict
import matplotlib
import colorlover as cl
from IPython.display import HTML

# Renders the 3-class sequential palettes when run in a notebook cell.
HTML(cl.to_html(cl.flipper()['seq']['3']))
matplotlib.style.use('ggplot')

Colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'beige', 'bisque',
          'black', 'dimgray', 'darkturquoise', 'blanchedalmond', 'darkviolet']
deadline = ['0.60', '0.65', '0.70', '0.75', '0.80', '0.85', '0.90']
table = []

# All_data[metric][deadline] -> float array parsed from one results file.
All_data = defaultdict(dict)
for x in ['eff', 'lat']:
    for y in deadline:
        filename = '../results/single-app/dyn-' + x + '-' + y + '-v3.txt'
        # `with` closes the file even if parsing below raises.
        with open(filename, 'r') as f:
            # Keep only the text before the first tab, split on whitespace.
            # NOTE(review): the comprehension used to end with `if 'AVERAGE'`,
            # a constant-true condition that filtered nothing; it may have
            # been meant to test the row's content - confirm the intent.
            table = [row.strip().split('\t')[0].split() for row in f]
        # Skip the first and last rows and the first column of each row.
        All_data[x][y] = np.array(
            [row[1:] for row in table[1:-1]]).astype(float)
def multiplotDemandSimilarity(merged_ds):
    """
    Plots one subplot per customer class comparing the benchmark energy
    ('Energy [kWh]') with the data model mean ('M_kw_mean') against
    'YearsElectrified', plus a linear regression line fitted to the data
    model mean.

    merged_ds = dataframe with columns 'class', 'YearsElectrified',
    'Energy [kWh]', 'M_kw_mean' and 'M_kw_std'

    Returns the result of offline.iplot for the assembled figure.
    """
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    palettes = cl.flipper()['seq']['3']
    classes = merged_ds['class'].unique()

    data = []
    lay = []

    #generate existing and new model traces for each customer subclass
    for count, c in enumerate(classes, start=1):
        d = merged_ds.loc[(merged_ds['class'] == c)][[
            'YearsElectrified', 'Energy [kWh]', 'M_kw_mean', 'M_kw_std'
        ]]
        years = d['YearsElectrified'].values

        slope, intercept, r_value, p_value, std_err = stats.linregress(
            years, d['M_kw_mean'].values)
        line = slope * years + intercept

        #cycle through the palettes so more than len(clrs) classes still plot
        shades = palettes[clrs[(count - 1) % len(clrs)]]

        trace0 = go.Bar(
            x=years,
            y=d['Energy [kWh]'].values,
            xaxis='x' + str(count),
            yaxis='y' + str(count),
            marker=dict(color=shades[-1]),
            name=c + ' benchmark',
        )
        trace1 = go.Bar(
            x=years,
            y=d['M_kw_mean'].values,
            name=c + ' data model',
            marker=dict(color=shades[1]),
        )
        trace2 = go.Scatter(x=years,
                            y=line,
                            mode='lines',
                            line=dict(color=shades[1], width=3),
                            name=c + ' data lin_reg')

        max_year = d.YearsElectrified.max()
        lay.append({
            'yaxis{}'.format(count):
            go.YAxis(type='linear',
                     title='annual mean monthly<br /> consumption (kWh)'),
            'xaxis{}'.format(count):
            go.XAxis(title='time electrified (years)',
                     ticktext=list(range(0, max_year + 1)),
                     tickvals=np.arange(0, max_year + 1, 1))
        })

        data.extend([trace1, trace2, trace0])

    #create subplot graph objects: three traces per class, one row per class
    rows = len(data) // 3
    fig = py.tools.make_subplots(rows=rows,
                                 cols=1,
                                 subplot_titles=list(classes),
                                 horizontal_spacing=0.1,
                                 print_grid=False)

    for i, trace in enumerate(data):
        fig.append_trace(trace, i // 3 + 1, 1)

    fig['layout'].update(title='Annual mean monthly demand model similarity')

    #update layout for all subplots
    for axes in lay:
        fig['layout'].update(axes)

    return offline.iplot(fig,
                         filename=os.path.join(
                             image_dir,
                             'multiplot-demand-similarity' + '.png'))
def plotBmHourlyHeatmap(customer_class,
                        year_list,
                        daytype='Weekday',
                        model_dir=dpet_dir):
    """
    This function plots the hourly load profiles for a specified customer
    class, day type and list of years since electrification. Data is based
    on the DPET model.

    customer_class = value matched against the 'class' column
    year_list = non-empty sequence of 'YearsElectrified' values, one subplot
        each, laid out at most three per row
    daytype = value matched against the 'daytype' column
    model_dir = passed to bmHourlyProfiles to load the profiles

    Years whose heatmap cannot be built are skipped with a warning.
    Raises ValueError if year_list is empty.
    """
    import warnings

    if len(year_list) == 0:
        raise ValueError('year_list must contain at least one year')

    df = bmHourlyProfiles(model_dir)
    #get consistent max demand & color scale across classes
    maxdemand = df['Mean [kVA]'].max()
    df = df[(df['daytype'] == daytype) & (df['class'] == customer_class)]

    #set heatmap colours
    colors = cl.flipper()['div']['5']['RdYlBu']
    scl = [[0, colors[0]], [0.25, colors[1]], [0.5, colors[2]],
           [0.75, colors[3]], [1, colors[4]]]

    #set subplot parameters
    ncol = min(len(year_list), 3)
    nrow = ceil(len(year_list) / ncol)
    fig = py.tools.make_subplots(
        rows=nrow,
        cols=ncol,
        subplot_titles=['Year ' + str(x) for x in year_list],
        horizontal_spacing=0.1,
        print_grid=False)

    #set colorbar parameters (the same for every subplot)
    if nrow == 1:
        cblen = 1
        yanc = 'middle'
    else:
        cblen = 0.5
        yanc = 'bottom'

    for idx, yr in enumerate(year_list):
        #fill the grid row by row; plotly rows/columns are 1-based
        ro, c = divmod(idx, ncol)
        ro += 1
        c += 1
        scl_switch = idx == 0  #only the first heatmap shows the colorscale

        #generate trace
        try:
            data = df[df['YearsElectrified'] == yr]
            z = data['Mean [kVA]'].reset_index(drop=True)
            x = data['hour']
            y = data.month
            hovertext = list()
            for yi, yy in enumerate(y.unique()):
                hovertext.append(list())
                for xi, xx in enumerate(x.unique()):
                    #z is indexed as 24 values per month, in row order
                    hovertext[-1].append(
                        'hour: {}<br />month: {}<br />{:.3f} kVA'.format(
                            xx, yy, z[24 * yi + xi]))
            trace = go.Heatmap(z=z,
                               x=x,
                               y=y,
                               zmin=0,
                               zmax=maxdemand,
                               text=hovertext,
                               hoverinfo="text",
                               colorscale=scl,
                               reversescale=True,
                               showscale=scl_switch,
                               colorbar=dict(title='kVA',
                                             len=cblen,
                                             yanchor=yanc))
            fig.append_trace(trace, ro, c)
        except Exception as err:
            #best effort: keep plotting the other years, but say what was lost
            warnings.warn('Skipping heatmap for year {}: {!r}'.format(
                yr, err))

    #a single year needs no "a, b and c" enumeration
    if len(year_list) == 1:
        years_text = str(year_list[0])
    else:
        years_text = ', '.join(map(
            str, year_list[:-1])) + ' and ' + str(year_list[-1])

    fig['layout'].update(
        showlegend=False,
        title='<b>' + customer_class + '</b> mean estimated <b>' + daytype +
        '</b> energy demand (kVA) <br />' + years_text +
        ' years after electrification',
        height=350 + 300 * (nrow - 1))

    #NOTE(review): this range also touches axis number len(year_list) + 1,
    #one more than the number of subplots - confirm whether that is intended
    for k in range(1, len(year_list) + 2):
        fig['layout'].update({
            'yaxis{}'.format(k):
            go.YAxis(
                type='category',
                ticktext=[
                    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
                    'Sep', 'Oct', 'Nov', 'Dec'
                ],
                tickvals=np.arange(1, 13, 1),
                tickangle=-15,
                tickwidth=0.5),
            'xaxis{}'.format(k):
            go.XAxis(title='Time of day (hours)',
                     tickvals=np.arange(0, 24, 2))
        })

    return offline.iplot(fig, filename='testagain')
def plotDemandSimilarity(merged_ds):
    """
    Plots, on a single set of axes, the benchmark energy ('Energy [kWh]')
    and the data model mean ('M_kw_mean') of every customer class against
    'YearsElectrified', plus a linear regression line fitted to each class's
    data model mean.

    merged_ds = dataframe with columns 'class', 'YearsElectrified',
    'Energy [kWh]', 'M_kw_mean' and 'M_kw_std'

    Returns the result of offline.iplot for the assembled figure.
    """
    clrs = ['Greens', 'RdPu', 'Blues', 'YlOrRd', 'Purples', 'Reds', 'Greys']
    palettes = cl.flipper()['seq']['3']
    classes = merged_ds['class'].unique()
    trcs = len(classes)

    data = []

    #generate existing and new model traces for each customer subclass
    for count, c in enumerate(classes):
        d = merged_ds.loc[(merged_ds['class'] == c)][[
            'YearsElectrified', 'Energy [kWh]', 'M_kw_mean', 'M_kw_std'
        ]]
        years = d['YearsElectrified'].values

        #bars of all classes share 0.8 of each x slot; shift each class left
        wx = 0.8 / trcs
        ox = -wx * count

        slope, intercept, r_value, p_value, std_err = stats.linregress(
            years, d['M_kw_mean'].values)
        line = slope * years + intercept

        #cycle through the palettes so more than len(clrs) classes still plot
        shades = palettes[clrs[count % len(clrs)]]

        trace0 = go.Bar(
            x=years,
            y=d['Energy [kWh]'].values,
            marker=dict(color=shades[-1]),
            name=c + ' benchmark',
            opacity=0.6,
            width=wx,
            offset=ox,
        )
        trace1 = go.Bar(
            x=years,
            y=d['M_kw_mean'].values,
            name=c + ' data model',
            marker=dict(color=shades[1]),
            width=wx,
            offset=ox,
        )
        trace2 = go.Scatter(x=years,
                            y=line,
                            mode='lines',
                            line=dict(color=shades[1], width=3),
                            name=c + ' data lin_reg')

        data.extend([trace1, trace2, trace0])

    layout = go.Layout(
        title='Annual mean monthly demand model similarity',
        xaxis=dict(title='time electrified (years)',
                   tickvals=list(range(1, 16))),
        yaxis=dict(title='annual mean monthly consumption (kWh)'))

    fig = go.Figure(data=data, layout=layout)
    return offline.iplot(fig,
                         filename=os.path.join(image_dir,
                                               'demand-similarity' + '.png'))