def get_target_histogram(targets: np.ndarray, name: str, color_scheme: list = BLUE_SCHEME) -> dict:
    traces = [go.Histogram(x=targets, marker=dict(color=color_scheme[1]))]
    layout = dict(title=name)
    return {'layout': layout, 'data': traces}
def gera_histograma_taxas(dg, ds):
    taxa_municipal_máxima_no_alvo = ds.df_M_Alvo_em_foco['Taxa_por_hab_SUS'].max()
    hist, bin_edges = np.histogram(ds.df_M_Alvo_em_foco['Taxa_por_hab_SUS'])
    mean_count = np.mean(hist) / 2  # sets the height of the vertical marker lines
    taxa_município_em_foco_trace = go.Scatter(
        x=[ds.taxa_anual_M_Res * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap(f'Taxa em {ds.M_Res_em_foco}', 30),
        mode='lines',
        line=dict(width=2),
        hoverinfo='skip',
        opacity=0.9)
    taxa_BR_trace = go.Scatter(
        x=[ds.taxa_anual_Br * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap('Taxa Brasil', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    taxa_capital_trace = go.Scatter(
        x=[ds.taxa_anual_Capital * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap(f'Taxa {ds.Capital_em_foco}', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    taxa_UF_trace = go.Scatter(
        x=[ds.taxa_anual_UF * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap(f'Taxa {ds.UF_M_Res_em_foco}', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    taxa_limiar_trace = go.Scatter(
        x=[ds.taxa_anual_limiar * ds.denominador_populacional] * 2,
        y=[0, mean_count],
        name=wrap('Limiar log-normal', 30),
        mode='lines',
        line=dict(width=1),
        hoverinfo='skip',
        opacity=0.9)
    df_M_Alvo_em_foco_qtd_maior_que_zero = ds.df_M_Alvo_em_foco[
        ds.df_M_Alvo_em_foco['Qtd_EMA'] > 0]
    data = [
        go.Histogram(
            x=df_M_Alvo_em_foco_qtd_maior_que_zero['Taxa_por_hab_SUS'] *
            ds.denominador_populacional,
            name='Número de municípios'),
        taxa_BR_trace, taxa_UF_trace, taxa_capital_trace, taxa_limiar_trace,
        taxa_município_em_foco_trace
    ]
    layout = go.Layout(
        title={
            "text": f'Histograma de taxas de Atendimentos em {dg.ano_em_foco} no alvo <br>{ds.alvo_completo}',
            'x': 0.5,
            'xanchor': 'center'
        },
        font=dict(size=10),
        yaxis={'title': 'Número de municípios'},
        xaxis={
            'title': f'Atendimentos / {f_int(ds.denominador_populacional)} Habitantes SUS'
        },
    )
    return go.Figure(data=data, layout=layout)
print('Youngest passenger age:', train.Age.min(), 'years')
print('Average age on the ship:', train.Age.mean(), 'years')

# In[ ]:

Age_female_survived = train[(train.Sex == 'female') & (train.Survived == 1)].Age
Age_female_dead = train[(train.Sex == 'female') & (train.Survived == 0)].Age
Age_male_survived = train[(train.Sex == 'male') & (train.Survived == 1)].Age
Age_male_dead = train[(train.Sex == 'male') & (train.Survived == 0)].Age

fig = tools.make_subplots(rows=1, cols=2, subplot_titles=('Female', 'Male'))

survived_female = go.Histogram(name='Survived_female', x=Age_female_survived)
fig.append_trace(survived_female, 1, 1)

dead_female = go.Histogram(name='Dead_female', x=Age_female_dead)
fig.append_trace(dead_female, 1, 1)
fig.layout.xaxis1.update({'title': 'Age'})

survived_male = go.Histogram(name='Survived_male', x=Age_male_survived)

dead_male = go.Histogram(name='Dead_male', x=Age_male_dead)
layout = go.Layout(showlegend=True,
                   xaxis=dict(title='Male Rating', range=[0, 1]),
                   yaxis=dict(title='Female Rating', range=[0, 1]))
fig = go.Figure(data=trace, layout=layout)
py.iplot(fig, filename='inverse-ratings-by-gender')

###########################################################
# Distribution of women rating movies vs. men
###########################################################
x0 = movie_graph_data['no_of_female_ratings'] / movie_graph_data['gender_ratings']
data = go.Histogram(x=x0)
layout = go.Layout(xaxis=dict(title='Proportion of Female Raters', range=[0, 1]))
fig = go.Figure(data=[data], layout=layout)
py.iplot(fig, filename='proportion-of-female-raters')

###########################################################
# Rating differentials between women and men
###########################################################
test = movie_graph_data[['no_of_female_ratings', 'gender_ratings', 'ratings_differential']].copy()
# bin the female-rater proportion to the nearest 0.05
test['proportion'] = (test['no_of_female_ratings'] / test['gender_ratings'] / 5).round(2) * 5
test = test.groupby(['proportion'])[['proportion', 'ratings_differential']].agg(['mean', 'count'])
test['films'] = ['Number of films: ' + str(test['proportion']['count'].iloc[i])
                 for i in range(0, len(test))]
# Count the number of recordings per label
number_of_recordings = []
for direct in dirs:
    waves = [
        f for f in os.listdir(join(train_audio_path, direct))
        if f.endswith('.wav')
    ]
    number_of_recordings.append(len(waves))

# Plot
trace = go.Bar(
    x=dirs,
    y=number_of_recordings,
    marker=dict(color=number_of_recordings,
                colorscale='Viridis',
                showscale=True),
)
layout = go.Layout(title='Number of recordings in given label',
                   xaxis=dict(title='Words'),
                   yaxis=dict(title='Number of recordings'))
py.iplot(go.Figure(data=[trace], layout=layout))
def f(viz='X-Y', colx='', coly='', colz='', colw=10, na=False, asc='', source=df):
    if viz == 'X-Y':
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colx, source[colx].isnull().sum() / source.shape[0],
            source[colx].isnull().sum(), source.shape[0]))
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colz, source[colz].isnull().sum() / source.shape[0],
            source[colz].isnull().sum(), source.shape[0]))
        temp = source
        if na:
            temp = temp.fillna(-1000)
        if coly == 'unaggregrated':
            grouped = temp.loc[:, [colx, colz]].set_index(colx)
            grouped.columns = pd.MultiIndex.from_product([[colz], [coly]])
        if coly in ['count', 'sum', 'mean', 'std', 'max', 'min']:
            grouped = temp.groupby(colx).agg({colz: [coly]})
        elif coly == 'uniques':
            grouped = temp.groupby(colx).apply(lambda g: pd.Series(
                g[colz].unique().size,
                index=pd.MultiIndex.from_product([[colz], [coly]])))
        # print(grouped.head())
        trace = go.Scattergl(x=grouped.index,
                             y=grouped[colz][coly],
                             name=coly + ' of ' + colz + ' vs ' + colx,
                             mode=colw)
        layout = go.Layout(title=coly + ' of ' + colz + ' vs ' + colx,
                           yaxis=dict(title=coly + ' of ' + colz),
                           xaxis=dict(title=colx))
    elif viz == 'pareto':
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colx, source[colx].isnull().sum() / source.shape[0],
            source[colx].isnull().sum(), source.shape[0]))
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colz, source[colz].isnull().sum() / source.shape[0],
            source[colz].isnull().sum(), source.shape[0]))
        sort_order = (asc == 'Ascending')
        temp = source
        if na:
            temp = temp.fillna(-1000)
        grouped = temp.groupby(colx)
        if coly in ['count', 'sum', 'mean', 'std', 'max', 'min']:
            grouped = grouped.agg({colz: [coly]})
        elif coly == 'uniques':
            grouped = grouped.apply(lambda g: pd.Series(
                g[colz].unique().size,
                index=pd.MultiIndex.from_product([[colz], [coly]])))
        grouped = grouped.reset_index().sort_values(
            [(colz, coly)], ascending=sort_order).head(colw).sort_values(
                [(colz, coly)], ascending=(not sort_order))
        # print(grouped)
        trace = go.Bar(y=grouped[colx],
                       x=grouped[colz][coly],
                       name=colx,
                       marker=dict(color='rgb(49,130,189)'),
                       orientation='h')
        layout = go.Layout(
            title=coly + ' of ' + colz + ' by ' + colx,
            yaxis=dict(
                title=colx,
                type="category",
                # categoryorder = "category descending"
                tickformat=".3f"),
            xaxis=dict(title=coly + ' of ' + colz),
            margin=dict(l=160))
    else:
        print('{}: {:0.1%} null ({:d} out of {:d})'.format(
            colx, source[colx].isnull().sum() / source.shape[0],
            source[colx].isnull().sum(), source.shape[0]))
        temp = source
        if na:
            temp = temp.fillna(-1000)
        trace = go.Histogram(x=temp[colx],
                             name=colx,
                             marker=dict(color='rgb(49,130,189)'))
        layout = go.Layout(title='distribution',
                           yaxis=dict(title='count'),
                           xaxis=dict(title=colx))
    data = [trace]
    fig = go.Figure(data=data, layout=layout)
    plot_url = py.iplot(fig)
# So there are columns about Estimate, Margin of Error, Percent related to Sex, Age, Race, and Total Population. Let's start exploring these variables.
#
# ### Distribution of Total Population across Census Tracts
#
# <br>
#
# **Census Tracts:**
# Census tracts (CTs) are small, relatively stable geographic areas that usually have a population between 2,500 and 8,000 persons. They are located in census metropolitan areas and in census agglomerations that had a core population of 50,000 or more in the previous census.
#

# In[ ]:

total_population = rca_df["HC01_VC03"][1:]
trace = go.Histogram(x=total_population, marker=dict(color='orange', opacity=0.6))
layout = dict(title="Total Population Distribution - Across Census Tracts",
              margin=dict(l=200),
              width=800,
              height=400)
data = [trace]
fig = go.Figure(data=data, layout=layout)
iplot(fig)

male_pop = rca_df["HC01_VC04"][1:]
female_pop = rca_df["HC01_VC05"][1:]
trace1 = go.Histogram(x=male_pop,
                      name="male population",
                      marker=dict(color='blue', opacity=0.6))
trace2 = go.Histogram(x=female_pop,
                      name="female population",
                      marker=dict(opacity=0.6))
def stock(stockval):
    # fetch data for the security
    main_df = stock_data(stockval, 2, 120)
    # determine the ticker for the security
    ticker = get_ticker(stockval[:-4])
    # fetch the order book
    ten_orders = order_book(ticker)
    # fetch company indicators
    indicators = company_indicators(ticker)
    # fetch news
    news = get_news(get_isin(stockval[:-4]))
    # fetch financial data
    financial_data = get_financial_data(get_isin(stockval[:-4]))

    gross_profit = []
    net_profit = []
    sales = []
    debt = []
    capital = []
    dates = [x for x in financial_data[0] if x]
    try:
        for i in range(0, 2):
            for val in financial_data[1][i]:
                if len(val) > 0:
                    if val[0] == "Zysk (strata) brutto":
                        gross_profit.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Zysk (strata) netto":
                        net_profit.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Przychody netto ze sprzedaży produktów, towarów i materiałów":
                        sales.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Zobowiązania i rezerwy na zobowiązania":
                        debt.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])
                    elif val[0] == "Kapitał własny":
                        capital.append([
                            int("".join(x.split())) * 1000 for x in val[1] if x
                        ])

        years = dates[4:8]

        # quarterly net/gross profit
        profit_data_net_q = go.Bar(x=dates[0:4], y=net_profit[0], name='net')
        profit_data_gross_q = go.Bar(x=dates[0:4], y=gross_profit[0], name='gross')
        profit_layout = go.Layout(barmode='group')
        profit_data_q = [profit_data_net_q, profit_data_gross_q]
        profit_fig_q = go.Figure(data=profit_data_q, layout=profit_layout)
        pio.write_image(profit_fig_q, 'static/profits_q.png', width=600, height=400)

        # yearly net/gross profit
        profit_data_net_y = go.Bar(x=dates[4:8], y=net_profit[1], name='net')
        profit_data_gross_y = go.Bar(x=years, y=gross_profit[1], name='gross')
        layout_reversed = go.Layout(xaxis=dict(autorange='reversed'), bargap=0.5)
        bars_style = go.Layout(bargap=0.5)
        profit_data_y = [profit_data_net_y, profit_data_gross_y]
        profit_fig_y = go.Figure(data=profit_data_y, layout=layout_reversed)
        pio.write_image(profit_fig_y, 'static/profits_y.png', width=600, height=400)

        if len(sales) > 0:
            sales_status = True
            sales_q = [go.Bar(x=dates[0:4], y=sales[0])]
            fig_sales = go.Figure(data=sales_q, layout=bars_style)
            pio.write_image(fig_sales, 'static/sales_q.png', width=600, height=400)
            sales_y = [go.Bar(x=years, y=sales[1])]
            fig_sales = go.Figure(data=sales_y, layout=layout_reversed)
            pio.write_image(fig_sales, 'static/sales_y.png', width=600, height=400)
        else:
            sales_status = False

        if len(debt) > 0:
            debt_status = True
            debt_q = [go.Bar(x=dates[0:4], y=debt[0])]
            fig_sales = go.Figure(data=debt_q, layout=bars_style)
            pio.write_image(fig_sales, 'static/debt_q.png', width=600, height=400)
            debt_y = [go.Bar(x=years, y=debt[1])]
            fig_sales = go.Figure(data=debt_y, layout=layout_reversed)
            pio.write_image(fig_sales, 'static/debt_y.png', width=600, height=400)
        else:
            debt_status = False

        if len(capital) > 0:
            capital_status = True
            capital_q = [go.Bar(x=dates[0:4], y=capital[0])]
            fig_sales = go.Figure(data=capital_q, layout=bars_style)
            pio.write_image(fig_sales, 'static/capital_q.png', width=600, height=400)
            capital_y = [go.Bar(x=years, y=capital[1])]
            fig_sales = go.Figure(data=capital_y, layout=layout_reversed)
            pio.write_image(fig_sales, 'static/capital_y.png', width=600, height=400)
        else:
            capital_status = False
    except Exception:
        sales_status = False
        debt_status = False
        capital_status = False
        print("no financial data")

    # fetch shareholder data
    shareholders = get_shareholders(ticker)
    # fetch the company profile
    company_details = company_info(ticker)
    # fetch detailed transaction data
    transaction_data(stockval)
    # returns the share volume traded at each price
    stock_prices = analyze_stock_transactions(stockval)
    vol_x, vol_y = zip(*stock_prices)
    vol_data = [go.Bar(x=vol_y, y=vol_x, orientation='h')]
    vol_layout = go.Layout(yaxis=dict(dtick=0.25))
    vol_fig = go.Figure(data=vol_data, layout=vol_layout)
    pio.write_image(vol_fig, 'static/daily_volume.png', width=700, height=500)

    sma_100 = sma(stockval, 100)
    sma_200 = sma(stockval, 200)

    a = daily_return(stockval)
    data = [go.Bar(x=a.index, y=a['<CLOSE>'])]
    fig = go.Figure(data=data)
    pio.write_image(fig, 'static/daily_return.png', width=600, height=400)

    # histogram of daily returns
    histogram = [go.Histogram(x=a['<CLOSE>'])]
    hist_layout = go.Layout(xaxis=dict(tick0=0, dtick=2.0), bargap=0.1)
    histogram_fig = go.Figure(data=histogram, layout=hist_layout)
    pio.write_image(histogram_fig, 'static/histogram.png', width=550, height=350)

    # moving averages
    sma100 = go.Scatter(x=sma_100.index,
                        y=sma_100['<CLOSE>'],
                        line=dict(color='#af211c', dash='dot'),
                        opacity=0.8,
                        name='sma 100')
    sma200 = go.Scatter(x=sma_200.index,
                        y=sma_200['<CLOSE>'],
                        line=dict(color='#bc59ff', dash='dot'),
                        opacity=0.8,
                        name='sma 200')

    # Bollinger bands + candlestick chart
    boll = bollinger(stockval)
    boll_high = go.Scatter(x=boll[0].index,
                           y=boll[0]['<CLOSE>'],
                           line=dict(color='#17BECF'),
                           opacity=0.8,
                           name='bollinger up')
    boll_min = go.Scatter(x=boll[1].index,
                          y=boll[1]['<CLOSE>'],
                          line=dict(color='#17BECF'),
                          opacity=0.8,
                          name='bollinger 65 mean')
    boll_low = go.Scatter(x=boll[2].index,
                          y=boll[2]['<CLOSE>'],
                          line=dict(color='#17BECF'),
                          opacity=0.8,
                          name='bollinger down')
    vol = go.Bar(x=main_df[-90:].index,
                 y=main_df[-90:]['<VOL>'],
                 marker=dict(
                     color='rgb(158,202,225)',
                     line=dict(color='rgb(8,48,107)', width=0.5),
                 ),
                 opacity=0.2,
                 yaxis='y2',
                 name='volume')
    candle_boll = go.Candlestick(x=main_df[:90].index,
                                 open=main_df[:90]['<OPEN>'],
                                 high=main_df[:90]['<HIGH>'],
                                 low=main_df[:90]['<LOW>'],
                                 close=main_df[:90]['<CLOSE>'])
    candle_layout = go.Layout(xaxis=dict(rangeslider=dict(visible=False)),
                              yaxis2=dict(title='Volume',
                                          overlaying='y',
                                          side='right'))
    boll_data = [
        boll_high, boll_min, boll_low, candle_boll, vol, sma100, sma200
    ]
    boll_fig = dict(data=boll_data, layout=candle_layout)
    pio.write_image(boll_fig, 'static/chart.png', width=1920, height=1080)

    return render_template('stock.html',
                           data_list=stock_list,
                           stock_name=stockval[:-4],
                           o_book=ten_orders,
                           close_value=main_df.iloc[-1]['<CLOSE>'],
                           daily_return=round(a.iloc[-1]['<CLOSE>'], 2),
                           indicators=indicators,
                           stock_news=news,
                           shareholder=shareholders,
                           ticker=ticker,
                           fin_data=financial_data,
                           prices=stock_prices,
                           details=company_details,
                           sales_status=sales_status,
                           debt_status=debt_status,
                           capital_status=capital_status)
# ### Plots

ALS_proc_gender_count = ALS_proc_df.groupby('REF').first().Gender.value_counts().to_frame()
data = [go.Pie(labels=ALS_proc_gender_count.index, values=ALS_proc_gender_count.Gender)]
layout = go.Layout(title='Patients Gender Demographics')
fig = go.Figure(data, layout)
fig.show()

ALS_proc_niv_count = ALS_proc_df.NIV.value_counts().to_frame()
data = [go.Pie(labels=ALS_proc_niv_count.index, values=ALS_proc_niv_count.NIV)]
layout = go.Layout(title='Visits where the patient is using NIV')
fig = go.Figure(data, layout)
fig.show()

data = [go.Histogram(x=ALS_proc_df.NIV)]
layout = go.Layout(title='Number of visits where the patient is using NIV')
fig = go.Figure(data, layout)
fig.show()

ALS_proc_patient_niv_count = ALS_proc_df.groupby('subject_id').niv.max().value_counts().to_frame()
data = [go.Pie(labels=ALS_proc_patient_niv_count.index, values=ALS_proc_patient_niv_count.niv)]
layout = go.Layout(title='Patients who eventually use NIV')
fig = go.Figure(data, layout)
fig.show()

data = [go.Scatter(x=ALS_proc_df.FVC, y=ALS_proc_df.NIV, mode='markers')]
    return x


# Step 1: generate samples of the uniform variable U
x_n = constante.SEMILLA
u = []  # array of uniform samples
x = []  # array of inverse-transform samples
for _ in range(constante.CANT_EXPERIMENTOS):
    x_n = gcl_uniforme(x_n)
    u.append(x_n)

# Step 2: apply the inverse transform
for i in range(len(u)):
    x.append(obtenerTransfInversa(u[i]))  # inverse transform

# Show a histogram of the result
data = [go.Histogram(x=x)]
#py.plot(data, filename='histograma-inversa-normal-v1')

# Report sample and theoretical mean, variance and mode
media = np.mean(x)
varianza = np.var(x)
moda = max(set(x), key=x.count)
print("Sample mean: {0} Sample variance: {1} Sample mode: {2}".format(
    media, varianza, moda))
print("Theoretical mean: {0} Theoretical variance: {1} Theoretical mode: {2}".format(
    0, 1, 0))
y="Time Decimal", data=daily_lows, palette='rainbow') # In[ ]: box_tracer = [] for key, day in dayOfWeek.items(): box_tracer.append( go.Box(y=daily_lows[daily_lows['Day of Week'] == day]['Time Decimal'], name=day)) iplot(box_tracer) # ### Tuesday Low Histogram # # Let's take a closer look into the distrobution of the lows on Tuesdays. # In[ ]: sns.distplot(daily_lows[daily_lows['Day of Week'] == 'Tue']['Time Decimal'], bins=24, kde=False) # In[ ]: histo_tracer = [ go.Histogram( x=daily_lows[daily_lows['Day of Week'] == 'Tue']['Time Decimal']) ] iplot(histo_tracer)
def main():
    db = QuestionDatabase()
    question_lookup = db.all_questions()
    questions = list(question_lookup.values())
    guesser_train_questions = [q for q in questions if q.fold == "guesstrain"]
    guesser_train_answers = [q.page for q in guesser_train_questions]
    answer_counts = Counter(guesser_train_answers)
    answer_set = set(answer_counts.keys())
    app = dash.Dash()
    app.layout = html.Div(children=[
        html.H1(children="Quiz Bowl Question Explorer"),
        compute_stats(questions, db.location),
        html.H2("Question Inspector"),
        dcc.Dropdown(
            options=[{"label": q.qnum, "value": q.qnum} for q in questions],
            value=questions[0].qnum,
            id="question-selector",
        ),
        html.Div([html.Div(id="question-display")]),
        dcc.Graph(
            id="answer-count-plot",
            figure=go.Figure(
                data=[
                    go.Histogram(x=list(answer_counts.values()),
                                 name="Answer Counts")
                ],
                layout=go.Layout(title="Answer Count Distribution",
                                 showlegend=True),
            ),
        ),
        dcc.Graph(
            id="answer-count-cum-plot",
            figure=go.Figure(
                data=[
                    go.Histogram(
                        x=list(answer_counts.values()),
                        name="Answer Counts Cumulative",
                        cumulative=dict(enabled=True, direction="decreasing"),
                        histnorm="percent",
                    )
                ],
                layout=go.Layout(title="Answer Count Cumulative Distribution",
                                 showlegend=True),
            ),
        ),
        html.Label("Answer Selection"),
        dcc.Dropdown(
            options=sorted(
                [{"label": a, "value": a} for a in answer_set],
                key=lambda k: k["label"],
            ),
            id="answer-list",
        ),
        html.Div(id="answer-count"),
    ])

    @app.callback(
        Output(component_id="answer-count", component_property="children"),
        [Input(component_id="answer-list", component_property="value")],
    )
    def update_answer_count(answer):
        return f"Answer: {answer} Question Count: {answer_counts[answer]}"

    @app.callback(
        Output(component_id="question-display", component_property="children"),
        [Input(component_id="question-selector", component_property="value")],
    )
    def update_question(qb_id):
        qb_id = int(qb_id)
        question = question_lookup[qb_id]
        sentences, answer, _ = question.to_example()
        return ([
            html.P(f"ID: {qb_id} Fold: {question.fold}"),
            html.H3("Sentences")
        ] + [html.P(f"{i}: {sent}") for i, sent in enumerate(sentences)] +
                [html.H3("Answer"), html.P(answer)])

    app.css.append_css(
        {"external_url": "https://codepen.io/chriddyp/pen/bWLwgP.css"})
    app.run_server(debug=True)
        break
print("***DONE!")

#diff_values = np.asarray(diff_values)
#np.savetxt(result_path + 'xy.txt', np.c_[X, Y], delimiter=',', fmt='%i')
#df = pd.DataFrame({"X-value": np.asarray(X), "Y-value": np.asarray(Y)})
#df.to_csv(result_path + "dist.csv", index=False)

trace1 = go.Histogram(
    x=X,
    marker=dict(color='#FFD7E9'),
    opacity=0.50)
trace2 = go.Histogram(
    x=Y,
    marker=dict(color='#EB89B5',
                line=dict(color='rgb(8,48,107)', width=1.5)),
    opacity=0.50)
data = [trace1, trace2]
layout = go.Layout(barmode='stack')
# tx_max_purchase holds the maximum invoice date in our dataset
tx_max_purchase['Recents'] = (tx_max_purchase['MaxPurchaseDate'].max() -
                              tx_max_purchase['MaxPurchaseDate']).dt.days

# merge this dataframe into our new user dataframe
tx_user = pd.merge(tx_user, tx_max_purchase[['CustomerID', 'Recents']], on='CustomerID')
tx_user.head()

# plot a histogram to check how 'Recents' is distributed across customers;
# once we see its spread, we can use the K-means algorithm to assign recency
# groups and scores
plot_data = [go.Histogram(x=tx_user['Recents'])]
plot_layout = go.Layout(title='Recents')
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

# In[52]:

from sklearn.cluster import KMeans

# elbow method: compute the sum of squared errors (inertia) for k = 1..9
sse = {}
tx_recency = tx_user[['Recents']].copy()
for k in range(1, 10):
    kmeans = KMeans(n_clusters=k, max_iter=1000).fit(tx_recency)
    tx_recency["clusters"] = kmeans.labels_
    sse[k] = kmeans.inertia_
plt.figure()
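# Once the elbow plot suggests a k, the scoring step might look like the
# sketch below. This is illustrative, not part of the original notebook: it
# assumes k=4 and that a lower 'Recents' value (a more recent purchase) should
# receive a higher cluster id / score; order_cluster is a hypothetical helper.
kmeans = KMeans(n_clusters=4, max_iter=1000).fit(tx_user[['Recents']])
tx_user['RecencyCluster'] = kmeans.labels_


def order_cluster(cluster_field, target_field, df, ascending):
    # relabel cluster ids so they are ordered by the cluster mean of target_field
    grouped = df.groupby(cluster_field)[target_field].mean().reset_index()
    grouped = grouped.sort_values(by=target_field,
                                  ascending=ascending).reset_index(drop=True)
    grouped['index'] = grouped.index
    df = pd.merge(df, grouped[[cluster_field, 'index']], on=cluster_field)
    df = df.drop([cluster_field], axis=1).rename(columns={'index': cluster_field})
    return df


# lower recency gets the higher cluster id
tx_user = order_cluster('RecencyCluster', 'Recents', tx_user, ascending=False)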
                    'title': 'Complete date',
                    'range': [two_months_ago, current_date]
                },
                yaxis={'title': 'Number of cases'},
                # showlegend=True
            )
        }),
    dcc.Graph(
        id='histogram',
        figure={
            'data': [
                go.Histogram(
                    x=waiting_days,
                    # histnorm='probability',
                    xbins=dict(size=5),
                    marker=dict(color='rgb(158,202,225)',
                                line=dict(color='rgb(8,48,107)', width=1.5)),
                ),
            ],
            'layout': go.Layout(
                title='Distribution of waiting days for cases cleared in the past 4 weeks',
                xaxis={
                    'title': 'Waiting days',
                    'range': [0, 90]
                },
                yaxis={'title': 'Number of cases'},
                # showlegend=True
def hypt(accuracy, iv, dv, perms=10000, show_graph=True, name="hyptest",
         print_progress=True, multiprocess=True, save_perm_accuracies=True):
    '''
    Tests whether classifiers are performing significantly better than chance.
    Permutation-based hypothesis testing: the correspondence between the IV and
    the DV is removed via randomization to generate a null distribution.

    :param accuracy: observed classifier accuracy to test against the null distribution
    :param iv: independent variable (features); shuffled on each permutation
    :param dv: dependent variable (labels)
    :param perms: number of permutations used to build the null distribution
    :param show_graph: if True, plot a histogram of the null accuracies
    :param name: base filename for the plot and the saved permutation accuracies
    :param print_progress: if True, report progress in single-process mode
    :param multiprocess: if True, run permutations across all CPU cores
    :param save_perm_accuracies: if True, write the null accuracies to a CSV file
    :return: estimated p-value (fraction of permuted accuracies >= the observed accuracy)
    '''
    import copy
    null_accuracy = []
    if multiprocess:
        import multiprocessing
        async_kwargs = {
            "hyp_test": False,
            "show_graph": False,
            "write_out": False
        }
        print("Instantiating multiprocessing for " + str(perms) +
              " permutations on " + str(multiprocessing.cpu_count()) + " cores.")
        with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
            out_dicts = []
            resps = []
            for i in range(perms):
                civ = copy.deepcopy(iv)
                np.random.shuffle(civ)
                resps.append(
                    pool.apply_async(leave_one_out,
                                     args=(civ, dv),
                                     kwds=async_kwargs,
                                     callback=out_dicts.append))
            for r in resps:
                r.wait()
            null_accuracy = [d['accuracy'] for d in out_dicts]
    else:
        for i in range(perms):
            civ = copy.deepcopy(iv)
            np.random.shuffle(civ)
            null_accuracy.append(
                leave_one_out(civ, dv,
                              show_graph=False,
                              hyp_test=False,
                              write_out=False)['accuracy'])
            if print_progress:
                print("Permutation test iteration #: " + str(i + 1))
                print("Percent complete: " + str(((i + 1) / perms) * 100) + "%")
    if show_graph:
        # todo: add line to show where the classifier's average accuracy was
        import plotly.graph_objs as go
        from plotly.offline import plot
        fig = go.Figure(data=[go.Histogram(x=null_accuracy, opacity=0.9)])
        plot(fig, filename=name + ".html")
    g = [s for s in null_accuracy if s >= accuracy]
    if save_perm_accuracies:
        import csv
        with open(name + '_null_accuracies.csv', "w") as f:
            w = csv.writer(f)
            for a in null_accuracy:
                w.writerow([a])
    return len(g) / len(null_accuracy)  # estimated p-value under the null hypothesis
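# A minimal usage sketch (an assumption, not from the original module): it
# presumes leave_one_out(iv, dv) from this codebase returns a dict with an
# 'accuracy' key, and that iv and dv are numpy arrays of features and labels.
observed = leave_one_out(iv, dv,
                         show_graph=False,
                         hyp_test=False,
                         write_out=False)['accuracy']
p_value = hypt(observed, iv, dv, perms=1000, multiprocess=False)
print("Probability of the observed accuracy under the null: " + str(p_value))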
#######
# Objective: Create a histogram that plots the 'length' field
# from the Abalone dataset (../data/abalone.csv).
# Set the range from 0 to 1, with a bin size of 0.02
######

# Perform imports here:
import plotly.offline as pyo
import plotly.graph_objs as go
import pandas as pd

# create a DataFrame from the .csv file:
df = pd.read_csv('../../Data/abalone.csv')

# create a data variable:
data = [
    go.Histogram(x=df['length'], xbins={'start': 0, 'end': 1, 'size': 0.02})
]

# add a layout
layout = go.Layout(title='Length Histogram')

# create a fig from data & layout, and plot the fig
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='histogram-solution.html')
py.iplot(h_data_x, filename="histogram consumer good x")
py.iplot(h_data_y, filename="histogram consumer good y")
py.iplot(h_data_k, filename="histogram producer capital")
py.iplot(h_data_l, filename="histogram producer labor")
py.iplot(h_data_s, filename="histogram market supply")
py.iplot(h_data_d, filename="histogram market demand")
"""

# differences between the analytic (class) solution and the simulation results
data = []
data.extend([
    class_x - sim_x for class_x, sim_x in zip(class_consumer_x, sim_consumer_x)
])
data.extend([
    class_y - sim_y for class_y, sim_y in zip(class_consumer_y, sim_consumer_y)
])
data.extend([
    class_k - sim_k for class_k, sim_k in zip(class_producer_k, sim_producer_k)
])
data.extend([
    class_l - sim_l for class_l, sim_l in zip(class_producer_l, sim_producer_l)
])
data.extend(
    [class_s - sim_s for class_s, sim_s in zip(class_market_s, sim_market_s)])
data.extend(
    [class_d - sim_d for class_d, sim_d in zip(class_market_d, sim_market_d)])

h_data = [go.Histogram(x=data)]
py.iplot(h_data, filename="histogram total data")

f.close()
# * y = y axis
# * opacity = opacity of the histogram
# * name = legend name
# * marker = color of the histogram
# * trace2 = second histogram
# * layout = layout
# * barmode = bar mode of the histogram, e.g. *overlay*; you can also change it
#   to *stack* (see the sketch after this cell)

# In[ ]:

# prepare data
x2011 = timesData.student_staff_ratio[timesData.year == 2011]
x2012 = timesData.student_staff_ratio[timesData.year == 2012]

trace1 = go.Histogram(x=x2011,
                      opacity=0.75,
                      name="2011",
                      marker=dict(color='rgba(171, 50, 96, 0.6)'))
trace2 = go.Histogram(x=x2012,
                      opacity=0.75,
                      name="2012",
                      marker=dict(color='rgba(12, 50, 196, 0.6)'))
data = [trace1, trace2]
layout = go.Layout(barmode='overlay',
                   title='students-staff ratio in 2011 and 2012',
                   xaxis=dict(title='students-staff ratio'),
                   yaxis=dict(title='Count'))
fig = go.Figure(data=data, layout=layout)
iplot(fig)
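# To see the *stack* mode mentioned above, only barmode needs to change; this
# short variation reuses the traces defined in the previous cell.
layout = go.Layout(barmode='stack',
                   title='students-staff ratio in 2011 and 2012 (stacked)',
                   xaxis=dict(title='students-staff ratio'),
                   yaxis=dict(title='Count'))
fig = go.Figure(data=[trace1, trace2], layout=layout)
iplot(fig)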
import plotly.graph_objs as go
from plotly import tools
import plotly.plotly as py

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = pd.read_csv(
    'https://raw.githubusercontent.com/dogatekin/Project/master/subsetDF.csv')

x = df['Review Score']
X = x[x > 0.5]

trace0 = go.Histogram(x=df['Sales Rank'])
trace1 = go.Histogram(x=X)

fig = tools.make_subplots(rows=1,
                          cols=2,
                          specs=[[{}, {}]],
                          shared_xaxes=False,
                          shared_yaxes=False,
                          vertical_spacing=0.001)
fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 1, 2)

fig['layout'].update(height=400,
                     width=800,
                     title='Histogram of the two possible response variables')
import cauldron as cd
import plotly.graph_objs as go
from cauldron import plotting
import measurement_stats as mstats

df = cd.shared.couplings_df
variations = df['raw_mad'] / df['raw_median']

cd.display.plotly(data=go.Histogram(x=variations),
                  layout=plotting.create_layout(
                      title='Coupling Length Fractional Deviations',
                      x_label='Fractional Deviation',
                      y_label='Frequency (#)'))

dist = mstats.create_distribution(measurements=variations.tolist(),
                                  uncertainties=0.01)
x = mstats.distributions.uniform_range(dist, 3)
y = dist.probabilities_at(x)

cd.display.plotly(data=go.Scatter(x=x, y=y, mode='lines', fill='tozeroy'),
                  layout=plotting.create_layout(
                      title='Coupling Length KDE',
                      x_label='Coupling Lengths (m)',
                      y_label='Expectation Value'))
def update_frequency_graphs(n_clicks, jsonified_data):
    # error checks
    if (n_clicks is None) or (jsonified_data == []):
        print('Returning dash.no_update')
        return dash.no_update

    # get variables from jsonified_data
    datasets = json.loads(jsonified_data)
    df_key_1 = pd.read_json(datasets['df_key_1'], orient='split')
    df_key_2 = pd.read_json(datasets['df_key_2'], orient='split')
    df_key_1_2 = pd.read_json(datasets['df_key_1_2'], orient='split')
    key_1 = datasets['key_1']
    key_2 = datasets['key_2']

    # convert strings to dates
    df_key_1['datee'] = pd.to_datetime(df_key_1['datee']).dt.date
    df_key_2['datee'] = pd.to_datetime(df_key_2['datee']).dt.date
    df_key_1_2['datee'] = pd.to_datetime(df_key_1_2['datee']).dt.date

    # calculate consecutive transaction intervals
    intervals_1 = (df_key_1['datee'] - df_key_1['datee'].shift(1)).dropna().apply(lambda x: x.days)
    intervals_2 = (df_key_2['datee'] - df_key_2['datee'].shift(1)).dropna().apply(lambda x: x.days)
    intervals_1_2 = (df_key_1_2['datee'] - df_key_1_2['datee'].shift(1)).dropna().apply(lambda x: x.days)

    # make subplots
    fig = make_subplots(rows=3, cols=1, vertical_spacing=0.02)

    # key_1
    fig.add_trace(go.Histogram(x=intervals_1, name='Intervals'), row=1, col=1)
    # key_2
    fig.add_trace(go.Histogram(x=intervals_2, name='Intervals'), row=2, col=1)
    # key_1_2
    fig.add_trace(go.Histogram(x=intervals_1_2, name='Intervals'), row=3, col=1)

    # xaxis properties
    fig.update_xaxes(title_text="Interval (days)", row=3, col=1)

    # yaxis properties
    fig.update_yaxes(title_text="Frequency", row=1, col=1)
    fig.update_yaxes(title_text="Frequency", row=2, col=1)
    fig.update_yaxes(title_text="Frequency", row=3, col=1)

    # layout
    fig.update_layout(
        autosize=False,
        # width=800,
        # height=800,
        margin={'l': 0, 'b': 0, 't': 0, 'r': 0, 'autoexpand': True},
        hovermode='closest',
        showlegend=False)

    return fig
def get_data(self, series):
    trace = go.Histogram(x=series, opacity=0.75)
    return [trace]
df2 = pd.read_csv('https://query.data.world/s/jq7lk27hbmlg2t5rf4tqoksxnrs4fl')

targets = df1['Emotion']
corpus = df1['Text']
corpus2 = df2['content']
targets2 = df2['sentiment']

list_emotions = list(df1['Emotion'].unique())
list_emotions.append('all')
#names = list('1st dataset', '2nd dataset', 'global dataset')

"""define the graphs"""

# histogram of emotions in the 1st dataset
fig1 = go.Figure(data=[go.Histogram(x=targets, name='Emotions')],
                 layout={
                     'title': 'Emotions Histogram',
                     'xaxis_title_text': 'Emotions',
                     'yaxis_title_text': 'frequency'
                 })

# histogram of emotions in the 2nd dataset
fig2 = go.Figure(data=[go.Histogram(x=targets2, name='Emotions')],
                 layout={
                     'title': 'Emotions Histogram',
                     'xaxis_title_text': 'Emotions',
                     'yaxis_title_text': 'frequency'
                 })
# Replace data by its standardized values
data[list(ecdf_normalized_df.columns.values)] = ecdf_normalized_df

# Visualisations

# Flows
print(data['flow'].describe())
flows_winsorized = mstats.winsorize(data['flow'], limits=[0.05, 0.05])
layout = go.Layout(title="Basic histogram of flows (winsorized)")
data_hist = [go.Histogram(x=flows_winsorized)]
fig = go.Figure(data=data_hist, layout=layout)
iplot(fig, filename='Basic histogram of flows')

# Correlations
corr = ecdf_normalized_df.corr()
sns.heatmap(corr[(corr >= 0.5) | (corr <= -0.5)],
            cmap='viridis',
            vmax=1.0,
            vmin=-1.0,
            linewidths=0.1,
            annot=True,
            annot_kws={"size": 8},
            square=True)

# Coefficient of variation
def plot_distplot(
    df,
    column,
    hist=True,
    kde=True,
    gauss=False,
    show_box=True,
    points=False,
    x_range=None,
    notched=True,
    show_mean=True,
    kde_resolution=128,
    colors="default",
    n_bins=None,
    x_legend=0.85,
    y_legend=0.8,
    show_legend=True,
    legend="default",
    bargap=0.03,
    transparent=True,
):
    # values to fit
    variable_values = df[column].values
    if x_range is not None:
        variable_values = variable_values[(variable_values >= x_range[0])
                                          & (variable_values <= x_range[1])]
    xaxis_range = [min(variable_values), max(variable_values)]

    # generate values for the x axis
    x_values = np.linspace(min(variable_values), max(variable_values),
                           kde_resolution)

    # mean and standard deviation of the fitted values
    mean, std = stats.norm.fit(variable_values)

    # Gaussian probability density
    gauss_prob_dens = stats.norm.pdf(sorted(df[column].values), loc=mean, scale=std)

    # Kernel Density Estimate of the probability density
    # (named kde_estimate so it does not shadow the `kde` flag argument)
    kde_estimate = stats.gaussian_kde(variable_values)
    kde_values = kde_estimate(x_values)

    if colors == "default":
        colors = ["#191970", "#64b5f6", "#ef6c00", "#03adfc"]
    traces = []
    if show_box:
        box = go.Box(
            x=variable_values,
            marker=dict(color=colors[3]),
            boxpoints=points,
            notched=notched,
            boxmean=show_mean,
            showlegend=False,
        )
    if hist:
        hist_trace = go.Histogram(
            x=variable_values,
            histnorm="probability density",
            marker=dict(color=colors[0], opacity=0.7),
            nbinsx=n_bins,
            name="Histogram",
            showlegend=show_legend,
        )
        traces.append(hist_trace)
    # KDE probability density
    if kde:
        kde_trace = go.Scatter(x=x_values,
                               y=kde_values,
                               name="KDE PDF",
                               showlegend=show_legend)
        traces.append(kde_trace)
    # Gaussian probability density
    if gauss:
        gauss_trace = go.Scatter(
            x=sorted(variable_values),
            y=gauss_prob_dens,
            name="Gauss PDF",
            line=dict(color="#FFA500"),
            showlegend=show_legend,
        )
        traces.append(gauss_trace)
    if show_box:
        fig = make_subplots(rows=2, cols=1)
        fig.add_trace(box, row=1, col=1)
        for trace in traces:
            fig.add_trace(trace, row=2, col=1)
        fig.layout["xaxis2"].update(
            axis_layout(show_grid=False, range_=xaxis_range, ticks=""))
        fig.layout["yaxis2"].update(
            axis_layout(ticks=""),
            domain=[0, 0.75],
            showexponent="last",
            exponentformat="power",
        )
        fig.layout["xaxis"].update(
            axis_layout(
                title="",
                ticks="",
                showticklabels=False,
                range_=xaxis_range,
                show_grid=False,
            ))
        fig.layout["yaxis"].update(
            axis_layout(title="", ticks="", showticklabels=False, show_grid=False),
            domain=[0.78, 1],
        )
    else:
        fig = go.Figure()
        for trace in traces:
            fig.add_trace(trace)
        fig.layout["xaxis"].update(
            axis_layout(title="", range_=xaxis_range, show_grid=False))
        fig.layout["yaxis"].update(
            axis_layout(title="", show_grid=True),
            showexponent="last",
            exponentformat="power",
        )
    legend_font = {"x": x_legend, "y": y_legend}
    if legend == "default":
        font = dict(size=16, family="Times New Roman")
        legend_font.update({"font": font})
    fig.update_layout(legend=legend_font, bargap=bargap)
    if transparent:
        fig.update_layout(
            legend=dict(bgcolor="rgba(0,0,0,0)"),
            paper_bgcolor="rgba(0,0,0,0)",
            plot_bgcolor="rgba(0,0,0,0)",
        )
    return fig
def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
    if column_mapping:
        date_column = column_mapping.get('datetime')
        id_column = column_mapping.get('id')
        target_column = column_mapping.get('target')
        prediction_column = column_mapping.get('prediction')
        num_feature_names = column_mapping.get('numerical_features')
        if num_feature_names is None:
            num_feature_names = []
        else:
            num_feature_names = [
                name for name in num_feature_names
                if is_numeric_dtype(reference_data[name])
            ]
        cat_feature_names = column_mapping.get('categorical_features')
        if cat_feature_names is None:
            cat_feature_names = []
        else:
            cat_feature_names = [
                name for name in cat_feature_names
                if is_numeric_dtype(reference_data[name])
            ]
    else:
        date_column = 'datetime' if 'datetime' in reference_data.columns else None
        id_column = None
        target_column = 'target' if 'target' in reference_data.columns else None
        prediction_column = 'prediction' if 'prediction' in reference_data.columns else None
        utility_columns = [
            date_column, id_column, target_column, prediction_column
        ]
        num_feature_names = list(
            set(reference_data.select_dtypes([np.number]).columns) -
            set(utility_columns))
        cat_feature_names = list(
            set(reference_data.select_dtypes([object]).columns) -
            set(utility_columns))

    # set params data
    params_data = []
    drifted_features_count = 0
    # plt.ioff()
    for feature_name in num_feature_names:  # + cat_feature_names: #feature_names:
        prod_small_hist = np.histogram(
            production_data[feature_name][np.isfinite(
                production_data[feature_name])],
            bins=10,
            density=True)
        ref_small_hist = np.histogram(
            reference_data[feature_name][np.isfinite(
                reference_data[feature_name])],
            bins=10,
            density=True)
        feature_type = 'num'
        p_value = ks_2samp(reference_data[feature_name],
                           production_data[feature_name])[1]
        distr_sim_test = "Detected" if p_value < 0.05 else "Not Detected"
        drifted_features_count += 1 if p_value < 0.05 else 0
        params_data.append({
            "details": {
                "parts": [{
                    "title": "Data drift",
                    "id": feature_name + "_drift"
                }, {
                    "title": "Data distribution",
                    "id": feature_name + "_distr"
                }],
                "insights": []
            },
            "f1": feature_name,
            "f6": feature_type,
            "f3": {
                "x": list(ref_small_hist[1]),
                "y": list(ref_small_hist[0])
            },
            "f4": {
                "x": list(prod_small_hist[1]),
                "y": list(prod_small_hist[0])
            },
            "f2": distr_sim_test,
            "f5": round(p_value, 6)
        })

    for feature_name in cat_feature_names:  # feature_names:
        prod_small_hist = np.histogram(
            production_data[feature_name][np.isfinite(
                production_data[feature_name])],
            bins=10,
            density=True)
        ref_small_hist = np.histogram(
            reference_data[feature_name][np.isfinite(
                reference_data[feature_name])],
            bins=10,
            density=True)
        feature_type = 'cat'
        # p_value = ks_2samp(reference_data[feature_name], production_data[feature_name])[1]
        # CHI2 to be implemented for cases with different categories
        ref_feature_vc = reference_data[feature_name][np.isfinite(
            reference_data[feature_name])].value_counts()
        prod_feature_vc = production_data[feature_name][np.isfinite(
            production_data[feature_name])].value_counts()
        keys = set(
            list(reference_data[feature_name][np.isfinite(
                reference_data[feature_name])].unique()) +
            list(production_data[feature_name][np.isfinite(
                production_data[feature_name])].unique()))
        ref_feature_dict = dict.fromkeys(keys, 0)
        for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
            ref_feature_dict[key] = item
        prod_feature_dict = dict.fromkeys(keys, 0)
        for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
            prod_feature_dict[key] = item
        f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
        f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
        # scipy's signature is chisquare(f_obs, f_exp): production counts are
        # the observed frequencies, reference counts the expected ones
        p_value = chisquare(f_obs, f_exp)[1]
        distr_sim_test = "Detected" if p_value < 0.05 else "Not Detected"
        drifted_features_count += 1 if p_value < 0.05 else 0
        params_data.append({
            "details": {
                "parts": [{
                    "title": "Data drift",
                    "id": feature_name + "_drift"
                }, {
                    "title": "Data distribution",
                    "id": feature_name + "_distr"
                }],
                "insights": []
            },
            "f1": feature_name,
            "f6": feature_type,
            "f3": {
                "x": list(ref_small_hist[1]),
                "y": list(ref_small_hist[0])
            },
            "f4": {
                "x": list(prod_small_hist[1]),
                "y": list(prod_small_hist[0])
            },
            "f2": distr_sim_test,
            "f5": round(p_value, 6)
        })

    # set additionalGraphs
    additional_graphs_data = []
    for feature_name in num_feature_names + cat_feature_names:  # feature_names:
        # plot distributions
        fig = go.Figure()
        fig.add_trace(
            go.Histogram(x=reference_data[feature_name],
                         marker_color=grey,
                         opacity=0.6,
                         nbinsx=10,
                         name='Reference',
                         histnorm='probability'))
        fig.add_trace(
            go.Histogram(x=production_data[feature_name],
                         marker_color=red,
                         opacity=0.6,
                         nbinsx=10,
                         name='Production',
                         histnorm='probability'))
        fig.update_layout(legend=dict(orientation="h",
                                      yanchor="bottom",
                                      y=1.02,
                                      xanchor="right",
                                      x=1),
                          xaxis_title=feature_name,
                          yaxis_title="Share")
        distr_figure = json.loads(fig.to_json())

        # plot drift
        reference_mean = np.mean(reference_data[feature_name][np.isfinite(
            reference_data[feature_name])])
        reference_std = np.std(reference_data[feature_name][np.isfinite(
            reference_data[feature_name])], ddof=1)
        x_title = "Timestamp" if date_column else "Index"
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(x=production_data[date_column]
                       if date_column else production_data.index,
                       y=production_data[feature_name],
                       mode='markers',
                       name='Production',
                       marker=dict(size=6, color=grey)))
        fig.update_layout(
            xaxis_title=x_title,
            yaxis_title=feature_name,
            showlegend=True,
            legend=dict(orientation="h",
                        yanchor="bottom",
                        y=1.02,
                        xanchor="right",
                        x=1),
            shapes=[
                dict(
                    type="rect",
                    # x-reference is assigned to the x-values
                    xref="paper",
                    # y-reference is assigned to the plot paper [0,1]
                    yref="y",
                    x0=0,
                    y0=reference_mean - reference_std,
                    x1=1,
                    y1=reference_mean + reference_std,
                    fillcolor="LightGreen",
                    opacity=0.5,
                    layer="below",
                    line_width=0,
                ),
                dict(type="line",
                     name='Reference',
                     xref="paper",
                     yref="y",
                     x0=0,  # min(testset_agg_by_date.index),
                     y0=reference_mean,
                     x1=1,  # max(testset_agg_by_date.index),
                     y1=reference_mean,
                     line=dict(color="Green", width=3)),
            ])
        drift_figure = json.loads(fig.to_json())

        # add distributions data
        additional_graphs_data.append(
            AdditionalGraphInfo(feature_name + '_distr', {
                "data": distr_figure['data'],
                "layout": distr_figure['layout']
            }))

        # add drift data
        additional_graphs_data.append(
            AdditionalGraphInfo(feature_name + '_drift', {
                "data": drift_figure['data'],
                "layout": drift_figure['layout']
            }))

    self.wi = BaseWidgetInfo(
        title="Data Drift: drift detected for " + str(drifted_features_count) +
        " out of " + str(len(num_feature_names) + len(cat_feature_names)) +
        " features",
        type="big_table",
        details="",
        alertStats=AlertStats(),
        alerts=[],
        alertsPosition="row",
        insights=[],
        size=2,
        params={
            "rowsPerPage": min(len(num_feature_names) + len(cat_feature_names), 10),
            "columns": [{
                "title": "Feature",
                "field": "f1"
            }, {
                "title": "Type",
                "field": "f6"
            }, {
                "title": "Reference Distribution",
                "field": "f3",
                "type": "histogram",
                "options": {
                    "xField": "x",
                    "yField": "y"
                }
            }, {
                "title": "Production Distribution",
                "field": "f4",
                "type": "histogram",
                "options": {
                    "xField": "x",
                    "yField": "y"
                }
            }, {
                "title": "Data drift",
drift", "field": "f2" }, { "title": "P-Value for Similarity Test", "field": "f5", "sort": "asc" }], "data": params_data }, additionalGraphs=additional_graphs_data)
)

textbox = widgets.Dropdown(
    description='City: ',
    value='SOUTHINGTON',
    options=drug_info_dataframe['city'].unique().tolist())

state = widgets.Dropdown(
    options=list(drug_info_dataframe['state'].unique()),
    value='CONNECTICUT',
    description='State:')

# Assign an empty figure widget with two traces
trace1 = go.Histogram(x=drug_info_dataframe['avg_drug_score'],
                      opacity=0.75,
                      name='Average Drug Score')
trace2 = go.Histogram(x=drug_info_dataframe['count'],
                      opacity=0.75,
                      name='Death Count')
g = go.FigureWidget(data=[trace1, trace2],
                    layout=go.Layout(title=dict(text='US Drug Death'),
                                     barmode='overlay'))


def validate():
    return (state.value in drug_info_dataframe['state'].unique()
            and textbox.value in drug_info_dataframe['city'].unique()
            and use_age_group.value in drug_info_dataframe['age_bin'].unique())
fig11 = go.Figure(data=data, layout=layout)
#iplot(fig11, filename='basic-bar')
graphJSON3 = json.dumps(fig11, cls=plotly.utils.PlotlyJSONEncoder)
#plt.savefig('static/marital_count.png')
#-----------------------------

# In[16]:

# Distribution of balances by marital status
single = df['balance'].loc[df['marital'] == 'single'].values
married = df['balance'].loc[df['marital'] == 'married'].values
divorced = df['balance'].loc[df['marital'] == 'divorced'].values

single_dist = go.Histogram(x=single,
                           histnorm='density',
                           name='single',
                           marker=dict(color='#6E6E6E'))
married_dist = go.Histogram(x=married,
                            histnorm='density',
                            name='married',
                            marker=dict(color='#2E9AFE'))
divorced_dist = go.Histogram(x=divorced,
                             histnorm='density',
                             name='divorced',
                             marker=dict(color='#FA5858'))

fig4 = tools.make_subplots(rows=3, print_grid=False)
fig4.append_trace(single_dist, 1, 1)
def update_output_div(graph_type, box_type):
    if graph_type == 'cars_by_fuel_type':
        values = []
        for ftype in obbey["Fuel Type"].unique():
            count = obbey[(obbey["Fuel Type"] == ftype)]["Fuel Type"].count()
            values.append(count)
        fig = {
            "data": [{
                "values": values,
                "labels": obbey["Fuel Type"].unique(),
                "hoverinfo": "label+percent",
                "type": "pie"
            }],
            "layout": {
                "title": "Cars by Fuel Type",
            }
        }
    elif graph_type == 'miles_per_gallon_city_highway':
        if box_type == 'miles_per_gallon_highway':
            data = []
            for mtype in obbey.Manufacturer.unique():
                trace = go.Box(
                    x=obbey[(obbey.Manufacturer == mtype)].Manufacturer,
                    y=obbey[(obbey.Manufacturer == mtype)]["Miles-per-gallon in highway"],
                    name=mtype,
                )
                data.append(trace)
            layout = go.Layout(
                title='Miles per gallon in Highway',
                showlegend=True,
                yaxis=dict(title="Miles per gallon"),
                xaxis=dict(title=""),
            )
            fig = dict(data=data, layout=layout)
        else:
            data = []
            for mtype in obbey.Manufacturer.unique():
                trace = go.Box(
                    x=obbey[(obbey.Manufacturer == mtype)].Manufacturer,
                    y=obbey[(obbey.Manufacturer == mtype)]["Miles-per-gallon in city"],
                    name=mtype,
                )
                data.append(trace)
            layout = go.Layout(
                title='Miles per gallon in City',
                showlegend=True,
                yaxis=dict(title="Miles per gallon"),
                xaxis=dict(title=""),
            )
            fig = dict(data=data, layout=layout)
    else:
        trace1 = go.Histogram(
            x=obbey[(obbey.Driveline == "All-wheel drive")].Horsepower,
            name='All-wheel drive')
        trace2 = go.Histogram(
            x=obbey[(obbey.Driveline == "Front-wheel drive")].Horsepower,
            name='Front-wheel drive')
        trace3 = go.Histogram(
            x=obbey[(obbey.Driveline == "Rear-wheel drive")].Horsepower,
            name='Rear-wheel drive')
        trace4 = go.Histogram(
            x=obbey[(obbey.Driveline == "Four-wheel drive")].Horsepower,
            name='Four-wheel drive')
        data = [trace1, trace2, trace3, trace4]
        layout = go.Layout(title='Horsepower by driveline')
        fig = go.Figure(data=data, layout=layout)
    return fig