def update_graph2(data):
    """Return a scatter of ``proportion`` per ``material``.

    ``data`` is parsed with ``pd.read_json(..., orient='split')`` and the
    resulting frame is plotted with one colour per ``material`` value.
    """
    frame = pd.read_json(data, orient='split')
    figure = px.scatter(
        frame,
        x="material",
        y="proportion",
        color="material",
    )
    return figure
y=selected_value, title=f"{selected_country}の{selected_value}") handson5 = html.Div( [ dcc.Graph( id="my_graph5", figure=px.scatter( gapminder, x="gdpPercap", y="lifeExp", size="pop", animation_frame="year", log_x=True, range_y=[20, 90], color="continent", size_max=70, hover_data=["country"], template={"layout": { "dragmode": "select" }}, ), ), html.H1(id="show_text5"), ], style={"margin": "5%"}, ) @app.callback(Output("show_text5", "children"),
# Load the compiled plot data.
location = '/Users/Derrick-Vlad-/Desktop/Personal Projects/2019/Web Scraping/Monetary Authority Singapore (MAS)/Attempt 3/BackTests/CompileBackTest_4/Finalized_2/Plot_Data_3.csv'
typesss2 = pd.read_csv(location)

# Keep rows whose three monetary columns are all above the lower bound.
above_floor = (
    (typesss2['Gross Claims'].astype(float) > min_all)
    & (typesss2['Gross Premiums'].astype(float) > min_all)
    & (typesss2['Operating Result'].astype(float) > min_all)
)
typesss2 = typesss2[above_floor]

# Then drop rows at or above the per-column upper bounds.
below_cap = (
    (typesss2['Gross Claims'].astype(float) < max_Claims)
    & (typesss2['Gross Premiums'].astype(float) < max_Premiums)
)
typesss2 = typesss2[below_cap]

# Coverage values to exclude (the list keeps its original name).
continents = ['Fire', 'Cargo & Hull', 'Work Injury', 'Misc']
is_excluded = typesss2.Coverage.isin(continents)
typesss2 = typesss2[~is_excluded]

# Treat blank / whitespace-only cells as missing, then drop incomplete rows.
typesss2 = typesss2.replace(r'^\s*$', np.nan, regex=True)
typesss2.dropna(inplace=True)

"""Plotter"""
fig5 = px.scatter(
    typesss2,
    x="Gross Claims",
    y="Gross Premiums",
    animation_frame="Year",
    animation_group="Insurer Code",
    size="Operating Result",
    color="Coverage",
    hover_name="Insurer Code",
    log_x=log_scaling,
    size_max=60,
    title='Evolution: 14-Years of Private General Insurance Data in SG (Logarithmic Scale)'
    # range_x=[100, 1000000], range_y=[-50000, 250000],
)

plotly.offline.plot(fig5, filename='file6.html')
# Write the imputed weather values back into the merged frame.
merged[["temp", "humidity", "dew_point"]] = imp_values
merged.iloc[null_indexes]

# split meters into two groups
meters = list(merged.METER_ID.unique())
group1 = merged[
    (merged.METER_ID == meters[0]) | (merged.METER_ID == meters[1])
]
group2 = merged[
    (merged.METER_ID != meters[0]) & (merged.METER_ID != meters[1])
]

"""# EDA"""

# Correlation between the reading and the weather columns.
subset = merged[["READ_VALUE", "temp", "humidity", "dew_point"]]
sns.heatmap(subset.corr(), annot=True)
plt.title("Correlation Heatmap")

# One faceted scatter per weather variable: (x column, colour column, title).
for x_column, colour_column, heading in (
    ("temp", "humidity", "Temp Against Meter Value"),
    ("humidity", "temp", "Humidity Against Meter Value"),
    ("dew_point", "humidity", "Dew Point Against Meter Value"),
):
    fig = px.scatter(
        merged,
        x=x_column,
        y="READ_VALUE",
        color=colour_column,
        width=2000,
        height=500,
        title=heading,
        facet_col="METER_ID",
    )
    fig.show()

# Reading trend over time, one facet per meter.
fig = px.line(
    merged,
    x="date",
    y="READ_VALUE",
    width=2000,
    height=500,
    color="METER_ID",
    title="Meter Reading Trend",
    facet_col="METER_ID",
)
fig.update_xaxes(tickangle=45)
fig.show()

"""# Stationary Data Check"""
nba_df = nba_df.astype({'draft_year': 'float64'}) nba_df.rename(columns={ 'draft_year': 'Draft Year', 'career_WS': 'Career Win Shares', 'career_earnings': 'Career Earnings', 'name': 'Name' }, inplace=True) print(nba_df.columns) import plotly.express as px fig = px.scatter( nba_df, x="Career Win Shares", y="Career Earnings", color='Draft Year', title="Win Shares by Salary in NBA History", hover_data=['Career Win Shares', 'Career Earnings', 'Name', 'Draft Year'], color_continuous_scale=px.colors.sequential.Jet) fig.update_layout( title="Win Shares by Salary in NBA History", xaxis_title="Career Win Shares", yaxis_title="Career Earnings ($)", height=500, font=dict(family="Courier New, monospace", size=14, color="Black"), margin=dict(l=150, r=60, t=60, b=60), paper_bgcolor="LightSteelBlue", ) fig.update_traces(marker=dict(size=12,
import plotly.express as px

# Sample iris measurements bundled with plotly.
df = px.data.iris()

# Sepal length against sepal width at a fixed canvas size.
fig = px.scatter(
    df,
    x="sepal_length",
    y="sepal_width",
    width=800,
    height=500,
    title='Gráfico de Dispersão',
)

# Inside Jupyter, fig.show() can be used instead of writing a file.
fig.write_html('first_figure.html', auto_open=True)
# State pivot: total project amount and property count per state.
pivot_state = df.pivot_table(
    index="state",
    values=["projectAmountMillions", "numberOfProperties"],
    aggfunc="sum",
).reset_index()
pivot_state["region"] = pivot_state["state"].apply(state_to_region)

# Drop rows whose region is "skip" (regions not included in the 50 states).
in_fifty_states = pivot_state["region"] != "skip"
pivot_state = pivot_state[in_fifty_states]

# Pair plot of amount funded / properties covered, coloured by region.
ax = sns.pairplot(pivot_state.iloc[:, 1:], kind="scatter", hue="region")
ax.fig.suptitle(
    "Total Project Amount and Number of Properties Covered In FEMA Projects by US State"
)
ax.fig.tight_layout()
ax.fig.subplots_adjust(top=0.95)  # leave room for the title
plt.show()

# Plotly Express scatter, for the Dash app later.
import plotly.express as px
from plotly.offline import plot

available_states = pivot_state['state'].unique()
fig_px = px.scatter(
    pivot_state,
    x="numberOfProperties",
    y="projectAmountMillions",
    hover_data=['state', 'region'],
    color="region",
)
# plot(fig_px,filename='fema_state_propnumber_amountspent_scatter.html')
import pandas as pd
import plotly.express as px

# Case counts per date and country.
df = pd.read_csv("corona.csv")

fig = px.scatter(
    df,
    x="date",
    y="cases",
    color="country",
    title="corona cases per date",
)
fig.show()
# Fix the display order of the answer categories for each survey question.
gss_clean['men_bettersuited'] = gss_clean['men_bettersuited'].cat.reorder_categories(
    ['agree', 'disagree'])

gss_clean['child_suffer'] = gss_clean['child_suffer'].astype('category')
gss_clean['child_suffer'] = gss_clean['child_suffer'].cat.reorder_categories(
    ['strongly agree', 'agree', 'disagree', 'strongly disagree'])

gss_clean['men_overwork'] = gss_clean['men_overwork'].astype('category')
gss_clean['men_overwork'] = gss_clean['men_overwork'].cat.reorder_categories(
    ['strongly agree', 'agree', 'neither agree nor disagree', 'disagree',
     'strongly disagree'])

# Income against occupational prestige, with an OLS trendline per sex.
scat = px.scatter(gss_clean, x='job_prestige', y='income', color='sex',
                  hover_data=['education', 'socioeconomic_index'],
                  trendline='ols',
                  labels={'job_prestige': 'Occupational Prestige',
                          'income': 'Income'})

# Long-format frame so income and prestige can be drawn as separate box plots.
boxes = pd.melt(gss_clean, id_vars=['sex'],
                value_vars=['income', 'job_prestige'])

fig1 = px.box(boxes.loc[boxes['variable'] == 'income'],
              x='value', y='sex', color='sex',
              labels={'value': 'Income', 'sex': ''})
fig1.update_layout(showlegend=False)

fig2 = px.box(boxes.loc[boxes['variable'] == 'job_prestige'],
              x='value', y='sex', color='sex',
              labels={'value': 'Occupational Prestige', 'sex': ''})
fig2.update_layout(showlegend=False)

# Work on an explicit copy: assigning a column on a slice of gss_clean
# triggers SettingWithCopyWarning and leaves it ambiguous which frame is
# modified.
gss6 = gss_clean[['income', 'sex', 'job_prestige']].copy()
gss6['job_prestige'] = pd.cut(gss6.job_prestige, bins=6)
gss6 = gss6.dropna()

# Income by sex, one facet per prestige bin.
box_grid = px.box(gss6, x='income', y='sex', color='sex',
                  facet_col='job_prestige', facet_col_wrap=2,
                  labels={'income': 'Income', 'sex': 'Sex',
                          'job_prestige': 'Occupational Prestige'},
                  color_discrete_map={'male': 'blue', 'female': 'red'})
def perform_eda(data): st.title('Data Analysis') st.markdown('## Age Analysis') st.plotly_chart(plot_value_counts_bar(data, 'Age')) st.markdown('''#### Observation: - Most of the movies/shows are targeted to adult audience''') st.markdown('## Rotten Tomatoes Ratings') data['Rotten_Tomatoes_Rounded'] = data['Rotten Tomatoes'].apply(round_fix) st.plotly_chart(plot_value_counts_bar(data, 'Rotten_Tomatoes_Rounded')) st.markdown('''#### Observations: - Most of the content is high rated on streaming platforms''') st.markdown('## IMDB Ratings') data['IMDB_Rounded'] = data.IMDb.apply(round_fix_imdb) st.plotly_chart(plot_value_counts_bar(data, 'IMDB_Rounded')) st.markdown('''#### Observations: - Most of content on streaming platforms has average ratings on IMDB.''') st.markdown('## Highest IMDb Movies/Shows') netflix_count = data[data['IMDB_Rounded']=='Really_Good']['Netflix'].sum() hulu_count = data[data['IMDB_Rounded']=='Really_Good']['Hulu'].sum() disney_count = data[data['IMDB_Rounded']=='Really_Good']['Disney+'].sum() prime_count = data[data['IMDB_Rounded']=='Really_Good']['Prime Video'].sum() indexes = ['Netflix', 'Hulu', 'Disney', 'Amazon Prime'] values = [netflix_count, hulu_count, disney_count, prime_count] fig=px.pie(labels=indexes, values=values,title='Top content on OTT',hover_name=indexes) st.plotly_chart(fig) st.markdown('## Most Popular Genre') temp_data=data.copy() kata, temp_data = apply_encoding(temp_data, ['Genres', 'Country', 'Language'], get_kata=1) base_counts = get_counts(temp_data, 'Genres', kata['Genres']) base_counts = pd.DataFrame(index=base_counts.keys(), data=base_counts.values(), columns=['Counts']) base_counts.sort_values(by='Counts', inplace=True) colors=['#988D90' if i<1000 else '#F00045' for i in base_counts.Counts] fig = px.bar(x=base_counts.index, y=base_counts['Counts'], title='Most Popular Genre',color_discrete_sequence=colors,color=base_counts.index) st.plotly_chart(fig) st.markdown('''#### Observations: - Drama is most popular 
genre''') st.markdown('## Most Released Content') st.markdown('### Country') base_counts = get_counts(temp_data, 'Country', kata['Country']) base_counts = pd.DataFrame(index=base_counts.keys(), data=base_counts.values(), columns=['Counts']) base_counts.sort_values(by='Counts', ascending=False, inplace=True) fig = px.bar(x=base_counts.index[:10], y=base_counts['Counts'][:10], color=base_counts['Counts'][:10], title='Most Released Content') st.plotly_chart(fig) st.markdown('''#### Observations: - Most released content was in US''') st.markdown('### Language') base_counts = get_counts(temp_data, 'Language', kata['Language']) base_counts = pd.DataFrame(index=base_counts.keys(), data=base_counts.values(), columns=['Counts']) base_counts.sort_values(by='Counts', ascending=False, inplace=True) fig = px.bar(x=base_counts.index[:5], y=base_counts['Counts'][:5], color=base_counts['Counts'][:5], title='Most Released Content: Language') st.plotly_chart(fig) st.markdown('''#### Observations: From the above visualizations we can conclude that: - We can work with few genres with count more than 1000 and rest of the genres can be categorized as others. - It is important to keep countries, but at continent level for better clarity. - Most of the content is in english only. 
''') st.markdown('## OTT Platforms') st.markdown('### Content Releases') release_scores = get_ott_counts(temp_data, ['Netflix', 'Hulu', 'Prime Video', 'Disney+'], 'Year') fig = px.scatter( release_scores, x='Year', y='Count', size='Count', color='Platform', title='Content Per OTT Apps released in consecutive years', color_discrete_sequence=['#E50914', '#3DBB3D', '#00A8E1', '#048f70 ']) st.plotly_chart(fig) st.markdown('''#### Observations: - Amazon Prime Video has the most modern as well as old content''') st.markdown('### Top Genres') genres = kata['Genres'].copy() genres.extend(['All']) platform = ['Netflix', 'Hulu', 'Prime Video', 'Disney+', 'All'] temp_data.IMDb=temp_data.IMDb.apply(replaceNAby1) temp_data.IMDb=temp_data.IMDb.astype(float) genre=st.selectbox('Genres',genres) plt_frm=st.selectbox('Platform',platform) st.plotly_chart(plot_genres(genre,plt_frm,temp_data)) keep_genres=pickle.load(open('keep_genres.pickle','rb')) genre_counts = get_counts(eata, 'Genres', keep_genres) genre_counts = get_counts(eata, 'Genres', keep_genres) genre_counts = pd.DataFrame(index=genre_counts.keys(), data=genre_counts.values(), columns=['Counts']) genre_counts.sort_values(by='Counts',inplace=True) cont_counts = get_counts(eata, 'Continent', ['Africa', 'Antarctica', 'Asia', 'Europe', 'North America', 'Oceania', 'South America','NA']) cont_counts = pd.DataFrame(index=cont_counts.keys(), data=cont_counts.values(), columns=['Counts']) cont_counts.sort_values(by='Counts', ascending=False, inplace=True) keep_lang=pickle.load(open('keep_lang.pickle','rb')) lang_counts = get_counts(eata, 'Language',keep_lang) lang_counts = pd.DataFrame(index=lang_counts.keys(), data=lang_counts.values(), columns=['Counts']) lang_counts.sort_values(by='Counts', ascending=False, inplace=True)
This app was made to help answer these questions in a meaningful and usable way. It is simple to use and can give a recommendation backed by data on what the user should buy for their personal treatment. """), dcc.Link(dbc.Button('Find out what you need', color='primary'), href='/predictions') ], md=4, ) gapminder = px.data.gapminder() fig = px.scatter(gapminder.query("year==2007"), x="gdpPercap", y="lifeExp", size="pop", color="continent", hover_name="country", log_x=True, size_max=60) column2 = dbc.Col([ html.Div( html.Img(src=app.get_asset_url('herb.jpg'), style={ 'height': '80%', 'width': '80%' })) ]) layout = dbc.Row([column1, column2])
import csv
import plotly.express as px
import pandas as pd

# Read the raw rows; the first row is the header and is discarded.
with open("class2.csv", newline="") as f:
    fileData = list(csv.reader(f))
fileData.pop(0)

# The second column holds the marks.
newData = [float(row[1]) for row in fileData]
n = len(newData)

# Use the builtin sum() instead of rebinding the name ``sum`` to a number,
# which would break every later call to sum() in this module.
mean = sum(newData) / n

df = pd.read_csv("class2.csv")
fig = px.scatter(df, x='Student Number', y="Marks")

# Horizontal reference line at the mean, spanning x = 0 .. n.
fig.update_layout(shapes=[dict(type="line", y0=mean, y1=mean, x0=0, x1=n)])
fig.update_yaxes(rangemode="tozero")
fig.show()
print(mean)
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

if __name__ == "__main__":
    app = dash.Dash(__name__)

    # Input data: the housing table and the fitted linear-model coefficients.
    df_boston_housing = pd.read_csv('./data/BostonHousing.csv')
    df_lin_model_params = pd.read_csv(
        './data/lin-model-params.csv',
        names=['param', 'coef'],
    )

    # One figure per chart on the page.
    bar_chart = px.bar(df_lin_model_params, x='param', y='coef')
    histogram = px.histogram(df_boston_housing, x="ptratio")
    scatter = px.scatter(df_boston_housing, x="age", y="medv")

    # Graph id -> figure, in display order.
    charts = (
        ('bar', bar_chart),
        ('hist', histogram),
        ('scat', scatter),
    )
    app.layout = html.Div(children=[
        dcc.Graph(id=graph_id, figure=figure) for graph_id, figure in charts
    ])

    app.run_server(debug=True)
# Per-product totals of profit ("Gewinn") and count ("Anzahl").
df_BCG_1 = (
    df.groupby(["Angebotenes Produkt"])[["Gewinn", "Anzahl"]]
    .sum()
    .reset_index()
)

# Per-product ratio: sum of "Anzahl" divided by the number of entries.
df_BCG_2 = df.groupby(["Angebotenes Produkt"])["Anzahl"].apply(
    lambda counts: counts.sum() / counts.count()
)

# Join both aggregates and give the suffixed columns meaningful names.
df_BCG = df_BCG_1.merge(df_BCG_2, on="Angebotenes Produkt").rename(
    columns={
        "Anzahl_x": "Anzahl",
        "Anzahl_y": "Kaufwahrscheinlichkeit",
    }
)
df_BCG["Kaufwahrscheinlichkeit in %"] = df_BCG["Kaufwahrscheinlichkeit"] * 100
df_BCG["Gewinn pro Verkauf in €"] = df_BCG["Gewinn"] / df_BCG["Anzahl"]

# Scatter plot of "Kaufwahrscheinlichkeit in %" against
# "Gewinn pro Verkauf in €", one colour per product.
fig = px.scatter(
    df_BCG,
    x=df_BCG["Kaufwahrscheinlichkeit in %"],
    y=df_BCG["Gewinn pro Verkauf in €"],
    color="Angebotenes Produkt",
)

# Overlay the BCG-matrix section labels on the scatter plot to visualise the
# classification: (y position, label text).
for label_y, label_text in (
    (900, "<b>Poor Dogs</b>"),
    (1900, "<b>Questionmarks</b>"),
):
    fig.add_trace(
        go.Scatter(
            x=[12.5, 12.5],
            y=[label_y, label_y],
            text=[label_text],
            mode="text",
            showlegend=False,
        )
    )
# this directory import os dir_name = os.path.join("test", "percy") import plotly.express as px print(px.data.iris.__doc__) px.data.iris().head() # #### Scatter and Line plots import plotly.express as px iris = px.data.iris() fig = px.scatter(iris, x="sepal_width", y="sepal_length") fig.write_html(os.path.join(dir_name, "scatter.html")) import plotly.express as px iris = px.data.iris() fig = px.scatter(iris, x="sepal_width", y="sepal_length", color="species") fig.write_html(os.path.join(dir_name, "scatter_color.html")) import plotly.express as px iris = px.data.iris() fig = px.scatter( iris, x="sepal_width", y="sepal_length",
def predictor_processing(
    df, predicts, response, response_col, resp_type, resp_mean, response_col_uncoded
):
    """Analyse each predictor against the response and tabulate the results.

    For every column of ``df`` listed in ``predicts`` this function:

    * classifies the predictor as "Categorical" or "Continuous",
    * writes a relationship plot, a regression plot and a
      difference-with-mean-of-response plot as HTML under ``./hw4_plots/``,
    * fits a single-predictor model (``sm.Logit`` for a categorical response,
      ``sm.OLS`` otherwise) and records its first t value and p value,
    * computes the unweighted and weighted mean squared difference between
      the binned response means and ``resp_mean``.

    For continuous predictors the number of bins is requested interactively
    through ``input()``.

    Args:
        df: Frame holding the predictor columns.
        predicts: Names of the predictor columns to process.
        response: Name of the response, used for labels and file names.
        response_col: Response values used for modelling and plotting.
        resp_type: "Categorical" or "Continuous".
        resp_mean: Value subtracted from each binned response mean.
        response_col_uncoded: Values used to colour the histogram when the
            response is categorical and the predictor continuous.

    Returns:
        A ``(pred_proc, results)`` tuple: ``pred_proc`` is the response column
        concatenated with every processed predictor column, ``results`` is a
        frame indexed by ``predicts`` with one row of statistics and HTML
        links per predictor.
    """
    # Predictor loop
    ########################################
    predicts_col = df[df.columns.intersection(predicts)]

    # Build preliminary results table
    results_cols = [
        "Response",
        "Predictor Type",
        "t Score",
        "p Value",
        "Regression Plot",
        "Diff Mean of Response (Unweighted)",
        "Diff Mean of Response (Weighted)",
        "Diff Mean Plot",
    ]
    results = pd.DataFrame(columns=results_cols, index=predicts)

    # DataFrame.iteritems() was removed in pandas 2.0; items() is equivalent
    # and also exists in older releases.
    for pred_name, pred_data in predicts_col.items():
        # Decide cat or cont
        ##########
        # pred_data is a Series, so isinstance(pred_data, str) could never be
        # true; check the column's dtype instead.
        pred_string_check = pd.api.types.is_string_dtype(pred_data.dtype)
        pred_unique_ratio = len(pred_data.unique()) / len(pred_data)
        if pred_string_check or pred_unique_ratio < 0.05:
            pred_type = "Categorical"

            # Encode the categories as integer codes
            pred_data = pd.Categorical(pred_data, categories=pred_data.unique())
            pred_data, _ = pd.factorize(pred_data)
            pred_data = pd.DataFrame(pred_data, columns=[pred_name])
            pred_data_uncoded = df[pred_name]
        else:
            pred_type = "Continuous"
            pred_data = pred_data.to_frame()

        # Bind response and predictor together again
        df_c = pd.concat([response_col, pred_data], axis=1)
        df_c.columns = [response, pred_name]

        # Relationship plot and correlations
        if resp_type == "Categorical" and pred_type == "Categorical":
            rel_matrix = confusion_matrix(pred_data, response_col)
            fig_relate = go.Figure(
                data=go.Heatmap(z=rel_matrix, zmin=0, zmax=rel_matrix.max())
            )
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=pred_name,
                yaxis_title=response,
            )
        elif resp_type == "Categorical" and pred_type == "Continuous":
            fig_relate = px.histogram(df_c, x=pred_name, color=response_col_uncoded)
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=pred_name,
                yaxis_title="count",
            )
        elif resp_type == "Continuous" and pred_type == "Categorical":
            fig_relate = px.histogram(df_c, x=response, color=pred_data_uncoded)
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=response,
                yaxis_title="count",
            )
        elif resp_type == "Continuous" and pred_type == "Continuous":
            fig_relate = px.scatter(y=response_col, x=pred_data, trendline="ols")
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=pred_name,
                yaxis_title=response,
            )

        # File names drop spaces; the "open" path is relative to the plot dir.
        response_html = response.replace(" ", "")
        pred_name_html = pred_name.replace(" ", "")

        relate_file_save = f"./hw4_plots/{response_html}_{pred_name_html}_relate.html"
        relate_file_open = f"./{response_html}_{pred_name_html}_relate.html"
        fig_relate.write_html(file=relate_file_save, include_plotlyjs="cdn")
        relate_link = (
            "<a target='blank' href="
            + relate_file_open
            + "><div>"
            + pred_type
            + "</div></a>"
        )

        # Regression
        ##########
        if resp_type == "Categorical":
            reg_model = sm.Logit(response_col, pred_data, missing="drop")
        else:
            reg_model = sm.OLS(response_col, pred_data, missing="drop")

        # Fit model
        reg_model_fitted = reg_model.fit()

        # Get t val and p score. Go through np.asarray so the first value is
        # taken by position whether the result is a Series or an ndarray
        # (Series[0] on a labelled index is deprecated positional access).
        t_score = round(np.asarray(reg_model_fitted.tvalues)[0], 6)
        p_value = "{:.6e}".format(np.asarray(reg_model_fitted.pvalues)[0])

        # Plot regression
        reg_fig = px.scatter(y=df_c[response], x=df_c[pred_name], trendline="ols")
        # NOTE(review): this first write happens before the layout is set and
        # goes to a different file name than the one linked below; kept
        # because other code may read it - confirm whether it is still needed.
        reg_fig.write_html(
            file=f"./hw4_plots/{pred_name}_regression.html", include_plotlyjs="cdn"
        )
        reg_fig.update_layout(
            title=f"Regression: {response} on {pred_name}",
            xaxis_title=pred_name,
            yaxis_title=response,
        )

        reg_file_save = f"./hw4_plots/{response_html}_{pred_name_html}_reg.html"
        reg_file_open = f"./{response_html}_{pred_name_html}_reg.html"
        reg_fig.write_html(file=reg_file_save, include_plotlyjs="cdn")
        reg_link = "<a target='blank' href=" + reg_file_open + "><div>Plot</div></a>"

        # Diff with mean of response (unweighted and weighted)
        ##########
        if pred_type == "Continuous":
            # Ask until the answer parses as an integer
            bin_n = None
            while not isinstance(bin_n, int):
                answer = input(
                    f"\nEnter number of bins to use for difference with mean of response for {pred_name}:\n"
                )
                try:
                    bin_n = int(answer)
                except ValueError:
                    continue
            df_c["bin_labels"] = pd.cut(df_c[pred_name], bins=bin_n, labels=False)
            binned_means = df_c.groupby("bin_labels").agg(
                {response: ["mean", "count"], pred_name: "mean"}
            )
        else:
            # Categorical predictors are grouped by their integer code
            df_c.columns = [f"{response}", f"{pred_name}"]
            binned_means = df_c.groupby(pred_data.iloc[:, 0]).agg(
                {response: ["mean", "count"], pred_name: "mean"}
            )
            bin_n = len(np.unique(pred_data.iloc[:, 0].values))

        binned_means.columns = [f"{response} mean", "count", f"{pred_name} mean"]

        # Binning and mean squared difference calc
        binned_means["weight"] = binned_means["count"] / binned_means["count"].sum()
        binned_means["mean_sq_diff"] = (
            binned_means[f"{response} mean"].subtract(resp_mean, fill_value=0) ** 2
        )
        binned_means["mean_sq_diff_w"] = (
            binned_means["weight"] * binned_means["mean_sq_diff"]
        )

        # Diff with mean of response stat calculations (weighted and unweighted)
        msd_uw = binned_means["mean_sq_diff"].sum() * (1 / bin_n)
        msd_w = binned_means["mean_sq_diff_w"].sum()

        # Diff with mean of response plots: bin counts as bars, binned
        # response mean as a line on a secondary y axis.
        fig_diff = make_subplots(specs=[[{"secondary_y": True}]])
        fig_diff.add_trace(
            go.Bar(
                x=binned_means[f"{pred_name} mean"],
                y=binned_means["count"],
                name="Observations",
            )
        )
        fig_diff.add_trace(
            go.Scatter(
                x=binned_means[f"{pred_name} mean"],
                y=binned_means[f"{response} mean"],
                line=dict(color="red"),
                name=f"Relationship with {response}",
            ),
            secondary_y=True,
        )
        fig_diff.update_layout(
            title_text=f"Difference in Mean Response: {response} and {pred_name}",
        )
        fig_diff.update_xaxes(title_text=f"{pred_name} (binned)")
        fig_diff.update_yaxes(title_text="count", secondary_y=False)
        fig_diff.update_yaxes(title_text=f"{response}", secondary_y=True)

        fig_diff_file_save = f"./hw4_plots/{response_html}_{pred_name_html}_diff.html"
        fig_diff_file_open = f"./{response_html}_{pred_name_html}_diff.html"
        fig_diff.write_html(file=fig_diff_file_save, include_plotlyjs="cdn")
        diff_link = (
            "<a target='blank' href=" + fig_diff_file_open + "><div>Plot</div></a>"
        )

        # Create processed df: the first predictor seeds it with the response
        if pred_name == predicts_col.columns[0]:
            pred_proc = pd.concat([response_col, pred_data], axis=1)
        else:
            pred_proc = pd.concat([pred_proc, pred_data], axis=1)

        # Add to results table
        results.loc[pred_name] = pd.Series(
            {
                "Response": response,
                "Predictor Type": relate_link,
                "t Score": t_score,
                "p Value": p_value,
                "Regression Plot": reg_link,
                "Diff Mean of Response (Unweighted)": msd_uw,
                "Diff Mean of Response (Weighted)": msd_w,
                "Diff Mean Plot": diff_link,
            }
        )

    return pred_proc, results
import plotly.express as px
tips = px.data.tips()  # tips dataset can be loaded from plotly
# data_canada = px.data.gapminder().query("country == 'Canada'")
import pandas as pd
tips.to_csv('/Users/vivekparashar/Downloads/tips.csv')

import altair as alt
import statsmodels.api as sm

# NOTE: every aggregation below selects its column *before* calling mean().
# Averaging the whole frame first raises TypeError on pandas >= 2.0, because
# the remaining non-numeric columns are no longer dropped silently.

# Dot plot shows changes between two (or more) points in time or between two
# (or more) conditions.
t = tips.groupby(['day', 'sex'])[['total_bill']].mean().reset_index()
px.scatter(t, x='day', y='total_bill', color='sex',
           title='Average bill by gender by day',
           labels={'day': 'Day of the week',
                   'total_bill': 'Average Bill in $'})

# Bar (vertical and horizontal)
tips.groupby('sex')['total_bill'].mean().plot(kind='bar')  # using pandas plot
tips.groupby('sex')['tip'].mean().plot(kind='barh')
t = tips.groupby(['day', 'sex'])[['total_bill']].mean().reset_index()
px.bar(t, x='day', y='total_bill')  # Using plotly
px.bar(t, x='total_bill', y="day", orientation='h')

# Stacked Bar - need to unstack one of the levels and fill na values
tips.groupby(['day', 'sex'])[['total_bill']].mean()\
    .unstack('sex').fillna(0)\
    .plot(kind='bar', stacked=True)  # using pandas plot; kind='barh' for horizontal plot
import pandas as pd
import csv
import plotly.express as px

df = pd.read_csv("data.csv")

# Average "attempt" per (student_id, level) pair, kept as regular columns.
attempts_by_student_level = df.groupby(
    ["student_id", "level"], as_index=False
)["attempt"]
mean = attempts_by_student_level.mean()

# Marker size and colour both encode the average attempt value.
fig = px.scatter(
    mean,
    x="student_id",
    y="level",
    size="attempt",
    color="attempt",
)
fig.show()
df.washer = df.washer.map(yes_no_dict) df.cable_tv = df.cable_tv.map(yes_no_dict) df.kitchen = df.kitchen.map(yes_no_dict) # rename columns df.rename(columns={'neighborhood': 'Neighborhood', 'room_type': 'Room Type', 'accommodates': 'Accommodates', 'bedrooms': 'Bedrooms', 'number_of_reviews': 'Number of Reviews', 'wifi': 'Wifi', 'cable_tv': 'Cable TV', 'washer': 'Washer', 'kitchen': 'Kitchen', 'price': 'Price (US Dollars)'}, inplace=True) # remove outliers df = df[df['Price (US Dollars)'] < 501] return df # clean data df = clean_data(df) #show data fig = px.scatter(df, x='Neighborhood', y='Price (US Dollars)' ,size='Accommodates' ,hover_data=['Bedrooms', 'Wifi', 'Cable TV', 'Kitchen', 'Washer', 'Number of Reviews'] ,color= 'Room Type') fig.update_layout(template='plotly_white') fig.update_layout(title='How much should you charge in a Berlin neighborhood?') # fig.show() # display it locally # write to html pio.write_html(fig, file='templates/visss.html', auto_open=True)
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# 2007 GDP / life-expectancy table hosted as a gist.
df = pd.read_csv(
    'https://gist.githubusercontent.com/chriddyp/5d1ea79569ed194d432e56108a04d188/raw/a9f9e8076b837d541398e999dcbac2b2826a81f8/gdp-life-exp-2007.csv'
)

# Bubble chart: log-scaled GDP on x, bubble size by population.
fig = px.scatter(
    df,
    x='gdp per capita',
    y='life expectancy',
    size='population',
    color='continent',
    hover_name='country',
    log_x=True,
    size_max=60,
)

graph = dcc.Graph(id='life-exp-vs-gdp', figure=fig)
app.layout = html.Div([graph])

if __name__ == '__main__':
    app.run_server(debug=True)
from dash.dependencies import Input, Output import json gapminder = px.data.gapminder() app = dash.Dash(__name__) app.layout = html.Div([ html.H1("Gapminder Data"), dcc.Graph(id="my_graph", figure=px.scatter(gapminder, x="gdpPercap", y="lifeExp", size="pop", log_x=True, log_y=True, color="continent", hover_data=["country"], animation_frame="year", size_max=80)), dcc.Graph(id="show_data") ]) @app.callback(Output("show_data", "figure"), [Input("my_graph", "clickData")]) def update_data(hoverData): if hoverData is None: jp_gap = gapminder[gapminder.country.isin(["Japan"])] return px.line(jp_gap, x="year", y="gdpPercap",
import csv

# Load every row, then discard the header row.
with open("class1.csv", newline="") as f:
    data = list(csv.reader(f))
del data[0]

# Accumulate the marks (second column) and average them.
n = len(data)
totalmarks = 0
for row in data:
    totalmarks = totalmarks + float(row[1])
mean = totalmarks / n
print(mean)

import pandas as pd
import plotly.express as px

df = pd.read_csv("class1.csv")
fig = px.scatter(df, x="Student Number", y="Marks")

# Horizontal line at the mean, drawn from x = 0 to x = n.
mean_line = dict(type="line", y0=mean, y1=mean, x0=0, x1=n)
fig.update_layout(shapes=[mean_line])
fig.show()
date_last = df.index[-1]

df_simu = mymodule.mysimu(df, previous_days, money_ini, bitcoin_ini, prop_ini)

# Normalise both series by their first row. Use .iloc[0] so the first row is
# always taken by position: plain [0] is a label lookup on integer indexes
# and a deprecated positional fallback on any other index.
df_simu['Relative Closing Price'] = (
    df_simu['Closing Price'] / df_simu['Closing Price'].iloc[0]
)
df_simu['Relative Total Worth'] = (
    df_simu['Total Worth'] / df_simu['Total Worth'].iloc[0]
)

# Position of each row within the period, from 0.0 (first) to 1.0 (last).
row_count = len(df_simu.index)
df_simu['Relative Time'] = [step / (row_count - 1) for step in range(row_count)]

fig2 = px.scatter(
    df_simu,
    x='Relative Closing Price',
    y='Relative Total Worth',
    color='Relative Time',
    color_continuous_scale=px.colors.sequential.Viridis,
    title='Performance against "Buy and Hold" across selected period.')

# Diagonal y = x reference, drawn over whichever series has the smaller
# maximum.
line = df_simu['Relative Closing Price'] if df_simu[
    'Relative Closing Price'].max() < df_simu['Relative Total Worth'].max(
    ) else df_simu['Relative Total Worth']
fig2.add_scatter(x=line,
                 y=line,
                 mode='lines',
                 opacity=0.5,
                 name='Unitary reference')
fig2.update(layout_showlegend=False)

app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server
def _json_default(value):
    """Convert objects exposing ``tolist()`` (e.g. NumPy values) for json.dumps."""
    if hasattr(value, "tolist"):
        return value.tolist()
    raise TypeError(
        "Object of type %s is not JSON serializable" % type(value).__name__)


def update_graph(subsidio_type_value, ayuda_type_value, xaxis_type,
                 yaxis_type):
    """Build the scatter figure for the selected subsidy / aid pair.

    The data for each pair is cached in Redis under the single key
    ``"precalculated_data"`` as a mapping
    ``"<subsidio>_<ayuda>" -> {"data": ..., "precalculation_date": ...}``.
    A missing entry is computed with ``get_data_for_graphic`` and written
    back.

    Args:
        subsidio_type_value: Selected x-axis option value.
        ayuda_type_value: Selected y-axis option value.
        xaxis_type: ``'Lineal'`` for a linear x axis, anything else for log.
        yaxis_type: ``'Lineal'`` for a linear y axis, anything else for log.

    Returns:
        A ``(figure, None)`` tuple.
    """
    # get the data
    data_key_name = "%s_%s" % (subsidio_type_value, ayuda_type_value)
    precalculated_data_bytes = redis.get("precalculated_data")
    if precalculated_data_bytes is None:
        # Cold cache: the key has never been written.
        precalculated_data = {}
    else:
        precalculated_data_str = precalculated_data_bytes.decode("utf-8")
        try:
            precalculated_data = json.loads(precalculated_data_str)
        except ValueError:
            # Entries written before the switch to JSON were stored as
            # str(dict); keep reading them with the old quote replacement.
            precalculated_data = json.loads(
                precalculated_data_str.replace("'", '"'))

    if data_key_name not in precalculated_data:
        precalculated_data[data_key_name] = {
            "data": get_data_for_graphic(subsidio_type_value,
                                         ayuda_type_value),
            # Always emit microseconds so the strptime format below matches;
            # str(datetime) omits them when they happen to be zero.
            "precalculation_date": datetime.datetime.now().isoformat(
                sep=" ", timespec="microseconds"),
        }
        # Store real JSON so apostrophes, None and booleans survive the
        # round trip.
        redis.set("precalculated_data",
                  json.dumps(precalculated_data, default=_json_default))
    data = precalculated_data[data_key_name]
    precalculation_date = datetime.datetime.strptime(
        data["precalculation_date"], '%Y-%m-%d %H:%M:%S.%f')

    # get corresponding labels
    x_axis_label = [
        x_option["label"] for x_option in available_x_axis
        if x_option["value"] == subsidio_type_value
    ][0]
    y_axis_label = [
        y_option["label"] for y_option in available_y_axis
        if y_option["value"] == ayuda_type_value
    ][0]

    # create graphic
    df = pd.DataFrame(data["data"])
    fig = px.scatter(df,
                     x=x_axis_label,
                     y=y_axis_label,
                     title="Datos actualizados al %s" %
                     precalculation_date.strftime("%d/%m/%Y, a las %H:%M"))

    # linear regression, only drawn when both axes are linear
    if xaxis_type == 'Lineal' and yaxis_type == 'Lineal':
        df['regression'] = sm.OLS(df[y_axis_label],
                                  sm.add_constant(
                                      df[x_axis_label])).fit().fittedvalues
        fig.add_trace(
            go.Scatter(name='Regresión lineal',
                       x=df[x_axis_label],
                       y=df['regression'],
                       mode='lines'))

    fig.update_xaxes(type='linear' if xaxis_type == 'Lineal' else 'log')
    fig.update_yaxes(type='linear' if yaxis_type == 'Lineal' else 'log')

    fig = go.Figure(fig)
    fig.update_traces(
        marker_size=10,
        marker_color='#3fa652',
    )

    return fig, None
import plotly.express as px
import csv

# Open with newline="" as the csv module requires, and read every row while
# the file is still open: DictReader is lazy, so the rows are materialised
# before being handed to plotly.
with open("cups of coffee vs hours of sleep.csv", newline="") as csv_file:
    df = list(csv.DictReader(csv_file))

fig = px.scatter(df, x="Coffee in ml", y="sleep in hours")
fig.show()

# df = pd.read_csv("Teacher refrence\data.csv")
# fig = px.scatter(df, x="Population", y="Per capita",
#                  size="Percentage",color="Country",
#                  size_max=60)
# fig.show()
dcc.Dropdown( id="event-dd", options=[{ "label": e, "value": e } for e in ["all", *touch_df["type"].unique()]], ), html.Div( children=[ html.Div( children=[ dcc.Graph( id="event-graph", figure=px.scatter(touch_df, x="x", y="y", color="team", hover_data=["type"], render_mode="svg"), ) ], className="six columns", ), html.Div( children=[ dcc.Graph( id="stat-graph", figure=px.bar( stat_df.pipe( lambda df: df.groupby("stat")["value"].sum(). rename("sum").reset_index().merge(df)).assign( rel=lambda df: df["value"] / df["sum"]),
# In[243]:

# Number of posts per player.
figb = px.histogram(
    dt,
    x="Player",
    template="plotly_dark",
    title="How many times has each player been mentioned in the Top 1000 posts this year?",
)
figb.show()
figb.write_html("FigB.html")

# Settings shared by the two per-player scatter plots below.
per_player_scatter = dict(
    y="Player",
    hover_name="Title",
    color="Awards",
    template="plotly_dark",
    color_continuous_scale=["blue", "yellow", "purple", "red"],
)

# In[253]:

figc = px.scatter(
    dt,
    x="Upvotes",
    title="Distribution of Upvotes on Posts about each Player",
    **per_player_scatter,
)
figc.show()
figc.write_html("FigC.html")

# In[245]:

figd = px.scatter(
    dt,
    x="Comments",
    title="Distribution of Comments on Posts about each Player",
    **per_player_scatter,
)
figd.show()
figd.write_html("FigD.html")
import dash_core_components as dcc import dash_html_components as html import plotly.express as px import plotly.graph_objects as go import pandas as pd df = pd.read_sql_table("dataset_Titanic_Dataset", db.engine) df["Pclass"] = df["Pclass"].astype(str) fig_scatter = px.scatter( df, x="Fare", y="Age", size="SibSp", color="Pclass", hover_name="Name", hover_data=["Sex", "Survived"], category_orders={"Pclass": ["1", "2", "3"]}, log_x=False, size_max=60, ) Fare = df["Fare"] Age = df["Age"] fig_markers = go.Figure() # Add traces fig_markers.add_trace(go.Scatter(x=Fare, y=Age, mode="markers", name="markers")) fig_markers.add_trace( go.Scatter(x=Fare, y=Age, mode="lines+markers", name="lines+markers")
def plotFigure(datapath):
    """Show a scatter of "Marks In Percentage" against "Days Present".

    Args:
        datapath: Path of a CSV file whose header contains the columns
            "Days Present" and "Marks In Percentage".
    """
    # newline="" is what the csv module requires for files it reads; the
    # rows are read eagerly because DictReader is lazy and bound to the open
    # file handle.
    with open(datapath, newline="") as csv_file:
        rows = list(csv.DictReader(csv_file))

    fig = px.scatter(rows, x="Days Present", y="Marks In Percentage")
    fig.show()
def update_graph1(data, tab):
    """Return the stat-value scatter when the 'tab-gen' tab is active.

    For any other ``tab`` value nothing is drawn and ``None`` is returned.
    Otherwise ``data`` is parsed with ``pd.read_json(..., orient='split')``
    and plotted as "Overall" against "stat_value", coloured by "Position".
    """
    if tab == 'tab-gen':
        frame = pd.read_json(data, orient='split')
        return px.scatter(
            frame,
            x="stat_value",
            y="Overall",
            color="Position",
        )
    return None