def generate_wspd_vs_vspd(df, df_dropna):
    """Generates vessel speed and wind speed density plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Full dataset; used to measure how much wind data is missing.
    df_dropna : pandas.DataFrame
        Rows with missing wind data dropped; must contain the
        "VSPD kn" and "WSPD mph" columns.

    Returns
    -------
    plotly.graph_objects.Figure
        Filled density contour with a 10 kn speed-limit marker, or a
        placeholder figure annotated with the share of missing data when
        ``_check_wind_outages`` reports a major wind outage.
    """
    if not _check_wind_outages(df, df_dropna):
        fig = px.density_contour(df_dropna, x="VSPD kn", y="WSPD mph")
        fig.update_traces(contours_coloring="fill", colorscale="blues")
        # FIX: select columns with a list (tuple keys are deprecated for
        # column selection) and use a single positional lookup instead of
        # the fragile chained .iloc[0][1].
        corr = round(df_dropna[["VSPD kn", "WSPD mph"]].corr().iloc[0, 1], 2)
        fig.update_layout(
            xaxis_title_text="VSPD kn",
            title=("<b>Vessel and Wind Speed Density Plot</b><br>"
                   "VSPD-WSPD Correlation: " + str(corr)),
            hoverlabel=dict(bgcolor="white", font_size=13),
            width=875,
            height=600,
            plot_bgcolor="#F1F1F1",
            font=dict(size=12),
            titlefont=dict(size=14),
            margin=dict(t=100),
        )
        # Vertical reference line at the 10 kn speed limit.
        fig.add_shape(type="line", x0=10, y0=0, x1=10, y1=1,
                      xref="x", yref="paper",
                      line=dict(color="red", dash="solid", width=1.5))
        fig.add_annotation(text="Speed Limit", showarrow=False, textangle=90,
                           font=dict(color="red"), xref="x", x=10.15,
                           yref="paper", y=1, hovertext="10 kn")
    else:
        # Placeholder figure: report the percentage of rows lost to the outage.
        fig = px.density_contour(
            pd.DataFrame({"WSPD mph": [], "VSPD kn": []}),
            x="VSPD kn", y="WSPD mph")
        # FIX: guard against ZeroDivisionError when df itself is empty.
        missing_pct = (round(100 - len(df_dropna) / len(df) * 100, 2)
                       if len(df) else 100.0)
        fig.add_annotation(
            text="Major Wind Outage<br>" + str(missing_pct) + "% of Data Missing",
            showarrow=False, textangle=0,
            font=dict(color="black", size=20),
            xref="paper", x=0.5, yref="paper", y=0.5)
        fig.update_layout(title="<b>Vessel and Wind Speed Density Plot</b>",
                          width=875, height=600, plot_bgcolor="#F1F1F1",
                          font=dict(size=12), titlefont=dict(size=14))
    return fig
def build_graphBB1(dff, x_axis, y_axis, mode, trendline, marginal_sel, color, facet):
    """Build a dcc.Graph of `dff` for the selected plot mode.

    Parameters
    ----------
    dff : pandas.DataFrame
        Data to plot.
    x_axis, y_axis : str
        Column names for the axes.
    mode : str
        One of "Scatter", "Heat", "Density", "Density Fill".
    trendline : str
        UI label; mapped to plotly's 'ols' / 'lowess' codes below.
    marginal_sel, color, facet : str
        UI selections; the literal string "None" means "not selected".

    Returns
    -------
    dash.dcc.Graph
        Graph component with id "scatterB" wrapping the figure.
    """
    # Map the UI sentinel string "None" to a real None.
    if marginal_sel == "None":
        marginal_sel = None
    if color == "None":
        color = None
    if facet == "None":
        facet = None
    # Translate human-readable trendline labels to plotly codes.
    if trendline == "Ordinary Least Squares Regression":
        trendline = 'ols'
    elif trendline == "Locally Weighted Smoothing":
        trendline = 'lowess'
    fig = go.Figure()  # fallback empty figure for unknown modes
    if mode == "Scatter":
        # BUG FIX: trendline was hard-coded to "ols", silently ignoring
        # the user's selection; pass the mapped value instead.
        fig = px.scatter(dff, x=x_axis, y=y_axis, color=color,
                         facet_col=facet, facet_col_wrap=3,
                         marginal_y=marginal_sel, marginal_x=marginal_sel,
                         trendline=trendline)
    if mode == "Heat":
        fig = px.density_heatmap(dff, x=x_axis, y=y_axis,
                                 marginal_x=marginal_sel,
                                 marginal_y=marginal_sel)
    if mode == "Density":
        fig = px.density_contour(dff, x=x_axis, y=y_axis, color=color,
                                 marginal_x=marginal_sel,
                                 marginal_y=marginal_sel,
                                 trendline=trendline)
    if mode == "Density Fill":
        fig = px.density_contour(dff, x=x_axis, y=y_axis)
        fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    graph = dcc.Graph(id="scatterB", figure=fig)
    return graph
def plot_data_density(self):
    '''
    Visualize reduced data in a density contour plot (1- or 2-dimensional).
    If the panda data frame contains a classification column, the plot is
    colored by class labels, otherwise not.
    '''
    if self.d == 1:
        # 1-D data: spread the points along a zero baseline.
        if self.classification:
            fig = px.density_contour(self.pd_data_frame,
                                     x=self.features[0],
                                     y=np.zeros(self.n),
                                     color='Classification',
                                     title='Density Contour')
        else:
            fig = px.density_contour(self.pd_data_frame,
                                     x=self.features[0],
                                     y=np.zeros(self.n),
                                     title='Density Contour')
    else:
        if self.classification:
            fig = px.density_contour(self.pd_data_frame,
                                     x=self.features[0],
                                     y=self.features[1],
                                     color=self.column_name,
                                     title='Density Contour')
        else:
            # BUG FIX: both branches previously passed color=self.column_name,
            # so unlabeled data was colored by a classification column that
            # may not exist; the unlabeled case must not set `color`.
            fig = px.density_contour(self.pd_data_frame,
                                     x=self.features[0],
                                     y=self.features[1],
                                     title='Density Contour')
    return fig
def test_render_mode():
    """Trace types must follow render_mode and the auto-webgl size switch."""
    gapminder = px.data.gapminder()
    small = gapminder.query("year == 2007")

    def first_two_types(figure):
        return [trace.type for trace in figure.data[:2]]

    # Small data defaults to SVG scatter traces.
    fig = px.scatter(small, x="gdpPercap", y="lifeExp", trendline="ols")
    assert first_two_types(fig) == ["scatter", "scatter"]
    # Explicit webgl request produces scattergl traces.
    fig = px.scatter(small, x="gdpPercap", y="lifeExp", trendline="ols",
                     render_mode="webgl")
    assert first_two_types(fig) == ["scattergl", "scattergl"]
    # Large data auto-switches to webgl.
    fig = px.scatter(gapminder, x="gdpPercap", y="lifeExp", trendline="ols")
    assert first_two_types(fig) == ["scattergl", "scattergl"]
    # Explicit svg overrides the auto-switch.
    fig = px.scatter(gapminder, x="gdpPercap", y="lifeExp", trendline="ols",
                     render_mode="svg")
    assert first_two_types(fig) == ["scatter", "scatter"]
    # Density contours always emit an SVG contour plus an SVG trendline.
    fig = px.density_contour(gapminder, x="gdpPercap", y="lifeExp",
                             trendline="ols")
    assert fig.data[0].type == "histogram2dcontour"
    assert fig.data[1].type == "scatter"
def kdemap(num):
    """Return a filled, labeled density contour of `num` vs Attrition_Flag."""
    contour_fig = px.density_contour(df, x=num, y="Attrition_Flag",
                                     title=f"Contour Map for {num}")
    contour_fig.update_traces(contours_coloring="fill",
                              contours_showlabels=True)
    return contour_fig
def plot_da_stats(self):
    """Computes, prints & plots useful statistics about dissemination areas."""
    import plotly.express as px
    da_records = list(self.dauid_map.values())
    stats_df = pd.DataFrame(da_records)
    # Filled density contour of DA area vs population, clipped to [0, 2000].
    contour_fig = px.density_contour(
        stats_df,
        x="area",
        y="pop",
        marginal_x="histogram",
        marginal_y="histogram",
        range_x=[0, 2000],
        range_y=[0, 2000],
    )
    contour_fig.data[0].update(contours_coloring="fill",
                               contours_showlabels=True)
    contour_fig.show()
    # Small-population DAs, for the text summary below.
    below_100_pops = [record for record in da_records if record["pop"] <= 100]
    below_200_pops = [record for record in da_records if record["pop"] <= 200]
    pop_sum_100 = sum(int(record['pop']) for record in below_100_pops)
    pop_sum_200 = sum(int(record['pop']) for record in below_200_pops)
    print(f"DA count: {len(self.dauid_map)}")
    print(
        f"\tDAs below 100 pop: {len(below_100_pops)} (population sum = {pop_sum_100})"
    )
    print(
        f"\tDAs below 200 pop: {len(below_200_pops)} (population sum = {pop_sum_200})"
    )
def make_metric_plot(line='K40', pareto='Product', marginal='histogram'):
    """Density contour of Rate vs Yield for one line, colored by `pareto`.

    `marginal` selects the marginal plot style; the literal string 'none'
    disables marginals entirely.
    """
    subset = oee.loc[oee['Line'] == line]
    subset = subset.sort_values('Thickness Material A')
    subset['Thickness Material A'] = pd.to_numeric(subset['Thickness Material A'])
    contour_kwargs = dict(x='Rate', y='Yield', color=pareto)
    if marginal != 'none':
        contour_kwargs.update(marginal_x=marginal, marginal_y=marginal)
    fig = px.density_contour(subset, **contour_kwargs)
    fig.update_layout({
        "plot_bgcolor": "#F9F9F9",
        "paper_bgcolor": "#F9F9F9",
        "height": 750,
        "title": "{}, Pareto by {}".format(line, pareto),
    })
    return fig
def g7(batch_id, x, y, colorset):
    """Render a density contour of x vs y for one batch as an HTML div."""
    batch_df = models.BatchInput(batch_id).as_pandas_dataframe()
    contour_fig = px.density_contour(batch_df, x=x, y=y)
    return opy.plot(contour_fig, auto_open=False, output_type='div')
def update_graph_3(n_clicks, dropdown_value, range_slider_value,
                   check_list_value, radio_items_value):
    """Debug-print the callback inputs and return an iris density contour."""
    for callback_input in (n_clicks, dropdown_value, range_slider_value,
                           check_list_value, radio_items_value):
        print(callback_input)
    iris = px.data.iris()
    return px.density_contour(iris, x='sepal_width', y='sepal_length')
def correlation_densityplots(x_axis, y_axis):
    """Density-contour plots of x_axis vs y_axis, split three ways.

    Returns one figure per grouping column of the Pokemon dataset
    (Legendary, Type 1, Generation), each with rug/histogram marginals.
    """
    # Keyword arguments shared by all three figures.
    shared = dict(
        x=x_axis,
        y=y_axis,
        marginal_x="rug",
        marginal_y="histogram",
        template=graph_template,
        height=300,
    )
    density_legendary = px.density_contour(
        df_ds,
        color="Legendary",
        title="Density contour, by legendary",
        **shared,
    )
    density_type = px.density_contour(
        df_ds,
        color="Type 1",
        hover_name="Name",
        title="Density contour, by Types",
        **shared,
    )
    # FIX: user-facing title typo "Genrations" -> "Generations".
    density_generation = px.density_contour(
        df_ds,
        color="Generation",
        hover_name="Name",
        title="Density contour, by Generations",
        **shared,
    )
    return density_legendary, density_type, density_generation
def gdp_suicide_contour(self, df):
    """Animated per-year density contour of GDP vs suicide count."""
    ordered = df.sort_values(by=['year'])
    # NOTE: the GDP column name really does contain padding spaces.
    return px.density_contour(ordered,
                              x=' gdp_for_year ($) ',
                              y='suicides_no',
                              color='year',
                              marginal_y="histogram",
                              animation_frame='year',
                              animation_group='country')
def plotly_demo(ctx: Context) -> Records[PlotlyJson]:
    """Return the iris density-contour demo figure as plotly JSON."""
    iris = px.data.iris()
    demo_fig = px.density_contour(
        iris,
        x="sepal_width",
        y="sepal_length",
        color="species",
        marginal_x="rug",
        marginal_y="histogram",
    )
    return [demo_fig.to_plotly_json()]
def update_graph_2(n_clicks, dropdown_value):
    """Debug-print the callback inputs and return the salary contour plot."""
    print(n_clicks)
    print(dropdown_value)
    salary_fig = px.density_contour(
        df_survey,
        x='Years of experience',
        y='Salary_in_EUR',
        color="Gender",
        title='XP Years by Salary & Gender',
    )
    salary_fig.update_layout({'height': 600})
    return salary_fig
def pair_contour(ftr0, ftr1):
    """Two-class density contour of ftr0 vs ftr1 with histogram marginals."""
    contour_fig = px.density_contour(
        df_combine,
        x=ftr0,
        y=ftr1,
        color="color",
        color_discrete_sequence=[OPPOSITE_COLOR_0, MAIN_COLOR_0],
        marginal_x="histogram",
        marginal_y="histogram",
    )
    layout_options = dict(
        width=500,
        height=400,
        legend=dict(xanchor='left', x=0, y=1.2),
        margin=dict(l=50, r=0),
    )
    contour_fig.update_layout(layout_options)
    return contour_fig
import plotly.express as px

# Stacked area chart of population by country, grouped by continent.
gapminder = px.data.gapminder()
fig = px.area(gapminder, x="year", y="pop", color="continent",
              line_group="country")
fig.write_html(os.path.join(dir_name, "area.html"))

# #### Visualize Distributions

# Plain density contour of the iris sepal measurements.
iris = px.data.iris()
fig = px.density_contour(iris, x="sepal_width", y="sepal_length")
fig.write_html(os.path.join(dir_name, "density_contour.html"))

# Same contour, colored by species and with rug/histogram marginals.
fig = px.density_contour(
    iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    marginal_x="rug",
    marginal_y="histogram",
)
fig.write_html(os.path.join(dir_name, "density_contour_marginal.html"))
def contour(**kwargs):
    """Filled, labeled density contour built from px.density_contour kwargs."""
    figure = px.density_contour(**kwargs)
    figure.update_traces(contours_coloring="fill", contours_showlabels=True)
    return figure
def __init__(self, df, info):
    """Chart window: renders `df` with plotly express according to `info`.

    `info` is a nested dict of UI selections: 'chart_type' picks the px
    builder, and the per-axis dicts ('x', 'y', 'z') carry the column /
    log / marginal / error / range / facet options for that axis. The
    figure is written to a temp HTML file and loaded into an embedded
    QWebEngineView.
    NOTE(review): assumes `info` always contains every key referenced for
    the chosen chart type — confirm against the dialog that builds it.
    """
    super().__init__()
    self.df = df
    # Basic window chrome.
    self.resize(721, 600)
    self.setWindowTitle('Chart')
    self.setWindowIcon(QIcon('resource/icon.ico'))
    self.setWindowIconText('viuplot')
    self.view = QWebEngineView(self)
    # Dispatch on the chart type selected in the UI.
    if info['chart_type'] == 'scatter':
        self.fig = px.scatter(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            marginal_x=info['x']['marginal'],
            error_x=info['x']['error'],
            error_x_minus=info['x']['error_minus'],
            range_x=info['x']['range'],
            facet_col=info['x']['facet'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            marginal_y=info['y']['marginal'],
            error_y=info['y']['error'],
            error_y_minus=info['y']['error_minus'],
            range_y=info['y']['range'],
            facet_row=info['y']['facet'],
            trendline=info['trendline'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            text=info['text'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            color=info['color'],
            symbol=info['symbol'],
            size=info['size'])
    elif info['chart_type'] == 'scatter_3d':
        self.fig = px.scatter_3d(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            error_x=info['x']['error'],
            error_x_minus=info['x']['error_minus'],
            range_x=info['x']['range'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            error_y=info['y']['error'],
            error_y_minus=info['y']['error_minus'],
            range_y=info['y']['range'],
            z=info['z']['column'],
            log_z=info['z']['log'],
            error_z=info['z']['error'],
            error_z_minus=info['z']['error_minus'],
            range_z=info['z']['range'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            text=info['text'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            color=info['color'],
            symbol=info['symbol'],
            size=info['size'])
    elif info['chart_type'] == 'line':
        self.fig = px.line(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            error_x=info['x']['error'],
            error_x_minus=info['x']['error_minus'],
            range_x=info['x']['range'],
            facet_col=info['x']['facet'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            error_y=info['y']['error'],
            error_y_minus=info['y']['error_minus'],
            range_y=info['y']['range'],
            facet_row=info['y']['facet'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            text=info['text'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            color=info['color'])
    elif info['chart_type'] == 'bar':
        self.fig = px.bar(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            error_x=info['x']['error'],
            error_x_minus=info['x']['error_minus'],
            range_x=info['x']['range'],
            facet_col=info['x']['facet'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            error_y=info['y']['error'],
            error_y_minus=info['y']['error_minus'],
            range_y=info['y']['range'],
            facet_row=info['y']['facet'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            text=info['text'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            color=info['color'])
    elif info['chart_type'] == 'density_contour':
        self.fig = px.density_contour(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            range_x=info['x']['range'],
            facet_col=info['x']['facet'],
            marginal_x=info['x']['marginal'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            range_y=info['y']['range'],
            facet_row=info['y']['facet'],
            marginal_y=info['y']['marginal'],
            z=info['z']['column'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            color=info['color'],
            histfunc=info['histfunc'],
            histnorm=info['histnorm'],
            nbinsx=info['nbinsx'],
            nbinsy=info['nbinsy'])
    elif info['chart_type'] == 'density_heatmap':
        self.fig = px.density_heatmap(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            range_x=info['x']['range'],
            facet_col=info['x']['facet'],
            marginal_x=info['x']['marginal'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            range_y=info['y']['range'],
            facet_row=info['y']['facet'],
            marginal_y=info['y']['marginal'],
            z=info['z']['column'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            histfunc=info['histfunc'],
            histnorm=info['histnorm'],
            nbinsx=info['nbinsx'],
            nbinsy=info['nbinsy'])
    elif info['chart_type'] == 'histogram':
        self.fig = px.histogram(
            self.df,
            title=info['title'],
            x=info['x']['column'],
            log_x=info['x']['log'],
            range_x=info['x']['range'],
            facet_col=info['x']['facet'],
            marginal=info['x']['marginal'],
            y=info['y']['column'],
            log_y=info['y']['log'],
            range_y=info['y']['range'],
            facet_row=info['y']['facet'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            hover_name=info['hover_name'],
            template=info['template'],
            #width = info['width'],
            #height = info['height'],
            #opacity = info['opacity'],
            color=info['color'],
            barmode=info['barmode'],
            barnorm=info['barnorm'],
            histfunc=info['histfunc'],
            histnorm=info['histnorm'],
            nbins=info['nbins'],
            cumulative=info['cumulative'])
    # Render the figure to a temp HTML file next to this module and
    # display it in the embedded web view.
    self.file_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "temp.html"))
    po.plot(self.fig, filename=self.file_path, auto_open=False)
    url = QUrl(QUrl.fromLocalFile(self.file_path))
    self.view.resize(721, 800)
    self.view.load(url)
    self.create_menu()
    self.show()
def main():
    """Streamlit entry point: routes the sidebar menu choice to one of the
    app pages (Sentiment, EDA, DataViz, Story, Classification, Timeseries).

    NOTE(review): indentation of the collapsed original was reconstructed;
    relies on module-level helpers (api, predict_emotions, pipe_lr, ...)
    defined elsewhere in the file.
    """
    st.title("Machine Learning Web Application")
    menu = [
        "Sentiment", "EDA", "DataViz", "Story", "Classification", "Timeseries"
    ]
    choice = st.sidebar.selectbox("Select Menu", menu)
    if choice == "EDA":
        # Exploratory data analysis on an uploaded CSV.
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
        else:
            st.write("No Dataset To Show")
        st.subheader("Exploratory Data Analysis")
        if data is not None:
            if st.checkbox("Show Shape"):
                st.write(df.shape)
            if st.checkbox("Show Summary"):
                st.write(df.describe())
            if st.checkbox("Correlation Matrix"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()
    elif choice == "DataViz":
        # User-driven charting of an uploaded CSV.
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
        else:
            st.write("No Dataset To Show")
        st.subheader("Data Visualization")
        if data is not None:
            all_columns = df.columns.to_list()
            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column to Visualize",
                                               all_columns)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()
            plot_type = st.selectbox("Select Type of Plot",
                                     ["bar", "line", "area", "hist", "box"])
            selected_columns = st.multiselect("Select Columns To Visualize",
                                              all_columns)
            if st.button("Generate Plot"):
                st.success("Generating Custom Plot of {} for {}".format(
                    plot_type, selected_columns))
                if plot_type == "bar":
                    cust_data = df[selected_columns]
                    st.bar_chart(cust_data)
                elif plot_type == "line":
                    cust_data = df[selected_columns]
                    st.line_chart(cust_data)
                elif plot_type == "area":
                    cust_data = df[selected_columns]
                    st.area_chart(cust_data)
                elif plot_type:
                    # hist / box fall back to matplotlib via pandas .plot.
                    cust_plot = df[selected_columns].plot(kind=plot_type)
                    st.write(cust_plot)
                    st.pyplot()
    elif choice == "Story":
        # Canned visual stories for three known demo datasets,
        # dispatched on the uploaded file's name.
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
            st.subheader("Storytelling with Data")
            if data.name == "gapminder.csv":
                fig = px.scatter(
                    df,
                    x="gdpPercap",
                    y="lifeExp",
                    animation_frame="year",
                    animation_group="country",
                    # fig = px.scatter(px.data.gapminder(), x="gdpPercap", y="lifeExp", animation_frame="year", animation_group="country",
                    size="pop",
                    color="country",
                    hover_name="country",
                    log_x=True,
                    size_max=100,
                    range_x=[100, 100000],
                    range_y=[25, 90])
                fig.update_layout(height=650)
                st.write(fig)
            elif data.name == "stocks.csv":
                fig = go.Figure()
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['AAPL'], name="Apple"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['AMZN'], name="Amazon"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['FB'], name="Facebook"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['GOOG'], name="Google"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['NFLX'], name="Netflix"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['MSFT'], name="Microsoft"))
                fig.layout.update(
                    title_text='Time Series data with Rangeslider',
                    xaxis_rangeslider_visible=True)
                st.write(fig)
            elif data.name == "iris.csv":
                y1 = df['sepal_length']
                x1 = df['sepal_width']
                y2 = df['petal_length']
                x2 = df['petal_width']
                color = df['species']
                fig1 = px.scatter(df,
                                  x=x1,
                                  y=y1,
                                  color=color,
                                  marginal_y="violin",
                                  marginal_x="box",
                                  trendline="ols",
                                  template="simple_white")
                fig2 = px.density_contour(df,
                                          x=x2,
                                          y=y2,
                                          color=color,
                                          marginal_y='histogram')
                st.write(fig1, fig2)
        else:
            st.write("No Dataset To Show")
    elif choice == "Classification":
        # Iris species prediction with a pre-trained KNN pickle.
        st.subheader("Classification Prediction")
        # if data is None:
        #     pass
        # elif data.name == "iris.csv":
        #     st.subheader("Iris flower Prediction from Machine Learning Model")
        iris = Image.open('iris.png')
        st.image(iris)
        model = open("model.pkl", "rb")
        knn_clf = joblib.load(model)
        #Loading images
        setosa = Image.open('setosa.png')
        versicolor = Image.open('versicolor.png')
        virginica = Image.open('virginica.png')
        st.sidebar.title("Features")
        #Intializing
        sl = st.sidebar.slider(label="Sepal Length (cm)",
                               value=5.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        sw = st.sidebar.slider(label="Sepal Width (cm)",
                               value=3.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        pl = st.sidebar.slider(label="Petal Length (cm)",
                               value=4.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        pw = st.sidebar.slider(label="Petal Width (cm)",
                               value=1.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        if st.button("Click Here to Classify"):
            dfvalues = pd.DataFrame(list(zip([sl], [sw], [pl], [pw])),
                                    columns=[
                                        'sepal_length', 'sepal_width',
                                        'petal_length', 'petal_width'
                                    ])
            input_variables = np.array(dfvalues[[
                'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
            ]])
            prediction = knn_clf.predict(input_variables)
            # Show the flower image matching the predicted class id.
            if prediction == 1:
                st.image(setosa)
            elif prediction == 2:
                st.image(versicolor)
            elif prediction == 3:
                st.image(virginica)
    # st.title("Emotion Classifier App")
    # menu = ["Home"]
    # choice = st.sidebar.selectbox("Menu",menu)
    # create_page_visited_table()
    # create_emotionclf_table()
    elif choice == "Sentiment":
        # Twitter search -> emotion + sentiment prediction.
        # add_page_visited_details("Home",datetime.now())
        # data = " "
        st.subheader("Sentiment-Emotion Prediction")
        with st.form(key='emotion_clf_form'):
            search_text = st.text_area("Type Here")
            submit_text = st.form_submit_button(label='Submit')
        if submit_text:
            hasilSearch = api.search(q=str(search_text), count=2)
            texts = []
            for tweet in hasilSearch:
                texts.append(tweet.text)
            # raw_text2 = texts[1]
            raw_text = texts[0]
            # translated = translator.translate(raw_text)
            # translated = raw_text
            prediction = predict_emotions(raw_text)
            probability = get_prediction_proba(raw_text)
            sentiment = predict_sentiment(raw_text)
            proba_sentiment = get_sentiment_proba(raw_text)
            col1, col2 = st.beta_columns(2)
            with col1:
                st.success("Search Result")
                st.write(raw_text)
                # st.write(raw_text2)
                # st.write(translated.text)
                st.success("Prediction")
                emoji_icon = emotions_emoji_dict[prediction]
                st.write("{}:{}".format(prediction, emoji_icon))
                st.write("Confidence:{}".format(np.max(probability)))
                st.write("{}:{}".format(sentiment, emoji_sentiment[sentiment]))
                st.write("Confidence:{}".format(np.max(proba_sentiment)))
            with col2:
                st.success("Prediction Probability")
                # st.write(probability)
                # st.write(proba_sentiment)
                proba_df = pd.DataFrame(probability, columns=pipe_lr.classes_)
                proba_sent_df = pd.DataFrame(proba_sentiment,
                                             columns=pipe_ctm.classes_)
                # st.write(proba_df.T)
                # st.write(proba_sent_df.T)
                # proba_df_clean = proba_df.T.reset_index()
                # proba_df_clean.columns = ["emotions","probability"]
                proba_df_sent_clean = proba_sent_df.T.reset_index()
                proba_df_sent_clean.columns = ["sentiments", "probability"]
                # fig = alt.Chart(proba_df_clean).mark_bar().encode(x='emotions',y='probability',color='emotions')
                # st.altair_chart(fig,use_container_width=True)
                fig = alt.Chart(proba_df_sent_clean).mark_bar().encode(
                    x='sentiments', y='probability', color='sentiments')
                st.altair_chart(fig, use_container_width=True)
    elif choice == "Timeseries":
        # Holt-Winters exponential-smoothing forecast on an uploaded xlsx.
        import plotly.io as pio
        pio.templates.default = "seaborn"
        # Timeseries model
        from statsmodels.tsa.holtwinters import ExponentialSmoothing

        def create_model(col, seasonal):
            # Grid-search trend/seasonal/damping options by AIC and
            # refit with the best combination.
            col = str(col)
            tr = ['add', 'mul']
            ss = ['add', 'mul']
            dp = [True, False]
            combs = {}
            aics = []
            # iterate over the option combinations
            for i in tr:
                for j in ss:
                    for k in dp:
                        model = ExponentialSmoothing(data[col],
                                                     trend=i,
                                                     seasonal=j,
                                                     seasonal_periods=seasonal,
                                                     damped_trend=k)
                        model = model.fit()
                        combs.update({model.aic: [i, j, k]})
                        aics.append(model.aic)
            # forecast with the best combination
            best_aic = min(aics)
            model = ExponentialSmoothing(data[col],
                                         trend=combs[best_aic][0],
                                         seasonal=combs[best_aic][1],
                                         seasonal_periods=seasonal,
                                         damped_trend=combs[best_aic][2])
            # output
            fit = model.fit()
            return fit

        st.subheader("Time Series Prediction")
        data = st.file_uploader("Upload Dataset", type=["xlsx"])
        kolom = []
        if data is None:
            st.write('Please upload timeseries file (xlsx)')
        else:
            df = pd.read_excel(data)
            data = df.dropna()
            # st.write(data.head())
            data['yyyy-mm'] = pd.to_datetime(data['Date']).dt.strftime('%Y-%m')
            st.write(data.head())
            # st.write(data.head())
            # st.write(data.head())
            kolom = data.columns.tolist()
            pilih = st.selectbox('Pilih Kolom', kolom)
            xaxis = data.iloc[:, 0].astype('str')
            fig1 = px.line(x=xaxis, y=data[pilih])
            st.plotly_chart(fig1)
            seasonal = st.number_input('Seasonal_periods',
                                       value=12,
                                       max_value=len(data),
                                       min_value=1,
                                       step=1)
            pred_period = st.number_input('Prediction_periods',
                                          value=6,
                                          max_value=len(data),
                                          min_value=1,
                                          step=1)
            # submit_data = st.form_submit_button(label='Create_model')
            if st.button('Create_model and Run_Prediction'):
                st.success("Create Model Success")
                tsmodel = create_model(pilih, seasonal)
                prediksi = list(tsmodel.forecast(pred_period))
                yaxis = data[pilih].tolist()
                # st.write(prediksi)
                for p in prediksi:
                    yaxis.append(p)
                last = df.index[-1]
                dfnew = df.drop(df.index[len(yaxis):last + 1])
                dfnew['prediction'] = yaxis
                dfnew.iloc[:, 0] = dfnew.iloc[:, 0].astype('str')
                # dfnew = dfnew.dropna()
                fig2 = go.Figure()
                fig2.add_trace(
                    go.Scatter(
                        x=dfnew.iloc[:, 0],
                        y=dfnew['prediction'],
                        # line = dict(color='firebrick', width=4, dash='dot'),
                        mode='lines+markers',
                        name='prediction'))
                fig2.add_trace(
                    go.Scatter(
                        x=dfnew.iloc[:, 0],
                        y=dfnew[pilih],
                        # line = dict(color='firebrick', width=4, dash='dot'),
                        mode='lines+markers',
                        name='actual'))
                st.plotly_chart(fig2)
px.scatter(
    iris,             # dataset to plot
    x="sepal_width",  # x-axis column
    y="sepal_length", # y-axis column
    color="species",  # color grouping column
    error_x="e",      # x-axis error bars
    error_y="e"       # y-axis error bars
)

# In[23]:

px.density_contour(
    iris,             # dataset to plot
    x="sepal_width",  # x-axis column
    y="sepal_length", # y-axis column
    color="species"   # color grouping column
)

# In[24]:

px.density_contour(
    iris,                   # dataset to plot
    x="sepal_width",        # x-axis column
    y="sepal_length",       # y-axis column
    color="species",        # color grouping column
    marginal_x="rug",       # rug plot along the x margin
    marginal_y="histogram"  # histogram along the y margin
)
import plotly.express as px

# Student grades per subject; column names are Russian:
# "Информатика" = Informatics, "Физика" = Physics, "Математика" = Mathematics.
df = {'Информатика': [3.3, 4.4, 3.5],
      'Физика': [3.2, 4.5, 5],
      'Математика': [3.4, 3.8, 4.2]
      }
# Density contour of Informatics vs Physics, colored by the Mathematics grade.
fig = px.density_contour(df, x="Информатика", y="Физика", color="Математика",
                         template='presentation')
fig.show()
0 if money <= 0 else money for money in data['MONEY_LOST'] ] data['ARR_DELAY_ONLY'] = data['ARR_DELAY_ONLY'].apply(log2) fig_info = pltx.scatter(data, x="ARR_DELAY_ONLY", y="MONEY_LOST", color="OP_UNIQUE_CARRIER", log_x=True) fig_info.update_traces(marker=dict(size=3)) fig_info.show() # Show density fig_density = pltx.density_contour(data.head(60000), x="DISTANCE", y="MONEY_LOST", color="OP_UNIQUE_CARRIER", marginal_x="rug", marginal_y="histogram") # >> fig_density.show() # Show density heatmap for cities fig_city = pltx.density_heatmap(data.head(30000), x="ORIGIN", y="DEST", marginal_y="histogram") # >> fig_city.show() # Explore the skewness skew = data.skew() print('Skewness:', skew)
def generate_density(df):
    """Density contour of sepal width vs sepal length."""
    return px.density_contour(df, x="sepal_width", y="sepal_length")
# Hide the diagonal of the (previously built) scatter-matrix figure.
fig.update_traces(diagonal_visible=False)
plot(fig)

# these 3 variables have the strongest separation for the target
group = [
    "RepublicanFraction Ohio", "RepublicanFraction Wyoming",
    "RepublicanFraction Idaho"
]

# plot the group of 3 variables across categories
# NOTE: the " DemocratWon" column name really starts with a space.
fig = px.scatter_3d(df,
                    x=group[0],
                    y=group[1],
                    z=group[2],
                    color=" DemocratWon",
                    opacity=0.7)
plot(fig)

# plot two variables across categories
fig = px.density_contour(df,
                         x=group[0],
                         y=group[1],
                         marginal_x="histogram",
                         marginal_y="box",
                         color=" DemocratWon")
plot(fig)

# plot a single variable (group[0]) across categories
fig = px.strip(df, y=group[0], color=" DemocratWon")
plot(fig)
def nhanes_multivariate_analysis(df):
    """Streamlit walkthrough of bivariate analyses on the NHANES dataset.

    Renders narrative markdown interleaved with scatter / density-contour
    figures of arm length (BMXARML) vs leg length (BMXLEG), blood-pressure
    pairs, and gender/ethnicity-stratified variants.
    NOTE(review): line wrapping inside the markdown strings was
    reconstructed from a collapsed source.
    """
    st.write("""
    ### Quantitative bivariate data

    Bivariate data arise when every "unit of analysis" (e.g. a person in the
    NHANES dataset) is assessed with respect to two traits (the NHANES subjects
    were assessed for many more than two traits, but we can consider two traits
    at a time here).

    Below we make a scatterplot of arm length against leg length. This means
    that arm length
    ([BMXARML](https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/BMX_I.htm#BMXARML))
    is plotted on the vertical axis and leg length
    ([BMXLEG](https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/BMX_I.htm#BMXLEG))
    is plotted on the horizontal axis). We see a positive dependence between
    the two measures -- people with longer arms tend to have longer legs, and
    vice-versa. However it is far from a perfect relationship.
    """)
    # Recode gender ids to readable labels for coloring/faceting.
    df["RIAGENDRx"] = df.RIAGENDR.replace({1: "Male", 2: "Female"})
    fig = px.scatter(df,
                     x="BMXLEG",
                     y="BMXARML",
                     color="RIAGENDRx",
                     opacity=0.5,
                     title="Correlation arm length against leg length")
    st.plotly_chart(fig)
    st.write("""
    This plot also shows the Pearson correlation coefficient between the arm
    length and leg length, which is 0.62. The Pearson correlation coefficient
    ranges from -1 to 1, with values approaching 1 indicating a more perfect
    positive dependence. In many settings, a correlation of 0.62 would be
    considered a moderately strong positive dependence.
    """)
    fig = px.density_contour(
        df,
        x="BMXLEG",
        y="BMXARML",
        title="Contour correlation between arm length and leg length")
    fig.add_annotation(x=50,
                       y=45,
                       text="p=0.62",
                       font=dict(color="white", size=12),
                       showarrow=False)
    fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    st.plotly_chart(fig)
    st.write("""
    As another example with slightly different behavior, we see that systolic
    and diastolic blood pressure (essentially the maximum and minimum blood
    pressure between two consecutive heart beats) are more weakly correlated
    than arm and leg length, with a correlation coefficient of 0.32.
    This weaker correlation indicates that some people have unusually high
    systolic blood pressure but have average diastolic blood pressure, and
    vice versa.
    """)
    fig = px.density_contour(df,
                             x="BPXSY1",
                             y="BPXDI1",
                             marginal_x="rug",
                             marginal_y="rug",
                             title="BPXSY1 and BPXDI1 correlation")
    fig.add_annotation(x=200,
                       y=100,
                       text="p=0.32",
                       font=dict(size=15, color="black"),
                       showarrow=False)
    st.plotly_chart(fig)
    st.write("""
    Next we look at two repeated measures of systolic blood pressure, taken a
    few minutes apart on the same person. These values are very highly
    correlated, with a correlation coefficient of around 0.96.
    """)
    x = df["BPXSY1"].to_numpy()
    y = df["BPXSY2"].to_numpy()
    # Hand-built 2D histogram contour (go.*) for full styling control.
    fig = go.Figure(
        go.Histogram2dContour(x=x,
                              y=y,
                              colorscale='Jet',
                              contours=dict(showlabels=True,
                                            labelfont=dict(family='Raleway',
                                                           color='white')),
                              hoverlabel=dict(bgcolor='white',
                                              bordercolor='black',
                                              font=dict(family='Raleway',
                                                        color='black'))))
    fig.update_layout(title_text="BPXSY1 and BPXSY2 correlation", )
    fig.add_annotation(x=200,
                       y=200,
                       text="p=0.96",
                       font=dict(size=15, color="white"),
                       showarrow=False)
    st.plotly_chart(fig)
    st.write("""
    ### Heterogeneity and stratification

    Most human characteristics are complex -- they vary by gender, age,
    ethnicity, and other factors. This type of variation is often referred to
    as "heterogeneity". When such heterogeneity is present, it is usually
    productive to explore the data more deeply by stratifying on relevant
    factors, as we did in the univariate analyses.

    Below, we continue to probe the relationship between leg length and arm
    length, stratifying first by gender, then by gender and ethnicity. The
    gender-stratified plot indicates that men tend to have somewhat longer
    arms and legs than women -- this is reflected in the fact that the cloud
    of points on the left is shifted slightly up and to the right relative to
    the cloud of points on the right.
    In addition, the correlation between arm length and leg length appears to
    be somewhat weaker in women than in men.
    """)
    fig_fem = px.scatter(df, x="BMXLEG", y="BMXARML", facet_row="RIAGENDRx")
    st.plotly_chart(fig_fem, use_container_width=True)
    st.write("""
    Next we look to stratifying the data by both gender and ethnicity. This
    results in 2 x 5 = 10 total strata, since there are 2 gender strata and 5
    ethnicity strata.

    These scatterplots reveal differences in the means as well a diffrences in
    the degree of association (correlation) between different pairs of
    variables. We see that although some ethnic groups tend to have
    longer/shorter arms and legs than others, the relationship between arm
    length and leg length within genders is roughly similar across the ethnic
    groups.

    One notable observation is that ethnic group 5, which consists of people
    who report being multi-racial or are of any race not treated as a separate
    group (due to small sample size), the correlation between arm length and
    leg length is stronger, especially for men. This is not surprising, as
    greater heterogeneity can allow correlations to emerge that are
    indiscernible in more homogeneous data.
    """)
    fig = px.density_contour(df,
                             x="BMXLEG",
                             y="BMXARML",
                             facet_col="RIDRETH1",
                             facet_row="RIAGENDRx")
    fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    st.plotly_chart(fig)
dimensions=targets + features, color=colors[2], opacity=0.7) fig.update_traces(diagonal_visible=False) plot(fig) # these 3 variables have the strongest separation for the target group = ["price", "ram", "hd"] # plot the group of 3 variables across categories fig = px.scatter_3d(df, x=group[0], y=group[1], z=group[2], color=colors[2], opacity=0.7) plot(fig) # plot two variables across categories fig = px.density_contour(df, x=group[0], y=group[1], marginal_x="histogram", marginal_y="box", color=colors[2]) plot(fig) # plot a singl variable across categories fig = px.strip(df, y=group[1], color=colors[2]) plot(fig)
def build_plot(is_anim, plot_type, df, progress=None, **kwargs) -> dict:
    """Build a plotly figure (and, for ML plots, the fitted model) from UI parameters.

    Parameters
    ----------
    is_anim : bool
        When truthy, a ``time_column`` kwarg is consumed and turned into a
        plotly ``animation_frame``; axis ranges are then frozen so frames share axes.
    plot_type : str
        One of the ``amp_consts.PLOT_*`` constants; selects which plotly express
        builder (or sklearn model + scatter) is used.
    df : pandas.DataFrame
        Source data; numeric columns drive ranges, PCA/LDA/QDA/NCA inputs and
        the scatter-matrix grid.
    progress : callable, optional
        ``progress(step, total)`` callback; only invoked by the scatter-matrix
        branch.  NOTE(review): that branch calls it unconditionally — passing
        ``None`` there would raise; confirm callers always supply it.
    **kwargs
        Forwarded (after light filtering) to the chosen plotly express function.

    Returns
    -------
    dict
        Keys among ``figure``, ``model_data``, ``column_names``, ``class_names``;
        entries whose value is ``None`` are omitted.
    """
    params = dict(**kwargs)
    # Replace "none selected" placeholders; values may be mutated during
    # iteration (keys are never added/removed, so this is safe).
    for k, v in params.items():
        if v == amp_consts.NONE_SELECTED:
            params[k] = filter_none(params[k])
    num_columns = df.select_dtypes(include=[np.number]).columns.to_list()
    if is_anim:
        time_column = params.pop("time_column", "")
        if (
            time_column
            in df.select_dtypes(
                include=[np.datetime64, "datetime", "datetime64", "datetime64[ns, UTC]"]
            ).columns.to_list()
        ):
            # Datetime columns are rendered to a sortable string per frame.
            df["time_step"] = df[time_column].dt.strftime("%Y/%m/%d %H:%M:%S")
            afc = "time_step"
        else:
            afc = time_column
        params["animation_frame"] = afc
        df = df.sort_values([afc])
        # Freeze axis ranges so animation frames do not rescale;
        # ML plots compute their own normalized [-1, 1] ranges below.
        if plot_type not in [
            amp_consts.PLOT_PCA_3D,
            amp_consts.PLOT_PCA_2D,
            amp_consts.PLOT_PCA_SCATTER,
            amp_consts.PLOT_LDA_2D,
            amp_consts.PLOT_QDA_2D,
            amp_consts.PLOT_NCA,
        ]:
            x = params.get("x")
            params["range_x"] = (
                None if x not in num_columns else [df[x].min(), df[x].max()]
            )
            y = params.get("y")
            params["range_y"] = (
                None if y not in num_columns else [df[y].min(), df[y].max()]
            )
            if plot_type in [amp_consts.PLOT_SCATTER_3D, amp_consts.PLOT_PCA_3D]:
                z = params.get("z")
                params["range_z"] = (
                    None if z not in num_columns else [df[z].min(), df[z].max()]
                )
    params["data_frame"] = df
    fig = None
    model_data = None
    column_names = None
    class_names = None
    if plot_type == amp_consts.PLOT_SCATTER:
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_3D:
        fig = px.scatter_3d(**params)
    elif plot_type == amp_consts.PLOT_LINE:
        fig = px.line(**params)
    elif plot_type == amp_consts.PLOT_BAR:
        fig = px.bar(**params)
    elif plot_type == amp_consts.PLOT_HISTOGRAM:
        # Horizontal histograms take the value column on y instead of x.
        if "orientation" in params and params.get("orientation") == "h":
            params["x"], params["y"] = None, params["x"]
        fig = px.histogram(**params)
    elif plot_type == amp_consts.PLOT_BOX:
        fig = px.box(**params)
    elif plot_type == amp_consts.PLOT_VIOLIN:
        fig = px.violin(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_HEATMAP:
        fig = px.density_heatmap(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_CONTOUR:
        fc = params.pop("fill_contours") is True
        fig = px.density_contour(**params)
        if fc:
            fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    elif plot_type == amp_consts.PLOT_PARALLEL_CATEGORIES:
        fig = px.parallel_categories(**params)
    elif plot_type == amp_consts.PLOT_PARALLEL_COORDINATES:
        fig = px.parallel_coordinates(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_MATRIX:
        # Hand-built scatter matrix: one subplot per numeric column pair.
        fig = make_subplots(
            rows=len(num_columns),
            cols=len(num_columns),
            shared_xaxes=True,
            row_titles=num_columns,
        )
        color_column = params.get("color")
        if color_column is not None:
            # Pull the colorway from the selected template, falling back to
            # the default template, and cycle it to cover every category.
            template_colors = pio.templates[params.get("template")].layout["colorway"]
            if template_colors is None:
                template_colors = pio.templates[pio.templates.default].layout["colorway"]
            color_count = len(df[color_column].unique())
            if len(template_colors) >= color_count:
                pass
            else:
                template_colors = np.repeat(
                    template_colors, (color_count // len(template_colors)) + 1
                )
            template_colors = template_colors[:color_count]
        else:
            # Sentinel: an int means "no per-category coloring".
            template_colors = 0
        legend_added = False
        step = 0
        total = len(num_columns) ** 2
        matrix_diag = params["matrix_diag"]
        matrix_up = params["matrix_up"]
        matrix_down = params["matrix_down"]
        for i, c in enumerate(num_columns):
            for j, l in enumerate(num_columns):
                progress(step, total)
                step += 1
                # Pick the cell's plot kind from the diagonal / upper / lower settings.
                if i == j:
                    if matrix_diag == "Nothing":
                        continue
                    elif matrix_diag == "Histogram":
                        mtx_plot_kind = "Histogram"
                    else:
                        mtx_plot_kind = "Scatter"
                else:
                    if (
                        (i == j)
                        or (i > j and matrix_up == "Scatter")
                        or (i < j and matrix_down == "Scatter")
                    ):
                        mtx_plot_kind = "Scatter"
                    elif (i > j and matrix_up == "Nothing") or (
                        i < j and matrix_down == "Nothing"
                    ):
                        continue
                    elif (i > j and matrix_up == "2D histogram") or (
                        i < j and matrix_down == "2D histogram"
                    ):
                        mtx_plot_kind = "2D histogram"
                    else:
                        mtx_plot_kind = "Error"
                if isinstance(template_colors, int) or mtx_plot_kind == "2D histogram":
                    # Single-color cell (no category split).
                    if mtx_plot_kind == "Histogram":
                        add_histogram(fig=fig, x=df[c], index=i + 1)
                    elif mtx_plot_kind == "Scatter":
                        add_scatter(
                            fig=fig,
                            x=df[c],
                            y=df[l],
                            row=j + 1,
                            col=i + 1,
                        )
                    elif mtx_plot_kind == "2D histogram":
                        add_2d_hist(fig=fig, x=df[c], y=df[l], row=j + 1, col=i + 1)
                else:
                    # One trace per category so the legend can toggle groups.
                    for color_parse, cat in zip(
                        template_colors, df[color_column].unique()
                    ):
                        df_cat = df[df[color_column] == cat]
                        if mtx_plot_kind == "Histogram":
                            add_histogram(
                                fig=fig,
                                x=df_cat[c],
                                index=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                        elif mtx_plot_kind == "Scatter":
                            add_scatter(
                                fig=fig,
                                x=df_cat[c],
                                y=df_cat[l],
                                row=j + 1,
                                col=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                    legend_added = True
                fig.update_xaxes(
                    title_text=c,
                    row=j + 1,
                    col=i + 1,
                )
                # BUGFIX: was `if c == 0:` — `c` is a column *name*, so the
                # comparison was always False and y-axis titles were never set.
                # Label the y axes on the first column of the grid.
                if i == 0:
                    fig.update_yaxes(
                        title_text=l,
                        row=j + 1,
                        col=i + 1,
                    )
        fig.update_layout(barmode="stack")
    elif plot_type in [
        amp_consts.PLOT_PCA_2D,
        amp_consts.PLOT_PCA_3D,
        amp_consts.PLOT_PCA_SCATTER,
    ]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(
                list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
            )
        column_names = X.columns.to_list()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model_data = PCA()
        x_new = model_data.fit_transform(X)
        pc1_lbl = f"PC1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"PC2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        # Normalize components by their span so loadings fit the same scale.
        df[pc1_lbl] = x * (1.0 / (x.max() - x.min()))
        df[pc2_lbl] = y * (1.0 / (y.max() - y.min()))
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        try:
            sl = params.pop("show_loadings") is True
        except KeyError:  # BUGFIX: was a bare `except:`; only a missing key is expected
            sl = None
        if plot_type in [amp_consts.PLOT_PCA_3D]:
            z = x_new[:, 2]
            pc3_lbl = f"PC3 ({model_data.explained_variance_ratio_[2] * 100:.2f}%)"
            df[pc3_lbl] = z * (1.0 / (z.max() - z.min()))
            params["z"] = pc3_lbl
            if is_anim:
                params["range_z"] = [-1, 1]
            fig = px.scatter_3d(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:3, :])
                m = 1 / np.amax(loadings)
                loadings = loadings * m
                xc, yc, zc = [], [], []
                # Each loading is drawn as a segment from the origin; `None`
                # breaks the line between consecutive loadings.
                for i in range(loadings.shape[0]):
                    xc.extend([0, loadings[i, 0], None])
                    yc.extend([0, loadings[i, 1], None])
                    zc.extend([0, loadings[i, 2], None])
                fig.add_trace(
                    go.Scatter3d(
                        x=xc,
                        y=yc,
                        z=zc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter3d(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        z=loadings[:, 2],
                        mode="text",
                        # BUGFIX: was `num_columns`, which is wrong when columns
                        # were dropped via ignore_columns; use the kept columns.
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    ),
                )
        # BUGFIX: was `PLOT_PCA_3D` again, making the 2D branch unreachable.
        elif plot_type in [amp_consts.PLOT_PCA_2D]:
            fig = px.scatter(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:2, :])
                m = 1 / np.amax(loadings)
                loadings = loadings * m
                xc, yc = [], []
                for i in range(loadings.shape[0]):
                    xc.extend([0, loadings[i, 0], None])
                    yc.extend([0, loadings[i, 1], None])
                fig.add_trace(
                    go.Scatter(
                        x=xc,
                        y=yc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        mode="text",
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    ),
                )
        elif plot_type in [amp_consts.PLOT_PCA_SCATTER]:
            # (removed an unused `l = lambda x, y: x == y` that also shadowed
            # the scatter-matrix loop variable)
            params_ = {
                "data_frame": x_new,
                "labels": {str(i): f"PC {i+1}" for i in range(x_new.shape[1] - 1)},
            }
            if params["color"] is not None:
                params_["color"] = df[params["color"]]
            if params["dimensions"] is not None:
                params_["dimensions"] = range(
                    min(
                        params["dimensions"],
                        x_new.shape[1] - 1,
                    )
                )
            if is_anim:
                params_["range_x"] = [-1, 1]
                params_["range_y"] = [-1, 1]
            fig = px.scatter_matrix(**params_)
            fig.update_traces(diagonal_visible=False)
    elif plot_type in [amp_consts.PLOT_LDA_2D, amp_consts.PLOT_QDA_2D]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(
                list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
            )
        column_names = X.columns.to_list()
        # Encode the target: strings -> category codes, floats -> ints.
        if params["target"] in df.select_dtypes(include=["object"]).columns.to_list():
            t = df[params["target"]].astype("category").cat.codes
        # BUGFIX: `np.float` was removed in NumPy 1.24; the builtin `float`
        # is the alias it pointed to and selects the same columns.
        elif params["target"] in df.select_dtypes(include=[float]).columns.to_list():
            t = df[params["target"]].astype("int")
        else:
            t = df[params["target"]]
        class_names = df[params["target"]].unique()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        if plot_type == amp_consts.PLOT_LDA_2D:
            model_data = LinearDiscriminantAnalysis(solver=params.pop("solver", "svd"))
        elif plot_type == amp_consts.PLOT_QDA_2D:
            model_data = QuadraticDiscriminantAnalysis(store_covariance=True)
        x_new = model_data.fit(X, y=t).transform(X)
        label_root = "LD" if plot_type == amp_consts.PLOT_LDA_2D else "QD"
        pc1_lbl = f"{label_root}1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"{label_root}2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        df[pc1_lbl] = x / np.abs(x).max()
        df[pc2_lbl] = y / np.abs(y).max()
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        sl = params.pop("show_loadings") is True
        fig = px.scatter(**params)
        if sl:
            loadings = np.transpose(model_data.coef_[0:2, :])
            # Normalize each axis of the loadings independently.
            loadings[:, 0] = loadings[:, 0] / np.abs(loadings[:, 0]).max()
            loadings[:, 1] = loadings[:, 1] / np.abs(loadings[:, 1]).max()
            xc, yc = [], []
            for i in range(loadings.shape[0]):
                xc.extend([0, loadings[i, 0], None])
                yc.extend([0, loadings[i, 1], None])
            fig.add_trace(
                go.Scatter(
                    x=xc,
                    y=yc,
                    mode="lines",
                    name="Loadings",
                    showlegend=False,
                    line=dict(color="black"),
                    opacity=0.3,
                )
            )
            fig.add_trace(
                go.Scatter(
                    x=loadings[:, 0],
                    y=loadings[:, 1],
                    mode="text",
                    text=column_names,
                    opacity=0.7,
                    name="Loadings",
                ),
            )
    elif plot_type in [amp_consts.PLOT_NCA]:
        X = df.loc[:, num_columns]
        ignored_columns = params.pop("ignore_columns", [])
        if ignored_columns:
            X = X.drop(
                list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
            )
        column_names = X.columns.to_list()
        if params["target"] in df.select_dtypes(include=["object"]).columns.to_list():
            t = df[params["target"]].astype("category").cat.codes
        # BUGFIX: `np.float` removed in NumPy 1.24 (see LDA/QDA branch).
        elif params["target"] in df.select_dtypes(include=[float]).columns.to_list():
            t = df[params["target"]].astype("int")
        else:
            t = df[params["target"]]
        class_names = df[params["target"]].unique()
        scaler = StandardScaler()
        scaler.fit(X)
        X = scaler.transform(X)
        model_data = NeighborhoodComponentsAnalysis(
            init=params.pop("init", "auto"),
            n_components=min(len(column_names), params.pop("n_components", 2)),
        )
        x_new = model_data.fit(X, y=t).transform(X)
        df["x_nca"] = x_new[:, 0]
        df["y_nca"] = x_new[:, 1]
        params["x"] = "x_nca"
        params["y"] = "y_nca"
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_CORR_MATRIX:
        fig = px.imshow(
            df[num_columns].corr(method=params.get("corr_method")).values,
            x=num_columns,
            y=num_columns,
        )
    else:
        fig = None
    # ROBUSTNESS: guard against `fig is None` (unknown plot_type) before
    # touching the 3D scene layout.
    if fig is not None and plot_type in amp_consts.PLOT_IS_3D:
        fig.update_layout(scene={"aspectmode": "cube"})
    if fig is not None:
        fig.update_layout(
            height=params["height"],
            template=params["template"],
            legend={"traceorder": "normal"},
        )
        # Default marker styling when no size mapping was requested.
        if ("size" not in params) or (
            (params["size"] is None) or (params["size"] == amp_consts.NONE_SELECTED)
        ):
            fig.update_traces(
                marker=dict(
                    size=8,
                    line=dict(width=2),
                    opacity=0.7,
                ),
                selector=dict(mode="markers"),
            )
    return {
        k: v
        for k, v in zip(
            ["figure", "model_data", "column_names", "class_names"],
            [fig, model_data, column_names, class_names],
        )
        if v is not None
    }
def article_vs_headline_plot(df_in):
    """Build an article-score vs headline-score figure, one group per news desk.

    For each news desk the figure gets three traces, in this fixed order:
    a density contour, a scatter of the raw scores, and a dotted regression
    line loaded from a per-desk pickled model.  The ``fig.data[i * 3]``
    indexing below relies on exactly this ordering.

    Parameters:
        df_in: DataFrame with at least the columns ``headline``,
            ``headline_score``, ``article_score``, ``news_desk`` and
            ``section_name``.

    Returns:
        str: the figure serialized to JSON via ``PlotlyJSONEncoder``.
    """
    # Read in data
    # df = pd.read_csv(FILE_PATH)
    df = df_in
    # Keep only scored rows and drop desks/sections excluded from the plot.
    df = df[["headline", "headline_score", "article_score",
             "news_desk"]].loc[(df["headline_score"] != 0)
                               & (df["article_score"] != 0)
                               & (df["section_name"] != "Business Day")
                               & (df["news_desk"] != "Media")
                               & (df["news_desk"] != "National")]
    # ['National' 'Business' 'Politics' 'Science' 'Climate']
    # Line colors
    lines_colors_dict = {
        "Society": "rgba(30, 144, 255, 0.7)",  # "dodgerblue",
        "Business": "rgba(255, 215, 0, 0.7)",  # "gold",
        "Politics": "rgba(178, 34, 34, 0.7)",  # "firebrick",
        "Science": "rgba(34, 139, 34, 0.7)",  # "forestgreen",
        "Climate": "rgba(255, 140, 0, 0.7)",  # "darkorange"
        "Arts&Leisure": "rgba(138, 43, 226, 0.7)",  # "blueviolet",
    }
    # Marker fill colors with 50% opacity
    markers_colors_dict = {
        "Society": "rgba(30, 144, 255, 0.2)",  # "dodgerblue",
        "Business": "rgba(255, 215, 0, 0.2)",  # "gold",
        "Politics": "rgba(178, 34, 34, 0.2)",  # "firebrick",
        "Science": "rgba(34, 139, 34, 0.2)",  # "forestgreen",
        "Climate": "rgba(255, 140, 0, 0.2)",  # "darkorange"
        "Arts&Leisure": "rgba(138, 43, 226, 0.2)",  # "blueviolet",
    }
    # Per-row colors so the scatter markers can be colored in one trace.
    df["markers_colors"] = df["news_desk"].map(markers_colors_dict)
    df["lines_colors"] = df["news_desk"].map(lines_colors_dict)
    # NOTE(review): result of head() is discarded — looks like leftover
    # notebook debugging.
    df.head(10)
    fig = go.Figure()
    i = 0
    for desk in df["news_desk"].unique():
        df_current = df.loc[df["news_desk"] == desk]
        # Trace i*3: density contour (only the first trace of the px figure).
        fig.add_trace(
            px.density_contour(
                df_current,
                x="headline_score",
                y="article_score",
            )["data"][0])
        fig.data[i * 3].update(
            name=desk,
            line={
                "color":
                df_current["lines_colors"].loc[df_current["news_desk"] ==
                                               desk].unique()[0],
                "width":
                1,
            },
            legendgroup=desk,
            showlegend=True,
            hovertemplate="",
            hoverinfo="skip",
        )
        # Trace i*3 + 1: raw score scatter for this desk.
        fig.add_trace(
            go.Scatter(
                x=df_current["headline_score"],
                y=df_current["article_score"],
            ))
        fig.data[(i * 3) + 1].update(
            mode="markers",
            marker={
                "color": df_current["markers_colors"],
                "line": {
                    "color": "rgba(105, 105, 105, .5)",
                    "width": 0.3
                },  # dimgrey
            },
            text=desk,
            hovertemplate="Headline: %{x}<br>Article: %{y}<extra></extra>",
            legendgroup=desk,
            showlegend=False,
        )
        # Trace i*3 + 2: pre-trained per-desk regression, evaluated at the
        # plot edges (x = -1 and x = 1) to draw a dotted trend line.
        # NOTE(review): assigned every iteration although it is constant.
        MODELS_FILEPATH = os.path.join("news_app", "static", "resources",
                                       "saved_models")
        pickle_filename = f"pickle_model_{desk}.pkl"
        with open(os.path.join(MODELS_FILEPATH, pickle_filename),
                  "rb") as file:
            model = pickle.load(file)
        x_trace = [-1, 1]
        y_trace = [model.predict([[-1]])[0], model.predict([[1]])[0]]
        fig.add_trace(
            go.Scatter(
                x=x_trace,
                y=y_trace,
                mode="lines",
                line={
                    "color": lines_colors_dict[desk],
                    "width": 2,
                    "dash": "dot",
                },
                legendgroup=desk,
                name=desk,
                showlegend=False,
                text=df_current["news_desk"],
                hovertemplate="%{text}<extra></extra>",
            ))
        i += 1
    fig.layout.update(title="Article vs Headline Score",
                      title_x=0.5,
                      xaxis={"title": {
                          "text": "Headline Scores"
                      }},
                      yaxis={"title": {
                          "text": "Article Scores"
                      }},
                      paper_bgcolor="white",
                      plot_bgcolor="ghostwhite",
                      legend={"title": {
                          "text": "<b>News Desks<b>"
                      }})
    # Serialize for the web front end rather than returning the figure object.
    fig_data = json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
    return fig_data
facet_col="day", facet_col_wrap=3, category_orders={"day": ["Thur", "Fri", "Sat", "Sun"]}, ) fig.write_html(os.path.join(dir_name, "facet_wrap_ragged.html")) gapminder = px.data.gapminder() fig = px.area(gapminder, x="year", y="pop", color="continent", line_group="country") fig.write_html(os.path.join(dir_name, "area.html")) # #### Visualize Distributions iris = px.data.iris() fig = px.density_contour(iris, x="sepal_width", y="sepal_length") fig.write_html(os.path.join(dir_name, "density_contour.html")) iris = px.data.iris() fig = px.density_contour( iris, x="sepal_width", y="sepal_length", color="species", marginal_x="rug", marginal_y="histogram", ) fig.write_html(os.path.join(dir_name, "density_contour_marginal.html"))
y="Area Income", color="Clicked", marginal_y="histogram", marginal_x="histogram", trendline="ols") fig.update_layout( title_text='Relation Between Customer Age and Area Income', # title of plot xaxis_title_text='Age of Customer', # xaxis label yaxis_title_text='Area Income', # yaxis label ) fig.show() # **Create a jointplot showing the kde distributions of Daily Time spent on site vs. fig = px.density_contour(ad_data, x="Age", y="Daily Time Spent on Site", marginal_y="histogram", marginal_x="histogram") fig.update_layout( title_text= 'Relation Between Customer Age and Time Spent on Site', # title of plot xaxis_title_text='Age of Customer', # xaxis label yaxis_title_text='Time Spent on Site Daily', # yaxis label ) fig.show() # ** Create a jointplot of 'Daily Time Spent on Site' vs. 'Daily Internet Usage'** fig = px.scatter(ad_data, x="Daily Time Spent on Site", y="Daily Internet Usage",
import pandas as pd
import plotly.express as px

# Load the flights dataset once; every chart below reads from it.
flights_df = pd.read_csv("data_science/datasets/flights.csv")

# Distribution of flight distances.
distance_hist = px.histogram(flights_df, x='distance', nbins=20)
distance_hist.show()

# Number of flights per origin airport.
origin_hist = px.histogram(flights_df, x='origin')
origin_hist.show()

# Departure time vs arrival time.
times_scatter = px.scatter(flights_df, x='dep_time', y='arr_time')
times_scatter.show()

# Departure delay broken down by origin airport.
delay_box = px.box(flights_df, x='origin', y='dep_delay')
delay_box.show()

# Joint distribution of arrival delay vs departure delay.
delay_contour = px.density_contour(flights_df, x='arr_delay', y='dep_delay')
delay_contour.show()

# Air time vs distance, colored by origin airport.
airtime_scatter = px.scatter(flights_df, x='air_time', y='distance', color='origin')
airtime_scatter.show()