def pl(df, r, var):
    """Line-plot `var` over ticks, one facet row per random seed in `r`.

    Colors by firm, dashes by scenario; figure height scales with the
    number of seeds shown.
    """
    subset = df[df.randomSeed.isin(r)]
    fig = px.line(
        subset,
        height=300 * len(r),
        x="tick",
        y=var,
        color="FirmNumID",
        line_dash="scenario",
        facet_row="randomSeed",
    )
    plot(fig)
def plMelt(df, r, vars, id_vars=None):
    """Melt `vars` into long form and line-plot them, faceted by variable
    (columns) and random seed (rows).

    :param df: wide-form DataFrame with a ``randomSeed`` column
    :param r: iterable of random seeds to include
    :param vars: value columns to melt and plot
    :param id_vars: identifier columns kept during the melt; defaults to
        ``["randomSeed", "scenario", "tick", "FirmNumID"]``
    """
    # BUG FIX: the default was a mutable list shared across calls; use the
    # None sentinel instead (same effective default, no aliasing hazard).
    if id_vars is None:
        id_vars = ["randomSeed", "scenario", "tick", "FirmNumID"]
    tmp = df[df.randomSeed.isin(r)]
    tmp = tmp.melt(id_vars=id_vars, value_vars=vars)
    plot(px.line(
        tmp,
        height=300 * len(r),
        x="tick",
        y="value",
        color="FirmNumID",
        line_dash="scenario",
        facet_col="variable",
        facet_row="randomSeed",
    ))
def plot_subr_line_cat(filtered_json_week, nperc):
    """Weekly flag counts (nperc == "n") or percentages per subreddit category.

    `filtered_json_week` is a split-orient JSON serialization of the weekly
    DataFrame. Returns the plotly figure with markers+lines traces.
    """
    weekly = pd.read_json(filtered_json_week, orient='split')
    per_cat = get_flag_perc(weekly, ['subreddit_cat', 'week'])
    metric = "ISFLAG" if nperc == "n" else "perc_FLAG"
    fig = px.line(per_cat, x="week", y=metric, color="subreddit_cat")
    for trace in fig.data:
        trace.update(mode='markers+lines')
    return fig
def update_figure(selected_year):
    """Mean interest rate by issue date and term, for loans issued up to
    (and including) `selected_year`. Reads the module-level `lc` frame.
    """
    in_range = lc[lc['issue_year'] <= selected_year]
    mean_rate = (
        in_range.groupby(['issue_d', 'term'])
        .mean()[['int_rate']]
        .reset_index()
    )
    return px.line(
        mean_rate,
        x='issue_d',
        y='int_rate',
        color='term',
        labels={'issue_d': 'Issue Date', 'int_rate': 'Interest Rate'},
    )
def update_figure(selected_year):
    """Total originated principal by issue date and term, for loans issued
    up to (and including) `selected_year`. Reads the module-level `lc` frame.
    """
    in_range = lc[lc['issue_year'] <= selected_year]
    totals = (
        in_range.groupby(['issue_d', 'term'])
        .sum()[['loan_amnt']]
        .reset_index()
    )
    return px.line(
        totals,
        x='issue_d',
        y='loan_amnt',
        labels={'issue_d': 'Issue Date', 'loan_amnt': 'Origination Principal'},
        color='term',
    )
def plot_subr_line_20(filtered_json_week, nperc):
    """Weekly flag counts (nperc == "n") or percentages for the top-20
    subreddits (module-level `top20subreddits`).

    `filtered_json_week` is a split-orient JSON serialization of the weekly
    DataFrame. Returns the plotly figure with markers+lines traces.
    """
    weekly = pd.read_json(filtered_json_week, orient='split')
    top20_only = weekly[weekly.subreddit.isin(top20subreddits)]
    per_sub = get_flag_perc(top20_only, groups=['subreddit', 'week'])
    metric = "ISFLAG" if nperc == "n" else "perc_FLAG"
    fig = px.line(per_sub, x="week", y=metric, color="subreddit")
    for trace in fig.data:
        trace.update(mode='markers+lines')
    return fig
def update_hourly_graph(query, selected_label_id, min_prob, max_prob,
                        selected_iteration_id, clickData):
    """Hourly detection counts per station for the day clicked in the daily
    graph.

    Parses the project id out of the URL query string, aggregates model
    outputs within the probability window per file, then rolls those counts
    up to station/hour for the selected date. Falls back to an empty figure
    when any input is missing or the queries fail.
    """
    try:
        query_dict = parse_qs(query[1:])
        project_id = query_dict['project'][0]
        selected_date = clickData['points'][0]['x']
        with dashapp.server.app_context():
            # Detection count per audio file, restricted to the selected
            # iteration/label and probability window.
            per_file = db.session.query(
                ModelOutput.file_name,
                func.count(ModelOutput.id).label('count')).filter(
                    ModelOutput.iteration_id == selected_iteration_id,
                    ModelOutput.label_id == selected_label_id,
                    ModelOutput.probability >= float(min_prob),
                    ModelOutput.probability <= float(max_prob)).group_by(
                        ModelOutput.file_name).subquery()
            # Roll the per-file counts up to station/date/hour for the
            # clicked day, joining files to stations via their equipment.
            per_hour = db.session.query(
                MonitoringStation.name.label('station'),
                func.sum(per_file.columns.count).label('count'),
                func.date(AudioFile.timestamp).label('date'),
                func.extract('hour', AudioFile.timestamp).label('hour')
            ).join(
                AudioFile,
                per_file.columns.file_name == AudioFile.name).join(
                    Equipment, AudioFile.sn == Equipment.serial_number).join(
                        MonitoringStation).group_by('station').group_by(
                            'date').group_by('hour').filter(
                                MonitoringStation.project_id == project_id,
                                func.date(AudioFile.timestamp) ==
                                selected_date).order_by('hour')
            df = pd.read_sql(per_hour.statement, db.session.bind)
            fig = px.line(df, x='hour', y='count', color='station',
                          title='Hourly count for ' + selected_date)
            fig.update_traces(mode='markers')
    except Exception:
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception. Placeholder unchanged.
        fig = px.line()
    return fig
def update_hourly_graph(query, selected_label_id, min_prob, max_prob,
                        selected_iteration_id, clickData):
    """Hourly detection counts per station for the clicked day, colored with
    the project's fixed station palette.

    Like the simpler variant, but also carries station_id/station as
    custom_data for downstream click handlers and titles the figure with the
    selected label. Shows a "Select a day" placeholder on any failure.
    """
    try:
        project_id = get_project_id(query)
        selected_date = clickData['points'][0]['x']
        with dashapp.server.app_context():
            # Detection count per audio file within the probability window.
            per_file = db.session.query(
                ModelOutput.file_name,
                func.count(ModelOutput.id).label('count')).filter(
                    ModelOutput.iteration_id == selected_iteration_id,
                    ModelOutput.label_id == selected_label_id,
                    ModelOutput.probability >= float(min_prob),
                    ModelOutput.probability <= float(max_prob)).group_by(
                        ModelOutput.file_name).subquery()
            # Station/date/hour roll-up for the clicked day; station_id is
            # exported via custom_data for click callbacks.
            per_hour = db.session.query(
                func.min(MonitoringStation.id).label('station_id'),
                MonitoringStation.name.label('station'),
                func.sum(per_file.columns.count).label('count'),
                func.date(AudioFile.timestamp).label('date'),
                func.extract(
                    'hour', AudioFile.timestamp).label('hour')).join(
                        AudioFile,
                        per_file.columns.file_name == AudioFile.name).join(
                            AudioFile.monitoring_station
                        ).group_by('station').group_by('date').group_by(
                            'hour').filter(
                                MonitoringStation.project_id == project_id,
                                func.date(AudioFile.timestamp) ==
                                selected_date).order_by('hour')
            df = pd.read_sql(per_hour.statement, db.session.bind)
            station_colors = get_station_colors(project_id)
            label = Label.query.get(selected_label_id)
            fig = px.line(df,
                          x='hour',
                          y='count',
                          custom_data=['station_id', 'station'],
                          color='station',
                          color_discrete_map=station_colors,
                          title='Hourly count for {label} on {date}'.format(
                              label=label, date=selected_date))
            fig.update_traces(mode='markers')
    except Exception:
        # BUG FIX: was a bare `except:`; narrowed to Exception so process
        # control exceptions propagate. Placeholder figure unchanged.
        fig = go.Figure()
        fig.update_layout(title='Hourly count',
                          annotations=[{
                              'text': 'Select a day',
                              'showarrow': False,
                              'font': {
                                  'size': 28
                              }
                          }])
    return fig
def global_plot_create(data, x, y, title, xaxis, yaxis):
    """Build a 1000x800 per-country line chart with a log y-axis.

    :param data: DataFrame with a 'Country/Region' column used for color
    :param x, y: column names for the axes
    :param title, xaxis, yaxis: title and axis label strings
    :returns: the configured plotly figure
    """
    fig = px.line(data, x=x, y=y, color='Country/Region',
                  width=1000, height=800)
    # Transparent backgrounds so the figure blends into the page; light
    # green grid keeps the curves readable on a log scale.
    fig.update_layout(
        title=title,
        xaxis_title=xaxis,
        yaxis_title=yaxis,
        legend_title_text='Countries',
        yaxis_type="log",
        yaxis_tickformat='f',
        xaxis_gridcolor='LightGreen',
        yaxis_gridcolor='LightGreen',
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    return fig
def us_plot_create(data, x, y, title, xaxis, yaxis):
    """Build a 1000x800 per-state line chart with a log y-axis.

    NOTE(review): the legend title says 'United Kingdom' while coloring by
    'Province_State' in a US-named helper — looks copy-pasted; confirm the
    intended legend text before changing it. Preserved as-is here.
    """
    fig = px.line(data, x=x, y=y, color='Province_State',
                  width=1000, height=800)
    fig.update_layout(
        title=title,
        xaxis_title=xaxis,
        yaxis_title=yaxis,
        legend_title_text='United Kingdom',
        yaxis_type="log",
        yaxis_tickformat='f',
        xaxis_gridcolor='LightGreen',
        yaxis_gridcolor='LightGreen',
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )
    return fig
def update_graph(query, selected_label_id, min_prob, max_prob, start_hour,
                 end_hour, selected_iteration_id):
    """Daily detection counts per station within an hour-of-day window.

    Aggregates model outputs per file within the probability window, then
    rolls them up to station/date restricted to [start_hour, end_hour].
    Any failure (including no label selected) yields a placeholder figure
    asking the user to pick a label.
    """
    try:
        if selected_label_id is None:
            # BUG FIX: the original used a bare `raise` with no active
            # exception (implicitly a RuntimeError) to jump to the fallback;
            # raise an explicit exception instead. Still caught below.
            raise ValueError('no label selected')
        project_id = get_project_id(query)
        with dashapp.server.app_context():
            # Detection count per audio file within the probability window.
            per_file = db.session.query(
                ModelOutput.file_name,
                func.count(ModelOutput.id).label('count')).filter(
                    ModelOutput.iteration_id == selected_iteration_id,
                    ModelOutput.label_id == selected_label_id,
                    ModelOutput.probability >= float(min_prob),
                    ModelOutput.probability <= float(max_prob)).group_by(
                        ModelOutput.file_name).subquery()
            # Station/date roll-up, keeping only files recorded inside the
            # requested hour-of-day window.
            per_day = db.session.query(
                func.min(MonitoringStation.id).label('station_id'),
                MonitoringStation.name.label('station'),
                func.sum(per_file.columns.count).label('count'),
                func.date(AudioFile.timestamp).label('date')).join(
                    AudioFile,
                    per_file.columns.file_name == AudioFile.name
                ).join(AudioFile.monitoring_station).filter(
                    MonitoringStation.project_id == project_id,
                    func.extract('hour', AudioFile.timestamp) >= start_hour,
                    func.extract('hour', AudioFile.timestamp) <= end_hour,
                ).group_by('station').group_by('date').order_by('date')
            df = pd.read_sql(per_day.statement, db.session.bind)
            station_colors = get_station_colors(project_id)
            label = Label.query.get(selected_label_id)
            fig = px.line(
                df,
                x='date',
                y='count',
                custom_data=['station_id', 'station'],
                color='station',
                color_discrete_map=station_colors,
                title='Daily count for {label} {start}:00-{end}:59'.format(
                    label=label, start=start_hour, end=end_hour))
            fig.update_traces(mode='markers')
    except Exception:
        # BUG FIX: was a bare `except:`; narrowed to Exception.
        fig = go.Figure()
        fig.update_layout(title='Daily count',
                          annotations=[{
                              'text': 'Please select a label',
                              'showarrow': False,
                              'font': {
                                  'size': 28
                              }
                          }])
    return fig
def linechar(chosed_asin, table_chosed_asin, asin_month_rank):
    """Line chart of one ASIN's daily rank, with a horizontal red line at
    its monthly rank.

    :param chosed_asin: ASIN whose column in `table_chosed_asin` is plotted
    :param table_chosed_asin: DataFrame with a 'date' column plus one column
        per ASIN
    :param asin_month_rank: monthly rank value drawn as the red reference line
    :returns: the plotly figure
    """
    dates = table_chosed_asin['date']
    fig = px.line(table_chosed_asin,
                  x=dates,
                  y=table_chosed_asin[f'{chosed_asin}'],
                  title=f'{chosed_asin}的当月排名情况(红线为当月排名)')
    # Red reference line spanning the full date range at the monthly rank.
    fig.add_shape(type='line',
                  x0=dates.iloc[0],
                  y0=asin_month_rank,
                  x1=dates.iloc[-1],
                  y1=asin_month_rank,
                  line=dict(color='Red'),
                  xref='x',
                  yref='y')
    return fig
def showPlots(chart_select):
    """Render the chart chosen in `chart_select` from the module-level `df`,
    with axis/color pickers in the Streamlit sidebar.

    Each chart type is handled independently; plotting errors are printed
    (best effort) rather than raised, matching the original behavior.
    """
    if chart_select == 'Scatterplots':
        st.sidebar.subheader("Scatterplot Settings")
        try:
            x_col = st.sidebar.selectbox('X axis', options=numeric_columns)
            y_col = st.sidebar.selectbox('Y axis', options=numeric_columns)
            hue = st.sidebar.selectbox("Color", options=non_numeric_columns)
            fig = px.scatter(data_frame=df, x=x_col, y=y_col, color=hue)
            st.plotly_chart(fig)
        except Exception as e:
            print(e)
    if chart_select == 'Lineplots':
        st.sidebar.subheader("Line Plot Settings")
        try:
            x_col = st.sidebar.selectbox('X axis', options=numeric_columns)
            y_col = st.sidebar.selectbox('Y axis', options=numeric_columns)
            hue = st.sidebar.selectbox("Color", options=non_numeric_columns)
            fig = px.line(data_frame=df, x=x_col, y=y_col, color=hue)
            st.plotly_chart(fig)
        except Exception as e:
            print(e)
    if chart_select == 'Histogram':
        st.sidebar.subheader("Histogram Settings")
        try:
            feature = st.sidebar.selectbox('Feature', options=numeric_columns)
            # Slider value is collected but, as in the original, not passed
            # to px.histogram.
            bin_size = st.sidebar.slider("Number of Bins",
                                         min_value=10,
                                         max_value=100,
                                         value=40)
            hue = st.sidebar.selectbox("Color", options=non_numeric_columns)
            fig = px.histogram(x=feature, data_frame=df, color=hue)
            st.plotly_chart(fig)
        except Exception as e:
            print(e)
    if chart_select == 'Boxplot':
        st.sidebar.subheader("Boxplot Settings")
        try:
            y_col = st.sidebar.selectbox("Y axis", options=numeric_columns)
            x_col = st.sidebar.selectbox("X axis", options=non_numeric_columns)
            hue = st.sidebar.selectbox("Color", options=non_numeric_columns)
            fig = px.box(data_frame=df, y=y_col, x=x_col, color=hue)
            st.plotly_chart(fig)
        except Exception as e:
            print(e)
def update_graph(user_id):
    """Plot a user's actual ALSFRS scores (scatter + OLS trendline) against
    the progression predicted from their conditions.

    The predicted line is produced by the module-level `reg_progression`
    model from a condition code vector, then shifted so it starts at the
    user's first observed score.
    """
    # SECURITY FIX: user_id was interpolated raw into the SQL text; coerce
    # it to int first so the interpolation cannot carry SQL injection.
    user_id = int(user_id)
    # progression data
    query = 'select * from user_ALSFRS_score where user_id = ' + str(user_id)
    user_progression = pd.read_sql(query, con=engine)
    user_progression['report_date'] = pd.to_datetime(
        user_progression['report_date'], format='%Y-%m-%d')
    # Ordinal dates so the regression works on plain numbers.
    user_progression['report_date'] = user_progression['report_date'].map(
        dt.datetime.toordinal)
    user_progression = user_progression.sort_values(by='report_date')
    time = user_progression['report_date'].to_numpy()
    score = user_progression['score'].to_numpy()
    # condition data
    query = 'select * from user_condition where user_id = ' + str(user_id)
    user_conditions = pd.read_sql(query, con=engine)
    # Encode conditions, then predict slope/intercept of the progression.
    user_conditions_code = np.asarray(
        fun.getConditions(CONDITION_MAP, user_conditions))
    progression_speed_predicted = reg_progression.predict(
        user_conditions_code.reshape(1, -1))
    predicted_score = progression_speed_predicted[0][
        0] * time + progression_speed_predicted[0][1]
    # Shift the predicted line so it starts at the first observed score.
    predicted_score = predicted_score - (predicted_score[0] - score[0])
    df_predicted = pd.DataFrame({
        'report_date': time,
        'score': predicted_score
    })
    # Back to real dates for plotting.
    user_progression['report_date'] = user_progression['report_date'].map(
        dt.datetime.fromordinal)
    df_predicted['report_date'] = df_predicted['report_date'].map(
        dt.datetime.fromordinal)
    figure = px.scatter(user_progression,
                        x="report_date",
                        y="score",
                        trendline="ols")
    figure.update_traces(name='Actural user data', showlegend=True)
    figure2 = px.line(df_predicted, x='report_date', y='score')
    figure2.update_traces(name='Predicted progression', showlegend=True)
    figure2.update_traces(line_color='#147852')
    figure.add_trace(figure2.data[0])
    return figure
def summary_plot2(n_clicks, bank2, y2, rprt_date2):
    """Dark-themed spline chart of metric `y2` per bank over the selected
    report-date window.

    :param n_clicks: button click count (unused trigger input)
    :param bank2: RSSD ids of the banks to include
    :param y2: metric column to plot (see the labels map for options)
    :param rprt_date2: (start, end) date_index bounds, inclusive
    """
    mask = (data_source['RSSD'].isin(bank2)
            & (data_source['date_index'] >= rprt_date2[0])
            & (data_source['date_index'] <= rprt_date2[1]))
    window = data_source[mask]
    fig = px.line(
        window,
        x="Report_Date",
        y=y2,
        color="bank",
        hover_name="bank",
        line_shape="spline",
        labels={
            'avg_os': 'Average Loan Balance (in ths)',
            'alll': 'Loan Loss Reserve (in ths)',
            'alll_rate': 'Loan Loss Reserve (in %)',
            'ncl': 'Net Credit Loss (in ths)',
            'ncl_rate': 'Net Credit Loss (in %)',
            'alll_to_ncl': 'ALLL / NCL Ratio',
            'alll_to_nonaccrual': 'ALLL / Non-accrual Ratio'
        },
        template='plotly_dark',
    )
    # Transparent background with the EY-style yellow axis lines.
    axis_style = dict(showgrid=True,
                      gridcolor='#747480',
                      gridwidth=0.5,
                      zerolinecolor='#747480',
                      zerolinewidth=1,
                      linecolor='#FFE600',
                      linewidth=2)
    fig['layout'].update(
        {
            'plot_bgcolor': 'rgba(0,0,0,0)',
            'paper_bgcolor': 'rgba(0,0,0,0)',
        },
        xaxis=dict(axis_style),
        yaxis=dict(axis_style))
    return fig
def plot_subr_line_cat(filtered_json_week, nperc):
    """Weekly flag counts (nperc == "n") or percentages per subreddit
    category, with week-day tick labels taken from `week_day_df`.

    `filtered_json_week` is a split-orient JSON serialization of the weekly
    DataFrame. Returns the plotly figure with markers+lines traces.
    """
    weekly = pd.read_json(filtered_json_week, orient='split')
    per_cat = get_flag_perc(weekly, ['subreddit_cat', 'week'])
    metric = "ISFLAG" if nperc == "n" else "perc_FLAG"
    fig = px.line(per_cat, x="week", y=metric, color="subreddit_cat")
    for trace in fig.data:
        trace.update(mode='markers+lines')
    # Tick positions span the full x-range across all traces.
    x_lo = min(min(trace.x) for trace in fig.data)
    x_hi = max(max(trace.x) for trace in fig.data)
    tickvals0 = get_xlabs((x_lo, x_hi))
    fig.update_layout(
        xaxis=dict(tickmode='array',
                   tickvals=tickvals0,
                   ticktext=week_day_df['label'][tickvals0]))
    return fig
def update_detailed_file_graph(query, file_click_data, daily_click_data):
    """Megabytes recorded per hour per station for a clicked day.

    The day comes from whichever graph triggered the callback (file graph
    or daily graph). On any failure — including when neither graph was the
    trigger, leaving no date — shows a "Select a day" placeholder.
    """
    try:
        project_id = get_project_id(query)
        # Pick the date from whichever plot fired the callback. If neither
        # matched, selected_date stays unbound and the NameError below is
        # absorbed by the fallback — same behavior as the original.
        if callback_context.triggered[0][
                'prop_id'] == 'file-graph.clickData':
            selected_date = file_click_data['points'][0]['x']
        if callback_context.triggered[0][
                'prop_id'] == 'daily-graph.clickData':
            selected_date = daily_click_data['points'][0]['x']
        with dashapp.server.app_context():
            # File size/timestamp/station for every file of the project on
            # the selected date.
            project_files = AudioFile.query.join(
                AudioFile.monitoring_station).filter(
                    MonitoringStation.project_id == project_id,
                    func.date(AudioFile.timestamp) == selected_date
                ).with_entities(
                    AudioFile.size, AudioFile.timestamp,
                    MonitoringStation.name.label('station')).subquery()
            # Bytes recorded per station per hour.
            bytes_per_hour = db.session.query(
                func.sum(project_files.c.size).label('bytes'),
                func.extract('hour',
                             project_files.c.timestamp).label('hour'),
                project_files.c.station).group_by('station').group_by(
                    'hour').order_by('hour')
            df = pd.read_sql(bytes_per_hour.statement, db.session.bind)
            df['MB'] = df['bytes'] / 1024 / 1024
            station_colors = get_station_colors(project_id)
            fig = px.line(df,
                          x='hour',
                          y='MB',
                          color='station',
                          color_discrete_map=station_colors,
                          title='Data recorded per hour on ' + selected_date)
            fig.update_traces(mode='markers')
            fig.update_layout(hovermode='x')
    except Exception:
        # BUG FIX: was a bare `except:`; narrowed to Exception.
        fig = go.Figure()
        fig.update_layout(title='Data recorded per hour',
                          annotations=[{
                              'text': 'Select a day',
                              'showarrow': False,
                              'font': {
                                  'size': 28
                              }
                          }])
    return fig
def plot_tweets_time(df):
    """Plot the number of tweets per week for each unique handle.

    Expects `df` to carry 'date_week' and 'handle' columns; colors come from
    the module-level `colour_dict`. The legend is hidden.
    """
    counts = df.groupby(['date_week', 'handle'],
                        as_index=False).count().iloc[:, 0:3]
    counts.columns = ['week', 'handle', 'number of tweets']
    fig = px.line(counts,
                  x='week',
                  y='number of tweets',
                  color="handle",
                  color_discrete_map=colour_dict,
                  title="Number of Tweets by Week",
                  height=400)
    fig.update_layout({"showlegend": False})
    fig.update_layout(margin=dict(l=0, r=0, t=30, b=30))
    return fig
def global_plot_create(data, x, y, title, xaxis, yaxis):
    """Build an 800x600 per-country line chart (linear y-axis).

    :param data: DataFrame with a 'Country/Region' column used for color
    :param x, y: column names for the axes
    :param title, xaxis, yaxis: title and axis label strings
    :returns: the configured plotly figure
    """
    fig = px.line(data, x=x, y=y, color='Country/Region',
                  width=800, height=600)
    # Transparent backgrounds; light blue grid. (A log y-axis was present
    # in a sibling variant but is intentionally disabled here.)
    fig.update_layout(
        title=title,
        xaxis_title=xaxis,
        yaxis_title=yaxis,
        legend_title_text='Countries',
        yaxis_tickformat='f',
        xaxis_gridcolor='LightBlue',
        yaxis_gridcolor='LightBlue',
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)')
    return fig
def comisiones_por_mes(df):
    """Monthly sales per product.

    Keeps only customer/month rows holding at least one product, pivots to
    one row per (customer, month), de-duplicates, and sums per month.
    The first three months are dropped.

    :returns: (figure, per-month DataFrame) tuple
    """
    productos = df.loc[:, ["pk_cid", "pk_partition"] + PRODUCTS]
    con_producto = productos[productos[PRODUCTS].sum(axis=1) >= 1]
    ventas = con_producto.pivot_table(values=PRODUCTS,
                                      index=['pk_cid', 'pk_partition'])
    ventas.reset_index(inplace=True)
    ventas.set_index('pk_partition', inplace=True)
    ventas.drop_duplicates(keep='first', inplace=True)
    por_mes = ventas.groupby(by=ventas.index).sum()
    del por_mes['pk_cid']
    # Skip the first three partitions (warm-up months).
    por_mes = por_mes[3:]
    fig = px.line(por_mes,
                  x=por_mes.index,
                  y=PRODUCTS,
                  title='Evolución de ventas por producto')
    return fig, por_mes
def activos_web(df_clientes):
    """Percentage of web-active customers per partition (month).

    Counts active customers and total customers per partition, combines the
    two, and plots the active share over time (y clamped to [0, 50]).

    :returns: (figure, evolution DataFrame) tuple
    """
    activos = df_clientes[df_clientes["active_customer"] == 1]
    n_activos = activos.groupby(by=["pk_partition"])[[
        "pk_cid"
    ]].count().sort_values(by="pk_partition", ascending=False)
    n_total = df_clientes.groupby(by=["pk_partition"])[[
        "pk_cid"
    ]].count().sort_values(by="pk_partition", ascending=False)
    evolucion = pd.concat([n_total, n_activos], axis=1)
    evolucion.columns = ["total", "activos_web"]
    evolucion["porcentaje"] = round(
        evolucion["activos_web"] / evolucion["total"] * 100, 2)
    evolucion.reset_index(inplace=True)
    fig = px.line(evolucion,
                  x="pk_partition",
                  y="porcentaje",
                  range_y=[0, 50],
                  title='Evolución tasa de navegación en la web')
    return fig, evolucion
def tsa_figrue(x, y, color, facet_col, facet_row, summarizer='mean'):
    """Aggregate the module-level `tsa_df` by the chosen grouping columns and
    render a (possibly faceted) line chart of `y`.

    Grouping columns are whichever of x/color/facet_col/facet_row are
    non-None and actually exist in `tsa_df`. Facet annotations are trimmed
    to the value after '='.
    """
    candidates = (x, color, facet_col, facet_row)
    agg_cols = [c for c in candidates
                if c is not None and c in tsa_df.columns]
    graph_df = tsa_df.copy()
    df = graph_df.groupby(agg_cols).agg({y: summarizer}).reset_index()
    fig = px.line(
        df,
        x,
        y,
        color=color,
        facet_col=facet_col,
        facet_row=facet_row,
        facet_col_wrap=4
    )
    if facet_col is not None or facet_row is not None:
        # Show only the value in each facet label, not "column=value".
        fig.for_each_annotation(
            lambda a: a.update(text=a.text.split("=")[1]))
    return fig
def line_chart(df, option):
    """Render a betting-odds-over-time line chart in Streamlit.

    :param df: DataFrame with 'Date', 'Price' and 'Winner' columns
    :param option: currently unused; kept for caller compatibility
    """
    palette = [
        '#FF1493', '#120052', '#652EC7', '#00C2BA', '#82E0BF', '#55E0FF'
    ]
    fig = px.line(df,
                  x='Date',
                  y='Price',
                  color='Winner',
                  color_discrete_sequence=palette,
                  title='Betting Odds Over Time')
    fig.update_traces(mode='lines', opacity=.75, line=dict(width=4))
    fig.update_yaxes(title='Implied Probability',
                     showgrid=True,
                     gridwidth=1,
                     gridcolor='#D4D4D4')
    fig.update_layout(plot_bgcolor='white')
    fig.update_xaxes(title='Date',
                     showgrid=False,
                     gridwidth=1,
                     gridcolor='#D4D4D4')
    st.plotly_chart(fig, use_container_width=True)
def update_trace(value_cam_ID, value_time):
    """Average-vehicle line chart per camera over the chosen window.

    :param value_cam_ID: camera ids to display
    :param value_time: 'Hour', 'Day' or 'Week' — selects the SQL window and
        sampling interval
    :returns: the plotly figure
    """
    if value_time == 'Hour':
        query = 'SELECT time, cam_name, cam_ID, AVG(vehicles) as average_vehicles FROM traffic_cams WHERE time >= NOW() - INTERVAL 1 HOUR and mod(minute(time),5) = 0 GROUP BY CONCAT(cam_ID, time)'
    if value_time == 'Day':
        query = 'SELECT time, cam_name, cam_ID, AVG(vehicles) as average_vehicles FROM traffic_cams WHERE time >= NOW() - INTERVAL 24 HOUR and mod(minute(time),120) = 0 GROUP BY CONCAT(cam_ID, time)'
    if value_time == 'Week':
        query = 'SELECT time, cam_name, cam_ID, AVG(vehicles) as average_vehicles FROM traffic_cams WHERE time >= NOW() - INTERVAL 168 HOUR and mod(minute(time),840) = 0 GROUP BY CONCAT(cam_ID, time)'
    df = pd.read_sql(query, con=engine)
    # Collapse to one reading per camera per minute.
    df['time_by_m'] = df['time'].dt.floor('1Min')
    df = df.drop_duplicates(['time_by_m', 'cam_ID'])
    df['time_mark'] = df['time_by_m'].astype(str)
    # BUG FIX: sort_values returns a new frame; the original discarded the
    # result, leaving the data unsorted. Reassign so the sort takes effect.
    df = df.sort_values(by='time')
    df2 = df.loc[df['cam_ID'].isin(value_cam_ID)]
    figure = px.line(df2,
                     x='time_mark',
                     y="average_vehicles",
                     color="cam_name",
                     line_shape='spline')
    return figure
def update_graph_live(n):
    # Refresh the Ryzom weather figure on each interval tick `n`; every
    # 150th tick re-creates the weather source with a different flag.
    # Updates the module-level season/climate strings as a side effect.
    global df_global_saison
    global df_global_cc
    if (n % 150 == 0):
        # presumably True forces a fresh data fetch — TODO confirm against weather()
        ryzom = weather(True)
    else:
        ryzom = weather(False)
    df_global_saison = ryzom.getSeason()
    df = pd.DataFrame(ryzom.getData())
    # Row(s) whose 'dates' equals the current wall-clock HH:MM; column index 2
    # is assumed to hold the climate-condition code — verify getData() layout.
    df_dates = df.loc[df['dates']==datetime.now().strftime("%H:%M"),:]
    df_global_cc = get_CC_libelle(df_dates.iloc[0,2])
    fig = px.line (df,x = "dates", y = "cc" ,color = "continents", title= "Saison : " + df_global_saison + " conditions climatiques : " + df_global_cc)
    # Fixed tick values map the raw cc scale onto qualitative labels.
    fig.update_yaxes(tickmode = "array",tickvals = [0,1670,3340,5000,6666,8340,10000],ticktext = ["Best", "Good 16.7%","Good 33.4%","Bad 50%","Bad 66.6%","Worst 83.4%","Worst"],showgrid = True)
    fig.update_xaxes(tickmode = "linear",showline = True,showgrid = True, dtick = 6)
    # Keep the legend's user state (hidden traces) across live updates.
    fig.update_layout(legend_uirevision='true')
    # Red dashed marker at "now".
    fig.add_vline(x=datetime.now().strftime("%H:%M"), line_width=1, line_dash="dash", line_color="red")
    fig.update_xaxes(showline=True, linewidth=2, linecolor='black', gridcolor='rgba(211, 211, 211, 0.75)')
    #fig.update_layout({'plot_bgcolor': 'rgba(0, 0, 0, 0)','paper_bgcolor': 'rgba(0, 0, 0, 0)'})
    #fig.update_yaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey')
    return fig
def calcula_ingresos_por_mes(df):
    """Monthly revenue per product: sales volume times a flat fee per
    product family (savings/investment 40, financing 60, accounts 10).

    :returns: (figure, revenue DataFrame) tuple
    """
    ventas_mes = comisiones_por_mes(df)[1]
    df_ingresos = pd.DataFrame()
    l_ahorro = ['long_term_deposit', 'pension_plan', 'short_term_deposit']
    l_inversion = ['funds', 'securities']
    l_financiacion = ['credit_card', 'loans', 'mortgage']
    # Catch-all group; everything not in the lists above falls here.
    l_cuenta = [
        'debit_card', 'em_account_p', 'em_account_pp', 'em_acount',
        'emc_account', 'payroll', 'payroll_account'
    ]
    for producto in ventas_mes.columns:
        if producto in l_ahorro or producto in l_inversion:
            tarifa = 40
        elif producto in l_financiacion:
            tarifa = 60
        else:
            tarifa = 10
        df_ingresos[producto] = ventas_mes[producto] * tarifa
    fig = px.line(df_ingresos,
                  x=df_ingresos.index,
                  y=PRODUCTS,
                  title='Evolución de ingresos por producto')
    return fig, df_ingresos
def evolucion_clientes(df_clientes):
    """Conversion-rate evolution: share of customers holding at least one
    product per partition (month), plotted in the [50, 100] range.

    :returns: (figure, evolution DataFrame) tuple
    """
    con_producto = df_clientes[df_clientes[PRODUCTS].sum(axis=1) >= 1]
    _df = con_producto.groupby(by=["pk_partition"])[[
        "pk_cid"
    ]].count().sort_values(by="pk_partition", ascending=False)
    # Month-over-month difference (rows are in descending partition order).
    _df["ant"] = _df["pk_cid"].shift(-1)
    _df["dif"] = _df["pk_cid"] - _df["ant"]
    df_n_cli = df_clientes.groupby(by=["pk_partition"])[[
        "pk_cid"
    ]].count().sort_values(by="pk_partition", ascending=False)
    evolucion = pd.concat([df_n_cli, _df], axis=1)
    evolucion.columns = ["total", "con_producto", "siguiente", "dif"]
    evolucion["porcentaje"] = round(
        evolucion["con_producto"] / evolucion["total"] * 100, 2)
    evolucion.reset_index(inplace=True)
    fig = px.line(evolucion,
                  x="pk_partition",
                  y="porcentaje",
                  range_y=[50, 100],
                  title='Evolución de la tasa de conversión')
    return fig, evolucion
def update_file_graph(query):
    """Megabytes recorded per day per station for the project in the URL
    query string.

    Sums audio-file sizes per station/date and plots them in the project's
    station colors. Shows a "Loading..." placeholder on any failure.
    """
    try:
        project_id = get_project_id(query)
        with dashapp.server.app_context():
            # Size/timestamp/station for every file of the project.
            project_files = AudioFile.query.join(
                AudioFile.monitoring_station
            ).filter(
                MonitoringStation.project_id == project_id).with_entities(
                    AudioFile.size, AudioFile.timestamp,
                    MonitoringStation.name.label('station')).subquery()
            # Bytes recorded per station per day.
            bytes_per_day = db.session.query(
                func.sum(project_files.c.size).label('bytes'),
                func.date(project_files.c.timestamp).label('date'),
                project_files.c.station).group_by('station').group_by(
                    'date').order_by('date')
            df = pd.read_sql(bytes_per_day.statement, db.session.bind)
            df['MB'] = df['bytes'] / (1024 * 1024)
            station_colors = get_station_colors(project_id)
            fig = px.line(df,
                          x='date',
                          y='MB',
                          color='station',
                          color_discrete_map=station_colors,
                          title='Data recorded per day')
            fig.update_traces(mode='markers')
            fig.update_layout(hovermode='x')
    except Exception:
        # BUG FIX: was a bare `except:`; narrowed to Exception so process
        # control exceptions propagate. Placeholder figure unchanged.
        fig = go.Figure()
        fig.update_layout(title='Data recorded per day',
                          annotations=[{
                              'text': 'Loading...',
                              'showarrow': False,
                              'font': {
                                  'size': 28
                              }
                          }])
    return fig
def graph():
    """Interactively load a CSV and render a plotly chart of the user's
    chosen type (line, bar or scatter) via console prompts.
    """
    location = input('please Enter the location of the CVS file\n')
    Xvalue = input('\nPlease enter the X Value\n')
    Yvalue = input('\nPlease enter the Y Value\n')
    title = input('\nPlease enter the Title of the graph\n')
    color = input("\nWhat should be the color of the graph base on?\n")
    graphValue = input('\nwhat type of graph Do you want?\n')
    df = pd.read_csv(location)
    print(df)
    # Renamed from `graph` to avoid shadowing this function's own name.
    kind = graphValue.lower()
    if 'line' in kind:
        fig = pe.line(df, x=Xvalue, y=Yvalue, color=color, title=title)
        fig.show()
    elif 'bar' in kind:
        # BUG FIX: this branch tested 'line' again, so the bar chart was
        # unreachable; it now matches 'bar' as the pe.bar call intends.
        fig = pe.bar(df, x=Xvalue, y=Yvalue, title=title, color=color)
        fig.show()
    elif 'scatter' in kind:
        fig = pe.scatter(df, x=Xvalue, y=Yvalue, color=color, title=title)
        fig.show()
def main():
    # Streamlit entry point: sidebar menu dispatches to the home page,
    # rank-progress dashboard, combined query, or review NLP analysis.
    menu=['主页','排名进度','合并查询','语义分析','物品识别','关于']
    choice = st.sidebar.selectbox('Menu',menu)
    # NOTE(review): DB credentials are hard-coded here (and again inside
    # load_data and the NLP branch) — consider moving them to config.
    engine = create_engine('mysql+pymysql://chencheng:iKWz@4*[email protected]:3306')
    catelist=pd.read_csv('Files/category__.csv')

    @st.cache
    def load_data(category_id):
        # Pull all listings of one category; 'date' is normalized to
        # midnight of the snapshot day.
        data_sql=f''' select category_id,category_name,asin,ranking,snapshotted_at as date from mws_data.project_20_listings where category_id='{category_id}' '''
        data_engine = create_engine('mysql+pymysql://chencheng:iKWz@4*[email protected]:3306')
        data=pd.read_sql(data_sql,data_engine)
        data['date']=pd.to_datetime(pd.to_datetime(data['date']).dt.date)
        return data

    if choice == '主页':
        st.title('傲基2.0品类管理')
        st.header('品类详情')
        st.markdown('针对每个asin,日排名规则为:每日排名取当日排名的中位数值')
    elif choice == '排名进度':
        st.title('当月排名进度')
        category = st.selectbox('品类名称:',catelist['category_name'])
        catid=catelist[catelist['category_name']==category]['category_id'].values[0]
        st.markdown(f'#### 已选中 **{category}** , 品类ID: **{catid}**')
        data = None
        # Retry until the query returns; shows a loading notice meanwhile.
        while data is None:
            data_load_state=st.text('正在加载数据')
            data=load_data(catid)
        # Daily rank per ASIN = median of that day's observed ranks.
        table=data.groupby(['asin','date'])['ranking'].agg('median').apply(lambda x:int(x))
        t1=table.unstack(level=1)
        # with open('app.html') as f:
        #     t1= f.read()
        # components.html(t1,height=800)
        st.write(t1)
        data_load_state.success('✔️ 数据加载完成')
        time.sleep(1)
        data_load_state.text('')
        chosed_asin = st.selectbox('Asin',data.asin.unique())
        a1=pd.DataFrame(t1.loc[f'{chosed_asin}',:].dropna()).transpose()
        # Monthly rank of the chosen ASIN = median of its daily ranks.
        a1_rank=a1.apply(lambda x: int(np.median(x)),axis=1)[0]
        st.dataframe(a1)
        # -- plotting section: daily rank line + red monthly-rank line --
        t2=pd.DataFrame(t1.loc[f'{chosed_asin}'])
        t2['date']=t2.index.get_level_values(0)
        # NOTE(review): reset_index result is discarded here (no reassign).
        t2.reset_index(drop=True)
        fig = px.line(t2, x=t2['date'], y=t2[f'{chosed_asin}'], title=f'{chosed_asin}的当月排名情况(红线为当月排名)')
        fig.add_shape(type='line', x0=t2['date'].iloc[0], y0=a1_rank, x1=t2['date'].iloc[-1], y1=a1_rank, line=dict(color='Red'), xref='x', yref='y')
        st.plotly_chart(fig, use_container_width=True)
        # -- end plotting section --
        st.info(f'ASIN {chosed_asin}的当月排名为: {int(a1_rank)}')
        # Best-ranked ASIN of the category, color-coded by how good it is.
        a2=t1.apply(lambda x: np.median(x),axis=1).sort_values()
        if int(a2[0])==1:
            st.success(f'当前品类下月排名最高的Asin为{a2.index[0]}, 月排名为: {int(a2[0])}')
        elif int(a2[0])<=8:
            st.info(f'当前品类下月排名最高的Asin为{a2.index[0]}, 月排名为: {int(a2[0])}')
        else:
            st.warning(f'当前品类下月排名最高的Asin为{a2.index[0]}, 月排名为: {int(a2[0])}')
        d1,d2=st.beta_columns(2)
        with d1:
            start_time = st.date_input("根据日期查看",t2['date'].iloc[0])
        with d2:
            end_time = st.date_input("结束日期",t2['date'].iloc[-1])
        if str(start_time) in t1.columns:
            st.write(t1.loc[:,start_time:end_time])
            t2=t1[f'{start_time}'].sort_values()
            top1=t2[0]
            # Number of ASINs ranked in the top 8 on the start date.
            topn=t2.where(t2<=8).count()
            if top1==1:
                st.success(f'{start_time}日 {t2.index[0]} 的日排名为第一!')
            elif topn>=4:
                st.success('有四个个以上asin进入前八!')
            else:
                st.warning('没有排名靠前的Asin')
        else:
            st.write('数据缺失')
        with st.beta_expander('原始数据详情',expanded=False):
            i=st.number_input('输入你想要看到的条数',min_value=1,value=50,step=50)
            detail=data.iloc[:i,:]
            st.write(detail)
    elif choice == '合并查询':
        # NOTE(review): `data` is only assigned inside the '排名进度'
        # branch above — entering this branch directly looks like it would
        # raise NameError; confirm intended flow.
        ci=st.multiselect('品类ID',data['category_id'].unique())
        newdate=st.multiselect('日期',data[(data['category_id'].isin(ci))].date.unique())
        asin=st.multiselect('ASIN',data[data['category_id'].isin(ci)&(data['date'].isin(newdate))].asin.unique())
        newtable=data[(data['category_id'].isin(ci))&(data['asin'].isin(asin))&(data['date'].isin(newdate))]
        st.write(newtable[['category_id','category_name','asin','ranking','date']])
    elif choice == '语义分析':
        # Review NLP: tokenize with spaCy, word cloud, TextBlob sentiment.
        st.subheader('亚马逊商品评论智能分析')
        st.set_option('deprecation.showPyplotGlobalUse', False)
        engine = create_engine('mysql+pymysql://root:aukey@[email protected]:3306')
        sql1='''select distinct asin from amazon.asin_review '''
        asins = pd.read_sql(sql1,engine)
        chosedasin=st.selectbox('Asin:',asins['asin'].unique())
        sql=f''' select review_star_rating,review_info, right(review_date,17) as review_date from amazon.asin_review where asin='{chosedasin}' '''
        review = pd.read_sql(sql,engine)
        st.write(review)
        no=st.number_input('输入你想要看到的评论序号',min_value=1,value=1,step=1)
        ct=review.iloc[no,1]
        raw_text =st.text_area('选择序号自动引用评论,或在下框手动输入文本',f'{ct}')
        docx = nlp(raw_text)
        c_tokens = [ token.text for token in docx ]
        c_lemma = [token.lemma_ for token in docx]
        c_pos = [word.tag_ for word in docx]
        new_df = pd.DataFrame(zip(c_tokens,c_lemma,c_pos),columns=['Tokens','Lemma','POS'])
        # Frequency of adjective/noun lemmas, used for the word cloud.
        adj=new_df[(new_df['POS']=='JJ')|(new_df['POS']=='NN')]['Lemma'].value_counts()
        wc=str(adj).replace('dtype','').replace('Lemma','').replace('int64','').replace('Name','')
        c1,c2,c3,c4=st.beta_columns(4)
        with c1:
            qf=st.button('开始切分')
        with c2:
            cy=st.button('生成词云')
        with c3:
            fx=st.button('情感分析')
        with c4:
            ck=st.button('查看动名词')
        if fx:
            # TextBlob polarity in [-1, 1]; thresholds at ±0.3.
            blob = TextBlob(raw_text)
            sent_res=[]
            sent_res.append(blob.polarity)
            sent_res.append(blob.subjectivity)
            st.success(f'该评论的偏向性为:{sent_res[0]},主观性为:{sent_res[1]}')
            if sent_res[0]>0.3:
                st.success('该客户颇为喜爱此商品!')
            elif sent_res[0]>=-0.3 and sent_res[0]<=0.3:
                st.info('该客户对此商品没有明显偏向')
            else:
                st.warning('该客户对产品很不满!')
        if cy:
            wordcloud = WordCloud().generate(wc)
            plt.imshow(wordcloud,interpolation='bilinear')
            plt.axis("off")
            st.pyplot()
        if ck:
            st.write(pd.DataFrame(adj.rename('Count')).transpose())
        if qf:
            st.dataframe(new_df)
def main():
    # Streamlit app: pick stocks, fetch a month of Yahoo prices, and plot
    # close price, per-stock % change, and a naive portfolio % change.
    st.title('Here is Stocks')
    df = pd.read_csv(
        'https://robintrack.net/api/most_popular.csv?limit=100000')
    df = df.head(2500).reset_index()
    df['symbol_name'] = df['symbol'] + ' | ' + df['name']
    # st.write(df.head(3))
    # l=df['symbol_name'].to_list()
    l = df['symbol'].to_list()
    stocks = st.sidebar.multiselect('Select stocks', l)
    # st.write(stocks)
    # Round-trip through str to strip list punctuation, leaving a plain
    # comma-separated string the user can extend by hand.
    stocks = str(stocks)
    stocks = stocks.replace('[', '').replace(']',
                                             '').replace('\'',
                                                         '').replace(' ', '')
    stocks = st.sidebar.text_input(
        'Or add more below (comma separated, please)', stocks)
    # st.write(stocks)
    # st.write(type(stocks))
    # stocks = stocks.str.replace('[','').replace(']','').replace('\'','').replace(' ','')
    # st.write(stocks)
    stocks = list(stocks.split(','))
    # st.write(type(stocks))
    # st.write(stocks)
    # st.write(stocks)
    # s=df.loc[df.symbol_name.isin(stocks)]
    # s=s['symbol'].to_list()
    # more_stocks = st.sidebar.text_input('Add more below',str(s))
    # more_stocks = [more_stocks]
    # st.write(more_stocks)
    # st.write([more_stocks])
    import datetime
    # Default window: the last 30 days.
    end = datetime.date.today()
    start = datetime.date.today() - datetime.timedelta(days=30)
    start_date = st.sidebar.date_input('Start date', start)
    end_date = st.sidebar.date_input('End date', end)
    # if start_date < end_date:
    #     st.success('Start date: `%s`\n\nEnd date:`%s`' % (start_date, end_date))
    # else:
    #     st.error('Error: End date must fall after start date.')
    # start = '2020-07-01'
    # end = '2020-07-21'
    if st.sidebar.button('Go'):
        if len(stocks) > 0:
            # Long-form frame: one row per (date, symbol); pct_change is a
            # module-level helper applied per symbol group.
            dft = dr.get_data_yahoo(stocks, start_date, end_date)
            dft = dft.stack()
            dft = dft.reset_index()
            dft = dft.groupby('Symbols').apply(pct_change)
            # st.write(dft)
            fig = px.line(dft, x='Date', y='Close', color='Symbols')
            # fig.show()
            st.plotly_chart(fig)
            fig = px.line(dft, x='Date', y='Pct Chg', color='Symbols')
            st.plotly_chart(fig)
            # Equal-weight "portfolio": % change of the summed close prices
            # relative to the first date in the window.
            x = dft.groupby('Date').agg({'Close': 'sum'}).reset_index()
            x['portfolio_pct_chg'] = 100 * (1 - x.iloc[0].Close / x.Close)
            dft = pd.merge(dft, x, how='inner', left_on='Date',
                           right_on='Date')
            fig = px.line(dft, x='Date', y='portfolio_pct_chg')
            st.plotly_chart(fig)
            st.write(dft)
color=color_value) else: plot = px.scatter(data_frame=df, x=x_value, y=y_value) # display chart in streamlit st.plotly_chart(plot) if chart_select == 'Histogram': st.sidebar.subheader("Settings for Histogram") x = st.sidebar.selectbox(label="Feature", options=numeric_columns) bin_size = st.sidebar.slider(label="Number of bins", min_value=10, max_value=100, value=50) plot = px.histogram(data_frame=df, x=x, nbins=bin_size) st.plotly_chart(plot) if chart_select == 'Lineplots': st.sidebar.subheader("Settings for Line plots.") x_value = st.sidebar.selectbox(label='X axis', options=numeric_columns) y_value = st.sidebar.selectbox(label='Y axis', options=numeric_columns) plot = px.line(data_frame=df, x=x_value, y=y_value) # display the chart st.plotly_chart(plot) except Exception as e: print(e)