g = sns.FacetGrid(df, col='winby') g.map(plt.hist, 'R_Age', bins=20) sns.lmplot(x="B__Round1_Strikes_Body Significant Strikes_Attempts", y="B__Round1_Strikes_Body Significant Strikes_Landed", col="winner", hue="winner", data=df, col_wrap=2, size=6) cnt_srs = df['R_Location'].value_counts().head(15) trace = go.Bar( x=cnt_srs.index, y=cnt_srs.values, marker=dict(color=cnt_srs.values, ), ) layout = go.Layout(title='Most Popular cities for Red fighters') data = [trace] fig = go.Figure(data=data, layout=layout) offline.iplot(fig, filename="Ratio") cnt_srs = df['B_Location'].value_counts().head(15) trace = go.Bar( x=cnt_srs.index, y=cnt_srs.values, marker=dict(color=cnt_srs.values, ),
def plot_evaluation_metrics(self,
                            metric_dict: Dict,
                            config_names: List = None,
                            xlabel: str = None,
                            ylabel: str = "Metric value",
                            title: str = None,
                            showlegend: bool = True):
    """Plots the average train and test metric values of each config as grouped bars.

    Every config becomes one bar series. The x-axis lists, for each metric
    in ``metric_dict``, its ``train_`` column immediately followed by its
    ``test_`` column, so both values can be compared side by side.

    Parameters
    ----------
    metric_dict : `dict` [`str`, `callable`]
        Evaluation metrics to compute. Same as
        `~greykite.framework.framework.benchmark.benchmark_class.BenchmarkForecastConfig.get_evaluation_metrics`.
        The plot is easiest to read with at most 2 metrics.
    config_names : `list` [`str`], default None
        Names of the configs to plot.
        If None, all the available config keys are used.
    xlabel : `str` or None, default None
        x-axis label.
    ylabel : `str` or None, default "Metric value"
        y-axis label.
    title : `str` or None, default None
        Plot title. If None, a default title is used.
    showlegend : `bool`, default True
        Whether to show the legend.

    Returns
    -------
    fig : `plotly.graph_objs.Figure`
        Interactive plotly bar plot.
    """
    metrics_df = self.get_evaluation_metrics(
        metric_dict=metric_dict,
        config_names=config_names)

    # Averages every metric over the splits of a config; configs whose
    # averages are all missing are dropped.
    per_split = metrics_df.drop(columns=["split_num"])
    per_config_mean = per_split.groupby("config_name").mean().dropna(how="all")

    # Column order: train value then test value, one metric after the other.
    plot_df = pd.DataFrame()
    for metric_name in metric_dict:
        for split in ("train", "test"):
            column = f"{split}_{metric_name}"
            plot_df[column] = per_config_mean[column]

    if title is None:
        title = "Average evaluation metric across rolling windows"

    # One bar trace per config (row of ``plot_df``).
    bars = [
        go.Bar(name=config, x=plot_df.columns, y=plot_df.loc[config].values)
        for config in plot_df.index
    ]
    layout = go.Layout(
        xaxis=dict(title=xlabel),
        yaxis=dict(title=ylabel),
        title=title,
        showlegend=showlegend,
        barmode="group",
    )
    return go.Figure(data=bars, layout=layout)
with open('trendtweets.csv', 'w', newline='') as trendfile: #puts trend list items in CSV writeto = csv.writer(trendfile, delimiter=',') writeto.writerows(trendlistexport) xtrends = [] trendvolumes = [] for item in trendlistexport: xvalue = item[0] xtrends.append(xvalue) for item in trendlistexport: yvalue = item[1] trendvolumes.append(yvalue) del xtrends[0] del trendvolumes[0] data = [go.Bar( x=xtrends, y=trendvolumes)] data = data layout = go.Layout( title='Trending Topic vs. Volume of Tweets', xaxis=dict( title='Trending Topic', titlefont=dict( family='Courier New, monospace', size=18, color='#7f7f7f' ) ), yaxis=dict( title='Volume of Tweets', titlefont=dict(
from plotly import graph_objs
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)


# In[31]:

# ``run_line_magic`` is the supported replacement for the deprecated
# ``InteractiveShell.magic``.
get_ipython().run_line_magic('matplotlib', 'inline')

# Count the tweets of every sentiment class in one pass; a class that does
# not occur in the data is reported as 0.
df = raw_data.airline_sentiment
sentiment_counts = df.value_counts()
neg = int(sentiment_counts.get("negative", 0))
pos = int(sentiment_counts.get("positive", 0))
neu = int(sentiment_counts.get("neutral", 0))
dist = [
    graph_objs.Bar(
        x=["negative", "neutral", "positive"],
        y=[neg, neu, pos],
    )
]
# ``iplot`` is the function imported above from ``plotly.offline``.
iplot({
    "data": dist,
    "layout": graph_objs.Layout(
        title="Sentiment type distribution in Airline dataset"),
})


# In[33]:

get_ipython().run_line_magic('matplotlib', 'inline')

# Plot histogram of tweets per airline
airline_count = Counter(raw_data['airline2'])
ac_df = pd.DataFrame.from_dict(airline_count, orient='index')
ac_df.plot(kind='bar')
# Split every user name once, at the first space, into a first and a last
# part. ``n`` is passed by keyword because pandas 2 no longer accepts it
# positionally.
u_name = pd.DataFrame(fake__users.name.str.split(' ', n=1).tolist(),
                      columns=['first', 'last'])

# ``groupby(...).size()`` without ``as_index=False`` always yields a Series,
# so ``reset_index(name=...)`` works on every pandas version.
user_name = u_name.groupby('first').size().reset_index(name='counts')
user_name = user_name.sort_values('counts', ascending=False).head(20)

# bar plot
# first names: the 20 most frequent
first_name = u_name.groupby('first').size().reset_index(name='counts')
first_name = first_name.sort_values('counts', ascending=False).head(20)
df = go.Bar(x=first_name['counts'], y=first_name['first'],
            orientation='h', name='First Name')

# last names: the 20 most frequent (``first_name`` is rebound here, as in
# the original script)
first_name = u_name.groupby('last').size().reset_index(name='counts')
first_name = first_name.sort_values('counts', ascending=False).head(20)
df1 = go.Bar(x=first_name['counts'], y=first_name['last'],
             orientation='h', name='Last Name')

# Both charts side by side.
fig = tools.make_subplots(rows=1, cols=2,
                          subplot_titles=('first Name', 'Last Name'))
fig.append_trace(df, 1, 1)
fig.append_trace(df1, 1, 2)
html.Div([ dcc.Markdown('''Shopping'''), sliders[5], ], style={"width": "15em", "padding": "1em", "display": "inline-block"}), html.Div([ dcc.Markdown('''Geld'''), sliders[6], ], style={"width": "15em", "padding": "1em", "display": "inline-block"}), html.Div([ html.Div([ dcc.Graph( id = "detail_view_of_pois", figure = { "data": [ go.Bar( x = ["Bildung","Gesundheit", "Freizeit", "Shopping", "Geld", "Öffentliche Gebäude", "Gastwirtschaft"], y = [0,0,0,0,0,0,0]) ], "layout" : go.Layout( title = "POIs", yaxis = dict( tick0 = 0, dtick = 1) ) } ) ], style={"display": "inline-block"}), html.Div([ dcc.Graph( id = "housing_distribution", figure = {
html.Div([ html.Div([ html.H5("Feature Comparisons with Gender"), html.P("These graphs represent a comparison between the Gender and several other features presented in the data set. ", style={'color': 'gray', 'fontSize': 14}) ], className="twelve columns"), ], className="row"), html.Div([ html.Div([ dcc.Graph( figure=go.Figure( data=[ go.Bar( x=gender_disabled_df['Gender'], y=gender_disabled_df['Disabled'], name='Disabled', marker=go.bar.Marker( color='rgb(225, 128, 0)' ) ), go.Bar( x=gender_not_disabled_df['Gender'], y=gender_not_disabled_df['Disabled'], name='Not Disabled', marker=go.bar.Marker( color='rgb(55, 83, 109)' ) ) ], layout=go.Layout( title='Gender vs Disability', showlegend=False,
yaxis=dict(title='NIV')) fig = go.Figure(data, layout) py.iplot(fig) # - # Average age at onset when NIV is used: ALS_proc_df[ALS_proc_df.NIV == 1]['Age at onset'].mean() # + configure_plotly_browser_state() ALS_proc_NIV_3R = ALS_proc_df.groupby(['3R', 'NIV' ]).REF.count().to_frame().reset_index() data = [ go.Bar(x=ALS_proc_NIV_3R[ALS_proc_NIV_3R.NIV == 0]['3R'], y=ALS_proc_NIV_3R[ALS_proc_NIV_3R.NIV == 0]['REF'], name='Not used'), go.Bar(x=ALS_proc_NIV_3R[ALS_proc_NIV_3R.NIV == 1]['3R'], y=ALS_proc_NIV_3R[ALS_proc_NIV_3R.NIV == 1]['REF'], name='Using NIV') ] layout = go.Layout(barmode='group') fig = go.Figure(data=data, layout=layout) py.iplot(fig, filename='grouped-bar') # - # Average 3R value when NIV is used: ALS_proc_df[ALS_proc_df.NIV == 1]['3R'].mean() # **Comments:** Clearly, there's a big dependence of the use of NIV with the respiratory symptoms indicated by 3R, as expected.
def _horizontal_bars(labels, values, tall=False):
    """Return a ``dcc.Graph`` with a single horizontal bar trace.

    With ``tall=True`` the figure gets the 6000px-high layout with a wide
    left margin and the y categories pinned to the order of ``labels``.
    """
    figure_kwargs = {}
    if tall:
        figure_kwargs['layout'] = go.Layout(
            height=6000,
            margin=go.layout.Margin(l=600),
            yaxis={
                'categoryorder': 'array',
                'categoryarray': labels
            })
    return dcc.Graph(figure=go.Figure(
        data=[go.Bar(y=labels, x=values, orientation='h')],
        **figure_kwargs))


def _word_cloud_img(frequencies):
    """Render ``frequencies`` as a word cloud and return it as an inline JPEG ``html.Img``."""
    buffered = BytesIO()
    WordCloud(width=500, height=700,
              background_color='white').generate_from_frequencies(
                  frequencies).to_image().save(buffered, format="JPEG")
    encoded = base64.b64encode(buffered.getvalue())
    return html.Img(src="data:image/jpeg;base64,{}".format(encoded.decode()))


def layout():
    """Build the indicators dashboard from ``indicadores.json``.

    Reads the indicators file from the working directory and returns an
    ``html.Div`` containing: the up-to-date/outdated pie, bar charts by
    update frequency and resource format, a table of the 10 most outdated
    datasets, two word clouds, and bar charts by publisher and source.

    Returns
    -------
    html.Div
        Root container of the page.
    """
    # Context manager so the file handle is closed right after parsing.
    with open('indicadores.json') as indicadores_file:
        indicadores = json.load(indicadores_file)
    datasets = indicadores['datasets']

    # ==============================
    # Bars by publisher
    # ==============================
    publicadores = indicadores['datasets_por_publicador']
    # Long names are wrapped and joined with <br> so they fit the margin.
    publicadores_yaxis = [
        '<br>'.join(wrap(x['_id']['publicador'])) for x in publicadores
    ]
    barras_publicador = _horizontal_bars(
        publicadores_yaxis, [x['datasets'] for x in publicadores], tall=True)

    # ==============================
    # Bars by source
    # ==============================
    fuentes = indicadores['datasets_por_fuente']
    fuentes_yaxis = ['<br>'.join(wrap(x['_id']['fuente'])) for x in fuentes]
    barras_fuentes = _horizontal_bars(
        fuentes_yaxis, [x['datasets'] for x in fuentes], tall=True)

    # ==============================
    # Bars by update frequency
    # ==============================
    frecuencias = indicadores['datasets_por_frecuencia']
    barras_frecuencias = _horizontal_bars(
        [x['_id']['frecuencia'] for x in frecuencias],
        [x['datasets'] for x in frecuencias])

    # ==============================
    # Bars by resource format
    # ==============================
    formatos = indicadores['recursos_por_formato']
    barras_formatos = _horizontal_bars(
        [x['_id']['formato'] for x in formatos],
        [x['recursos'] for x in formatos])

    # ==============================
    # Pie: up to date vs. outdated
    # ==============================
    # A dataset is outdated when its 'dias' value exceeds the number of
    # days that dias_frecuencias maps its update frequency to.
    outdated = [
        x for x in datasets if x['dias'] > dias_frecuencias[x['frecuencia']]
    ]
    actualizados = sum(
        1 for x in datasets if x['dias'] <= dias_frecuencias[x['frecuencia']])
    desactualizados = len(outdated)
    torta_actualizacion = dcc.Graph(
        id='graph',
        figure=go.Figure(data=[
            go.Pie(
                values=[actualizados, desactualizados],
                labels=['Actualizados', 'Desactualizados'],
            )
        ]))

    # ==============================
    # Table: most outdated datasets
    # ==============================
    # Column names are fixed up front so the table can still be built when
    # no dataset is outdated (previously ``datasets[0]`` raised IndexError).
    column_names = ('Titulo', 'Frecuencia de actualización',
                    'Días desactualizado', 'Fuente', 'Publicador',
                    'URL de descarga')
    top_outdated = sorted(outdated, key=lambda x: x['dias'])[::-1][:10]
    rows = [
        dict(
            zip(column_names,
                (d['titulo'], d['frecuencia'], int(d['dias']), d['fuente'],
                 d['publicador'], d['url']))) for d in top_outdated
    ]
    tabla_datasets = dash_table.DataTable(
        id='table',
        columns=[{
            "name": i,
            "id": i
        } for i in column_names],
        data=rows,
        style_table={'overflowX': 'scroll'})

    # ==============================
    # Keyword cloud
    # ==============================
    keywords = {
        x['_id']['keyword']: x['datasets']
        for x in indicadores['datasets_por_keyword']
    }
    nube_keywords = _word_cloud_img(keywords)

    # ==============================
    # Search cloud
    # ==============================
    # NOTE(review): this reads the same 'datasets_por_keyword' indicator as
    # the keyword cloud above; confirm whether a search-specific indicator
    # was intended.
    busquedas = {
        x['_id']['keyword']: x['datasets']
        for x in indicadores['datasets_por_keyword']
    }
    nube_busquedas = _word_cloud_img(busquedas)

    return html.Div([
        html.Div([
            html.Div([
                html.H3('Actualización', className='text-center'),
                torta_actualizacion,
            ],
                     className='col-xs-4 text-center'),
            html.Div([
                html.H3('Frecuencias', className='text-center'),
                barras_frecuencias,
            ],
                     className='col-xs-4 text-center'),
            html.Div([
                html.H3('Formatos', className='text-center'),
                barras_formatos,
            ],
                     className='col-xs-4 text-center'),
        ],
                 className='row'),
        html.H3('Top 10 datasets desactualizados', className='text-center'),
        tabla_datasets,
        html.Div([
            html.Div(
                [html.H3('Keywords', className='text-center'), nube_keywords],
                className='col-xs-6 text-center'),
            html.Div([
                html.H3('Búsquedas', className='text-center'), nube_busquedas
            ],
                     className='col-xs-6 text-center'),
        ],
                 className='row margin-bottom-xl'),
        html.Div([
            html.Div([
                html.H3('Publicadores', className='text-center'),
                barras_publicador,
            ],
                     className='col-xs-6 text-center'),
            html.Div([
                html.H3('Fuentes', className='text-center'),
                barras_fuentes,
            ],
                     className='col-xs-6 text-center'),
        ],
                 className='row')
    ],
                    className='container')
def update_agg_mode_bar_chart(selected_ucr, selected_neighbourhood,
                              selected_agg_mode, selected_n, start_date,
                              end_date):
    """Rebuild the five crime figures for the current filter selection.

    The rows of ``crime_main_geo`` are restricted to the selected UCR
    values, neighbourhood names and the inclusive date range, then
    summarised into:

    1. a bar chart of report counts per ``selected_agg_mode`` value,
    2. a horizontal bar chart of the ``selected_n`` most frequent
       offense code groups,
    3. a donut chart of crime classes,
    4. a donut chart of shooting flags,
    5. a choropleth map of report counts per ``GEOID10`` region.

    Returns the five figures as a tuple, in that order.
    """
    in_selection = (crime_main_geo['UCR'].isin(selected_ucr)
                    & crime_main_geo['Name'].isin(selected_neighbourhood))
    reports = crime_main_geo[in_selection].rename(columns={'d1': 'DATE'})
    in_period = (reports['DATE'] >= start_date) & (reports['DATE'] <= end_date)
    reports = reports[in_period]

    def donut(column, palette, legend_y, pad):
        """Donut chart of the value counts of ``column``."""
        shares = reports[column].value_counts().to_frame().reset_index()
        shares.columns = [column, 'COUNT']
        figure = go.Figure(data=[
            go.Pie(labels=shares[column], values=shares['COUNT'], hole=0.4)
        ])
        figure.update_traces(marker=dict(colors=palette))
        figure.update_layout(
            legend=dict(orientation="v",
                        yanchor="bottom",
                        y=legend_y,
                        xanchor="right",
                        x=1),
            margin=dict(t=pad, b=pad, l=pad, r=pad))
        return figure

    # Report counts per aggregation bucket (non-null CODE values).
    per_mode = reports.groupby(by=selected_agg_mode).count()[["CODE"]]
    per_mode = per_mode.reset_index()
    per_mode.columns = [selected_agg_mode, 'COUNT']
    fig_agg_mode = go.Figure(
        go.Bar(x=per_mode[selected_agg_mode],
               y=per_mode['COUNT'],
               marker_color='lightsalmon'))
    fig_agg_mode.update_xaxes(type='category')
    fig_agg_mode.update_layout(height=390,
                               yaxis_title="Number of Crime Reports")

    # Most frequent offense groups, largest at the top of the chart.
    top_offenses = reports.value_counts('OFFENSE_CODE_GROUP').to_frame()
    top_offenses = top_offenses.reset_index()
    top_offenses.columns = ['OFFENSE_CODE_GROUP', 'COUNT']
    top_offenses = top_offenses.head(selected_n)
    fig_top_crimes = go.Figure(
        go.Bar(x=top_offenses['COUNT'],
               y=top_offenses['OFFENSE_CODE_GROUP'],
               orientation='h',
               marker_color='indianred'))
    fig_top_crimes.update_layout(height=400,
                                 yaxis=dict(autorange="reversed"))

    # Share of each crime class, then share of shooting incidents.
    fig_donut_crime_class = donut(
        "CRIME_CLASS", ['#E0BBE4', '#957DAD', '#D291BC', '#FEC8D8'], -0.3, 20)
    fig_donut_shooting = donut("SHOOTING", ["#85DE77", "#FF756D"], -0.07, 0)

    # Report counts per region; the '0' GEOID10 bucket is excluded before
    # the counts are joined onto the region polygons.
    per_region = reports.groupby(by="GEOID10").count()[['OFFENSE_CODE_GROUP']]
    per_region = per_region.reset_index()
    per_region['GEOID10'] = per_region['GEOID10'].astype('str')
    per_region = per_region[per_region['GEOID10'] != '0']
    regions = pd.merge(boston_polygon, per_region, how='left', on='GEOID10')
    fig_choro = go.Figure(
        go.Choroplethmapbox(geojson=boston_geo_ok,
                            locations=regions['GEOID10'],
                            z=regions['OFFENSE_CODE_GROUP'],
                            colorscale="Blues",
                            featureidkey="GEOID10",
                            marker_opacity=0.5,
                            marker_line_width=0.7))
    fig_choro.update_layout(mapbox_style="carto-positron",
                            mapbox_zoom=10,
                            mapbox_center={
                                "lat": 42.33,
                                "lon": -71.09
                            },
                            margin={
                                "r": 0,
                                "t": 0,
                                "l": 0,
                                "b": 0
                            },
                            height=930)
    fig_choro.update_geos(fitbounds="locations", visible=True)

    return (fig_agg_mode, fig_top_crimes, fig_donut_crime_class,
            fig_donut_shooting, fig_choro)
def updateGraph(dataFields:list, filterIndex:int, graphType:int, binSize:int):
    """
    updates the graph based on the chosen data fields, data filters,
    graph type, and bin size (the latter if histogram is selected)

    dataFields  -- names of the fields of each ``dataSet`` record to plot
    filterIndex -- position of the selected filter (0, 1 or 2); any other
                   value means "no filter"
    graphType   -- index into ``GRAPHTYPE_CHOICES``
    binSize     -- histogram bin size, only used for 'Histogram'

    Returns a plotly figure. An empty figure (title only) is returned when
    there is nothing to plot, when a value cannot be converted to a number,
    or when the graph type is not one of the handled names.
    """
    # title of the graph, set to a fixed label for now
    title = 'Without filter'

    if not dataFields:
        return go.Figure(layout=dict(title=title)) # empty graph

    # Field associated with each filter position. Compared by value: the
    # previous identity test (`is 0`) against int literals only works by
    # accident of small-int caching. Unknown positions select no filter
    # instead of leaving the list undefined.
    fList = {
        0: ['isMale'],
        1: ['analyticMajor'],
        2: ['nativeEnglish'],
    }.get(filterIndex, [])

    # function which filters a piece of data
    # depending on the filters the user selected
    def dataFilter(data:dict) -> bool:
        # NOTE(review): placeholder, as before -- none of the fields in
        # fList is inspected yet, so every record is kept.
        return True

    filteredDataSet = tuple(filter(dataFilter, dataSet))

    if len(filteredDataSet) == 0:
        return go.Figure(layout=dict(title=title)) # empty graph

    # convert the data being plotted into numbers
    try:
        traceValues = [
            [ float(d[field]) for d in filteredDataSet ]
            for field in dataFields
        ]
    except (ValueError, TypeError):
        # ValueError: non-numeric text; TypeError: e.g. a None value
        return go.Figure(layout=dict(title="Error: Can't plot non-numeric data on a numeric axis."))

    # turn the position on the graph type slider into a graph type name
    graphType = GRAPHTYPE_CHOICES[graphType]

    # the two distribution plots are built by figure_factory and returned
    # directly
    if graphType == 'Histogram':
        out = ff.create_distplot(
            traceValues,
            dataFields,
            show_curve=False,
            show_rug=False,
            bin_size=binSize,
        )
        out.layout['title'] = title
        return out

    if graphType == 'Density Plot':
        out = ff.create_distplot(
            traceValues,
            dataFields,
            show_hist=False,
            show_rug=False,
        )
        out.layout['title'] = title
        return out

    layout = dict(title=title) # layout used by all of the graph types below

    # x axis shared by the per-record plots (one category per record id)
    idAxis = dict(
        title=DATA_IDFIELD,
        type='category',
        titlefont=dict(
            size=12,
        ),
    )

    if graphType == 'Violin Plot':
        traces = [
            dict(
                type='violin',
                name=field,
                y=values,
            ) for field,values in zip(dataFields,traceValues)
        ]
    elif graphType == 'Box Plot':
        traces = [
            go.Box(
                name=field,
                y=values,
            ) for field,values in zip(dataFields,traceValues)
        ]
    elif graphType == 'Dot Plot':
        # per-record plots use the raw field values, not the float copies
        traces = [
            dict(
                type='scatter',
                name=field,
                y=[d[field] for d in filteredDataSet],
                x=[d[DATA_IDFIELD] for d in filteredDataSet],
                mode='markers',
            ) for field in dataFields
        ]
        layout['xaxis'] = idAxis
    elif graphType == 'Bar Plot':
        traces = [
            go.Bar(
                name=field,
                y=[d[field] for d in filteredDataSet],
                x=[d[DATA_IDFIELD] for d in filteredDataSet],
            ) for field in dataFields
        ]
        layout['xaxis'] = idAxis
    else:
        # previously fell through to an unbound `traces`
        return go.Figure(layout=dict(title="Error: Unsupported graph type."))

    return go.Figure(data=traces, layout=layout)
# color_continuous_scale=["#FFF1A8", "#FFD608"], range_color=(min(df_proov_cum['Pct acumulado proveedores']), max(df_proov_cum['Pct acumulado proveedores'])), mapbox_style="carto-positron", zoom=4, center={ "lat": 4.570868, "lon": -74.2973328 }, opacity=0.5) fig_map_2.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) # 2. Bar Plot trace_1 = go.Bar(x=df_proov_cum['Pct acumulado proveedores'], y=df_proov_cum['Departamento'], orientation='h') layout = go.Layout(hovermode='closest') fig = go.Figure(data=[trace_1], layout=layout) # fig.update_layout(font=dict(color="#252525", family="Roboto"), paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', xaxis_title="% concentracion 10 primeros contratistas", yaxis=dict(dtick=1), margin={ "r": 0, "t": 0, "l": 0, "b": 0 })
def update_plots(n_clicks, start_date, end_date, show_exponential,
                 normalise_by_pop, align_cases_check, align_cases_input,
                 align_deaths_check, align_deaths_input,
                 align_active_cases_check, align_active_cases_input,
                 align_daily_cases_check, align_daily_cases_input,
                 align_daily_deaths_check, align_daily_deaths_input,
                 saved_json_data, *args):
    """Build every figure of the dashboard for the selected countries.

    Parameters
    ----------
    n_clicks
        Only printed; not otherwise used.
    start_date, end_date : str
        ``'%Y-%m-%d'`` dates bounding the window used for the exponential
        fit (inclusive).
    show_exponential
        When truthy, a dashed best-fit curve is added per country and the
        legend labels carry the fitted growth.
    normalise_by_pop
        When truthy, values are expressed as a percentage of
        ``POPULATIONS[country]``.
    align_*_check, align_*_input
        Per plot: whether the x-axis is "days since the reference series
        was closest to ``align_*_input``" instead of calendar dates, and
        that threshold.
    saved_json_data : str or None
        JSON produced by a previous call (country -> data); countries found
        in it are not fetched again.
    *args
        Iterables of country names; they are concatenated.

    Returns
    -------
    list
        Five ``{'data', 'layout'}`` dicts (Cases, Deaths, Currently
        Infected, Daily New Cases, Daily New Deaths), two more for
        "new vs total" Cases and Deaths, the JSON dump of the country data,
        and a trailing ``None``.
    """
    print(n_clicks, start_date, end_date, args)
    start_date = datetime.datetime.strptime(start_date, '%Y-%m-%d').date()
    end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d').date()

    country_names = []
    for country in args:
        country_names.extend(country)

    if saved_json_data is None:
        country_data = {}
    else:
        country_data = json.loads(saved_json_data)

    # Iterate over a copy: removing from the list being iterated made the
    # loop skip the country following every failed one, which then had no
    # entry in country_data.
    for country in list(country_names):
        if country not in country_data:
            try:
                data = get_data(country)
                country_data[country] = data
            except Exception as e:
                # Best effort: a country that cannot be fetched is dropped.
                print(e)
                country_names.remove(country)
                continue

    daily_titles = ('Daily New Cases', 'Daily New Deaths')
    alignment_settings = {
        'Cases': (align_cases_check, align_cases_input),
        'Deaths': (align_deaths_check, align_deaths_input),
        'Currently Infected': (align_active_cases_check,
                               align_active_cases_input),
        'Daily New Cases': (align_daily_cases_check, align_daily_cases_input),
        'Daily New Deaths': (align_daily_deaths_check,
                             align_daily_deaths_input),
    }

    out = []
    for title, (align_countries, align_input) in alignment_settings.items():
        is_daily = title in daily_titles
        if normalise_by_pop:
            axis_title = f"{title} (% of population)"
        else:
            axis_title = title

        figs = []

        if align_countries:
            xaxis_title = f'Days since the total confirmed cases reached {align_input}'
            if normalise_by_pop:
                xaxis_title += '% of the population'
        else:
            xaxis_title = ''

        layout_normal = {
            'yaxis': {
                'title': axis_title,
                'type': 'linear',
                'showgrid': True
            },
            'xaxis': {
                'title': xaxis_title,
                'showgrid': True
            },
            'showlegend': True,
            'margin': {
                'l': 70,
                'b': 100,
                't': 0,
                'r': 0
            },
            # Drop-down switching the y axis between linear and log scale.
            'updatemenus': [
                dict(buttons=list([
                    dict(args=[
                        "yaxis", {
                            'title': axis_title,
                            'type': 'linear',
                            'showgrid': True
                        }
                    ],
                         label="Linear",
                         method="relayout"),
                    dict(args=[
                        "yaxis", {
                            'title': axis_title,
                            'type': 'log',
                            'showgrid': True
                        }
                    ],
                         label="Logarithmic",
                         method="relayout")
                ]),
                     direction="down",
                     pad={
                         "r": 10,
                         "t": 10,
                         "b": 10
                     },
                     showactive=True,
                     x=0.,
                     xanchor="left",
                     y=1.2,
                     yanchor="top"),
            ]
        }

        # Daily plots get a second drop-down toggling between the bar
        # traces and the scatter traces. The visibility lists follow the
        # trace order built below: the legend-only dummy trace(s) first,
        # then per country scatter, (fit,) bar.
        layout_daily_plot = copy.deepcopy(layout_normal)
        layout_daily_plot['updatemenus'].append(
            dict(buttons=list([
                dict(args=[{
                    "visible":
                    [False, False] + [False, False, True] * len(country_names)
                    if show_exponential else [False] +
                    [False, True] * len(country_names)
                }],
                     label="Bar",
                     method="update"),
                dict(args=[{
                    "visible":
                    [True, True] + [True, True, False] * len(country_names)
                    if show_exponential else [True] +
                    [True, False] * len(country_names)
                }],
                     label="Scatter",
                     method="update")
            ]),
                 direction="down",
                 pad={
                     "r": 10,
                     "t": 10,
                     "b": 10
                 },
                 showactive=True,
                 x=0.2,
                 xanchor="left",
                 y=1.2,
                 yanchor="top"), )

        # Dummy single-point traces that only serve as legend entries.
        if show_exponential:
            figs.append(
                go.Scatter(
                    x=[datetime.date(2020, 2, 20)]
                    if not align_countries else [0],
                    y=[0],
                    mode='lines',
                    line={
                        'color': 'black',
                        'dash': 'dash'
                    },
                    showlegend=True,
                    visible=False if is_daily else 'legendonly',
                    name='Best exponential fits',
                    yaxis='y1',
                    legendgroup='group2',
                ))
            label = 'COUNTRY : best fit (doubling time)'
        else:
            label = 'COUNTRY'
        figs.append(
            go.Scatter(
                x=[datetime.date(2020, 2, 20)] if not align_countries else [0],
                y=[0],
                mode='lines+markers',
                line={'color': 'black'},
                showlegend=True,
                visible=False if is_daily else 'legendonly',
                name=label,
                yaxis='y1',
                legendgroup='group2',
            ))

        for i, c in enumerate(country_names):
            print(c)
            if country_data[c] is None:
                print("Cannot retrieve data from country:", c)
                continue
            # Daily series are the day-to-day differences of the cumulative
            # series, so they start one date later.
            if title == 'Daily New Cases':
                dates = country_data[c]['Cases']['dates'][1:]
                xdata = np.arange(len(dates))
                ydata = np.diff(
                    np.array(country_data[c]['Cases']['data']).astype('float'))
            elif title == 'Daily New Deaths':
                dates = country_data[c]['Deaths']['dates'][1:]
                xdata = np.arange(len(dates))
                ydata = np.diff(
                    np.array(
                        country_data[c]['Deaths']['data']).astype('float'))
            elif title not in country_data[c]:
                continue
            else:
                dates = country_data[c][title]['dates']
                xdata = np.arange(len(dates))
                ydata = country_data[c][title]['data']
                ydata = np.array(ydata).astype('float')

            date_objects = np.asarray([
                datetime.datetime.strptime(date, '%Y-%m-%d').date()
                for date in dates
            ])

            if normalise_by_pop:
                ydata = ydata / POPULATIONS[c] * 100

            if align_countries:
                # Shift x so that 0 is the day on which the reference
                # series is closest to the requested threshold.
                if title in ['Cases', 'Deaths']:
                    idx_when_n_cases = np.abs(ydata - align_input).argmin()
                elif title in ['Currently Infected', 'Daily New Cases']:
                    ydata_cases = np.array(
                        country_data[c]['Cases']['data']).astype('float')
                    ydata_cases = ydata_cases / POPULATIONS[
                        c] * 100 if normalise_by_pop else ydata_cases
                    idx_when_n_cases = np.abs(ydata_cases -
                                              align_input).argmin()
                elif title in ['Daily New Deaths']:
                    ydata_cases = np.array(
                        country_data[c]['Deaths']['data']).astype('float')
                    ydata_cases = ydata_cases / POPULATIONS[
                        c] * 100 if normalise_by_pop else ydata_cases
                    idx_when_n_cases = np.abs(ydata_cases -
                                              align_input).argmin()
                if is_daily:
                    # Daily series are one element shorter than the
                    # cumulative series the index was computed on.
                    idx_when_n_cases -= 1
                xdata = xdata - idx_when_n_cases

            model_date_mask = (date_objects <= end_date) & (date_objects >=
                                                            start_date)

            # One model point per calendar day of the fit window.
            model_dates = []
            model_xdata = []
            date = start_date
            d_idx = min(xdata[model_date_mask])
            while date <= end_date:
                model_dates.append(date)
                model_xdata.append(d_idx)
                date += datetime.timedelta(days=1)
                d_idx += 1
            model_xdata = np.array(model_xdata)

            # Straight-line fit of log(y) against x inside the window,
            # i.e. y ~ exp(logA) * exp(b * x).
            b, logA = np.polyfit(xdata[model_date_mask],
                                 np.log(ydata[model_date_mask]), 1)
            lin_yfit = np.exp(logA) * np.exp(b * model_xdata)

            if show_exponential:
                if np.log(2) / b > 1000 or np.log(2) / b < 0:
                    double_time = 'no growth'
                else:
                    double_time = fr'{np.log(2) / b:.1f} days to double'
                label = fr'{c.upper():<10s}: {np.exp(b):.2f}^t ({double_time})'
            else:
                label = fr'{c.upper():<10s}'

            figs.append(
                go.Scatter(
                    x=date_objects if not align_countries else xdata,
                    y=ydata,
                    hovertext=[
                        f"Date: {d.strftime('%d-%b-%Y')}" for d in date_objects
                    ] if align_countries else '',
                    mode='lines+markers',
                    marker={'color': colours[i]},
                    line={'color': colours[i]},
                    showlegend=True,
                    visible=False if is_daily else True,
                    name=label,
                    yaxis='y1',
                    legendgroup='group1',
                ))

            if show_exponential:
                # A negative doubling time means decay; hide the fit then.
                if np.log(2) / b < 0:
                    show_plot = False
                else:
                    show_plot = True
                figs.append(
                    go.Scatter(
                        x=model_dates if not align_countries else model_xdata,
                        y=lin_yfit,
                        hovertext=[
                            f"Date: {d.strftime('%d-%b-%Y')}"
                            for d in model_dates
                        ] if align_countries else '',
                        mode='lines',
                        line={
                            'color': colours[i],
                            'dash': 'dash'
                        },
                        showlegend=False,
                        visible=False if is_daily else show_plot,
                        name=fr'Model {c.upper():<10s}',
                        yaxis='y1',
                        legendgroup='group1',
                    ))

            if is_daily:
                figs.append(
                    go.Bar(x=date_objects if not align_countries else xdata,
                           y=ydata,
                           hovertext=[
                               f"Date: {d.strftime('%d-%b-%Y')}"
                               for d in date_objects
                           ] if align_countries else '',
                           showlegend=True,
                           visible=True,
                           name=label,
                           marker={'color': colours[i]},
                           yaxis='y1',
                           legendgroup='group1'))

        # Chosen outside the country loop so it is defined (and belongs to
        # this plot) even when no country produced a trace.
        if is_daily:
            layout_out = copy.deepcopy(layout_daily_plot)
        else:
            layout_out = copy.deepcopy(layout_normal)

        out.append({'data': figs, 'layout': layout_out})

    # Plot 'New Cases vs Total Cases' and 'New Deaths vs Total Deaths'
    for title in ['Cases', 'Deaths']:
        fig_new_vs_total = []
        for i, c in enumerate(country_names):
            # Same guard as the plots above: skip countries without data.
            if country_data[c] is None or title not in country_data[c]:
                continue
            lookback_days = 7  # Number of days to look back
            cases = np.array(country_data[c][title]['data']).astype('float')
            xdata = np.copy(cases[lookback_days:])
            ydata = np.diff(cases)
            len_ydata = len(ydata)

            # Compute new cases over the past lookback_days days
            ydata = np.sum([
                np.array(ydata[start:start + lookback_days])
                for start in range(len_ydata)
                if start <= (len_ydata - lookback_days)
            ],
                           axis=1)

            dates = country_data[c][title]['dates'][lookback_days:]
            date_objects = np.asarray([
                datetime.datetime.strptime(date, '%Y-%m-%d').date()
                for date in dates
            ])

            # Only plot once the total exceeds 100 cases / 10 deaths.
            mask = xdata > 100 if title == 'Cases' else xdata > 10
            xdata = xdata[mask]
            ydata = ydata[mask]
            date_objects = date_objects[mask]

            if normalise_by_pop:
                xdata = xdata / POPULATIONS[c] * 100
                ydata = ydata / POPULATIONS[c] * 100

            fig_new_vs_total.append(
                go.Scatter(
                    x=xdata,
                    y=ydata,
                    hovertext=[
                        f"Date: {d.strftime('%d-%b-%Y')}" for d in date_objects
                    ],
                    mode='lines+markers',
                    marker={'color': colours[i]},
                    line={'color': colours[i]},
                    showlegend=True,
                    name=fr'{c.upper():<10s}',
                    yaxis='y1',
                    legendgroup='group1',
                ))

        if normalise_by_pop:
            yaxis_title = f'New {title} (% of population) per week (log scale)'
            xaxis_title = f'Total {title} (% of population) (log scale)'
        else:
            yaxis_title = f'New {title} per week'
            xaxis_title = f'Total {title}'

        layout_new_vs_total = {
            'yaxis': {
                'title': yaxis_title,
                'type': 'log',
                'showgrid': True
            },
            'xaxis': {
                'title': xaxis_title,
                'type': 'log',
                'showgrid': True
            },
            'showlegend': True,
            'margin': {
                'l': 70,
                'b': 100,
                't': 50,
                'r': 0
            },
        }
        out.append({'data': fig_new_vs_total, 'layout': layout_new_vs_total})

    out.append(json.dumps(country_data))
    out.append(None)

    return out
WH_S_2012=WH_S[WH_S['Year']==2012] WH_S_2012=pd.DataFrame(WH_S.groupby('Product_Category', as_index=False)['Order_Demand'].mean()) WH_S_2012= WH_S_2012.sort_values('Order_Demand', ascending=False) WH_S_2013=WH_S[WH_S['Year']==2013] WH_S_2013=pd.DataFrame(WH_S_2013.groupby('Product_Category', as_index=False)['Order_Demand'].mean()) WH_S_2013=WH_S_2013.sort_values('Order_Demand', ascending=False) WH_S_2014=WH_S[WH_S['Year']==2014] WH_S_2014=pd.DataFrame(WH_S_2014.groupby('Product_Category', as_index=False)['Order_Demand'].mean()) WH_S_2014=WH_S_2014.sort_values('Order_Demand', ascending=False) WH_S_2015=WH_S[WH_S['Year']==2015] WH_S_2015=pd.DataFrame(WH_S_2015.groupby('Product_Category', as_index=False)['Order_Demand'].mean()) WH_S_2015=WH_S_2015.sort_values('Order_Demand', ascending=False) WH_S_2016=WH_S[WH_S['Year']==2016] WH_S_2016=pd.DataFrame(WH_S_2016.groupby('Product_Category', as_index=False)['Order_Demand'].mean()) WH_S_2016=WH_S_2016.sort_values('Order_Demand', ascending=False) trace1 = go.Bar(x=WH_S_2012['Product_Category'], y=WH_S_2012['Order_Demand'], name='Year_2012') trace2 = go.Bar(x=WH_S_2013['Product_Category'], y=WH_S_2013['Order_Demand'], name='Year_2013') trace3 = go.Bar(x=WH_S_2014['Product_Category'], y=WH_S_2014['Order_Demand'], name='Year_2014') trace4 = go.Bar(x=WH_S_2015['Product_Category'], y=WH_S_2015['Order_Demand'], name='Year_2015') trace5 = go.Bar(x=WH_S_2016['Product_Category'], y=WH_S_2016['Order_Demand'], name='Year_2016') fig = tools.make_subplots(rows=2, cols=5) fig.append_trace(trace5, 1, 1) fig.append_trace(trace4, 1, 2) fig.append_trace(trace3, 1, 3) fig.append_trace(trace2, 1, 4) fig.append_trace(trace1, 1, 5) layout=fig['layout'].update(height=500, width=1200, title='Order demand vs product category with respect to all years for '+ str (Warehouse[i]),xaxis=dict( title='Product Category', titlefont=dict( family='Courier New, monospace', size=18,
def PCA_sumplot(Z,
                Z_chose,
                Theta_record,
                pca_obj,
                fig_dens_I=None,
                new_data=None,
                Ncols=2,
                PC_select=2,
                height=600,
                width=1000):
    """Display a three-panel summary: density curve, per-PC sums, PC loadings.

    Panels are placed row by row on a grid with ``Ncols`` columns:

    1. ``'probab'`` - Gaussian kernel density fitted on ``Z[Z_chose]`` and
       evaluated on 100 evenly spaced points in [-2, 8]. When ``fig_dens_I``
       is non-empty its first trace is overlaid on the same panel.
    2. Column-wise sums of ``new_data`` as bars labelled 'PC 1', 'PC 2', ...
    3. Row ``PC_select`` of ``pca_obj.components_`` as bars, labelled with
       the mean of the first column of every ``Theta_record[...]['comb']``
       array (divided by 1000, truncated, suffixed with 'k y').

    Parameters
    ----------
    Z : indexable
        Data from which ``Z[Z_chose]`` is taken for the density estimate.
        NOTE(review): presumably a 2-D array of samples - confirm with caller.
    Z_chose : index
        Selector applied to ``Z``.
    Theta_record : dict
        Each value must expose a 2-D array under the ``'comb'`` key.
    pca_obj : object
        Fitted decomposition exposing ``components_``.
    fig_dens_I : list or None
        Optional traces; only the first one is used.
    new_data : 2-D array or None
        Per-sample PC coordinates. ``None`` yields an empty second panel.
    Ncols, PC_select, height, width : int
        Grid width, component row to plot, and figure size in pixels.

    Returns
    -------
    None
        The figure is rendered with ``iplot``.
    """
    # None instead of mutable list defaults; an absent ``new_data`` becomes an
    # empty 2-D array so that ``.shape[1]`` is well defined.
    if new_data is None:
        new_data = np.empty((0, 0))

    titles = [
        'probab', 'Ave. PC coordinates among kde sampled theta vectors',
        'loadings of PC {}'.format(PC_select + 1)
    ]
    n_rows = (len(titles) + Ncols - 1) // Ncols  # ceiling division
    fig_subplots = tools.make_subplots(rows=n_rows,
                                       cols=Ncols,
                                       subplot_titles=tuple(titles))

    for gp in range(len(titles)):
        row, col = divmod(gp, Ncols)
        pos1, pos2 = row + 1, col + 1

        if gp == 0:
            zprime = Z[Z_chose]
            bandwidth = estimate_bandwidth(zprime,
                                           quantile=0.2,
                                           n_samples=500)
            X_plot = np.linspace(-2, 8, 100)[:, np.newaxis]
            kde_plot = KernelDensity(kernel='gaussian',
                                     bandwidth=bandwidth).fit(zprime)
            log_dens = kde_plot.score_samples(X_plot)
            trace = go.Scatter(x=X_plot[:, 0],
                               y=np.exp(log_dens),
                               mode='lines',
                               fill='tozeroy',
                               line=dict(color='red', width=2))
            fig_subplots.append_trace(trace, pos1, pos2)
            if fig_dens_I is not None and len(fig_dens_I):
                fig_subplots.append_trace(fig_dens_I[0], pos1, pos2)

        elif gp == 1:
            feat_sum = np.sum(new_data, axis=0)
            trace = go.Bar(
                x=['PC {}'.format(x + 1) for x in range(new_data.shape[1])],
                y=feat_sum,
                marker=dict(color='rgb(0,0,205)'))
            fig_subplots.append_trace(trace, pos1, pos2)
            fig_subplots['layout']['yaxis' + str(gp + 1)].update(title='mean')

        elif gp == 2:
            times_data = np.array([
                list(record['comb'][:, 0])
                for record in Theta_record.values()
            ])
            times_av = np.mean(times_data, axis=0)
            times_av = ['{}k y'.format(int(x)) for x in times_av / 1000]
            Xcomps = pca_obj.components_
            trace = go.Bar(x=times_av,
                           y=Xcomps[PC_select, :],
                           marker=dict(color='rgb(0,0,205)'))
            fig_subplots.append_trace(trace, pos1, pos2)
            fig_subplots['layout']['yaxis' + str(gp + 1)].update(
                title='eigen value')

    fig_subplots['layout'].update(height=height, width=width)
    # The original also built a second go.Figure from ``fig_subplots`` that was
    # never displayed or returned; that dead construction has been dropped.
    iplot(fig_subplots)
temp=app_train['NAME_CONTRACT_TYPE'].value_counts() x=temp.index y=temp.values plt.pie(x=temp.values,explode=(0.1,0),labels=temp.index,startangle=80,autopct='%1.1f%%', colors=['#F1BF1B','#B1F11B'],frame=False,radius=1.5) temp = app_train['NAME_INCOME_TYPE'].value_counts() temp_y0 = [] temp_y1=[] for val in temp.index: temp_y1.append(np.sum(app_train["TARGET"][app_train["NAME_INCOME_TYPE"]==val] == 1)) temp_y0.append(np.sum(app_train["TARGET"][app_train["NAME_INCOME_TYPE"]==val] == 1)) trace1 = go.Bar(x= temp.index, y = (temp_y1 / temp.sum()) * 100,name='YES') #go is plotly.graph_objs trace2 = go.Bar(x= temp.index, y = (temp_y0 / temp.sum()) * 100,name='NO') #go is plotly.graph_objs data = [trace1, trace2] layout = go.Layout( title = "Income sources of Applicant's in terms of loan is repayed or not in %", #barmode='stack', width = 1000, xaxis=dict( title='Income source', tickfont=dict( size=14, color='rgb(107, 107, 107)' ) ), yaxis=dict(
def _fitness_gene_sets(table, fdrth):
    """Split the genes of one MAGeCK table into (depleted, enriched, null) id sets."""
    neg_fdr = table['neg|fdr']
    pos_fdr = table['pos|fdr']
    depleted = set(table[(neg_fdr < fdrth) & (pos_fdr >= fdrth)].index.tolist())
    enriched = set(table[(neg_fdr >= fdrth) & (pos_fdr < fdrth)].index.tolist())
    null = set(table.index.tolist()).difference(depleted.union(enriched))
    return depleted, enriched, null


def impact_on_phenotype(mo_uncorrected_file, mo_corrected_file, fdrth=0.05,
                        saveto='./impact_on_phenotype'):
    """
    Compare gene calls before and after correction and save two plotly charts.

    Every gene (row id) of each input is classified as loss of fitness
    (``neg|fdr`` below the cutoff only), gain of fitness (``pos|fdr`` below
    the cutoff only) or no phenotype (everything else). A 3x3 contingency
    table (rows: uncorrected class, columns: corrected class) is built from
    the two classifications and used for:

    * a stacked bar chart written to ``saveto + '_barchart.html'``;
    * four pie charts written to ``saveto + '_piechart.html'``.

    :param fdrth: fdr cutoff
    :param saveto: path to save file
    :param mo_uncorrected_file: mageck uncorrected output
    :param mo_corrected_file: mageck corrected output
    :return: None
    """
    pre = pd.read_csv(mo_uncorrected_file, compression='infer', sep="\t", index_col='id')
    post = pd.read_csv(mo_corrected_file, compression='infer', sep="\t", index_col='id')

    # Only id sets are derived from the tables, so row order is irrelevant.
    pre_sets = _fitness_gene_sets(pre, fdrth)
    post_sets = _fitness_gene_sets(post, fdrth)

    # cm[r, c] = number of genes in class r before and class c after correction;
    # class order is (loss of fitness, no phenotype, gain of fitness).
    cm = np.array([[len(before.intersection(after)) for after in post_sets]
                   for before in pre_sets])

    total = cm.sum()
    phen_total = cm[[0, 2]].sum()  # genes with a phenotype before correction

    IMPACTEDg = (np.triu(cm, 1) + np.tril(cm, -1)).sum() / total * 100
    # Phenotype genes whose class changed: the off-diagonal cells of rows 0 and
    # 2. The previous code summed cm[1, 2] (a gene with no original phenotype)
    # instead of cm[2, 0], which was inconsistent with the denominator.
    IMPACTED_phenGenes = (cm[0, 1] + cm[0, 2] + cm[2, 0] + cm[2, 1]) / phen_total * 100
    DISTORTEDg = (cm[2, 0] + cm[0, 2]) / total * 100
    DISTORTED_phenGenes = (cm[2, 0] + cm[0, 2]) / phen_total * 100

    # cm_fitmat[i, j]: percentage of genes originally in class j that end in class i.
    row_totals = cm.sum(axis=1)
    cm_fitmat = cm.T / row_totals * 100
    original_counts = row_totals.tolist()

    label_list = ['loss_of_fitness', 'no_phenotype', 'gain_of_fitness']
    mod_label = ["{}<br>{}</br>".format(count, label)
                 for count, label in zip(original_counts, label_list)]

    barplot_list = [
        go.Bar(x=mod_label, y=cm_fitmat[i, :].tolist(), name=label_list[i])
        for i in range(3)
    ]

    layout = go.Layout(
        title='Impact on phenotype',
        xaxis=dict(
            title="Original Counts",
        ),
        yaxis=dict(title='% of genes', ),
        barmode='stack',
        legend=dict(
            x=1,
            y=1,
            orientation="v"
        ),
        annotations=[
            dict(
                x=1.17,
                y=1.01,
                align="right",
                valign="top",
                text='Corrected Counts',
                showarrow=False,
                xref="paper",
                yref="paper",
                xanchor="left",
                yanchor="top"
            )]
    )
    figure = go.Figure(data=barplot_list, layout=layout)
    py.plot(figure, filename=saveto + '_barchart.html', auto_open=False,
            config=PlotData.plotly_conf(cfprm={'theme': 'ggplot'}))

    # One pie per summary statistic, arranged on a 2x2 grid of plot domains.
    pie_specs = [
        ('Overall impact', IMPACTEDg, 'red', [0, .48], [0, .49]),
        ('Overall distortion', DISTORTEDg, 'blue', [.52, 1], [0, .49]),
        ('Impact (G/L fitness genes)', IMPACTED_phenGenes, 'green', [0, .48], [.51, 1]),
        ('Distortion (G/L fitness genes)', DISTORTED_phenGenes, 'orange', [.52, 1], [.51, 1]),
    ]
    pie_list = []
    for label, value, colour, x_domain, y_domain in pie_specs:
        pie_list.append({
            'labels': [label, 'Rest of the genes'],
            'values': [value, 100 - value],
            'type': 'pie',
            'domain': {'x': x_domain,
                       'y': y_domain
                       },
            'marker': {'colors': [colour, '#FFFFFF'],
                       'line': {'color': '#000000', 'width': 0.5},
                       },
            'hoverinfo': 'label+percent',
            'textinfo': 'label'
        })

    layout = go.Layout(
        title="Impact of Correction on Genes",
    )
    figure = go.Figure(data=pie_list, layout=layout)
    py.plot(figure, filename=saveto + '_piechart.html', auto_open=False,
            config=PlotData.plotly_conf())
    return None
def evolution_bars_plot(df):
    """Build a stacked-bar figure with a dropdown toggling cumulative/daily data.

    Seven traces are added in a fixed order: three cumulative bars ('Décès',
    'Guéris', 'Cas hospitalisés'), one line for 'Cas confirmés', then three
    initially hidden daily bars ('Nouveaux décès', 'Nouveaux guéris',
    'Nouveaux cas'). The dropdown buttons switch trace visibility by position,
    so the order of the traces must match the ``visible`` lists below.

    :param df: data frame holding a 'Date' column plus the seven series
        columns named above.
    :returns: the configured figure (it is not displayed here).
    :rtype: plotly.graph_objs.Figure
    """
    cumulative_title = 'Évolution du Covid-19 en Guinée : données cumulées'
    daily_title = 'Suivi journalier du Covid-19 en Guinée'

    x = df['Date'].values
    fig = go.Figure()

    # Traces 0-2: cumulative bars.
    fig.add_traces(data=[
        go.Bar(name='Décès', x=x, y=df['Décès'].values, marker_color='red'),
        go.Bar(name='Guéris',
               x=x,
               y=df['Guéris'].values,
               marker_color='darkgreen'),
        go.Bar(name='Cas hospitalisés',
               x=x,
               y=df['Cas hospitalisés'],
               marker_color='gold')
    ])
    # Trace 3: confirmed cases.
    fig.add_trace({
        'x': df['Date'],
        'y': df['Cas confirmés'],
        'name': 'Cas confirmés'
    })
    # Traces 4-6: daily bars, hidden until "Par jour" is selected.
    fig.add_traces(data=[
        go.Bar(name='Nouveaux décès',
               x=x,
               y=df['Nouveaux décès'].values,
               marker_color='red',
               visible=False),
        go.Bar(name='Nouveaux guéris',
               x=x,
               y=df['Nouveaux guéris'].values,
               marker_color='darkgreen',
               visible=False),
        go.Bar(name='Nouveaux cas',
               x=x,
               y=df['Nouveaux cas'],
               marker_color='gold',
               visible=False)
    ])
    fig.update_yaxes(automargin=True)

    # Both buttons set the title text so that returning to "Cumul" restores
    # the cumulative title. Only ``title.text`` is updated, which leaves the
    # title position and font untouched.
    fig.update_layout(
        updatemenus=[
            dict(type='dropdown',
                 active=0,
                 buttons=[
                     dict(
                         label="Cumul",
                         method="update",
                         args=[{
                             "visible":
                             [True, True, True, True, False, False, False]
                         }, {
                             "title.text": cumulative_title,
                         }],
                     ),
                     dict(
                         label="Par jour",
                         method="update",
                         args=[{
                             "visible":
                             [False, False, False, False, True, True, True]
                         }, {
                             "title.text": daily_title,
                         }],
                     )
                 ],
                 direction="down",
                 showactive=True,
                 x=0.01,
                 xanchor="center",
                 y=1.02,
                 yanchor='middle',
                 bgcolor='darkred',
                 font=dict(color='gray', size=14)),
        ],
        template='plotly_dark',
        barmode='stack',
        hovermode='x',
        xaxis_tickangle=-60,
        legend_orientation="h",
        legend=dict(x=0, y=-0.3),
        margin=dict(t=40, b=0, l=25, r=3),
        title=dict(text=cumulative_title,
                   x=0.5,
                   y=0.1,
                   font=dict(size=12.5)),
    )
    return fig
# Normalise every cuisines entry: split on commas, trim each name, sort the
# names and join them back with the same separator.
separator = ','
main_list = [
    separator.join(sorted(piece.strip() for piece in item.split(',')))
    for item in data.cuisines
]
data['new_cuisines'] = main_list
data['new_cuisines'][0]

# Visualization


def _count_by(column, ascending):
    """Return a frame of row counts per value of ``column``, ordered by count."""
    counts = data.groupby([column]).size().reset_index(name="count")
    return counts.sort_values('count', ascending=ascending)


# Number of rows per place, smallest first.
location_count = _count_by('place', True)
location_count.head()
inputdata = [go.Bar(x=location_count['place'], y=location_count['count'])]
py.iplot(inputdata, filename='basic-bar')
plt.show()

# Number of rows per rate value, smallest first.
rate_group_count = _count_by('rate', True)
rate_group_count.head()
inputdata = [go.Bar(x=rate_group_count['rate'], y=rate_group_count['count'])]
py.iplot(inputdata, filename='basic-bar')
plt.show()

# The 20 most frequent names.
name_group_count = _count_by('name', False)
top20 = name_group_count.head(20)
top20
inputdata = [go.Bar(x=top20['name'], y=top20['count'])]
py.iplot(inputdata, filename='basic-bar')
import dash_html_components as html import dash_core_components as dcc import plotly.graph_objs as go import numpy as np import pandas as pd data = pd.read_csv("/Users/veronikapeskova/projects/portfolio/plotly_dash/apps_app/apps/data/category.csv") bar_data = [ go.Bar( y=data['Category'].value_counts().sort_values().to_dense().keys(), x=data['Category'].value_counts().sort_values(), orientation='h', text="Number of Apps in Category", )] layout = go.Layout( height=800, title='Number of Apps in each category', hovermode='closest', yaxis=dict(title='Category', gridwidth=2, domain=[0.1, 1]), showlegend=False ) fig = go.Figure(data=bar_data, layout=layout) graph = html.Div( [ dcc.Graph( id='my-graph',
def _split_player_name(player_name):
    """Return (first_name, second_name): first word, and the name minus that leading word."""
    first_name = player_name.split()[0]
    # count=1: strip only the first "<first> " occurrence, so a surname that
    # repeats the first name (e.g. "Al Hasan Al Haydos") is left intact.
    second_name = player_name.replace(first_name + " ", "", 1)
    return first_name, second_name


def _player_metric_total(first_name, second_name, metric):
    """Sum ``metric`` over the ``new_sample_data`` rows of one player."""
    is_player = ((new_sample_data["first_name"] == first_name) &
                 (new_sample_data["second_name"] == second_name))
    return new_sample_data[is_player][metric].sum()


def bar_mul(player_name1: str, player_name2: str, metric1: str, metric2=None):
    '''Presents either a singular or grouped bar graph for a pair of players.

    With one metric, a single bar per player is drawn. With two metrics, a
    grouped bar chart is drawn with one group per metric and one bar per
    player. Values are the sums of the metric over the matching rows of
    ``new_sample_data``; axis labels come from ``labels_dict``.

    Parameters
    ----------
    player_name1: string
        The name of the first player being searched for.

    player_name2: string
        The name of the second player being searched for.

    metric1: string
        The first metric to be visualized.

    metric2 (optional): string
        The second metric to be visualized.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    '''
    f_name1, s_name1 = _split_player_name(player_name1)
    f_name2, s_name2 = _split_player_name(player_name2)

    if metric2:
        metrics = [metric1, metric2]
        metric_labels = [labels_dict[metric1], labels_dict[metric2]]
        bars = []
        for display_name, first, second in ((player_name1, f_name1, s_name1),
                                            (player_name2, f_name2, s_name2)):
            totals = []
            for metric in metrics:
                totals.append(_player_metric_total(first, second, metric))
            bars.append(go.Bar(name=display_name, x=metric_labels, y=totals))
        fig = go.Figure(data=bars)
        fig.update_layout(barmode='group')
    else:
        fig = px.bar(
            x=[player_name1, player_name2],
            y=[
                _player_metric_total(f_name1, s_name1, metric1),
                _player_metric_total(f_name2, s_name2, metric1)
            ],
            labels={
                'x': labels_dict[metric1],
                'y': 'Value'
            },
            title=f'{player_name1} vs {player_name2}: {labels_dict[metric1]}')
    fig.show()
cufflinks.go_offline(connected=True) init_notebook_mode(connected=True) plotly.tools.set_credentials_file(username='******', api_key='d1Ip8twrCBnED72ibDBX') data = pd.read_csv('antibiotics_data.csv') posData = data.loc[data["Gram Staining "] == 'positive'] negData = data.loc[data["Gram Staining "] == 'negative'] bacteria = data["Bacteria "] trace1 = go.Bar(x=bacteria, y=negData["Penicilin"], name='Negative Penicilin', text=negData["Penicilin"], textposition='auto', marker=dict(color='rgb(234, 177, 202)', )) trace2 = go.Bar(x=bacteria, y=negData["Streptomycin "], name='Negative Streptomycin', text=negData["Streptomycin "], textposition='auto', marker=dict(color='rgb(243, 208, 208)', )) trace3 = go.Bar(x=bacteria, y=negData["Neomycin"], text=negData["Neomycin"], name='Negative Neomycin', textposition='auto',
# Headcount columns that are turned into proportions of their row total.
_headcount_cols = [
    "Male FTC headcount",
    "Female FTC headcount",
    "Male OEC headcount",
    "Female OEC headcount",
]

# Row total of the four headcounts (plain addition, so a missing value
# propagates into the total).
t = join_df[_headcount_cols[0]]
for _col in _headcount_cols[1:]:
    t = t + join_df[_col]

# "<group> headcount" -> "<group> Proportion"
yaxis = [_col.replace("headcount", "Proportion") for _col in _headcount_cols]
for _src, _dst in zip(_headcount_cols, yaxis):
    join_df[_dst] = join_df[_src] / t

join_df = join_df.round(decimals=2)
join_df = join_df.drop(_headcount_cols, axis=1)

# One stacked bar series per proportion, on a two-level (grade, year) x axis.
trace = []
for _name in yaxis:
    trace.append(
        go.Bar(x=[join_df["grade"], join_df["year"]],
               y=join_df[_name],
               name=_name))

_title = dict(text="Headcount for FTC and OEC by grade by proportion",
              y=0.9,
              x=0.5,
              xanchor="center",
              yanchor="top")
_legend = dict(title=None,
               y=1.02,
               yanchor="bottom",
               x=1,
               xanchor="right",
               orientation="h")
layout = go.Layout(title=_title,
                   barmode="stack",
                   yaxis=dict(tickformat="%"),
                   legend=_legend,
                   legend_title_text='Click legend to filter data')
def sent_cust(target):
    """Build the sentiment figures for one target from the pickled sentiment data.

    The pickle at ``data/yelp_sentiment/yelp_sent.p`` is read as a mapping of
    aspect name -> DataFrame. The returned dict holds:

    * one stacked horizontal bar figure per aspect (keyed by aspect name);
    * ``"full"``: all aspect traces in a single figure;
    * ``"quarter"``: the values of row ``target + "-TQ "`` across all columns,
      each bar annotated with the target's rank among the "-TQ" rows;
    * ``"small"``: a compact bar chart of every "-TQ" row's summed score.

    :param target: row-label prefix; the row looked up is ``target + "-TQ "``
        (note the trailing space).
    :returns: dict of figure name -> plotly figure.
    """
    import colorlover as cl
    import plotly.graph_objs as go

    # Copy the palettes instead of extending colorlover's own list in place:
    # the old code mutated ``cl.scales`` and grew it on every call. The
    # doubled sequence reproduces the palette the old code had on first call.
    colors_all = (list(cl.scales['11']["qual"]["Set3"]) +
                  list(cl.scales['11']["qual"]["Paired"])) * 2

    # NOTE: dirname('__file__') receives a string literal, not the module
    # path, so the base directory is the current working directory.
    my_path = os.path.abspath(os.path.dirname('__file__'))
    path_in_sent = os.path.join(my_path, "data/yelp_sentiment/")
    # NOTE(review): pickle.load executes arbitrary code from the file; the
    # file must come from a trusted source.
    with open(path_in_sent + "yelp_sent.p", "rb") as handle:
        dat = pickle.load(handle)

    def stacked_layout():
        """Fresh layout for the stacked per-aspect figures."""
        return go.Layout(
            barmode='stack',
            bargap=0.2,
            title="Service, Food, Preparation and Location Sentiment <br>"
            "(Please Use Legend To Toggle)",
            height=500,
            xaxis=dict(
                showgrid=False,
                zeroline=False,
                showline=False,
            ),
            yaxis=dict(
                showgrid=False,
                zeroline=False,
                showline=False,
            ),
        )

    dt = {}
    mata = []  # traces of every aspect, for the combined figure
    for aes, dars in dat.items():
        scaled = (dars / len(dars.columns)) / 4 * 100
        data = []
        for i, col in enumerate(scaled.columns):
            trace = go.Bar(
                y=list(scaled.index),
                x=list(scaled[col]),
                name=str(col),
                orientation='h',
                legendgroup=str(aes),
                marker=dict(color=colors_all[i],
                            line=dict(color=colors_all[i], width=1)))
            data.append(trace)
            mata.append(trace)
        dt[aes] = go.Figure(data=data, layout=stacked_layout())

    dt["full"] = go.Figure(data=mata, layout=stacked_layout())

    # --- Quarter report -------------------------------------------------
    # Join the unscaled aspect frames side by side on their index.
    for position, dars in enumerate(dat.values()):
        if position == 0:
            va = pd.DataFrame(index=dars.index)
        va = pd.concat((va, dars), axis=1)

    ra = va[va.index.str.contains("-TQ")]
    df_rank = pd.DataFrame()
    for col in ra.columns:
        df_rank[col] = ra[col].rank(ascending=0)
    df_rank = df_rank.astype(int)

    target_key = target + "-TQ "
    bjri_tq = va[va.index == target_key].T
    bjri_tq["rank"] = df_rank[df_rank.index == target_key].T
    bjri_tq = bjri_tq.sort_values(target_key, ascending=True)

    trace0 = go.Bar(
        x=bjri_tq[target_key].values,
        y=bjri_tq.index,
        orientation='h',
        text=["Overall Position: " + str(s)
              for s in list(bjri_tq["rank"].values)],
        marker=dict(
            color='rgb(158,202,225)',
            line=dict(
                color='rgb(8,48,107)',
                width=1.5,
            )
        ),
        opacity=0.6
    )

    layout = go.Layout(
        annotations=[dict(
            showarrow=False,
            # Mean score of the target row as a faint "NN/100" watermark.
            text=str(int(bjri_tq[target_key].mean() * 100)) + '/100',
            x=1,
            y=3,
            xref='x',
            yref='y',
            opacity=0.1,
            font=dict(
                color="black",
                size=30
            ),)],
        height=800,
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showline=False,
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showline=False,
            tickfont=dict(
                size=10),
        ),
        # Plain dict instead of the deprecated go.Margin class.
        margin=dict(l=90, r=0, b=0, t=70, pad=0),
        title=bjri_tq.columns[0] + ' Sentiment Report',
    )
    dt["quarter"] = go.Figure(data=[trace0], layout=layout)

    # --- Small overview: one bar per "-TQ" row ---------------------------
    ba = ra.T.sum()
    ba.index = [s[:-4] for s in list(ba.index)]  # drop the 4-char suffix
    ba = ba / len(va.columns) * 100

    trace1 = go.Bar(
        x=ba.index,
        y=ba.values,
        marker=dict(
            color='Lightgrey',
            line=dict(
                color='rgb(8,48,107)',
                width=1.5,
            )
        ),
        opacity=1
    )
    layout = go.Layout(
        height=150,
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showline=False,
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showline=False,),
        margin=dict(l=0, r=0, b=23, t=20, pad=0),
    )
    dt["small"] = go.Figure(data=[trace1], layout=layout)

    return dt
def update_histogram(datePicked, selection):
    """Return the hourly bar figure for the picked date.

    ``datePicked`` is parsed as ``YYYY-MM-DD``; its month (minus 4) and day
    (minus 1) are passed with ``selection`` to ``get_selection``, which
    supplies the x values, y values and bar colours. Each bar gets a text
    annotation with its value, and an invisible scatter trace at half the bar
    height is added on top of the bars.
    """
    picked = dt.strptime(datePicked, "%Y-%m-%d")
    month_index = picked.month - 4
    day_index = picked.day - 1

    xVal, yVal, colorVal = get_selection(month_index, day_index, selection)

    # Leave a quarter of the tallest bar as headroom for the value labels.
    tallest = max(yVal)

    value_labels = []
    for xi, yi in zip(xVal, yVal):
        value_labels.append(
            dict(
                x=xi,
                y=yi,
                text=str(yi),
                xanchor="center",
                yanchor="bottom",
                showarrow=False,
                font=dict(color="white"),
            ))

    x_axis = dict(
        range=[-0.5, 23.5],
        showgrid=False,
        nticks=25,
        fixedrange=True,
        ticksuffix=":00",
    )
    y_axis = dict(
        range=[0, tallest + tallest / 4],
        showticklabels=False,
        showgrid=False,
        fixedrange=True,
        rangemode="nonnegative",
        zeroline=False,
    )
    layout = go.Layout(
        bargap=0.01,
        bargroupgap=0,
        barmode="group",
        margin=go.layout.Margin(l=10, r=0, t=0, b=50),
        showlegend=False,
        plot_bgcolor="#323130",
        paper_bgcolor="#323130",
        dragmode="select",
        font=dict(color="white"),
        xaxis=x_axis,
        yaxis=y_axis,
        annotations=value_labels,
    )

    bars = go.Bar(x=xVal, y=yVal, marker=dict(color=colorVal), hoverinfo="x")
    # Fully transparent square markers placed at mid-height of every bar.
    overlay = go.Scatter(
        opacity=0,
        x=xVal,
        y=yVal / 2,
        hoverinfo="none",
        mode="markers",
        marker=dict(color="rgb(66, 134, 244, 0)", symbol="square", size=40),
        visible=True,
    )
    return go.Figure(data=[bars, overlay], layout=layout)
] layout = go.Layout(title='Comparison of amazon ratings of Electronic Items') fig = go.Figure(data=data, layout=layout) pyo.plot(fig, filename='box.html') # doing a barplot of average rating and number of ratings for a particular item item_rating_count = top_items[top_items['Item'] == 'B005CT56F8'] bar_data = item_rating_count.groupby('Month').mean() bar_data['Counts'] = item_rating_count.groupby('Month').size() bar_data = bar_data.reset_index() # ploting bar plot using plotly months = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ] trace1 = go.Bar(x=months, y=bar_data['Rating'], name='Rating', marker=dict(color='#FFD700')) trace2 = go.Bar(x=months, y=bar_data['Counts'], name='Counts', marker=dict(color='#9EA0A1')) data = [trace1, trace2] layout = go.Layout( title='Bar diagram- average rating and no_of_counts per each month -2013') fig = go.Figure(data=data, layout=layout) pyo.plot(fig, filename='bar.html')
import plotly.graph_objs as go
import plotly.offline as pyo
import pandas as pd

# Load the salaries, order them from highest to lowest, then keep only the
# rows whose Age is above 30.
df = pd.read_csv('./salaries.csv').sort_values('Salary', ascending=False)
df = df.loc[df['Age'] > 30]

# One bar per name, in the sorted order.
data = [go.Bar(x=df['Name'], y=df['Salary'])]
layout = go.Layout(
    title='Salary by Name',
    xaxis={'title': 'Name'},
    yaxis={'title': 'Salary'},
)

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig)
def plot_InfSites_gens(mrcas,
                       point_up,
                       root_lib,
                       range_theta,
                       Theta=1,
                       mut_rate=9e-8,
                       height=500,
                       width=900):
    """Plot, per haplotype, the mean of the first path vector from ``tree_descent_gen``.

    For every entry of ``mrcas`` the sinks and starters are looked up with
    ``get_sinks``; when starters exist, ``tree_descent_gen`` is run and the
    mean of ``paths_reverse[0][0]`` is recorded. Haplotypes without starters
    are recorded as NaN so that every value stays aligned with its haplotype.

    Parameters
    ----------
    mrcas : sequence of sequences
        Haplotypes; each is rendered as the concatenation of its elements.
    point_up, root_lib :
        Passed through to ``get_sinks`` and ``tree_descent_gen``.
    range_theta, height, width :
        Unused; kept for interface compatibility.
    Theta, mut_rate :
        Forwarded to ``tree_descent_gen`` as ``Theta`` and ``mu``.

    Returns
    -------
    hap_frame : pandas.DataFrame
        Columns ``hap_id`` (position in ``mrcas``), ``hap`` (haplotype
        string) and ``t`` (value rounded to 3 decimals), sorted by ``t``.
    Figure : plotly.graph_objs.Figure
        Bars in ascending order of value, each labelled with its ``hap_id``.
    """
    from structure_tools.Coal_tools import tree_descent_gen

    hap_frame = pd.DataFrame(list(range(len(mrcas))), columns=['hap_id'])
    hap_frame["hap"] = [''.join([str(x) for x in z]) for z in mrcas]

    # One value per haplotype, in the same order as ``mrcas``.
    vals = []
    for mrca in mrcas:
        sink, starters = get_sinks(mrca, root_lib, point_up)
        if len(starters):
            _, paths_reverse, _, _ = tree_descent_gen(root_lib,
                                                      point_up,
                                                      sink,
                                                      Theta=Theta,
                                                      mu=mut_rate)
            vals.append(np.mean(paths_reverse[0][0]))
        else:
            vals.append(np.nan)

    # Sort for display only. The previous code wrote the *sorted* values back
    # into the unsorted frame, pairing each value with the wrong haplotype.
    order = np.argsort(vals)
    fig = [
        go.Bar(x=['hap: {}'.format(x) for x in order],
               y=[vals[x] for x in order])
    ]
    layout = go.Layout(title='gens until first hap appearence',
                       barmode='group',
                       xaxis=dict(title='hap'),
                       yaxis=dict(title='Gen'))
    Figure = go.Figure(data=fig, layout=layout)

    hap_frame['t'] = [round(c, 3) for c in vals]
    hap_frame = hap_frame.sort_values('t')
    return hap_frame, Figure
print("Graph of OVH")

# Running totals over every (ip, weights) pair of ovh_data.
total_epsilon_per_choice = 0
total_ip_weight = 0
graph_pairs = {}  # ip -> weight-averaged epsilon for that ip
num_weights = 0

for pair in ovh_data:
    ip = pair[0]
    weights = pair[1]
    total_weight = 0
    graph_pairs[ip] = 0
    for weight in weights:
        total_weight += weight
        graph_pairs[ip] += ip_epsilons[ip] * weight
        # Still need the particular probability that this as picks this guard
        total_ip_weight += weight
        total_epsilon_per_choice += (ip_epsilons[ip] * weight)
    if total_weight > 0:
        graph_pairs[ip] = graph_pairs[ip] / total_weight

# Same guard as the per-ip average above: without any positive weight the
# division would raise ZeroDivisionError, so the total is left at its raw sum.
if total_ip_weight > 0:
    total_epsilon_per_choice = total_epsilon_per_choice / total_ip_weight

## Graph of epsilon values versus Ip Addresses for Given AS
graph_ips = sorted(graph_pairs)
graph_epsilons = [graph_pairs[key] for key in graph_ips]

graph_data = [go.Bar(x=graph_ips, y=graph_epsilons)]
py.plot(graph_data, filename='basic-bar.html')
fig.append_trace(trace0, 1, 1) fig.append_trace(trace1, 1, 1) fig.append_trace(trace2, 1, 1) fig.append_trace(trace3, 1, 1) fig.append_trace(trace4, 1, 1) number = [age0[-1], age1[-1], age2[-1], age3[-1]] percent = [round(100.0 * (float(x) / float(total[-1])), 1) for x in number] age_bracket = ['18-24 yrs', '25-44 yrs', '45-65 yrs', '65+ yrs'] trace5 = go.Bar( x=age_bracket, y=percent, marker=dict( color='rgba(50, 171, 96, 0.6)', line=dict(color='rgba(50, 171, 96, 1.0)', width=1), ), name='Percent distribution by age, July 2017', ) layout = dict( title='Total number of homeless adults', yaxis1=dict(showgrid=False, showline=False, showticklabels=True, domain=[0, 0.85], range=[0, 5200], title='Number of homeless adults'), yaxis2=dict(showgrid=False, showline=True,