def test_add_traces(self):
    """Adding two traces appends them in order and sends a single addTraces message."""
    # Add two traces
    self.figure.add_traces([
        go.Sankey(arrangement="snap"),
        go.Histogram2dContour(line={"color": "cyan"}),
    ])

    # Check access properties
    self.assertEqual(self.figure.data[-2].type, "sankey")
    self.assertEqual(self.figure.data[-2].arrangement, "snap")
    self.assertEqual(self.figure.data[-1].type, "histogram2dcontour")
    self.assertEqual(self.figure.data[-1].line.color, "cyan")

    # Check message: the expected payload carries no uid, so the trace uids
    # are not read here.
    self.figure._send_addTraces_msg.assert_called_once_with([
        {"type": "sankey", "arrangement": "snap"},
        {"type": "histogram2dcontour", "line": {"color": "cyan"}},
    ])
def update_sankey_fig(month_index):
    """Return the Sankey figure dict (``data`` + ``layout``) for one month.

    Args:
        month_index: Position of the month; passed to ``sankeyMap`` and used
            to look up the month name in ``month_list`` for the title.
    """
    # Link arrays for the selected month.
    sank_source, sank_target, sank_value = sankeyMap(month_index)

    trace = go.Sankey(
        node={'label': sankey_labels},
        link={
            # Source/target entries are indices into the node labels.
            "source": sank_source,
            "target": sank_target,
            "value": sank_value,
        },
    )

    title = '10 most common crimes, with district and outcome, month of {}'.format(
        month_list[month_index])
    layout = {
        'title': title,
        'plot_bgcolor': colors['background'],
        'paper_bgcolor': colors['background'],
        'font': {'color': colors['text'], 'size': 14},
    }
    return {'data': [trace], 'layout': layout}
def generate_figure_sankey(year):
    """Return the figure dict (``data`` + ``layout``) of the Sankey for ``year``.

    Node labels and link arrays are taken from ``gen_sankey_data(year)``;
    node colours come from the module-level ``node_color``.
    """
    scope = gen_sankey_data(year)

    node = dict(
        pad=12,
        thickness=10,
        line=dict(width=0),
        label=scope['node'],
        color=node_color,
    )
    link = dict(
        source=scope['source'],
        target=scope['target'],
        value=scope['value'],
        color=scope['color'],
    )
    trace = go.Sankey(
        opacity=0.5,
        domain={'x': [0, 1], 'y': [0, 1]},
        orientation='h',
        textfont={'size': 10},
        arrangement='freeform',
        node=node,
        link=link,
    )

    layout = {
        'title': '{} 정부 세입/세출 예산편성현황(억원)'.format(year),
        'height': 2048,
    }
    return {'data': [trace], 'layout': layout}
def set_sankey_lists():
    """Build a Sankey diagram of job-to-job links and open it with ``plot``.

    Reads the module-level ``files_dict_in``, ``files_dict_out`` and
    ``jobs_dict``. For every job listed under a file in ``files_dict_in`` a
    node label ``"<job>(<program>)"`` is added, and one link of value 1 is
    added to every job listed for the same file in ``files_dict_out``.

    Links whose endpoints cannot be resolved are skipped: a file missing from
    ``files_dict_out`` contributes no links, and a job without an ``"idx"``
    entry in ``jobs_dict`` drops only that link. Only ``KeyError`` is
    tolerated; any other error propagates.
    """
    line_dict = dict(color="green", width=0.5)
    label_list = []
    source_list = []
    target_list = []
    value_list = []

    for input_file in files_dict_in:
        for input_jobs in files_dict_in[input_file]:
            label_list.append("{0}({1})".format(
                input_jobs, jobs_dict[input_jobs]["program"]))

            try:
                output_jobs_for_file = files_dict_out[input_file]
            except KeyError:
                # No entry for this file in files_dict_out: nothing to link.
                continue

            for output_jobs in output_jobs_for_file:
                # Resolve both endpoints before appending anything so the
                # three link lists always stay the same length.
                try:
                    source_idx = jobs_dict[input_jobs]["idx"]
                    target_idx = jobs_dict[output_jobs]["idx"]
                except KeyError:
                    continue
                source_list.append(source_idx)
                target_list.append(target_idx)
                value_list.append(1)

    node_dict = dict(pad=15,
                     thickness=20,
                     line=line_dict,
                     label=label_list,
                     color="blue")
    link_dict = dict(source=source_list, target=target_list, value=value_list)
    fig = go.Figure(data=[go.Sankey(node=node_dict, link=link_dict)])

    xaxis_dict = dict(rangeslider=dict(visible=True), type="linear")
    fig.update_layout(xaxis=xaxis_dict,
                      title_text="Complex Sankey Diagram",
                      font_size=10)
    plot(fig, auto_open=True)
def dibujar_sankey(primer_ageb, segundo_ageb):
    """Draw the Sankey of trips from stations in one AGEB to stations in another.

    Args:
        primer_ageb: Value matched against the ``CVE_AGEB_retiro`` column
            (pickup side).
        segundo_ageb: Value matched against the ``CVE_AGEB_arribo`` column
            (arrival side).

    Returns:
        A ``go.Figure`` with one node per distinct pickup station followed by
        one node per distinct arrival station, and one link per CSV row.
    """
    # Read the trips-between-stations data.
    df_sankey = pd.read_csv(
        './data/production_data/viajes_ecobici_entre_estaciones.csv')

    # Keep only the rows for the requested pair of AGEBs.
    df_sankey = df_sankey[(df_sankey['CVE_AGEB_retiro'] == primer_ageb)
                          & (df_sankey['CVE_AGEB_arribo'] == segundo_ageb)]

    # Node list: pickup stations first, arrival stations after them.
    estaciones_retiro = df_sankey['nombre_estacion_retiro'].unique().tolist()
    estaciones_arribo = df_sankey['nombre_estacion_arribo'].unique().tolist()
    label = estaciones_retiro + estaciones_arribo

    # Each row's link endpoints are derived from that row's own station
    # names, so source/target/value stay aligned even when some station pair
    # has no row or the rows are not in cartesian-product order.
    indice_retiro = {
        nombre: i for i, nombre in enumerate(estaciones_retiro)
    }
    desplazamiento = len(estaciones_retiro)
    indice_arribo = {
        nombre: desplazamiento + i
        for i, nombre in enumerate(estaciones_arribo)
    }
    source = [indice_retiro[n] for n in df_sankey['nombre_estacion_retiro']]
    target = [indice_arribo[n] for n in df_sankey['nombre_estacion_arribo']]
    value = df_sankey['Numero_de_viajes'].tolist()

    # Draw the Sankey.
    data_sankey = go.Sankey(
        node={
            'pad': 15,
            'thickness': 10,
            'line': {'color': 'black', 'width': 0.5},
            'label': label,
            'color': '#00b386',
        },
        link={'source': source, 'target': target, 'value': value},
        textfont={'family': 'Raleway'},
    )
    layout_sankey = go.Layout(
        title_text='Viajes entre estaciones (retiro a arribo)',
        font={'family': 'Raleway'},
        margin={'l': 0, 'r': 5, 't': 25, 'b': 5},
        plot_bgcolor='#f9f9f9',
    )
    figure_sankey = go.Figure(data=data_sankey, layout=layout_sankey)
    return figure_sankey
def create_figure_sankey(df_flow):
    """Build a three-level Sankey figure from ``df_flow``.

    The levels are: ``'total'`` -> ``migr`` -> ``region`` -> ``country``.
    Each level is a group-by sum of ``df_flow``; node names are converted to
    node indices with the module-level ``migr_to_index`` and labelled with
    ``features_long``.

    Args:
        df_flow: DataFrame with ``migr``, ``region``, ``country`` and
            ``count`` columns.

    Returns:
        A ``go.Figure`` holding a single Sankey trace.
    """
    # Step 0: total -> migration background.
    zero_step = (df_flow.groupby(['migr'], as_index=False).sum()
                 .rename(columns={'migr': 'target'})
                 .sort_values(['target'], ascending=[True]))
    zero_step['source'] = 'total'

    # Step 1: migration background -> region.
    first_step = (df_flow.groupby(['migr', 'region'], as_index=False).sum()
                  .rename(columns={'migr': 'source', 'region': 'target'})
                  .sort_values(['source', 'target'], ascending=[True, True]))

    # Step 2: region -> country.
    second_step = (df_flow.groupby(['region', 'country'], as_index=False).sum()
                   .rename(columns={'region': 'source', 'country': 'target'})
                   .sort_values(['source', 'target'], ascending=[True, True]))

    # Stack the three steps into one link table.
    source_target = pd.concat([zero_step, first_step, second_step],
                              axis=0,
                              ignore_index=True)

    # Convert node names to node indices.
    source_target_cat = source_target.copy()
    source_target_cat['source'] = source_target_cat['source'].map(
        migr_to_index)
    source_target_cat['target'] = source_target_cat['target'].map(
        migr_to_index)

    # Figure parameters.
    labels = [features_long[k] for k in migr_to_index]
    source = source_target_cat['source'].tolist()
    target = source_target_cat['target'].tolist()
    counts = source_target_cat['count'].tolist()

    # Colours with an alpha component must be spelled ``rgba(...)``; with
    # ``rgb(...)`` the fourth component is not applied as transparency.
    line_sankey = go.sankey.node.Line(color='rgb(76,153,160)', width=0.5)
    node_sankey = go.sankey.Node(pad=15,
                                 thickness=20,
                                 line=line_sankey,
                                 label=labels,
                                 color='rgba(153,216,201, 0.5)')
    link_hovertemplate = 'Origin: %{source.label}<br>Destination %{target.label}'
    link_sankey = go.sankey.Link(source=source,
                                 target=target,
                                 value=counts,
                                 label=labels,
                                 hovertemplate=link_hovertemplate,
                                 color='rgba(204,236,230, 0.5)')

    fig_sankey = go.Figure(
        data=[go.Sankey(node=node_sankey, link=link_sankey)])
    fig_sankey.update_layout(**fig_layout_defaults,
                             margin=dict(t=0, l=30, r=30, b=10))
    return fig_sankey
def generate_funding_chart_sankey(funding_data):
    """Return a ``dcc.Graph`` with the funding Sankey, or ``''`` if nothing was funded.

    Args:
        funding_data: JSON string whose ``country_funding`` object holds
            ``total_funded``, ``funding_source`` and ``funding_destination``.

    Node 0 is the central 'Funding' node. The ten largest sources link into
    it and it links out to the ten largest destinations; each link is
    labelled with its share of ``total_funded``.
    """
    country_funding = json.loads(funding_data)['country_funding']
    total = country_funding['total_funded']
    if total == 0:
        return ''

    link_sources, link_targets, link_values, link_labels = [], [], [], []
    node_colors = ["black"]
    node_labels = ['Funding']

    # Sources: node k -> node 0, numbered from 1.
    top_sources = sorted(country_funding['funding_source'],
                         key=lambda x: x['totalFunding'],
                         reverse=True)[:10]
    for node_index, funding_source in enumerate(top_sources, start=1):
        link_sources.append(node_index)
        link_targets.append(0)
        link_values.append(funding_source['totalFunding'])
        node_labels.append(funding_source['name'])
        link_labels.append('{:02.2f}%'.format(
            funding_source['totalFunding'] / total * 100))
        node_colors.append(rand_color.generate(hue='orange')[0])

    # Destinations: node 0 -> node k, numbering continues after the sources.
    top_destinations = sorted(country_funding['funding_destination'],
                              key=lambda x: x['totalFunding'],
                              reverse=True)[:10]
    for node_index, funding_dest in enumerate(top_destinations,
                                              start=len(node_labels)):
        link_sources.append(0)
        link_targets.append(node_index)
        link_values.append(funding_dest['totalFunding'])
        node_labels.append(funding_dest['name'])
        link_labels.append('{:02.2f}%'.format(
            funding_dest['totalFunding'] / total * 100))
        node_colors.append(rand_color.generate(hue='blue')[0])

    trace1 = go.Sankey(
        type='sankey',
        node=dict(pad=15,
                  thickness=20,
                  line=dict(color="black", width=0.5),
                  label=node_labels,
                  color=node_colors),
        link=dict(source=link_sources,
                  target=link_targets,
                  value=link_values,
                  label=link_labels),
        name='Stunting',
    )
    figure = {
        'data': [trace1],
        'layout': go.Layout(
            title='Funding source and destination for Nutrition(10 largest)'),
    }
    return dcc.Graph(id='chart-sankey', figure=figure)
def filter_sankey_graph(th, score, node):
    """Return a Sankey figure of the edges of ``G_s`` whose ``score`` exceeds ``th``.

    Args:
        th: Threshold; only edges with ``d[score] > th`` are kept.
        score: Name of the edge attribute compared with ``th`` and used as
            the link value.
        node: Falsy, or a mapping whose ``"id"`` entry is shown in the title.

    Returns:
        An empty ``dict`` when no edge passes the threshold, otherwise a
        ``go.Figure``.

    Side effect: writes a ``"color"`` attribute onto the nodes of the
    module-level graph ``G_s``.
    """
    if node:
        node = node["id"]

    # Copy of G_s that keeps every node but only the edges above the threshold.
    filtered = nx.MultiDiGraph()
    for src, dst, edge_key, attrs in G_s.edges(data=True, keys=True):
        if d_score_passes := attrs[score] > th:
            filtered.add_edge(src, dst, edge_key, **attrs)
    filtered.add_nodes_from(G_s.nodes(data=True))

    edges = nx.to_pandas_edgelist(filtered)
    if len(edges) == 0:
        return dict()

    def _original_name(name):
        # Drop the " Post" / "Pre " marker to get the name used as colour key.
        if " Post" in name:
            return str(name).split(sep=" Post")[1]
        if "Pre " in name:
            return str(name).split(sep="Pre ")[1]
        return str(name)

    # Apply the same colour scheme as the network graph.
    for node_s in filtered.nodes():
        G_s.nodes[node_s]["color"] = color_map_nodes[
            _original_name(node_s).strip()]

    nodes = G_s.nodes()
    node_map = {name: position for position, name in enumerate(list(nodes))}
    node_colors = [
        f'rgb{tuple(d["color"][0:3])}' for _, d in G_s.nodes(data=True)
    ]

    sankey = go.Sankey(
        node=dict(pad=15,
                  thickness=20,
                  line=dict(color="black", width=0.5),
                  label=list(nodes),
                  color=node_colors),
        link=dict(source=list(edges["source"].map(node_map)),
                  target=list(edges["target"].map(node_map)),
                  value=list(edges[score]),
                  label=edges["interaction"]),
    )
    layout = go.Layout(autosize=True,
                       title=f"Interactions: {node}",
                       font=dict(size=font_size))
    return go.Figure(data=[sankey], layout=layout)
def setUp(self):
    """Create a figure with three traces and mock its move/delete message senders."""
    initial_traces = [
        go.Scatter(y=[3, 2, 1], marker={"color": "green"}),
        go.Bar(y=[3, 2, 1, 0, -1], marker={"opacity": 0.5}),
        go.Sankey(arrangement="snap"),
    ]
    self.figure = go.Figure(data=initial_traces)

    # Mock out the message methods so tests can inspect the calls made to them.
    for sender_name in ("_send_moveTraces_msg", "_send_deleteTraces_msg"):
        setattr(self.figure, sender_name, MagicMock())
def test_add_trace(self):
    """A single added trace becomes the last trace and triggers one addTraces message."""
    self.figure.add_trace(go.Sankey(arrangement="snap"))

    # The new trace is the last one and keeps its properties.
    added = self.figure.data[-1]
    self.assertEqual(added.type, "sankey")
    self.assertEqual(added.arrangement, "snap")

    # Exactly one message, describing only the new trace.
    expected_msg = [{"type": "sankey", "arrangement": "snap"}]
    self.figure._send_addTraces_msg.assert_called_once_with(expected_msg)
def test_add_trace(self):
    """A single added trace becomes the last trace; the message includes its uid."""
    self.figure.add_trace(go.Sankey(arrangement='snap'))

    # The new trace is the last one and keeps its properties.
    added = self.figure.data[-1]
    self.assertEqual(added.type, 'sankey')
    self.assertEqual(added.arrangement, 'snap')

    # Exactly one message, describing the new trace together with its uid.
    expected_msg = [{
        'type': 'sankey',
        'arrangement': 'snap',
        'uid': added.uid,
    }]
    self.figure._send_addTraces_msg.assert_called_once_with(expected_msg)
def prepare_figure(df):
    """Build a Sankey figure dict from the ``shifted`` sums of ``df``.

    ``df`` is modified in place: a ``global`` column ('in' for
    ``shifted >= 0``, otherwise 'out') is added and, when present, the
    ``office`` column is replaced by ``qry.get_office_name(...)``.

    The grouping keys are ``'global'`` followed by every column of ``df``
    except the last. For each prefix of at least two keys, ``shifted`` is
    summed per group; every non-zero sum becomes one link between the node
    named by the group keys minus the last one and the node named by all
    group keys (the ``global`` key itself is not part of the names).
    Negative sums are drawn reversed and in red, the others in green.

    Returns:
        ``{'data': [go.Sankey], 'layout': go.Layout}``.
    """
    source, target, value, label, color = [], [], [], [], []

    keys = ['global'] + list(df.columns[:-1])
    df['global'] = df['shifted'].apply(lambda val: 'in' if val >= 0 else 'out')
    if 'office' in df.columns:
        df['office'] = df['office'].apply(
            lambda s: qry.get_office_name(s, offices))

    def _node_index(name):
        # Register the node label on first use and return its position.
        if name not in label:
            label.append(name)
        return label.index(name)

    for depth in range(2, len(keys) + 1):
        subset = df.groupby(keys[0:depth])['shifted'].sum().reset_index()
        subset = subset[subset['shifted'] != 0]

        for _, row in subset.iterrows():
            record = row.to_dict()
            fields = list(record.keys())
            # fields is [global, key_1, ..., key_n, shifted]; the first-level
            # source name is therefore the empty string.
            source_label = '-'.join([record[key] for key in fields[1:-2]])
            target_label = '-'.join([record[key] for key in fields[1:-1]])

            if record['shifted'] < 0:
                source_label, target_label = target_label, source_label
                color.append('rgba(255, 0, 0, 0.4)')
            else:
                color.append('rgba(0, 255, 0, 0.4)')

            source.append(_node_index(source_label))
            target.append(_node_index(target_label))
            value.append(abs(record['shifted']))

    trace = go.Sankey(
        link=dict(source=source, target=target, value=value, color=color),
        node=dict(label=label, color='blue'),
    )
    return {'data': [trace], 'layout': go.Layout(font_size=6)}
def create_figure_sankey(df_outflow_top, df_outflow_rest, df_outflow_borough,
                         pickup_zone):
    """Build the Sankey figure of trips leaving ``pickup_zone``.

    Node layout: the borough labels (``bmapper``), then the zone labels
    (``zmapper``), then one 'Other: <borough>' label per borough. Links are
    the borough-to-borough flows followed by the borough-to-zone flows of
    ``df_outflow_top`` and ``df_outflow_rest``.

    Side effect: adds a ``dropoff_borough`` column to ``df_outflow_top``.

    Returns:
        A ``go.Figure`` holding a single Sankey trace.
    """
    pickup_borough = zone_index_to_borough_index[pickup_zone]
    df_outflow_top['dropoff_borough'] = df_outflow_top.dropoff_zone.map(
        zone_index_to_borough_index)

    # Node labels; zone nodes start right after the borough nodes.
    label_offset_zones = len(list(bmapper.values()))
    borough_labels = list(bmapper.values())
    zone_labels = list(zmapper.values())
    other_labels = [f'Other: {k}' for k in bmapper.values()]
    labels = borough_labels + zone_labels + other_labels

    # Overwrite the pickup borough's label with one that includes the zone name.
    labels[pickup_borough] = (
        f'{borough_index_to_name[pickup_borough]} - {zone_index_to_name[pickup_zone]}'
    )

    # Link sources.
    source = (df_outflow_borough.pickup_borough.astype('int').tolist()
              + df_outflow_top.dropoff_borough.astype('int').tolist()
              + df_outflow_rest.dropoff_borough.astype('int').tolist())

    # Link targets: zone ids are shifted past the borough nodes.
    zone_indices = (df_outflow_top.dropoff_zone.astype('int')
                    + label_offset_zones).tolist()
    zone_indices_other = (df_outflow_rest.dropoff_zone.astype('int')
                          + label_offset_zones).tolist()
    target = (df_outflow_borough.dropoff_borough.astype('int').tolist()
              + zone_indices + zone_indices_other)

    link_labels = [labels[t] for t in target]
    counts = np.array(df_outflow_borough['count_trips'].tolist()
                      + df_outflow_top['count_trips'].tolist()
                      + df_outflow_rest['count_trips'].tolist())

    node_sankey = go.sankey.Node(
        pad=15,
        thickness=20,
        line=go.sankey.node.Line(color='black', width=0.5),
        label=labels,
        color='blue',
    )
    link_sankey = go.sankey.Link(
        source=source,
        target=target,
        value=counts,
        label=link_labels,
        hovertemplate='Origin: %{source.label}<br>Destination %{target.label}',
    )
    fig_sankey = go.Figure(
        data=[go.Sankey(node=node_sankey, link=link_sankey)])

    title_text = f"Outflow of taxis from {zone_index_to_name[pickup_zone]} to other Boroughs, and top {n_largest} zones"
    fig_sankey.update_layout(title_text=title_text,
                             font_size=10,
                             **fig_layout_defaults)
    return fig_sankey
def generate_sankey_chart():
    """Return a Sankey trace of the largest WFP funding sources and destinations.

    Data comes from ``fts_api.get_wfp_funding()``. Node 0 is the central
    'WFP' node; the 15 largest funding sources link into it and it links out
    to the 15 largest funding destinations. Each link is labelled with its
    share of ``total_funded``; when ``total_funded`` is zero the share is
    reported as 0 instead of raising ``ZeroDivisionError``.

    Returns:
        A ``go.Sankey`` trace.
    """
    data = fts_api.get_wfp_funding()
    funding_total = data['total_funded']

    sources, targets, values, link_labels = [], [], [], []
    colors = ["black"]
    labels = ['WFP']

    def _largest(key):
        # The 15 entries of data[key] with the highest totalFunding.
        return sorted(data[key],
                      key=lambda x: x['totalFunding'],
                      reverse=True)[:15]

    def _add_node(entry, hue):
        # Register one funding node plus its link value/label; return its index.
        amount = entry['totalFunding']
        share = amount / funding_total * 100 if funding_total else 0.0
        values.append(amount)
        labels.append(entry['name'])
        link_labels.append('{:02.2f}%'.format(share))
        colors.append(rand_color.generate(hue=hue)[0])
        return len(labels) - 1

    for funding_source in _largest('funding_source'):
        node_index = _add_node(funding_source, 'orange')
        sources.append(node_index)
        targets.append(0)

    for funding_dest in _largest('funding_destination'):
        node_index = _add_node(funding_dest, 'blue')
        sources.append(0)
        targets.append(node_index)

    trace = go.Sankey(type='sankey',
                      node=dict(pad=15,
                                thickness=20,
                                line=dict(color="black", width=0.5),
                                label=labels,
                                color=colors),
                      link=dict(source=sources,
                                target=targets,
                                value=values,
                                label=link_labels),
                      name='Stunting')
    return trace
def make_chart(chart_data):
    """Render the Sankey described by ``chart_data`` with ``fig.show()``.

    Args:
        chart_data: Mapping with ``label`` and ``color`` (nodes) and
            ``source``, ``target``, ``value`` and ``link_color`` (links).
    """
    node = dict(
        pad=40,
        thickness=50,
        line=dict(width=0.5),
        label=chart_data['label'],
        color=chart_data['color'],
    )
    link = dict(
        source=chart_data['source'],
        target=chart_data['target'],
        value=chart_data['value'],
        color=chart_data['link_color'],
    )
    trace = go.Sankey(arrangement='perpendicular', node=node, link=link)
    fig = go.Figure(data=[trace])

    # The title lists the years held in the module-level ANOS.
    title = f'Alunos que concluiram o mestrado e onde se matricularam no doutorado ({", ".join(ANOS)})'
    fig.update_layout(title_text=title, font_size=10, height=1500)
    fig.show()
def sankey_update(month, year):
    """Return the Sankey figure for the selected month and year.

    Reads the module-level ``df``, ``months_dic``, ``rivers_list``,
    ``source``, ``target``, ``chemicals`` and ``colours``.

    Args:
        month: Value compared with the "Месяц" column after it has been
            passed through ``months_dic``.
        year: Value compared with the "Год" column.

    For every river the link values are columns 2..12 of its first matching
    row; a river with no row for the selection contributes eleven zeros.
    Other errors (for example a missing column) are no longer silenced.

    Side effect: the "Месяц" column of the global ``df`` is rewritten with
    ``replace(months_dic)`` on every call.
    """
    values = []
    df["Месяц"] = df["Месяц"].replace(months_dic)
    df_selected = df[(df["Год"] == year) & (df["Месяц"] == month)]

    for river in rivers_list:
        river_rows = df_selected[
            df_selected["Наименование водного объекта"] == river]
        if river_rows.empty:
            # No measurements for this river in the selected period.
            values.append(np.full(11, 0))
        else:
            values.append(river_rows.iloc[0, 2:13].to_list())

    fig = go.Figure(data=[
        go.Sankey(
            node=dict(
                pad=30,
                thickness=2,
                line=dict(color="black", width=0.2),
                label=rivers_list,
                customdata=rivers_list,
                hovertemplate="%{customdata} имеет общее кол-во превышении %{value}<extra></extra>",
                color="#049CE0",
            ),
            link=dict(
                source=source,
                target=target,
                value=np.array(values).flatten(),
                # NOTE(review): the factor 14 presumably equals
                # len(rivers_list) - confirm.
                label=chemicals * 14,
                color=colours * 14,
            ),
        )
    ])
    fig.update_layout(
        title_text="Диаграмма распространения химических вещевств",
        font={"size": 10, "color": "white"},
    )
    return fig
def build_sankey(df, degree):
    """Return an HTML Sankey diagram linking a degree to its common careers.

    Args:
        df: DataFrame with ``education_groups``, ``identifier``, ``top_jobs``
            and ``job_percent`` columns.
        degree: Value of ``education_groups`` to plot.

    Returns:
        The figure rendered with ``fig.to_html()``.
    """
    # Rows for the requested degree; rows containing any NaN are dropped.
    data = df.where(df['education_groups'] == degree).dropna()

    # All nodes live in one list: identifiers first, then jobs. Links refer
    # to nodes by their first position in that list.
    identifiers = data['identifier'].values.tolist()
    top_jobs = data['top_jobs'].values.tolist()
    all_nodes = identifiers + top_jobs
    source_indices = [all_nodes.index(name) for name in data['identifier']]
    target_indices = [all_nodes.index(job) for job in data['top_jobs']]

    # One colour per node, converted to the 'rgb...' strings plotly expects.
    cmap = get_cmap_string(palette='plasma_r', domain=all_nodes)
    node_colors = ['rgb' + str(s).strip('[]') for s in cmap]

    node = dict(pad=15,
                thickness=20,
                line=dict(color="black", width=0),
                label=all_nodes,
                color=node_colors)
    link = dict(source=source_indices,
                target=target_indices,
                value=data['job_percent'] * 100)
    fig = go.Figure(data=[go.Sankey(node=node, link=link)])

    # Fixed size for display on the webpage.
    fig.update_layout(autosize=False, width=800, height=800)

    return fig.to_html()
def test_add_traces(self):
    """Two added traces are appended in order; the message includes their uids."""
    new_traces = [
        go.Sankey(arrangement='snap'),
        go.Histogram2dContour(line={'color': 'cyan'}),
    ]
    self.figure.add_traces(new_traces)

    # The new traces are the last two and keep their properties.
    sankey_trace = self.figure.data[-2]
    contour_trace = self.figure.data[-1]
    self.assertEqual(sankey_trace.type, 'sankey')
    self.assertEqual(sankey_trace.arrangement, 'snap')
    self.assertEqual(contour_trace.type, 'histogram2dcontour')
    self.assertEqual(contour_trace.line.color, 'cyan')

    # Exactly one message, describing both traces together with their uids.
    expected_msg = [
        {'type': 'sankey', 'arrangement': 'snap', 'uid': sankey_trace.uid},
        {
            'type': 'histogram2dcontour',
            'line': {'color': 'cyan'},
            'uid': contour_trace.uid,
        },
    ]
    self.figure._send_addTraces_msg.assert_called_once_with(expected_msg)
def update_graph(selected_province):
    """Return the migration Sankey figure dict for one source province.

    Args:
        selected_province: Value matched against the ``Source`` column of
            the module-level ``migration_df``.

    Returns:
        ``{'data': [go.Sankey], 'layout': go.Layout}``.
    """
    df_province = migration_df[migration_df['Source'] == selected_province]

    # Drop incomplete rows as a whole. Dropping NaNs column by column would
    # leave source/target/value with different lengths and shift the links.
    links = df_province.dropna(axis=0,
                               how='any',
                               subset=['Source', 'Target', 'Value'])

    return {
        'data': [
            go.Sankey(orientation="h",
                      node=dict(pad=15,
                                thickness=20,
                                line=dict(color="black", width=0.5),
                                label=provinces,
                                color=node_colors),
                      link=dict(source=links['Source'],
                                target=links['Target'],
                                value=links['Value']))
        ],
        'layout': go.Layout(
            title='Interprovincial Migration',
            height=700,  # px
        )
    }
def make_plot(page):
    """Return the figures of dashboard page ``page`` as a list of JSON strings.

    Pages (all built from ``df_dict['listings']``):
        1: price histogram.
        2: Sankey of listing counts, neighbourhood group -> neighbourhood.
        3: price box plot and price violin plot per neighbourhood group.
        4: listing count per room type and price violin plot per room type.

    Any other ``page`` yields an empty list.
    """
    list_graphJSON = []

    def _neighbourhood_counts():
        # Listing count per (neighbourhood group, neighbourhood) pair.
        return (df_dict['listings']
                .groupby(['neighbourhood_group_cleansed',
                          'neighbourhood_cleansed'])
                .agg(Count=('id', 'count'))).reset_index()

    # Price distribution
    if page == 1:
        histogram = {
            'data': [go.Histogram(x=df_dict['listings']['price'])],
            'layout': go.Layout(title_text='Price Distributions',
                                title_x=0.5,
                                xaxis_title_text='Price',
                                yaxis_title_text='Count'),
        }
        list_graphJSON.append(
            json.dumps(histogram, cls=plotly.utils.PlotlyJSONEncoder))

    # Listings per neighbourhood
    if page == 2:
        sankey_data = _neighbourhood_counts()
        neigh_group_map = {
            name: idx for idx, name in enumerate(
                sankey_data.neighbourhood_group_cleansed.unique())
        }
        colors = ["#ffbe0b", "#fb5607", "#8338ec", "#ff006e", "#3a86ff"]

        sankey_data['neigh_group'] = (
            sankey_data.neighbourhood_group_cleansed.map(neigh_group_map))
        # Neighbourhood nodes come after the group nodes.
        # NOTE(review): the offset 5 assumes exactly five groups - confirm.
        sankey_data['neigh'] = [x + 5 for x in sankey_data.index.tolist()]
        sankey_data['neigh_colors'] = [
            colors[x] for x in sankey_data.neigh_group
        ]

        # Node names: the groups first, then the neighbourhoods.
        node_names = (
            sankey_data.neighbourhood_group_cleansed.unique().tolist()
            + sankey_data.neighbourhood_cleansed.unique().tolist())

        node = dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=node_names,
            customdata=node_names,
            color=colors + sankey_data.neigh_colors.tolist(),
            hovertemplate='%{customdata} has total listing of %{value}<extra></extra>',
        )
        link = dict(
            source=sankey_data.neigh_group.tolist(),
            target=sankey_data.neigh.tolist(),
            value=sankey_data.Count.tolist(),
            customdata=node_names,
            hovertemplate=('Neighbourhood Group: <b>%{source.customdata}</b><br />' +
                           'Neighbourhood Name: <b>%{target.customdata}</b><br />'),
        )
        fig = go.Figure(data=[go.Sankey(node=node, link=link)])
        fig.update_layout(font_size=10,
                          height=1000,
                          title_text='Number of Listings in Each Neighbourhood',
                          title_x=0.5)
        list_graphJSON.append(fig.to_json())

    # Prices per neighbourhood group
    if page == 3:
        sankey_data = _neighbourhood_counts()
        neigh_group_map = {
            name: idx for idx, name in enumerate(
                sankey_data.neighbourhood_group_cleansed.unique())
        }
        colors = ["#ffbe0b", "#fb5607", "#ff006e", "#8338ec", "#3a86ff"]
        box_data = df_dict['listings']
        # Colour of each neighbourhood group.
        neigh_group_cm = dict(zip(neigh_group_map, colors))

        fig = go.Figure()
        for neigh_group in box_data.neighbourhood_group_cleansed.unique():
            in_group = box_data.neighbourhood_group_cleansed == neigh_group
            fig.add_trace(
                go.Box(x=box_data[in_group]['price'],
                       name=neigh_group,
                       marker_color=neigh_group_cm[neigh_group]))
        fig.update_layout(
            title_text='Price Boxplot For Each Neighbourhood Group',
            title_x=0.5,
            xaxis_title_text='Price',
            xaxis_zeroline=False)
        list_graphJSON.append(fig.to_json())

        by_neigh = df_dict['listings'].groupby('neighbourhood_group_cleansed')
        region_order = ['West Region', 'East Region', 'Central Region',
                        'North-East Region', 'North Region']
        fig = go.Figure()
        for neigh in region_order[::-1]:
            fig.add_trace(
                go.Violin(x=by_neigh.get_group(neigh)['price'],
                          line_color=neigh_group_cm[neigh],
                          name=neigh,
                          showlegend=False))
        fig.update_traces(orientation='h',
                          side='positive',
                          width=3,
                          points=False)
        fig.update_layout(
            xaxis_showgrid=False,
            xaxis_zeroline=False,
            title_text='Price Distribution For Each Neighbourhood Group',
            title_x=0.5,
            xaxis_title_text='Price')
        list_graphJSON.append(fig.to_json())

    # Room types
    if page == 4:
        by_room = df_dict['listings'].groupby('room_type')
        by_room_count = (by_room.agg(Count=('id', 'count'))
                         .reset_index()
                         .sort_values('Count', ascending=False))

        bars = go.Bar(x=by_room_count['room_type'],
                      y=by_room_count['Count'],
                      text=by_room_count['Count'],
                      textposition='auto',
                      marker_color=['#1f77b4', '#ff7f0e', '#2ca02c',
                                    '#d62728'])
        layout = go.Layout(title_text='Countplot For Room Type', title_x=0.5)
        fig = go.Figure(data=bars, layout=layout)
        list_graphJSON.append(fig.to_json())

        room_order = ['Entire home/apt', 'Private room', 'Hotel room',
                      'Shared room']
        fig = go.Figure()
        for room in room_order[::-1]:
            fig.add_trace(
                go.Violin(x=by_room.get_group(room)['price'],
                          name=room,
                          showlegend=False))
        fig.update_traces(orientation='h',
                          side='positive',
                          width=3,
                          points=False)
        fig.update_layout(
            xaxis_showgrid=False,
            xaxis_zeroline=False,
            title_text='Price Distribution For Each Room Type',
            title_x=0.5,
            xaxis_title_text='Price')
        list_graphJSON.append(fig.to_json())

    return list_graphJSON
def createSankeyChart(nbd):
    """Create the Sankey chart of listings per area and property type.

    Args:
        nbd: ``"All"`` to chart every neighbourhood group, or the name of
            one neighbourhood group to chart its neighbourhoods.

    Returns:
        A ``go.Figure`` with one node per area, one node per property type
        class, and one link per (area, property type class) pair weighted by
        the number of listings.
    """
    if nbd == "All":
        nbd_col = "neighbourhood_group_cleansed"
        three_proptype_df = rental_df.copy()
    else:
        nbd_col = "neighbourhood_cleansed"
        in_group = rental_df["neighbourhood_group_cleansed"] == nbd
        three_proptype_df = rental_df[in_group].copy()

    three_proptype_df = three_proptype_df[[nbd_col,
                                           "property_type_class"]].copy()

    # Keep only the property types of interest ...
    wanted = three_proptype_df["property_type_class"].str.contains(
        "House|Private Room|Shared Room|Condominium|Seviced apartment|Apartment|Townhouse"
    )
    three_proptype_df = three_proptype_df[wanted]
    # ... except houseboats, which the "House" pattern also matches.
    is_houseboat = three_proptype_df["property_type_class"].str.contains(
        "Houseboat")
    three_proptype_df = three_proptype_df[~is_houseboat]

    # Node labels: sorted areas first, then the property type classes.
    label_list = sorted(three_proptype_df[nbd_col].unique().tolist())
    label_list += three_proptype_df["property_type_class"].unique().tolist()

    # Number of listings per (area, property type class).
    sankey_df = (three_proptype_df
                 .groupby([nbd_col, "property_type_class"])
                 .agg(count_listings=("property_type_class", "count"))
                 .reset_index())

    # Links connect node indices, so map every label to its position.
    label_idx_dict = {label: idx for idx, label in enumerate(label_list)}
    sankey_df["nbd_idx"] = sankey_df[nbd_col].map(label_idx_dict)
    sankey_df["prop_idx"] = sankey_df["property_type_class"].map(
        label_idx_dict)

    # One colour per area; every link leaving an area uses that colour.
    area_count = len(three_proptype_df[nbd_col].unique().tolist())
    color_list = full_color_list[:area_count]
    group_color = dict(
        zip(list(sankey_df.groupby(nbd_col).groups.keys()), color_list))
    sankey_df["color_link"] = sankey_df[nbd_col].map(group_color)

    source = sankey_df["nbd_idx"].tolist()
    target = sankey_df["prop_idx"].tolist()
    values = sankey_df["count_listings"].tolist()

    # Node colours: for "All" the full palette plus three extra colours,
    # otherwise one palette entry per label.
    if nbd == "All":
        color_node = full_color_list + ["#befdb7", "#1B03A3", "#FEFCD7"]
    else:
        color_node = full_color_list[:len(label_list)]

    color_link = sankey_df["color_link"].tolist()

    node = dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=label_list,
        color=color_node,
        customdata=label_list,
        hovertemplate="%{customdata} has %{value} listings<extra></extra>",
    )
    link = dict(
        source=source,
        target=target,
        value=values,
        color=color_link,
        hovertemplate="Link from %{source.customdata}<br />" +
        "to %{target.customdata}<br />has %{value} listings<extra></extra>",
    )
    fig = go.Figure(data=[go.Sankey(node=node, link=link)])

    fig.update_layout(
        title_text="Available houses and rooms",
        font_size=12,
        title_font_color=dashboard_colors["medium-blue-grey"],
        font=dict(size=12, color=dashboard_colors["medium-blue-grey"]),
    )
    # Set the theme
    fig.layout.template = "custom_dark"
    return fig
id='sankey', #style={'width': 900,'height': 600}, figure={ 'data': [ go.Sankey( #valueformat = '.0', #valuesuffix = 'TWh', node = dict( pad = 15, thickness = 15, line = dict(color = 'black', width = 0.5), label = validationSanKey2['Label'].drop_duplicates().tolist(), #label = validationSanKey2['Label'], #label = ["A1", "A2", "B1", "B2", "C1", "C2"], color = 'blue' #color = validationSanKey2['Color'] ), link = dict( #source = validationSanKey2['Source'].drop_duplicates().tolist(), #target = validationSanKey2['Target'].drop_duplicates().tolist(), source = validationSanKey2['Source'], target = validationSanKey2['Target'], value = validationSanKey2['Value'], label = validationSanKey2['Value'] #color = validationSanKey2['Color'] ) ), ], 'layout': go.Layout( #legend={'x': 0.30, 'y': 1}, #rangemode = "tozero",
import pandas as pd

# Stack the two partial link tables. DataFrame.append was removed in
# pandas 2.0; pd.concat gives the same result (row indexes are kept).
dfsankey = pd.concat([dfsankey2, dfsankey3])

# Sankey source and target: every value of columns `a` and `b` is a node;
# links refer to a node by its first position in all_nodes.
all_nodes = dfsankey.a.values.tolist() + dfsankey.b.values.tolist()
source_indices = [all_nodes.index(a) for a in dfsankey.a]
target_indices = [all_nodes.index(b) for b in dfsankey.b]

# Draw the Sankey.
fig = go.Figure(
    data=[
        go.Sankey(
            node=dict(
                pad=20,
                thickness=20,
                line=dict(color="black", width=1.0),
                label=all_nodes,
            ),
            link=dict(
                source=source_indices,
                target=target_indices,
                value=dfsankey.Quantity,
            ),
        )
    ]
)
fig.update_layout(
    title_text="Tecnologias y Planes en Telefonica",
    font=dict(size=10, color="white"),
    plot_bgcolor="red",
    paper_bgcolor="#343332",
)
def get_sankey_diagram(self, hidde, zoom_out, slider_values, highlight_seq,
                       click_data, checklist, threshold, consensustable_data,
                       consensustree_data):
    """Build the Sankey figure of the sequence graph for the selected range.

    Args:
        hidde: Not used by this method.
        zoom_out: When truthy, overrides ``checklist`` with ``[1, 2]``,
            ``threshold`` with 20% of the number of sequences, and the range
            with all columns.
        slider_values: Two keys into ``self.column_dict`` giving the first
            and last column to show.
        highlight_seq: Id of a sequence in ``self.sequences`` whose path is
            drawn as separate dark links, or a falsy value.
        click_data: Click payload; when truthy,
            ``click_data['points'][0]['pointIndex']`` selects the consensus
            tree node whose sequences are shown.
        checklist: Collection of option numbers: 1 = concatenate vertices,
            2 = drop weak connections, 3 = rebuild the diagram from
            sequences passed through ``self._remove_snp``.
        threshold: Connections with a weight not above this value count as
            weak (option 2).
        consensustable_data: JSON read with ``pd.read_json`` (used only with
            ``click_data``).
        consensustree_data: JSON string of the consensus tree (used only
            with ``click_data``).

    Returns:
        Tuple ``(figure, str(tree_node_id))``; the id is ``'None'`` without
        ``click_data``.

    Raises:
        PreventUpdate: If ``self.sequences`` is empty.
    """
    if not self.sequences:
        raise PreventUpdate()

    # RANGE START / END
    range_start = min(self.column_dict[slider_values[0]])
    if slider_values[1] in self.column_dict:
        range_end = max(self.column_dict[slider_values[1]])
    else:
        range_end = len(self.diagram)

    label = []
    source = []
    target = []
    value = []
    link_color = []
    colors = dict(A="#FF9AA2", C="#B5EAD7", G="#C7CEEA", T="#FFDAC1")

    # FILTER SEQUENCES (AFFINITY TREE)
    if click_data:
        tree_node_id = click_data['points'][0]['pointIndex']
        full_consensustable = pd.read_json(consensustable_data)
        consensustree_data = json.loads(consensustree_data)
        tree = consensustree.dict_to_tree(consensustree_data)
        node_details_df = consensustable.get_consensus_details_df(
            tree_node_id, full_consensustable, tree)
        filtered_sequences = node_details_df["SEQID"].tolist()
        diagram_filtered = self.construct_diagram(sequences_values=[
            self.sequences[seq] for seq in filtered_sequences
        ])
        # Nodes absent from the filtered diagram become empty placeholders.
        for i in range(len(self.column_dict)):
            if i not in diagram_filtered:
                diagram_filtered[i] = dict(
                    base="",
                    sources={},
                    targets={},
                )
    else:
        tree_node_id = None
        filtered_sequences = list(self.sequences.keys())
        diagram_filtered = copy.deepcopy(self.diagram)

    if zoom_out:  # EXTREME ZOOM-OUT
        checklist = [1, 2]
        threshold = len(self.sequences) * 0.2
        range_start = 0
        range_end = len(self.column_dict) - 1

    if 3 in checklist:
        sequences_values = [
            self.sequences[seq] for seq in filtered_sequences
        ]
        new_sequences_values = [
            self._remove_snp(self.consensus_sequence, sequence)
            for sequence in sequences_values
        ]
        diagram_filtered = self.construct_diagram(
            sequences_values=new_sequences_values)
        for i in range(len(self.diagram)):
            if i not in diagram_filtered:
                diagram_filtered[i] = dict(
                    base="",
                    sources={},
                    targets={},
                )

    if 2 in checklist and threshold > 0:  # WEAK CONNECTIONS
        weak_nodes = list()
        # Forward pass: a node is weak when all of its sources are weak or
        # all of its incoming weights are at most the threshold.
        for node_id in sorted(
                diagram_filtered.keys())[range_start:range_end + 1]:
            node = diagram_filtered[node_id]
            if node["sources"] and (all([
                    x in weak_nodes for x in node["sources"].keys()
            ]) or all([x <= threshold for x in node["sources"].values()])):
                weak_nodes.append(node_id)
                diagram_filtered[node_id]["sources"] = {}
                diagram_filtered[node_id]["targets"] = {}
            else:
                diagram_filtered[node_id]["sources"] = {
                    key: weight
                    for key, weight in node["sources"].items()
                    if weight > threshold
                }
                diagram_filtered[node_id]["targets"] = {
                    key: weight
                    for key, weight in node["targets"].items()
                    if weight > threshold
                }
        # Backward pass: a node whose targets are all weak is weak as well;
        # links to weak nodes are removed from the remaining nodes.
        for node_id in sorted(
                diagram_filtered.keys())[range_end - 1:range_start:-1]:
            node = diagram_filtered[node_id]
            if node["targets"] and all(
                [x in weak_nodes for x in node["targets"].keys()]):
                weak_nodes.append(node_id)
                diagram_filtered[node_id]["sources"] = {}
                diagram_filtered[node_id]["targets"] = {}
            else:
                diagram_filtered[node_id]["sources"] = {
                    key: weight
                    for key, weight in node["sources"].items()
                    if key not in weak_nodes
                }
                diagram_filtered[node_id]["targets"] = {
                    key: weight
                    for key, weight in node["targets"].items()
                    if key not in weak_nodes
                }

    if 1 in checklist:  # CONCAT VERTICES
        diagram_filtered, diagram_reorganization = self._bound_vertices(
            diagram_filtered, range_start, range_end)
    else:
        diagram_reorganization = dict()

    if highlight_seq and highlight_seq in filtered_sequences:  # HIGHLIGHT SEQUENCE
        highlight_seq_nodes = [
            node_id for node_id in self.sequences[highlight_seq]
            if node_id not in diagram_reorganization.keys()
        ]
    else:
        highlight_seq_nodes = []

    for node_id in sorted(diagram_filtered.keys())[range_start:range_end + 1]:
        label.append(diagram_filtered[node_id]["base"])
        for t in diagram_filtered[node_id]["targets"]:
            if t <= range_end:
                source.append(node_id - range_start)
                target.append(t - range_start)
                value.append(diagram_filtered[node_id]["targets"][t])
                link_color.append("#D3D3D3")

                # HIGHLIGHT SEQUENCE
                if highlight_seq_nodes and node_id in highlight_seq_nodes:
                    s_id = highlight_seq_nodes.index(node_id)
                    # The last node of the highlighted sequence has no
                    # successor; guard the lookup so it cannot raise
                    # IndexError when that node still has targets.
                    if (s_id + 1 < len(highlight_seq_nodes)
                            and highlight_seq_nodes[s_id + 1] == t):
                        # Split one unit off the grey link and draw it dark.
                        value[-1] -= 1
                        source.append(node_id - range_start)
                        target.append(t - range_start)
                        value.append(1)
                        link_color.append("#342424")

    fig = go.Figure(
        data=go.Sankey(
            arrangement="snap",
            node=dict(
                # Labels of five or more characters are abbreviated.
                label=[
                    l if len(l) < 5 else f"{l[0]}...{l[-1]}({len(l)})"
                    for l in label
                ],
                pad=10,
                color=[colors[l] if l in colors else "gray" for l in label]),
            link=dict(source=source,
                      target=target,
                      value=value,
                      color=link_color)),
        layout=dict(
            # height=300,
            # width=1600
        ))
    return fig, str(tree_node_id)
""".format(all_ctgov, all_ctgov-all_applicable, not_due, due_cert, due) print(inc_crit) # + import plotly.graph_objs as go fig = go.Figure(data=[go.Sankey( arrangement = 'freeform', node = dict( pad = 15, thickness = 10, line = dict(color = "black", width = 0.5), label = ["All CT.gov: {}".format(all_ctgov), "Not Applicable: {}".format(all_ctgov-all_applicable), "Applicable: {}".format(all_applicable), "Not Due: {}".format(not_due), "Received Certificate: {}".format(due_cert), "Due: {}".format(due), ""], color = "#1f77b4", x = [0, .799 , .5, .798, .798, .798], y = [0, .3, .9, .8, .9, .99] ), link = dict( source = [0, 0, 2, 2, 2], # indices correspond to labels, eg A1, A2, A2, B1, ... target = [1, 2, 3, 4, 5], value = [all_ctgov, all_applicable, not_due, due_cert, due], color = ['#ADE1CA', 'lightblue', 'grey', 'grey', '#EA573A'] ))]) fig #fig.write_image(parent + "/Figures/sankey.svg") # - # # Trends
def fig_sankey(label, color, source, target, value, title=""):
    """Build a vertical Sankey figure of messages flowing between nodes.

    Args:
        label (list): Node labels.
        color (list): Node colors.
        source (list): Source node id of each link.
        target (list): Target node id of each link.
        value (list): Value of each link.
        title (str, optional): Figure title. Defaults to "".

    Returns:
        plotly.graph_objs.Figure
    """
    node_spec = {
        "pad": 20,
        "thickness": 40,
        # Zero-width outline: node borders are effectively hidden.
        "line": {"color": "black", "width": 0},
        "label": label,
        "color": color,
        "hovertemplate":
        "%{label}<br>Number of messages: %{value}<extra></extra>",
    }
    link_spec = {
        "source": source,
        "target": target,
        "value": value,
        "hovertemplate":
        "%{source.label} ---> %{target.label}<extra>%{value}</extra>",
    }
    sankey_trace = go.Sankey(
        arrangement='fixed',
        orientation='v',
        valueformat=".0f",
        node=node_spec,
        link=link_spec,
    )

    # Two centred captions in paper coordinates: one above the plot area
    # (y = 1.1) and one below it (y = -0.1).
    captions = [
        {
            'text': caption,
            'font': {
                'size': 13,
                'color': 'rgb(116, 101, 130)',
            },
            'showarrow': False,
            'align': 'center',
            'x': 0.5,
            'y': y_paper,
            'xref': 'paper',
            'yref': 'paper',
        }
        for caption, y_paper in (("Senders", 1.1), ("Receivers", -.1))
    ]

    return go.Figure(
        data=[sankey_trace],
        layout={'title': dict(text=title), 'annotations': captions},
    )
# Pivot tables over the exchange-to-exchange transfers, keyed by
# (From_origin, To_destination).
# Every row is one transfer, so a constant 1 lets the pivot's sum act as a
# row count. A scalar assignment broadcasts to the whole column.
between_exchange_transfer['count'] = 1

# Total ETH moved per (origin, destination) pair.
# aggfunc="sum" instead of np.sum: same result, but avoids the pandas 2.x
# FutureWarning about passing NumPy callables as aggregators.
between_exchange_transfer_pivot = between_exchange_transfer.pivot_table(
    values='Eth_Value',
    index=['From_origin'],
    columns=['To_destination'],
    aggfunc='sum',
)
between_exchange_transfer_pivot.to_excel('between_exchange_transfer_pivot.xlsx')

# Number of transfers per (origin, destination) pair.
between_exchange_transfer_pivot_count = between_exchange_transfer.pivot_table(
    values='count',
    index=['From_origin'],
    columns=['To_destination'],
    aggfunc='sum',
)
between_exchange_transfer_pivot_count.to_excel('between_exchange_transfer_pivot_count.xlsx')

# Draw the Sankey diagram from the transfer counts.
# NOTE(review): the link values below are hard-coded; presumably they were
# transcribed from between_exchange_transfer_pivot_count -- verify they are
# still in sync with the data.
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        # Nodes 0-5 are the origin exchanges, nodes 6-11 the destinations.
        label=['Binance_ori', 'Bitfinex_ori', 'Gemini_ori', 'Huobi_ori', 'Kraken_ori', 'Poloniex_ori',
               'Binance_des', 'Bitfinex_des', 'Gemini_des', 'Huobi_des', 'Kraken_des', 'Poloniex_des'],
        color=["Red", "Yellow", "green", "Blue", "Purple", "Brown",
               "Red", "Yellow", "green", "Blue", "Purple", "Brown"],
    ),
    link=dict(
        # Read the three lists column-wise: value[i] transfers flow from
        # label[source[i]] to label[target[i]].
        source=[0, 0, 0, 0, 0, 0,
                1, 1, 1, 1, 1, 1,
                2, 2, 2, 2, 2, 2,
                3, 3, 3, 3, 3, 3,
                4, 4, 4, 4, 4, 4,
                5, 5, 5, 5, 5, 5],
        target=[6, 7, 8, 9, 10, 11,
                6, 7, 8, 9, 10, 11,
                6, 7, 8, 9, 10, 11,
                6, 7, 8, 9, 10, 11,
                6, 7, 8, 9, 10, 11,
                6, 7, 8, 9, 10, 11],
        value=[843, 166, 140, 127, 570, 155,
               248, 0, 8, 26, 102, 36,
               150, 13, 71, 0, 37, 33,
               68, 33, 0, 1, 1, 2,
               366, 120, 44, 6, 0, 49,
               36, 4, 1, 3, 3, 0],
    ))])
fig.update_layout(title_text="Transfer pattern between exchanges Sankey Diagram", font_size=15)
#fig.show(renderer = 'png', width = 1200, height = 600)
fig.show()
# Sort the labels in place; node ids below are positions in this sorted list.
labelNames.sort()
print(labelNames)

# Convert each link's endpoint names into node positions and copy its value.
for row, source_name in enumerate(metaData["source"]):
    target_name = metaData["target"][row]
    diagramInfo["source"].append(labelNames.index(source_name))
    diagramInfo["target"].append(labelNames.index(target_name))
    diagramInfo["value"].append(metaData["value"][row])

node_style = dict(
    pad=20,
    thickness=20,
    line=dict(color="purple", width=0),
    label=labelNames,
    color="#574ae2",
)
sankey_trace = go.Sankey(
    node=node_style,
    link=diagramInfo,
    textfont=dict(size=28),
)
fig = go.Figure(data=[sankey_trace])

# The first element of imageNum is used as the numeric suffix of the file name.
image_path = 'Data/static/Diagram' + str(imageNum[0]) + '.jpg'
fig.write_image(image_path, width=1920, height=1080, scale=1)
def plot_sankey(link_df,
                node_df,
                label_colors=None,
                title="Basic Sankey Diagram"):
    """Build a Sankey figure from a link table and a node table.

    link_df has one row per link; ``source`` and ``target`` are node
    indices and ``num`` is the link value:

           | source | target | num
        ---+--------+--------+-----
         0 |      0 |      8 | 114
         1 |      0 |      9 |  57
        ...

    node_df has one row per node. The columns read here are ``label``
    (node caption) and ``step`` (which column of the diagram the node
    belongs to):

           | idx | label | step
        ---+-----+-------+------
         0 |   0 | cat_1 |    0
         1 |   1 | cat_2 |    0

    Args:
        link_df: DataFrame with ``source``, ``target`` and ``num`` columns.
        node_df: DataFrame with ``label`` and ``step`` columns. It is
            modified in place: ``x_pos``, ``y_pos`` and ``color`` columns
            are added or overwritten.
        label_colors: Optional mapping from label to hex color. When given,
            nodes use that color at opacity 0.9 and every link uses its
            source node's color at opacity 0.3. When omitted or empty,
            colors come from ``generate_label_colors`` and links keep
            Plotly's default color.
        title: Figure title.

    Returns:
        plotly.graph_objs.Figure containing the single Sankey trace.
    """
    # Distinct steps are ranked by their category codes (0..k-1) and spread
    # evenly over [0, 1]. With a single step the divisor would be 0 and
    # every x position would become NaN, so clamp it to 1; all nodes then
    # sit at x = 0.
    num_steps = max(node_df['step'].nunique() - 1, 1)
    node_df.loc[:, 'x_pos'] = node_df['step'].astype(
        'category').cat.codes / num_steps
    # NOTE(review): presumably a small non-zero value because Plotly treats
    # an exact 0 as "not set" -- confirm against the Sankey node docs.
    node_df.loc[:, 'y_pos'] = 0.001

    sankey = go.Sankey(arrangement="snap",
                       node={
                           'pad': 15,
                           'thickness': 20,
                           'line': {
                               'color': 'black',
                               'width': 0.5
                           },
                           'x': node_df['x_pos'].to_list(),
                           'y': node_df['y_pos'].to_list(),
                           'label': node_df['label'].to_list()
                       },
                       link={
                           'source': link_df['source'].to_list(),
                           'target': link_df['target'].to_list(),
                           'value': link_df['num'].to_list(),
                       })

    if label_colors:
        node_df['color'] = node_df['label'].apply(
            lambda cat: hex_to_rgba(label_colors[cat], opacity=0.9))
        sankey.node.color = node_df['color'].to_list()
        # Read the node colors back once instead of once per link, then
        # tint each link with a more transparent copy of its source color.
        node_colors = sankey.node.color
        sankey.link.color = [
            update_rgba_opacity(node_colors[idx], 0.3)
            for idx in sankey.link.source
        ]
    else:
        label_colors = generate_label_colors(node_df['label'].unique(),
                                             default_colors)
        node_df['color'] = node_df['label'].apply(
            lambda cat: label_colors[cat])
        sankey.node.color = node_df['color'].to_list()

    fig = go.Figure(data=sankey)
    fig.update_layout(title_text=title, font_size=10)
    return fig
0.3382304929807378, 0.3813254978778975, 0.25334639242572643 ] vals += [ 0.5208446362515413, 0.31336621454993836, 0.42824907521578296, 0.2953514180024661, 0.3394327990135635, 0.20393341553637485 ] # vals += [0.5001363512407963, 0.30160894464139626, 0.38396509408235613, 0.27188437414780475, 0.2877011180801745, 0.18271066266703026] vals = [100 * _ for _ in vals] fig = go.Figure(data=[ go.Sankey(node=dict(pad=50, thickness=10, line=dict(color="gray", width=2), label=pfs + roles + ds, color=[col_yes, col_no] + ["#6ccc86"] * 6), link=dict(source=[0, 1] * len(roles) + [2, 3, 4, 5, 6, 7] * len(ds), target=[2] * len(pfs) + [3] * len(pfs) + [4] * len(pfs) + [5] * len(pfs) + [6] * len(pfs) + [7] * len(pfs) + [8, 8, 8, 8, 8, 8], value=vals + [73.5, 62.8, 71.4, 65.6, 67.4, 58.9])) ]) # print (df[df["Q5"] == "Data Scientist"]["Q9_Part_1"].value_counts().values[0]) # print (df[df["Q5"] == "Data Scientist"]["Q9_Part_2"].value_counts().values[0]) # print (df[df["Q5"] == "Data Scientist"]["Q9_Part_3"].value_counts().values[0]) # print (df[df["Q5"] == "Data Scientist"]["Q9_Part_4"].value_counts().values[0]) # print (df[df["Q5"] == "Data Scientist"]["Q9_Part_5"].value_counts().values[0]) # print (df[df["Q5"] == "Data Scientist"]["Q9_Part_6"].value_counts().values[0]) # a = [2554, 1393, 2323, 1697, 1884, 972]