def plot_chord_chart(nodes, links):
    """Render a chord chart and open it as a standalone HTML page.

    Args:
        nodes: Node records, passed straight to ``pd.DataFrame``. The chart
            reads the ``name_x``/``name_y`` columns after merging, so the
            node table presumably carries a ``name`` column -- confirm
            against the caller.
        links: Link records, passed straight to ``pd.DataFrame``. The code
            reads its ``source``, ``target`` and ``value`` columns.
    """
    lg.info("Plotting chart ...")

    node_table = pd.DataFrame(nodes)
    chart_data = pd.DataFrame(links)

    # Look up the node row for each end of a link via the node table's index.
    # The second merge makes pandas suffix the clashing node columns with
    # ``_x`` (source side) and ``_y`` (target side).
    for endpoint in ("source", "target"):
        chart_data = chart_data.merge(
            node_table, how="left", left_on=endpoint, right_index=True
        )

    chart = Chord(chart_data, source="name_x", target="name_y", value="value")

    # Write a self-contained HTML file (resources inlined), then display it.
    output_file("chord_from_df.html", mode="inline")
    show(chart)
def _region_pair_links(df):
    """Return one row per (region1, region2) pair with its summed weight.

    Within every ``family_id`` group, each 2-combination of the group's
    ``region_id`` values becomes a link weighted by the group's row count.
    """
    cnxns = []
    for _, group in df.groupby('family_id'):
        weight = len(group)
        cnxns.extend(
            (n1, n2, weight)
            for n1, n2 in combinations(group['region_id'], 2)
        )
    links = pd.DataFrame(cnxns, columns=['region1', 'region2', 'total'])
    return links.groupby(['region1', 'region2']).agg('sum').reset_index()


def plot_chord(df, names, min_total=100):
    """Plot a chord chart of regions linked through shared families.

    Args:
        df: Input frame; it is first passed through ``organize`` (defined
            elsewhere), whose result must contain ``family_id`` and
            ``region_id`` columns.
        names: Lookup frame with ``cod_mun`` (region code) and ``cod_name``
            (label) columns, used to replace region codes with names.
        min_total: Only links whose summed weight is strictly greater than
            this value are drawn. Defaults to 100.

    Writes ``chord.html`` (resources inlined) and opens it.
    """
    # Cleaning up received DataFrame
    df = organize(df)
    df = df[['family_id', 'region_id']]
    # NOTE(review): both calls deduplicate on *all* columns, so ``tf`` and
    # ``tl`` hold the same distinct (family_id, region_id) pairs and the
    # concat contains every pair twice. If the intent was "first and last
    # region per family", ``subset='family_id'`` is probably missing --
    # confirm before changing, since it alters every weight.
    tf = df.drop_duplicates(keep='first')
    tl = df.drop_duplicates(keep='last')
    df = pd.concat([tf, tl])

    # Generating nodes and links
    df = _region_pair_links(df)

    # Chord won't work with duplicated places
    df = df[df.region1 != df.region2]
    # Using only most relevant links
    df = df[df.total > min_total]

    # Associating names: the first merge labels region1, the second labels
    # region2. The clashing ``cod_name`` columns get pandas' ``_x``/``_y``
    # suffixes in the second merge.
    df = pd.merge(df, names, how='inner', left_on='region1', right_on='cod_mun')
    df = df[['region2', 'total', 'cod_name']]
    df = pd.merge(df, names, how='inner', left_on='region2', right_on='cod_mun')
    df = df[['cod_name_x', 'cod_name_y', 'total']]

    # Making and saving and showing Chord
    chord = Chord(df, source='cod_name_x', target='cod_name_y', value='total')
    output_file('chord.html', mode='inline')
    show(chord)
values='pulse', label='diet', color='diet', title='exercise dataset') box2 = BoxPlot(data=exercise, values='pulse', label='diet', stack='kind', color='kind', title='exercise dataset') show(row(box1, box2)) # In[6]: # 弦图 Chord chord1 = Chord(data=exercise, source="id", target="kind") chord2 = Chord(data=exercise, source="id", target="kind", value="pulse") show(row(chord1, chord2)) # * bokeh.plotting # In[7]: from bokeh.plotting import figure import numpy as np p = figure(plot_width=400, plot_height=400) # 方框 p.square(np.random.randint(1, 10, 5), np.random.randint(1, 10, 5),
# Number of trips for every (start station, end station) pair.
Freq = (
    trips.groupby(["Start station", "End station"])
    .size()
    .reset_index(name="Frequency")
)

# Stations act as the nodes, trip counts as the links.
nodes_df = pd.DataFrame(Station)
links_df = pd.DataFrame(Freq)

# Left-join the node table onto both ends of every link (matched against
# the node table's index).
source_data = (
    links_df
    .merge(nodes_df, how='left', left_on='Start station', right_index=True)
    .merge(nodes_df, how='left', left_on='End station', right_index=True)
)

# Keep only high-traffic station pairs (more than 3500 trips).
source_data = source_data[source_data["Frequency"].gt(3500)]

# Build the chord chart, write it as a self-contained HTML file and open it.
StationChord = Chord(
    source_data,
    source="Start station",
    target="End station",
    value="Frequency",
)
output_file('StationChord.html', mode="inline")
show(StationChord)
@author: homemdasneves """ # create a chord diagram with relationships between bootcampers import random import pandas as pd import matplotlib as plt from bokeh.io import show from bokeh.charts import Chord path = "C:\\work\\projetos\\ie-ds-bootcamp\\ie-ds-bc-group3\\data\\" data = pd.read_csv(path + "Aug_2017.csv") aux = data.head(5) NUM_ORIG_DESTS = 50 SORT_BY_AVG = "ARR_DELAY" data_aux = data.groupby(by=["ORIGIN", "DEST"], as_index=False).mean() \ .sort_values(by = [SORT_BY_AVG], ascending=[False]).head(NUM_ORIG_DESTS) data_aux[SORT_BY_AVG + "_INT"] = data_aux[SORT_BY_AVG].astype(int) chord_from_df2 = Chord(data_aux, source="ORIGIN", target="DEST", value=SORT_BY_AVG + "_INT") show(chord_from_df2)
import pandas as pd  # used to read the Excel workbook
from bokeh.charts import output_file, Chord  # bokeh chord-diagram support
from bokeh.io import show  # opens the rendered page

# Load the matrix: every cell becomes one link of the chord diagram.
df = pd.read_excel('Chord.xlsx')

# Node numbering: rows are the sources (0 .. n_rows - 1); columns are the
# targets and are numbered after the rows (n_rows .. n_rows + n_cols - 1).
n_rows, n_cols = df.shape

my_links = []
for col_pos in range(n_cols):  # outer loop: every column of the sheet
    for row_pos in range(n_rows):  # inner loop: every row of the sheet
        my_links.append({
            'target': col_pos + n_rows,
            'source': row_pos,
            # Explicit positional access: chained ``df[col][row_pos]`` would
            # treat ``row_pos`` as a label whenever the index holds integers.
            'value': df.iat[row_pos, col_pos],
        })

# Node labels: the row labels first, then the column labels, matching the
# numbering used for the links above.
index = list(df.index)
index.extend(df.columns)
my_nodes = pd.DataFrame([{'name': label} for label in index])

my_nodes_df = pd.DataFrame(my_nodes)  # node table used for plotting
my_links_df = pd.DataFrame(my_links)  # link table used for plotting

# Attach the node name to the source end and then to the target end of each
# link; the second merge yields the ``name_x`` / ``name_y`` columns.
my_source_data = my_links_df.merge(
    my_nodes_df, how='left', left_on='source', right_index=True
)
my_source_data = my_source_data.merge(
    my_nodes_df, how='left', left_on='target', right_index=True
)

# Draw the chord diagram, write the HTML page to the current folder, show it.
chord_from_df = Chord(
    my_source_data, source="name_x", target="name_y", value="value"
)
output_file('my_chord-diagram.html')
show(chord_from_df)
# Group of the first gate and of the second gate of every link.
links_df['group_x'] = links_df['gate-name'].map(gates)
links_df['group_y'] = links_df['next-gate'].map(gates)

# Keep links with more than 5 transactions that connect two different gates.
has_enough_traffic = links_df['count'] > 5
leaves_gate = links_df['gate-name'] != links_df['next-gate']
links_df = links_df[has_enough_traffic & leaves_gate]

from bokeh.charts import output_file, Chord
from bokeh.io import show
import bokeh

# First attempt: chord chart keyed directly on the gate names.
chord_from_df = Chord(
    links_df,
    source='gate-name',
    target='next-gate',
    value='count',
)
chord_from_df.plot_height = 1000
chord_from_df.plot_width = 1000
output_file('chord-diagram.html')
show(chord_from_df)

# The name-based chart above does not look good.
# Plan 2: assign each gate name an index and map both link ends to it.
gateIndex = {gate: position for position, gate in enumerate(gates.keys())}
links_df['source'] = links_df['gate-name'].map(gateIndex)
links_df['target'] = links_df['next-gate'].map(gateIndex)
import pandas as pd
from bokeh.charts import Chord, output_file
from bokeh.io import show

# Pre-built link table; the chart reads its name_x, name_y and value columns.
data = pd.read_csv('../../samples/test3.csv')

# Build the chord chart, then choose the HTML target and display the result.
chord_from_df = Chord(
    data,
    source='name_x',
    target='name_y',
    value='value',
)
output_file('chord.html')
show(chord_from_df)