Beispiel #1
0
def plot_chord_chart(nodes, links):
    """Render a chord diagram from node and link records and open it.

    ``nodes`` and ``links`` are each passed to ``pd.DataFrame``. Every link
    is joined to the node table twice: once through its ``source`` column
    and once through its ``target`` column, matching on the node table's
    index. The chart reads the resulting ``name_x`` / ``name_y`` columns
    together with the links' ``value`` column.

    The chart is written to ``chord_from_df.html`` (inline resources) and
    then shown.
    """
    lg.info("Plotting chart ...")

    # Tabular form of the vertices and the chords between them
    vertex_frame = pd.DataFrame(nodes)
    chord_frame = pd.DataFrame(links)

    # Both joins are left joins against the node index; only the link
    # column used as the key differs.
    join_options = {"how": "left", "right_index": True}
    merged = chord_frame.merge(vertex_frame, left_on="source", **join_options)
    merged = merged.merge(vertex_frame, left_on="target", **join_options)

    # Build the diagram from the joined table
    diagram = Chord(merged, source="name_x", target="name_y", value="value")

    # Register the HTML output file, then display the chart
    output_file("chord_from_df.html", mode="inline")
    show(diagram)
Beispiel #2
0
def plot_chord(df, names, min_total=100):
    """Plot a chord diagram of regions connected through shared families.

    Args:
        df: Input data handed to ``organize``; the result must provide
            ``family_id`` and ``region_id`` columns.
        names: DataFrame with ``cod_mun`` (region code) and ``cod_name``
            (display name) columns, used to label both ends of each link.
        min_total: Only links whose summed weight is strictly greater than
            this value are drawn. Defaults to 100.

    The chart is written to ``chord.html`` (inline resources) and shown.
    """
    # Cleaning up received DataFrame
    df = organize(df)
    df = df[['family_id', 'region_id']]
    # NOTE(review): with only these two columns left, keep='first' and
    # keep='last' select the same distinct pairs, so the concat below holds
    # every pair twice -- confirm this doubling is intended.
    tf = df.drop_duplicates(keep='first')
    tl = df.drop_duplicates(keep='last')
    df = pd.concat([tf, tl])

    # Generating nodes and links: every pair of regions inside a family
    # becomes a link weighted by that family's row count.
    cnxns = []
    for _, group in df.groupby('family_id'):
        size = len(group)
        cnxns.extend(
            (n1, n2, size) for n1, n2 in combinations(group['region_id'], 2))
    df = pd.DataFrame(cnxns, columns=['region1', 'region2', 'total'])
    df = df.groupby(['region1', 'region2']).agg('sum')
    df = df.reset_index()

    # Chord won't work with duplicated places
    df = df[df.region1 != df.region2]

    # Using only most relevant links
    df = df[df.total > min_total]

    # Associating names: the first merge labels region1, the second labels
    # region2; the repeated 'cod_name' column gets the _x / _y suffixes.
    df = pd.merge(df,
                  names,
                  how='inner',
                  left_on='region1',
                  right_on='cod_mun')
    df = df[['region2', 'total', 'cod_name']]
    df = pd.merge(df,
                  names,
                  how='inner',
                  left_on='region2',
                  right_on='cod_mun')
    df = df[['cod_name_x', 'cod_name_y', 'total']]

    # Making and saving and showing Chord
    chord = Chord(df, source='cod_name_x', target='cod_name_y', value='total')
    output_file('chord.html', mode='inline')
    show(chord)
Beispiel #3
0
               values='pulse',
               label='diet',
               color='diet',
               title='exercise dataset')
# Second box plot of 'pulse' labelled by 'diet', stacked and colored by 'kind'
box2 = BoxPlot(data=exercise,
               values='pulse',
               label='diet',
               stack='kind',
               color='kind',
               title='exercise dataset')
# Show both box plots side by side (box1 is created before this fragment)
show(row(box1, box2))

# In[6]:

# Chord diagram
# chord1 links 'id' to 'kind'; chord2 additionally passes 'pulse' as the value
chord1 = Chord(data=exercise, source="id", target="kind")
chord2 = Chord(data=exercise, source="id", target="kind", value="pulse")

show(row(chord1, chord2))

# * bokeh.plotting

# In[7]:

from bokeh.plotting import figure
import numpy as np

# 400 x 400 figure for the glyph calls that follow
p = figure(plot_width=400, plot_height=400)
# Square markers
p.square(np.random.randint(1, 10, 5),
         np.random.randint(1, 10, 5),
# Count the trips for every (start, end) station pair
Freq = (
    trips.groupby(["Start station", "End station"])
    .size()
    .reset_index(name="Frequency")
)

# Stations act as the nodes, trip counts as the links between them
nodes_df = pd.DataFrame(Station)
links_df = pd.DataFrame(Freq)

# Left-join the node table onto the links twice: first through the start
# station, then through the end station (matching on the node index)
source_data = (
    links_df
    .merge(nodes_df, how='left', left_on='Start station', right_index=True)
    .merge(nodes_df, how='left', left_on='End station', right_index=True)
)

# Keep only high-traffic connections (more than 3500 trips)
source_data = source_data.loc[source_data["Frequency"] > 3500]

# Build the chord chart from the remaining connections
StationChord = Chord(
    source_data,
    source="Start station",
    target="End station",
    value="Frequency",
)

# Register the HTML output file, then display the chart
output_file('StationChord.html', mode="inline")
show(StationChord)
@author: homemdasneves
"""

# create a chord diagram with relationships between bootcampers
import random
import pandas as pd
import matplotlib as plt

from bokeh.io import show
from bokeh.charts import Chord

# Location of the input data set (flight records for August 2017)
path = "C:\\work\\projetos\\ie-ds-bootcamp\\ie-ds-bc-group3\\data\\"
data = pd.read_csv(path + "Aug_2017.csv")

# Small preview of the raw data
aux = data.head(5)

# Number of origin/destination pairs to draw, and the column they are ranked by
NUM_ORIG_DESTS = 50
SORT_BY_AVG = "ARR_DELAY"

# Average the numeric columns per (ORIGIN, DEST) route and keep the routes
# with the highest mean of SORT_BY_AVG.
# - numeric_only=True: recent pandas versions raise on non-numeric columns
#   instead of silently dropping them.
# - dropna: routes without any mean value cannot be converted to int below;
#   they sort last anyway, so dropping them does not change the top routes.
data_aux = (
    data.groupby(by=["ORIGIN", "DEST"], as_index=False)
    .mean(numeric_only=True)
    .dropna(subset=[SORT_BY_AVG])
    .sort_values(by=[SORT_BY_AVG], ascending=[False])
    .head(NUM_ORIG_DESTS)
)

# Integer copy of the ranking column, used as the chord value
data_aux[SORT_BY_AVG + "_INT"] = data_aux[SORT_BY_AVG].astype(int)

chord_from_df2 = Chord(data_aux,
                       source="ORIGIN",
                       target="DEST",
                       value=SORT_BY_AVG + "_INT")
show(chord_from_df2)
Beispiel #6
0
import pandas as pd  # pandas, used to read the Excel file
from bokeh.charts import output_file, Chord  # bokeh charts extension, used to draw the chord diagram
from bokeh.io import show  # shows the generated page

df = pd.read_excel('Chord.xlsx')  # load the Excel sheet

# Build one link per cell. Rows are the sources (node numbers 0..n_rows-1);
# columns are the targets, numbered after the rows so the two ranges do not
# collide. Cells are read by position (iat) because the node numbers are
# positions, not index labels.
n_rows = len(df.index)
my_links = []
for col_pos, _ in enumerate(df.columns):  # outer loop: every column
    for row_pos in range(n_rows):  # inner loop: every row
        my_links.append({
            'target': col_pos + n_rows,
            'source': row_pos,
            'value': df.iat[row_pos, col_pos],
        })

index = list(df.index)  # row labels first ...
index.extend(df.columns)  # ... then column labels: all node labels in node-number order
my_nodes = pd.DataFrame([{'name': label} for label in index])  # node table

my_nodes_df = pd.DataFrame(my_nodes)  # node data used for plotting
my_links_df = pd.DataFrame(my_links)  # link data used for plotting

# Attach the source node's name to every link ...
my_source_data = my_links_df.merge(my_nodes_df, how='left',
                                   left_on='source', right_index=True)

# ... then the target node's name; the repeated 'name' column is suffixed
# into name_x (source) and name_y (target)
my_source_data = my_source_data.merge(my_nodes_df, how='left',
                                      left_on='target', right_index=True)

chord_from_df = Chord(my_source_data, source="name_x", target="name_y", value="value")  # build the chord diagram
output_file('my_chord-diagram.html')  # HTML output file, written to the current folder
show(chord_from_df)  # display the chart


links_df['group_x'] = links_df['gate-name'].map(
    gates)  # group of the first gate
links_df['group_y'] = links_df['next-gate'].map(
    gates)  # group of the second gate

# Keep only transitions seen more than 5 times that actually move to a
# different gate. The explicit copy makes the filtered frame independent,
# so the column assignments further down do not trigger pandas'
# SettingWithCopyWarning.
links_df = links_df[
    (links_df['count'] > 5)
    & (links_df['gate-name'] != links_df['next-gate'])
].copy()

from bokeh.charts import output_file, Chord
from bokeh.io import show
import bokeh

# First attempt: chord diagram keyed directly by gate names
chord_from_df = Chord(links_df,
                      source='gate-name',
                      target='next-gate',
                      value='count')
chord_from_df.plot_height = 1000
chord_from_df.plot_width = 1000
output_file('chord-diagram.html')
show(chord_from_df)

# The name-keyed diagram is hard to read.
# Plan 2: assign each gate name an index (its position in gates.keys())
gateIndex = {gate: position for position, gate in enumerate(gates.keys())}

links_df['source'] = links_df['gate-name'].map(gateIndex)
links_df['target'] = links_df['next-gate'].map(gateIndex)
Beispiel #8
0
from bokeh.charts import output_file, Chord
from bokeh.io import show
import pandas as pd
# Load the link table; it has to provide the name_x, name_y and value
# columns referenced below
data = pd.read_csv('../../samples/test3.csv')

# Build the chord diagram from the source/target names and link weights
chord_from_df = Chord(
    data,
    source='name_x',
    target='name_y',
    value='value',
)

# Register the HTML output file, then display the chart
output_file('chord.html')
show(chord_from_df)