コード例 #1
0
ファイル: sankey.py プロジェクト: poke1024/vectorian
    def _flow_to_sankey(self, match, flow):
        """Turn a token flow into a HoloViews Sankey diagram.

        Every edge produced by ``flow_edges(flow, self._cutoff)`` links a
        token on the ``'t'`` side (``match.query``) to a token on the ``'s'``
        side (``match.doc_span``).  Nodes are allocated lazily, in the order
        in which tokens are first seen, and are labelled with the token text
        and its position.

        Args:
            match: object exposing ``doc_span`` and ``query``; both are
                indexed by token position and yield items with a ``text``
                attribute.
            flow: flow data forwarded to ``flow_edges``.

        Returns:
            The configured ``hv.Sankey`` element.
        """
        labels = []
        index_by_side = collections.defaultdict(dict)
        spans = {'s': match.doc_span, 't': match.query}

        def node_id(side, pos):
            # Allocate a node the first time a (side, position) pair shows up.
            known = index_by_side[side]
            if known.get(pos) is None:
                known[pos] = len(labels)
                labels.append(' %s [%d] ' % (spans[side][pos].text, pos))
            return known[pos]

        edges = []
        for t, s, f in flow_edges(flow, self._cutoff):
            t_node = node_id('t', t)
            s_node = node_id('s', s)
            edges.append((t_node, s_node, f))

        if not edges:
            logging.warning("no edges found")

        # The plot height follows the more populated of the two sides.
        t_side = {edge[0] for edge in edges}
        s_side = {edge[1] for edge in edges}
        n = max(len(t_side), len(s_side))

        node_table = hv.Dataset(enumerate(labels), 'index', 'label')
        sankey = hv.Sankey((edges, node_table))
        return sankey.opts(width=self._width,
                           height=n * self._height_per_node,
                           labels='label',
                           label_position='inner',
                           cmap=self._cmap,
                           node_padding=self._node_padding,
                           show_values=False)
コード例 #2
0
ファイル: Flow.py プロジェクト: lightnerdevtech/pyADVISE
def gen_sankey_plot(nodes, edges, title_text='Sankey Chart'):
    """Render a Sankey diagram and return the underlying Bokeh plot object.

    The style below is assigned to ``hv.Store.options('bokeh').Sankey``
    before the diagram is built.  ``title_text`` is accepted but is not
    used by this function.

    Args:
        nodes: node data, passed to ``hv.Sankey`` as the second item of the
            ``(edges, nodes)`` tuple.
        edges: edge data keyed by the ``'From'`` and ``'To'`` dimensions.
        title_text: unused.

    Returns:
        The ``state`` of the plot obtained from the Bokeh renderer, attached
        to the current Bokeh document.
    """
    sankey_style = dict(
        node_line_alpha=0,
        node_nonselection_alpha=0.2,
        node_size=10,
        node_line_width=0,
        edge_cmap=['#002F6C', '#BA0C2F', '#A7C6ED', '#212721'],
        cmap=['#002F6C', '#BA0C2F', '#212721', '#212721'],
        edge_nonselection_alpha=0.2,
        edge_line_alpha=0,
        edge_fill_alpha=0.7,
        edge_hover_alpha=1,
        edge_hover_color='#002F6C',
        label_text_font_size='8pt',
    )
    backend_options = hv.Store.options('bokeh')
    backend_options.Sankey = hv.Options('style', **sankey_style)

    diagram = hv.Sankey((edges, nodes), ['From', 'To'])
    diagram = diagram.options(label_index='label',
                              label_position='left',
                              width=800,
                              height=800,
                              edge_color_index='To')

    # Hand the element to the Bokeh renderer so callers get a Bokeh object
    # they can modify further.
    return hv.renderer('bokeh').get_plot(diagram, doc=curdoc()).state
コード例 #3
0
ファイル: plot_hv.py プロジェクト: parthigcar/MINICHEM
def plot_hv(input1,
            stoichiometric_dict,
            include_el,
            opfilename,
            Min=0,
            Max=1e9):
    """Draw and save a Sankey chart linking elements to released species.

    Reads ``released_sp.txt`` from the current working directory.  Each
    non-blank line holds a species name followed by its mole number,
    separated by whitespace.

    Args:
        input1: list of input elements (not used by this function).
        stoichiometric_dict: maps each species name found in
            ``released_sp.txt`` to an iterable of entries whose first item
            is compared against the upper-cased element name.
        include_el: list of elements for which the Sankey chart is drawn.
        opfilename: output file name with extension (allowed formats:
            ['html', 'json', 'auto', 'png', 'widgets', 'scrubber', None]).
        Min: exclusive lower bound on the mole number of included species.
        Max: exclusive upper bound on the mole number of included species.

    Returns:
        None.  The chart is saved to ``opfilename``.

    Raises:
        KeyError: if a species from the file is missing from
            ``stoichiometric_dict``.
    """
    sp_dict = {}
    # The context manager closes the file even when a line fails to parse.
    with open('released_sp.txt', 'r') as f:
        for line in f:
            cols = line.split()
            if not cols:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            sp_dict[cols[0]] = float(cols[1])

    e = []
    sp = []
    val = []
    hv.extension('bokeh')
    for element in include_el:
        wanted = element.upper()
        for species, amount in sp_dict.items():
            constituents = {entry[0] for entry in stoichiometric_dict[species]}
            if wanted not in constituents:
                continue
            if not (Min < amount < Max):
                continue
            e.append(element)
            # A species that is the element itself gets a '*' suffix so the
            # source and target labels differ.
            sp.append(species + '*' if element == species else species)
            val.append(amount)

    data = {'elements': e, 'species': sp, 'values': val}
    print(data)
    df = pd.DataFrame(data)
    print(df)
    sankey = hv.Sankey(df)

    sankey.opts(label_position='right',
                edge_color='species',
                node_color='elements',
                labelled=['elements', 'species'],
                xlabel='part',
                show_values=False)
    hv.save(sankey, opfilename, backend='bokeh')
コード例 #4
0
ファイル: makegraphs.py プロジェクト: thecadams/sparkleverse
 def moves(self, moves_list):
     """Render the moves as a Sankey diagram and add it to the PDF.

     Args:
         moves_list: rows of ``(from, to, value)``.

     Returns:
         ``self``, so calls can be chained.
     """
     log.info("Generating moves...")
     df = pd.DataFrame(moves_list, columns=['from', 'to', 'value'])
     # Feed the labelled frame to HoloViews; it was previously built and
     # then discarded in favour of the raw list.
     sankey = hv.Sankey(df, ['from', 'to'], vdims='value').opts(
         cmap='Dark2',
         edge_color='to',
         node_color='index')
     fig = hv.render(sankey)
     self.pdf.savefig(fig)
     return self
コード例 #5
0
def generateGraph(choice):
    """Build the Sankey diagram for the selected view.

    Args:
        choice: ``'Feelings about Future'`` or ``'Importance Rankings'``.

    Returns:
        The configured ``hv.Sankey`` element.

    Raises:
        ValueError: if ``choice`` is not one of the two supported views.
    """
    # Validate first so a bad choice fails with a clear message instead of
    # an UnboundLocalError further down.
    if choice == 'Feelings about Future':
        title = 'How Tech Savviness Influences Feelings about Connectivity'
    elif choice == 'Importance Rankings':
        title = 'How Tech Savviness Influences Priorities when Purchasing New Devices'
    else:
        raise ValueError(
            "Unknown choice %r; expected 'Feelings about Future' or "
            "'Importance Rankings'" % (choice,))

    value_dim = hv.Dimension('Percentage', unit='%')
    if choice == 'Feelings about Future':
        graph = (fearEdges, fearNodes)
    else:
        graph = (rankEdges, rankNodes)

    sankey = hv.Sankey(graph, ['From', 'To'], vdims=value_dim)
    sankey.opts(title=title)
    sankey.opts(labels='label',
                width=1000,
                height=900,
                cmap=jankyCmap,
                edge_color=dim('From').str(),
                fontsize={'title': 18, 'labels': 16},
                node_hover_fill_color='grey',
                tools=[hover])
    return sankey
コード例 #6
0
 def display(cls, trackqc: TrackQC, tracks: List[str] = None):
     """
     Return a flow diagram between two tracks showing the proportion
     of the beads classified by their status (their mostCommonError).

     The frame comes from ``cls.dataframe``; nodes and edges are derived
     from it with ``cls.nodes`` and ``cls.edges``.
     """
     frame = cls.dataframe(trackqc, tracks)
     node_frame = cls.nodes(frame)
     edge_frame = cls.edges(frame, node_frame)

     graph = (edge_frame, hv.Dataset(node_frame, "nodenumber"))
     diagram = hv.Sankey(graph, ['From', 'To'], ['bead', 'Left'])
     return diagram.options(label_index='mostcommonerror',
                            edge_color_index='Left',
                            color_index='mostcommonerror')
コード例 #7
0
def plot_sankey(sankey_dataframe, filename, save=True):
    """Build the 'Citrulinated Proteins' Sankey diagram and optionally save it.

    The plotting code is credited to the HoloViews user gallery:
    http://holoviews.org/gallery/demos/bokeh/energy_sankey.html#bokeh-gallery-energy-sankey

    When ``save`` is true (the default) the diagram is written to
    ``filename + '.svg'``.  Pass ``save=False`` to skip writing, or change
    the extension given to ``hv.save()`` for another format.  Nothing is
    returned.
    """
    plot_options = dict(label_position='right',
                        edge_color='target',
                        node_color='index',
                        cmap='tab20c')

    sankey = hv.Sankey(sankey_dataframe, label='Citrulinated Proteins')
    sankey.opts(**plot_options)

    if not save:
        return
    hv.save(sankey, filename + '.svg')
コード例 #8
0
def sankey_plot(sm,key1,key2,align_thr=0.1):
    """Generate a sankey plot

    Parameters
    ----------
    sm: SAMAP object

    key1 & key2: str, annotation vector keys for species 1 and 2

    align_thr: float, optional, default 0.1
        The alignment score threshold below which to remove cell type mappings.

    Returns
    -------
    The configured ``hv.Sankey`` element.

    Raises
    ------
    ImportError
        If holoviews or bokeh cannot be imported.
    """
    _, _, M = get_mapping_scores(sm, key1, key2)

    # The text before the first underscore of the first row/column label is
    # used as the caption on each side of the plot.
    id1 = M.index[0].split('_')[0]
    id2 = M.columns[0].split('_')[0]

    d = M.values.copy()
    d[d < align_thr] = 0
    x, y = d.nonzero()
    values = d[x, y]
    # Column nodes are stored after the row nodes in the combined array.
    y = y + M.index.size
    nodes = np.append(q(M.index), q(M.columns))

    R = pd.DataFrame(data=nodes[np.vstack((x, y))].T,
                     columns=['source', 'target'])
    R['Value'] = values

    # Only the imports are guarded, so errors raised while activating the
    # backend are no longer misreported as a missing package.
    try:
        import holoviews as hv
        from bokeh.models import Label
    except ImportError as err:
        raise ImportError(
            'Please install holoviews with `!pip install holoviews`.'
        ) from err
    hv.extension('bokeh', logo=False)

    def f(plot, element):
        fig = plot.handles['plot']
        fig.sizing_mode = 'scale_width'
        fig.x_range.start = -600
        # The right caption is positioned from the x-range end as it is
        # before the range is widened below.
        fig.add_layout(Label(x=fig.x_range.end * 0.78,
                             y=fig.y_range.end * 0.96,
                             text=id2))
        fig.x_range.end = 1500
        fig.add_layout(Label(x=0, y=fig.y_range.end * 0.96, text=id1))

    sankey1 = hv.Sankey(R, kdims=["source", "target"], vdims=["Value"])

    sankey1.opts(cmap='Colorblind', label_position='outer',
                 edge_line_width=0, show_values=False,
                 node_alpha=1.0, node_width=40, node_sort=True,
                 frame_height=1000, frame_width=800,
                 bgcolor="snow", apply_ranges=True, hooks=[f])

    return sankey1
コード例 #9
0
ファイル: graph.py プロジェクト: JonETJakobsson/scConnect
    def sankey_graph(node, th):
        """Build the bar, table and Sankey overview for one node of ``G``.

        Args:
            node: node of the enclosing-scope graph ``G`` to focus on.
            th: lower bound applied to ``weighted_score`` when selecting the
                links shown in the Sankey diagram.

        Returns:
            A two-column layout combining the bars, the tables and the
            Sankey diagram.
        """
        # Find all interactions where node is target or source node
        G_s = nx.MultiDiGraph()
        for n, nbrs in G.adj.items():
            for nbr, edict in nbrs.items():
                if n == node:
                    for d in edict.values():
                        # append an underscore after the target node
                        # (presumably so it stays distinct from a source
                        # node of the same name)
                        G_s.add_edge(n, nbr + "_", **d)
                if nbr == node:
                    for d in edict.values():
                        # prepend an underscore before the source node
                        G_s.add_edge("_" + n, nbr, **d)

        # create the dataset used to build the sankey graph.
        # Sort values on weight to get ordered representation on plot.
        edges = nx.to_pandas_edgelist(G_s)
        links = hv.Dataset(edges, ["source", "target"],
                           ["weighted_score", "interaction", "receptorfamily"
                            ]).sort("weighted_score")
        nodes = hv.Dataset(list(G_s.nodes), 'cluster')
        sankey = hv.Sankey((links, nodes)).select(weighted_score=(th, None))

        # calculate bars
        ligands = hv.Dataset(edges, ["ligand", "source"], ["score"]).select(
            source=node).aggregate(function=np.mean).sort("score",
                                                          reverse=True)
        receptors = hv.Dataset(edges, ["receptor", "target"],
                               ["score"]).select(target=node).aggregate(
                                   function=np.mean).sort("score",
                                                          reverse=True)
        bars = (hv.Bars(ligands, "ligand") +
                hv.Bars(receptors, "receptor")).cols(2)

        # calculate table
        # ``G.nodes`` replaces the ``G.node`` alias removed in NetworkX 2.4.
        node_attrs = G.nodes[node]
        ligands = hv.Dataset(node_attrs["ligands"], "ligand",
                             "score").sort("score", reverse=True)
        receptors = hv.Dataset(node_attrs["receptors"], "receptor",
                               "score").sort("score", reverse=True)
        table = hv.Layout(hv.Table(ligands) + hv.Table(receptors)).cols(2)

        return (bars + table + sankey).cols(2)
コード例 #10
0
ファイル: HVfns.py プロジェクト: OrgansWithoutBodies/OHWeb
def catmapsankey(N=-1,perc=False):
    """Build a Sankey diagram of original categories flowing into mapped ones.

    Args:
        N: forwarded to ``getdata`` as its first argument.
        perc: when true, each flow is divided by the total of
            ``d['amt']``; otherwise the raw sums are used.

    Returns:
        An ``hv.Sankey`` with one edge per category mapping whose summed
        amount is positive.
    """
    d = getdata(N, mapcats=False)
    # If perc, edge values end up divided by the total; otherwise by 1.
    tperc = sum(d['amt']) if perc else 1

    # Close the connection once the rows are fetched instead of leaking it.
    # NOTE(review): assumes ``sql`` is a DB-API module such as sqlite3 - confirm.
    conn = sql.connect(dpath)
    try:
        category_pairs = conn.execute("SELECT categorymap.name,donationcategories.name FROM CATEGORYMAP JOIN DONATIONCATEGORIES ON CATEGORYMAP.MAPSTOID=DONATIONCATEGORIES.ID").fetchall()
    finally:
        conn.close()

    mm = []
    for m in category_pairs:
        # Sum once per mapping; the value serves both the filter and the edge.
        amount = sum(d['amt'][d['item'] == m[0]])
        if amount > 0:
            mm.append(['Original Category: ' + m[0],
                       'New Category: ' + m[1],
                       amount / tperc])
    return hv.Sankey(mm)
コード例 #11
0
import sys
import pandas as pd
import holoviews as hv
import panel as pn
from bokeh.resources import INLINE

# Use the CSV named by the single command-line argument, otherwise fall back
# to the bundled example.
if len(sys.argv) == 2:
    file_path = sys.argv[1]
else:
    file_path = "data/sankey_example.csv"

hv.extension('bokeh')

edges = pd.read_csv(file_path)
sankey = hv.Sankey(edges, label='Energy Diagram')
sankey.opts(label_position='left',
            edge_color='target',
            node_color='index',
            cmap='tab20')

# Save through the one pane instead of wrapping the plot a second time.
panel_object = pn.pane.HoloViews(sankey)
panel_object.save('build/sankey.html',
                  embed=True,
                  resources=INLINE)
コード例 #12
0
def sankey_plot_main():
    """Render the Sankey figure and write it to ``figures/2A.svg`` and ``.png``.

    For every tumour type listed below, the columns of the matrix returned
    by ``create_matrix_treatments_plot`` are summed over that type's samples
    (columns named in the exclusion list are skipped).  Rows whose tumour
    type is 'Unknown' are dropped, and only sources whose total exceeds 30
    are kept.
    """
    config_params(font_size=4)

    hv.extension('matplotlib')
    hv.output(fig='svg')

    excluded = ['RADIATION', 'Miscellanious', 'Unknown', 'TopoII', 'TOPOII']
    matrix, samples_by_ttype = create_matrix_treatments_plot()
    tumor_types = [
        'Breast',
        'Colon-Rectum',
        'Prostate',
        'Lung',
        'Skin',
        'Bone-Soft-tissue',
        'Ovary',
        'Esophagus',
        'Urinary-tract',
        'NET',
        'Kidney',
        'Nervous-system',
        'Biliary',
        'Pancreas',
        'Unknown',
        'Uterus',
        'Head-and-neck',
        'Liver',
        'Stomach',
        'Mesothelioma',
    ]

    # One (tumour type, column, summed count) row per retained column.
    rows = []
    for ttype in tumor_types:
        subset = matrix.loc[samples_by_ttype[ttype]]
        rows.extend((ttype, col, int(subset[col].sum()))
                    for col in subset
                    if col not in excluded)

    flows = pd.DataFrame(rows)
    flows.columns = ['target', 'source', 'value']
    flows = flows[flows['target'] != 'Unknown'].fillna(0)
    flows['value'] = flows['value'].astype(int)

    # Keep only sources with more than 30 counts in total.
    good_source = {
        source
        for source, group in flows.groupby(by='source')
        if group['value'].sum() > 30 and source != 'Unknown'
    }
    kept = flows[flows['source'].isin(good_source)]
    kept = kept.sort_values(by='source', ascending=True)

    diagram = hv.Sankey(kept, label='').opts(label_position='left',
                                             edge_color='target',
                                             node_color='index',
                                             cmap='Set1')

    fig = hv.render(diagram)
    fig.set_figwidth(10)
    fig.savefig('figures/2A.svg')
    fig.savefig('figures/2A.png', dpi=600)
コード例 #13
0
def plot_sankey(full_df, dates, column_name, diff_classes=None):
    """Show a Sankey diagram of how values move between classes over time.

    Args:
        full_df: frame whose ``column_name`` column holds, per row, one
            value for each entry of ``dates``.
        dates: sequence of ``(year, month, day)`` triples.
        column_name: name of the column of ``full_df`` to expand.
        diff_classes: ascending class bounds; defaults to ``[0, 10, 20]``.
            One extra open-ended class is added above the last bound.

    Returns:
        None.  The rendered plot is passed to ``show``.
    """
    if diff_classes is None:
        # Build the default per call rather than sharing one mutable list.
        diff_classes = [0, 10, 20]
    n_diff_classes = len(diff_classes) + 1
    dates_str = [
        '{0}-{1:02d}-{2:02d}'.format(date[0], date[1], date[2])
        for date in dates
    ]

    # Expand df
    df = pd.DataFrame(full_df[column_name].to_list(), columns=dates_str)

    # Replace every value by its class.
    for date_str in dates_str:
        df[date_str] = df[date_str].apply(quantize, args=(diff_classes, ))

    print(df)
    df = df.apply(create_directions,
                  axis=1,
                  result_type='expand',
                  args=(n_diff_classes, ))
    print(df)

    # One node per (date index, class), e.g. "0: 0", "0: 1-10", "0: 11-20",
    # "0: >21".
    nodes = []
    for index in range(len(dates)):
        last_value = None
        for bound in diff_classes:
            if last_value is None:
                name_str = bound
            else:
                name_str = f"{last_value + 1}-{bound}"
            last_value = bound
            nodes.append(f"{index}: {name_str}")
        # Open-ended class above the last bound.
        nodes.append(f"{index}: >{last_value + 1}")
    print(nodes)
    nodes = hv.Dataset(enumerate(nodes), 'index', 'label')

    edges = []
    # ``step`` indexes the transition between two consecutive dates; a
    # separate name keeps the ``column_name`` parameter intact.
    for step in range(len(dates) - 1):
        # Count occurrences.
        group_count = df[step].value_counts()
        group_sum = group_count.sum()
        print(group_count, group_sum)
        # ``Series.items`` replaces ``iteritems``, removed in pandas 2.0.
        for index, occurence in group_count.items():
            print(index, occurence)
            # NOTE(review): the decoding below implies create_directions
            # encodes a transition as from_class * n_diff_classes + to_class
            # - confirm.
            from_node = floor(index / n_diff_classes) + step * n_diff_classes
            to_node = index % n_diff_classes + (step + 1) * n_diff_classes
            edges.append((from_node, to_node, occurence / group_sum * 100))

    print(edges)
    value_dim = hv.Dimension('Percentage', unit='%')
    flow = hv.Sankey((edges, nodes), ['From', 'To'], vdims=value_dim)
    styled = flow.opts(
        opts.Sankey(labels='label',
                    label_position='right',
                    width=1800,
                    height=1200,
                    cmap='Set1',
                    edge_color=dim('To').str(),
                    node_color=dim('index').str()))
    show(hv.render(styled))
コード例 #14
0
def make_sankey(colfrom, colto):
    """Count concussion rows for every ``(colfrom, colto)`` pair.

    Rows of the module-level ``concussion_df`` whose
    ``Player_Activity_Derived`` is null are ignored.

    Returns:
        A 2-D array with one ``[colfrom value, colto value, count]`` row per
        pair.
    """
    has_activity = concussion_df.Player_Activity_Derived.notnull()
    pair_counts = (concussion_df[has_activity]
                   .groupby([colfrom, colto])['PlayDescription']
                   .size())
    return pair_counts.to_frame().reset_index().values

# Consecutive column pairs form the stages of the diagram.
sankey_cols = [
    'Type_player',
    'Player_Activity_Derived',
    'Primary_Partner_Activity_Derived',
    'Type_partner',
]
sankey_list = []
for i in range(len(sankey_cols) - 1):
    sankey_piece = make_sankey(sankey_cols[i], sankey_cols[i + 1])
    sankey_list.append(sankey_piece)

# Stack the per-stage edge tables into one table for a single diagram.
sankey_table = np.concatenate(sankey_list)
c_sankey = hv.Sankey(sankey_table)
sankey_title = HTML('<span style="font-weight:bold; margin-left:84px">'
                    + 'Concussion Roles and Activities' + '</span>')
display(sankey_title, c_sankey)

# The above diagram shows a variety of scenarios in which concussions occur. One can see by the green box to the left that 20 offensive linemen sustained concussions from tackling and being blocked. Although punt returners received the most concussions as a single-person role, offensive linemen collectively received over half the concussions during the two-season period. Gunners (typically two per play) also collectively received more concussions than most roles.

# I next reviewed the videos for all concussion plays along with NGS animations for select plays. The Next Gen Stats (NGS) data for 2016 and 2017 include the specific position of each player during the play at intervals of 0.1 seconds, as well as the player's direction of travel and orientation. The data also include the events that occurred on the field at each time interval. Below is a small excerpt from the 66 million rows of data for punt plays.
# 

# In[ ]:


from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE

ngs = pd.read_parquet('../input/nfl-data-preparation/NGS.parq').sort_values(['GSISID', 
コード例 #15
0
# Symptoms

path = "/Users/alubis/Downloads/"
sym = pd.read_csv(path + "COVID_symptoms.csv", sep=",")
# Fixed-seed sample so the plots are reproducible.
sym = sym.sample(n=10000, random_state=1)

symptom_cols = [
    "Dry.Cough", 'Difficulty.in.Breathing', 'Fever', 'None_Sympton'
]
# Select the symptom columns before summing so unrelated columns are never
# aggregated.
sym_grouped = sym.groupby(by=["Age", "Gender"])[symptom_cols].sum()
sym_grouped = sym_grouped.reset_index()
var = ["female", "male"]
sym_fin = sym_grouped[sym_grouped.Gender.isin(var)]

hv.extension('bokeh')
sankey = hv.Sankey(data=sym_fin, kdims=["Age", "Gender"], vdims=['Dry.Cough'])
sankey.opts(cmap='Colorblind',
            label_position='left',
            edge_color='Gender',
            edge_line_width=0,
            node_alpha=1.0,
            node_width=40,
            node_sort=True,
            width=1000,
            height=800,
            bgcolor="snow",
            title="Distribution of the population of people with a dry cough")
# To display the diagram: show(hv.render(sankey))
heatmap = hv.HeatMap(sym_fin,
                     vdims=['Dry.Cough']).sort().aggregate(function=np.sum)
heatmap.opts(