def plot_chord(G, notebook=False):
    """Render an interactive chord plot of the graph.

    The edge list of ``G`` is aggregated (summed) so that a single edge is
    left per (source, target) pair.  A slider over percentile-based
    thresholds of ``weighted_score`` controls which edges are drawn.

    Args:
        G: networkx graph whose edges carry ``weighted_score`` and
            ``loyalty`` attributes.
        notebook: If truthy, return the plot so it renders directly in a
            notebook cell.  If falsy, start a bokeh app in a new browser
            window instead.

    Returns:
        The ``hv.DynamicMap`` when ``notebook`` is truthy, otherwise ``None``.
    """
    import holoviews as hv
    from holoviews import opts, dim
    import networkx as nx
    import numpy as np

    # Instantiate the bokeh renderer.
    renderer = hv.renderer('bokeh')
    hv.extension("bokeh")
    hv.output(size=250)

    # Set visuals.
    opts.defaults(
        opts.Chord(
            node_cmap='Category20',
            edge_cmap='Category20',
            edge_color=dim("source"),
            labels='cluster',
            node_color=dim('cluster'),
            inspection_policy="edges",
            toolbar="above",
        ))

    # Create the Dataset objects to be passed to the Chord element.
    # NOTE: the aggregation leaves only one edge per cluster pair.
    edges = nx.to_pandas_edgelist(G)
    links = hv.Dataset(
        edges, ["source", "target"], ["weighted_score", "loyalty"],
    ).sort(by="source").aggregate(function=np.sum)
    nodes = hv.Dataset(list(G.nodes), 'cluster').sort(by="cluster")

    # Threshold values corresponding to the given percentiles of the scores.
    percentiles = [0, 20, 40, 60, 80, 90, 95, 99]
    th_values = np.percentile(links.data["weighted_score"], percentiles)
    th = hv.Dimension(("th", "weighted scores threshold"),
                      default=th_values[0])

    def chord_graph(th):
        """Return a Chord of the links whose score is at least ``th``."""
        links_ = links.select(weighted_score=(th, None))
        return hv.Chord((links_, nodes))

    # Instantiate the dynamic map, restricted to the precomputed thresholds.
    chord = hv.DynamicMap(chord_graph, kdims=[th]).redim.values(th=th_values)

    if notebook:
        return chord

    # Not in a notebook: serve the plot in a new browser window.
    renderer.app(chord, show=True, new_window=True)
    return None
def generate_chord_diagram(responses_count, thr_count=5):
    """Build a chord diagram of responses between accounts.

    Args:
        responses_count: DataFrame with the columns ``index``, ``target``,
            ``count``, ``screen_name`` and ``party``.
        thr_count: Lower bound on ``value`` (the response count) for a link
            to be kept in the plot.

    Returns:
        The configured ``hv.Chord`` element.
    """
    # Only rows with at least one response take part in the plot.
    has_responses = responses_count['count'] > 0

    # Links, in the source/target/value layout used by hv.Chord below.
    plot_data = responses_count.loc[has_responses,
                                    ['index', 'target', 'count']]
    plot_data.columns = ['source', 'target', 'value']
    plot_data.index = np.arange(len(plot_data))

    # One node per account, keyed by 'index'.
    nodes = (
        responses_count.loc[has_responses, ['index', 'screen_name', 'party']]
        .drop_duplicates()
        .set_index('index')
        .sort_index(level=0)
    )
    nodes = hv.Dataset(nodes, 'index')

    # Colour map for single accounts according to party affiliation.
    person_party_cmap = dict(
        zip(responses_count['index'],
            responses_count['party'].apply(lambda party: party_cmap[party])))

    chord = hv.Chord((plot_data, nodes)).select(value=(thr_count, None))
    chord.opts(
        hv_opts.Chord(cmap=party_cmap,
                      edge_cmap=person_party_cmap,
                      edge_color=hv_dim('source'),
                      labels='screen_name',
                      node_color=hv_dim('party'),
                      edge_hover_line_color='cyan',
                      node_hover_fill_color='cyan',
                      height=700,
                      width=700))
    return chord
def modify_doc(doc, mytabs):
    """Add a "Chord Plot" tab with a threshold slider to a bokeh document.

    The chord plot is built from the module-level ``data`` mapping
    (``data['links']`` and ``data['nodes']``).  Moving the slider rebuilds
    the plot with the new lower bound on ``value`` and swaps it into the
    existing layout, so the document keeps a single root and a single
    "Chord Plot" tab.

    Args:
        doc: bokeh document to populate.
        mytabs: list of bokeh panels; the new tab is appended to it in place.

    Returns:
        The same ``doc``, with the tabbed layout added as a root.
    """
    start, end = 1, 20
    samples_count = 5  # threshold used for the initial plot

    # NOTE(review): the slider starts at `start` while the first plot uses
    # `samples_count`; the two only agree after the first slider move.
    slider = Slider(start=start, end=end, value=start, step=1, title="Counts")
    # NOTE(review): `select` is wired to a callback but never placed in the
    # layout, and its initial value is not one of its options.
    select = Select(title="Count", value="aux",
                    options=["box", "pack", "image", "user"])

    renderer = hv.renderer('bokeh')
    hv.extension('bokeh')
    hv.output(size=200)

    def build_plot(min_value):
        """Return a HoloViews plot of the links with value >= min_value."""
        links = pd.DataFrame(data['links'])
        nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')
        chord = hv.Chord((links, nodes)).select(value=(min_value, None))
        chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20',
                              edge_color=dim('source').str(),
                              labels='name',
                              node_color=dim('index').str()))
        # Create the HoloViews plot attached to the document.
        return renderer.get_plot(chord, doc)

    # Row holding the slider and the current plot; the plot is children[1].
    plot_row = row(slider, build_plot(samples_count).state)

    def slider_update(attrname, old, new):
        # Replace the plot in place instead of adding another tab/root.
        print("update received")
        plot_row.children[1] = build_plot(new).state

    slider.on_change('value', slider_update)

    def select_update(attrname, old, new):
        print("update received. Old: {} New: {}".format(old, new))

    select.on_change('value', select_update)

    # Combine the holoviews plot and widgets in a tabbed layout.
    tab3 = Panel(child=plot_row, title="Chord Plot")
    mytabs.append(tab3)
    views = Tabs(tabs=mytabs)
    layout = row(views)
    doc.add_root(layout)
    return doc
def plot_connection_graph():
    """Build a chord diagram from ``results_ip_comp.csv``.

    Rows 1..999 of the file are read (row 0 is skipped), rows whose last
    field is not below 10000 are dropped, and the first two fields of each
    remaining row become the source and target of a link.

    Returns:
        The configured ``hv.Chord`` element.
    """
    rows = read_csv_list("results_ip_comp.csv")[1:1000]
    rows = [r for r in rows if float(r[-1]) < 10000]

    links = pd.DataFrame({
        'source': [r[0] for r in rows],
        'target': [r[1] for r in rows],
    })

    # NOTE(review): `links` has no 'value' column and no node dataset with a
    # 'name' column is supplied, so the select() filter and labels='name'
    # below presumably have nothing to act on - confirm the intended columns.
    chord = hv.Chord(links).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   labels='name',
                   node_color=dim('index').str()))
    return chord
def chordDiagram(person_id, df_enron):
    """Return a JSON-serialised bokeh chord diagram for one person.

    Links are (fromId, toId) pairs; ``value`` is the number of rows per pair
    and ``sentiment`` their mean sentiment.  Only links with ``value`` >= 5
    are kept, and the result is then narrowed with ``select(index=person_id)``.

    Args:
        person_id: Node index passed to ``select(index=...)``.
        df_enron: DataFrame with the columns ``fromId``, ``toId``, ``date``,
            ``sentiment``, ``fromJobtitle`` and ``fromEmail``.

    Returns:
        JSON string produced from ``bokeh.embed.json_item`` of the plot.
    """
    import json

    import holoviews as hv
    import pandas as pd
    from bokeh.embed import json_item
    from holoviews import opts

    hv.extension('bokeh')

    df_chord = df_enron.sort_values('fromJobtitle')

    # Links: one row per (sender, receiver) pair.
    df_links = df_chord.groupby(['fromId', 'toId']).agg({
        'date': 'count',
        'sentiment': 'mean',
    })
    df_links = df_links.reset_index()[['fromId', 'toId', 'date', 'sentiment']]
    df_links.columns = ['source', 'target', 'value', 'sentiment']

    # Attach the sender's job title to every link.
    x = df_chord[['fromId', 'fromJobtitle']].drop_duplicates()
    x.columns = ['source', 'fromJobtitle']
    df_links = pd.merge(df_links, x, on="source")

    # Nodes: one row per sender, with e-mail as name and job title as group.
    df_nodes = df_chord[['fromId', 'fromEmail', 'fromJobtitle'
                         ]].drop_duplicates().reset_index(drop=True)
    df_nodes.columns = ['index', 'name', 'group']

    # Node size grows with the number of distinct receivers of each sender.
    y = df_chord[['fromId', 'toId']].drop_duplicates().groupby(
        ['fromId']).count().reset_index()
    y.columns = ['index', 'size']
    df_nodes = pd.merge(df_nodes, y, on='index')
    df_nodes['size'] = df_nodes['size'] / 3 + 8
    nodes = hv.Dataset(df_nodes, 'index')

    # NOTE(review): seaborn only sets global matplotlib defaults, which the
    # bokeh backend used here does not read; kept because it is a global
    # side effect other code may rely on - confirm and remove if not.
    import seaborn as sns
    sns.set()

    chord = hv.Chord((df_links, nodes)).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color='sentiment',
                   labels='name',
                   node_color='group',
                   edge_alpha=0.8,
                   edge_line_width=1.5))

    final_chord = chord.select(index=person_id)
    plot = hv.render(final_chord, backend='bokeh')
    return json.dumps(json_item(plot))
def plot_chord_graph(df, col_idx, sep="; ", height=800, width=800, top_n=None, cmap='glasbey_light'):
    """
    Plot a chord diagram of how often categories occur together.

    Each non-null cell of ``df[col_idx]`` is split on ``sep``; every category
    becomes a node (weighted by its number of occurrences) and every pair of
    categories found in the same cell becomes an edge (weighted by the number
    of cells containing that pair).

    Reference: https://holoviews.org/gallery/demos/bokeh/route_chord.html

    :param df: dataframe to analyse
    :param col_idx: The column name
    :param sep: Separator to use to split the column value
    :param height: height of the final image
    :param width: width of the final image
    :param top_n: Plot only top n nodes (optional)
    :param cmap: Colour scheme for the graph
    :return: the configured ``hv.Chord`` element
    """
    split_df = df[[col_idx]].dropna()
    split_df[col_idx] = split_df[col_idx].str.split(sep)

    # Nodes: one row per category, most frequent first, numbered in that order.
    category_counts = Counter(
        category for cell in split_df[col_idx] for category in cell)
    nodes_df = pd.DataFrame({
        'Key': list(category_counts.keys()),
        'Count': list(category_counts.values()),
    })
    nodes_df.sort_values('Count', inplace=True, ascending=False)
    nodes_df['ID'] = list(range(nodes_df.shape[0]))
    nodes = hv.Dataset(nodes_df, 'ID', 'Key')

    # Edges: unordered category pairs within a cell, stored as sorted tuples.
    pairs_per_cell = split_df[col_idx].apply(
        lambda cell: [tuple(sorted(pair)) for pair in combinations(cell, 2)])
    pair_counts = Counter(
        pair for cell_pairs in pairs_per_cell for pair in cell_pairs)
    edges_df = pd.DataFrame({
        'Source_Dest': list(pair_counts.keys()),
        'Count': list(pair_counts.values()),
    })
    edges_df.sort_values('Count', inplace=True, ascending=False)
    edges_df[['Source', 'Dest']] = pd.DataFrame(
        edges_df['Source_Dest'].tolist(), index=edges_df.index)

    # Translate both endpoints of every edge into node IDs.
    key_to_id = nodes_df[['Key', 'ID']]
    edges_df = edges_df.merge(key_to_id, left_on=["Source"], right_on=["Key"])
    edges_df = edges_df.merge(key_to_id, left_on=["Dest"], right_on=["Key"])
    edges_df.rename(columns={'ID_x': 'Source_ID', 'ID_y': 'Dest_ID'},
                    inplace=True)

    chord = hv.Chord((edges_df, nodes), ['Source_ID', 'Dest_ID'], ['Count'])

    # Restrict to the top_n most frequent nodes when requested.
    if top_n:
        shown_ids = list(nodes_df.iloc[:top_n]['ID'])
    else:
        shown_ids = nodes_df['ID'].tolist()
    most_used_lang = chord.select(ID=shown_ids, selection_mode='nodes')

    most_used_lang.opts(
        opts.Chord(cmap=cmap,
                   edge_color=dim('Source_ID').str(),
                   height=height,
                   width=width,
                   labels='Key',
                   node_color=dim('ID').str()))
    return most_used_lang
def chord(data, min_value=10, title="my plot"):
    """Render a chord diagram of ``data`` as a standalone HTML document.

    Args:
        data: Mapping with ``'links'`` (source/target/value records) and
            ``'nodes'`` (records with ``index`` and ``name``).
        min_value: Lower bound on ``value`` for a link to be drawn.
        title: Title of the generated HTML page.

    Returns:
        The HTML page as a string.
    """
    hv.extension('bokeh')
    renderer = hv.renderer('bokeh')
    hv.output(size=230)

    links = pd.DataFrame(data['links'])
    nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')

    diagram = hv.Chord((links, nodes)).select(value=(min_value, None))
    diagram.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   labels='name',
                   node_color=dim('index').str()))

    # Convert the HoloViews element to a bokeh figure, then to HTML that
    # loads BokehJS from the CDN.
    bokeh_plot = renderer.get_plot(diagram).state
    return file_html(bokeh_plot, CDN, title)
def make_chords(dict_df, type_IO='_PhysUse'):
    """Save one chord diagram per entry of ``dict_df`` as an HTML file.

    Every diagram uses the node table built from the module-level ``node_d``
    and keeps only links with ``value`` >= 5.  Files are written to
    ``chord_<key><type_IO>.html`` in the current working directory.

    Args:
        dict_df: Mapping of key -> links DataFrame.
        type_IO: Suffix appended to the key in the output file name.
    """
    hv.extension('bokeh')
    hv.output(size=250)

    # The node table does not depend on the entry being plotted.
    nodes = hv.Dataset(pd.DataFrame(node_d), 'index')

    for key_dict, links in dict_df.items():
        chord = hv.Chord((links, nodes)).select(value=(5, None))
        chord.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       edge_color=dim('source').str(),
                       labels='name',
                       node_color=dim('index').str()))
        hv.save(chord, 'chord_' + str(key_dict) + str(type_IO) + '.html')
def plot_network(all_transactions, output_path='image.html'):
    """Save a chord diagram of the transaction network.

    Links and nodes are produced by ``generate_links`` and
    ``generate_nodes``; only links with ``value`` >= 1 are drawn.

    Args:
        all_transactions: Transactions passed through to ``generate_links``
            and ``generate_nodes``.
        output_path: File the diagram is saved to.
    """
    hv.extension('bokeh')
    hv.output(size=500)

    links = pd.DataFrame(generate_links(all_transactions))
    nodes = hv.Dataset(pd.DataFrame(generate_nodes(all_transactions)), 'index')

    chord = hv.Chord((links, nodes)).select(value=(1, None))
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   labels='name',
                   node_color=dim('index').str()))

    hv.save(chord, output_path)
    print(f"Network analysis complete, saved as {output_path}")
def ChordDiagram(self):
    """Build a chord diagram of shared and matched entries.

    Refreshes ``self.sharedNeos`` and ``self.matchedNeos`` from
    ``GetShared()`` / ``GetMatchedNeos()``.  Keys of the inner mappings are
    comma-separated pairs; for shared entries both ends are prefixed with
    the sample name and an underscore.

    Returns:
        Tuple ``(plot, select)``: the rendered chord plot and a bokeh
        ``Select`` widget.
    """
    # Step 1: fetch the data.
    self.sharedNeos = self.GetShared()
    self.matchedNeos = self.GetMatchedNeos()
    hv.output(size=200)

    sources, targets, values = [], [], []

    # Links within a sample: "<sample>_<first>" -> "<sample>_<second>".
    for sample in self.sharedNeos:
        shared_pairs = self.sharedNeos[sample]
        for pair in shared_pairs:
            ends = pair.split(',')
            sources.append("_".join((sample, ends[0])))
            targets.append("_".join((sample, ends[1])))
            values.append(shared_pairs[pair])

    # Links between matched entries: the pair halves are used as they are.
    for matched in self.matchedNeos:
        ends = matched.split(',')
        sources.append(ends[0])
        targets.append(ends[1])
        values.append(self.matchedNeos[matched])

    links = pd.DataFrame({
        'source': sources,
        'target': targets,
        'value': values,
    })

    diagram = hv.Chord(links)
    diagram.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   labels='index',
                   node_color=dim('index').str()))
    rendered = hv.render(diagram)

    selector = Select(title="Option:", value="foo",
                      options=["foo", "bar", "baz", "quux"])
    return (rendered, selector)
def slider_update(attrname, old, new):
    """Bokeh ``on_change`` callback: rebuild the chord plot for a new threshold.

    Reads ``data``, ``renderer``, ``doc``, ``slider`` and ``mytabs`` from the
    enclosing scope.  Every call appends another "Chord Plot" panel to
    ``mytabs`` and adds another root to ``doc``.

    Args:
        attrname: Name of the changed attribute (unused).
        old: Previous slider value (unused).
        new: New slider value, used as the lower bound on ``value``.

    Returns:
        The document ``doc``.
    """
    print ("update received")
    threshold = new

    link_table = pd.DataFrame(data['links'])
    print(link_table.head(3))
    node_table = hv.Dataset(pd.DataFrame(data['nodes']), 'index')

    diagram = hv.Chord((link_table, node_table)).select(
        value=(threshold, None))
    chord_style = opts.Chord(cmap='Category20',
                             edge_cmap='Category20',
                             edge_color=dim('source').str(),
                             labels='name',
                             node_color=dim('index').str())
    diagram.opts(chord_style)

    # Create the HoloViews plot and attach it to the document.
    rendered = renderer.get_plot(diagram, doc)

    mytabs.append(
        Panel(child=row(slider, rendered.state), title="Chord Plot"))
    doc.add_root(row(Tabs(tabs=mytabs)))
    return doc
def plot_chord(predictions, filename):
    """Show a chord diagram of pairwise drug interactions.

    Negative ``MLSynergy_score`` values are labelled "Synergy" and replaced
    by their absolute value; all other rows are labelled "Antagony".  Only
    rows with ``NumbDrugs == 2`` are plotted.

    Args:
        predictions: DataFrame with the columns ``Drug1``, ``Drug2``,
            ``MLSynergy_score`` and ``NumbDrugs``.  It is not modified.
        filename: Output file registered with bokeh's ``output_file``.

    Returns:
        The two-drug subset of the relabelled predictions.
    """
    to_use = predictions.copy()

    # Vectorised labelling; this also defines "Interaction" for empty input.
    is_synergy = to_use["MLSynergy_score"] < 0
    to_use.loc[is_synergy, "MLSynergy_score"] = (
        -to_use.loc[is_synergy, "MLSynergy_score"])
    to_use["Interaction"] = is_synergy.map(
        {True: "Synergy", False: "Antagony"})

    hv.extension('bokeh')
    hv.output(size=200)

    to_use2 = to_use[to_use.NumbDrugs == 2]
    links = to_use2[["Drug1", "Drug2", "MLSynergy_score", "Interaction"]]

    # One node per drug: a drug found in both columns must appear only once.
    drugs = list(dict.fromkeys(
        list(links["Drug1"]) + list(links["Drug2"])))
    nodes = hv.Dataset(drugs, 'Drug')

    # NOTE(review): `links` has no 'value' column, so this filter presumably
    # selects nothing; confirm whether MLSynergy_score was intended.
    chord = hv.Chord((links, nodes)).select(value=(1, None))
    chord.opts(
        opts.Chord(cmap='Rainbow',
                   edge_cmap='Rainbow',
                   edge_color=dim('Interaction').str(),
                   labels='Drug',
                   node_color=dim('Drug').str()))

    output_file(filename)
    show(hv.render(chord))
    return to_use2
def chord_diagram(self, launch):
    """Build a chord diagram of remuneration flowing between owners.

    Nodes are the distinct ``milestone_owner`` values of ``self.df``; links
    run from ``project_owner`` to ``milestone_owner`` and carry the summed
    ``remuneration``, rescaled to parts per thousand of the total (at least
    1).  Only links at or above the ``percentile_threshold`` quantile from
    ``self.chord_data`` are kept.

    Args:
        launch: Unused by this method.

    Returns:
        The configured ``hv.Chord``, or ``None`` if any step failed (the
        error is logged).
    """

    def normalize_value(x, total):
        """Scale x to parts per thousand of total, never below 1."""
        return max(int((x / total) * 1000), 1)

    try:
        df = self.df.copy()

        # -------------- nodes
        # First-seen order keeps node ids stable between runs.
        names = list(dict.fromkeys(df['milestone_owner'].tolist()))
        person_type_dict = dict(zip(df['milestone_owner'], df['type']))
        name_dict = {name: idx for idx, name in enumerate(names)}

        node_records = [
            {'OwnerID': idx, 'index': idx, 'Type': person_type_dict[name]}
            for name, idx in name_dict.items()
        ]
        nodes = hv.Dataset(pd.DataFrame(node_records), 'index')

        # --------- make the links
        # A project owner that is not also a milestone owner has no node;
        # the resulting KeyError is handled by the except clause below.
        links = pd.DataFrame({
            'source': [name_dict[owner] for owner in df['project_owner']],
            'target': [name_dict[owner] for owner in df['milestone_owner']],
            'value': df['remuneration'].tolist(),
        })

        # One link per (source, target) pair.
        links = links.groupby(['source', 'target'])['value'].sum()
        links = links.reset_index()
        total = links['value'].sum()
        links['value'] = links['value'].apply(
            lambda x: normalize_value(x, total))

        # Filter for the top percentile.
        quantile_val = links['value'].quantile(
            self.chord_data['percentile_threshold'])
        links = links[links['value'] >= quantile_val]

        chord_ = hv.Chord((links, nodes), ['source', 'target'], ['value'])
        chord_.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       edge_color=dim('source').str(),
                       labels='Type',
                       node_color=dim('index').str(),
                       width=1000,
                       height=1000))
        return chord_
    except Exception:
        logger.error('chord diagram', exc_info=True)
# Specify the plot renderer to use.
hv.extension('bokeh')
hv.output(size=300)

# Interactive graph built from the edge list and the extended node table.
edgeList = edges[['Source', 'Target', 'weight']]

# Within the holoviews Dataset we define kdims and vdims:
# kdims are the independent variables ('Id' here), vdims are the dependent
# variables ('cent_value' and 'rank_value').  Declaring them here lets us
# refer to them when styling the graph.
nodeDS = hv.Dataset(nodes_extended, 'Id', ['cent_value', 'rank_value'])

# Default size and hidden axes for graph and node elements.
kwargs = dict(width=300, height=300, xaxis=None, yaxis=None)
opts.defaults(opts.Nodes(**kwargs), opts.Graph(**kwargs))

# Coloured graph with node size driven by the 'cent_value' vdim.
graph = hv.Graph((edgeList, nodeDS), label='GoT season 1')
graph.opts(cmap='Category20', edge_cmap='Category20', node_size='cent_value',
           edge_line_width=1, node_color=dim('Id').str(),
           edge_color=dim('Source').str())

# The element is an hv.Graph, so the hover/inspection options must be
# declared for Graph to match it.
graph.opts(
    opts.Graph(inspection_policy='nodes', tools=['hover'],
               edge_hover_line_color='green', node_hover_fill_color='red'))

hv.save(graph, 'node_size_chord.html')
def hv_chord(contrast, frequency, threshold, stats, re_order_ind, label_names, des, freq_vect):
    """
    Makes a holoviews/bokeh chord diagram

    Values of the selected slice that do not exceed the requested percentile
    are set to zero, the slice is re-ordered along both axes, and every cell
    (including zeros) becomes a link whose value is the cell truncated to an
    integer.  ``stats`` itself is left untouched.

    :param contrast: Which contrast in the glm data are we looking at
    :param frequency: The index of the frequency in the freq_vect we are looking at
    :param threshold: The percentile threshold for plotting (so the plot isn't messy)
    :param stats: The stats array we are plotting from
    :param re_order_ind: The indices for re-ordering the data into the Y-order of the parcels in the brain
    :param label_names: The names for each parcel
    :param des: The design matrix from the GLM
    :param freq_vect: The frequency vector containing the actual frequencies of each
    :return: the configured ``hv.Chord`` element
    """
    # Work on a copy: indexing `stats` can return a view, and thresholding
    # in place would then zero out the caller's data.
    square = np.array(stats[contrast, :, :, frequency])
    thresh_mask = square > np.percentile(square, threshold)
    square[~thresh_mask] = 0

    # Re-order along the first dimension, then along the second.
    X_sorted = square[re_order_ind, :][:, re_order_ind]
    labels_sorted = np.array(label_names)[re_order_ind]

    # One link per matrix cell, in row-major order.
    n_rows, n_cols = X_sorted.shape
    source, target = np.indices((n_rows, n_cols))
    links = pd.DataFrame({
        'source': source.ravel(),
        'target': target.ravel(),
        'value': X_sorted.astype(int).ravel(),
    })

    # Node table: one labelled node per matrix row.
    nodes = pd.DataFrame({
        'index': np.arange(n_rows),
        'name': labels_sorted[:n_rows],
        'group': 1,
    })

    graph = hv.Chord((links, hv.Dataset(nodes, 'index')))
    graph.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   labels='name', node_color=dim('index').str())
    )
    graph.opts(title=f'{des.contrast_names[contrast]} Partial Directed Coherence @ {int(freq_vect[frequency])}Hz')
    return graph
def plot_connectogram(connectivity_matrix, atlas_labels, atlas_indices, threshold=None, chord_type=int, dst_dir=None, filename=None):
    '''Plot a connectivity matrix as a connectogram.

    Parameters
    ----------
    connectivity_matrix : np.array
        A symmetric connectivity matrix. It is not modified.
    atlas_labels : pd.Series or list
        A list-like object providing names of each atlas region.
    atlas_indices : pd.Series or list
        A list-like object providing indices of each atlas region.
    threshold : float or int, optional
        Apply a threshold to the connectivity matrix before plotting. Only
        connectivity values that are greater than or equal to this threshold
        are visualized. The default is None (no threshold).
    chord_type : int or float, optional
        Convert the connectivity values to float or int type. If the weight
        values are integers, they define the number of chords to be drawn
        between the source and target nodes directly. If the weights are
        floating point values, they are normalized to a default of 500
        chords, which are divided up among the edges. Any non-zero weight
        will be assigned at least one chord. The default is int.
    dst_dir : str, optional
        Name of the output directory. The default is None (nothing is saved).
    filename : str, optional
        Name of the file (must be provided including the extension).
        The default is None.

    Returns
    -------
    connectogram_plot : holoviews.element.graphs.Chord
        The connectogram plot object.

    Raises
    ------
    ValueError
        If ``dst_dir`` is given without ``filename``.
    '''
    import os

    # Copy as float so that the input stays untouched and NaN can be
    # assigned even when the input has an integer dtype.
    connectivity_matrix = np.array(connectivity_matrix, dtype=float)

    # Set the lower triangle (diagonal included) to NaN: the matrix is
    # symmetric, so this removes duplicate connections.
    il = np.tril_indices(len(connectivity_matrix))
    connectivity_matrix[il] = np.nan

    # Convert to pd.DataFrame for further processing.
    connectivity_matrix_df = pd.DataFrame(data=connectivity_matrix,
                                          columns=atlas_indices,
                                          index=atlas_indices)

    # Drop any index name so that reset_index() below uses default names.
    if connectivity_matrix_df.index.name:
        connectivity_matrix_df.index.name = None

    # Stack to long format; stack() drops the NaN entries set above.
    connectivity_matrix_stacked = connectivity_matrix_df.stack().reset_index()
    connectivity_matrix_stacked.columns = ['source', 'target', 'value']

    # Only the connectivity values are converted; region ids keep their type.
    if chord_type == int:
        connectivity_matrix_stacked['value'] = (
            connectivity_matrix_stacked['value'].astype(int))

    # Reduce to only connections that are not 0.
    connectivity_matrix_stacked = connectivity_matrix_stacked.loc[
        connectivity_matrix_stacked['value'] != 0, :]

    # Optional: reduce to only connections >= threshold.
    if threshold is not None:
        connectivity_matrix_stacked = connectivity_matrix_stacked.loc[
            connectivity_matrix_stacked['value'] >= threshold, :]

    # Add node infos and show only nodes that still have a connection after
    # the filtering steps above.
    atlas_df = pd.DataFrame({
        'region_id': atlas_indices,
        'label': atlas_labels
    })
    nodes_to_show = np.unique(
        connectivity_matrix_stacked[['source', 'target']].values)
    atlas_df = atlas_df.loc[atlas_df['region_id'].isin(nodes_to_show)]
    nodes = hv.Dataset(atlas_df, 'region_id', 'label')

    # Create plot.
    connectogram_plot = hv.Chord((connectivity_matrix_stacked, nodes),
                                 ['source', 'target'], ['value'])
    connectogram_plot.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   node_color=dim('region_id').str(),
                   labels='label'))

    # Save plot.
    if dst_dir:
        if not filename:
            raise ValueError('Please provide a filename')
        dst_path = os.path.join(dst_dir, filename)
        hv.save(connectogram_plot, dst_path)

    # FIXME: this doesn't work for me in Spyder
    show(hv.render(connectogram_plot))
    return connectogram_plot
"index": 1 }, { "name": "Cytosine", "index": 2 }, { "name": "Guanine", "index": 3 }, { "name": "Thymine", "index": 4 }] nodes = hv.Dataset(pd.DataFrame(nodos), 'index') # generate the diagram with given options # these options create a diagram with labels, nodes, and edges # comment these two lines if you want to make a all-black diagram #chord = hv.Chord((links, nodes)) #chord.opts( # opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), # labels="name", node_color=dim('index').str())) # use these options to create a diagram in black with no labels, no nodes, and no edges # these options create a more artsy diagram # comment these lines if you want to keep the oob options chord = hv.Chord(links) chord.opts( opts.Chord(edge_color="black", node_color="black", node_size=1, edge_visible=False))
def chord_diagram(df_flow_MN, n_cycles, dir_path):
    '''
    Function to plot chord diagram for flows across industry sectors

    Only rows with ``Option == 'Industrial'`` are used.  Two link sets are
    built and averaged over ``n_cycles``: "waste" (generator sector -> RETDF
    sector, by flow transferred) and "recyled" (RETDF sector -> industry
    sector, by recycled flow).  For each set a PDF chord diagram and a CSV
    with the sector numbering are written to ``dir_path``.

    :param df_flow_MN: dataframe with the flow records
    :param n_cycles: number of cycles the summed flows are divided by
    :param dir_path: directory the ``chord_<flow>.csv`` / ``.pdf`` files go to
    '''
    df_flow_MN = df_flow_MN.loc[df_flow_MN['Option'] == 'Industrial']
    df_flow_MN = df_flow_MN[['Cycle', 'Generator Industry Sector',
                             'Flow transferred', 'RETDF Industry Sector',
                             'Recycled flow', 'Industry sector']]

    # Per flow type: original column -> role in the link table.
    Flows = {'waste': {'Generator Industry Sector': 'source',
                       'RETDF Industry Sector': 'target',
                       'Flow transferred': 'value'},
             'recyled': {'RETDF Industry Sector': 'source',
                         'Industry sector': 'target',
                         'Recycled flow': 'value'}}

    link_frames = []
    for Flow, Link in Flows.items():
        cols = list(Link)
        df_links_aux = df_flow_MN[['Cycle'] + cols]
        # Sum per cycle and sector pair, then over cycles, and average.
        df_links_aux = df_links_aux.groupby(['Cycle'] + cols[0:2],
                                            as_index=False).sum()
        df_links_aux = df_links_aux.drop(columns='Cycle')
        df_links_aux = df_links_aux.groupby(cols[0:2], as_index=False).sum()
        df_links_aux[cols[2]] = df_links_aux[cols[2]] / n_cycles
        df_links_aux['flow'] = Flow
        df_links_aux = df_links_aux.rename(columns=Link)
        if Flow == 'waste':
            # 1 metric ton/yr
            df_links_aux = df_links_aux[df_links_aux['value'] >= 1000]
        link_frames.append(df_links_aux)
    df_links = pd.concat(link_frames, ignore_index=True, sort=True, axis=0)

    # Flows that stay within one sector are not drawn.
    df_links = df_links.loc[df_links['source'] != df_links['target']]

    # Number the sectors in first-seen order so that the numbering is the
    # same on every run.
    sector_names = dict.fromkeys(
        df_links['source'].tolist() + df_links['target'].tolist())
    Nodes = {node: i for i, node in enumerate(sector_names)}
    df_links = df_links.assign(source=df_links['source'].map(Nodes),
                               target=df_links['target'].map(Nodes))

    df_nodes = pd.DataFrame({'index': list(Nodes.values()),
                             'name sector': list(Nodes.keys())})
    df_nodes['name'] = df_nodes['index'].apply(lambda x: f'Sector {x+1}')

    for Flow in ['waste', 'recyled']:
        try:
            df_links_plot = df_links.loc[df_links['flow'] == Flow,
                                         ['source', 'target', 'value']]

            # Targets that never act as a source get a tiny self-link.
            # NOTE(review): these self-links fall below the value >= 5
            # filter applied further down - confirm they are still needed.
            sources = df_links_plot['source'].unique().tolist()
            search = df_links_plot.loc[
                ~df_links_plot['target'].isin(sources),
                'target'].unique().tolist()
            if search:
                self_links = pd.DataFrame({'source': search,
                                           'target': search,
                                           'value': [10**-50] * len(search)})
                df_links_plot = pd.concat([df_links_plot, self_links],
                                          ignore_index=True, sort=True,
                                          axis=0)

            # NOTE(review): result unused; presumably an early check that
            # raises ValueError when there are no links - confirm.
            hv.Chord(df_links_plot)

            nodes = hv.Dataset(df_nodes, 'index')
            chord = hv.Chord((df_links_plot, nodes)).select(value=(5, None))
            chord.opts(
                opts.Chord(cmap='Category20', edge_cmap='Category20',
                           edge_color=dim('source').str(),
                           labels='name', node_color=dim('index').str()))

            df_nodes.to_csv(f'{dir_path}/chord_{Flow}.csv',
                            sep=',', index=False)
            hv.save(chord, f'{dir_path}/chord_{Flow}.pdf', fmt='pdf')
        except ValueError:
            print(f'There are not records for {Flow} activities')
'source': edge[0], 'target': edge[1], 'weight': 1, 'node': node[0], 'package': node[1]['package'], }) except KeyError: #takes care of base module imports pass df = pd.DataFrame(graph_data) return df if __name__ == '__main__': test = ImportGraph( directory=Path('/home/dal/PycharmProjects/pyjanitor_fork/janitor')) df = test.output_graph() hv.extension('bokeh') defaults = dict(width=1000, height=1000, padding=0.1) chord = hv.Chord((df[['source', 'target', 'weight']], df[['node', 'package']])) chord.opts( opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='node', node_color=dim('package').str(), width=1000, height=1000)) hv.save(chord, 'test.html')
def hv_generator(ontology_id_list):
    """Return a JSON-encoded HTML chord diagram of co-mentioned terms.

    Every id found in the module-level ``ontoterminology`` contributes its
    ``NAME`` and the set of its ``PMID`` values.  Two terms are linked when
    their PMID sets overlap; the link weight (column ``PMID``) is the size
    of the overlap.  Each unordered pair of terms is linked at most once.

    Args:
        ontology_id_list: Iterable of ontology ids to include.

    Returns:
        ``json.dumps`` of the static HTML of the plot, or of an HTML error
        message when no link was found or any step failed.
    """
    from itertools import combinations

    html_error_message = "<!doctype html><div><h4>ERROR CREATING TABLE - no associations found, or possibly some of the ID's were incorrect?</h4></div></html>"

    try:
        # Term name -> set of PMIDs mentioning it; unknown ids are skipped.
        mentions = {}
        for selectedID in ontology_id_list:
            if selectedID in ontoterminology:
                term = ontoterminology[selectedID]
                mentions[term['NAME']] = set(term['PMID'])

        # combinations() yields every unordered pair exactly once, so no
        # separate inverse-duplicate check is needed.
        names = [name for name in mentions if name.strip() != ""]
        chn_list = []
        for source, target in combinations(names, 2):
            if source.strip() == target.strip():
                continue
            shared = mentions[source] & mentions[target]
            if shared:
                chn_list.append({
                    "source": source,
                    "target": target,
                    "PMID": len(shared),
                })

        if not chn_list:
            return json.dumps(html_error_message)

        # Build the data tables expected by the visualisation library.
        links = pd.DataFrame(chn_list, columns=["source", "target", "PMID"])
        node_names = pd.concat([links["source"], links["target"]]).unique()
        node_info = {
            "index": node_names,
            "name": node_names,
            "group": [1] * len(node_names),
        }
        nodes = hv.Dataset(pd.DataFrame(node_info), 'index')

        chord = hv.Chord((links, nodes))
        chord.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       edge_color='source',
                       labels='name',
                       node_color='index'))

        renderer = hv.renderer('bokeh')
        hvplot = renderer.get_plot(chord)
        html = renderer.static_html(hvplot)
        return json.dumps(html)
    except Exception as e:
        # Boundary handler: report the problem and return the error page.
        print(e)
        traceback.print_exc()
        return json.dumps(html_error_message)
def chordGraphByMajor(self,
                      coefficient=0.5,
                      pval=0.05,
                      outputName='majorGraph',
                      outputSize=200,
                      imageSize=300,
                      showGraph=True,
                      outputImage=True):
    """Creates a chord graph between available majors through averaging and filtering both correlation coefficients and P-values. Outputs to an html file, PNG file, and saves the underlying data by default.

    Note:
        The 'classDept' column as set by :obj:`defineWorkingColumns` must have been defined in your dataset to filter by major.

    Args:
        coefficient (:obj:`float`, optional): Minimum correlation coefficient to filter correlations by.
        pval (:obj:`float`, optional): Maximum P-value to filter correlations by. Defaults to 0.05 (a standard P-value limit used throughout the sciences)
        outputName (:obj:`str`, optional): First part of the outputted file names, e.g. fileName.csv, fileName.html, etc.
        outputSize (:obj:`int`, optional): Size passed to ``hv.output(size=...)`` for the html graph. 200 by default.
        imageSize (:obj:`int`, optional): Size passed to ``hv.output(size=...)`` for the exported image. 300 by default. Increase this if node labels are cut off.
        showGraph (:obj:`bool`, optional): Whether or not to open a browser and display the interactive graph that was created. Defaults to :obj:`True`.
        outputImage (:obj:`bool`, optional): Whether or not to export an image of the graph. Defaults to :obj:`True`.

    Returns:
        None. Writes ``outputName + '.csv'`` relative to the working
        directory and the html/png files under the module-level ``outDir``.
        Returns early, after printing an error, when no major pair passes
        the filters.

    """
    hv.output(size=outputSize)

    # Reduce every course name to its leading run of non-digit characters.
    # NOTE(review): a course that starts with a digit has no such prefix and
    # raises IndexError here, as it always did -- confirm that is intended.
    leadingNonDigits = re.compile(r'\A\D+')
    majorFiltered = self.df.copy()
    majorFiltered['course1'] = majorFiltered['course1'].apply(
        lambda course: leadingNonDigits.findall(course)[0])
    majorFiltered['course2'] = majorFiltered['course2'].apply(
        lambda course: leadingNonDigits.findall(course)[0])

    majors = sorted(majorFiltered['course1'].unique().tolist())

    # The statistics are averaged below, so make sure they are numeric.
    majorFiltered['corr'] = pd.to_numeric(majorFiltered['corr'])
    majorFiltered['P-value'] = pd.to_numeric(majorFiltered['P-value'])
    majorFiltered['#students'] = pd.to_numeric(majorFiltered['#students'])

    majorCorrelations = []
    usedMajors = set()  # (source, target) pairs already recorded
    for count, major in enumerate(majors, start=1):
        print(str(count) + ' / ' + str(len(majors)) + ' majors')
        filteredToMajor = majorFiltered.loc[majorFiltered['course1'] == major]
        connectedMajors = filteredToMajor['course2'].unique().tolist()
        for targetMajor in connectedMajors:
            filteredToMajorPair = filteredToMajor.loc[
                filteredToMajor['course2'] == targetMajor]
            meanCorr = filteredToMajorPair['corr'].mean()
            if pd.isna(meanCorr):
                # int(NaN) would raise; a pair without any usable
                # correlation cannot pass the filter anyway.
                continue
            avgCorr = int(meanCorr * 100)
            avgPVal = filteredToMajorPair['P-value'].mean()
            avgStudents = filteredToMajorPair['#students'].mean()
            passesFilter = (avgCorr > (coefficient * 100)
                            and major != targetMajor and avgPVal < pval)
            # Keep only one direction of every pair.
            if passesFilter and (targetMajor, major) not in usedMajors:
                usedMajors.add((major, targetMajor))
                majorCorrelations.append(
                    (major, targetMajor, avgCorr, avgPVal, avgStudents))

    if not majorCorrelations:
        print('Error: no valid correlations found.')
        return
    print(str(len(majorCorrelations)) + ' valid major correlations found.')

    output = pd.DataFrame(majorCorrelations,
                          columns=('source', 'target', 'corr', 'P-value',
                                   '#students'))

    # Node table: every major that appears as a source or a target, sorted.
    sortedMajors = sorted(set(output['source']) | set(output['target']))
    nodes = pd.DataFrame(sortedMajors, columns=['name'])

    print("source before:", output['source'])
    print("target before:", output['target'])

    # Replace major names by their row position in the node table.
    nodeIndexByName = {name: position
                       for position, name in enumerate(sortedMajors)}
    output['source'] = output['source'].map(nodeIndexByName)
    output['target'] = output['target'].map(nodeIndexByName)

    # The former debug print of nodes.index[nodes['name'] == major][0] was
    # removed: `major` is left over from the loop above and the lookup raised
    # IndexError whenever that last major had no surviving correlation.
    print("source now:", output['source'])
    print("target now:", output['target'])
    print(output['source'].dtype)

    # Construct the chord graph.
    output.to_csv(outputName + '.csv', index=False)
    hvNodes = hv.Dataset(nodes, 'index')
    # NOTE(review): `output` has no 'value' column, so this select presumably
    # filters nothing -- confirm.
    chord = hv.Chord((output, hvNodes)).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   labels='name',
                   node_color=dim('index').str()))
    graph = hv.render(chord)
    output_file(outDir + outputName + '.html', mode='inline')
    save(graph)
    if showGraph:
        show(graph)
    chord.opts(toolbar=None)

    # Re-render at imageSize for the PNG, then restore the html size.
    if outputImage:
        hv.output(size=imageSize)
        try:
            export_png(hv.render(chord),
                       filename=outDir + outputName + '.png')
        except RuntimeError:
            print(
                "Runtime error in chordGraphByMajors in line with export_png(hv.render(chord), filename=outDir +outputName + '.png')"
            )
    hv.output(size=outputSize)
# Group every sector by the first two characters of its code (index1).
PIOT_Z['group'] = PIOT_Z.index1.astype(str).str[0:2]
# reset_index(drop=True) replaces reset_index().drop('index', 1): pandas 2.0
# no longer accepts `axis` as a positional argument of DataFrame.drop.
df = PIOT_Z[['index1', 'group']].copy().reset_index(drop=True)

# Key maps each distinct two-character prefix (NAICS_2) to an integer id
# ('group'), taken from the position of the prefix in unique().
Key = pd.DataFrame(df.group.unique()).reset_index()
Key.columns = ['group', 'NAICS_2']

# Attach the integer group id to every sector; the merge suffixes the two
# clashing 'group' columns as group_x (prefix) and group_y (integer id).
df_fin = df.merge(Key, left_on=['group'], right_on=['NAICS_2'])
df_fin = df_fin.drop(columns=['group_x', 'NAICS_2'])
df_fin.columns = ['index', 'group']

node_desc = df_fin.to_dict('records')
nodes = hv.Dataset(pd.DataFrame(node_desc), 'index')
chord = hv.Chord((links, nodes)).select(value=(5, None))
# NOTE(review): the node table built above only has 'index' and 'group'
# columns, so labels='name' may not resolve -- confirm.
chord.opts(
    opts.Chord(cmap='Category20',
               edge_cmap='Category20',
               edge_color=dim('source').str(),
               labels='name',
               node_color=dim('index').str()))
hv.save(chord, 'chord_piotalt.html')

###### Do the same thing for the MIOT ##############
# MIOT = "SIOT_BEA2012AR_CxC.xlsx"
# Z_Monetary = pd.read_excel(MIOT, usecols = 'A: OP', skipfooter = 6, header = 0, index_col = 0)
# Z_Monetary = format_idx_col(Z_Monetary)
# tidy_MIOT = pd.melt(Z_Monetary.reset_index(), id_vars = 'index')
# Probs = tidy_MIOT[tidy_MIOT['value'] < 0].index
# tidy_MIOT = elim_nan(tidy_MIOT)
# tidier_MIOT = tidy_MIOT.copy()
- *target*: destination of the connection - *value*: the strength of the connection - *sign*: whether the connection is positive (+1) or negative (-1) def mat2chord(vec, t=0, cthresh=0.25): def mat2links(x, ids): links = [] for i in range(x.shape[0]): for j in range(i): links.append({'source': ids[i], 'target': ids[j], 'value': np.abs(x[i, j]), 'sign': np.sign(x[i, j])}) return pd.DataFrame(links) links = mat2links(tc.vec2mat(vec)[:, :, t], rois['ID']) chord = hv.Chord((links, hv.Dataset(rois, 'ID'))).select(value=(cthresh, None)) chord.opts( opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='Region', node_color=dim('ID').str()) ) return chord Here's the first correlation matrix: hmap = mat2chord(isfc, t=0) # This is to render chord plot in jupyter-book html_repr = file_html(pn.Column(hmap).get_root(), CDN) IPython.display.HTML(html_repr) Now let's create an interactive figure to display the dynamic network patterns, with a slider for controlling the timepoint: def timecorr_explorer(x, cthresh=0.25): hv.output(max_frames=x.shape[0])
def _matrix_to_edge_list(df_data):
    """Flatten a name-by-name weight matrix into unique (from, to, weight) rows."""
    # First column holds the row names; the remaining headers are the
    # column names.
    row_names = df_data.iloc[:, 0].tolist()
    col_names = list(df_data.columns.values)[1:]
    size = min(len(row_names), len(col_names))

    # Visit every pair once (j > i), which also skips the diagonal, i.e.
    # people linked to themselves. The weight is read from person i's own
    # column (data column i + 1) at row j.
    # NOTE(review): assumes a symmetric matrix, so reading the column
    # instead of the row gives the same weight -- confirm.
    edges = [
        (row_names[i], col_names[j], df_data.iloc[j, i + 1])
        for i in range(size)
        for j in range(i + 1, size)
    ]
    df_plot = pd.DataFrame(edges, columns=['from', 'to', 'weight'])
    # Edges with weight 0 are not drawn.
    return df_plot.loc[df_plot['weight'] != 0.0]


def plot4():
    """Return the chord diagram of the weight matrix as a Bokeh JSON item.

    The matrix is read from the module-level ``file`` (';'-separated, first
    column holding the row names) and flattened into an edge list, which is
    cached next to the source as ``<file minus its last 4 characters>.out.csv``
    (tab-separated). When that cache file already exists it is loaded instead
    of re-reading the matrix.

    Returns:
        str: ``json.dumps`` of ``json_item`` applied to the rendered Bokeh
        plot.
    """
    # NOTE(review): `file` is presumably a path string ending in a
    # four-character extension such as '.csv' -- confirm against the caller.
    cache_path = str(file)[:-4] + '.out.csv'

    if Path(cache_path).is_file():
        df_plot = pd.read_csv(cache_path,
                              sep='\t',
                              encoding='utf-8',
                              index_col=False)
    else:
        df_data = pd.read_csv(file, sep=';', header=0, index_col=False)
        df_plot = _matrix_to_edge_list(df_data)
        df_plot.to_csv(cache_path, sep='\t', encoding='utf-8', index=False)

    chord = hv.Chord(df_plot)
    # NOTE(review): only an edge table is passed to hv.Chord, so a 'name'
    # node column may not exist for labels='name' -- confirm.
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('weight').str(),
                   labels='name',
                   node_color=dim('index').str()))

    renderer = hv.renderer('bokeh')
    bokeh_figure = renderer.get_plot(chord).state
    return json.dumps(json_item(bokeh_figure))