def plot_chordgraph(G, node_data=None, label_col='index', size=200, cmap='Category20', title='',
                    draw_labels=True, edge_color='E-Group', node_color='index'):
    """Render the undirected edge list of ``G`` as a bokeh chord diagram.

    Args:
        G: networkx graph; its edges must carry a ``value`` attribute.
        node_data: optional node table keyed by ``index``. When given, only
            edges whose ``value`` is at least 5 are kept.
        label_col: node column used for labels (forced to ``'index'`` when no
            ``node_data`` is supplied).
        size: HoloViews output size.
        cmap: colormap used for both nodes and edges.
        title: label of the chord element.
        draw_labels: labels are drawn only when this is exactly ``True``.
        edge_color: dimension used to colour edges.
        node_color: dimension used to colour nodes.

    Returns:
        The object produced by ``hv.render(..., backend='bokeh')``.
    """
    hv.extension('bokeh')
    hv.output(size=size)

    edges = nx.to_pandas_edgelist(nx.to_undirected(G))

    # The chord element is fed the third column as its value, so swap
    # "value" into position 2.
    print(edges.columns)
    ordered = list(edges.columns.values)
    value_pos = edges.columns.get_loc("value")
    ordered[2], ordered[value_pos] = ordered[value_pos], ordered[2]
    edges = edges.reindex(columns=ordered)

    if node_data is None:
        diagram = hv.Chord(edges, label=title)
        label_col = 'index'
    else:
        node_dataset = hv.Dataset(node_data, 'index')
        diagram = hv.Chord((edges, node_dataset), label=title).select(value=(5, None))

    style = dict(
        cmap=cmap,
        edge_cmap=cmap,
        edge_color=hv.dim(edge_color).str(),
        node_color=hv.dim(node_color).str(),
    )
    if draw_labels is True:
        style['labels'] = label_col
    diagram.opts(hv.opts.Chord(**style))

    return hv.render(diagram, backend='bokeh')
def plot_network(all_transactions):
    """Build a chord diagram of the transaction network and save it as HTML.

    Links and nodes come from ``generate_links`` / ``generate_nodes``; only
    links with ``value`` >= 1 are drawn. The result is written to
    ``image.html`` in the current working directory.

    Args:
        all_transactions: transaction collection passed through to the
            link/node generators.
    """
    hv.extension('bokeh')
    hv.output(size=500)

    links = pd.DataFrame(generate_links(all_transactions))
    print(links)
    nodes = hv.Dataset(pd.DataFrame(generate_nodes(all_transactions)), 'index')

    # The throw-away ``hv.Chord(links)`` and ``nodes.data.head()`` statements
    # were removed: their results were never used.
    chord = hv.Chord((links, nodes)).select(value=(1, None))
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('source').str(), labels='name',
                   node_color=dim('index').str()))

    hv.save(chord, 'image.html')
    print("Network analysis complete, saved as image.html")
def chord(data):
    """Return a standalone HTML page containing a chord diagram of ``data``.

    Args:
        data: mapping with ``'links'`` and ``'nodes'`` entries, each convertible
            to a ``pandas.DataFrame``. Nodes are keyed by their ``index``
            column and labelled by ``name``.

    Returns:
        The HTML document produced by ``file_html`` (CDN resources, title
        ``"my plot"``). Only links with ``value`` >= 10 are drawn.
    """
    hv.extension('bokeh')
    renderer = hv.renderer('bokeh')
    hv.output(size=230)

    links = pd.DataFrame(data['links'])
    nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')

    # Named ``diagram`` rather than ``chord`` so the local does not shadow this
    # function; the unused ``hv.Chord(links)`` construction was dropped.
    diagram = hv.Chord((links, nodes)).select(value=(10, None))
    diagram.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('source').str(), labels='name',
                   node_color=dim('index').str()))

    bokeh_plot = renderer.get_plot(diagram).state
    return file_html(bokeh_plot, CDN, "my plot")
def generate_chord_diagram(responses_count, thr_count=5):
    """Build a chord diagram of responses between accounts, coloured by party.

    Args:
        responses_count: DataFrame with columns ``index``, ``target``,
            ``count``, ``screen_name`` and ``party``.
        thr_count: minimum ``count`` a link needs to be drawn.

    Returns:
        The configured ``hv.Chord`` element (700x700).
    """
    # Select the active rows once; both the link and node tables derive from it.
    active = responses_count.loc[responses_count['count'] > 0]

    # rename/reset_index return fresh frames, so nothing is assigned onto a
    # slice of the caller's DataFrame.
    plot_data = (
        active[['index', 'target', 'count']]
        .rename(columns={'index': 'source', 'count': 'value'})
        .reset_index(drop=True)
    )

    node_table = (
        active[['index', 'screen_name', 'party']]
        .drop_duplicates()
        .set_index('index')
        .sort_index(level=0)
    )
    nodes = hv.Dataset(node_table, 'index')

    # Colour per account, looked up from the account's party. This uses every
    # row of responses_count, not only the active ones.
    person_party_cmap = dict(
        zip(responses_count['index'],
            responses_count['party'].apply(lambda row: party_cmap[row])))

    chord = hv.Chord((plot_data, nodes)).select(value=(thr_count, None))
    chord.opts(
        hv_opts.Chord(cmap=party_cmap, edge_cmap=person_party_cmap,
                      edge_color=hv_dim('source'), labels='screen_name',
                      node_color=hv_dim('party'),
                      edge_hover_line_color='cyan',
                      node_hover_fill_color='cyan',
                      height=700, width=700))
    return chord
def plot_chord(data_input, from_column, to_column, **kwargs):
    """Build a chord element from the co-occurrence counts of two columns.

    A frequency table of ``from_column`` against ``to_column`` is computed,
    melted into long form, and the category labels are replaced by integer
    node ids before the frame is handed to ``hv.Chord``.

    Args:
        data_input: source DataFrame.
        from_column: column providing the chord sources.
        to_column: column providing the chord targets.
        **kwargs: accepted for call compatibility; not used.

    Returns:
        An ``hv.Chord`` element, or ``None`` (after printing a hint) when
        either column is missing from ``data_input``.
    """
    if from_column not in data_input.columns or to_column not in data_input.columns:
        print(
            "re-format the data input, give three arguments: input dataframe, from column, to column"
        )
        return None

    freq_input = pd.crosstab(data_input[from_column], data_input[to_column]).reset_index()
    print("frequence table of " + " " + from_column + " " + to_column)
    print(freq_input)
    chord_input = pd.melt(freq_input, id_vars=from_column)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    chord_index = pd.concat(
        [
            pd.DataFrame(data_input[from_column].unique()),
            pd.DataFrame(data_input[to_column].unique()),
        ],
        ignore_index=True,
    ).dropna()
    label_to_id = {label: idx for idx, label in chord_index[0].to_dict().items()}

    # Only the two label columns are translated. Replacing across the whole
    # frame would also rewrite counts in the value column whenever a numeric
    # label happens to equal a count.
    label_cols = list(chord_input.columns[:2])
    chord_input[label_cols] = chord_input[label_cols].replace(label_to_id)
    return hv.Chord(chord_input)
def modify_doc(doc, mytabs):
    """Attach a chord-plot tab (with a slider) to a bokeh document.

    Reads the module-level ``data`` mapping (``'links'`` / ``'nodes'``).
    Every slider change rebuilds the plot, appends another "Chord Plot" panel
    to ``mytabs`` and adds another root to ``doc``.

    Args:
        doc: bokeh document to populate.
        mytabs: list of panels; mutated in place.

    Returns:
        ``doc``.
    """
    start, end = 1, 20
    samples_count = 5
    slider = Slider(start=start, end=end, value=start, step=1, title="Counts")
    # The select widget is wired to a callback but is not placed in the layout.
    select = Select(title="Count", value="aux", options=["box", "pack", "image", "user"])
    renderer = hv.renderer('bokeh')
    hv.extension('bokeh')
    hv.output(size=200)

    def _render_chord(min_value):
        # Build the chord for links with value >= min_value and bind it to doc.
        edge_frame = pd.DataFrame(data['links'])
        print(edge_frame.head(3))
        node_set = hv.Dataset(pd.DataFrame(data['nodes']), 'index')
        diagram = hv.Chord((edge_frame, node_set)).select(value=(min_value, None))
        diagram.opts(opts.Chord(cmap='Category20', edge_cmap='Category20',
                                edge_color=dim('source').str(), labels='name',
                                node_color=dim('index').str()))
        return renderer.get_plot(diagram, doc)

    def _mount(plot):
        # Append a new tab for the plot and add the tab set as a document root.
        mytabs.append(Panel(child=row(slider, plot.state), title="Chord Plot"))
        doc.add_root(row(Tabs(tabs=mytabs)))
        return doc

    hvplot = _render_chord(samples_count)

    def slider_update(attrname, old, new):
        print ("update received")
        return _mount(_render_chord(new))

    slider.on_change('value', slider_update)

    def select_update(attrname, old, new):
        print ("update received. Old: {} New: {}".format(old, new))

    select.on_change('value', select_update)

    return _mount(hvplot)
def plot_connection_graph():
    """Build a chord diagram of the connections listed in ``results_ip_comp.csv``.

    Rows 1..999 of the file are read (row 0 is skipped) and only rows whose
    last field is below 10000 are kept. The first two fields of each row
    become the link's source and target.

    Returns:
        The configured ``hv.Chord`` element. Previously the element was built
        and then discarded, so the function had no usable result.
    """
    rows = read_csv_list("results_ip_comp.csv")[1:1000]
    rows = [x for x in rows if float(x[-1]) < float(10000)]
    links = pd.DataFrame({
        'source': [x[0] for x in rows],
        'target': [x[1] for x in rows],
    })

    # NOTE(review): ``links`` has no 'value' column and no node table with a
    # 'name' column is supplied, yet the selection and ``labels`` below refer
    # to them -- confirm these options against the intended data.
    chord = hv.Chord(links).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('source').str(), labels='name',
                   node_color=dim('index').str()))
    return chord
def chordDiagram(person_id, df_enron):
    """Return a bokeh ``json_item`` string of one person's e-mail chord diagram.

    Links are (fromId, toId) pairs weighted by message count and coloured by
    mean sentiment. Nodes are senders, labelled by e-mail address and coloured
    by job title. Only links with at least 5 messages are kept, and the result
    is restricted to ``person_id``.

    Args:
        person_id: node id passed to ``chord.select(index=...)``.
        df_enron: DataFrame with columns ``fromId``, ``toId``, ``date``,
            ``sentiment``, ``fromEmail`` and ``fromJobtitle``.

    Returns:
        JSON text of ``bokeh.embed.json_item`` for the rendered plot.
    """
    import json

    import holoviews as hv
    from holoviews import opts
    from bokeh.embed import json_item

    hv.extension('bokeh')

    df_chord = df_enron.sort_values('fromJobtitle')
    df_chord['index'] = df_chord.index

    # One row per (sender, recipient): message count and mean sentiment.
    df_links = df_chord.groupby(['fromId', 'toId']).agg({
        'date': 'count',
        'sentiment': 'mean'
    })
    df_links = df_links.reset_index()[['fromId', 'toId', 'date', 'sentiment']]
    df_links.columns = ['source', 'target', 'value', 'sentiment']

    job_titles = df_chord[['fromId', 'fromJobtitle']].drop_duplicates()
    job_titles.columns = ['source', 'fromJobtitle']
    df_links = pd.merge(df_links, job_titles, on="source")
    # The former ``df_links.drop_duplicates(subset='source')`` and
    # ``df_nodes.sort_values('name')`` calls discarded their results and were
    # therefore no-ops; they are removed rather than "fixed" so the output
    # stays the same.

    df_nodes = df_chord[['fromId', 'fromEmail', 'fromJobtitle'
                         ]].drop_duplicates().reset_index(drop=True)
    df_nodes.columns = ['index', 'name', 'group']

    # Node size grows with the number of distinct recipients.
    recipients = df_chord[['fromId', 'toId']].drop_duplicates().groupby(
        ['fromId']).count().reset_index()
    recipients.columns = ['index', 'size']
    df_nodes = pd.merge(df_nodes, recipients, on='index')
    df_nodes['size'] = df_nodes['size'] / 3 + 8
    nodes = hv.Dataset(df_nodes, 'index')

    # NOTE(review): this sets seaborn defaults globally; kept for parity with
    # the previous behaviour -- confirm whether it is still wanted here.
    import seaborn as sns
    sns.set()

    chord = hv.Chord((df_links, nodes)).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color='sentiment', labels='name', node_color='group',
                   edge_alpha=0.8, edge_line_width=1.5))

    final_chord = chord.select(index=person_id)
    plot = hv.render(final_chord, backend='bokeh')
    return json.dumps(json_item(plot))
def plot_chord_graph(df, col_idx, sep="; ", height=800, width=800, top_n=None, cmap='glasbey_light'):
    """
    Draw a chord plot of how often the categories in one column co-occur.

    Each cell of ``col_idx`` is split on ``sep``; every category becomes a node
    (weighted by its frequency) and every unordered pair within a cell becomes
    an edge (weighted by how many cells contain the pair).

    Reference: https://holoviews.org/gallery/demos/bokeh/route_chord.html

    :param df: dataframe to analyse
    :param col_idx: name of the column holding the separated categories
    :param sep: separator used to split the column value
    :param height: height of the final image
    :param width: width of the final image
    :param top_n: restrict the plot to the n most frequent nodes (optional)
    :param cmap: colour scheme for the graph
    :return: the configured chord element
    """
    plot_df = df[[col_idx]].dropna()
    plot_df[col_idx] = plot_df[col_idx].str.split(sep)

    # Nodes: one per category, ordered by descending frequency.
    node_wts = Counter(item for items in plot_df[col_idx] for item in items)
    nodes_df = pd.DataFrame({'Key': list(node_wts.keys()),
                             'Count': list(node_wts.values())})
    nodes_df.sort_values('Count', inplace=True, ascending=False)
    nodes_df['ID'] = list(range(nodes_df.shape[0]))
    nodes = hv.Dataset(nodes_df, 'ID', 'Key')

    # Edges: sorted category pairs, so (a, b) and (b, a) count as one edge.
    edge_wts = Counter(
        tuple(sorted(pair))
        for items in plot_df[col_idx]
        for pair in combinations(items, 2)
    )
    edges_df = pd.DataFrame({'Source_Dest': list(edge_wts.keys()),
                             'Count': list(edge_wts.values())})
    edges_df.sort_values('Count', inplace=True, ascending=False)
    edges_df[['Source', 'Dest']] = pd.DataFrame(edges_df['Source_Dest'].tolist(),
                                                index=edges_df.index)

    # Translate both endpoints from category name to node ID.
    key_to_id = nodes_df[['Key', 'ID']]
    edges_df = edges_df.merge(key_to_id, left_on=["Source"], right_on=["Key"])
    edges_df = edges_df.merge(key_to_id, left_on=["Dest"], right_on=["Key"])
    edges_df.rename(columns={'ID_x': 'Source_ID', 'ID_y': 'Dest_ID'}, inplace=True)

    chord = hv.Chord((edges_df, nodes), ['Source_ID', 'Dest_ID'], ['Count'])

    wanted_ids = list(nodes_df.iloc[:top_n]['ID']) if top_n else nodes_df['ID'].tolist()
    most_used_lang = chord.select(ID=wanted_ids, selection_mode='nodes')

    most_used_lang.opts(
        opts.Chord(cmap=cmap, edge_color=dim('Source_ID').str(),
                   height=height, width=width, labels='Key',
                   node_color=dim('ID').str())
    )
    return most_used_lang
def getChordDiagram(messages_df):
    """Render a "who liked whom" chord diagram with the matplotlib backend.

    Messages with at least one favourite are exploded to one row per
    (sender, liker) pair and counted. Known ids are replaced by display names
    from a fixed lookup table. The plot is saved to ``static/likes.svg`` and
    ``templates/likes.html``.

    Args:
        messages_df: DataFrame with ``sender_id``, ``favorited_by`` (list-like)
            and ``fav_num`` columns.

    Returns:
        The chord element with its options applied.
    """
    names_dict = {
        '17289343': 'Ben Pagel',
        '18257442': 'Logan Camilletti',
        '23885372': 'Jake Linford',
        '41651233': 'Shad Karlson',
        '41651237': 'Jackson Esplin',
        '43797901': 'John Hammond',
        '52396192': 'Jon Michael Ossola',
        '9803929': 'Zach Preator',
    }

    liked = messages_df[['sender_id', 'favorited_by', 'fav_num']]
    liked = liked[liked['fav_num'] > 0][['sender_id', 'favorited_by']]
    pairs = liked.explode('favorited_by')

    # Count identical (sender, liker) rows.
    likes = (
        pairs.groupby(pairs.columns.tolist())
        .size()
        .reset_index()
        .rename(columns={0: 'likes'})
    )
    for id_col in ('sender_id', 'favorited_by'):
        likes[id_col] = likes[id_col].astype(str)
    likes = likes.replace(names_dict)

    names = list(set(likes["favorited_by"].unique().tolist() + likes["sender_id"].unique().tolist()))
    names_dataset = hv.Dataset(pd.DataFrame(names, columns=["Name"]))
    chord = hv.Chord((likes, names_dataset))

    hv.extension('matplotlib')
    plot = chord.opts(labels='Name', node_color='Name', edge_color='favorited_by',
                      label_index='sender_id', cmap='Category10',
                      edge_cmap='Category10', bgcolor="black")

    for target in ('static/likes.svg', 'templates/likes.html'):
        hv.save(plot, target, fmt='auto')
    return plot
def make_chords(dict_df, type_IO='_PhysUse'):
    """Save one chord diagram per entry of ``dict_df`` as an HTML file.

    Each value of ``dict_df`` is used as the link table; the node table is the
    module-level ``node_d``. Only links with ``value`` >= 5 are drawn. Files
    are named ``chord_<key><type_IO>.html``.

    Args:
        dict_df: mapping of key -> link DataFrame.
        type_IO: suffix appended to each output file name.
    """
    hv.extension('bokeh')
    hv.output(size=250)

    chord_style = dict(cmap='Category20', edge_cmap='Category20', labels='name')
    for label in dict_df:
        edge_frame = dict_df[label]
        node_set = hv.Dataset(pd.DataFrame(node_d), 'index')
        diagram = hv.Chord((edge_frame, node_set)).select(value=(5, None))
        diagram.opts(
            opts.Chord(edge_color=dim('source').str(),
                       node_color=dim('index').str(),
                       **chord_style))
        hv.save(diagram, f'chord_{label}{type_IO}.html')
def plot_chord(df):
    """Save a chord diagram of flows between municipalities as HTML.

    Nodes are the distinct values of ``cod_name_x`` and ``cod_name_y``; ``df``
    itself is used as the link table. The result is written to
    ``../other/PS2_validation/chord.html``.

    Args:
        df: link DataFrame with ``cod_name_x`` and ``cod_name_y`` columns.
    """
    cities = list(set(df.cod_name_x.unique()) | set(df.cod_name_y.unique()))
    cities_dataset = hv.Dataset(pd.DataFrame(cities, columns=["city"]))

    style = dict(
        height=400,
        width=400,
        title="Flow of families among municipalities",
        node_cmap="Category20",
        edge_cmap='Category20',
        edge_color='cod_name_x',
        labels='city',
        node_color='city',
        bgcolor="black",
        edge_alpha=0.8,
        edge_line_width=2,
        node_size=25,
        label_text_color="white",
    )
    diagram = hv.Chord((df, cities_dataset))
    diagram.opts(hv.opts.Chord(**style))

    hv.save(diagram, '../other/PS2_validation/chord.html')
def chord_chard(data):
    """
    Draw a chord chart of label co-occurrences and serve it with panel.

    Takes a processed dataframe for a multilabel classification problem, drops
    the non-label columns and counts how often each pair of labels occurs
    together. Only the strictly lower triangle of the co-occurrence matrix is
    plotted, so each pair appears once and self-pairs are excluded.

    Args:
        data: DataFrame containing the label columns plus ``PIMS_ID``,
            ``language``, ``description``, ``all_logs`` and ``text``.

    Returns:
        Whatever ``pn.Row(plot).show(port=1234)`` returns (previously this
        value was dropped and the function returned ``None``).
    """
    hv.extension('bokeh')
    hv.output(size=200)

    labels_only = data.drop(
        labels=['PIMS_ID', 'language', 'description', 'all_logs', 'text'], axis=1)

    coocc = labels_only.T.dot(labels_only)

    # np.triu includes the diagonal, so this mask blanks the diagonal and the
    # upper triangle in one step. The former
    # ``coocc.values[[np.arange(n)] * 2] = 0`` was redundant with it and
    # depended on list-as-tuple indexing, which newer NumPy releases treat as
    # an array index (selecting whole rows instead of the diagonal).
    coocc = coocc.mask(np.triu(np.ones(coocc.shape, dtype=np.bool_))).fillna(0)

    links = hv.Dataset((list(coocc.columns), list(coocc.index), coocc),
                       ['source', 'target'], 'value').dframe()
    links['value'] = links['value'].astype(int)

    plot = hv.Chord(links).opts(
        node_color='index', edge_color='source', label_index='index',
        cmap='Category20', edge_cmap='Category20', width=400, height=400)

    return pn.Row(plot).show(port=1234)
def ChordDiagram(self):
    """Build the chord plot of shared and matched neoantigen pairs.

    Refreshes ``self.sharedNeos`` and ``self.matchedNeos`` from
    ``GetShared()`` / ``GetMatchedNeos()``. Shared pairs are prefixed with
    their sample name; matched pairs are used as-is. Pair keys are
    comma-separated ``"source,target"`` strings.

    Returns:
        Tuple of (rendered plot, ``Select`` widget).
    """
    self.sharedNeos = self.GetShared()
    self.matchedNeos = self.GetMatchedNeos()
    hv.output(size=200)

    sources, targets, weights = [], [], []

    # Within-sample pairs: both endpoints are namespaced by the sample.
    for sample in self.sharedNeos:
        for pair in self.sharedNeos[sample]:
            ends = pair.split(',')
            sources.append(sample + "_" + ends[0])
            targets.append(sample + "_" + ends[1])
            weights.append(self.sharedNeos[sample][pair])

    # Matched pairs: endpoints are taken verbatim from the key.
    for matched in self.matchedNeos:
        ends = matched.split(',')
        sources.append(ends[0])
        targets.append(ends[1])
        weights.append(self.matchedNeos[matched])

    links = pd.DataFrame({'source': sources, 'target': targets, 'value': weights})

    diagram = hv.Chord(links)
    diagram.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20', labels='index',
                   node_color=dim('index').str()))
    rendered = hv.render(diagram)

    selector = Select(title="Option:", value="foo",
                      options=["foo", "bar", "baz", "quux"])
    return (rendered, selector)
def slider_update(attrname, old, new):
    """Bokeh ``on_change`` callback: rebuild the chord plot for a new threshold.

    Uses ``data``, ``renderer``, ``doc``, ``slider`` and ``mytabs`` from the
    enclosing scope. Each call appends another "Chord Plot" panel to
    ``mytabs`` and adds another root to ``doc``.

    Args:
        attrname: name of the changed attribute (unused).
        old: previous slider value (unused).
        new: new slider value; links with a smaller ``value`` are dropped.

    Returns:
        ``doc``.
    """
    print ("update received")
    edge_frame = pd.DataFrame(data['links'])
    print(edge_frame.head(3))
    node_set = hv.Dataset(pd.DataFrame(data['nodes']), 'index')

    diagram = hv.Chord((edge_frame, node_set)).select(value=(new, None))
    diagram.opts(opts.Chord(cmap='Category20', edge_cmap='Category20',
                            edge_color=dim('source').str(), labels='name',
                            node_color=dim('index').str()))

    # Bind the new plot to the document, then expose it in a fresh tab.
    plot = renderer.get_plot(diagram, doc)
    mytabs.append(Panel(child=row(slider, plot.state), title="Chord Plot"))
    doc.add_root(row(Tabs(tabs=mytabs)))
    return doc
def plot_chord(predictions, filename):
    """Show a chord diagram of two-drug interactions and return the plotted rows.

    Negative ``MLSynergy_score`` values are flipped to positive and labelled
    ``"Synergy"``; every other row is labelled ``"Antagony"``. Only rows with
    ``NumbDrugs == 2`` are plotted. The plot is written to ``filename`` and
    opened with bokeh's ``show``.

    Args:
        predictions: DataFrame with ``Drug1``, ``Drug2``, ``MLSynergy_score``
            and ``NumbDrugs`` columns. Not modified.
        filename: path handed to bokeh's ``output_file``.

    Returns:
        The two-drug subset, including the added ``Interaction`` column.
    """
    to_use = predictions.copy()

    # Vectorised replacement of the former iterrows/.at loop. It also creates
    # the "Interaction" column for empty input and does not depend on the
    # index labels being unique.
    negative = to_use["MLSynergy_score"] < 0
    to_use.loc[negative, "MLSynergy_score"] = -to_use.loc[negative, "MLSynergy_score"]
    to_use["Interaction"] = negative.map({True: "Synergy", False: "Antagony"})

    hv.extension('bokeh')
    hv.output(size=200)

    to_use2 = to_use[to_use.NumbDrugs == 2]
    links = to_use2[["Drug1", "Drug2", "MLSynergy_score", "Interaction"]]

    # A drug present in both columns must yield a single node; dict.fromkeys
    # removes the repeats while keeping first-seen order.
    drugs = list(dict.fromkeys(list(links["Drug1"]) + list(links["Drug2"])))
    nodes = hv.Dataset(drugs, 'Drug')

    chord = hv.Chord((links, nodes)).select(value=(1, None))
    chord.opts(opts.Chord(cmap='Rainbow', edge_cmap='Rainbow',
                          edge_color=dim('Interaction').str(), labels='Drug',
                          node_color=dim('Drug').str()))

    output_file(filename)
    show(hv.render(chord))
    return to_use2
def chord_diagram(self, launch):
    """Build a chord diagram of remuneration flowing between owners.

    Nodes are the distinct ``milestone_owner`` values of ``self.df``. Links run
    from ``project_owner`` to ``milestone_owner`` and are weighted by summed
    ``remuneration``, rescaled to parts-per-thousand of the total (minimum 1).
    Links below the ``self.chord_data['percentile_threshold']`` quantile are
    dropped.

    Args:
        launch: not used by this method.

    Returns:
        The configured chord element, or ``None`` if anything raised (the
        error is logged).
    """
    try:
        def normalize_value(x, total):
            # Share of the total in thousandths, never below 1.
            return max(int((x / total) * 1000), 1)

        df = self.df.copy()

        # -------------- nodes
        names = list(set(df['milestone_owner'].tolist()))
        person_type_dict = dict(zip(df.milestone_owner, df.type))
        name_dict = {name: position for position, name in enumerate(names)}
        node_rows = [
            {'OwnerID': name_dict[name], 'index': position,
             'Type': person_type_dict[name]}
            for position, name in enumerate(names)
        ]
        nodes = hv.Dataset(pd.DataFrame(node_rows), 'index')

        # --------- links
        link_rows = [
            {'source': name_dict[record['project_owner']],
             'target': name_dict[record['milestone_owner']],
             'value': record['remuneration']}
            for _, record in df.iterrows()
        ]
        links = (
            pd.DataFrame(link_rows)
            .groupby(['source', 'target'])['value']
            .sum()
            .reset_index()
        )
        total = links['value'].sum()
        links['value'] = links['value'].apply(
            lambda x: normalize_value(x, total))

        # keep only links at or above the configured quantile
        quantile_val = links['value'].quantile(
            self.chord_data['percentile_threshold'])
        links = links[links['value'] >= quantile_val]

        chord_ = hv.Chord((links, nodes), ['source', 'target'], ['value'])
        chord_.opts(
            opts.Chord(cmap='Category20', edge_cmap='Category20',
                       edge_color=dim('source').str(), labels='Type',
                       node_color=dim('index').str(), width=1000,
                       height=1000))
        return chord_
    except Exception:
        logger.error('chord diagram', exc_info=True)
movies_genres12.columns), list(movies_genres12.index), movies_genres12), ['source', 'target'], 'value_12').dframe() movies_chord13 = hv.Dataset((list( movies_genres13.columns), list(movies_genres13.index), movies_genres13), ['source', 'target'], 'value_13').dframe() movies_chord = pd.DataFrame.merge(movies_chord12, movies_chord13, how="outer") movies_chord["value"] = movies_chord.value_12 + movies_chord.value_13.fillna( 0).astype(int) movies_chord.drop(["value_12", "value_13"], 1, inplace=True) chrd = hv.Chord(movies_chord).opts(node_color='index', edge_color='source', label_index='index', cmap='Category20b', edge_cmap='Category20b', width=750, height=750) hv.render(chrd, backend="bokeh") hv.save(chrd, "chord.html", backend="bokeh") # %% score decile vs other quant vars movies_parallel = movies[[ "Metascore Decile", "Score", "Metascore", "Vote", "Runtime", "Revenue", "Revenue/Min", "Meta:Score" ]] movies_parallel_norm = ( movies_parallel.drop("Metascore Decile", axis=1) - movies_parallel.drop( "Metascore Decile", axis=1).mean()) / movies_parallel.drop( "Metascore Decile", axis=1).std()
def hv_generator(ontology_id_list):
    """Return JSON-encoded HTML of a chord diagram linking co-mentioned terms.

    For every id found in the module-level ``ontoterminology`` mapping, the
    term's ``NAME`` is associated with the set of its ``PMID`` entries. Two
    terms are linked when those sets intersect, and the link weight is the
    size of the intersection.

    Args:
        ontology_id_list: iterable of ontology ids; unknown ids are ignored.

    Returns:
        ``json.dumps`` of the static HTML for the plot, or of a fixed error
        snippet when anything fails (including when no links were found).
    """
    from itertools import combinations

    try:
        mentions = {
            ontoterminology[selected_id]['NAME']:
                set(ontoterminology[selected_id]['PMID'])
            for selected_id in ontology_id_list
            if selected_id in ontoterminology
        }

        # combinations() yields each unordered pair exactly once, in
        # first-seen order. That replaces the quadratic "inverse duplicate"
        # scan, whose string-concatenation comparison could also match
        # unrelated pairs (e.g. "ab"+"c" vs "a"+"bc").
        chn_list = []
        for source, target in combinations(mentions, 2):
            if not source.strip() or not target.strip():
                continue
            if source.strip() == target.strip():
                continue
            shared = mentions[source] & mentions[target]
            if shared:
                chn_list.append({
                    "source": source,
                    "target": target,
                    "PMID": len(shared)
                })
        print("finished checking for inverse duplicates..")

        # Build the data table expected by the visualisation library.
        links = pd.DataFrame.from_dict(chn_list)
        # Series.append was removed in pandas 2.0; pd.concat is the equivalent.
        # With no links there is no "source" column, so this raises and the
        # handler below returns the error snippet.
        node_names = pd.concat([links.source, links.target]).unique()
        node_info = {
            "index": node_names,
            "name": node_names,
            "group": [1] * len(node_names)
        }
        nodes = hv.Dataset(pd.DataFrame(node_info), 'index')

        chord = hv.Chord((links, nodes)).select(value=(0, None))
        chord.opts(
            opts.Chord(cmap='Category20', edge_cmap='Category20',
                       edge_color='source', labels='name',
                       node_color='index'))

        renderer = hv.renderer('bokeh')
        hvplot = renderer.get_plot(chord)
        html = renderer.static_html(hvplot)
        return json.dumps(html)
    except Exception as e:
        # Deliberate best-effort: callers always receive a JSON string.
        print(e)
        traceback.print_exc()
        html_error_message = "<!doctype html><div><h4>ERROR CREATING TABLE - no associations found, or possibly some of the ID's were incorrect?</h4></div></html>"
        return (json.dumps(html_error_message))
def plot_connectogram(connectivity_matrix, atlas_labels, atlas_indices,
                      threshold=None, chord_type=int, dst_dir=None,
                      filename=None):
    '''Plot a connectivity matrix as a connectogram.

    Parameters
    ----------
    connectivity_matrix : np.array
        A symmetric connectivity matrix. It is copied, never modified.
    atlas_labels : pd.Series or list
        A list-like object providing names of each atlas region.
    atlas_indices : pd.Series or list
        A list-like object providing indices of each atlas region.
    threshold : float or int, optional
        Apply a threshold to the connectivity matrix before plotting. Only
        connectivity values that are greater than or equal to this threshold
        are visualized.
    chord_type : int or float, optional
        Convert the connectivity values to float or int type. If the weight
        values are integers, they define the number of chords to be drawn
        between the source and target nodes directly. If the weights are
        floating point values, they are normalized to a default of 500 chords,
        which are divided up among the edges. Any non-zero weight will be
        assigned at least one chord. The default is int.
    dst_dir : str, optional
        Name of the output directory. The default is None.
    filename : str, optional
        Name of the file (must be provided including the extension). The
        default is None.

    Returns
    -------
    connectogram_plot : holoviews.element.graphs.Chord
        The connectogram plot object.

    Raises
    ------
    ValueError
        If ``dst_dir`` is given without ``filename``.

    '''
    import os

    # Work on a float copy: NaN cannot be stored in an integer array, so an
    # integer input matrix would otherwise fail on the masking step below.
    connectivity_matrix = np.array(connectivity_matrix, dtype=float)

    # The matrix is symmetric, so blank the lower triangle (np.tril_indices
    # also covers the diagonal) to drop duplicate and self connections.
    il = np.tril_indices(len(connectivity_matrix))
    connectivity_matrix[il] = np.nan

    connectivity_matrix_df = pd.DataFrame(data=connectivity_matrix,
                                          columns=atlas_indices,
                                          index=atlas_indices)

    # Clear any index name inherited from atlas_indices before stacking.
    if connectivity_matrix_df.index.name:
        connectivity_matrix_df.index.name = None

    # Long format: one row per remaining (source, target) pair.
    connectivity_matrix_stacked = connectivity_matrix_df.stack().reset_index()
    connectivity_matrix_stacked.columns = ['source', 'target', 'value']

    if chord_type == int:
        connectivity_matrix_stacked = connectivity_matrix_stacked.astype(int)

    # keep only connections that are not 0
    connectivity_matrix_stacked = connectivity_matrix_stacked.loc[
        connectivity_matrix_stacked['value'] != 0, :]

    # optional: keep only connections >= threshold
    if threshold:
        connectivity_matrix_stacked = connectivity_matrix_stacked.loc[
            connectivity_matrix_stacked['value'] >= threshold, :]

    # Show only nodes that still have at least one connection after filtering.
    atlas_df = pd.DataFrame({
        'region_id': atlas_indices,
        'label': atlas_labels
    })
    nodes_to_show = np.unique(
        connectivity_matrix_stacked[['source', 'target']].values)
    atlas_df = atlas_df.loc[atlas_df['region_id'].isin(nodes_to_show)]
    nodes = hv.Dataset(atlas_df, 'region_id', 'label')

    connectogram_plot = hv.Chord((connectivity_matrix_stacked, nodes),
                                 ['source', 'target'], ['value'])
    connectogram_plot.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   node_color=dim('region_id').str(), labels='label'))

    if dst_dir:
        if not filename:
            raise ValueError('Please provide a filename')
        # os.path.join inserts the separator that plain concatenation omitted
        # when dst_dir had no trailing slash.
        dst_path = os.path.join(dst_dir, filename)
        hv.save(connectogram_plot, dst_path)

    # FIXME: this doesn't work for me in Spyder
    show(hv.render(connectogram_plot))

    return connectogram_plot
"index": 1 }, { "name": "Cytosine", "index": 2 }, { "name": "Guanine", "index": 3 }, { "name": "Thymine", "index": 4 }] nodes = hv.Dataset(pd.DataFrame(nodos), 'index') # generate the diagram with given options # these options create a diagram with labels, nodes, and edges # comment these two lines if you want to make a all-black diagram #chord = hv.Chord((links, nodes)) #chord.opts( # opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), # labels="name", node_color=dim('index').str())) # use these options to create a diagram in black with no labels, no nodes, and no edges # these options create a more artsy diagram # comment these lines if you want to keep the oob options chord = hv.Chord(links) chord.opts( opts.Chord(edge_color="black", node_color="black", node_size=1, edge_visible=False))
def chord_diagram(df_flow_MN, n_cycles, dir_path):
    '''
    Plot chord diagrams for flows across industry sectors.

    Two link sets are derived from the 'Industrial' rows: "waste" (generator
    sector -> RETDF sector, by flow transferred) and "recyled" (RETDF sector ->
    industry sector, by recycled flow). Amounts are summed and divided by
    n_cycles. For each link set a node table is written to
    <dir_path>/chord_<flow>.csv and the diagram to <dir_path>/chord_<flow>.pdf.
    '''
    industrial = df_flow_MN.loc[df_flow_MN['Option'] == 'Industrial']
    industrial = industrial[['Cycle', 'Generator Industry Sector',
                             'Flow transferred', 'RETDF Industry Sector',
                             'Recycled flow', 'Industry sector']]

    # For each flow kind: original column -> link-table column.
    flow_specs = {'waste': {'Generator Industry Sector': 'source',
                            'RETDF Industry Sector': 'target',
                            'Flow transferred': 'value'},
                  'recyled': {'RETDF Industry Sector': 'source',
                              'Industry sector': 'target',
                              'Recycled flow': 'value'}}

    df_links = pd.DataFrame()
    for flow_name, renames in flow_specs.items():
        src_col, dst_col, amount_col = list(renames.keys())
        per_cycle = industrial[['Cycle', src_col, dst_col, amount_col]].groupby(
            ['Cycle', src_col, dst_col], as_index=False).sum()
        totals = per_cycle.drop(columns='Cycle').groupby(
            [src_col, dst_col], as_index=False).sum()
        totals[amount_col] = totals[amount_col] / n_cycles
        totals['flow'] = flow_name
        totals = totals.rename(columns=renames)
        if flow_name == 'waste':
            # 1 metric ton/yr
            totals = totals[totals['value'] >= 1000]
        df_links = pd.concat([df_links, totals], ignore_index=True,
                             sort=True, axis=0)

    # Drop self-links, then number the sectors that remain.
    df_links = df_links.loc[df_links['source'] != df_links['target']]
    sector_names = set(df_links['source'].unique().tolist()
                       + df_links['target'].unique().tolist())
    sector_ids = {name: number for number, name in enumerate(sector_names)}
    df_links = df_links.replace({'source': sector_ids, 'target': sector_ids})

    df_nodes = pd.DataFrame({'index': list(sector_ids.values()),
                             'name sector': list(sector_ids.keys())})
    df_nodes['name'] = df_nodes['index'].apply(lambda x: f'Sector {x+1}')

    for flow_name in ['waste', 'recyled']:
        try:
            df_links_plot = df_links.loc[df_links['flow'] == flow_name,
                                         ['source', 'target', 'value']]
            sources = df_links_plot['source'].unique().tolist()
            sinks_only = df_links_plot.loc[
                ~df_links_plot['target'].isin(sources),
                'target'].unique().tolist()
            # Give every target-only sector a negligible self-link so that it
            # also occurs as a source.
            if sinks_only:
                padding = pd.DataFrame({'source': sinks_only,
                                        'target': sinks_only,
                                        'value': [10**-50] * len(sinks_only)})
                df_links_plot = pd.concat([df_links_plot, padding],
                                          ignore_index=True, sort=True, axis=0)

            hv.Chord(df_links_plot)
            nodes = hv.Dataset(df_nodes, 'index')
            chord = hv.Chord((df_links_plot, nodes)).select(value=(5, None))
            chord.opts(
                opts.Chord(cmap='Category20', edge_cmap='Category20',
                           edge_color=dim('source').str(), labels='name',
                           node_color=dim('index').str()))
            df_nodes.to_csv(f'{dir_path}/chord_{flow_name}.csv', sep=',',
                            index=False)
            hv.save(chord, f'{dir_path}/chord_{flow_name}.pdf', fmt='pdf')
        except ValueError:
            print(f'There are not records for {flow_name} activities')
def chord_diagram(ds, agg='mean', minimum_quantile=0, groups=None, size=200,
                  pallette='Category20', fig_inches=4):
    """
    Build a chord diagram on the base of holoviews [1].

    It visualizes allocated peer-to-peer flows for all buses given in the data.
    For compatibility with the ipython shell the rendering of the image is
    passed to matplotlib, at the cost of interactivity.

    Note that the plot becomes only meaningful for networks with N > 5, because
    of sparse flows otherwise.

    [1] http://holoviews.org/reference/elements/bokeh/Chord.html

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with a 'peer_to_peer' variable that has a 'snapshot' dimension.
    agg : str, default is 'mean'
        Name of the reduction method applied over 'snapshot'
        (e.g. 'mean', 'sum', 'max').
    minimum_quantile : float, default is 0
        Flows below this quantile of all flow values are dropped.
    groups : pd.Series, default is None
        Specify the groups of your buses, which are then used for aggregating
        and coloring. The series must contain values for all allocated buses.
    size : int, default is 200
        Passed to the matplotlib layout plot as ``fig_size``.
    pallette : str, default is 'Category20'
        Colormap used for nodes and edges.
    fig_inches : int, default is 4
        Passed to the matplotlib layout plot as ``fig_inches``.

    Returns
    -------
    fig, axes
        The initialized plot and its ``axes`` attribute.
    """
    from holoviews.plotting.mpl import Layout, LayoutPlot
    import holoviews as hv
    hv.extension('matplotlib')

    # Honour ``agg``; it used to be ignored in favour of a hard-coded mean.
    aggregated = getattr(ds.peer_to_peer, agg)('snapshot')
    allocation = filter_null(as_dense(aggregated), 'source') \
        .to_series().dropna()

    if groups is not None:
        # Series.sum(level=...) was removed in pandas 2.0. It grouped with
        # sort=False internally, which is reproduced here.
        allocation = allocation.rename(groups) \
            .groupby(level=['sink', 'source'], sort=False).sum()

    # Map every bus occurring in either index level to an integer id.
    allocated_buses = allocation.index.levels[0] \
        .append(allocation.index.levels[1]).unique()
    bus_map = pd.Series(range(len(allocated_buses)), index=allocated_buses)

    links = allocation.to_frame('value').reset_index() \
        .replace({'source': bus_map, 'sink': bus_map}) \
        .sort_values('source').reset_index(drop=True)[
            lambda df: df.value >= df.value.quantile(minimum_quantile)]

    nodes = pd.DataFrame({'bus': bus_map.index})
    cindex = 'index'
    ecindex = 'source'

    nodes = hv.Dataset(nodes, 'index')
    diagram = hv.Chord((links, nodes))
    diagram = diagram.opts(style={'cmap': pallette,
                                  'edge_cmap': pallette,
                                  'tight': True},
                           plot={'label_index': 'bus',
                                 'color_index': cindex,
                                 'edge_color_index': ecindex})

    fig = LayoutPlot(Layout([diagram]), dpi=300, fig_size=size,
                     fig_inches=fig_inches, tight=True, tight_padding=0,
                     fig_bounds=(-.15, -.15, 1.15, 1.15),
                     hspace=0, vspace=0, fontsize=15)\
        .initialize_plot()
    return fig, fig.axes
'source': edge[0], 'target': edge[1], 'weight': 1, 'node': node[0], 'package': node[1]['package'], }) except KeyError: #takes care of base module imports pass df = pd.DataFrame(graph_data) return df if __name__ == '__main__': test = ImportGraph( directory=Path('/home/dal/PycharmProjects/pyjanitor_fork/janitor')) df = test.output_graph() hv.extension('bokeh') defaults = dict(width=1000, height=1000, padding=0.1) chord = hv.Chord((df[['source', 'target', 'weight']], df[['node', 'package']])) chord.opts( opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='node', node_color=dim('package').str(), width=1000, height=1000)) hv.save(chord, 'test.html')
import pandas as pd import holoviews as hv from holoviews import opts, dim #Read in game of thrones data nodes = pd.read_csv('data/got-s1-nodes.csv', low_memory=False) edges = pd.read_csv('data/got-s1-edges.csv', low_memory=False) # Specify the plot render to use hv.extension('bokeh') hv.output(size=300) # Chord diagram with interactive components edgeList = edges[['Source', 'Target', 'weight']] nodeDS = hv.Dataset(nodes, 'Id') chord = hv.Chord((edgeList, nodeDS)) chord.opts( opts.Chord(inspection_policy='nodes', tools=['hover'], edge_hover_line_color='green', node_hover_fill_color='red')) hv.save(chord, 'simple_chord.html') # Coloured interactive chord diagram kwargs = dict(width=300, height=300, xaxis=None, yaxis=None) opts.defaults(opts.Nodes(**kwargs), opts.Graph(**kwargs)) graph = hv.Graph((edgeList, nodeDS), label='GoT season 1') graph.opts(cmap='Category20', edge_cmap='Category20',
def chordGraphByMajor(self, coefficient=0.5, pval=0.05,
                      outputName='majorGraph', outputSize=200,
                      imageSize=300, showGraph=True, outputImage=True):
    """Creates a chord graph between available majors through averaging and filtering both correlation coefficients and P-values.

    Outputs to an html file, PNG file, and saves the underlying data by default.

    Note:
        The 'classDept' column as set by :obj:`defineWorkingColumns` must have been defined in your dataset to filter by major.

    Args:
        coefficient (:obj:`float`, optional): Minimum correlation coefficient to filter correlations by. Defaults to 0.5.
        pval (:obj:`float`, optional): Maximum P-value to filter correlations by. Defaults to 0.05 (a standard P-value limit used throughout the sciences)
        outputName (:obj:`str`, optional): First part of the outputted file names, e.g. fileName.csv, fileName.html, etc.
        outputSize (:obj:`int`, optional): Size (units unknown) of html graph to output. 200 by default.
        imageSize (:obj:`int`, optional): Size (units unknown) of image of the graph to output. 300 by default. Increase this if node labels are cut off.
        showGraph (:obj:`bool`, optional): Whether or not to open a browser and display the interactive graph that was created. Defaults to :obj:`True`.
        outputImage (:obj:`bool`, optional): Whether or not to export an image of the graph. Defaults to :obj:`True`.

    Returns:
        None. Returns early (after printing an error) when no major pair passes the filters.

    """
    hv.output(size=outputSize)

    # Reduce each course name to its leading run of non-digit characters.
    # The pattern is a raw string so `\A` and `\D` are not treated as
    # (invalid) string escapes.
    majorPrefix = re.compile(r'\A\D+')
    majorFiltered = self.df.copy()
    majorFiltered['course1'] = majorFiltered['course1'].apply(
        lambda course: majorPrefix.findall(course)[0])
    majorFiltered['course2'] = majorFiltered['course2'].apply(
        lambda course: majorPrefix.findall(course)[0])

    majors = majorFiltered['course1'].unique().tolist()
    majors.sort()

    majorFiltered['corr'] = pd.to_numeric(majorFiltered['corr'])
    majorFiltered['P-value'] = pd.to_numeric(majorFiltered['P-value'])
    majorFiltered['#students'] = pd.to_numeric(majorFiltered['#students'])

    majorCorrelations = []
    # Set of (source, target) pairs already recorded, so the reversed pair
    # is not added a second time.
    usedMajors = set()
    for count, major in enumerate(majors, start=1):
        print(str(count) + ' / ' + str(len(majors)) + ' majors')
        filteredToMajor = majorFiltered.loc[majorFiltered['course1'] == major]
        connectedMajors = filteredToMajor['course2'].unique().tolist()
        for targetMajor in connectedMajors:
            filteredToMajorPair = filteredToMajor.loc[
                filteredToMajor['course2'] == targetMajor]
            meanCorr = filteredToMajorPair['corr'].mean()
            if pd.isna(meanCorr):
                # int(NaN) would raise; a pair without any usable
                # correlation cannot pass the filter anyway.
                continue
            # Correlation is compared as a truncated integer percentage.
            avgCorr = int(meanCorr * 100)
            avgPVal = filteredToMajorPair['P-value'].mean()
            avgStudents = filteredToMajorPair['#students'].mean()
            if (avgCorr > (coefficient * 100) and major != targetMajor
                    and avgPVal < pval):
                if (targetMajor, major) not in usedMajors:
                    usedMajors.add((major, targetMajor))
                    majorCorrelations.append(
                        (major, targetMajor, avgCorr, avgPVal, avgStudents))

    if len(majorCorrelations) == 0:
        print('Error: no valid correlations found.')
        return
    print(str(len(majorCorrelations)) + ' valid major correlations found.')

    output = pd.DataFrame(majorCorrelations,
                          columns=('source', 'target', 'corr', 'P-value',
                                   '#students'))

    # Node table: every major that appears as a source or a target, sorted.
    sortedMajors = sorted(set(output['source']) | set(output['target']))
    nodes = pd.DataFrame(sortedMajors, columns=['name'])

    print("source before:", output['source'])
    print("target before:", output['target'])
    # Replace major names by their row position in `nodes`.
    nodeIndex = {name: position for position, name in enumerate(sortedMajors)}
    output['source'] = output['source'].map(nodeIndex)
    output['target'] = output['target'].map(nodeIndex)
    # The former debug print of the index of the leaked loop variable
    # `major` was dropped: it raised IndexError whenever the last major of
    # the loop had no surviving correlation.
    print("source now:", output['source'])
    print("target now:", output['target'])
    print(output['source'].dtype)

    output.to_csv(outputName + '.csv', index=False)
    hvNodes = hv.Dataset(nodes, 'index')
    chord = hv.Chord((output, hvNodes)).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('source').str(), labels='name',
                   node_color=dim('index').str()))
    graph = hv.render(chord)
    output_file(outDir + outputName + '.html', mode='inline')
    save(graph)
    if showGraph:
        show(graph)

    chord.opts(toolbar=None)
    if outputImage:
        # Render the PNG at imageSize, then restore the html output size.
        hv.output(size=imageSize)
        try:
            export_png(hv.render(chord),
                       filename=outDir + outputName + '.png')
        except RuntimeError:
            print(
                "Runtime error in chordGraphByMajors in line with export_png(hv.render(chord), filename=outDir +outputName + '.png')"
            )
    hv.output(size=outputSize)
# Build the node table for the PIOT chord diagram and save the diagram.
links = tidier_PIOT

PIOT_Z['index1'] = PIOT_Z.index
# The group of a sector is the first two characters of its index label.
PIOT_Z['group'] = PIOT_Z.index1.astype(str).str[0:2]
# `reset_index(drop=True)` replaces `reset_index().drop('index', 1)`:
# passing `axis` positionally to `DataFrame.drop` was removed in pandas 2.0.
df = PIOT_Z[['index1', 'group']].copy().reset_index(drop=True)

# One row per distinct group: 'group' is its running number, 'NAICS_2' the
# two-character prefix it stands for.
Key = pd.DataFrame(df.group.unique()).reset_index()
Key.columns = ['group', 'NAICS_2']

# Both frames carry a 'group' column, so the merge yields 'group_x' (prefix)
# and 'group_y' (running number); only the latter is kept.
df_fin = df.merge(Key, left_on=['group'], right_on=['NAICS_2'])
df_fin = df_fin.drop(columns=['group_x', 'NAICS_2'])
df_fin.columns = ['index', 'group']

node_desc = df_fin.to_dict('records')
nodes = hv.Dataset(pd.DataFrame(node_desc), 'index')

chord = hv.Chord((links, nodes)).select(value=(5, None))
# NOTE(review): `labels='name'` refers to a column the node table built
# above does not contain (it has 'index' and 'group') - confirm intent.
chord.opts(
    opts.Chord(cmap='Category20', edge_cmap='Category20',
               edge_color=dim('source').str(), labels='name',
               node_color=dim('index').str()))
hv.save(chord, 'chord_piotalt.html')

###### Do the same thing for the MIOT ##############
# MIOT = "SIOT_BEA2012AR_CxC.xlsx"
# Z_Monetary = pd.read_excel(MIOT, usecols = 'A: OP', skipfooter = 6, header = 0, index_col = 0)
# Z_Monetary = format_idx_col(Z_Monetary)
import numpy as np import pandas as pd od_matrix = pd.read_csv('od_matrix.csv', header=None).astype('int') od_matrix.columns = ['origin','ori_ts','destination','des_ts','cost'] od_matrix = od_matrix.assign(ordered = od_matrix.ori_ts < od_matrix.des_ts) od_matrix = od_matrix.assign(source = od_matrix.origin if od_matrix.ordered is True else od_matrix.destination, target = od_matrix.destination if od_matrix.ordered is True else od_matrix.origin) links = od_matrix.groupby(['source','target']).size().reset_index(name='value') station_features = pd.read_csv('stations_location.csv') nodes = hv.Dataset(station_features, 'numero_estacion') graph = hv.Chord((links, nodes), ['source','target'], ['value']).select(value=(60, None)) %opts Chord [label_index='nombre' color_index='nombre' edge_color_index='source' width=800 height=800] %opts Chord (cmap='Category20' edge_cmap='Category20') # Using renderer save renderer = hv.renderer('bokeh') renderer.save(graph, 'graph') ## Convert to bokeh figure then save using bokeh #plot = renderer.get_plot(curve).state # #from bokeh.io import output_file, save, show #save(plot, 'graph.html') ## OR #output_file("graph.html")
def plot4():
    """Build a chord diagram from the matrix CSV at ``file`` and return it as JSON.

    The edge list derived from the CSV is cached next to the input as
    ``<file minus its last four characters>.out.csv`` (tab separated). When
    that cache file exists it is loaded instead of re-deriving the edges.

    Returns:
        str: ``json.dumps`` of the ``json_item`` of the rendered Bokeh plot.
    """
    # The cache path derives from the input *path*. The previous code
    # sliced the loaded DataFrame instead, which made the function fail
    # before doing any work.
    cache_path = file[:-4] + '.out.csv'

    if not Path(cache_path).is_file():
        df_data = pd.read_csv(file, sep=';', header=0, index_col=False)

        # Column headers without the first column, which holds row names.
        names = list(df_data.columns.values)[1:]
        # NOTE(review): weights are always read from column 1, regardless of
        # the row being processed - confirm this is intended and should not
        # be column i + 1.
        column_weights = df_data.iloc[:, 1].tolist()

        rows = []
        for i in range(df_data.shape[0]):
            origin = df_data.iloc[i, 0]
            # Only pair row i with the entries after it, so every pair is
            # listed once.
            targets = names[i + 1:]
            weights = column_weights[i + 1:]
            for target, weight in zip(targets, weights):
                # Weight 1 marks a self link. Skip the whole edge so that
                # from/to/weight stay aligned (the old code removed items
                # from the weight list while iterating over it).
                if weight == 1:
                    continue
                rows.append((origin, target, weight))

        df_plot = pd.DataFrame(rows, columns=['from', 'to', 'weight'])
        # delete edges with weight 0
        df_plot = df_plot.loc[df_plot['weight'] != 0.0]
        df_plot.to_csv(cache_path, sep='\t', encoding='utf-8', index=False)
    else:
        df_plot = pd.read_csv(cache_path, sep='\t', encoding='utf-8',
                              index_col=False)

    chord = hv.Chord(df_plot)
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20',
                   edge_color=dim('weight').str(), labels='name',
                   node_color=dim('index').str()))

    renderer = hv.renderer('bokeh')
    m = renderer.get_plot(chord).state
    return json.dumps(json_item(m))
We'll use a [chord diagram](http://python-graph-gallery.com/chord-diagram/) generated by the [Bokeh](https://docs.bokeh.org/en/latest/index.html) backend of [HoloViews](http://holoviews.org) to visualize the brain connectivity patterns. We'll need to re-format the correlation matrices into DataFrames that describe the set of connections using four columns (there will be a total of $[(K^2 - K)/2]$ rows in this DataFrame): - *source*: origin of the connection - *target*: destination of the connection - *value*: the strength of the connection - *sign*: whether the connection is positive (+1) or negative (-1) def mat2chord(vec, t=0, cthresh=0.25): def mat2links(x, ids): links = [] for i in range(x.shape[0]): for j in range(i): links.append({'source': ids[i], 'target': ids[j], 'value': np.abs(x[i, j]), 'sign': np.sign(x[i, j])}) return pd.DataFrame(links) links = mat2links(tc.vec2mat(vec)[:, :, t], rois['ID']) chord = hv.Chord((links, hv.Dataset(rois, 'ID'))).select(value=(cthresh, None)) chord.opts( opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='Region', node_color=dim('ID').str()) ) return chord Here's the first correlation matrix: hmap = mat2chord(isfc, t=0) # This is to render chord plot in jupyter-book html_repr = file_html(pn.Column(hmap).get_root(), CDN) IPython.display.HTML(html_repr) Now let's create an interactive figure to display the dynamic network patterns, with a slider for controlling the timepoint: