def iso_json(string1, string2):
    dataG1 = json.loads(string1)
    graph1 = json_graph.node_link_graph(dataG1)
    dataG2 = json.loads(string2)
    graph2 = json_graph.node_link_graph(dataG2)
    # return nx.is_isomorphic(graph1, graph2)
    return nx.faster_could_be_isomorphic(graph1, graph2)
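# Usage sketch for iso_json (not from the original project): it assumes the
# module-level json, networkx-as-nx, and json_graph imports used above, and the
# two sample graphs below are invented. faster_could_be_isomorphic() only
# compares degree sequences, so True means "possibly isomorphic", never a proof.
import json
import networkx as nx
from networkx.readwrite import json_graph

g_a = nx.path_graph(4)
g_b = nx.cycle_graph(4)
s_a = json.dumps(json_graph.node_link_data(g_a))
s_b = json.dumps(json_graph.node_link_data(g_b))

print(iso_json(s_a, s_a))  # True (identical degree sequences)
print(iso_json(s_a, s_b))  # False (path vs. cycle degree sequences differ)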
def restore_anm_nidb_from_json(data):
    d = ank_json_custom_loads(data)
    anm = autonetkit.anm.AbstractNetworkModel()
    nidb = autonetkit.nidb.NIDB()
    for overlay_id, overlay_data in d.items():
        if overlay_id == "nidb":
            continue  # don't restore nidb graph to anm
        anm._overlays[overlay_id] = json_graph.node_link_graph(overlay_data)
    nidb._graph = json_graph.node_link_graph(d['nidb'])
    rebind_interfaces(anm)
    return anm, nidb
def graphs_json2networkx(input_dict):
    from json import loads
    from networkx.readwrite import json_graph
    gtext = loads(input_dict['graph'])
    g = json_graph.node_link_graph(gtext)
    return {'nxgraph': g}
def main(json_file, output_prefix, source, target):
    with open(json_file) as data_file:
        data = json.load(data_file)
    G = json_graph.node_link_graph(data, directed=False)
    print("Finished Reading in Graph: {0}".format(datetime.datetime.now()))

    id_seq = networkx.get_node_attributes(G, "sequence")
    seq_id = {seq: node_id for node_id, seq in id_seq.items()}
    print("Created inverse lookup table: {0}".format(datetime.datetime.now()))

    if ',' in target:
        targets = target.split(',')
    else:
        targets = [target]

    for target in targets:
        paths = networkx.all_shortest_paths(G, seq_id[source], seq_id[target])
        with open("{0}_paths_{1}_{2}.txt".format(output_prefix, source, target), 'w') as o:
            for path in paths:
                o.write(",".join([id_seq[node_id] for node_id in path]))
                o.write("\n")

    print("Output paths: {0}".format(datetime.datetime.now()))
def restore_anm_nidb_from_json(data):
    # This can be used to extract from the json used to send to webserver
    d = ank_json_custom_loads(data)
    anm = autonetkit.anm.AbstractNetworkModel()
    nidb = autonetkit.nidb.DeviceModel()
    for overlay_id, overlay_data in d.items():
        if overlay_id == "nidb":
            continue  # don't restore nidb graph to anm
        anm._overlays[overlay_id] = json_graph.node_link_graph(overlay_data)
    nidb._graph = json_graph.node_link_graph(d['nidb'])
    rebind_interfaces(anm)
    return anm, nidb
def graph(self, node_links_data):
    self.g = json_graph.node_link_graph(node_links_data)
    remove = list()
    for _id in self.g.node:
        if self.g.node[_id]["node_type"] not in self.valid_type:
            remove.append(_id)
    self.g.remove_nodes_from(remove)
def as_tree(graph, root=OPENSTACK_CLUSTER, reverse=False):
    linked_graph = json_graph.node_link_graph(graph)
    if 0 == nx.number_of_nodes(linked_graph):
        return {}
    if reverse:
        linked_graph = linked_graph.reverse()
    return json_graph.tree_data(linked_graph, root=root)
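# Usage sketch for as_tree (not from the original project): tree_data() needs a
# directed graph that is a tree rooted at the given node, so the node names
# below are invented and the root is passed explicitly instead of relying on
# the OPENSTACK_CLUSTER default.
import networkx as nx
from networkx.readwrite import json_graph

tree = nx.DiGraph()
tree.add_edges_from([("cluster", "zone-1"), ("zone-1", "host-a"), ("zone-1", "host-b")])

nested = as_tree(json_graph.node_link_data(tree), root="cluster")
print(nested["id"], [child["id"] for child in nested["children"]])
# cluster ['zone-1']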
def load_json(stream):
    """
    Args:
        stream: Open stream containing js

    Assumes the js is in networkx node-link format
    """
    js = json.load(stream)
    g = json_graph.node_link_graph(js)
    assert all(['coords' in nd for nd in g.node.values()]), \
        "json node-link graph must have nodes with coords for GeoGraph"

    # get coords
    coords = [v['coords'] for v in g.node.values()]

    # set default projection
    input_proj = ""
    if gm.is_in_lon_lat(coords):
        input_proj = gm.PROJ4_LATLONG
    else:
        input_proj = gm.PROJ4_FLAT_EARTH

    coords_dict = {k: v['coords'] for k, v in g.node.items()}
    # now get rid of 'coords' key,val for each node
    for node in g.node.values():
        node.pop('coords', None)

    geo_nodes = GeoGraph(srs=input_proj, coords=coords_dict, data=g)
    return geo_nodes
def get_selected_reaction(jsonGraph, nodeDic, reacIDs, org):
    """
    Filter selected reactions and show results from the PyNetMet calculation.
    It returns a subgraph of the Graph from the jsonGraph. The output is a DOT-Language String.

    @param jsonGraph: Graph in JSON-Format
    @param nodeDic: dict mapping names to ids
    @param reacIDs: names of reactions contained in the nodeDic
    @param org: organism
    @return Subgraph
    """
    # Translate reac names to IDs
    # Get substrates and products of all reacs
    metabolites = []
    for reac in reacIDs:
        metabolites += org.get_reaction(reac).metabolites
    met_ids = list(map(lambda x: nodeDic[x], metabolites))

    g = json_graph.node_link_graph(jsonGraph)
    g.remove_edges_from(list(filter(
        lambda x: g.get_edge_data(*x)["object"].name not in reacIDs,
        g.edges(met_ids))))

    # Get products/substrates directly connected to filter
    #reacIDs += flatten(g.in_edges(reacIDs)) + flatten(g.out_edges(reacIDs))

    h = g.subgraph(met_ids)
    return h
def find_min_spanning_tree(A):
    """
    Input:
        A : Adjacency matrix in scipy.sparse format.
    Output:
        T : Minimum spanning tree.
        run_time : Total runtime to find minimum spanning tree
    """
    # Record start time.
    start = time.time()

    # Check if graph is pre-processed, if yes then don't process it again.
    if os.path.exists('../Data/dcg_graph.json'):
        with open('../Data/dcg_graph.json') as data:
            d = json.load(data)
        G = json_graph.node_link_graph(d)
    # If graph is not preprocessed then convert it to a Graph and save it to a JSON file.
    else:
        G = from_scipy_sparse_matrix(A)
        data = json_graph.node_link_data(G)
        with open('../Data/dcg_graph.json', 'w') as outfile:
            json.dump(data, outfile)

    # Find MST.
    T = minimum_spanning_tree(G)

    # Record total runtime.
    run_time = time.time() - start
    return T, run_time
def main(json_file, output_prefix, metric):
    with open(json_file) as data_file:
        data = json.load(data_file)
    G = json_graph.node_link_graph(data)

    metrics = {}
    #metrics["degree"] = degree(G)
    metrics["closeness"] = closeness_centrality(G).values()
    #TODO: add any other metrics here using a similar format to above line.

    sequences = {}
    cleaved_seq = {key: val for key, val in sequences.items() if val["type"] == "CLEAVED"}

    if metric != "metrics":
        labels_to_plot = [metric]
    else:
        labels_to_plot = metrics.keys()

    n_to_plot = len(labels_to_plot)
    fig, axarr = pconv.create_ax(n_to_plot, 1, shx=False, shy=False)
    nbins = 20

    for ind, key in enumerate(labels_to_plot):
        normed = True
        hist.draw_actual_plot(axarr[0, ind], metrics[key], "", key.capitalize(),
                              normed=normed, nbins=nbins)
        axarr[0, ind].ticklabel_format(axis='x', style='sci', scilimits=(-2, 2))
        #pconv.add_legend(axarr[0,ind], location="middle right")

    pconv.save_fig(fig, output_prefix, "metrics", n_to_plot * 5, 5, tight=True, size=12)
def read_from_json_gexf(fname=None, label_field_name='APIs', conv_undir=False):
    '''
    Load the graph files (.gexf or .json only supported)
    :param fname: graph file name
    :param label_field_name: field denoting the node label
    :param conv_undir: convert to undirected graph or not
    :return: graph in networkx format
    '''
    if not fname:
        logging.error('no valid path or file name')
        return None

    try:
        try:
            with open(fname, 'rb') as File:
                org_dep_g = json_graph.node_link_graph(json.load(File))
        except:
            org_dep_g = nx.read_gexf(path=fname)
        g = nx.DiGraph()
        for n, d in org_dep_g.nodes_iter(data=True):
            g.add_node(n, attr_dict={'label': '-'.join(d[label_field_name].split('\n'))})
        g.add_edges_from(org_dep_g.edges_iter())
    except:
        logging.error("unable to load graph from file: {}".format(fname))
        return None

    logging.debug('loaded {} a graph with {} nodes and {} edges'.format(
        fname, g.number_of_nodes(), g.number_of_edges()))
    if conv_undir:
        g = nx.Graph(g)
        logging.debug('converted {} as undirected graph'.format(g))
    return g
def transferRedditDataFormat(dataset_dir, output_file):
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/reddit-G.json")))
    labels = json.load(open(dataset_dir + "/reddit-class_map.json"))

    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n]['test']]
    val_ids = [n for n in G.nodes() if G.node[n]['val']]
    train_labels = [labels[i] for i in train_ids]
    test_labels = [labels[i] for i in test_ids]
    val_labels = [labels[i] for i in val_ids]
    feats = np.load(dataset_dir + "/reddit-feats.npy")

    ## Logistic gets thrown off by big counts, so log transform num comments and score
    feats[:, 0] = np.log(feats[:, 0] + 1.0)
    feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1))

    feat_id_map = json.load(open(dataset_dir + "/reddit-id_map.json"))
    feat_id_map = {id: val for id, val in feat_id_map.items()}

    # train_feats = feats[[feat_id_map[id] for id in train_ids]]
    # test_feats = feats[[feat_id_map[id] for id in test_ids]]
    # numNode = len(feat_id_map)
    # adj = sp.lil_matrix(np.zeros((numNode, numNode)))
    # for edge in G.edges():
    #     adj[feat_id_map[edge[0]], feat_id_map[edge[1]]] = 1

    train_index = [feat_id_map[id] for id in train_ids]
    val_index = [feat_id_map[id] for id in val_ids]
    test_index = [feat_id_map[id] for id in test_ids]
    np.savez(output_file, feats=feats, y_train=train_labels, y_val=val_labels, y_test=test_labels,
             train_index=train_index, val_index=val_index, test_index=test_index)
def read_json_graph(istream):
    """ Reads a json graph output by the algorithm and returns it """
    data = json.loads(istream.read())
    G = json_graph.node_link_graph(data)
    return G
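# Round-trip sketch for read_json_graph (not from the original project): any
# object with a .read() method works, so an io.StringIO stands in for a file.
import io
import json
import networkx as nx
from networkx.readwrite import json_graph

g = nx.karate_club_graph()
payload = json.dumps(json_graph.node_link_data(g))

g2 = read_json_graph(io.StringIO(payload))
assert nx.is_isomorphic(g, g2)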
def simple_to_nx(j_data): port_to_index_mapping = defaultdict(dict) for node in j_data['nodes']: if not "ports" in node: continue node_id = node['id'] # first check for loopback zero ports = node['ports'] _ports = {} # output format try: lo_zero = [p for p in ports if p['id'] == "Loopback0"].pop() except IndexError: # can't pop -> no loopback zero, append lo_zero = {'category': 'loopback', 'description': "Loopback Zero"} else: ports.remove(lo_zero) finally: _ports[0] = lo_zero '''Sharad: below change is for 2nd loopback. currently commenting it out. change start in below loop to 2 while adding another loopback lo_one = {'category': 'loopback', 'description': "Loopback One", 'id':'loopback1'} _ports[1] = lo_one ''' for index, port in enumerate(ports, start=1): _ports[index] = port port_to_index_mapping[node_id][port['id']] = index del node['ports'] node['_ports'] = _ports nodes_by_id = {n['id']: i for i, n in enumerate(j_data['nodes'])} unmapped_links = [] if "links" in j_data: mapped_links = j_data['links'] for link in mapped_links: src = link['src'] dst = link['dst'] src_pos = nodes_by_id[src] dst_pos = nodes_by_id[dst] src_port_id = port_to_index_mapping[src][link['src_port']] dst_port_id = port_to_index_mapping[dst][link['dst_port']] interfaces = {src: src_port_id, dst: dst_port_id} unmapped_links.append({'source': src_pos, 'target': dst_pos, '_ports': interfaces, 'link_type': link['link_type'] }) j_data['links'] = unmapped_links return json_graph.node_link_graph(j_data)
def load(self, content):
    loaded = json.loads(content)
    self.G = json_graph.node_link_graph(loaded['structure'])
    for folder in loaded['data']:
        temp = Folder()
        temp.load(loaded['data'][folder])
        self.data[folder] = temp
def get_all_tags_graph():
    try:
        j = AllTagsGraph.objects.all()[:1][0].graph
        d = simplejson.loads(j)
        g = json_graph.node_link_graph(d, directed=True)
    except IndexError:
        g = networkx.read_edgelist('taggraph/fixtures/wired_text_hubpagerank.edgelist',
                                   create_using=networkx.DiGraph())
        tg = BaseTagGraph()
        tg.graph = g
        tg._pagerank()
        j = tg.to_json()
        AllTagsGraph.objects.create(graph=simplejson.dumps(j))
        j = AllTagsGraph.objects.all()[:1][0].graph
        d = simplejson.loads(j)
        g = json_graph.node_link_graph(d, directed=True)
    return g
def create_manhattan_scenario(load='high', reduced = False): #load graph, get positions if reduced: with open('assets/manhattan_road_netx_constrained.json','r') as data_file: road_graph = json_graph.node_link_graph(json.load(data_file)) else: with open('assets/manhattan_road_netx.json','r') as data_file: road_graph = json_graph.node_link_graph(json.load(data_file)) pos = OrderedDict({node: (road_graph.node[node]["latlon"][1], road_graph.node[node]["latlon"][0]) for node in road_graph.nodes()}) with open('assets/manhattan_demands_50.json','r') as f: raw_demands = json.loads(f.read()) #implement 1-NN, get relationship of source, sink -> source_node, sink_node station_to_node = {} for station, loc in raw_demands['stations'].iteritems(): node = get_neighbor(pos, (loc[1], loc[0]), 1)[0] station_to_node[station] = road_graph.nodes().index(node) #create demands list of tuples using the 1-NN relationship and the demands demands = {} for scenario in raw_demands['scenarios']: demands[scenario] = {} for demand in raw_demands['scenarios'][scenario]: src = station_to_node[demand[0]] snk = station_to_node[demand[1]] if src in demands[scenario]: if snk in demands[scenario][src]: demands[scenario][src] += demand[2] else: demands[scenario][src][snk] = demand[2] else: demands[scenario][src] = {} demands[scenario][src][snk] = demand[2] dems = [] for src, sinks in demands[scenario].iteritems(): for snk, d in sinks.iteritems(): dems.append((src,snk,d)) demands[scenario] = dems return road_graph, pos, demands[load]
def load_from_json(self, fileName='net.json'):
    start = timer()
    with open(fileName) as json_file:
        json_data = json.load(json_file)
    self.G = json_graph.node_link_graph(json_data)
    self.initialLoadComplete = True
    end = timer() - start
    log.info('time taken to load: {}'.format(end))
def calculate_average_degree(self):
    n = self.dbs.query(NetworkModel.graph_data) \
        .filter(NetworkModel.site == self.site) \
        .filter(NetworkModel.graph_type == self.graph_type) \
        .one()
    g = json_graph.node_link_graph(json.loads(n[0]), directed=False, multigraph=False)
    return nx.degree_centrality(g)
def load_graph(path):
    try:
        json_data = open(path, 'r')
        image_graph = json_graph.node_link_graph(eval(json_data.read()))
        json_data.close()
    except FileNotFoundError:
        image_graph = networkx.Graph()
    return image_graph
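# Note on load_graph above: eval() on file contents executes arbitrary code. A
# safer variant (a sketch, not the original project's code) parses the file
# with json.load instead, assuming it was written with json_graph.node_link_data.
import json
import networkx
from networkx.readwrite import json_graph

def load_graph_json(path):
    try:
        with open(path, 'r') as json_data:
            image_graph = json_graph.node_link_graph(json.load(json_data))
    except FileNotFoundError:
        image_graph = networkx.Graph()
    return image_graph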
def get_commit_tree_json(repo):
    import json
    from networkx.readwrite import json_graph

    with open('./data/' + repo + ':commits', 'r') as f:
        data = f.read()
    json_data = json.loads(data)
    return json_graph.node_link_graph(json_data)
def load(self, pathname):
    global igversion
    with open(pathname, "r") as f:
        try:
            self.G = json_graph.node_link_graph(json.load(f, encoding='utf-8'),
                                                multigraph=False, directed=True)
        except ValueError:
            self.G = json_graph.node_link_graph(json.load(f),
                                                multigraph=False, directed=True)
    if 'igversion' in self.G.graph:
        if self.G.graph['igversion'] != igversion:
            raise ValueError('Mismatched version. Graph needs to be upgraded to ' + igversion)
    self.G.graph['igversion'] = igversion
    if 'idcount' in self.G.graph:
        self.idc = self.G.graph['idcount']
    elif self.G.has_node('idcount'):
        self.idc = self.G.node['idcount']['count']
        self.G.graph['idcount'] = self.idc
        self.G.remove_node('idcount')
    self.dir = os.path.abspath(os.path.split(pathname)[0])
def graph_from_file(path):
    """
    Given a file path, makes a NetworkX graph from the JSON data and returns it
    """
    with open(path) as f:
        data = json.load(f)
    g = json_graph.node_link_graph(data)
    return g
def load(fname):
    with gzip.GzipFile(fname, 'r') as infile:
        data = json.loads(infile.read())

    def cell_from_dict(d):
        return {"id": Cell(d["address"], None, value=d["value"],
                           formula=d["formula"], is_named_range=d["is_named_range"])}

    nodes = list(map(cell_from_dict, data["nodes"]))
    data["nodes"] = nodes
    G = json_graph.node_link_graph(data)
    return Spreadsheet(G, G.nodes())
def validateExpDL(expdl={}, distributionPath=None): """ """ # Do not modify the input object expdl = copy.deepcopy(expdl) if not expdl: expdl = dict() if not isinstance(expdl, dict): raise TypeError("ExpDL should be a dictionary") expdl.setdefault('topoGraph', json_graph.node_link_data(testbed.getTopoGraph())) nodeList = json_graph.node_link_graph(expdl['topoGraph']).nodes() nodeList.sort() expdl['nodeList'] = nodeList expdl.setdefault('magiNodeList', nodeList) if distributionPath: expdl['distributionPath'] = distributionPath else: expdl.setdefault('distributionPath', DEFAULT_DIST_DIR) nodePaths = expdl.setdefault('nodePaths', dict()) nodeDir = nodePaths.setdefault('root', NODE_DIR) nodePaths.setdefault('config', os.path.join(nodeDir, 'config')) nodePaths.setdefault('logs', os.path.join(nodeDir, 'logs')) nodePaths.setdefault('db', os.path.join(nodeDir, 'db')) nodePaths.setdefault('temp', DEFAULT_TEMP_DIR) testbedPaths = expdl.setdefault('testbedPaths', dict()) testbedPaths['experimentDir'] = testbed.getExperimentDir() expdl.setdefault('aal', os.path.join(testbed.getExperimentDir(), "procedure.aal")) testbedClass = expdl.setdefault('testbedClass', testbed.getTestbedClassFQCN()) # In case the experimenter specifies a testbed type that is different from # the default type for the host machine, set the environment accordingly. if testbedClass != testbed.getTestbedClassFQCN(): try: testbed.setTestbedClass(testbedClass) except Exception: log.exception('Could not create instance of set testbed class') log.info('Setting default testbed class.') expdl['testbedClass'] = testbed.getTestbedClassFQCN() # Setting experiment name and project name # testbed.toControlPlaneNodeName() uses them from magi.testbed.emulab import EmulabTestbed if isinstance(testbed.getTestbedClassInstance(), EmulabTestbed): experimentName = expdl.setdefault('experimentName', testbed.getExperiment()) projectName = expdl.setdefault('projectName', testbed.getProject()) testbed.setEID(experiment=experimentName, project=projectName) return expdl
def __init__(self, *args, **kwargs):
    super(MRInfluence, self).__init__(*args, **kwargs)
    with open(self.options.graph_file, "r") as graph_data:
        graph_data = json.load(graph_data)
    self.graph = json_graph.node_link_graph(graph_data)
    self.k = int(self.options.num_init)
    self.t = int(self.options.periods)
    self.initial_nodes = list(np.random.choice(self.graph.nodes(), self.k))
def transform(self, data):
    """Transform."""
    try:
        for serial_data in util.read(data):
            py_obj = json.loads(serial_data)
            graph = json_graph.node_link_graph(py_obj)
            yield graph
    except Exception as e:
        logger.debug('Failed iteration. Reason: %s' % e)
        logger.debug('Exception', exc_info=True)
def __init__(self, data_path):
    with open(data_path, 'r') as graph_file:
        graph_data = json.load(graph_file)
    self.graph = json_graph.node_link_graph(graph_data, multigraph=False)
    self.name_node_map = {node[1]['name']: node[0] for node in self.graph.nodes(data=True)}
    self.all_paths = networkx.all_pairs_dijkstra_path(self.graph)
    self.all_costs = networkx.all_pairs_dijkstra_path_length(self.graph)

    # add blank houses set to each node
    for node in self.graph.nodes():
        self.graph.node[node]['houses'] = []
def perform_map_matching(self, road_network, trace, rank):
    if settings.DEBUG:
        log("Building road network graph...")
    graph = json_graph.node_link_graph(road_network["graph"])
    shortest_path_index = road_network["shortest_path_index"]
    beta = self.hmm_prob_model(road_network, graph, shortest_path_index, trace, rank)

    if settings.DEBUG:
        log("Implementing viterbi algorithm...")
    chosen_index = self.hmm_viterbi_forward()
    sequence = self.hmm_viterbi_backward(road_network, graph, shortest_path_index, trace, chosen_index)

    return {'path': sequence[0],
            'route': sequence[1],
            'dist': sequence[2],
            'path_index': sequence[3],
            'emission_prob': self.emission_prob,
            'transition_prob': self.transition_prob,
            'candidate_rid': self.candidate_rid,
            'confidence': sequence[4]}
def load_data(prefix, normalize=True, load_walks=False): G_data = json.load(open(prefix + "-G.json")) G = json_graph.node_link_graph(G_data) # nx.draw(G, pos=nx.spring_layout(G)) # plt.show() if isinstance(G.nodes()[0], int): conversion = lambda n: int(n) else: conversion = lambda n: n if os.path.exists(prefix + "-feats.npy"): feats = np.load(prefix + "-feats.npy") else: print("No features present.. Only identity features will be used.") feats = None id_map = json.load(open(prefix + "-id_map.json")) id_map = {conversion(k): int(v) for k, v in id_map.items()} walks = [] class_map = json.load(open(prefix + "-class_map.json")) if isinstance(list(class_map.values())[0], list): lab_conversion = lambda n: n else: lab_conversion = lambda n: int(n) class_map = { conversion(k): lab_conversion(v) for k, v in class_map.items() } ## Remove all nodes that do not have val/test annotations ## (necessary because of networkx weirdness with the Reddit data) broken_count = 0 for node in G.nodes(): if not 'val' in G.node[node] or not 'test' in G.node[node]: G.remove_node(node) broken_count += 1 print( "Removed {:d} nodes that lacked proper annotations due to networkx versioning issues" .format(broken_count)) ## Make sure the graph has edge train_removed annotations ## (some datasets might already have this..) print("Loaded data.. now preprocessing..") for edge in G.edges(): if (G.node[edge[0]]['val'] or G.node[edge[1]]['val'] or G.node[edge[0]]['test'] or G.node[edge[1]]['test']): G[edge[0]][edge[1]]['train_removed'] = True else: G[edge[0]][edge[1]]['train_removed'] = False if normalize and not feats is None: from sklearn.preprocessing import StandardScaler train_ids = np.array([ id_map[n] for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test'] ]) train_feats = feats[train_ids] scaler = StandardScaler() scaler.fit(train_feats) feats = scaler.transform(feats) if load_walks: with open(prefix + "-walks.txt") as fp: for line in fp: walks.append(map(conversion, line.split())) return G, feats, id_map, walks, class_map
def load_data(prefix):
    G_data = json.load(open(prefix + "-G.json"))
    G = json_graph.node_link_graph(G_data)
    id_map = json.load(open(prefix + "-id_map.json"))
    id_map = {int(k): int(v) for k, v in id_map.items()}
    return G, id_map
def load_graphsage_data(dataset_path, dataset_str, normalize=True): """Load GraphSAGE data.""" start_time = time.time() graph_json = json.load( gfile.Open('{}/{}/{}-G.json'.format(dataset_path, dataset_str, dataset_str))) graph_nx = json_graph.node_link_graph(graph_json) id_map = json.load( gfile.Open('{}/{}/{}-id_map.json'.format(dataset_path, dataset_str, dataset_str))) is_digit = list(id_map.keys())[0].isdigit() id_map = {(int(k) if is_digit else k): int(v) for k, v in id_map.items()} class_map = json.load( gfile.Open('{}/{}/{}-class_map.json'.format(dataset_path, dataset_str, dataset_str))) is_instance = isinstance(list(class_map.values())[0], list) class_map = {(int(k) if is_digit else k): (v if is_instance else int(v)) for k, v in class_map.items()} broken_count = 0 to_remove = [] for node in graph_nx.nodes(): if node not in id_map: to_remove.append(node) broken_count += 1 for node in to_remove: graph_nx.remove_node(node) tf.logging.info( 'Removed %d nodes that lacked proper annotations due to networkx versioning issues', broken_count) feats = np.load( gfile.Open( '{}/{}/{}-feats.npy'.format(dataset_path, dataset_str, dataset_str), 'rb')).astype(np.float32) tf.logging.info('Loaded data (%f seconds).. now preprocessing..', time.time() - start_time) start_time = time.time() edges = [] for edge in graph_nx.edges(): if edge[0] in id_map and edge[1] in id_map: edges.append((id_map[edge[0]], id_map[edge[1]])) num_data = len(id_map) val_data = np.array( [id_map[n] for n in graph_nx.nodes() if graph_nx.node[n]['val']], dtype=np.int32) test_data = np.array( [id_map[n] for n in graph_nx.nodes() if graph_nx.node[n]['test']], dtype=np.int32) is_train = np.ones((num_data), dtype=np.bool) is_train[val_data] = False is_train[test_data] = False train_data = np.array([n for n in range(num_data) if is_train[n]], dtype=np.int32) train_edges = [ (e[0], e[1]) for e in edges if is_train[e[0]] and is_train[e[1]] ] edges = np.array(edges, dtype=np.int32) train_edges = np.array(train_edges, dtype=np.int32) # Process labels if isinstance(list(class_map.values())[0], list): num_classes = len(list(class_map.values())[0]) labels = np.zeros((num_data, num_classes), dtype=np.float32) for k in class_map.keys(): labels[id_map[k], :] = np.array(class_map[k]) else: num_classes = len(set(class_map.values())) labels = np.zeros((num_data, num_classes), dtype=np.float32) for k in class_map.keys(): labels[id_map[k], class_map[k]] = 1 if normalize: train_ids = np.array([ id_map[n] for n in graph_nx.nodes() if not graph_nx.node[n]['val'] and not graph_nx.node[n]['test'] ]) train_feats = feats[train_ids] scaler = sklearn.preprocessing.StandardScaler() scaler.fit(train_feats) feats = scaler.transform(feats) def _construct_adj(edges): adj = sp.csr_matrix((np.ones( (edges.shape[0]), dtype=np.float32), (edges[:, 0], edges[:, 1])), shape=(num_data, num_data)) adj += adj.transpose() return adj train_adj = _construct_adj(train_edges) full_adj = _construct_adj(edges) train_feats = feats[train_data] test_feats = feats tf.logging.info('Data loaded, %f seconds.', time.time() - start_time) return num_data, train_adj, full_adj, feats, train_feats, test_feats, labels, train_data, val_data, test_data
def graph_from_json(f):
    with open(f, 'r') as infile:
        networkx_graph = json_graph.node_link_graph(json.load(infile))
    return networkx_graph
def __load_graph(json_data):
    G = json_graph.node_link_graph(json_data, False, False,
                                   {'name': 'pub_key',
                                    'source': 'node1_pub',
                                    'target': 'node2_pub',
                                    'key': 'channel_id',
                                    'link': 'edges'})
    return G
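# Sketch of what the custom attrs mapping above does (assumes networkx 2.x,
# where node_link_graph still accepts an attrs dict): node ids come from
# "pub_key" and edges come from the "edges" list keyed by "node1_pub"/"node2_pub".
# The two-node payload below is invented.
from networkx.readwrite import json_graph

payload = {
    "nodes": [{"pub_key": "node_a"}, {"pub_key": "node_b"}],
    "edges": [{"node1_pub": "node_a", "node2_pub": "node_b", "channel_id": "1x2x3"}],
}
G = json_graph.node_link_graph(
    payload, False, False,
    {'name': 'pub_key', 'source': 'node1_pub',
     'target': 'node2_pub', 'key': 'channel_id', 'link': 'edges'})
print(list(G.edges(data=True)))  # [('node_a', 'node_b', {'channel_id': '1x2x3'})]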
def _load_G(self):
    G_data = json.load(open(os.path.join(self.data_dir, "G.json")))
    self.G = json_graph.node_link_graph(G_data)
    if type(self.G.nodes()[0]) is int:
        mapping = {k: str(k) for k in self.G.nodes()}
        self.G = nx.relabel_nodes(self.G, mapping)
def read_json_file(self, graph_dir):
    with open(graph_dir, 'r') as f:
        j_graph = json.load(f)
    return json_graph.node_link_graph(j_graph)
def joins_for_table(self, table_name: str):
    # get the graph for networkx
    G = json_graph.node_link_graph(self.design["graph"])
    return nx.shortest_path(G, source=self.base_table_name, target=table_name)
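# Sketch of the idea behind joins_for_table (table names invented): the design
# graph stores table relationships in node-link form, and nx.shortest_path
# returns the chain of tables to join through.
import networkx as nx
from networkx.readwrite import json_graph

design_graph = nx.Graph()
design_graph.add_edges_from([("orders", "customers"), ("customers", "regions")])
design = {"graph": json_graph.node_link_data(design_graph)}

G = json_graph.node_link_graph(design["graph"])
print(nx.shortest_path(G, source="orders", target="regions"))
# ['orders', 'customers', 'regions']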
def load_data(self, directory):
    '''Loads data if already generated'''
    with open(directory + '/page_ranks.json') as rank_file:
        self.ranks = json.load(rank_file)
    with open(directory + '/citation_graph.json') as graph_file:
        self.graph = json_graph.node_link_graph(json.load(graph_file))
def structure_raw(self):
    if self.__cached_structure_raw is None:
        g = node_link_graph(self.data)
        g.__class__ = MoleculeContainer
        self.__cached_structure_raw = g
    return self.__cached_structure_raw
def from_json(cls, data):
    graph = node_link_graph(data[PYANNOTE_JSON_CONTENT])
    mapping = {node: T(node) for node in graph}
    graph = nx.relabel_nodes(graph, mapping)
    return cls(graph=graph, **graph.graph)
flag = 0 N = 1000 P = 0.008008 n = 1000 m = 4 if flag == 0: print('generate a newG') #ER = nx.random_graphs.erdos_renyi_graph(N, P) ER = nx.random_graphs.barabasi_albert_graph(n, m) with open(filename, 'w', encoding='utf-8') as f: json.dump(json_graph.node_link_data(ER), f) else: print('load a goodG') f = open(filename, 'r', encoding='utf-8') nld = json.load(f) ER = json_graph.node_link_graph(nld) f.close() # 画图 print(f'ER-Graph: {n}, {m}, {len(ER.edges())}') ''' pos = nx.spring_layout(ER) nx.draw(ER, pos, with_labels=False, node_size=40) plt.show()''' ################################## # 删边策略1-边介数 G = nx.Graph(ER) sublist = [] len_of_mcc = [] edges = []
def load_graphs(dataset_str): node_labels = [None] edge_labels = [None] idx_train = [None] idx_val = [None] idx_test = [None] if dataset_str == 'grid': graphs = [] features = [] for _ in range(1): graph = nx.grid_2d_graph(20, 20) graph = nx.convert_node_labels_to_integers(graph) feature = np.identity(graph.number_of_nodes()) graphs.append(graph) features.append(feature) elif dataset_str == 'communities': graphs = [] features = [] node_labels = [] edge_labels = [] for i in range(1): community_size = 20 community_num = 20 p = 0.01 graph = nx.connected_caveman_graph(community_num, community_size) count = 0 for (u, v) in graph.edges(): if random.random() < p: # rewire the edge x = random.choice(list(graph.nodes)) if graph.has_edge(u, x): continue graph.remove_edge(u, v) graph.add_edge(u, x) count += 1 print('rewire:', count) n = graph.number_of_nodes() label = np.zeros((n, n), dtype=int) for u in list(graph.nodes): for v in list(graph.nodes): if u // community_size == v // community_size and u > v: label[u, v] = 1 rand_order = np.random.permutation(graph.number_of_nodes()) feature = np.identity(graph.number_of_nodes())[:, rand_order] graphs.append(graph) features.append(feature) edge_labels.append(label) elif dataset_str == 'protein': graphs_all, features_all, labels_all = Graph_load_batch( name='PROTEINS_full') features_all = (features_all - np.mean( features_all, axis=-1, keepdims=True)) / np.std( features_all, axis=-1, keepdims=True) graphs = [] features = [] edge_labels = [] for graph in graphs_all: n = graph.number_of_nodes() label = np.zeros((n, n), dtype=int) for i, u in enumerate(graph.nodes()): for j, v in enumerate(graph.nodes()): if labels_all[u - 1] == labels_all[v - 1] and u > v: label[i, j] = 1 if label.sum() > n * n / 4: continue graphs.append(graph) edge_labels.append(label) idx = [node - 1 for node in graph.nodes()] feature = features_all[idx, :] features.append(feature) print('final num', len(graphs)) elif dataset_str == 'email': with open('data/email.txt', 'rb') as f: graph = nx.read_edgelist(f) label_all = np.loadtxt('data/email_labels.txt') graph_label_all = label_all.copy() graph_label_all[:, 1] = graph_label_all[:, 1] // 6 for edge in list(graph.edges()): if graph_label_all[int(edge[0])][1] != graph_label_all[int( edge[1])][1]: graph.remove_edge(edge[0], edge[1]) comps = [ comp for comp in nx.connected_components(graph) if len(comp) > 10 ] graphs = [graph.subgraph(comp) for comp in comps] edge_labels = [] features = [] for g in graphs: n = g.number_of_nodes() feature = np.ones((n, 1)) features.append(feature) label = np.zeros((n, n), dtype=int) for i, u in enumerate(g.nodes()): for j, v in enumerate(g.nodes()): if label_all[int(u)][1] == label_all[int(v)][1] and i > j: label[i, j] = 1 label = label edge_labels.append(label) elif dataset_str == 'ppi': dataset_dir = 'data/ppi' print("Loading data...") G = json_graph.node_link_graph( json.load(open(dataset_dir + "/ppi-G.json"))) edge_labels_internal = json.load( open(dataset_dir + "/ppi-class_map.json")) edge_labels_internal = { int(i): l for i, l in edge_labels_internal.items() } train_ids = [n for n in G.nodes()] train_labels = np.array([edge_labels_internal[i] for i in train_ids]) if train_labels.ndim == 1: train_labels = np.expand_dims(train_labels, 1) print("Using only features..") feats = np.load(dataset_dir + "/ppi-feats.npy") # Logistic gets thrown off by big counts, so log transform num comments and score feats[:, 0] = np.log(feats[:, 0] + 1.0) feats[:, 1] = np.log(feats[:, 1] - min(np.min(feats[:, 1]), -1)) 
feat_id_map = json.load(open(dataset_dir + "/ppi-id_map.json")) feat_id_map = {int(id): val for id, val in feat_id_map.items()} train_feats = feats[[feat_id_map[id] for id in train_ids]] node_dict = {} for id, node in enumerate(G.nodes()): node_dict[node] = id comps = [comp for comp in nx.connected_components(G) if len(comp) > 10] graphs = [G.subgraph(comp) for comp in comps] id_all = [] for comp in comps: id_temp = [] for node in comp: id = node_dict[node] id_temp.append(id) id_all.append(np.array(id_temp)) features = [train_feats[id_temp, :] + 0.1 for id_temp in id_all] # elif dataset_str == 'brightkite': # dataset_dir = 'data/Brightkite' # print("Loading data...") # G = nx.read_edgelist( # open(dataset_dir + "/Brightkite_edges.txt", "rb")) # node_dict = {} # for id, node in enumerate(G.nodes()): # node_dict[node] = id # feature = [] # feature_id = [] # fp = open(dataset_dir + "/Brightkite_totalCheckins.txt", "r") # lines = fp.readlines() # for line in lines: # line = line.strip() # line = line.split() # if (len(line) < 5): # continue # feature_id.append(int(line[0])) # feature.append([time.mktime(time.strptime( # line[1], '%Y-%m-%dT%H:%M:%SZ')), float(line[2]), float(line[3])]) # # print(line) # feature = np.array(feature) # feature[:, 0] = np.log(feature[:, 0] + 1.0) # feature[:, 1] = np.log( # feature[:, 1] - min(np.min(feature[:, 1]), -1)+1) # feature[:, 2] = np.log( # feature[:, 2] - min(np.min(feature[:, 2]), -1)+1) # feature_map = {} # for i in range(len(feature_id)): # if (feature_id[i] not in feature_map): # feature_map[feature_id[i]] = [] # feature_map[feature_id[i]].append(feature[i]) # # print(feature_map) # feature_actual_map = {} # for k in feature_map: # feature_actual_map[k] = np.mean(feature_map[k], axis=0) # # print(feature_actual_map) # comps = [comp for comp in nx.connected_components(G) if len(comp) > 10] # graphs = [G.subgraph(comp) for comp in comps] # id_all = [] # features = [] # count = 0 # for comp in comps: # id_temp = [] # feat_temp = [] # for node in comp: # id = node_dict[node] # id_temp.append(id) # if (id not in feature_actual_map): # feat_temp.append([0.0, 0.0, 0.0]) # count = count+1 # else: # feat_temp.append(feature_actual_map[id]) # id_all.append(np.array(id_temp)) # features.append(np.array(feat_temp)) # print("Not found features of %d nodes" % {count}) else: raise NotImplementedError return graphs, features, edge_labels, node_labels, idx_train, idx_val, idx_test
def load_from_JSON(self, filename="temp.json"):
    with open(filename, 'r') as file:
        self.network = json_graph.node_link_graph(json.load(file))
def json_to_nxgraph(jsgraph):
    """Converts a json-like dictionary to a networkx graph."""
    return json_graph.node_link_graph(jsgraph)
def read_from_file(self, filename=FILENAME):
    """Reads graph from a JSON file in node-link format"""
    with open(filename, "r") as f:
        dat = json.load(f)
    self.graph = json_graph.node_link_graph(dat)
def load(fileName):
    with open(fileName + '.json') as json_file:
        topology = json.load(json_file)
    F = json_graph.node_link_graph(topology)
    return F
def __init__(self):
    graph_data = self._load_graph_data()
    self.graph = (json_graph.node_link_graph(graph_data)
                  if graph_data else nx.Graph())
pairs = [] for count, node in enumerate(nodes): if G.degree(node) == 0: continue for i in range(num_walks): curr_node = node for j in range(WALK_LEN): next_node = random.choice(G.neighbors(curr_node)) # self co-occurrences are useless if curr_node != node: pairs.append((node, curr_node)) curr_node = next_node if count % 1000 == 0: print("Done walks for", count, "nodes") return pairs if __name__ == "__main__": """ Run random walks """ graph_file = sys.argv[1] out_file = sys.argv[2] G_data = json.load(open(graph_file)) G = json_graph.node_link_graph(G_data) nodes = [ n for n in G.nodes() if not G.node[n]["val"] and not G.node[n]["test"] ] G = G.subgraph(nodes) pairs = run_random_walks(G, nodes) with open(out_file, "w") as fp: fp.write("\n".join([str(p[0]) + "\t" + str(p[1]) for p in pairs]))
def read_json_file(filename):
    with open(filename) as f:
        js_graph = json.load(f)
    return json_graph.node_link_graph(js_graph)
def find_GPSA(dir_raw, GP_dict, soln, dnR, fuel_comp, n_break=0): GP_name = GP_dict['name'] traced = GP_dict['traced'] dir_save = os.path.join(dir_raw, 'GPSA', traced) if not os.path.exists(dir_save): os.makedirs(dir_save) path_save = os.path.join(dir_save, shorten_GP_name(GP_name) + '.json') # ===================================== # if previously computed, return False GPSA_all = dict() if os.path.exists(path_save): GPSA_all = json.load(open(path_save, 'r')) if GP_name in GPSA_all.keys(): return False # ===================================== # if not loaded, compute these results print 'computing GPSA for ' + GP_name traced = GP_dict['traced'] GP_member = GP_dict['member'] dir_graph = os.path.join(dir_raw, 'graph') path_raw = os.path.join(dir_raw, 'raw.npz') raw = load_raw(path_raw) rr_mat = raw['net_reaction_rate'] GPSA = dict() GPSA['R_GP'] = [ ] # net radical production rate associated with a Global Pathway (GP) GPSA['Q_GP'] = [] # net heat release rate associated with a GP GPSA['D_GP'] = [] # dominancy of a GP GPSA['R_ij'] = dict( ) # net radical production rate associated with a conversion step (from the i-th species to the j-th species) GPSA['Q_ij'] = dict() GPSA['a_iji'] = dict() #GPSA['perc_ij'] = dict() for i in range(len(GP_member) - 1): edge = find_edge_name(GP_member, i) GPSA['R_ij'][edge] = dict() GPSA['R_ij'][edge]['member'] = [] GPSA['R_ij'][edge]['net'] = [] GPSA['Q_ij'][edge] = dict() GPSA['Q_ij'][edge]['member'] = [] GPSA['Q_ij'][edge]['net'] = [] GPSA['a_iji'][edge] = dict() GPSA['a_iji'][edge]['member'] = [] GPSA['a_iji'][edge]['net'] = [] #GPSA['perc_ij'][edge] = dict() #GPSA['perc_ij'][edge]['member'] = [] #GPSA['perc_ij'][edge]['net'] = [] source = GP_dict['member'][0] traced = GP_dict['traced'] if source not in fuel_comp.keys(): perc_from_source = 0.0 else: total_atom = 0 for k in fuel_comp.keys(): sp = soln.species(k) atom = 0.0 if traced in sp.composition.keys(): atom += fuel_comp[k] * sp.composition[traced] if source == k: atom_source = atom total_atom += atom perc_from_source = 1.0 * atom_source / total_atom #print 'total '+traced+' atoms for '+str(fuel_comp)+' is '+str(total_atom) #print 'source '+str(source)+' has '+str(atom_source)+' atms' #print 'so perc_from_source = '+str(perc_from_source) #GPSA['perc_ij']['from_source'] = perc_from_source # for each point ----------- i_pnt = 0 while True: path_graph = os.path.join(dir_graph, traced + '_' + str(i_pnt) + '.json') if not os.path.exists(path_graph): if i_pnt > n_break: print 'break as cannot find: ' + str(path_graph) break else: # fill this with None --------------- if i_pnt % 10 == 0: print ' fill None GPSA for ' + str(path_graph) for i in range(len(GP_member) - 1): s = GP_member[i] t = GP_member[i + 1] edge = find_edge_name(GP_member, i) for k in ['member', 'net']: GPSA['R_ij'][edge][k].append(None) GPSA['Q_ij'][edge][k].append(None) GPSA['a_iji'][edge][k].append(None) #GPSA['perc_ij'][edge][k].append(None) GPSA['R_GP'].append(None) GPSA['D_GP'].append(None) i_pnt += 1 continue soln = raw2soln(soln, raw, i_pnt) if i_pnt % 10 == 0: print ' finding GPSA for ' + str(path_graph) # fill this with real value --------------- #norm_Rpro = 0.0 #norm_Rcon = 0.0 for id_rxn in range(soln.n_reactions): dR = dnR[id_rxn] * rr_mat[i_pnt, id_rxn] #norm_Rpro += max(0, dR) #norm_Rcon += max(0, -dR) flux_graph = json_graph.node_link_graph( json.load(open(path_graph, 'r'))) out_deg = flux_graph.out_degree(weight='flux') norm_out_deg = sum( [out_deg[m] for m in fuel_comp.keys() if m in out_deg.keys()]) flux = [] rxn_involved = 
[] sum_OMEGA_R = 0 sum_OMEGA_Q = 0 perc_ij_list = [] # for each edge (conversion step) ----------------- for i in range(len(GP_member) - 1): s = GP_member[i] t = GP_member[i + 1] edge = find_edge_name(GP_member, i) GPSA['R_ij'][edge]['member'].append(dict()) GPSA['Q_ij'][edge]['member'].append(dict()) GPSA['a_iji'][edge]['member'].append(dict()) #GPSA['perc_ij'][edge]['member'].append(dict()) # ------------------------------ try: st = flux_graph[s][t] except KeyError: st = None perc_ij = None if st is not None: flux.append(st['flux']) perc_ij = 1.0 * st['flux'] / out_deg[s] perc_ij_list.append(perc_ij) for id_rxn_s in st['member'].keys(): id_rxn = int(id_rxn_s) rxn = soln.reaction_equation(id_rxn) rr = rr_mat[i_pnt, id_rxn] if rr < 0: sign_rxn = -id_rxn else: sign_rxn = id_rxn GPSA['a_iji'][edge]['member'][i_pnt][sign_rxn] = st[ 'member'][id_rxn_s] #GPSA['perc_ij'][edge]['member'][i_pnt][sign_rxn] = 1.0 * st['member'][id_rxn_s]/out_deg[s] OMEGA_R = float(rr * dnR[id_rxn]) GPSA['R_ij'][edge]['member'][i_pnt][sign_rxn] = OMEGA_R OMEGA_Q = float(rr * soln.delta_enthalpy[id_rxn]) GPSA['Q_ij'][edge]['member'][i_pnt][sign_rxn] = OMEGA_Q if id_rxn not in rxn_involved: sum_OMEGA_R += OMEGA_R sum_OMEGA_Q += OMEGA_Q rxn_involved.append(id_rxn) # ------------------------------ try: ts = flux_graph[t][s] except KeyError: ts = None if ts is not None: for id_rxn_s in ts['member'].keys(): id_rxn = int(id_rxn_s) rxn = soln.reaction_equation(id_rxn) rr = rr_mat[i_pnt, id_rxn] if rr < 0: sign_rxn = -id_rxn else: sign_rxn = id_rxn GPSA['a_iji'][edge]['member'][i_pnt][ sign_rxn] = -ts['member'][id_rxn_s] # ------------------------------ GPSA['R_ij'][edge]['net'].append( sum(GPSA['R_ij'][edge]['member'][i_pnt].values())) GPSA['Q_ij'][edge]['net'].append( sum(GPSA['Q_ij'][edge]['member'][i_pnt].values())) GPSA['a_iji'][edge]['net'].append( sum(GPSA['a_iji'][edge]['member'][i_pnt].values())) #GPSA['perc_ij'][edge]['net'].append(perc_ij) domi_perc = gmean(perc_ij_list) * perc_from_source if bool(flux): min_flux = min(flux) if norm_out_deg > 0: domi_flux = 1.0 * min_flux / norm_out_deg else: domi_flux = float('nan') else: min_flux = 0.0 domi_flux = 0.0 R_GP = domi_perc * sum_OMEGA_R Q_GP = domi_perc * sum_OMEGA_Q GPSA['R_GP'].append(R_GP) GPSA['Q_GP'].append(Q_GP) GPSA['D_GP'].append(domi_perc) i_pnt += 1 GPSA_all[GP_name] = GPSA json.dump(GPSA_all, open(path_save, 'w')) return True
def process_span(span,span_done,log_messages): def log(m): if CONFIG["process_verbose"] or CONFIG["report_verbose"] : log_messages.put("%s: %s"%(span,m)) # data to be reported after processing span_info={"span":span} log("starting") g=networkx.Graph() if CONFIG["export_ref_format"] =="gexf": if CONFIG["process_verbose"] : log("read gexf") g=networkx.read_gexf(os.path.join(CONFIG["parsed_data"],span,"%s.gexf"%span),node_type=unicode) elif CONFIG["export_ref_format"] == "edgelist": if CONFIG["process_verbose"] : log("read csv export") g=networkx.read_weighted_edgelist(os.path.join(CONFIG["parsed_data"],span,"%s.csv"%span),delimiter="\t") elif CONFIG["export_ref_format"] == "pajek": if CONFIG["process_verbose"] : log("read pajek export") g=networkx.read_pajek(os.path.join(CONFIG["parsed_data"],span,"%s.csv"%span)) elif CONFIG["export_ref_format"] == "json": if CONFIG["process_verbose"] : log("read pajek export") data=json.load(open(os.path.join(CONFIG["parsed_data"],span,"%s.json"%span),"r"),encoding="UTF-8") g=json_graph.node_link_graph(data) else: log("no export compatible export format specified") exit(1) network_references=g.nodes() nb_network_references=len(network_references) log("loaded %s ref from graph"%nb_network_references) span_info["references_occ_filtered"]=nb_network_references with codecs.open(os.path.join(CONFIG["parsed_data"],span,"references.dat"),"r",encoding="UTF-8") as file: # dat file have one trailing blank line at end of file data_lines=file.read().split("\n")[:-1] references_by_articles = [(l.split("\t")[0],",".join(l.split("\t")[1:])) for l in data_lines] #references_by_articles_filtered=[(a,r) for a,r in references_by_articles if r in references] references_by_articles.sort(key=lambda e:e[1]) article_groupby_reference=[(reference,list(ref_arts)) for reference,ref_arts in itertools.groupby(references_by_articles,key=lambda e:e[1])] span_info["nb_reference_before_filtering"]=len(article_groupby_reference) references_article_grouped=[t for t in article_groupby_reference if len(t[1])>=CONFIG["spans"][span]["references"]["occ"]] del article_groupby_reference del references_by_articles #make sure we have same references than network ref_filtered=[r for r,_ in references_article_grouped] if(len(ref_filtered))!=nb_network_references: s1=set(ref_filtered) s2=set(network_references) to_remove = s1 - s2 if len(to_remove)>0: log("filtering ref which are not in original network : removing %s ref"%len(to_remove)) references_article_grouped=[ (r,ref_arts) for r,ref_arts in references_article_grouped if r not in to_remove] del s1 del s2 del ref_filtered del network_references # print references_article_grouped log("imported, filtered and grouped references by articles") span_info["subjects_occ_filtered"]=add_annotations(span,"subjects",references_article_grouped,g,log) span_info["authors_occ_filtered"]=add_annotations(span,"authors",references_article_grouped,g,log) span_info["institutions_occ_filtered"]=add_annotations(span,"institutions",references_article_grouped,g,log) span_info["article_keywords_occ_filtered"]=add_annotations(span,"article_keywords",references_article_grouped,g,log) span_info["title_keywords_occ_filtered"]=add_annotations(span,"title_keywords",references_article_grouped,g,log) span_info["isi_keywords_occ_filtered"]=add_annotations(span,"isi_keywords",references_article_grouped,g,log) span_info["countries_occ_filtered"]=add_annotations(span,"countries",references_article_grouped,g,log) del references_article_grouped log("have now %s nodes"%len(g.nodes())) if 
not os.path.exists(CONFIG["output_directory"]): os.mkdir(CONFIG["output_directory"]) if CONFIG["export_ref_annotated_format"] =="gexf": log("write gexf export") networkx.write_gexf(g,os.path.join(CONFIG["output_directory"],"%s_annotated.gexf"%span)) elif CONFIG["export_ref_annotated_format"] == "edgelist": log("write csv export") networkx.write_weighted_edgelist(g,os.path.join(CONFIG["output_directory"],"%s_annotated.csv"%span),delimiter="\t") elif CONFIG["export_ref_annotated_format"] == "pajek": log("write pajek export") networkx.write_pajek(g,os.path.join(CONFIG["output_directory"],"%s_annotated.net"%span)) elif CONFIG["export_ref_annotated_format"] == "graphml": log("write pajek export") networkx.write_graphml(g,os.path.join(CONFIG["output_directory"],"%s_annotated.graphml"%span)) else: log("no compatible export format specified") with codecs.open(os.path.join(CONFIG["parsed_data"],span,"articles.dat"),"r",encoding="UTF-8") as articles_file: nb_articles=len(articles_file.read().split("\n")[:-1]) span_info["nb_articles"]=nb_articles span_done.put(span_info) del g
def structure(self):
    molcont = node_link_graph(self.data)
    molcont.__class__ = MoleculeContainer
    return molcont
def process_graph(content, model, search_value, n_clicks, search_type, filepath, G1, pos1, G2, pos2): """ Update/rebuild the graph when the user picks a new file or searches something. Stores the graph and its nodes positions in an intermediary div (replace div with dcc.store). This little maneuver greatly improves run-time. Arguments: content -- [The content of the uploaded file] search_value -- [The value searched by the user: nodes/paths/similarity] n_clicks -- [Number of times the path-button was clicked] model -- [Whether the user wants to perform the first 4 searches on the first or second model] filepath -- [Contains the file extension. Used to differentiate .txt from .p files] G1 -- [The first graph in json format] pos1 -- [The corresponding position of nodes in json format] G2 -- [The second graph in json format] pos2 -- [The corresponding position of nodes in json format] """ ctx = dash.callback_context component_name = ctx.triggered[0]['prop_id'].split('.')[0] if component_name == 'upload-data': # Get content & decode content = content.split(',')[1] decoded_content = base64.b64decode(content).decode('utf-8') file_extension = filepath.split(".")[1] # Build new graph nodes, edges = process_file(decoded_content, file_extension) G = build_graph(nodes, edges) pos = nx.nx_pydot.graphviz_layout(G) graph, _ = visualize_graph(G, pos) if model == 'model1': G1, pos1, graph1 = G, pos, graph G2, pos2, graph2 = try_get_other_graph(G2, pos2, fig2) else: G2, pos2, graph2 = G, pos, graph G1, pos1, graph1 = try_get_other_graph(G1, pos1, fig1) return graph1, graph2, json.dumps(node_link_data(G1)), json.dumps(pos1), json.dumps(node_link_data(G2)), json.dumps(pos2), {'display': 'none'}, '' elif component_name != 'model_selector': if model == 'model1': try: # Will be used for other searches G = node_link_graph(json.loads(G1)) pos = json.loads(pos1) except (TypeError, UnboundLocalError): raise dash.exceptions.PreventUpdate else: # Needed when both graphs are utilized for similarity G1, pos1, graph1 = G, pos, fig1 G2, pos2, graph2 = try_get_other_graph(G2, pos2, fig2) else: try: G = node_link_graph(json.loads(G2)) pos = json.loads(pos2) except (TypeError, UnboundLocalError): raise dash.exceptions.PreventUpdate else: G2, pos2, graph2 = G, pos, fig2 G1, pos1, graph1 = try_get_other_graph(G1, pos1, fig1) error = '' if component_name == 'input': if (search_type == 'word1,n,thld') & (pos1 is not None) & (pos2 is not None) & (NLP_MODEL is not None): # search using both graphs. 
Using 4-th search as proxy graph1, error1 = visualize_graph(G1, pos1, search_value, 'word,n,thld') graph2, error2 = visualize_graph(G2, pos2, search_value, 'word,n,thld') error = html.P([error1, html.Br(), error2]) else: # Other searches graph, error = visualize_graph(G, pos, search_value, search_type) elif (component_name == 'next-path-btn'): if n_clicks > 0: # Display other paths highlighted = get_clicked_path(n_clicks) graph, error = visualize_graph(G, pos, '', '', highlighted) else: raise dash.exceptions.PreventUpdate if len(GLOBAL_PATHS) > 1: button_display = {'text-align': 'center', 'display': 'inline-block'} else: button_display = {'display':'none'} if search_type != 'word1,n,thld': # Update values as a search other than the two graph-similarity has been performed if model == 'model1': G1, pos1, graph1 = G, pos, graph else: G2, pos2, graph2 = G, pos, graph return graph1, graph2, json.dumps(node_link_data(G1)), json.dumps(pos1), json.dumps(node_link_data(G2)), json.dumps(pos2), button_display, error else: raise dash.exceptions.PreventUpdate
def main(args): args.input += "/" + args.prefix G_data = json.load(open(args.input + "-G.json")) G = json_graph.node_link_graph(G_data) print(nx.info(G)) H = G.copy() G1, G2 = create_subnet(H, args.alpha_s, args.alpha_c) data1 = json_graph.node_link_data(G1) data2 = json_graph.node_link_data(G2) s1 = json.dumps(data1, indent=4, sort_keys=True) s2 = json.dumps(data2, indent=4, sort_keys=True) print("About G1") print(nx.info(G1)) print("About G2") print(nx.info(G2)) args.output1 += "/sourceclone,alpha_c={0},alpha_s={1}".format( args.alpha_c, args.alpha_s) args.output2 += "/targetclone,alpha_c={0},alpha_s={1}".format( args.alpha_c, args.alpha_s) if not os.path.isdir(args.output2 + '/edgelist'): os.makedirs(args.output2 + '/edgelist') os.makedirs(args.output2 + '/graphsage') os.makedirs(args.output2 + '/dictionaries') if not os.path.isdir(args.output1 + '/edgelist'): os.makedirs(args.output1 + '/edgelist') os.makedirs(args.output1 + '/graphsage') os.makedirs(args.output1 + '/dictionaries') edgelist_dir1 = args.output1 + "/edgelist/" + args.prefix + ".edgelist" edgelist_dir2 = args.output2 + "/edgelist/" + args.prefix + ".edgelist" if not os.path.isdir(args.output1): os.makedirs(args.output1) if not os.path.isdir(args.output2): os.makedirs(args.output2) nx.write_edgelist(G1, path=edgelist_dir1, delimiter=" ", data=['weight']) nx.write_edgelist(G2, path=edgelist_dir2, delimiter=" ", data=['weight']) args.output1 += "/graphsage/" + args.prefix args.output2 += "/graphsage/" + args.prefix with open(args.output1 + "-G.json", 'w') as f: f.write(s1) f.close() with open(args.output2 + "-G.json", 'w') as f: f.write(s2) f.close() copyfile(args.input + "-id_map.json", args.output1 + "-id_map.json") copyfile(args.input + "-id_map.json", args.output2 + "-id_map.json") if os.path.exists(args.input + "-class_map.json"): copyfile(args.input + "-class_map.json", args.output1 + "-class_map.json") copyfile(args.input + "-class_map.json", args.output2 + "-class_map.json") if os.path.exists(args.input + "-feats.npy"): copyfile(args.input + "-feats.npy", args.output1 + "-feats.npy") copyfile(args.input + "-feats.npy", args.output2 + "-feats.npy") if os.path.exists(args.input + "-walks.txt"): copyfile(args.input + "-walks.txt", args.output1 + "-walks.txt") copyfile(args.input + "-walks.txt", args.output2 + "-walks.txt")
def read_json_file(self, filename):
    with open(filename) as f:
        js_graph = json.load(f, encoding='utf-8')
    return json_graph.node_link_graph(js_graph)
def json_network_to_networkx(graph_json):
    graph = json_graph.node_link_graph(graph_json)
    return graph
width=1.0, alpha=1, node_size=node_size) nx.draw_networkx_edges( G, pos, edgelist=near_edges, width=1, alpha=0.3, edge_color="r", node_size=node_size, ) nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels) # plt.xlim(0 - axis * 0.1, axis * 1.1) # plt.ylim(0 - axis * 0.1, axis * 1.1) # plt.xticks(np.arange(0 - axis * 0.1, axis * 1.1, step=1)) # plt.yticks(np.arange(0 - axis * 0.1, axis * 1.1, step=1)) if grid: plt.grid() plt.show() if __name__ == "__main__": for entry in tqdm( os.scandir("data/filtered/final_data/zebra-cat-computer/1")): if entry.name.endswith("json"): with open(entry, "r") as f: graph = json_graph.node_link_graph(json.loads(f.read())) render_graph(graph) break
    # log.fit(train_embeds, train_labels)
    # print("F1 score:", f1_score(test_labels, log.predict(test_embeds), average="micro"))
    # print("Random baseline f1 score:", f1_score(test_labels, dummy.predict(test_embeds), average="micro"))


if __name__ == '__main__':
    # parser = ArgumentParser("Run evaluation on citation data.")
    # parser.add_argument("dataset_dir", help="Path to directory containing the dataset.")
    # parser.add_argument("embed_dir", help="Path to directory containing the learned node embeddings.")
    # parser.add_argument("setting", help="Either val or test.")
    # args = parser.parse_args()
    dataset_dir = "../"  # args.dataset_dir
    data_dir = "feat"  # "unsup-../graphsage_mean_small_0.000010" or "feat"
    setting = "test"  # args.setting

    print("Loading data...")
    G = json_graph.node_link_graph(json.load(open(dataset_dir + "/acm-G.json")))
    train_ids = [n for n in G.nodes() if not G.node[n]['val'] and not G.node[n]['test']]
    test_ids = [n for n in G.nodes() if G.node[n][setting]]
    test_labels = get_class_labels(G, test_ids)
    train_labels = get_class_labels(G, train_ids)

    if data_dir == "feat":
        print("Using only features..")
        feats = np.load(dataset_dir + "/acm-feats.npy")
        feat_id_map = json.load(open(dataset_dir + "/acm-id_map.json"))
        feat_id_map = {int(id): val for id, val in feat_id_map.items()}
        train_feats = feats[[feat_id_map[id] for id in train_ids]]
        test_feats = feats[[feat_id_map[id] for id in test_ids]]
        print("Running regression..")
        run_regression(train_feats, train_labels, test_feats, test_labels)
def mp_pool_format(G_data, graph_dir, mc_iter):
    return create_num_MC_sim_copies(json_graph.node_link_graph(G_data), graph_dir, mc_iter)