def tuples_to_graph(tuples):
    """Build a graph from ``(node, attribute)`` pairs.

    For every pair, the elements of ``node`` are added as graph nodes
    carrying ``freq=attribute`` and the edges produced by
    ``to_edges(node)`` are inserted.  Each pair is echoed to stdout.

    :param tuples: iterable of ``(node, attribute)`` pairs.
    :return: the populated graph.
    """
    graph = Graph()
    for members, frequency in tuples:
        # Single pre-formatted argument: same output under the Python 2
        # print statement and the Python 3 print function.
        print('adding %s %s' % (members, frequency))
        graph.add_nodes_from(members, freq=frequency)
        graph.add_edges_from(to_edges(members))
    return graph
def generate_small_world_graph(self):
    """Sample random edges until ``self.constraints_satisfied`` accepts the graph.

    If ``EDGE_COUNT`` exceeds the edge count of a complete graph on
    ``NODE_COUNT`` nodes, the complete graph is returned right away.
    Otherwise candidate edges are drawn from ``performer.edge_indices``
    using the weights in ``performer.probabilities``.  Each drawn edge is
    added with ``length`` and ``weight`` attributes, removed again if it
    pushes the maximum degree above ``DEGREE_MAX``, and a random edge is
    evicted whenever the graph holds more than ``EDGE_COUNT`` edges.  The
    first graph accepted by ``self.constraints_satisfied`` is reported on
    stdout, handed to ``self.process_graph`` and returned.

    NOTE(review): when no trial yields an accepted graph the loop simply
    ends and the method implicitly returns ``None`` -- confirm callers
    handle that.
    """
    # Number of edges of the complete graph on NODE_COUNT nodes.
    max_edges = self.NODE_COUNT*(self.NODE_COUNT-1)/2
    if self.EDGE_COUNT > max_edges:
        return complete_graph(self.NODE_COUNT)
    graph = Graph()
    graph.add_nodes_from(range(self.NODE_COUNT))
    # NOTE(review): `performer` is a name from outside this method (not
    # `self`); flatten() returns copies, so the in-place updates below do not
    # touch the arrays stored on `performer`.
    edges = performer.edge_indices.flatten()
    probabilities = performer.probabilities.flatten()
    # Stops 9 draws short of the number of candidates (reason not visible here).
    for trial in range(len(edges)-9):
        edge_index = numpy.random.choice(edges, p=probabilities)
        source, destination = self.edge_nodes(edge_index)
        graph.add_edge(source, destination, length = self.link_length(source, destination), weight = self.edge_weight(source, destination))
        # Never draw the same candidate twice, then renormalise the weights.
        # NOTE(review): the drawn value is used as a position in
        # `probabilities`; presumably edge_indices enumerates 0..len-1 -- confirm.
        probabilities[edge_index] = 0
        probabilities /= sum(probabilities)
        # Undo the insertion if it violates the degree cap.
        # NOTE(review): `degree().values()` is the networkx 1.x dict API.
        if max(graph.degree().values()) > self.DEGREE_MAX:
            graph.remove_edge(source, destination)
        # Keep the edge budget by evicting one uniformly chosen edge.
        if graph.number_of_edges() > self.EDGE_COUNT:
            victim = random.choice(graph.edges())
            graph.remove_edge(victim[0], victim[1])
        if self.constraints_satisfied(graph):
            # The trailing comma keeps both prints on one output line.
            print 'performer.generate_small_world_graph:',
            print self.BENCHMARK, self.NODE_COUNT, self.EDGE_COUNT, trial
            self.process_graph(graph)
            return graph
def convert_local_tree_topology_to_graph(loc_tree_topo, tree_node_labeling):
    """
    Build a NetworkX graph describing a local tree topology.

    Parameters
    ----------
    loc_tree_topo: array-like
        The local tree topology.  Entry ``k`` is the position of the node
        that node ``k`` is linked to; the root comes first and is marked -1.
    tree_node_labeling: array-like
        The integer id of each tree node.  It is indexed with an integer
        array below, so it must support NumPy-style fancy indexing.

    Returns
    -------
    G : NetworkX graph
        One node per label and one edge per non-root entry of the topology.
    """
    assert loc_tree_topo[0] == -1

    # Row 0: linked position, row 1: own position.  The first column is the
    # root, which contributes no edge.
    links = vstack((loc_tree_topo, range(len(loc_tree_topo))))[:, 1:]
    # Translate positions into the caller's node ids.
    labelled = tree_node_labeling[links]

    G = Graph()
    G.add_nodes_from(tree_node_labeling)
    G.add_edges_from(zip(labelled[0, :], labelled[1, :]))
    return G
def fuzz_network(G_orig, threshold, b, edge_frac=1.0, nonedge_mult=5.0):
    """Return a randomly weighted copy of ``G_orig`` and its thresholded graph.

    A fraction ``edge_frac`` of the existing edges receives weights drawn
    from ``Beta(a + 1, b)``, and ``nonedge_mult`` times as many non-edges are
    inserted with weights drawn from ``Beta(a, b + 1)``, where
    ``a = b / nonedge_mult``.

    :param G_orig: source graph; it is copied, never modified.
    :param threshold: a weight must exceed this value for the pair to become
        an edge of ``H``.
    :param b: second shape parameter of the beta distributions.
    :param edge_frac: fraction of existing edges to re-weight.
    :param nonedge_mult: ratio of sampled non-edges to sampled edges
        (must be non-zero).
    :return: ``(G, H)``. ``G`` is the weighted copy including the inserted
        non-edges; ``H`` has nodes ``range(n)`` and one edge per sampled
        pair whose weight exceeds ``threshold``.

    If more weights are sampled than edges (or non-edges) exist, the surplus
    weights are ignored instead of raising ``IndexError``.
    """
    G = G_orig.copy()
    n = len(G.nodes())
    H = Graph()
    H.add_nodes_from(range(n))

    actual_edges = len(G.edges())
    edges = int(edge_frac * actual_edges)
    nonedges = int(edges * nonedge_mult)
    a = b / nonedge_mult

    # though these distributions are normalized to one, by selecting the
    # appropriate number of edges and nonedges, we make these
    # 'distributions' correct
    edge_probs = np.random.beta(a + 1, b, edges)
    nonedge_probs = np.random.beta(a, b + 1, nonedges)

    # Materialise both collections: newer networkx returns a view from
    # edges(), which shuffle() cannot reorder in place.
    edge_list = list(G.edges())
    nonedge_list = list(non_edges(G))
    shuffle(edge_list)
    shuffle(nonedge_list)

    # zip() pairs each weight with one shuffled (non-)edge and stops at the
    # shorter sequence.
    for (u, v), prob in zip(edge_list, edge_probs):
        G[u][v]["weight"] = prob
        if prob > threshold:
            H.add_edge(u, v)

    for (u, v), prob in zip(nonedge_list, nonedge_probs):
        G.add_edge(u, v, weight=prob)
        if prob > threshold:
            H.add_edge(u, v)

    return G, H
def make_graph(points, neighbor_max_dist=0.01):
    """Connect every pair of points lying closer than ``neighbor_max_dist``.

    :param points: sequence of points accepted by ``euclidian_3d_dist``.
    :param neighbor_max_dist: strict upper bound on the distance between
        two points for them to be joined by an edge.
    :return: graph whose nodes are the indices ``0 .. len(points) - 1``.
    """
    count = len(points)
    graph = Graph()
    graph.add_nodes_from(range(count))
    # range() instead of xrange(): xrange does not exist on Python 3.
    for i in range(count):
        for j in range(i + 1, count):
            if euclidian_3d_dist(points[i], points[j]) < neighbor_max_dist:
                graph.add_edge(i, j)
    return graph
def eliminate_node(G, a):
    """Return the graph obtained by eliminating node ``a`` from ``G``.

    ``a`` and its incident edges are dropped, and every pair of distinct
    neighbours of ``a`` that is not already adjacent is joined by a fill-in
    edge.  ``G`` itself is left untouched; node and edge attributes are not
    carried over.

    :param G: source graph.
    :param a: node to eliminate.
    :return: a new ``Graph``.
    """
    nb = frozenset(G.neighbors(a))

    # A set deduplicates the unordered pairs in O(1); the previous tuple
    # needed a linear scan for every candidate pair.
    fillins = {
        frozenset((u, v))
        for u in nb
        for v in nb
        if u != v and not G.has_edge(v, u)
    }

    # Both orientations, because edges() may report either one.
    kill_edges = frozenset([(u, a) for u in nb] + [(a, u) for u in nb])

    H = Graph()
    H.add_nodes_from(frozenset(G.nodes()) - frozenset((a,)))
    H.add_edges_from((frozenset(G.edges()) - kill_edges) | fillins)
    return H
def __init__(self, mol, eps):
    """Build the weighted bond graph of ``mol`` and derive ``lim`` from ``eps``.

    Every atom index becomes a node; every bond becomes an edge whose
    ``weight`` is the product of the degrees of its two end atoms.

    :param mol: molecule exposing ``GetAtoms()`` and ``GetBonds()``
        (presumably an RDKit molecule).
    :param eps: non-zero number; ``self.lim`` is ``int(1.0 / eps ** 2)``.
    """
    graph = Graph()
    graph.add_nodes_from(atom.GetIdx() for atom in mol.GetAtoms())

    for bond in mol.GetBonds():
        begin = bond.GetBeginAtom()
        end = bond.GetEndAtom()
        degree_product = begin.GetDegree() * end.GetDegree()
        graph.add_edge(begin.GetIdx(), end.GetIdx(), weight=degree_product)

    self.G = graph
    self.lim = int(1.0 / (eps ** 2))
def _build_authors_graph(self):
    """
    Assemble the authors graph from ``self.get_network_data()``.

    Node ``i`` carries the attribute ``name``, taken from the first field
    of the i-th entry of element 0 of the network data.  Element 1 of the
    network data is passed to ``add_edges_from`` unchanged.

    @rtype: networkx.Graph()
    @return: the Graph containing nodes and edges
    """
    network = self.get_network_data()
    author_rows = network[0]
    collaborations = network[1]

    # TODO refactor: revision on this part. whether to move the Graph code to its own class
    graph = Graph()
    # the nodes format will be {"id":int, "name":str}
    labelled_nodes = [
        (index, {"name": row[0]}) for index, row in enumerate(author_rows)
    ]
    graph.add_nodes_from(labelled_nodes)
    graph.add_edges_from(collaborations)
    return graph
def get_coauthor_graph_by_author_name(self, name):
    """Return the star graph linking ``name`` to each of their co-authors.

    The centre node is ``self.author_idx[name]`` with attribute
    ``name=name``.  Every other author appearing on a publication together
    with the centre becomes a node (attribute ``name`` read from
    ``self.authors``) joined to the centre by one edge.

    :param name: author name; must be a key of ``self.author_idx``.
    :raises KeyError: if ``name`` is unknown.
    """
    author_id = self.author_idx[name]

    coauthors = set()
    for publication in self.publications:
        if author_id in publication.authors:
            coauthors.update(
                other for other in publication.authors if other != author_id
            )

    graph = Graph()
    # the nodes format will be {"id":int, "name":str}
    graph.add_node(author_id, name=name)
    graph.add_nodes_from(
        [(coauthor, {"name": self.authors[coauthor].name}) for coauthor in coauthors]
    )
    graph.add_edges_from([(author_id, coauthor) for coauthor in coauthors])
    return graph
def calculate(self, P):
    """Return the all-pairs shortest-path matrix of the weighted bond graph.

    Each bond between atoms ``i`` and ``j`` gets the weight
    ``C * C / (P[i] * P[j] * bond_order)``, where ``C`` is
    ``self._prop.carbon``.  Shortest paths come from
    ``floyd_warshall_numpy``; the diagonal is then overwritten with
    ``1 - C / P[atom index]`` for every atom.

    :param P: per-atom values indexable by atom index.
    :return: the matrix returned by ``floyd_warshall_numpy`` with the
        replaced diagonal.
    """
    carbon = self._prop.carbon

    graph = Graph()
    graph.add_nodes_from(atom.GetIdx() for atom in self.mol.GetAtoms())

    for bond in self.mol.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        order = bond.GetBondTypeAsDouble()

        # Only the division is guarded by the zero-division translator.
        with self.rethrow_zerodiv():
            weight = (carbon * carbon) / (P[begin] * P[end] * order)

        graph.add_edge(begin, end, weight=weight)

    distances = floyd_warshall_numpy(graph)
    diagonal = [1. - carbon / P[atom.GetIdx()] for atom in self.mol.GetAtoms()]
    np.fill_diagonal(distances, diagonal)
    return distances
def merge_slices_to_events(self, current_slices):
    """
    Merge DBSCAN-generated event slices with previously found events.

    A bimodal (slice/event) network is built: a slice is linked to every
    known event for which ``event.is_successor`` accepts the slice's message
    ids.  Every connected component containing at least one slice is then
    resolved:

    * no event in the component -> a new ``Event`` is created from the
      messages of its slices;
    * exactly one event and one slice whose message ids equal the event's
      message ids -> nothing to do;
    * otherwise -> all further events are merged into the first one, which
      then receives the combined slice.  Merged events are removed from
      ``self.events`` and from redis afterwards.

    Args:
        current_slices (Dict(List[Dict])): output of
            self.current_datapoints_dbscan method. Every item of dict is a
            slice cluster: list with dicts of messages from that cluster.
    """
    slices_ids = set(current_slices.keys())
    events_ids = set(self.events.keys())
    edges = []
    for slice_id, event_slice in current_slices.items():
        # Message ids contained in this slice.
        slice_ids = {x['id'] for x in event_slice}
        for event in self.events.values():
            if event.is_successor(slice_ids):
                edges.append((slice_id, event.id))
    # NOTE(review): slice ids and event ids share one node namespace here;
    # presumably they can never collide -- verify against the id generators.
    G = Graph()
    G.add_nodes_from(slices_ids.union(events_ids))
    G.add_edges_from(edges)
    events_to_delete = []
    # Components made only of events need no processing.
    for cluster in [x for x in connected_components(G) if x.intersection(slices_ids)]:
        unify_slices = cluster.intersection(slices_ids)
        unify_events = list(cluster.intersection(events_ids))
        # All messages of the component's slices, flattened into one list.
        meta_slice = [msg for i in unify_slices for msg in current_slices[i]]
        if not unify_events:
            new_event = Event(self.mysql, self.redis, self.tokenizer, self.morph, self.classifier, meta_slice)
            self.events[new_event.id] = new_event
        elif len(unify_events) == 1 and len(unify_slices) == 1 and set(self.events[unify_events[0]].messages.keys()) == {x['id'] for x in meta_slice}:
            # The slice carries exactly the messages the event already has.
            continue
        else:
            if len(unify_events) > 1:
                # The first event of the list absorbs all the others.
                for ancestor in unify_events[1:]:
                    self.events[unify_events[0]].merge(self.events[ancestor])
                    events_to_delete.append(ancestor)
            self.events[unify_events[0]].add_slice(meta_slice)
    # Deletion is deferred so self.events stays stable while clusters are processed.
    for event in events_to_delete:
        del self.events[event]
        self.redis.delete("event:{}".format(event))
def maotree_old(g, m):
    """Build a tree over the nodes of ``g`` following the ordering ``m``.

    The root is tagged with ``m[0]``.  For each tree node, the graph formed
    by the edges whose smaller endpoint (w.r.t. ``m``) comes after the
    node's tag, plus the remaining nodes of that subtree, is split into
    connected components; each component becomes one child, processed the
    same way with its nodes sorted by ``m``.

    :param g: graph exposing ``edges()``.
    :param m: sequence of the nodes of ``g`` giving the ordering.
    :return: the root ``Tree``, or ``None`` when ``m`` is empty.

    NOTE(review): the component graph is built from *all* edges in
    ``e[i:]``, not only those inside the current subtree -- confirm that
    this is intended.
    """
    from networkx import Graph, connected_components

    if len(m) == 0:
        return None
    T = Tree(None, [])
    # node -> index in mao
    o = dict((v, i) for i, v in enumerate(m))
    # list of edges (u,v) with o[u] <= o[v]
    e = [(u, v) if o[u] <= o[v] else (v, u) for u, v in g.edges()]
    # we sort e w.r.t. to o such that we can disregard the entire prefix
    # up to the first pair (u,v) with o[u] >= o[current node]
    # (index-based key: tuple-unpacking lambda parameters are Python 2 only)
    e.sort(key=lambda uv: (o[uv[0]], o[uv[1]]))
    # todo is a tuple of the current tree node,
    # the remaining mao to process and
    # the offset of the edges to be considered in the
    # edge list e
    todo = [(T, m, 0)]
    while todo:
        t, m, i = todo.pop()
        x = m[0]
        t.tag = x
        if len(m) <= 1:
            continue
        # Skip every edge whose smaller endpoint is not after x.
        while i < len(e) and o[e[i][0]] <= o[x]:
            i = i + 1
        g_ = Graph()
        for (u, v) in e[i:]:
            g_.add_edge(u, v)
        g_.add_nodes_from(m[1:])
        # Materialise as sorted lists: newer networkx yields a one-shot
        # generator of sets, which can neither be sorted in place nor
        # iterated more than once.
        cs = [sorted(c, key=o.get) for c in connected_components(g_)]
        t.children = [Tree(None, []) for _ in cs]
        todo.extend((child, comp, i) for child, comp in zip(t.children, cs))
    return T
class FrameworkFeatureAnalyzer(object):
    """
    A class to do feature location analyses on a project written in a
    specific framework.  All findings are accumulated in ``self.graph``.

    Project Graph Details:
    -----------------------
    Node Groups:
        1: Android package
        2: -
        3: Android imported identifier
        4: Java class
        5: Java method
        6: XML file Category
        7: XML file

    Edge Groups:
        1: internal/hierarchical links
        2: Java---Android mappings
        3: Java---XML mappings
    """

    def __init__(self, framework, project):
        """
        :param framework: framework identifier; ``str(framework)`` is used as
            the namespace prefix and as the first graph node
        :param inspector.models.base.Project project: the project to be analyzed
        """
        self.project = project
        self.framework_namespace = str(framework)
        self.graph = Graph()
        self.graph.add_node(self.framework_namespace)
        # (import, usages) pairs collected by analyze_framework_imports.
        self.import_usages = []

    def add_source_file(self, source_file):
        """
        Run the import analysis and then the class/method analysis on one file.

        :param inspector.models.base.SourceFile source_file: the file
        """
        self.analyze_framework_imports(source_file)
        self.analyze_source(source_file)

    def analyze_framework_imports(self, source_file):
        """
        Add the dotted-name chain of every framework import to the graph and
        remember the import's usages.

        :param inspector.models.base.SourceFile source_file: the file
        """
        for im in source_file.imports:
            if im.import_str.startswith(self.framework_namespace):
                self.import_usages.append((im, im.find_usages()))
                components = im.import_str.split('.')
                data = {'group': 1}
                # An ALL_CAPS last component switches the whole chain to group 3.
                if re.match(r'^[A-Z]+(_[A-Z]+)*$', components[-1]):
                    data['group'] = 3
                last = None
                # One node per dotted prefix, each linked to the previous prefix.
                for i in range(len(components)):
                    cn = '.'.join(components[:i + 1])
                    self.graph.add_node(cn, **data)
                    if last:
                        self.graph.add_edge(last, cn, weight=1, group=1)
                    last = cn
                # The full import string itself is always tagged as group 3.
                if last:
                    data['group'] = 3
                    self.graph.add_node(last, **data)

    def analyze_source(self, source_file):
        """
        Add class and method nodes and link each method to the framework
        imports used inside its line range.

        :param inspector.models.base.SourceFile source_file: the file
        """
        for cl in source_file.classes:
            self.graph.add_node(cl.name, group=4)
            for fu in cl.methods:
                # print '[{0}-{1}]'.format(fu.starting_line, fu.ending_line), re.sub('\s*\n\s*', ' ', unicode(fu))
                fn = fu.qualified_name
                self.graph.add_node(fn, group=5)
                self.graph.add_edge(cl.name, fn, weight=1, group=1)
                for im, usages in self.import_usages:
                    # w counts the usages whose line falls inside this method.
                    w = 0
                    for ln in usages:
                        if fu.starting_line <= ln <= fu.ending_line:
                            w += 1
                    if w:
                        self.graph.add_edge(im.import_str, fn, weight=w, group=2)

    def add_xml_files(self):
        """
        Add the XML category nodes and one node per XML file under
        ``app/res/``, linked to the category named after its resource folder.
        """
        xml_sub_groups = {':layout', ':values', ':drawable', ':menu', ':xml', ':color'}
        self.graph.add_nodes_from([':XML'] + list(xml_sub_groups), group=6)
        self.graph.add_edges_from([(':XML', g) for g in xml_sub_groups], weight=1, group=1)
        for path in self.project.filter_files(extension='xml'):
            # NOTE(review): xml_file is never read below.
            xml_file = self.project.get_file(path)
            if path.startswith('app/res/'):
                # Third path component is the resource folder.
                g = path.split('/')[2]
                name = '/'.join(path.split('/')[2:])
                self.graph.add_node(name, group=7)
            else:
                if not path.split('/')[-1] in ['pom.xml', 'AndroidManifest.xml']:  # is ignored?
                    print 'invalid path:', path
                continue
            valid_group = False
            if g == 'values':
                g = 'values-default'
            # "values-<qualifier>" folders get their own node under ':values'.
            if g.startswith('values-'):
                g = g[7:]
                self.graph.add_edge(':values', ':' + g, weight=1, group=1)
                valid_group = True
            g = ':' + g
            if valid_group or g in xml_sub_groups:
                self.graph.add_edge(g, name, weight=1, group=1)
            else:
                print 'invalid subgroup:', g
# Define the graph and nodes graph = Graph(name = 'Grid 3x3') # Generate a set of nodes from 0 .. width - 1 and 0 .. height - 1 nodes = [ node(0, 0), node(0, 1), node(0, 2), node(1, 0), node(1, 1), node(1, 2), node(2, 0), node(2, 1), node(2, 2), ] graph.add_nodes_from(nodes) # Generate a set of edges connecting each node in the list on an inverted L pattern graph.add_edges_from([ # For node (0, 0) (nodes[0], nodes[3], { 'time': np.random.poisson(10.0) }), (nodes[0], nodes[1], { 'time': np.random.poisson(10.0) }), # For node (0, 1) (nodes[1], nodes[4], { 'time': np.random.poisson(10.0) }), (nodes[1], nodes[2], { 'time': np.random.poisson(10.0) }), # For node (0, 2) (nodes[2], nodes[5], { 'time': np.random.poisson(10.0) }), # For node (1, 0) (nodes[3], nodes[6], { 'time': np.random.poisson(10.0) }), (nodes[3], nodes[4], { 'time': np.random.poisson(10.0) }), # For node (1, 1)
if (item[1] is 2): points.append(item) matrix[y][x] = (item[0], 1) def to_name(tlp): return tlp[0] def extract_line(line): return list(map(to_name, line)) names = list(map(extract_line, matrix)) G = Graph() for line in names: G.add_nodes_from(line) for y, line in enumerate(matrix): for x, item in enumerate(line): if (x + 1 < len(line)): if ((item[1] is 1) and (line[x + 1][1] is 1)): G.add_edge(item[0], line[x + 1][0]) if (y + 1 < len(matrix)): if ((item[1] is 1) and (matrix[y + 1][x][1] is 1)): G.add_edge(item[0], matrix[y + 1][x][0]) path = shortest_path(G, source=points.pop()[0], target=points.pop()[0]) print(path)
Before attempting the exercise, use the IPython Shell to access the dictionary metadata of T and explore it, for instance by running the commands T.edge[1][10] and then T.edge[10][1]. Note how there's only one field, and now you're going to add another field, called 'weight'. ''' from pickle import load from networkx import Graph # Reading Graph v1 pickle data #with open('../datasets/ego-twitter.p', 'rb') as f: # T = load(f) # Reading Graph v2 pickle data with open('../datasets/ego-twitter.p2', 'rb') as f: nodes, edges = load(f) T = Graph() T.add_nodes_from(nodes) T.add_edges_from(edges) ''' INSTRUCTIONS * Set the 'weight' attribute of the edge between node 1 and 10 of T to be equal to 2. Refer to the following template to set an attribute of an edge: network_name.edge[node1][node2]['attribute'] = value. Here, the 'attribute' is 'weight'. * Set the weight of every edge involving node 293 to be equal to 1.1. To do this: * Using a for loop, iterate over all the edges of T, including the metadata. * If 293 is involved in the list of nodes [u, v]: * Set the weight of the edge between u and v to be 1.1. ''' # Set the weight of the edge T[1][10]['weight'] = 2 # Iterate over all the edges (with metadata)
def is_balanced(graph_obj, meta_data=False):
    """
    Check whether a signed graph is balanced.

    The algorithm has been adopted from the paper "On the notion of balance
    of a signed graph" by Frank Harary and the book "Networks, Crowds, and
    Markets: Reasoning About a Highly Connected World" by David Easley and
    Jon Kleinberg.

    Nodes are first grouped into labelled sets by ``constrained_bfs``.  The
    graph is balanced when every edge of weight 1 stays inside one set and
    every edge of weight -1 joins two different sets.

    Args:
        graph_obj : The signed graph; edges carry a ``weight`` of 1 or -1.
            Directed graphs are converted to undirected first.
        meta_data : Option to get meta data regarding the nature of the
            balance in the graph. Default: False

    Returns:
        A two tuple: (bool, meta-data dict). The meta-data dict is None if
        meta_data is False or the graph is not balanced.
    """
    from networkx import Graph
    from networkx.algorithms import bipartite

    undirected = graph_obj.to_undirected() if graph_obj.is_directed() else graph_obj

    # Assign a set label to every node, one BFS per still-unlabelled node.
    node_labels = {}
    num_labels = 0
    for node in undirected.nodes():
        if node in node_labels:
            continue
        constrained_bfs(undirected, node_labels, num_labels, 1, node)
        num_labels += 1

    # One node per set; negative edges become edges between sets.
    set_graph = Graph()
    set_graph.add_nodes_from(list(range(num_labels)))

    # check for mutual antagonism between sets and mutual friendship inside sets
    balanced = True
    for edge in undirected.edges():
        first, second = edge[0], edge[1]
        sign = undirected[first][second]['weight']
        if sign == 1:
            if node_labels[first] != node_labels[second]:
                # this shouldn't happen
                balanced = False
                break
        elif sign == -1:
            set_graph.add_edge(node_labels[first], node_labels[second])
            if node_labels[first] == node_labels[second]:
                balanced = False
                break

    if not (meta_data and balanced):
        return (balanced, None)

    # determine strength of balance (bipartite condition for sets antagonism)
    strong = bool(bipartite.is_bipartite(set_graph))

    # members of every labelled set
    sets = [[] for _ in range(num_labels)]
    for node, label in node_labels.items():
        sets[label].append(node)

    # possible split into two camps, available only for strong balance
    split = None
    if strong:
        coloring = bipartite.color(set_graph)
        X, Y = set(), set()
        for label, colour in coloring.items():
            camp = X if colour == 0 else Y
            camp.update(sets[label])
        split = {frozenset(X), frozenset(Y)}

    metas = {
        'num_original_sets': num_labels,
        'original_sets': sets,
        'strength': 'strong' if strong else 'weak',
        'possible_split': split,
    }
    return (balanced, metas)
class FrameworkFeatureAnalyzer(object):
    """
    A class to do feature location analyses on a project written in a
    specific framework.  All findings are accumulated in ``self.graph``.

    Project Graph Details:
    -----------------------
    Node Groups:
        1: Android package
        2: -
        3: Android imported identifier
        4: Java class
        5: Java method
        6: XML file Category
        7: XML file

    Edge Groups:
        1: internal/hierarchical links
        2: Java---Android mappings
        3: Java---XML mappings
    """

    def __init__(self, framework, project):
        """
        :param framework: framework identifier; ``str(framework)`` is used as
            the namespace prefix and as the first graph node
        :param inspector.models.base.Project project: the project to be analyzed
        """
        self.project = project
        self.framework_namespace = str(framework)
        self.graph = Graph()
        self.graph.add_node(self.framework_namespace)
        # (import, usages) pairs collected by analyze_framework_imports.
        self.import_usages = []

    def add_source_file(self, source_file):
        """
        Run the import analysis and then the class/method analysis on one file.

        :param inspector.models.base.SourceFile source_file: the file
        """
        self.analyze_framework_imports(source_file)
        self.analyze_source(source_file)

    def analyze_framework_imports(self, source_file):
        """
        Add the dotted-name chain of every framework import to the graph and
        remember the import's usages.

        :param inspector.models.base.SourceFile source_file: the file
        """
        for im in source_file.imports:
            if im.import_str.startswith(self.framework_namespace):
                self.import_usages.append((im, im.find_usages()))
                components = im.import_str.split('.')
                data = {'group': 1}
                # An ALL_CAPS last component switches the whole chain to group 3.
                if re.match(r'^[A-Z]+(_[A-Z]+)*$', components[-1]):
                    data['group'] = 3
                last = None
                # One node per dotted prefix, each linked to the previous prefix.
                for i in range(len(components)):
                    cn = '.'.join(components[:i + 1])
                    self.graph.add_node(cn, **data)
                    if last:
                        self.graph.add_edge(last, cn, weight=1, group=1)
                    last = cn
                # The full import string itself is always tagged as group 3.
                if last:
                    data['group'] = 3
                    self.graph.add_node(last, **data)

    def analyze_source(self, source_file):
        """
        Add class and method nodes and link each method to the framework
        imports used inside its line range.

        :param inspector.models.base.SourceFile source_file: the file
        """
        for cl in source_file.classes:
            self.graph.add_node(cl.name, group=4)
            for fu in cl.methods:
                # print '[{0}-{1}]'.format(fu.starting_line, fu.ending_line), re.sub('\s*\n\s*', ' ', unicode(fu))
                fn = fu.qualified_name
                self.graph.add_node(fn, group=5)
                self.graph.add_edge(cl.name, fn, weight=1, group=1)
                for im, usages in self.import_usages:
                    # w counts the usages whose line falls inside this method.
                    w = 0
                    for ln in usages:
                        if fu.starting_line <= ln <= fu.ending_line:
                            w += 1
                    if w:
                        self.graph.add_edge(im.import_str, fn, weight=w, group=2)

    def add_xml_files(self):
        """
        Add the XML category nodes and one node per XML file under
        ``app/res/``, linked to the category named after its resource folder.
        """
        xml_sub_groups = { ':layout', ':values', ':drawable', ':menu', ':xml', ':color' }
        self.graph.add_nodes_from([':XML'] + list(xml_sub_groups), group=6)
        self.graph.add_edges_from([(':XML', g) for g in xml_sub_groups], weight=1, group=1)
        for path in self.project.filter_files(extension='xml'):
            # NOTE(review): xml_file is never read below.
            xml_file = self.project.get_file(path)
            if path.startswith('app/res/'):
                # Third path component is the resource folder.
                g = path.split('/')[2]
                name = '/'.join(path.split('/')[2:])
                self.graph.add_node(name, group=7)
            else:
                if not path.split('/')[-1] in [ 'pom.xml', 'AndroidManifest.xml' ]:  # is ignored?
                    print 'invalid path:', path
                continue
            valid_group = False
            if g == 'values':
                g = 'values-default'
            # "values-<qualifier>" folders get their own node under ':values'.
            if g.startswith('values-'):
                g = g[7:]
                self.graph.add_edge(':values', ':' + g, weight=1, group=1)
                valid_group = True
            g = ':' + g
            if valid_group or g in xml_sub_groups:
                self.graph.add_edge(g, name, weight=1, group=1)
            else:
                print 'invalid subgroup:', g
def to_graph(l):
    """Return the graph obtained by inserting every element of ``l``.

    Each element contributes its members as nodes, plus the edges that
    ``to_edges`` returns for it.

    :param l: iterable of node collections.
    :return: the populated graph.
    """
    graph = Graph()
    for members in l:
        graph.add_nodes_from(members)
        graph.add_edges_from(to_edges(members))
    return graph
def multigraph_to_graph(g: MultiGraph) -> Graph:
    """Return a simple graph with the nodes and edges of ``g``.

    ``g`` is first converted with ``Graph(g)``; the nodes and edges of that
    conversion are then copied, without attribute data, into a fresh graph.
    """
    collapsed = Graph(g)
    plain = Graph()
    plain.add_nodes_from(collapsed.nodes())
    plain.add_edges_from(collapsed.edges())
    return plain
""" Unit tests for the :mod:`pennylane.qaoa` submodule. """ import pytest import numpy as np import pennylane as qml from pennylane import qaoa from networkx import Graph from pennylane.wires import Wires pytestmark = pytest.mark.usefixtures("tape_mode") ##################################################### graph = Graph() graph.add_nodes_from([0, 1, 2]) graph.add_edges_from([(0, 1), (1, 2)]) non_consecutive_graph = Graph([(0, 4), (3, 4), (2, 1), (2, 0)]) def decompose_hamiltonian(hamiltonian): coeffs = hamiltonian.coeffs ops = [i.name for i in hamiltonian.ops] wires = [i.wires for i in hamiltonian.ops] return [coeffs, ops, wires] class TestMixerHamiltonians:
def create_graph(nodes, edges):
    """Return a new graph holding ``nodes`` and ``edges``.

    :param nodes: iterable accepted by ``Graph.add_nodes_from``.
    :param edges: iterable accepted by ``Graph.add_edges_from``.
    """
    result = Graph()
    result.add_nodes_from(nodes)
    result.add_edges_from(edges)
    return result
def partition_reads(tint, maximum_ilp_size):
    """Split the reads of ``tint`` into partitions stored in ``tint['partitions']``.

    Reads with an identical ``(I row, first, last, poly-tail category)``
    signature are grouped.  Two groups are linked when their poly-tail
    categories are compatible (equal, or at least one is ``'N'``), they
    share at least one position where both rows are 1 inside the
    overlapping ``[first, last]`` window, and they differ in fewer than 3
    positions of that window (or in none when the window has 1 to 3
    positions).  Edges whose endpoints both have more than one neighbour
    and share no common neighbour are pruned until none remain.  Each
    connected component then yields a ``(rids, incomp)`` tuple: the read
    ids of the component and the read-id pairs of groups that sit in the
    same chunk of ``split_list_evenly`` but are not linked.

    :param tint: dict with keys ``'reads'``, ``'read_reps'`` and
        ``'ilp_data'`` (holding ``'I'`` and ``'FL'``); ``'partitions'`` is
        (re)created on it.
    :param maximum_ilp_size: forwarded to ``split_list_evenly``.
    :return: ``None``; the result is written to ``tint['partitions']``.
    """
    reads = tint['reads']
    read_reps = tint['read_reps']
    I = tint['ilp_data']['I']
    FL = tint['ilp_data']['FL']
    tint['partitions'] = list()

    # Group read ids by signature, visiting them in sorted order.
    unique_data = dict()
    for i in sorted(I.keys()):
        d = (tuple(I[i]), (FL[i][0], FL[i][1], reads[read_reps[i][0]]['poly_tail_category']))
        unique_data.setdefault(d, []).append(i)
    unique_data = list(unique_data.items())
    N = len(unique_data)

    edges = list()
    for i in range(N):
        d1, (f1, l1, t1) = unique_data[i][0]
        for j in range(i+1, N):
            d2, (f2, l2, t2) = unique_data[j][0]
            # Cheap rejection first: conflicting poly-tail categories.
            if t1 != 'N' and t2 != 'N' and t1 != t2:
                continue
            # Overlapping window of the two groups and its length.
            f = max(f1, f2)
            l = min(l1, l2)
            o = l-f+1
            # d: mismatching positions; w: positions where both rows are 1.
            d = sum(x != y for x, y in zip(d1[f:l+1], d2[f:l+1]))
            w = sum(x == y == 1 for x, y in zip(d1[f:l+1], d2[f:l+1]))
            if w < 1:
                continue
            if (o > 3 and d < 3) or (1 <= o <= 3 and d == 0):
                edges.append((i, j))

    G = Graph()
    G.add_nodes_from(range(N))
    G.add_edges_from(edges)

    # Repeatedly drop edges whose endpoints each have other neighbours but
    # share none of them.
    while True:
        edges_to_remove = list()
        for i, j in G.edges:
            n1 = set(G.neighbors(i))
            n2 = set(G.neighbors(j))
            if len(n1) == 1 or len(n2) == 1 or len(n1 & n2) > 0:
                continue
            edges_to_remove.append((i, j))
        G.remove_edges_from(edges_to_remove)
        if len(edges_to_remove) == 0:
            break

    # NOTE(review): SOURCE's indentation was lost; one partition is appended
    # per connected component here -- confirm against the original layout.
    for component in components.connected_components(G):
        rids = list()
        incomp = list()
        for chunk in split_list_evenly(list(component), maximum_ilp_size):
            for idx, i in enumerate(chunk):
                rids.extend(unique_data[i][1])
                for j in chunk[idx+1:]:
                    # Separate names: re-binding `i` here would corrupt the
                    # remaining pairs of this inner loop.
                    lo, hi = min(i, j), max(i, j)
                    assert lo < hi
                    if G.has_edge(lo, hi):
                        continue
                    for rid_1 in unique_data[lo][1]:
                        for rid_2 in unique_data[hi][1]:
                            incomp.append((rid_1, rid_2))
        tint['partitions'].append((rids, incomp))
def add_entities(graph: Graph, entities: List[Dict[str, any]]):
    """Add one node per entity to ``graph``.

    The node key is the entity's ``entity_name``; the node attributes are
    ``name`` (the same value) and ``entity_type``.

    :param graph: graph that receives the nodes.
    :param entities: dicts providing ``entity_name`` and ``entity_type``.
    """
    nodes = []
    for entity in entities:
        label = entity['entity_name']
        attributes = {'name': label, 'entity_type': entity['entity_type']}
        nodes.append((label, attributes))
    graph.add_nodes_from(nodes)
class PairWiseFiniteModel(GraphModel): """Pairwise finite graphical model. Represents a graphical model in which all variables have the same discrete domain, all factor depends on at most two variables. Model is represented by field F and interactions J. Probability of configuration ``X`` is proportional to ``exp(sum F[i][X_i] + 0.5*sum J[i][j][X[i]][X[j]])``. Field is stored explicitly as a matrix of shape ``(gr_size, al_size)``. Interactions are stored only for those pairs of variables for which they are non-zero. So, interactions are represented by undirected graph, where for each edge (i,j) we store matrix `J[i,j]`, which has shape ``(al_size, al_size)``. Names "Field" is called like that because in physical models (such as Ising model) these values correspond to local magnetic fields. They are also known as biases. "Interactions" are called like that because in physical models they correspond to strength of spin-spin interactions. The fact that all these terms enter the probability density function inside the exponent also refers to physical models, because fields and interactions are terms in energy and according to Bolzmann distribution probability of the state with energy E is proportional to ``exp(-E/(kT))``. """ def __init__(self, size, al_size): """Initializes PairWiseFiniteModel. :param num_variables: Number of variables. :param al_size: Size of the alphabet (domain). Domain will consist of integers in range 0, 1, ... al_size - 1. """ super().__init__(size, DiscreteDomain.range(al_size)) self.gr_size = size self.al_size = al_size self.field = np.zeros((self.gr_size, self.al_size), dtype=np.float64) self.edges = [] self._edges_interactions = [] # Maps (u,v) and (v,u) to index of one of them in self.edges. self._edge_ids = dict() # Cached properties that are invalidated when graph changes. 
self._graph = None self._edges_array = None self._dfs_result = None def set_field(self, field: np.ndarray): """Sets values of field (biases) in all vertices.""" assert field.shape == (self.gr_size, self.al_size) self.field = np.array(field, dtype=np.float64) def add_interaction(self, u, v, interaction): """Adds factor corresponding to interaction between nodes u and v. Factor is f(x) = exp(interaction[x[u], x[v]]). If there already is interaction between these edges, this interaction will be added to it (old interaction isn't discarded). """ if (u, v) in self._edge_ids: edge_id = self._edge_ids[(u, v)] if self.edges[edge_id] == (v, u): interaction = interaction.T self._edges_interactions[edge_id] += interaction else: self._on_graph_changed() self.edges.append((u, v)) self._edges_interactions.append( np.array(interaction, dtype=np.float64)) self._edge_ids[(u, v)] = len(self.edges) - 1 self._edge_ids[(v, u)] = len(self.edges) - 1 def get_interaction_matrix(self, u, v): """Returns interaction matrix between nodes u and v. Returns np.array of shape (al_size, al_size). If there is no interaction between these nodes, raises KeyError. """ edge_id = self._edge_ids[(u, v)] if self.edges[edge_id] == (u, v): return self._edges_interactions[edge_id] else: return self._edges_interactions[edge_id].T def get_interactions_for_edges(self, edges) -> np.ndarray: """Returns interaction for given edges. If some edges don't exist, interaction matrix for them will be a zero matrix. :param edges: Edge list. np.array of shape ``(x, 2)``. :return: np.array of shape (x, al_size, al_size). 
""" edges_num = edges.shape[0] assert edges.shape == (edges_num, 2) result = np.zeros((edges_num, self.al_size, self.al_size), dtype=np.float64) for i in range(edges_num): u, v = edges[i] if self.has_edge(u, v): result[i, :, :] = self.get_interaction_matrix(u, v) return result def has_edge(self, u, v) -> bool: """Whether there is edge between vertices u and v.""" return (u, v) in self._edge_ids def get_graph(self): """Returns interaction graph.""" if self._graph is None: self._graph = Graph() self._graph.add_nodes_from(range(self.gr_size)) for u, v in self.edges: self._graph.add_edge(u, v) return self._graph def get_dfs_result(self) -> FastDfsResult: """Performs DFS for interaction graph.""" if self._dfs_result is None: self._dfs_result = fast_dfs(self.gr_size, self.get_edges_array()) return self._dfs_result def is_graph_acyclic(self): """Whether interaction graph is acyclic.""" return not self.get_dfs_result().had_cycles def get_edges_array(self) -> np.ndarray: """Returns edge list as np.array.""" if self._edges_array is None: if len(self.edges) == 0: self._edges_array = np.empty((0, 2), dtype=np.int32) else: self._edges_array = np.array(self.edges, dtype=np.int32) return self._edges_array def get_edges_connected(self) -> np.ndarray: """Returns edges, ensuring that graph is connected. If graph is already connected, equivalent to ``get_edges_array``. If graph is not connected, adds minimal amount of edges to make it connected. This is needed for algorithms which require connected graph to work correctly. 
""" if not self.get_dfs_result().was_disconnected: return self.get_edges_array() additional_edges = [(u, v) for u, v in self.get_dfs_result().dfs_edges if not self.has_edge(u, v)] return np.concatenate([self.get_edges_array(), additional_edges]) def _on_graph_changed(self): """Invalidates cached graphs.""" self._graph = None self._edges_array = None self._dfs_result = None def get_all_interactions(self) -> np.ndarray: """Returns all interaction matrices in compact form. :return: np.array of shape ``(edge_num, al_size, al_size)`` with interaction matrix for every edge. Matrices correspond to edges in the same order as returned by get_edges.array. """ if len(self.edges) == 0: shape = (0, self.al_size, self.al_size) return np.empty(shape, dtype=np.float64) return np.array(self._edges_interactions, dtype=np.float64) def add_factor(self, factor: Factor): """Adds a factor.""" if isinstance(factor, DiscreteFactor): self._add_discrete_factor(factor) elif factor.is_discrete(): self._add_discrete_factor(DiscreteFactor.from_factor(factor)) else: raise ValueError("Can't add non-discrete factor.") def _add_discrete_factor(self, factor: DiscreteFactor): assert factor.model == self with np.errstate(divide='ignore'): log_factor = np.log(factor.values) if len(factor.var_idx) > 2: raise ValueError("Can't add factor with more than 2 variables.") if len(factor.var_idx) == 1: assert factor.values.shape == (self.al_size, ) self.field[factor.var_idx[0], :] += log_factor elif len(factor.var_idx) == 2: v1, v2 = factor.var_idx self.add_interaction(v1, v2, log_factor) def get_factors(self) -> Iterable[Factor]: """Generates explicit list of factors.""" for i in range(self.gr_size): if np.linalg.norm(self.field[i, :]) > 1e-9: yield DiscreteFactor(self, [i], np.exp(self.field[i, :])) for u, v in self.edges: factor = DiscreteFactor(self, [u, v], np.exp(self.get_interaction_matrix(u, v))) if self.num_variables < 10: factor.name = 'J%d%d' % (u, v) else: factor.name = 'J_%d_%d' % (u, v) yield 
factor  # NOTE(review): dangling tail of a statement that begins before this chunk; kept verbatim.


def infer(self, algorithm='auto', **kwargs) -> InferenceResult:
    """Performs inference.

    Available algorithms

    * ``auto`` - Automatic.
    * ``bruteforce`` - Brute force (by definition). Exact
    * ``mean_field`` - Naive Mean Field. Approximate.
    * ``message_passing`` - Message passing. Approximate, exact only
      for trees.
    * ``path_dp`` - Dynamic programming on path decomposition. Exact.
      Effective on graphs of small pathwidth.
    * ``tree_dp`` - Dynamic programming on tree. Exact. Works only on
      trees.
    * ``junction_tree`` - DP on junction tree. Exact. Effective on
      graphs of small treewidth.

    :param algorithm: Which algorithm to use. String.
    :param kwargs: Forwarded only to the ``mean_field``,
      ``message_passing`` and ``junction_tree`` implementations.
    :return: `InferenceResult` object, which contains logarithm of
      partition function and matrix of marginal probabilities.
    :raises ValueError: If ``algorithm`` is not one of the names above.
    """
    if algorithm == 'auto':
        if self.is_graph_acyclic():
            return infer_tree_dp(self)
        try:
            return infer_junction_tree(self)
        except TooMuchStatesError:
            # NOTE(review): the explicit 'message_passing' option calls
            # `infer_message_passing`; confirm `belief_propagation` is
            # really in scope for this fallback.
            return belief_propagation(self)
    elif algorithm == 'bruteforce':
        return infer_bruteforce(self)
    elif algorithm == 'mean_field':
        return infer_mean_field(self, **kwargs)
    elif algorithm == 'message_passing':
        return infer_message_passing(self, **kwargs)
    elif algorithm == 'path_dp':
        return infer_path_dp(self)
    elif algorithm == 'tree_dp':
        return infer_tree_dp(self)
    elif algorithm == 'junction_tree':
        return infer_junction_tree(self, **kwargs)
    else:
        raise ValueError('Unknown algorithm %s' % algorithm)


def max_likelihood(self, algorithm='auto', **kwargs) -> np.ndarray:
    """Finds the most probable state.

    Available algorithms

    * ``auto`` - Automatic.
    * ``bruteforce`` - Brute force (by definition).
    * ``path_dp`` - Dynamic programming on path decomposition. Exact.
      Effective on graphs of small pathwidth.
    * ``tree_dp`` - Dynamic programming on tree. Exact. Works only on
      trees.
    * ``junction_tree`` - DP on junction tree. Exact. Effective on
      graphs of small treewidth.

    :param algorithm: Which algorithm to use. String.
    :param kwargs: Accepted for interface symmetry; not forwarded.
    :return: The most probable state as numpy int array.
    :raises ValueError: If ``algorithm`` is not one of the names above.
    """
    if algorithm == 'auto':
        if self.is_graph_acyclic():
            return max_likelihood_tree_dp(self)
        else:
            try:
                return max_lh_bruteforce(self)
            except TooMuchStatesError:
                return max_likelihood_junction_tree(self)
    elif algorithm == 'bruteforce':
        return max_lh_bruteforce(self)
    elif algorithm == 'tree_dp':
        return max_likelihood_tree_dp(self)
    elif algorithm == 'path_dp':
        return max_lh_path_dp(self)
    elif algorithm == 'junction_tree':
        return max_likelihood_junction_tree(self)
    else:
        raise ValueError('Unknown algorithm %s' % algorithm)


def sample(self, num_samples: int = 1, algorithm='auto',
           **kwargs) -> np.ndarray:
    """Draws i.i.d. samples from the distribution.

    Available algorithms

    * ``auto`` - Automatic.
    * ``bruteforce`` - Sampling from explicitly calculated
      probabilities for each state.
    * ``tree_dp`` - Dynamic programming on tree. Works only on trees.
    * ``junction_tree`` - DP on junction tree.

    :param num_samples: How many samples to generate.
    :param algorithm: Which algorithm to use.
    :param kwargs: Accepted for interface symmetry; not forwarded.
    :return: ``np.array`` of type ``np.int32`` and shape
      ``(num_samples, gr_size)``. Every row is an independent sample.
    :raises ValueError: If ``algorithm`` is not one of the names above.
    """
    if algorithm == 'auto':
        if self.is_graph_acyclic():
            return sample_tree_dp(self, num_samples=num_samples)
        else:
            try:
                return sample_bruteforce(self, num_samples=num_samples)
            except TooMuchStatesError:
                return sample_junction_tree(self,
                                            num_samples=num_samples)
    elif algorithm == 'bruteforce':
        return sample_bruteforce(self, num_samples=num_samples)
    elif algorithm == 'tree_dp':
        return sample_tree_dp(self, num_samples=num_samples)
    elif algorithm == 'junction_tree':
        return sample_junction_tree(self, num_samples=num_samples)
    else:
        raise ValueError('Unknown algorithm %s' % algorithm)


def encode_state(self, state):
    """Returns id of given state.

    State id is integer between `0` and `al_size**gr_size-1`.
    """
    return encode_state(state, self.gr_size, self.al_size)


def decode_state(self, state):
    """Returns state represented by its integer id."""
    return decode_state(state, self.gr_size, self.al_size)


@staticmethod
def create(field: np.ndarray,
           edges: Union[np.ndarray, List],
           interactions: np.ndarray):
    """Creates PairwiseFiniteModel from compact representation.

    Infers number of variables and size of alphabet from shape of
    ``field``.

    :param field: Values of the field. ``np.array`` of shape
      ``(gr_size, al_size)``.
    :param edges: List of edges with interactions. ``np.array`` of
      integer dtype and shape ``(edge_num, 2)``. Edges can't repeat.
      If there is edge (u,v), you can't have edge (v,u).
    :param interactions: ``np.array`` of shape
      ``(edge_num, al_size, al_size)``, or Iterable which can be
      converted to such an array. ``interactions[i,:,:]`` is a matrix
      describing interactions between variables ``edges[i, 0]`` and
      ``edges[i, 1]``.
    :return: The newly built model.
    """
    size, al_size = field.shape
    model = PairWiseFiniteModel(size, al_size)
    model.set_field(field)
    # Checked up front because zip() below would silently truncate.
    assert len(edges) == len(interactions)
    for (v1, v2), interaction in zip(edges, interactions):
        model.add_interaction(v1, v2, interaction)
    return model


def draw_pairwise_graph(self, ax):
    """Draws pairwise graph on the given matplotlib axes."""
    graph = self.get_graph()
    pos = nx.kamada_kawai_layout(graph)
    node_labels = {i: self[i].name for i in range(self.num_variables)}
    # `ax` must be a keyword: the third positional parameter of
    # draw_networkx is `arrows`, not the axes.
    nx.draw_networkx(graph, pos, ax=ax,
                     labels=node_labels,
                     edge_color='green',
                     node_color='#ffaaaa')
    edge_labels = {(u, v): "J_%d_%d" % (u, v) for u, v in self.edges}
    nx.draw_networkx_edge_labels(graph, pos, edge_labels=edge_labels,
                                 ax=ax)


def get_subgraph_factor_values(
        self,
        vars_idx: np.ndarray,
        vars_skip: Set = frozenset()) -> np.ndarray:
    """Calculates factor values for subgraph.

    Consider model on subgraph containing only variables with indices
    ``vars_idx``. That is, containing only factors which depend only
    on variables from ``vars_idx``. For every possible combination of
    those variable values, calculate product of all factors in the new
    model - that's what this function returns.

    This can also be described as "interactions within subgraph". Or
    if we condense all variables in ``vars_idx`` in single
    "supervariable", this function returns field for the new
    supervariable.

    :param vars_idx: Indices of variables in subgraph.
    :param vars_skip: Set. Indices of variables, which should be
      skipped for factor calculation. Field factors for these
      variables won't be included in the result. Interaction factors
      both arguments of which are in ``vars_skip``, won't be included
      in the result. However, interaction factors where only one
      variable appears in ``vars_skip``, will be included in result.
      This parameter is useful when building junction tree, to avoid
      double-counting factors.
    :return: ``np.array`` of length ``al_size ** len(vars_idx)``. Each
      value is logarithm of product of all relevant factors for
      certain variable values. Correspondence between indices in this
      array and states is consistent with ``decode_state``.
    """
    vars_num = len(vars_idx)

    # Collect interactions between pairs of subgraph variables, indexed
    # by position within `vars_idx`.
    edges = []
    for i in range(vars_num):
        v1 = vars_idx[i]
        for j in range(i + 1, vars_num):
            v2 = vars_idx[j]
            should_skip = v1 in vars_skip and v2 in vars_skip
            if not should_skip and self.has_edge(v1, v2):
                edges.append(
                    (i, j, self.get_interaction_matrix(v1, v2)))

    all_states = decode_all_states(vars_num, self.al_size)
    a = np.zeros(self.al_size**vars_num)
    for u in range(vars_num):
        if vars_idx[u] in vars_skip:
            continue
        a += self.field[vars_idx[u]][all_states[:, u]]
    for u, v, j in edges:
        a += j[all_states[:, u], all_states[:, v]]
    return a


@staticmethod
def from_model(original_model: GraphModel) -> PairWiseFiniteModel:
    """Constructs Pairwise Finite model which is equivalent to given model.

    All variables must be discrete. All factors must depend on at most
    2 variables.

    New model will have the same number of variables and factors. If
    variables in original model have different domain sizes, in new
    model they will be extended to have the same domain size.

    :param original_model: Model to convert.
    :return: Equivalent pairwise finite model.
    :raises ValueError: If the largest domain size exceeds 1000 or a
      factor depends on more than 2 variables.
    """
    al_size = max(v.domain.size() for v in original_model.get_variables())
    old_factors = list(original_model.get_factors())

    def pad_tensor(t):
        # Grow every axis to `al_size` (np.pad's default padding).
        padding = [[0, al_size - dim] for dim in t.shape]
        return np.pad(t, padding)

    # Validate model.
    if al_size > 1000:
        raise ValueError("Not all variables are discrete.")
    # any() rather than max(): a model without factors is still valid.
    if any(len(f.var_idx) > 2 for f in old_factors):
        raise ValueError("Model is not pairwise.")

    new_model = PairWiseFiniteModel(original_model.num_variables, al_size)
    for old_factor in old_factors:
        values = DiscreteFactor.from_factor(old_factor).values
        values = pad_tensor(values)
        new_factor = DiscreteFactor(new_model, old_factor.var_idx, values)
        new_model.add_factor(new_factor)
    return new_model
import pickle
import networkx as nx
from networkx import Graph

# Reading Graph v1 pickle data
#with open('../datasets/github.p', 'rb') as f:
#    G = pickle.load(f)

# Reading Graph v2 pickle data
# NOTE(review): pickle.load executes arbitrary code from the file; only
# load datasets from a trusted source.
with open('../datasets/github.p2', 'rb') as f:
    nodes, edges = pickle.load(f)
G = Graph()
G.add_nodes_from(nodes)
G.add_edges_from(edges)

'''
INSTRUCTIONS

*   Write a function called recommend_repositories() that accepts 3
    arguments - G, from_user, and to_user - and returns the repositories
    that the from_user is connected to that the to_user is not connected to.
*   Get the set of repositories the from_user has contributed to and store
    it as from_repos. To do this, first obtain the neighbors of from_user
    and use the set() function on this.
*   Get the set of repositories the to_user has contributed to and store it
    as to_repos.
*   Using the .difference() method, return the repositories that the
    from_user is connected to that the to_user is not connected to.
*   Print the repositories to be recommended from 'u7909' to 'u2148'.
'''


def recommend_repositories(G, from_user, to_user):
    """Return neighbours of `from_user` that are not neighbours of `to_user`.

    :param G: Graph whose neighbours of a user node are that user's
        repositories (per the exercise instructions above).
    :param from_user: Node whose neighbours are candidate recommendations.
    :param to_user: Node receiving the recommendations.
    :return: Set of nodes adjacent to `from_user` but not to `to_user`.
    """
    # Get the set of repositories that from_user has contributed to
    from_repos = set(G.neighbors(from_user))
    # Get the set of repositories that to_user has contributed to
    to_repos = set(G.neighbors(to_user))
    # Repositories from_user is connected to that to_user is not
    return from_repos.difference(to_repos)
stop = transfer_map[stop] route_new.append(stop) unique_routes_new[pair_new].append(route_new) unique_routes = unique_routes_new print ' tracing rolle connectedness...' # # 2.3.2 collect rolle connectedness of each stop system = Graph() for pair in unique_routes.keys(): # filter out staten island routes if pair[0] not in si_ids and pair[1] not in si_ids: for route in unique_routes[pair]: route = np.array(route) edges = np.vstack([route[:-1], route[1:]]).T system.add_nodes_from(route) system.add_edges_from(edges) # find location of each node as subway stop locs = {node:None for node in system.nodes()} stop_id = stops['stop_id'].tolist() for node in system.nodes(): stop = stops[stops['stop_id']==node].iloc[0] locs[node] = stop[['x','y']].values # alex rolle's connectedness def f(layers, n): if n in layers.keys(): return len(layers[n]) else: return -1
class ClusterNetwork(object):
    """Graph of representations with optional clustering edges.

    Each node is an integer index carrying a ``'rep'`` attribute; edges
    (added by :meth:`cluster`) carry the value from the similarity matrix
    as their weight.

    NOTE(review): uses the NetworkX 1.x accessors ``nodes_iter``,
    ``edges_iter`` and ``Graph.node``.
    """

    def __init__(self, reps):
        """Create one node per entry of `reps`, in sorted key order.

        :param reps: Mapping from key to representation object. The first
            representation's ``attributes`` keys seed ``self.attributes``.
        """
        self.g = Graph()
        self.N = len(reps.keys())
        nodes = []
        self.lookup = {}  # key -> integer node index
        self.attributes = None
        for i, r in enumerate(sorted(reps.keys())):
            self.lookup[r] = i
            if self.attributes is None:
                self.attributes = list(reps[r].attributes.keys())
            nodes.append((i, {'rep': reps[r]}))
        self.g.add_nodes_from(nodes)
        self.clusters = None

    def __iter__(self):
        """Yield the attribute dict of every node."""
        for _, d in self.g.nodes_iter(data=True):
            yield d

    def __len__(self):
        return self.N

    def __getitem__(self, key):
        """Look up node data by key or index, or a similarity by index pair.

        A ``str`` is translated through ``self.lookup``; a ``tuple`` indexes
        ``self.simMat``; anything else is used as a node index directly.
        """
        if isinstance(key, str):
            return self.g.node[self.lookup[key]]
        elif isinstance(key, tuple):
            return self.simMat[key]
        return self.g.node[key]

    def cluster(self, scores, cluster_method, oneCluster):
        """Rebuild the edges and 2-D node positions.

        :param scores: Mapping ``(key_a, key_b) -> value`` used to fill the
            symmetric matrix ``self.simMat`` (stored negated), or ``None``
            to reuse the existing matrix.
        :param cluster_method: ``'affinity'``, ``'complete'`` or ``None``
            (which only clears the edges).
        :param oneCluster: Passed through to ``affinity_cluster``.
        :raises ValueError: If `cluster_method` is not recognised.
        """
        #Clear any edges
        self.g.remove_edges_from(list(self.g.edges_iter(data=False)))

        if cluster_method is None:
            return
        if scores is not None:
            self.simMat = zeros((self.N, self.N))
            for k, v in scores.items():
                indOne = self.lookup[k[0]]
                indTwo = self.lookup[k[1]]
                self.simMat[indOne, indTwo] = v
                self.simMat[indTwo, indOne] = v
            self.simMat = -1 * self.simMat
        if cluster_method == 'affinity':
            true_labels = array(
                [self[i]['rep']._true_label for i in range(self.N)])
            self.clusters = affinity_cluster(self.simMat, true_labels,
                                             oneCluster)
            edges = []
            for k, v in self.clusters.items():
                for v2 in v:
                    # Skip the entry whose first element is the key itself.
                    if v2[0] == k:
                        continue
                    edges.append((k, v2[0], v2[1]))
        elif cluster_method == 'complete':
            edges = []
            for i in range(self.N):
                for j in range(i + 1, self.N):
                    edges.append((i, j, self.simMat[i, j]))
        else:
            # Previously fell through and crashed with UnboundLocalError
            # on `edges`.
            raise ValueError(
                'Unknown cluster method %r' % (cluster_method,))
        self.g.add_weighted_edges_from(edges)

        # Lay the nodes out in 2-D: MDS on the un-negated matrix, then PCA.
        seed = RandomState(seed=3)
        mds = manifold.MDS(n_components=2,
                           max_iter=3000,
                           eps=1e-9,
                           random_state=seed,
                           dissimilarity="precomputed",
                           n_jobs=4)
        pos = mds.fit(-1 * self.simMat).embedding_
        clf = PCA(n_components=2)
        pos = clf.fit_transform(pos)
        for i, p in enumerate(pos):
            self.g.node[i]['pos'] = p

    def calc_reduction(self):
        """Store a signed 'HyperHypoMeasure' on every node.

        Does nothing when no clusters have been computed. For a non-centre
        node the measure is the absolute difference between its edge weight
        to the cluster centre and the cluster's mean, negated when the
        centre's ``'representation'`` has fewer rows than the node's.
        """
        if self.clusters is None:
            return
        means = {}
        reverse_mapping = {}  # node index -> cluster centre
        for k, v in self.clusters.items():
            s = 0
            for ind in v:
                reverse_mapping[ind[0]] = k
                s += ind[1]
            means[k] = s / len(v)
        for i in self.g.nodes_iter():
            clust_center = reverse_mapping[i]
            if i == clust_center:
                self.g.node[i]['HyperHypoMeasure'] = 0
                continue
            dist = self.g[i][clust_center]['weight']
            norm_dist = abs(dist - means[clust_center])
            len_diff = (self[clust_center]['representation'].shape[0] -
                        self[i]['representation'].shape[0])
            if len_diff < 0:
                norm_dist *= -1
            self.g.node[i]['HyperHypoMeasure'] = norm_dist
        if 'HyperHypoMeasure' not in self.attributes:
            self.attributes.append('HyperHypoMeasure')

    def get_edges(self):
        """Return the graph's edges (without data) as an array."""
        return array(self.g.edges(data=False))

    def labels(self):
        """Return ``(cluster_labels, true_labels)`` as two arrays.

        Cluster labels default to the node's own index and are overwritten
        with the cluster key for every clustered node. True labels are
        remapped to integer levels.
        """
        labels = list(range(len(self.g)))
        for k, v in self.clusters.items():
            for v2 in v:
                labels[v2[0]] = k
        true_labels = list()
        for i in range(len(labels)):
            true_labels.append(self[i]['rep']._true_label)
        levels = {x: i for i, x in enumerate(set(true_labels))}
        for i in range(len(true_labels)):
            true_labels[i] = levels[true_labels[i]]
        return array(labels), array(true_labels)

    def silhouette_coefficient(self):
        """Silhouette score of the cluster labels on ``self.simMat``."""
        labels, true_labels = self.labels()
        return metrics.silhouette_score(self.simMat,
                                        labels,
                                        metric='precomputed')

    def homogeneity(self):
        """Homogeneity score of cluster labels against true labels."""
        labels, true_labels = self.labels()
        return metrics.homogeneity_score(true_labels, labels)

    def completeness(self):
        """Completeness score of cluster labels against true labels."""
        labels, true_labels = self.labels()
        return metrics.completeness_score(true_labels, labels)

    def v_score(self):
        """V-measure score of cluster labels against true labels."""
        labels, true_labels = self.labels()
        return metrics.v_measure_score(true_labels, labels)

    def adjusted_mutual_information(self):
        """Adjusted mutual information of cluster vs. true labels."""
        labels, true_labels = self.labels()
        return metrics.adjusted_mutual_info_score(true_labels, labels)

    def adjusted_rand_score(self):
        """Adjusted Rand score of cluster labels against true labels."""
        labels, true_labels = self.labels()
        return metrics.adjusted_rand_score(true_labels, labels)
def to_nx(self) -> Graph:
    """Return a new ``Graph`` holding this object's nodes and edges."""
    converted = Graph()
    # Nodes are registered first, then the edges between them.
    converted.add_nodes_from(self._nodes)
    converted.add_edges_from(self._edges)
    return converted
def graph_from_vertex_and_edge_lists(vertices, edges):
    """Build a ``Graph`` from an iterable of vertices and one of edges.

    :param vertices: Nodes to add.
    :param edges: Edges to add, in whatever form ``add_edges_from`` accepts.
    :return: The populated graph.
    """
    result = Graph()
    result.add_nodes_from(vertices)
    result.add_edges_from(edges)
    return result
def get_all_aut_classes(F, length, dirname="aut_classes_cache/", verbose=True):
    """
    Get all automorphism classes of words in F_r with bounded length.

    Caches the result in `dirname` as ``r{rank}-len{length}.pkl`` and
    reuses an exact or larger cached computation when one exists.

    :param F: Free group (checked with ``is_FreeGroup``).
    :param length: Bound on the word length.
    :param dirname: Cache directory; created if missing.
    :param verbose: Print progress and wrap the enumeration in ``tqdm``.
    :return: List of sets of words, one set per automorphism class.
    """
    assert is_FreeGroup(F), "F must be a free group"
    r = F.rank()

    os.makedirs(dirname, exist_ok=True)
    cache_path = os.path.join(dirname, f"r{r}-len{length}.pkl")

    # NOTE(review): pickle.load is only safe because the cache directory is
    # written by this function; do not point `dirname` at untrusted data.
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as fh:
            aut_classes = pickle.load(fh)
        return [set([F(w.Tietze()) for w in cls]) for cls in aut_classes]

    # Maybe we computed something bigger before
    for filename in os.listdir(dirname):
        try:
            r_str, len_str = filename.split(".")[0].split("-")
            r_cached = int(r_str[1:])
            len_cached = int(len_str[3:])
        except ValueError:
            # Not one of our "r<rank>-len<length>.pkl" cache files.
            continue
        if r_cached >= r and len_cached >= length:
            with open(os.path.join(dirname, filename), 'rb') as fh:
                cached_aut_classes = pickle.load(fh)
            aut_classes = []
            for cls in cached_aut_classes:
                # One representative decides whether the class is kept.
                # NOTE(review): set.pop() picks an arbitrary member; with
                # r_cached > r other members may use generators beyond r.
                word = cls.pop()
                word_rep = word.Tietze()
                if len(word_rep) == 0:
                    aut_classes.append(set([F(1)]))
                # Inclusive bounds: words of exactly `length` letters and
                # words using generator r both belong to the result (the
                # strict comparisons used before dropped them).
                elif len(word_rep) <= length and max(
                        abs(x) for x in word_rep) <= r:
                    cls.add(word)
                    aut_classes.append(set([F(w.Tietze()) for w in cls]))
            # Write to `dirname`, not a hard-coded directory.
            with open(cache_path, 'wb') as fh:
                pickle.dump(aut_classes, fh)
            return aut_classes

    letters = list(range(1, r + 1)) + list(range(-r, 0))
    minimal_words = set()
    all_words = set()
    # To avoid stuff like (1,-1,2,3) and (2,-2,2,3)
    # We only consider words that are in "canonical order"
    # We also assume we only check tuple starting with a
    # (They can still be a*a^-1*b*...)
    tuples = product(letters, repeat=length - 1)
    if verbose:
        print(f"Minimizing all words in {F} of length <={length}")
        print(f"{2*(len(letters)) ** (length - 1)} words to minimize.")
        tuples = tqdm(tuples)
    for tup in tuples:
        tup = [1] + list(tup)
        word = F(tup)
        if word not in all_words and word == canonical_letter_permute_form(
                F, word):
            all_words.add(word)
            minimal_words.add(
                canonical_letter_permute_form(F, minimize(F, word)))
    if length > 0:
        # Due to cancellations (e.g. (1,-1, ...)), we only
        # need to check words of length N, N - 1.
        tuples = product(letters, repeat=length - 1)
        if verbose:
            tuples = tqdm(tuples)
        for tup in tuples:
            word = F(tup)
            if word == canonical_letter_permute_form(
                    F, word):  # We only consider canonized orders
                minimal_words.add(
                    canonical_letter_permute_form(F, minimize(F, word)))
    if verbose:
        print(
            f"Finished minimizing letters, found {len(minimal_words)} minimal words."
        )
        print("Creating the Whitehead moves graph on minimal words.")

    # Classes are the connected components of the graph that links each
    # minimal word to its neighbours from `get_minword_wh_nbrs`.
    G = Graph()
    G.add_nodes_from(minimal_words)
    for word in minimal_words:
        nbrs = get_minword_wh_nbrs(F, word)
        assert (len(nbrs[0].Tietze()) == len(
            word.Tietze())), "Something's wrong"
        for nbr in nbrs:
            nbr = canonical_letter_permute_form(F, nbr)
            assert nbr in minimal_words, f"Found a word ({nbr}) not in minimal_words"
            G.add_edge(word, nbr)
    aut_classes = list(connected_components(G))
    with open(cache_path, 'wb') as fh:
        pickle.dump(aut_classes, fh)
    return aut_classes
def osm_post(lim,
             file_name_out,
             around=1000,
             eps=0.01,
             safe_dist=100,
             penalize=20):
    """Build a routing graph from extracted power-line data and pickle it.

    Steps: drop towers inside substation geometries, connect consecutive
    towers of every line, split edges at intersections with unrelated
    nearby segments, add penalised "air" edges between neighbouring towers,
    prune redundant edges, relabel nodes to ``(id, lat, lon)`` tuples and
    save the graph.

    :param lim: 7-tuple ``(lines, substations, towers, id2tower, id2node,
        id2lines, id2types)``.
    :param file_name_out: Path handed to ``save_pickled``.
    :param around: Neighbour search radius passed to the KD-tree
        (the progress message reports it in metres).
    :param eps: Tolerance forwarded to ``intersect``.
    :param safe_dist: Currently unused by this function.
    :param penalize: Multiplier applied to the distance of "air" edges.
    """
    from limic.util import start, end, status, file_size, load_pickled, distance, save_pickled
    from scipy.spatial import cKDTree as KDTree
    from networkx import Graph, astar_path_length
    from pyproj import CRS, Transformer
    from itertools import chain
    from limic.overpass import intersect, pylon
    lines, substations, towers, id2tower, id2node, id2lines, id2types = lim

    start("Building KD-tree from white nodes")
    from limic.util import kdtree
    towers_tree = kdtree(towers, get_latlon=lambda x: x.latlon)
    end('')
    status(len(towers))

    start("Deleting black nodes")
    to_delete = set()
    from limic.util import nodes_in_geometry
    for substation in substations:
        to_delete.update(
            nodes_in_geometry(towers_tree,
                              list(map(lambda x: id2node[x], substation))))
    towers = [tower for tower in towers if tower not in to_delete]
    end('')
    status(len(towers))

    start("Building initial graph")
    g = Graph()
    g.add_nodes_from(towers)
    for line in lines:
        line_nodes = list(map(lambda x: id2tower[x], line))
        for from_node, to_node in zip(line_nodes, line_nodes[1:]):
            if from_node in to_delete or to_node in to_delete:
                continue
            w = distance(from_node.latlon, to_node.latlon)
            g.add_edge(from_node,
                       to_node,
                       weight=w,
                       type=id2types[from_node.id])
    end('')
    status(len(g.nodes()), end='/')
    status(len(g.edges()))

    start("Finding neighbours within " + str(around) + "m")
    # Rebuilt because `towers` no longer contains the deleted nodes.
    towers_tree = kdtree(towers, get_latlon=lambda x: x.latlon)
    end('')
    # Honour the caller's radius (was hard-coded to 1000).
    neighbour_indices, neighbours = towers_tree.get_neighbours(around=around)
    end()

    start("Computing non-logical intersections")
    tower2index = {}
    for i, t in enumerate(towers):
        tower2index[t] = i
    for k, v in id2lines.items():
        id2lines[k] = tuple(map(tuple, v))
    end('')
    # Candidate segment pairs: an edge and any edge of a nearby tower that
    # shares no line with the edge's endpoints.
    segments = set()
    for u, v in g.edges():
        this = (u, v) if u < v else (v, u)
        ui, vi = tower2index[u], tower2index[v]
        lines = set()
        lines.update(id2lines[u.id])
        lines.update(id2lines[v.id])
        for neighbour in chain(neighbours[ui], neighbours[vi]):
            if neighbour == u or neighbour == v:
                continue
            if not lines.intersection(id2lines[neighbour.id]):
                for nn in g.neighbors(neighbour):
                    other = (neighbour, nn) if neighbour < nn else (nn,
                                                                    neighbour)
                    segments.add(tuple(sorted((this, other))))
    end('')
    status(len(segments), end=' ')

    minusid = 0  # intersection nodes get fresh negative ids
    latlon2id = {}
    segments2intersections = {}
    for (t1, t2), (t3, t4) in segments:
        res = intersect(t1.latlon,
                        t2.latlon,
                        t3.latlon,
                        t4.latlon,
                        eps=eps,
                        no_tu=False)
        if res:
            intersection, (t, u) = res
            if intersection not in latlon2id:
                minusid -= 1
                latlon2id[intersection] = minusid
            segments2intersections.setdefault((t1, t2), []).append(
                (t, latlon2id[intersection], intersection))
            segments2intersections.setdefault((t3, t4), []).append(
                (u, latlon2id[intersection], intersection))
    end('')
    status(-minusid, end=' ')

    # Replace every intersected edge by a chain through its intersection
    # nodes, ordered by the first tuple element.
    for (u, v), intersections in segments2intersections.items():
        intersections.sort()
        g.remove_edge(u, v)
        edge_type = id2types[u.id]
        assert (edge_type == id2types[v.id])
        seq = [u]
        for _, node_id, latlon in intersections:
            seq.append(pylon(node_id, latlon))
        seq.append(v)
        for from_node, to_node in zip(seq, seq[1:]):
            w = distance(from_node.latlon, to_node.latlon)
            g.add_edge(from_node, to_node, weight=w, type=edge_type)
    end()

    start("Adding routing through air")
    airs = set()
    for ns in neighbours:
        n = ns[0]
        for m in ns[1:]:
            if not g.has_edge(n, m):
                airs.add((n, m))
    end('')
    for n, m in airs:
        w = penalize * distance(n.latlon, m.latlon)
        g.add_edge(n, m, weight=w, type=-1)
    end('')
    status(len(g.nodes()), end='/')
    status(len(g.edges()))

    from networkx import relabel_nodes
    start("Prune redundant edges (incomplete)")
    prune_incomplete(g)
    end('')
    status(len(g.edges()))
    start("Prune redundant edges (complete)")
    prune_complete(g)
    end('')
    status(len(g.edges()))

    start("Cleaning up graph")
    relabel = dict(
        map(
            lambda tower:
            (tower, (tower.id, tower.latlon[0], tower.latlon[1])),
            g.nodes()))
    relabel_nodes(g, relabel, copy=False)
    end()

    start("Saving graph to", file_name_out)
    save_pickled(file_name_out, g)
    end('')
    file_size(file_name_out)
class TextRank(object):
    """Rank the sentences of a text with PageRank and count its nouns.

    Sentences become graph nodes joined by edges weighted with the Jaccard
    similarity of their bags of words; nouns are tallied in a ``Counter``.
    """

    stopwords = [
        "이하",
        "만약",
        "대한",
        "아",
        "휴",
        "아이구",
        "아이쿠",
        "아이고",
        "어",
        "나",
        "우리",
        "저희",
        "따라",
        "의해",
        "을",
        "를",
        "에",
        "의",
        "가",
    ]
    eng_stopwords = [
        "lot",
        "day",
        "way",
    ]

    def __init__(self, text):
        self.text = text.strip()
        self.build()

    def build(self):
        """Split the text, build the sentence graph and rank everything."""
        self._build_sentences()

        # Sentence processing
        self._build_graph()
        self.pageranks = pagerank(self.graph, weight='weight')
        self.reordered = sorted(self.pageranks,
                                key=self.pageranks.get,
                                reverse=True)

        # Word processing
        self.word_rank_collections = Counter(self.nouns)

    def _build_sentences(self):
        """Populate ``self.sentences``, ``self.nouns`` and ``self.has_noun``."""
        okt = Okt()
        seen = set()
        candidates = []

        # Split the whole text on newlines, "! " and "? ", then split each
        # piece on "." (the lookaheads keep dotted names such as file names
        # intact), and strip surrounding spaces, dots and tabs.
        for enter_line in re.split(r'\n|! |\? ', self.text):
            for line in re.split(r'[\.](?=[^0-9])(?=[^a-z])', enter_line):
                candidates.append(line.strip(' ').strip('.').strip('\t'))

        self.sentences = []
        self.nouns = []
        self.has_noun = False
        index = 0
        eng_list = []
        eng_nouns = []
        for candidate in candidates:
            if candidate and candidate not in seen:
                seen.add(candidate)
                # Add the sentence
                self.sentences.append(Sentence(candidate + '.', index))
                index += 1
                # Add the nouns of the sentence
                for pos in okt.pos(str(candidate)):
                    if (pos[0] not in self.stopwords and len(pos[0]) > 1
                            and pos[1] == "Noun"):
                        self.nouns.append(pos[0])
                    elif pos[1] == "Alpha":
                        eng_list.append(pos[0])

        # English tokens
        # NOTE(review): tagged once, after all candidates are collected --
        # confirm this matches the intended nesting.
        for pos in nltk.pos_tag(eng_list):
            if (pos[1] == "NN" and pos[0].lower() not in self.eng_stopwords
                    and len(pos[0]) > 1):
                eng_nouns.append(pos[0].lower())

        # English nouns are used only when no Korean noun was found.
        if self.nouns:
            self.has_noun = True
        elif eng_nouns:
            self.nouns.extend(eng_nouns)
            self.has_noun = True

    def _build_graph(self):
        """Build the sentence graph with Jaccard-weighted edges."""
        self.graph = Graph()
        self.graph.add_nodes_from(self.sentences)
        # Compare every pair of sentences
        for sent1, sent2 in combinations(self.sentences, 2):
            weight = self._jaccard(sent1, sent2)
            if weight:
                self.graph.add_edge(sent1, sent2, weight=weight)

    def _jaccard(self, sent1, sent2):
        """Return the multiset Jaccard similarity of the two ``bow`` counters."""
        p = sum((sent1.bow & sent2.bow).values())
        q = sum((sent1.bow | sent2.bow).values())
        return p / q if q else 0

    def summarize(self, count=3, verbose=True):
        """Return the `count` top-ranked sentences in document order.

        The result is padded with the string ``"None"`` up to `count` items.

        :param count: Number of sentences to return.
        :param verbose: Join the sentences with newlines into one string
            when true; return them as a list otherwise.
        """
        top = sorted(self.reordered[:count],
                     key=lambda sentence: sentence.index)
        results = [sentence.text for sentence in top]
        results.extend(["None"] * (count - len(results)))
        if verbose:
            return '\n'.join(results)
        return results

    def keywords(self, word_num=3):
        """Return the `word_num` most common nouns, padded with ``"None"``."""
        results = [
            word for word, _ in
            self.word_rank_collections.most_common(word_num)
        ]
        # Guarantee `word_num` return values
        results.extend(["None"] * (word_num - len(results)))
        return results
def rewire_benchmark(g: nx.Graph, com2node: dict, node2com: dict):
    """Rewire links in place without destroying the community structure.

    Two passes swap the endpoints of edge pairs: first edges inside a
    community (at most 10 swaps attempted per community), then edges that
    leave a community.

    :param g: Graph to rewire; modified in place.
    :param com2node: Community -> sequence of member nodes.
    :param node2com: Node -> sequence of communities it belongs to.
    """
    # rewiring links without destroying structure of the network
    random.seed(int(time.time()))
    com_cnt = {x: 10 for x in com2node.keys()}

    # rewire inner links
    # list(): NetworkX >= 2 returns a read-only view that
    # random.shuffle cannot reorder.
    rand_nodes = list(g.nodes())
    random.shuffle(rand_nodes)
    for x1 in rand_nodes:
        # Communities the node belongs to
        in_coms = node2com[x1]
        # Pick one of them at random
        com = random.choice(in_coms)
        if com_cnt[com] < 1:
            continue
        com_cnt[com] -= 1

        # Pick a neighbour x2 of x1 inside the community: edge x1--x2
        _t = list(set(g.neighbors(x1)) & set(com2node[com]))
        if len(_t) < 1:
            continue
        x2 = random.choice(_t)

        # Pick an edge y1--y2 inside the same community
        y1 = random.choice(com2node[com])
        _t = list(set(g.neighbors(y1)) & set(com2node[com]))
        if len(_t) < 1:
            continue
        y2 = random.choice(_t)

        # Swap endpoints of x1--x2 and y1--y2, but only when the four nodes
        # are distinct and the new edges do not exist yet.
        if (len({x1, x2, y1, y2}) == 4 and not g.has_edge(y2, x1)
                and not g.has_edge(y1, x2)):
            g.remove_edges_from([(x1, x2), (y1, y2)])
            g.add_edges_from([(x1, y2), (x2, y1)])

    # rewire outer links
    rand_nodes = list(g.nodes())
    random.shuffle(rand_nodes)
    for x1 in rand_nodes:
        # Communities the node belongs to
        in_coms = node2com[x1]
        # Pick one of them at random
        com = random.choice(in_coms)

        # Pick a neighbour x2 of x1 outside the community: edge x1--x2
        _t = list(set(g.neighbors(x1)) - set(com2node[com]))
        if len(_t) < 1:
            continue
        x2 = random.choice(_t)

        # Pick an outgoing edge y1--y2 of a node y1 in the same community
        y1 = random.choice(com2node[com])
        _t = list(set(g.neighbors(y1)) - set(com2node[com]))
        if len(_t) < 1:
            continue
        y2 = random.choice(_t)

        # Equality, not identity: equal node labels need not be the same
        # object.
        if x1 != y1:
            # Swap endpoints of x1--x2 and y1--y2
            g.remove_edges_from([(x1, x2), (y1, y2)])
            g.add_edges_from([(x1, y2), (x2, y1)])