def alignment_graph(lengths=(), pairings=(), alignments=()):
    """Build an undirected graph linking aligned time points of sequences.

    One vertex is created per time point of every sequence; the vertices of
    sequence ``i`` start at index ``sum(lengths[:i])``.  Every row of every
    non-empty alignment becomes one edge between the two time points it
    pairs.

    :param lengths: number of time points of each sequence.
    :param pairings: ``pairings[i]`` is the ``(j, k)`` pair of sequence
        indices related by ``alignments[i]``.
    :param alignments: ``alignments[i]`` is a sequence of segments; the
        segments are concatenated along axis 0 and column 0 / column 1 of the
        result index into sequence ``j`` / ``k`` respectively.
    :return: ``(g, seq_index, time, alignment_index, segment_index)`` where
        the first two property maps are vertex maps (owning sequence and
        position inside it) and the last two are edge maps (index of the
        alignment and of the segment the edge comes from).
    """
    g = Graph(directed=False)
    seq_index = g.new_vertex_property("int")
    time = g.new_vertex_property("int")

    # add vertices; np.concatenate rejects an empty list, so an empty
    # `lengths` simply yields an empty graph.
    if len(lengths) > 0:
        g.add_vertex(sum(lengths))
        seq_index.a = np.concatenate(
            [np.repeat(seq, n) for seq, n in enumerate(lengths)])
        time.a = np.concatenate([np.arange(n) for n in lengths])

    # offsets[i] == sum(lengths[:i]): index of the first vertex of sequence i
    offsets = np.cumsum([0] + list(lengths))

    # add edges (alignments)
    alignment_index = g.new_edge_property("int")
    segment_index = g.new_edge_property("int")
    for align_id, segments in enumerate(alignments):
        if len(segments) == 0:
            continue
        j, k = pairings[align_id]
        pairs = np.concatenate(segments, axis=0)
        # Indexing the per-sequence vertex range (instead of adding the
        # offset directly) keeps the bounds check on the time points.
        vertices_j = (np.arange(lengths[j]) + offsets[j])[pairs.T[0]]
        vertices_k = (np.arange(lengths[k]) + offsets[k])[pairs.T[1]]
        seg_ids = np.concatenate(
            [np.repeat(seg_id, len(segment))
             for seg_id, segment in enumerate(segments)])
        edge_rows = np.vstack([
            vertices_j,
            vertices_k,
            np.repeat(align_id, len(pairs)),
            seg_ids,
        ]).T
        g.add_edge_list(edge_rows, eprops=[alignment_index, segment_index])
    return g, seq_index, time, alignment_index, segment_index
def build_word_graph(model_fname, limiar=0.2):
    """
    Build a word graph weighted by the similarity between words according
    to the model.

    Word pairs are visited in ``combinations`` order; pairs containing the
    empty string are ignored.  An edge is added when the similarity of a pair
    exceeds 0.1, and the scan stops once more than 10000 pairs have been
    processed.

    NOTE: ``limiar`` is accepted but never read by this function.

    :param model_fname: name of the file holding the word2vec model as saved
    :param limiar: unused
    :return: graph object
    """
    model = Word2Vec.load(model_fname)
    graph = Graph()
    freq = graph.new_vertex_property("int")
    weight = graph.new_edge_property("float")
    vertex_of = {}

    def vertex_for(word):
        # Reuse the vertex of an already seen word, then store its count.
        if word not in vertex_of:
            vertex_of[word] = graph.add_vertex()
        vertex = vertex_of[word]
        freq[vertex] = model.vocab[word].count
        return vertex

    named_pairs = (
        (first, second)
        for first, second in combinations(model.vocab.keys(), 2)
        if first != '' and second != ''
    )
    for processed, (w1, w2) in enumerate(named_pairs):
        v1 = vertex_for(w1)
        v2 = vertex_for(w2)
        sim = model.similarity(w1, w2)
        if sim > 0.1:
            weight[graph.add_edge(v1, v2)] = sim
        if processed > 10000:
            break
    graph.vertex_properties['freq'] = freq
    graph.edge_properties['sim'] = weight
    return graph
def test_mospp_small():
    """Check the routes ``mospp`` returns from vertex 1 to vertex 4.

    Five edges carry two integer costs each; the expected result is the
    direct route ``[1, 4]`` followed by ``[1, 2, 4]``.
    """
    G = Graph()
    # Same calls as add_vertex(1) ... add_vertex(4).
    for amount in (1, 2, 3, 4):
        G.add_vertex(amount)
    c1 = G.new_edge_property("int")
    c2 = G.new_edge_property("int")

    # (source, target, first cost, second cost), in insertion order
    edge_table = (
        (1, 3, 1, 1),
        (3, 4, 1, 1),
        (1, 2, 0, 1),
        (2, 4, 0, 1),
        (1, 4, 2, 0),
    )
    for source, target, first_cost, second_cost in edge_table:
        edge = G.add_edge(source, target)
        c1[edge] = first_cost
        c2[edge] = second_cost

    routes = mospp(G.vertex(1), G.vertex(4), c1, c2)
    as_indices = [[G.vertex_index[r] for r in route] for route in routes]
    assert as_indices == [[1, 4], [1, 2, 4]]
def build_word_graph(model_fname, limiar=0.2):
    """
    Build a word graph weighted by the similarity between words according
    to the model.

    Every vertex stores the word count in the ``freq`` vertex property and
    every edge stores the similarity in the ``sim`` edge property.  Only
    pairs whose similarity is above 0.1 are connected, and the loop ends once
    more than 10000 pairs have been handled.

    NOTE: ``limiar`` is accepted but never read by this function.

    :param model_fname: name of the file holding the word2vec model as saved
    :param limiar: unused
    :return: graph object
    """
    w2v = Word2Vec.load(model_fname)
    word_graph = Graph()
    freq = word_graph.new_vertex_property("int")
    weight = word_graph.new_edge_property("float")
    vertices = {}
    handled = 0
    for w1, w2 in combinations(w2v.vocab.keys(), 2):
        if '' in (w1, w2):
            continue
        endpoints = []
        for word in (w1, w2):
            try:
                vertex = vertices[word]
            except KeyError:
                vertex = vertices[word] = word_graph.add_vertex()
            freq[vertex] = w2v.vocab[word].count
            endpoints.append(vertex)
        sim = w2v.similarity(w1, w2)
        if sim > 0.1:
            edge = word_graph.add_edge(endpoints[0], endpoints[1])
            weight[edge] = sim
        if handled > 10000:
            break
        handled += 1
    word_graph.vertex_properties['freq'] = freq
    word_graph.edge_properties['sim'] = weight
    return word_graph
def mwgm_graph_tool(pairs, sim_mat):
    """Select matched pairs with graph-tool's ``max_cardinality_matching``.

    A graph is built with one vertex per distinct left element ``x`` and one
    per distinct right element ``y``; each ``(x, y)`` pair becomes an edge
    weighted by ``sim_mat[x, y]``.  The matching is computed with
    ``heuristic=True`` and ``minimize=False``.

    :param pairs: iterable of hashable ``(x, y)`` pairs; converted to a list
        so that a pair can be recovered from its edge index.
    :param sim_mat: object indexable as ``sim_mat[x, y]``.
    :return: set of the pairs whose edge is flagged 1 in the matching result.
    """
    from graph_tool.all import Graph, max_cardinality_matching

    if not isinstance(pairs, list):
        pairs = list(pairs)
    g = Graph()
    weight_map = g.new_edge_property("float")
    nodes_dict1 = {}
    nodes_dict2 = {}
    for x, y in pairs:
        if x not in nodes_dict1:
            nodes_dict1[x] = g.add_vertex()
        if y not in nodes_dict2:
            nodes_dict2[y] = g.add_vertex()
        # Weight the edge that was just created.  Looking it up again with
        # g.edge(n1, n2) may return an earlier parallel edge when a pair is
        # repeated, leaving the new edge with weight 0.
        e = g.add_edge(nodes_dict1[x], nodes_dict2[y])
        weight_map[e] = sim_mat[x, y]
    print("graph via graph_tool", g)
    # NOTE(review): this reads the result as an edge property map (1 marks a
    # matched edge) and relies on edge index i corresponding to pairs[i];
    # confirm against the installed graph-tool version.
    res = max_cardinality_matching(g, heuristic=True, weight=weight_map,
                                   minimize=False)
    edge_index = np.where(res.get_array() == 1)[0].tolist()
    return {pairs[index] for index in edge_index}
class StackGraph(object):
    """Call graph built from a log file of call-stack snapshots."""

    def __init__(self):
        # graph_tool Graph; created by load()
        self.g = None

    def load(self, filename):
        """Read ``filename`` and (re)build the graph in ``self.g``.

        Lines containing ``*`` are skipped.  Every other line is split on
        whitespace: the first two words (minus their two leading characters)
        are the time and depth, the third is a ``->`` separated call stack.
        The last two functions of the stack become vertices (created when
        missing) joined by an edge whose ``calls`` property accumulates
        ``(time, depth)`` entries.

        On any exception the line number, the error and the current stack are
        printed and the process exits.
        """
        # Initialize the graph
        self.g = Graph()

        # Each node will store a FunctionWrapper() class instance.
        self.g.vertex_properties["functions"] = \
            self.g.new_vertex_property("object")
        self.g.vertex_properties["display"] = \
            self.g.new_vertex_property("string")

        # Each edge will store a CallList instance.
        self.g.edge_properties["calls"] = self.g.new_edge_property("object")

        # Load the log file and build the graph.
        # NOTE(review): the file is opened in binary mode and compared with
        # text literals, which relies on Python 2 string semantics.
        with open(filename, "rb") as f:
            for i, line in enumerate(f, 1):
                # Reset so the handler below never sees an unbound name or
                # the stack of a previous line.
                stack = []
                try:
                    # Skip any informational lines
                    if "*" in line:
                        continue

                    # Extract a call stack snapshot
                    words = line.split()
                    time = words[0][2:]
                    depth = words[1][2:]
                    stack = [FunctionWrapper(instring=item)
                             for item in words[2].split("->")]

                    # Add the top 2 functions to the graph, if necessary.
                    # Format: f1()->f2()
                    f1, f2 = stack[-2], stack[-1]
                    v1, v2 = None, None

                    # Search for the vertices
                    for v in self.g.vertices():
                        if self.g.vp.functions[v] == f1:
                            v1 = v
                        if self.g.vp.functions[v] == f2:
                            v2 = v
                        if v1 is not None and v2 is not None:
                            break

                    # Add new vertices if needed
                    if v1 is None:
                        v1 = self.g.add_vertex()
                        self.g.vp.functions[v1] = f1
                        self.g.vp.display[v1] = f1.graphDisplayString()
                    if v2 is None:
                        v2 = self.g.add_vertex()
                        self.g.vp.functions[v2] = f2
                        self.g.vp.display[v2] = f2.graphDisplayString()

                    # Fetch the edge (creating it if necessary) so the call
                    # is always recorded on the edge v1 -> v2 and never on an
                    # edge left over from a previous line.
                    edge = self.g.edge(v1, v2)
                    if edge is None:
                        edge = self.g.add_edge(v1, v2)
                        self.g.ep.calls[edge] = CallList(v1, v2)
                    self.g.ep.calls[edge].addCall(time, depth)
                except Exception as exc:
                    # Single-argument print: same output on Python 2 and 3.
                    print("Exception on line %s : %s" % (i, exc))
                    print([str(x) for x in stack])
                    exit()
def test_graphtool():
    """``gt2edges_and_weights`` yields every (source, target, weight) triple.

    The fixture is a directed 4-cycle whose edges weigh 1, 2, 3 and 4.
    """
    weighted_cycle = [(0, 1, 1), (1, 2, 2), (2, 3, 3), (3, 0, 4)]

    g = Graph(directed=True)
    g.add_vertex(4)
    g.add_edge_list([(0, 1), (1, 2), (2, 3), (3, 0)])
    weight = g.new_edge_property('float')
    for source, target, value in weighted_cycle:
        weight[g.edge(source, target)] = value

    found = set(gt2edges_and_weights(g, weight))
    assert found == {
        (0, 1, 1), (1, 2, 2), (2, 3, 3), (3, 0, 4)
    }
def vytvořím_graph_tool_graf():
    """Create a small graph-tool graph with sample properties and save it.

    The graph has two vertices joined by one edge.  A double and a
    ``vector<int>`` vertex property, an ``object`` edge property and a
    ``bool`` graph property each receive one value, then the graph is written
    to ``./data/graph_tool.graphml`` in XML format.
    """
    from graph_tool.all import Graph

    graf = Graph()
    u1 = graf.add_vertex()
    u2 = graf.add_vertex()
    graf.add_edge(u1, u2)

    # Double-precision floating point
    vprop_double = graf.new_vertex_property("double")
    vprop_double[graf.vertex(1)] = 3.1416

    # Vector of ints
    vprop_vint = graf.new_vertex_property("vector<int>")
    vprop_vint[graf.vertex(0)] = [1, 3, 42, 54]

    # Arbitrary python object. In this case, a dictionary.
    eprop_dict = graf.new_edge_property("object")
    # The builtin next() works on every Python version; the iterator's
    # .next() method only exists on Python 2.
    eprop_dict[next(graf.edges())] = {"foo": "bar", "gnu": 42}

    # Boolean
    gprop_bool = graf.new_graph_property("bool")
    gprop_bool[graf] = True

    graf.save('./data/graph_tool.graphml', fmt='xml')
class BiblioNetwork():
    """Bibliography network displayer.

    Loads a bibliography CSV export into ``self.db`` (a pandas DataFrame with
    at least the ``Authors``, ``Cited by`` and ``Year`` columns) and builds
    and draws article or author graphs with graph-tool.
    """

    def __init__(self, filepath):
        self.filepath = filepath
        self.db = None
        # Cached co-authorship counts and the db length they were built from
        self._auth_betw = None
        self._auth_betw_computed_from = 0
        self.layout_pos = None
        self.graph = None
        self.author_list = []

    @staticmethod
    def _split_authors(row):
        """Split the ``Authors`` field of a row into a list of names.

        The field is split on ``", "`` and the pieces are re-joined two by
        two; a trailing unpaired piece is dropped.
        """
        pieces = row['Authors'].split(", ")
        return [", ".join(pieces[2*i:2*i + 2])
                for i in range(len(pieces) // 2)]

    def parse(self, nmb_to_import=None, delimiter=","):
        """Parse the database csv file.

        :param nmb_to_import: maximum number of rows to read (all if None).
        :param delimiter: field delimiter of the CSV file.
        """
        # import database; everything is passed by keyword because recent
        # pandas versions only accept the path positionally.
        common = dict(delimiter=delimiter, index_col=False,
                      nrows=nmb_to_import, encoding="ISO8859")
        try:
            self.db = pd.read_csv(self.filepath, on_bad_lines="warn",
                                  **common)
        except TypeError:
            # Older pandas without `on_bad_lines`: same policy (skip the bad
            # line and warn) with the legacy keywords.
            self.db = pd.read_csv(self.filepath, error_bad_lines=False,
                                  warn_bad_lines=True, **common)
        # separate authors
        self.db['Authors'] = self.db.apply(self._split_authors, axis=1)
        # Replace missing values (assigned back instead of an in-place call
        # on a column selection, which may operate on a copy)
        self.db['Cited by'] = self.db['Cited by'].fillna(0)
        # Update author list
        self.update_author_list()

    def _drop_rows(self, mask):
        "Drop the rows selected by a boolean mask and refresh derived data"
        len_bef = len(self.db)
        self.db.drop(self.db[mask].index, inplace=True)
        len_after = len(self.db)
        print(" Removed {} articles, {} remaining".format(len_bef-len_after,
                                                          len_after))
        self.update_author_list()
        self._auth_betw = None

    def clean(self, min_citations=10):
        "Remove the articles cited fewer than `min_citations` times"
        self._drop_rows(self.db["Cited by"] < min_citations)

    def remove_anterior(self, year):
        "Remove the articles published in `year` or before"
        self._drop_rows(self.db["Year"] <= year)

    def remove_posterior(self, year):
        "Remove the articles published after `year`"
        self._drop_rows(self.db["Year"] > year)

    def update_author_list(self):
        "Update the sorted list of distinct authors from the database"
        # A set comprehension also copes with an empty database, which
        # np.concatenate rejects.
        auths = {auth for auths in self.db['Authors'] for auth in auths}
        self.author_list = np.sort(list(auths))

    @property
    def author_betweeness(self):
        """Co-authorship counts: ``{author: {coauthor: shared articles}}``.

        The result is cached and recomputed when the cache was reset or the
        number of rows of the database changed.
        """
        # If already computed, just return it
        if self._auth_betw is not None and \
           self._auth_betw_computed_from == len(self.db):
            return self._auth_betw
        # else compute it
        self._auth_betw_computed_from = len(self.db)
        auth_betw = {auth: {} for auth in self.author_list}
        for auths in self.db['Authors']:
            # skip if only one author
            if len(auths) == 1:
                continue
            # Loop on authors couples, counting each couple in both
            # directions
            for i1, auth1 in enumerate(auths):
                for auth2 in auths[i1+1:]:
                    auth_betw[auth1][auth2] = \
                        auth_betw[auth1].get(auth2, 0) + 1
                    auth_betw[auth2][auth1] = \
                        auth_betw[auth2].get(auth1, 0) + 1
        self._auth_betw = auth_betw
        return self._auth_betw

    @author_betweeness.setter
    def author_betweeness(self, val):
        raise Exception("You cannot change that")

    def get_total_citation(self):
        """ Return total number of citations for each author"""
        nmbcits = {}
        for _, art in self.db.iterrows():
            nmbcit = int(art['Cited by'])
            for auth in art['Authors']:
                nmbcits[auth] = nmbcits.get(auth, 0) + nmbcit
        return nmbcits

    def get_auth_nmb_of_art(self):
        """ Return number of article for each author"""
        nmbart = {}
        for _, art in self.db.iterrows():
            for auth in art['Authors']:
                nmbart[auth] = nmbart.get(auth, 0) + 1
        return nmbart

    def _get_author_publication(self):
        "Return, for each author, the index labels of their articles"
        auth2pub = {}
        for _, art in self.db.iterrows():
            for auth in art['Authors']:
                auth2pub.setdefault(auth, []).append(art.name)
        return auth2pub

    def write_author_list(self, filepath):
        "Write one 'index: author' line per author to `filepath`"
        with open(filepath, "w") as f:
            data = ['{}: {}\n'.format(i, auth)
                    for i, auth in enumerate(self.author_list)]
            f.writelines(data)

    def make_article_graph(self, layout="arf"):
        """Make an article graph

        One vertex per article (in database row order); two articles are
        linked once for every author they share.

        :param layout: "arf", "sfpd" (or "sfdp"), "fr" or "radial".
        :raises ValueError: on an unknown layout name.
        """
        self.graph = Graph(directed=False)
        # add vertex
        self.graph.add_vertex(len(self.db))
        # add properties
        cb = self.graph.new_vertex_property("int", self.db['Cited by'].values)
        self.graph.vertex_properties['nmb_citation'] = cb
        # Add links
        auths = list(self.author_betweeness.keys())
        auth2ind = {auth: i for i, auth in enumerate(auths)}
        # Vertex i is the i-th remaining row.  Once rows have been dropped
        # the index labels are no longer 0..n-1, so translate labels to row
        # positions before using them as vertex indices.
        label2pos = {label: pos for pos, label in enumerate(self.db.index)}
        for pubs in self._get_author_publication().values():
            if len(pubs) < 2:
                continue
            positions = [label2pos[label] for label in pubs]
            self.graph.add_edge_list(
                list(itertools.combinations(positions, 2)))
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph)
        elif layout in ("sfpd", "sfdp"):
            # "sfpd" is the historical spelling; the layout is sfdp
            self.layout_pos = sfdp_layout(self.graph)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(self.graph)
        elif layout == "radial":
            # NOTE(review): the root is the *author* index of a hard-coded
            # name although vertices are articles here - confirm intent.
            self.layout_pos = radial_tree_layout(self.graph,
                                                 auth2ind['Logan, B.E.'])
        else:
            raise ValueError()

    def make_author_graph(self, layout="arf"):
        """Make an author graph

        One vertex per author of ``self.author_list``; one edge per pair of
        co-authors, with the number of shared articles as ``weight``.

        :param layout: "arf", "sfpd" (or "sfdp"), "fr", "radial" or "planar".
        :raises ValueError: on an unknown layout name.
        """
        self.graph = Graph(directed=False)
        # add vertex
        auths = self.author_list
        self.graph.add_vertex(len(auths))
        # add links
        auth2ind = {auth: i for i, auth in enumerate(auths)}
        abet = []
        authbet = copy.deepcopy(self.author_betweeness)
        for auth in auths:
            for col, weight in authbet[auth].items():
                if col == auth:
                    continue
                self.graph.add_edge(auth2ind[auth], auth2ind[col])
                del authbet[col][auth]  # ensure that edges are not doubled
                abet.append(weight)
        # add properties
        cb = self.graph.new_edge_property("int", abet)
        self.graph.edge_properties['weight'] = cb
        # layout
        if layout == "arf":
            self.layout_pos = arf_layout(self.graph,
                                         weight=self.graph.ep.weight,
                                         pos=self.layout_pos,
                                         max_iter=10000)
        elif layout in ("sfpd", "sfdp"):
            self.layout_pos = sfdp_layout(self.graph,
                                          eweight=self.graph.ep.weight,
                                          pos=self.layout_pos)
        elif layout == "fr":
            self.layout_pos = fruchterman_reingold_layout(
                self.graph,
                weight=self.graph.ep.weight,
                circular=True,
                pos=self.layout_pos)
        elif layout == "radial":
            # root the tree at the author with the most citations
            nc = self.get_total_citation()
            main_auth_ind = np.argmax(list(nc.values()))
            main_auth = list(nc.keys())[main_auth_ind]
            self.layout_pos = radial_tree_layout(self.graph,
                                                 auth2ind[main_auth])
        elif layout == "planar":
            self.layout_pos = planar_layout(self.graph)
        else:
            raise ValueError()

    def display_article_graph(self, out="graph.pdf", min_size=1, max_size=10,
                              indice=False):
        """Display an article graph

        One point per article.
        Size and color correspond to the number of citations.
        """
        cb = np.log(np.array(self.graph.vp.nmb_citation.a)+2)
        ms = cb/max(cb)*(max_size - min_size) + min_size
        ms = self.graph.new_vertex_property('float', ms)
        graph_draw(self.graph, pos=self.layout_pos, output=out,
                   vertex_size=ms,
                   vertex_fill_color=self.graph.vp.nmb_citation,
                   vcmap=plt.cm.viridis)

    def display_author_graph(self, out="graph.pdf", min_size=1, max_size=10,
                             indice=False):
        """Display an author graph

        Vertex size follows the total citations of the author, vertex color
        the number of articles and edge width the number of shared articles.
        With ``indice`` set, each vertex is labelled with its index.
        """
        auths = self.author_list
        nc = self.get_total_citation()
        nc = [int(nc[auth]) for auth in auths]
        na = self.get_auth_nmb_of_art()
        na = [int(na[auth]) for auth in auths]
        # normalize citation number
        nc = np.array(nc, dtype=float)
        nc /= np.max(nc)
        nc *= (max_size - min_size)
        nc += min_size
        # normalize edge width
        weight = np.array(self.graph.ep.weight.a, dtype=float)
        weight /= np.max(weight)
        weight *= (1 - 0.1)
        weight += 0.1
        # Get vertex display order
        vorder = np.argsort(nc)
        # Get index
        if indice:
            text = range(len(vorder))
            textg = self.graph.new_vertex_property('string', text)
        else:
            textg = None
        # plot
        ncg = self.graph.new_vertex_property('float', nc)
        nag = self.graph.new_vertex_property('int', na)
        vorderg = self.graph.new_vertex_property('int', vorder)
        weightg = self.graph.new_edge_property('float', weight)
        self.graph.vp['nmb_citation'] = ncg
        graph_draw(self.graph, pos=self.layout_pos, output=out,
                   vertex_fill_color=nag,
                   vertex_size=ncg,
                   edge_pen_width=weightg,
                   vertex_text=textg,
                   vorder=vorderg,
                   vertex_text_position=0,
                   vcmap=plt.cm.PuBu)
def build_region_closure(g, root, regions, infection_times, obs_nodes,
                         debug=False):
    """Return a closure graph on the regions (components).

    A copy of ``regions`` is extended with a region holding only ``root``
    (head time ``-inf``).  For every pair of regions, the head of the region
    with the later head time is searched back (``cpbfs_search``) towards the
    nodes of the other region whose time is earlier than that head time; when
    one is reachable, an edge from the earlier region to the later one is
    recorded with the smallest distance found.

    ``obs_nodes`` is accepted but not read by this function.

    :return: ``(gc, eweight, original_edge_info)`` - the directed closure
        graph with one vertex per region, its integer edge weights, and for
        each ``(earlier, later)`` edge a dict with the keys ``dist``,
        ``pred`` and ``original_edge``.
    """
    regions = copy(regions)
    regions[len(regions)] = {
        'nodes': {root},
        'head': root,
        'head_time': -float('inf'),
    }

    def pseudo_time(node):
        # the root counts as earlier than everything else
        return -float('inf') if node == root else infection_times[node]

    gc = Graph(directed=True)
    gc.add_vertex(len(regions))

    # connect each region
    gc_edges = []
    original_edge_info = {}
    for first, second in combinations(regions, 2):
        # `later` is the region with the *later* head
        if regions[first]['head_time'] < regions[second]['head_time']:
            later, earlier = second, first
        else:
            later, earlier = first, second

        if debug:
            print('i, j={}, {}'.format(later, earlier))

        # only need to connect the later head to one of the nodes of the
        # earlier region whose time stamp is before that head,
        # then add an edge from the earlier region to the later one
        head = regions[later]['head']
        head_time = regions[later]['head_time']
        earlier_nodes = regions[earlier]['nodes']
        targets = [n for n in earlier_nodes if pseudo_time(n) < head_time]

        if debug:
            print('head_i: {}'.format(head))
            print('targets: {}'.format(targets))
            print('regions[j]["nodes"]: {}'.format(earlier_nodes))

        if not targets:
            continue

        visitor = init_visitor(g, head)
        blocked = set(regions[later]['nodes'])
        blocked |= set(earlier_nodes) - set(targets)
        forbidden_nodes = list(blocked)

        if debug:
            print('forbidden_nodes: {}'.format(forbidden_nodes))

        # NOTE: count_threshold = 1
        cpbfs_search(g, source=head, terminals=targets,
                     forbidden_nodes=forbidden_nodes,
                     visitor=visitor,
                     count_threshold=1)

        reachable_targets = [t for t in targets if visitor.dist[t] > 0]

        if debug:
            print('reachable_targets: {}'.format(reachable_targets))

        if not reachable_targets:
            # cannot reach there
            continue

        source = min(reachable_targets, key=lambda t: visitor.dist[t])
        dist = visitor.dist[source]
        assert dist > 0

        gc_edges.append((earlier, later, dist))
        original_edge_info[(earlier, later)] = {
            'dist': dist,
            'pred': visitor.pred,
            'original_edge': (source, head),
        }

    # add every edge first, then attach the weights
    weighted_edges = [(gc.add_edge(u, v), cost) for u, v, cost in gc_edges]
    eweight = gc.new_edge_property('int')
    for edge, cost in weighted_edges:
        eweight[edge] = cost

    return gc, eweight, original_edge_info
class TTC(AbstractMatchingAlgorithm):
    """This class searches for cycles where each student gets his best option.

    This takes a list of students, a list of schools and a ruleset
    (which is used to calculate priorities).
    This works by generating a directed graph, where each student
    points at his best option, and each school points at the student
    (or students) with the highest priority.
    """

    #: Size factor (in the image) of each edge that is not part of the
    #: main cycle.
    EDGE_WIDTH_SIZE_FACTOR = 700
    #: Size factor (in the image) of each edge that takes part of the
    #: main cycle.
    EDGE_WIDTH_CYCLE_SIZE = 10

    def __init__(self, generate_images=False, images_folder="TTC_images",
                 use_longest_cycle=True):
        """Initializes the algorithm.

        :param generate_images: If the process generates images or not.
        :type generate_images: bool
        :param images_folder: Where images are saved.
        :type images_folder: str
        :param use_longest_cycle: If the algorithm applies the longest cycle available, or the first one encountered.
        :type use_longest_cycle: bool
        """
        self.generate_images = generate_images
        self.images_folder = images_folder
        self.use_longest_cycle = use_longest_cycle

        self.__graph = None
        self.__vertices_by_school_id = None
        self.__vertices_by_student_id = None
        self.__students_by_id = None
        self.__schools_by_id = None
        self.__entity_id = None
        self.__entity_type = None

    def reset_variables(self):
        """Resets all variables."""
        self.__graph = Graph()
        self.__vertices_by_school_id = {}
        self.__vertices_by_student_id = {}
        self.__students_by_id = {}
        self.__schools_by_id = {}

        self.__entity_id = self.__graph.new_vertex_property("int")
        self.__graph.vertex_properties["entity_id"] = self.__entity_id
        self.__entity_type = self.__graph.new_vertex_property("string")
        self.__graph.vertex_properties["entity_type"] = self.__entity_type

    def run(self, students, schools, ruleset):
        """Runs the algorithm.

        First it creates the graph, then it lists all the cycles available,
        and applies every cycle found (each student of a cycle is assigned to
        the school he points at). Finally, it starts the process again, until
        no cycle is left.

        :param students: List of students.
        :type students: list
        :param schools: List of school.
        :type schools: list
        :param ruleset: Set of rules used.
        :type ruleset: Ruleset
        """
        self.reset_variables()

        can_improve = True
        iteration_counter = 1

        while can_improve:
            self.structure_graph(students, schools)

            cycles = list(all_circuits(self.__graph, unique=True))

            cycle_edges = []

            if cycles:
                for cycle in cycles:
                    # ToDo: Possible optimisation: apply all disjoint cycles at once
                    for current_v_index in range(len(cycle)):
                        next_v_index = (current_v_index + 1) % len(cycle)

                        from_v = self.__graph.vertex(cycle[current_v_index])
                        target_v = self.__graph.vertex(cycle[next_v_index])
                        edge = self.__graph.edge(from_v, target_v)
                        cycle_edges.append(edge)

                        # Only student -> school edges carry an assignment
                        if self.__entity_type[from_v] == "st":
                            sel_student = self.__students_by_id[
                                self.__entity_id[from_v]]
                            sel_school = self.__schools_by_id[
                                self.__entity_id[target_v]]
                            sel_student.assigned_school = sel_school
                            sel_school.assignation.append(sel_student)

                if self.generate_images:
                    self.generate_image(cycle_edges,
                                        iteration_n=iteration_counter)
            else:
                can_improve = False

            self.__graph.clear()
            # The cached vertex descriptors belong to the graph that was just
            # cleared; drop them so the next iteration creates fresh vertices
            # instead of reusing stale ones.
            self.__vertices_by_school_id.clear()
            self.__vertices_by_student_id.clear()
            iteration_counter += 1

    def structure_graph(self, students, schools):
        """Creates a graph where students points to schools, and schools points to students.

        In the graph, each student points at his best option,
        and each school points at the student (or students) with the
        highest priority.

        :param students: List of students.
        :type students: list
        :param schools: List of schools.
        :type schools: list
        """
        # Fill the id lookups on the first call only
        if not self.__students_by_id and not self.__schools_by_id:
            for student in students:
                self.__students_by_id[student.id] = student
            for school in schools:
                self.__schools_by_id[school.id] = school

        # Every call starts from empty school preference queues
        for school in schools:
            school.preferences = StudentQueue(school, preference_mode=True)

        remaining_students = [
            student for student in students if not student.assigned_school
        ]

        for student in remaining_students:
            for pref_school in student.preferences:
                pref_school.preferences.append(student)

        # Each unassigned student points at his first preference that still
        # has free capacity
        for student in remaining_students:
            v_source_student = self.create_vertex_student(student)

            pref_school = next(
                (school for school in student.preferences
                 if len(school.assignation.get_all_students())
                 < school.capacity),
                None)

            if pref_school:
                v_target_school = self.create_vertex_school(pref_school)
                self.create_edge(v_source_student, v_target_school)

        # Each school with free capacity points at the first student of its
        # preference queue
        for school in schools:
            if len(school.assignation.get_all_students()) < school.capacity:
                v_source_school = self.create_vertex_school(school)

                pref_student = next(
                    iter(school.preferences.get_all_students()), None)

                if pref_student:
                    v_target_student = self.create_vertex_student(
                        pref_student)
                    self.create_edge(v_source_school, v_target_student)

    def create_vertex_student(self, student):
        """Defines a new student as a vertex in the graph (if it did not existed before)."""
        if student.id in self.__vertices_by_student_id:
            vertex = self.__vertices_by_student_id[student.id]
        else:
            vertex = self.__graph.add_vertex()
            self.__vertices_by_student_id[student.id] = vertex
            self.__entity_id[vertex] = student.id
            # ToDo: There may be other ways to do this.
            self.__entity_type[vertex] = "st"
        return vertex

    def create_vertex_school(self, school):
        """Defines a new school as a vertex in the graph (if it did not existed before)."""
        if school.id in self.__vertices_by_school_id:
            vertex = self.__vertices_by_school_id[school.id]
        else:
            vertex = self.__graph.add_vertex()
            self.__vertices_by_school_id[school.id] = vertex
            self.__entity_id[vertex] = school.id
            self.__entity_type[vertex] = "sc"
        return vertex

    def create_edge(self, source_v, target_v):
        """Creates a directed edge between two vertices."""
        self.__graph.add_edge(source_v, target_v)

    def generate_image(self, cycle_edges, iteration_n=0):
        """Generates an image of a graph.

        :param cycle_edges: Edges which are part of the main cycle (they will be highlighted in red).
        :type cycle_edges: list
        :param iteration_n: Number of iteration of the algorithm (this is added in the filename of the image).
        :type iteration_n: int

        .. DANGER::
          This is an experimental feature.
        """
        edge_color = self.__graph.new_edge_property("vector<float>")
        edge_width = self.__graph.new_edge_property("int")

        for edge in self.__graph.edges():
            if edge in cycle_edges:
                edge_color[edge] = [1., 0.2, 0.2, 0.999]
                edge_width[edge] = 7
            else:
                edge_color[edge] = [0., 0., 0., 0.3]
                edge_width[edge] = 4

        vertex_shape = self.__graph.new_vertex_property("string")
        vertex_size = self.__graph.new_vertex_property("int")

        for vertex in self.__graph.vertices():
            if self.__entity_type[vertex] == "st":
                vertex_shape[vertex] = "circle"
                vertex_size[vertex] = 1
            else:
                vertex_shape[vertex] = "double_circle"
                vertex_size[vertex] = 100

        pos = arf_layout(self.__graph, d=0.2, a=3)
        # ToDo: Move image related code outside the class.
        graph_draw(
            self.__graph,
            pos=pos,
            vertex_text=self.__entity_id,
            vertex_font_size=1,
            vertex_fill_color=[0.97, 0.97, 0.97, 1],
            vertex_color=[0.05, 0.05, 0.05, 0.95],
            vertex_shape=vertex_shape,
            edge_color=edge_color,
            edge_pen_width=edge_width,
            output_size=(1000, 1000),
            bg_color=[1., 1., 1., 1],
            output=self.generate_filename(iteration_n))

    def generate_filename(self, iteration_n):  # ToDo: Move this to utils
        """Returns a filename (which is used to generate the images)."""
        filename = "Graph (iteration {})".format(
            iteration_n) if iteration_n > 0 else "Graph"
        output_file = gen_filepath(self.images_folder,
                                   filename=filename,
                                   extension="png")
        return output_file
filename = 'buf.txt' print(filename) coefficient = 3 word_dict = {} add_dict = {} f = open('bad.txt', 'r', encoding="utf-8") for s in f: # print(s.split(' ')[0]) add_dict[s.split(' ')[0]] = 1 f = open(filename, 'r', encoding="utf-8") pairs_graph = Graph(directed=False) edge_weights = pairs_graph.new_edge_property("int") ver_names = pairs_graph.new_vertex_property("string") for line in f: spl_line = line.split(' ') if len(spl_line) == 1: continue pos = int(spl_line[0]) neg = int(spl_line[1]) cur_weight = pos + coefficient * neg w1 = spl_line[2].strip(' \n\uefef') w2 = spl_line[3].strip(' \n\uefef') if w1 in add_dict or w2 in add_dict:
class GeneralGraph():
    """
    General wrapper for graph-tool or networkx graphs to add edges and nodes
    according to constraints

    NOTE(review): several methods read attributes or call methods that this
    class does not define itself (``cost_instance``, ``hard_constraints``,
    ``set_cost_rest``, ``_compute_edges``, ``add_start_and_dest``); they are
    presumably provided by subclasses or set by the caller - confirm.
    """

    def __init__(self, directed=True, verbose=1):
        self.graphtool = GRAPH_TOOL
        # Initialize graph
        if self.graphtool:
            self.graph = Graph(directed=directed)
            self.weight = self.graph.new_edge_property("float")
        else:
            if directed:
                print("directed graph")
                self.graph = nx.DiGraph()
            else:
                self.graph = nx.Graph()
        # set metaparameter
        self.time_logs = {}
        self.verbose = verbose

    def set_edge_costs(self,
                       layer_classes=["resistance"],
                       class_weights=[1],
                       **kwargs):
        """
        Initialize edge cost variables
        :param layer_classes: list of cost categories
        :param class_weights: list of weights for cost categories - must be
                        of same shape as layer_classes; they are normalised
                        to sum to 1
        """
        class_weights = np.array(class_weights)
        # set different costs; store a copy so that instances never share
        # (and mutate) the default list of the signature
        self.cost_classes = list(layer_classes)
        if self.graphtool:
            self.cost_props = [
                self.graph.new_edge_property("float")
                for _ in range(len(layer_classes))
            ]
        self.cost_weights = class_weights / np.sum(class_weights)
        if self.verbose:
            print(self.cost_classes, self.cost_weights)
        # save weighted instance for plotting
        self.instance = np.sum(
            np.moveaxis(self.cost_instance, 0, -1) * self.cost_weights,
            axis=2) * self.hard_constraints

    def set_shift(self,
                  start,
                  dest,
                  pylon_dist_min=3,
                  pylon_dist_max=5,
                  max_angle=np.pi / 2,
                  **kwargs):
        """
        Initialize shift variable by getting the donut values
        :param start, dest: start and destination; their difference is the
                        direction vector handed to get_half_donut
        :param pylon_dist_min, pylon_dist_max: min and max distance of pylons
        :param max_angle: Maximum angle of edges to the direction vector
        """
        vec = dest - start
        if self.verbose:
            print("SHIFT:", pylon_dist_min, pylon_dist_max, vec, max_angle)
        self.shifts = get_half_donut(pylon_dist_min,
                                     pylon_dist_max,
                                     vec,
                                     angle_max=max_angle)
        self.shift_tuples = self.shifts

    def set_corridor(self,
                     dist_surface,
                     start_inds,
                     dest_inds,
                     sample_func="mean",
                     sample_method="simple",
                     factor_or_n_edges=1):
        """
        Restrict the costs to a corridor and optionally downsample them
        :param dist_surface: array; cells with a value > 0 form the corridor
        :param start_inds, dest_inds: cells whose original costs are restored
                        after masking / downsampling
        :param sample_func, sample_method: forwarded to CostUtils.downsample
        :param factor_or_n_edges: downsampling factor (applied if > 1)
        """
        # set new corridor
        corridor = (dist_surface > 0).astype(int)

        self.factor = factor_or_n_edges
        allowed = (self.hard_constraints > 0).astype(int) * corridor
        self.cost_rest = self.cost_instance * allowed
        # downsample
        tic = time.time()
        if self.factor > 1:
            self.cost_rest = CostUtils.downsample(self.cost_rest,
                                                  self.factor,
                                                  mode=sample_method,
                                                  func=sample_func)

        self.time_logs["downsample"] = round(time.time() - tic, 3)

        # repeat because edge artifacts
        self.cost_rest = self.cost_rest * allowed

        # add start and end TODO ugly
        self.cost_rest[:, dest_inds[0],
                       dest_inds[1]] = self.cost_instance[:, dest_inds[0],
                                                          dest_inds[1]]
        self.cost_rest[:, start_inds[0],
                       start_inds[1]] = self.cost_instance[:, start_inds[0],
                                                           start_inds[1]]

    def _count_edges(self):
        """Return the number of edges without building the edge list."""
        if self.graphtool:
            return self.graph.num_edges()
        return self.graph.number_of_edges()

    def add_nodes(self, nodes):
        """
        Add vertices to the graph
        param nodes: number of vertices to add (with networkx the nodes
                     0..nodes-1 are added)
        """
        tic = time.time()
        # add nodes to graph
        if self.graphtool:
            _ = self.graph.add_vertex(nodes)
            self.n_nodes = self.graph.num_vertices()
        else:
            self.graph.add_nodes_from(np.arange(nodes))
            self.n_nodes = self.graph.number_of_nodes()
        # verbose
        if self.verbose:
            print("Added nodes:", nodes, "in time:", time.time() - tic)
        self.time_logs["add_nodes"] = round(time.time() - tic, 3)

    def add_edges(self):
        """
        Compute and add the edges of every shift in self.shift_tuples

        :raises RuntimeError: if a computed edge has a negative endpoint
        """
        tic_function = time.time()

        n_edges = 0
        times_edge_list = []
        times_add_edges = []

        if self.verbose:
            print("n_neighbors:", len(self.shift_tuples))

        for shift in self.shift_tuples:

            tic_edges = time.time()

            # set cost rest if necessary (random graph)
            self.set_cost_rest()

            # compute shift and weights
            out = self._compute_edges(shift)

            # Error if -1 entries because graph-tool crashes with -1 nodes
            if np.any(out[:, :2].flatten() < 0):
                print(np.where(out[:, :2] < 0))
                raise RuntimeError

            n_edges += len(out)
            times_edge_list.append(round(time.time() - tic_edges, 3))

            # add edges to graph
            tic_graph = time.time()
            if self.graphtool:
                self.graph.add_edge_list(out, eprops=self.cost_props)
            else:
                nx_edge_list = [(e[0], e[1], {
                    "weight": np.sum(e[2:] * self.cost_weights)
                }) for e in out]
                self.graph.add_edges_from(nx_edge_list)
            times_add_edges.append(round(time.time() - tic_graph, 3))

        self.n_edges = self._count_edges()
        self._update_time_logs(times_add_edges, times_edge_list, tic_function)
        if self.verbose:
            print("DONE adding", n_edges, "edges:", time.time() - tic_function)

    def _update_time_logs(self, times_add_edges, times_edge_list,
                          tic_function):
        """Store the timings collected by add_edges in self.time_logs."""
        self.time_logs["add_edges"] = round(np.mean(times_add_edges), 3)
        self.time_logs["add_edges_times"] = times_add_edges

        self.time_logs["edge_list"] = round(np.mean(times_edge_list), 3)
        self.time_logs["edge_list_times"] = times_edge_list

        self.time_logs["add_all_edges"] = round(time.time() - tic_function, 3)

        if self.verbose:
            print("Done adding edges:", self._count_edges())

    def sum_costs(self):
        """
        Additive weighting of costs
        Take the individual edge costs, compute weighted sum --> self.weight
        (graph-tool only; does nothing for networkx graphs)
        """
        # add sum of all costs
        if not self.graphtool:
            return
        tic = time.time()
        summed_costs_arr = np.zeros(self.cost_props[0].get_array().shape)
        for cost_prop, cost_weight in zip(self.cost_props, self.cost_weights):
            summed_costs_arr += cost_prop.get_array() * cost_weight
        self.weight.a = summed_costs_arr

        self.time_logs["sum_of_costs"] = round(time.time() - tic, 3)

    def remove_vertices(self, dist_surface, delete_padding=0):
        """
        Remove all edges of the graph so they can be replaced by a refined
        surface.  Both parameters are currently unused.
        @param dist_surface: a surface where each pixel value corresponds to
                        the distance of the pixel to the shortest path
        @param delete_padding: define padding in which part of the corridor
                        to delete vertices (cannot delete all because then
                        graph unconnected)
        """
        tic = time.time()
        self.graph.clear_edges()
        self.graph.shrink_to_fit()
        self.time_logs["remove_edges"] = round(time.time() - tic, 3)

    def get_pareto(self,
                   vary,
                   source,
                   dest,
                   out_path=None,
                   compare=[0, 1],
                   plot=1):
        """
        Arguments:
            vary: how many weights to explore
                    e.g 3 --> each cost class can have weight 0, 0.5 or 1
            source, dest: as always the source and destination vertex
            out_path: where to save the pareto figure(s)
            compare: indices of cost classes to compare (2 or 3 of them)
            plot: whether to plot the pareto scatter
        Returns:
            paths: All found paths
            weights: the explored weight combinations
            cost_sum: overall (unweighted) cost of each path
        Raises:
            ValueError: if compare has neither length 2 nor 3
        """
        tic = time.time()
        # initialize lists
        pareto = list()
        paths = list()
        cost_sum = list()
        # get the edge costs
        cost_arrs = [cost.get_array() for cost in self.cost_props]
        # get vary weights between 0 and 1
        var_weights = np.around(np.linspace(0, 1, vary), 2)
        # construct weights array
        if len(compare) == 2:
            weights = [[v, 1 - v] for v in var_weights]
        elif len(compare) == 3:
            weights = list()
            for w0 in var_weights:
                for w1 in var_weights[var_weights <= 1 - w0]:
                    weights.append([w0, w1, 1 - w0 - w1])
        else:
            raise ValueError("argument compare can only have length 2 or 3")

        # w_avail: keep weights of non-compare classes, get leftover amount
        w_avail = np.sum(np.asarray(self.cost_weights)[compare])
        # compute paths for each combination of weights
        for weight_combo in weights:
            w = self.cost_weights.copy()
            # replace the ones we want to compare
            w[compare] = np.array(weight_combo) * w_avail
            # weighted sum of edge costs
            self.weight.a = np.sum(
                [cost_arrs[i] * w[i] for i in range(len(cost_arrs))], axis=0)
            # get shortest path
            # NOTE(review): three values are unpacked here, but
            # get_shortest_path as defined in this class returns a single
            # path - presumably overridden by subclasses; confirm.
            path, path_costs, _ = self.get_shortest_path(source, dest)
            # don't take cost_sum bc this is sum of original weighting
            pareto.append(np.sum(path_costs, axis=0)[compare])
            paths.append(path)
            # take overall sum of costs (unweighted) that this w leads to
            cost_sum.append(np.sum(path_costs))

        # print best weighting
        best_weight = np.argmin(cost_sum)
        w = self.cost_weights.copy()
        w[compare] = np.array(weights[best_weight]) * w_avail
        print("Best weights:", w, "with (unweighted) costs:", np.min(cost_sum))

        self.time_logs["pareto"] = round(time.time() - tic, 3)

        pareto = np.array(pareto)
        classes = [self.cost_classes[comp] for comp in compare]
        # Plotting
        if plot:
            if len(compare) == 2:
                plot_pareto_scatter_2d(pareto,
                                       weights,
                                       classes,
                                       cost_sum=cost_sum,
                                       out_path=out_path)
            elif len(compare) == 3:
                plot_pareto_scatter_3d(pareto,
                                       weights,
                                       classes,
                                       cost_sum=cost_sum,
                                       out_path=out_path)
        return paths, weights, cost_sum

    def get_shortest_path(self, source, target):
        """
        Compute shortest path from source vertex to target vertex

        Returns the list of vertices of the path; with networkx an empty list
        is returned (and no timing logged) when no path exists.
        """
        tic = time.time()
        # if source and target are given as indices:
        if self.graphtool:
            vertices_path, _ = shortest_path(self.graph,
                                             source,
                                             target,
                                             weights=self.weight,
                                             negative_weights=True)
        else:
            try:
                vertices_path = nx.dijkstra_path(self.graph, source, target)
            except nx.exception.NetworkXNoPath:
                return []

        self.time_logs["shortest_path"] = round(time.time() - tic, 3)
        return vertices_path

    def save_graph(self, OUT_PATH):
        """
        Save the graph in OUT_PATH
        (".xml.gz" is appended for graph-tool, ".weighted.edgelist" for
        networkx)
        """
        if self.graphtool:
            for cost_class, cost_prop in zip(self.cost_classes,
                                             self.cost_props):
                self.graph.edge_properties[cost_class] = cost_prop
            self.graph.edge_properties["weight"] = self.weight
            self.graph.save(OUT_PATH + ".xml.gz")
        else:
            nx.write_weighted_edgelist(self.graph,
                                       OUT_PATH + '.weighted.edgelist')

    def load_graph(self, IN_PATH):
        """
        Retrieve graph from IN_PATH into self.g_prev
        (and its weights into self.weight_prev for graph-tool)
        """
        if self.graphtool:
            self.g_prev = load_graph(IN_PATH + ".xml.gz")
            self.weight_prev = self.g_prev.ep.weight
        else:
            self.g_prev = nx.read_edgelist(IN_PATH + '.weighted.edgelist',
                                           nodetype=int,
                                           data=(('weight', float), ))

    # -----------------------------------------------------------------------
    # INTERFACE

    def single_sp(self, **kwargs):
        """
        Function for full processing until shortest path

        Requires the keyword arguments ``start_inds`` and ``dest_inds``; all
        keyword arguments are forwarded to set_shift and set_edge_costs.
        """
        self.start_inds = kwargs["start_inds"]
        self.dest_inds = kwargs["dest_inds"]
        self.set_shift(self.start_inds, self.dest_inds, **kwargs)
        if self.verbose:
            print("1) Initialize shifts and instance (corridor)")
        self.set_edge_costs(**kwargs)
        # add vertices
        # NOTE(review): called without the `nodes` argument that add_nodes
        # of this class requires - presumably overridden by subclasses.
        self.add_nodes()
        if self.verbose:
            print("2) Initialize distances to inf and predecessors")
        self.add_edges()
        if self.verbose:
            print("3) Compute source shortest path tree")
            print("number of vertices and edges:", self.n_nodes, self.n_edges)

        # weighted sum of all costs
        self.sum_costs()
        source_v, target_v = self.add_start_and_dest(self.start_inds,
                                                     self.dest_inds)
        # get actual best path
        path, path_costs, cost_sum = self.get_shortest_path(source_v, target_v)
        if self.verbose:
            print("4) shortest path", cost_sum)
        return path, path_costs, cost_sum
# NOTE(review): this is a script fragment; `ver_attr`, `a` and `b` are bound
# before this chunk, and the three plot calls may belong to an enclosing
# definition that is not visible here - confirm the indentation.
# Plot the attribute values in three colour-coded index ranges:
# [0, a] green, (a, b] blue, (b, end) red.
plt.plot(range(0, a + 1), ver_attr[:(a + 1)], 'go')
plt.plot(range(a + 1, b + 1), ver_attr[(a + 1):(b + 1)], 'bo')
plt.plot(range(b + 1, len(ver_attr)), ver_attr[(b + 1):], 'ro')
print("-------------------------------------------------")

# Input file with word pairs and the settings used to weight them.
filename = '../pairparser/results/en_pairs(7).txt'
ftag = '7_3imp'
coefficient = 3  # multiplier applied to the second count (`neg`) below
word_dict = {}  # dict with indexes of nodes by word
# NOTE(review): the file handle is never closed in the visible code.
f = open(filename, 'r', encoding="utf-8")

# Undirected graph with an edge weight and per-vertex name / id properties.
pairs_graph = Graph(directed=False)
edge_weights = pairs_graph.new_edge_property("double")
ver_names = pairs_graph.new_vertex_property("string")
ver_id = pairs_graph.new_vertex_property("int")

# Each line is split on single spaces into: pos, neg, word1, word2.
for line in f:
    spl_line = line.split(' ')
    # lines without any space carry no pair
    if len(spl_line) == 1:
        continue
    pos = int(spl_line[0])
    neg = int(spl_line[1])
    cur_weight = pos + coefficient * neg
    # NOTE(review): '\uefef' looks like a typo for the byte-order mark
    # '\ufeff' - confirm before changing.
    w1 = spl_line[2].strip(' \n\uefef')
    w2 = spl_line[3].strip(' \n\uefef')
class MemoryGraph:
    """Store observations in a graph-tool graph alongside an hnswlib index.

    Every observation is a vertex carrying an ``id``, coordinates ``x``/``y``,
    a value ``t`` and a feature vector ``f``; the feature vector is also added
    to the nearest-neighbour index under the same id. ``vertex_index`` maps
    observation ids to graph vertices.
    """

    # "my_graph.xml.gz"
    def __init__(self, index_path=None, graph_path=None):
        """Load the index / graph from the given paths, or create empty ones."""
        # index
        if index_path is not None:
            self.load_index(index_path)
        else:
            self.init_index()

        # graph
        if graph_path is not None:
            self.load_graph(graph_path)
        else:
            self.init_graph()

    def save_index(self, index_path):
        """Write the hnswlib index to ``index_path``."""
        self.index.save_index(index_path)

    def load_index(self, index_path):
        """Load an l2 / 256-dimensional hnswlib index from ``index_path``."""
        self.index = hnswlib.Index(space='l2', dim=256)
        self.index.load_index(index_path)
        self.index.set_ef(10)

    def init_index(self):
        """Create an empty l2 / 256-dimensional index for up to 50000 items."""
        self.index = hnswlib.Index(space='l2', dim=256)
        self.index.init_index(max_elements=50000, ef_construction=100, M=16)
        self.index.set_ef(10)

    def save_graph(self, graph_path):
        """Write the graph to ``graph_path``."""
        self.graph.save(graph_path)

    def load_graph(self, graph_path):
        """Load the graph and rebuild the id -> vertex lookup table."""
        self.graph = load_graph(graph_path)
        self.vertex_index = dict()
        for v in self.graph.vertices():
            self.vertex_index[self.graph.vp.id[v]] = v

    def init_graph(self):
        """Create an empty undirected graph with all property maps."""
        self.graph = Graph(directed=False)
        self.vertex_index = dict()
        # graph-level counter holding the next free observation id
        self.graph.graph_properties["id"] = self.graph.new_graph_property(
            "long")
        self.graph.graph_properties["id"] = 0
        self.graph.vertex_properties["id"] = self.graph.new_vertex_property(
            "long")
        self.graph.vertex_properties["x"] = self.graph.new_vertex_property(
            "double")
        self.graph.vertex_properties["y"] = self.graph.new_vertex_property(
            "double")
        self.graph.vertex_properties["t"] = self.graph.new_vertex_property(
            "long")
        self.graph.vertex_properties["f"] = self.graph.new_vertex_property(
            "vector<double>")
        self.graph.edge_properties["d"] = self.graph.new_edge_property(
            "double")

    def get_observation(self, id):
        """Return the stored fields of observation ``id`` as a dict.

        Raises ``KeyError`` if the id is unknown.
        """
        v = self.vertex_index[id]
        return dict(id=id,
                    x=self.graph.vp.x[v],
                    y=self.graph.vp.y[v],
                    t=self.graph.vp.t[v],
                    f=self.graph.vp.f[v])

    def get_observations(self, ids):
        """Return ``get_observation`` for every id in ``ids``."""
        return [self.get_observation(id) for id in ids]

    def insert_observation(self, t, y, x, f):
        """Add a new observation and return its freshly assigned id.

        The vertex is also registered in ``vertex_index`` so that the new id
        can immediately be used with ``get_observation`` and
        ``insert_adjacency`` (previously only ``load_graph`` filled that
        table, so lookups of new ids raised ``KeyError``).
        """
        v = self.graph.add_vertex()

        obs_id = self.graph.graph_properties["id"]
        self.graph.graph_properties["id"] = obs_id + 1

        self.graph.vp.id[v] = obs_id
        self.graph.vp.x[v] = x
        self.graph.vp.y[v] = y
        self.graph.vp.t[v] = t
        self.graph.vp.f[v] = f

        self.vertex_index[obs_id] = v
        self.index.add_items([f], [obs_id])

        return obs_id

    def get_adjacencies(self, id, radius):
        """Look up everything reachable from ``id`` within ``radius`` hops."""
        v = self.vertex_index[id]
        # NOTE(review): _neighbors yields vertices, yet they are used as keys
        # of the id -> vertex table here; this presumably relies on vertices
        # comparing equal to their ids - confirm the intended return value.
        return [self.vertex_index[n] for n in self._neighbors(v, radius)]

    def _neighbors(self, v, radius, depth=0):
        """Collect the out-neighbours of ``v`` up to ``radius`` hops away."""
        result = set()
        for w in v.out_neighbors():
            result.add(w)
            if depth + 1 < radius:
                result.update(self._neighbors(w, radius, depth + 1))
        return result

    def insert_adjacency(self, from_id, to_id, distance):
        """Connect two observations with an edge of weight ``distance``."""
        va = self.vertex_index[from_id]
        vb = self.vertex_index[to_id]
        e = self.graph.add_edge(va, vb)
        self.graph.ep.d[e] = distance

    def knn_query(self, feats, k=1):
        """Forward a k-nearest-neighbour query to the hnswlib index."""
        return self.index.knn_query(feats, k)
def build_closure(g, cand_source, terminals, infection_times, k=-1,
                  strictly_smaller=True,
                  debug=False,
                  verbose=False):
    """
    Build a closure graph connecting cand_source and the terminals.

    A constrained BFS is run from cand_source and from every terminal; each
    terminal is only searched towards terminals with a later (or, when
    strictly_smaller is False, equal-or-later) infection time.
    k is forwarded as the search's count_threshold: the larger the k, the
    denser the graph.

    Returns (closure graph, int edge weights, {root: predecessor map}).
    """
    pred_by_root = {}
    closure_edges = {}
    terminals = list(terminals)

    # from cand_source to terminals
    source_visitor = init_visitor(g, cand_source)
    cpbfs_search(g,
                 source=cand_source,
                 visitor=source_visitor,
                 terminals=terminals,
                 forbidden_nodes=terminals,
                 count_threshold=k)
    pred_by_root[cand_source] = source_visitor.pred
    closure_edges.update(
        ((u, v), c)
        for u, v, c in get_edges(source_visitor.dist, cand_source, terminals))

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(closure_edges))

    if verbose:
        roots = tqdm(terminals)
        print('building closure graph')
    else:
        roots = terminals

    # from terminal to other terminals
    for root in roots:
        if not strictly_smaller:
            # respect what the paper presents
            later = [t for t in terminals
                     if infection_times[t] >= infection_times[root]]
        else:
            later = [t for t in terminals
                     if infection_times[t] > infection_times[root]]

        # no one can connect to cand_source
        late_terminals = set(later) - {cand_source}

        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))

        root_visitor = init_visitor(g, root)
        blocked = list(set(terminals) - set(late_terminals))
        cpbfs_search(g,
                     source=root,
                     visitor=root_visitor,
                     terminals=list(late_terminals),
                     forbidden_nodes=blocked,
                     count_threshold=k)
        pred_by_root[root] = root_visitor.pred

        for u, v, c in get_edges(root_visitor.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(u, v))
            closure_edges[(u, v)] = c

    if verbose:
        print('returning closure graph')

    # same vertex set as g, one directed edge per collected pair
    gc = Graph(directed=True)
    remaining = g.num_vertices()
    while remaining > 0:
        gc.add_vertex()
        remaining -= 1

    for endpoints in closure_edges:
        gc.add_edge(*endpoints)

    # weights follow the insertion order of the edges
    eweight = gc.new_edge_property('int')
    eweight.set_2d_array(np.array(list(closure_edges.values())))
    # for e, c in edges.items():
    #     eweight[e] = c
    return gc, eweight, pred_by_root
class Network:
    """Directed graph-tool network of teams linked by their matches.

    Vertices carry ``name`` and ``id``; every edge goes from the loser to the
    winner of a match and carries an integer ``weight``. PageRank and degree
    centrality are computed once when the network is built from raw data.
    """

    def __init__(self, nodes_info=None, links_info=None, file_name=None):
        """Build the network from node / link records, or load it from a file.

        Nothing is loaded or created when neither (non-empty) ``nodes_info``
        and ``links_info`` nor ``file_name`` is supplied.
        """
        self.g = Graph()

        if nodes_info and links_info:
            self.nodes_info = nodes_info
            self.links_info = links_info

            self.g.vertex_properties["name"] = self.g.new_vertex_property(
                'string')
            self.g.vertex_properties["id"] = self.g.new_vertex_property(
                'int32_t')
            self.g.edge_properties["weight"] = self.g.new_edge_property(
                'int32_t')

            self.create_network()

            self.g.vertex_properties["pagerank"] = pagerank(
                self.g, weight=self.g.edge_properties["weight"])
            self.g.vertex_properties[
                "degree_centrality"] = self.degree_centrality()
        elif file_name:
            self.load_network(file_name)

    def create_network(self):
        """Add one vertex per node record and one edge per link record."""
        # Add Nodes
        for node in self.nodes_info:
            self.add_n(node)

        # Map each team id to the position of its first occurrence once,
        # instead of scanning the whole property map for every link.
        position_of = {}
        n_teams = 0
        for team_id in self.g.vertex_properties.id:
            position_of.setdefault(team_id, n_teams)
            n_teams += 1

        # Add Links
        for link in self.links_info:
            loser = link['loser']
            winner = link['winner']
            weight = link['rounds']
            # Unknown ids fall back to ``n_teams`` (one past the last vertex),
            # exactly what the former linear scan produced.
            n_loser = position_of.get(loser, n_teams)
            n_winner = position_of.get(winner, n_teams)
            self.add_l(n_loser, n_winner, 16 / weight * 100)

    def load_network(self, file_name):
        """Load the graph from ``../../network-graphs/<file_name>`` (gt format)."""
        new_file_name = '..' + sep + '..' + sep + 'network-graphs' + sep + file_name
        self.g.load(new_file_name, fmt="gt")

    def get_normalized_pagerank(self):
        """Return every vertex's PageRank divided by the largest PageRank."""
        max_pgr = 0
        for pgr in self.g.vertex_properties.pagerank:
            if pgr > max_pgr:
                max_pgr = pgr

        return [
            self.g.vertex_properties.pagerank[v] / max_pgr
            for v in self.g.vertices()
        ]

    def add_n(self, node_info):
        """Add a vertex for the record with keys ``'id'`` and ``'Team_Name'``."""
        n = self.g.add_vertex()
        self.g.vertex_properties.id[n] = node_info['id']
        self.g.vertex_properties.name[n] = node_info['Team_Name']

    def add_l(self, loser, winner, weight):
        """Add a weighted edge between the vertices at the given indices."""
        n1 = self.g.vertex(loser)
        n2 = self.g.vertex(winner)
        l = self.g.add_edge(n1, n2)
        self.g.edge_properties.weight[l] = weight

    def draw(self, output_file, fmt):
        """Draw the graph, labelling every vertex with its index."""
        graph_draw(self.g,
                   vertex_text=self.g.vertex_index,
                   output=output_file,
                   fmt=fmt)

    def save_network(self, file_name):
        """Save to ``../../network-graphs/<file_name>``; return success flag."""
        try:
            new_file_name = '..' + sep + '..' + sep + 'network-graphs' + sep + file_name
            self.g.save(new_file_name, fmt="gt")
        except Exception:
            # best effort: report failure, but let KeyboardInterrupt and
            # SystemExit propagate
            return False
        return True

    def vp_pagerank(self):
        """Return the ``pagerank`` vertex property map."""
        return self.g.vertex_properties.pagerank

    def vp_degree_cent(self):
        """Return the ``degree_centrality`` vertex property map."""
        return self.g.vertex_properties.degree_centrality

    def vp_name(self):
        """Return the ``name`` vertex property map."""
        return self.g.vertex_properties.name

    def vp_id(self):
        """Return the ``id`` vertex property map."""
        return self.g.vertex_properties.id

    def ep_weight(self):
        """Return the ``weight`` edge property map."""
        return self.g.edge_properties.weight

    # Computes the basic characteristics of the network
    def get_basic_info(self):
        """Return a dict of basic statistics; empty if any computation fails."""
        info = {}
        try:
            n_vertices = self.g.num_vertices()
            n_edges = self.g.num_edges()
            density = n_edges / ((n_vertices * (n_vertices - 1)) / 2)
            mean_degree = (2 * n_edges) / n_vertices

            # Clustering coefficient computed "by hand" as the mean of the
            # local coefficients returned by graph-tool
            local_cc = local_clustering(self.g)
            clustering_coef = fsum(
                [local_cc[x] for x in self.g.vertices() if local_cc[x] != 0.0])
            clustering_coef /= n_vertices

            info["Número de times"] = n_vertices
            info["Número de confrontos"] = n_edges
            info["Densidade"] = density
            info["Grau médio"] = mean_degree
            info["Coeficiente de Clusterização"] = clustering_coef
        except Exception:
            # e.g. division by zero for graphs with fewer than two vertices
            info.clear()

        return info

    def degree_centrality(self):
        """Return a vertex property with in-degree / (number of vertices - 1)."""
        degree_centrality = self.g.new_vertex_property('float')

        for v in self.g.vertices():
            degree_centrality[v] = v.in_degree() / (self.g.num_vertices() - 1)

        return degree_centrality

    # Computes the degree distribution of the network
    def degree_distribution(self):
        """Return ``{in_degree: fraction of vertices}``; empty on failure."""
        degree_dist = {}

        try:
            for v in self.g.vertices():
                degree = v.in_degree()
                degree_dist[degree] = degree_dist.get(degree, 0) + 1

            n_vertices = self.g.num_vertices()
            for degree in degree_dist:
                degree_dist[degree] /= n_vertices
        except Exception:
            degree_dist.clear()

        return degree_dist
def build_closure(g, cand_source, terminals, infection_times, k=-1,
                  strictly_smaller=True,
                  debug=False,
                  verbose=False):
    """
    Build the closure graph over cand_source and the terminals.

    Searches run from cand_source towards all terminals, and from each
    terminal towards the terminals infected later than it (or at the same
    time, when strictly_smaller is False). k is passed on as the search's
    count_threshold: the larger the k, the denser the graph.

    Returns the directed closure graph, its int edge weights and a dict
    mapping every search root to the predecessor map of its search.
    """
    terminals = list(terminals)
    predecessors = {}
    weight_of = {}

    # from cand_source to terminals
    vis = init_visitor(g, cand_source)
    cpbfs_search(g, source=cand_source, visitor=vis, terminals=terminals,
                 forbidden_nodes=terminals, count_threshold=k)
    predecessors[cand_source] = vis.pred
    for tail, head, cost in get_edges(vis.dist, cand_source, terminals):
        weight_of[(tail, head)] = cost

    if debug:
        print('cand_source: {}'.format(cand_source))
        print('#terminals: {}'.format(len(terminals)))
        print('edges from cand_source: {}'.format(weight_of))

    if verbose:
        terminal_roots = tqdm(terminals)
        print('building closure graph')
    else:
        terminal_roots = terminals

    # from terminal to other terminals
    for root in terminal_roots:
        candidates = []
        for t in terminals:
            if strictly_smaller:
                is_late = infection_times[t] > infection_times[root]
            else:
                # respect what the paper presents
                is_late = infection_times[t] >= infection_times[root]
            if is_late:
                candidates.append(t)

        # no one can connect to cand_source
        late_terminals = set(candidates) - {cand_source}

        if debug:
            print('root: {}'.format(root))
            print('late_terminals: {}'.format(late_terminals))

        vis = init_visitor(g, root)
        cpbfs_search(
            g,
            source=root,
            visitor=vis,
            terminals=list(late_terminals),
            forbidden_nodes=list(set(terminals) - set(late_terminals)),
            count_threshold=k)
        predecessors[root] = vis.pred

        for tail, head, cost in get_edges(vis.dist, root, late_terminals):
            if debug:
                print('edge ({}, {})'.format(tail, head))
            weight_of[(tail, head)] = cost

    if verbose:
        print('returning closure graph')

    # closure graph with as many vertices as g
    gc = Graph(directed=True)
    for _unused in range(g.num_vertices()):
        gc.add_vertex()

    for tail, head in weight_of:
        gc.add_edge(tail, head)

    # weights are written in the same order the edges were added
    eweight = gc.new_edge_property('int')
    costs = list(weight_of.values())
    eweight.set_2d_array(np.array(costs))
    # for e, c in edges.items():
    #     eweight[e] = c
    return gc, eweight, predecessors
class Network:
    """Directed pass network: players are vertices, passes are weighted edges.

    For every player the coordinates of all passes they took part in are
    accumulated, so that an average position can be derived on demand.
    """

    def __init__(self):
        self.g = Graph(directed=True)
        self.player_id_to_vertex = {}
        self.pairs = {}  # player pair: edge

        # property maps for additional information
        self.g.vertex_properties['player_id'] = self.g.new_vertex_property(
            "string")
        self.g.vertex_properties['player_coords'] = self.g.new_vertex_property(
            "vector<float>")
        self.g.vertex_properties[
            'average_player_coords'] = self.g.new_vertex_property(
                "vector<float>")
        self.g.vertex_properties[
            'player_n_coords'] = self.g.new_vertex_property("int")
        self.g.edge_properties['weight'] = self.g.new_edge_property("float")

    @property
    def edge_weights(self):
        """Edge property map holding the accumulated pass scores."""
        return self.g.edge_properties['weight']

    @property
    def player_id_pmap(self):
        """Vertex property map holding the player ids."""
        return self.g.vertex_properties['player_id']

    @property
    def player_coords_pmap(self):
        """Vertex property map holding the summed coordinates per player."""
        return self.g.vertex_properties['player_coords']

    @property
    def player_n_coords_pmap(self):
        """Vertex property map counting the coordinates summed per player."""
        return self.g.vertex_properties['player_n_coords']

    @property
    def average_player_coords_pmap(self):
        """Vertex property map of mean coordinates, recomputed on each access."""
        # lazy evaluation of means
        for v in self.g.vertices():
            self.g.vertex_properties['average_player_coords'][v] = np.asarray(
                self.player_coords_pmap[v]) / self.player_n_coords_pmap[v]
        return self.g.vertex_properties['average_player_coords']

    def add_players(self, pids: List[str]):
        """Add one vertex per player id and return the new vertices as a list.

        ``Graph.add_vertex(n)`` only returns an iterator for ``n > 1``; a
        single vertex is returned directly, so the one-player case is wrapped
        explicitly and an empty list adds nothing.
        """
        n = len(pids)
        if n == 0:
            return []

        added = self.g.add_vertex(n)
        vs = [added] if n == 1 else list(added)

        for pid, v in zip(pids, vs):
            self.player_id_to_vertex[pid] = v
            self.player_id_pmap[v] = pid
        return vs

    def _accumulate_coords(self, vertex, point):
        """Add ``point`` to the coordinate sum of ``vertex`` and count it."""
        coords = self.player_coords_pmap[vertex]
        if len(coords) == 0:
            coords = np.asarray(point)
        else:
            # accumulate
            coords += np.asarray(point)
        self.player_coords_pmap[vertex] = coords
        self.player_n_coords_pmap[vertex] += 1

    def add_passes(self,
                   id_pairs: List[Tuple],
                   coords_pairs: List[Tuple],
                   pass_scores=None):
        """Register passes between players.

        ``id_pairs`` holds (origin id, destination id) tuples; the entries of
        ``coords_pairs`` are indexed as (origin[0], origin[1], dest[2],
        dest[3]). ``pass_scores`` defaults to 1 per pass. Repeated passes
        between the same ordered pair increase the weight of a single edge.
        """
        pairs = [(self.player_id_to_vertex[i1], self.player_id_to_vertex[i2])
                 for i1, i2 in id_pairs]

        # append player coordinates
        n = len(coords_pairs)
        if pass_scores is None:
            pass_scores = [1 for _ in range(n)]

        for i in range(n):
            orig_v, dest_v = pairs[i]
            c = coords_pairs[i]

            # remember orig and dest location
            self._accumulate_coords(orig_v, [c[0], c[1]])
            self._accumulate_coords(dest_v, [c[2], c[3]])

            # if the edge exists, increment its weight instead of creating a
            # new edge
            e = self.pairs.get(pairs[i])
            if e is not None:
                self.edge_weights[e] += pass_scores[i]
            else:
                e = self.g.add_edge(*pairs[i])
                self.pairs[pairs[i]] = e
                self.edge_weights[e] = pass_scores[i]

    def cleanup(self):
        """remove isolated vertices"""
        to_remove = []
        for v in self.g.vertices():
            if v.in_degree() + v.out_degree() == 0:
                to_remove.append(v)
        n = len(to_remove)
        # NOTE(review): fast removal does not preserve the vertex ordering,
        # so descriptors cached in player_id_to_vertex / pairs are presumably
        # stale afterwards - verify before adding players or passes again.
        self.g.remove_vertex(to_remove, fast=True)
        print("Removed {0} isolated vertices".format(n))

    def save(self, file: str):
        """Write the graph to ``file`` in GraphML format."""
        self.g.save(file, fmt='graphml')
class SentenceGraph():
    """Wrapper around a graph-tool graph whose vertices are (word, POS) pairs.

    Edges come in four flavours (sentence, definition, parsed dependency and
    inter-sentence). Each flavour has its own marker property, colour and
    boolean filter property, so the graph can be restricted to one flavour.
    """

    def __init__(self, sentence, directed=False, graph=None):
        """Wrap ``graph`` if given, otherwise create an empty graph.

        When ``graph`` is supplied it is used as is and ``sentence`` and
        ``directed`` are ignored.
        """
        # Create a SentenceGraph from an existing graph tool graph
        if graph is not None:
            self.sentence_graph = graph
            return

        # Create a new SentenceGraph from scratch
        self.sentence_graph = Graph(directed=directed)

        # Graph properties
        sentence_property = self.sentence_graph.new_graph_property("string", sentence)
        self.sentence_graph.graph_properties[SENTENCE_KEY] = sentence_property

        # Vertex properties
        word_property = self.sentence_graph.new_vertex_property("string")
        part_of_speech_property = self.sentence_graph.new_vertex_property("string")
        vertex_color_property = self.sentence_graph.new_vertex_property("vector<double>")
        self.sentence_graph.vertex_properties[WORD_KEY] = word_property
        self.sentence_graph.vertex_properties[PART_OF_SPEECH_KEY] = part_of_speech_property
        self.sentence_graph.vertex_properties[VERTEX_COLOR_KEY] = vertex_color_property

        # Edge properties
        sentence_edge_property = self.sentence_graph.new_edge_property("string")
        definition_edge_property = self.sentence_graph.new_edge_property("string")
        parsed_dependencies_edge_property = self.sentence_graph.new_edge_property("string")
        inter_sentence_edge_property = self.sentence_graph.new_edge_property("string")
        edge_color_property = self.sentence_graph.new_edge_property("vector<double>")
        dependency_edge_property = self.sentence_graph.new_edge_property("string")
        self.sentence_graph.edge_properties[SENTENCE_EDGE_KEY] = sentence_edge_property
        self.sentence_graph.edge_properties[DEFINITION_EDGE_KEY] = definition_edge_property
        self.sentence_graph.edge_properties[PARSED_DEPENDENCIES_EDGE_KEY] = parsed_dependencies_edge_property
        self.sentence_graph.edge_properties[INTER_SENTENCE_EDGE_KEY] = inter_sentence_edge_property
        self.sentence_graph.edge_properties[EDGE_COLOR_KEY] = edge_color_property
        self.sentence_graph.edge_properties[PARSE_TREE_DEPENDENCY_VALUE_KEY] = dependency_edge_property

        # Edge filter properties
        definition_edge_filter_property = self.sentence_graph.new_edge_property("bool")
        inter_sentence_edge_filter_property = self.sentence_graph.new_edge_property("bool")
        parsed_dependencies_edge_filter_property = self.sentence_graph.new_edge_property("bool")
        sentence_edge_filter_property = self.sentence_graph.new_edge_property("bool")
        self.sentence_graph.edge_properties[FILTER_DEFINITION_EDGE_KEY] = definition_edge_filter_property
        self.sentence_graph.edge_properties[FILTER_INTER_SENTENCE_EDGE_KEY] = inter_sentence_edge_filter_property
        self.sentence_graph.edge_properties[FILTER_PARSED_DEPENDENCIES_EDGE_KEY] = parsed_dependencies_edge_filter_property
        self.sentence_graph.edge_properties[FILTER_SENTENCE_EDGE_KEY] = sentence_edge_filter_property

    def get_sentence(self):
        """Return the sentence stored as a graph property."""
        return self.sentence_graph.graph_properties[SENTENCE_KEY]

    # ------------------------------------------------------------------
    # Vertices

    def add_vertex(self, word, pos):
        """Create a vertex for (word, pos), coloured [0, 0, 1, 1]; return it."""
        # Create vertex, set properties
        word_vertex = self.sentence_graph.add_vertex()
        self.sentence_graph.vertex_properties[WORD_KEY][word_vertex] = word
        self.sentence_graph.vertex_properties[PART_OF_SPEECH_KEY][word_vertex] = pos
        self.sentence_graph.vertex_properties[VERTEX_COLOR_KEY][word_vertex] = [0, 0, 1, 1]
        return word_vertex

    def set_vertex_color_from_word(self, word, pos, color=[1, 0, 0, 1]):
        """Colour the vertex found for (word, pos)."""
        word_vertex = self.get_vertex(word, pos)
        return self.set_vertex_color(word_vertex, color)

    def set_vertex_color(self, vertex, color=[1, 0, 0, 1]):
        """Assign ``color`` to ``vertex`` in the vertex colour property."""
        self.sentence_graph.vertex_properties[VERTEX_COLOR_KEY][vertex] = color

    def set_vertices_color(self, vertices, color=[1, 0, 0, 1]):
        """Assign ``color`` to every vertex in ``vertices``."""
        for vertex in vertices:
            self.set_vertex_color(vertex, color)

    # ------------------------------------------------------------------
    # Edges

    def _add_marked_edge(self, source, target, marker_key, filter_key, color):
        """Add an edge, mark its kind, colour it and enable only its filter."""
        edge = self.sentence_graph.add_edge(source, target, add_missing=False)
        self.sentence_graph.edge_properties[marker_key][edge] = edge
        self.sentence_graph.edge_properties[EDGE_COLOR_KEY][edge] = color
        self._set_edge_to_zero_in_all_filters(edge)
        self.sentence_graph.edge_properties[filter_key][edge] = True
        return edge

    def add_sentence_edge_from_words(self, word1, pos1, word2, pos2):
        """Add a sentence edge between the vertices of two (word, pos) pairs."""
        return self.add_sentence_edge(self.get_vertex(word1, pos1), self.get_vertex(word2, pos2))

    def add_sentence_edge(self, word_vertex1, word_vertex2):
        """Add a sentence edge and return it."""
        # Green
        return self._add_marked_edge(
            word_vertex1, word_vertex2,
            SENTENCE_EDGE_KEY, FILTER_SENTENCE_EDGE_KEY, [0.2, 1, 0.2, 1])

    def add_sentence_edges(self, sentence_vertices):
        """Chain consecutive vertices of ``sentence_vertices`` with sentence edges."""
        for i in range(1, len(sentence_vertices)):
            self.add_sentence_edge(sentence_vertices[i - 1], sentence_vertices[i])

    def add_parsed_dependency_edge(self, word_vertex1, word_vertex2, dependency_relationship):
        """Add a parsed-dependency edge labelled with the relationship."""
        # Blue
        parsed_dependency_edge = self._add_marked_edge(
            word_vertex1, word_vertex2,
            PARSED_DEPENDENCIES_EDGE_KEY, FILTER_PARSED_DEPENDENCIES_EDGE_KEY, [0, 0, 1, 1])
        self.sentence_graph.edge_properties[PARSE_TREE_DEPENDENCY_VALUE_KEY][parsed_dependency_edge] = dependency_relationship
        return parsed_dependency_edge

    def add_parsed_dependency_edge_from_words(self, word1, pos1, word2, pos2, dependency_relationship):
        """Add a parsed-dependency edge between two (word, pos) pairs."""
        return self.add_parsed_dependency_edge(
            self.get_vertex(word1, pos1),
            self.get_vertex(word2, pos2),
            dependency_relationship)

    def add_definition_edge_from_words(self, word, pos, definition_word, definition_pos):
        """Add a definition edge between two (word, pos) pairs."""
        return self.add_definition_edge(
            self.get_vertex(word, pos),
            self.get_vertex(definition_word, definition_pos))

    def _set_edge_to_zero_in_all_filters(self, edge):
        """Switch ``edge`` off in all four edge filter properties."""
        self.sentence_graph.edge_properties[FILTER_DEFINITION_EDGE_KEY][edge] = False
        self.sentence_graph.edge_properties[FILTER_INTER_SENTENCE_EDGE_KEY][edge] = False
        self.sentence_graph.edge_properties[FILTER_PARSED_DEPENDENCIES_EDGE_KEY][edge] = False
        self.sentence_graph.edge_properties[FILTER_SENTENCE_EDGE_KEY][edge] = False

    def add_definition_edge(self, word_vertex, definition_word_vertex):
        """Add a definition edge and return it."""
        # Red
        return self._add_marked_edge(
            word_vertex, definition_word_vertex,
            DEFINITION_EDGE_KEY, FILTER_DEFINITION_EDGE_KEY, [1, 0.1, 0.1, 1])

    def add_definition_edges(self, word_vertex, definition_word_vertices):
        """Add definition edges from ``word_vertex`` to all given vertices."""
        # Add edges from the word_vertex to all definition vertices and set
        # the definition edge property on each edge
        for definition_word_vertex in definition_word_vertices:
            self.add_definition_edge(word_vertex, definition_word_vertex)
        return self

    def add_inter_sentence_edge(self, sentence1_word_vertex, sentence2_word_vertex):
        """Add an inter-sentence edge and return it."""
        # Pink
        return self._add_marked_edge(
            sentence1_word_vertex, sentence2_word_vertex,
            INTER_SENTENCE_EDGE_KEY, FILTER_INTER_SENTENCE_EDGE_KEY, [1, 0.05, 1, 1])

    def add_inter_sentence_edge_from_words(self, word1, pos1, word2, pos2):
        """Add an inter-sentence edge between two (word, pos) pairs."""
        return self.add_inter_sentence_edge(
            self.get_vertex(word1, pos1),
            self.get_vertex(word2, pos2))

    # ------------------------------------------------------------------
    # Removal and lookup

    def remove_vertex_by_word(self, word, pos):
        """Remove the vertex found for (word, pos)."""
        self.remove_vertex(self.get_vertex(word, pos))

    def remove_vertex(self, vertex):
        """Remove ``vertex`` from the graph."""
        self.sentence_graph.remove_vertex(vertex)

    def remove_edge(self, word1, pos1, word2, pos2):
        """Remove the edge between the vertices of two (word, pos) pairs."""
        self.sentence_graph.remove_edge(self.get_edge(word1, pos1, word2, pos2))

    def contains(self, word, pos):
        """Return True if a vertex for (word, pos) exists."""
        return self.get_vertex(word, pos) is not None

    def get_vertex(self, word, pos):
        """Return the first vertex matching (word, pos), or None.

        Vertices whose properties cannot be read are skipped.
        """
        for vertex in self.sentence_graph.vertices():
            try:
                vertex_word = self.sentence_graph.vertex_properties[WORD_KEY][vertex]
                vertex_pos = self.sentence_graph.vertex_properties[PART_OF_SPEECH_KEY][vertex]
                if vertex_word == word and vertex_pos == pos:
                    return vertex
            except Exception:
                # best effort: ignore unreadable vertices, but do not swallow
                # KeyboardInterrupt / SystemExit
                pass
        return None

    def get_word_pos_tuple(self, vertex):
        """Return the (word, pos) tuple stored on ``vertex``."""
        return self.sentence_graph.vertex_properties[WORD_KEY][vertex],\
            self.sentence_graph.vertex_properties[PART_OF_SPEECH_KEY][vertex]

    def get_word_pos_tuple_by_index(self, index):
        """Return the (word, pos) tuple of the vertex at ``index``."""
        return self.get_word_pos_tuple(self.get_vertex_by_index(index))

    def get_vertex_by_index(self, index):
        """Return the vertex at ``index``."""
        return self.sentence_graph.vertex(index)

    def get_vertices_iterator(self):
        """Return the graph's vertex iterator."""
        return self.sentence_graph.vertices()

    def get_vertices(self):
        """Return all vertices as a list."""
        return [x for x in self.sentence_graph.vertices()]

    def get_vertex_out_neighbor_word_pos_tuples(self, vertex):
        """Return the (word, pos) tuples of the out-neighbours of ``vertex``."""
        return [self.get_word_pos_tuple(neighbor_vertex)
                for neighbor_vertex in self.get_vertex_out_neighbors(vertex)]

    def get_vertex_in_neighbor_word_pos_tuples(self, vertex):
        """Return the (word, pos) tuples of the in-neighbours of ``vertex``."""
        return [self.get_word_pos_tuple(neighbor_vertex)
                for neighbor_vertex in self.get_vertex_in_neighbors(vertex)]

    def get_vertex_out_neighbors(self, vertex):
        """Return the out-neighbours of ``vertex`` as a list."""
        return [neighbor_vertex for neighbor_vertex in vertex.out_neighbours()]

    def get_vertex_in_neighbors(self, vertex):
        """Return the in-neighbours of ``vertex`` as a list."""
        return [neighbor_vertex for neighbor_vertex in vertex.in_neighbours()]

    def get_word_pos_tuples(self):
        """Return the (word, pos) tuples of all vertices."""
        return [self.get_word_pos_tuple(v) for v in self.sentence_graph.vertices()]

    def get_num_vertices(self):
        """Return the number of vertices."""
        return self.sentence_graph.num_vertices()

    def get_num_edges(self):
        """Return the number of edges."""
        return self.sentence_graph.num_edges()

    def get_edge(self, word1, pos1, word2, pos2):
        """Return the edge between two (word, pos) pairs.

        None is returned when either vertex does not exist.
        """
        vertex_1 = self.get_vertex(word1, pos1)
        vertex_2 = self.get_vertex(word2, pos2)
        return None\
            if vertex_1 is None or vertex_2 is None\
            else self.sentence_graph.edge(vertex_1, vertex_2)

    def get_edges_iterator(self):
        """Return the graph's edge iterator."""
        return self.sentence_graph.edges()

    def get_edges(self):
        """Return all edges as a list."""
        return [x for x in self.sentence_graph.edges()]

    # ------------------------------------------------------------------
    # Filters

    def set_definition_edge_filter(self, inverted=False):
        """Restrict the graph to definition edges (or all others if inverted)."""
        self.sentence_graph.set_edge_filter(
            self.sentence_graph.edge_properties[FILTER_DEFINITION_EDGE_KEY],
            inverted=inverted)

    def set_inter_sentence_edge_filter(self, inverted=False):
        """Restrict the graph to inter-sentence edges (or all others if inverted)."""
        self.sentence_graph.set_edge_filter(
            self.sentence_graph.edge_properties[FILTER_INTER_SENTENCE_EDGE_KEY],
            inverted=inverted)

    def set_parsed_dependency_edge_filter(self, inverted=False):
        """Restrict the graph to parsed-dependency edges (or all others if inverted)."""
        # The filter is applied on the graph itself; the former
        # ``self.sentence_edge`` attribute never existed.
        self.sentence_graph.set_edge_filter(
            self.sentence_graph.edge_properties[FILTER_PARSED_DEPENDENCIES_EDGE_KEY],
            inverted=inverted)

    def set_sentence_edge_filter(self, inverted=False):
        """Restrict the graph to sentence edges (or all others if inverted)."""
        self.sentence_graph.set_edge_filter(
            self.sentence_graph.edge_properties[FILTER_SENTENCE_EDGE_KEY],
            inverted=inverted)

    def clear_filters(self):
        """Remove all vertex and edge filters from the graph."""
        self.sentence_graph.clear_filters()

    # ------------------------------------------------------------------
    # Property accessors

    def get_definition_edges(self):
        """Return a lazy filter over the edges found in the definition property."""
        # NOTE(review): relies on ``edge in property_map`` membership -
        # confirm this selects only definition edges.
        return filter(lambda x: x in self.get_definition_edge_properties(), self.get_edges())

    def get_word_vertex_properties(self):
        """Return the word vertex property map."""
        return self.sentence_graph.vertex_properties[WORD_KEY]

    def get_pos_vertex_properties(self):
        """Return the part-of-speech vertex property map."""
        return self.sentence_graph.vertex_properties[PART_OF_SPEECH_KEY]

    def get_color_vertex_properties(self):
        """Return the vertex colour property map."""
        return self.sentence_graph.vertex_properties[VERTEX_COLOR_KEY]

    def get_sentence_edge_properties(self):
        """Return the sentence-edge marker property map."""
        return self.sentence_graph.edge_properties[SENTENCE_EDGE_KEY]

    def get_definition_edge_properties(self):
        """Return the definition-edge marker property map."""
        return self.sentence_graph.edge_properties[DEFINITION_EDGE_KEY]

    def get_inter_sentence_edge_properties(self):
        """Return the inter-sentence-edge marker property map."""
        return self.sentence_graph.edge_properties[INTER_SENTENCE_EDGE_KEY]

    def get_color_edge_properties(self):
        """Return the edge colour property map."""
        return self.sentence_graph.edge_properties[EDGE_COLOR_KEY]

    def get_vertex_index(self, vertex):
        """Return the index of ``vertex``."""
        return self.sentence_graph.vertex_index[vertex]

    def get_degree_properties(self, degree_type):
        """Return the degree property map for ``degree_type``."""
        return self.sentence_graph.degree_property_map(degree_type)

    def get_graph(self):
        """Return the wrapped graph-tool graph."""
        return self.sentence_graph

    def copy(self):
        """Return a new SentenceGraph wrapping a copy of the graph."""
        return SentenceGraph(
            sentence=self.sentence_graph.graph_properties[SENTENCE_KEY],
            graph=self.sentence_graph.copy())
def plot_networks(stationary_state,
                  free_energies,
                  transition_matrix,
                  cvs,
                  save=True,
                  outputpath="/home/oliverfl/Pictures/network-%s.svg",
                  description="Transition Network",
                  simu_id=None):
    """
    Draw the transition network between states with graph-tool.

    One vertex is drawn per entry of stationary_state (a 2-D input is first
    averaged over axis 0); its size grows with the entry relative to the
    largest one and its label shows the state name and free energy. One edge
    is drawn per positive off-diagonal entry of transition_matrix, labelled
    "count/row total" and with a width growing with that ratio.

    States 0, 1 and 2 are labelled "M", "I" and "A"; any further state is
    labelled with its index.

    :param cvs: unused
    :param save: write to ``outputpath % simu_id`` if True, otherwise render
        into a matplotlib figure titled ``description`` and show it

    Do imports only when necessary to avoid errors with GTK symbols backends
    See the error message I get on
    https://stackoverflow.com/questions/19773190/graph-tool-pyside-gtk-2-x-and-gtk-3-x
    """
    # import gtk
    import graph_tool as gt
    from graph_tool.all import graph_draw, Graph
    # from pylab import *  # for plotting

    if len(stationary_state.shape) == 2:
        stationary_state = np.mean(stationary_state, axis=0)
    free_energy, _boxplot_data = _fix_field_and_data(free_energies, 1000)

    # Create graph and labels
    graph = Graph(directed=True)
    vertices = []
    vertex_sizes = graph.new_vertex_property("float")
    vertex_labels = graph.new_vertex_property("string")
    edge_sizes = graph.new_edge_property("float")
    edge_labels = graph.new_edge_property("string")
    cluster_to_label = {
        0: "M",  # "Intermediate",
        1: "I",  # "Inactive",
        2: "A"  # "Active"
    }

    # ---- create vertices ----
    # the largest population is loop invariant; None only for empty input
    max_rho = max(stationary_state, default=None)
    for i, rho in enumerate(stationary_state):
        v = graph.add_vertex()
        vertices.append(v)
        # np.log(1 + rho / max(stationary_state)) * 100
        vertex_sizes[v] = (1 + 1 * rho / max_rho) * 50
        # Beware that long labels can make the nodes expand to fit the text
        # (and thus override the vertex size)
        state_name = cluster_to_label.get(i, str(i))
        vertex_labels[v] = state_name + " ({0:0.01f})".format(free_energy[i])

    # ---- create edges ----
    for i, row in enumerate(transition_matrix):
        total_traj_count = sum(row)
        for j, rhoij in enumerate(row):
            if rhoij > 0 and i != j:
                # , weight=rhoij, label="{}->{}".format(i, j))
                e = graph.add_edge(vertices[i], vertices[j])
                edge_labels[e] = "{}/{}".format(int(rhoij),
                                                int(total_traj_count))
                # The edge width is proportional to the relative number of
                # transitions from this starting state
                edge_sizes[e] = (1 + 10 * rhoij / total_traj_count) * 3

    # Using matplotlib for rendering if save==False
    plt.figure(figsize=(10, 10))
    graph_draw(
        graph,
        pos=gt.draw.sfdp_layout(graph),
        # output_size=(400, 400),
        output=outputpath % simu_id if save else None,
        # inline=True,
        mplfig=plt.gcf() if not save else None,
        vertex_text=vertex_labels,
        vertex_font_size=10,
        vertex_size=vertex_sizes,
        edge_text=edge_labels,
        edge_pen_width=edge_sizes)
    if not save:
        plt.title(description)
        plt.xticks([], [])
        plt.yticks([], [])
        plt.show()
def build_region_closure(g, root, regions, infection_times, obs_nodes,
                         debug=False):
    """Build a directed closure graph with one vertex per region.

    A shallow copy of ``regions`` is extended with a synthetic region that
    holds only ``root`` (head time ``-inf``). For every pair of regions, the
    one with the later head time is searched from its head towards the
    nodes of the earlier region whose time precedes that head time. If any
    such node is reached, an edge ``earlier -> later`` is recorded, weighted
    by the smallest search distance found.

    ``obs_nodes`` is accepted but not used here.

    Returns ``(gc, eweight, original_edge_info)``: the closure graph, its
    integer edge-weight property map, and a dict keyed by
    ``(earlier, later)`` holding the distance, the visitor's predecessor
    map and the underlying ``(source, head)`` pair.
    """
    regions = copy(regions)
    regions[len(regions)] = {
        'nodes': {root},
        'head': root,
        'head_time': -float('inf'),
    }

    def pseudo_time(node):
        # The root counts as earlier than every other node.
        if node == root:
            return - float('inf')
        return infection_times[node]

    gc = Graph(directed=True)
    gc.add_vertex(len(regions))

    weighted_edges = []
    original_edge_info = {}
    for first, second in combinations(regions, 2):
        # `later` is the region whose head has the later time stamp.
        if regions[first]['head_time'] < regions[second]['head_time']:
            later, earlier = second, first
        else:
            later, earlier = first, second

        if debug:
            print('i, j={}, {}'.format(later, earlier))

        later_region = regions[later]
        earlier_region = regions[earlier]
        head = later_region['head']

        # Only nodes of the earlier region that precede the later head in
        # time are valid end points for the connection.
        targets = [node for node in earlier_region['nodes']
                   if pseudo_time(node) < later_region['head_time']]
        if debug:
            print('head_i: {}'.format(head))
            print('targets: {}'.format(targets))
            print('regions[j]["nodes"]: {}'.format(earlier_region['nodes']))

        if len(targets) == 0:
            continue

        visitor = init_visitor(g, head)
        forbidden_nodes = list(
            set(later_region['nodes'])
            | (set(earlier_region['nodes']) - set(targets)))

        if debug:
            print('forbidden_nodes: {}'.format(forbidden_nodes))

        # NOTE: count_threshold = 1
        cpbfs_search(g, source=head,
                     terminals=targets,
                     forbidden_nodes=forbidden_nodes,
                     visitor=visitor,
                     count_threshold=1)

        # A positive distance is what marks a target as reached.
        reachable_targets = [t for t in targets if visitor.dist[t] > 0]

        if debug:
            print('reachable_targets: {}'.format(reachable_targets))

        if len(reachable_targets) == 0:
            # cannot reach there
            continue

        source = min(reachable_targets, key=visitor.dist.__getitem__)
        dist = visitor.dist[source]
        assert dist > 0

        weighted_edges.append((earlier, later, dist))
        original_edge_info[(earlier, later)] = {
            'dist': dist,
            'pred': visitor.pred,
            'original_edge': (source, head),
        }

    # Each region pair is visited once, so no parallel edges are created
    # and the edge returned by add_edge is the one that receives the weight.
    eweight = gc.new_edge_property('int')
    for tail, tip, cost in weighted_edges:
        eweight[gc.add_edge(tail, tip)] = cost

    return gc, eweight, original_edge_info
filename = 'buf.txt' print(filename) coefficient = 3 word_dict = {} add_dict = {} f = open('bad.txt', 'r', encoding="utf-8") for s in f: # print(s.split(' ')[0]) add_dict[s.split(' ')[0]] = 1 f = open(filename, 'r', encoding="utf-8") pairs_graph = Graph(directed=False) edge_weights = pairs_graph.new_edge_property("int") ver_names = pairs_graph.new_vertex_property("string") for line in f: spl_line = line.split(' ') if len(spl_line) == 1: continue pos = int(spl_line[0]) neg = int(spl_line[1]) cur_weight = pos + coefficient * neg w1 = spl_line[2].strip(' \n\uefef') w2 = spl_line[3].strip(' \n\uefef') if w1 in add_dict or w2 in add_dict:
plt.plot(range(a + 1, b + 1), ver_attr[(a + 1):(b + 1)], 'bo') plt.plot(range(b + 1, len(ver_attr)), ver_attr[(b + 1):], 'ro') print("-------------------------------------------------") filename = '../pairparser/results/en_pairs(7).txt' ftag = '7_3imp' coefficient = 3 word_dict = {} # dict with indexes of nodes by word f = open(filename, 'r', encoding="utf-8") pairs_graph = Graph(directed=False) edge_weights = pairs_graph.new_edge_property("double") ver_names = pairs_graph.new_vertex_property("string") ver_id = pairs_graph.new_vertex_property("int") for line in f: spl_line = line.split(' ') if len(spl_line) == 1: continue pos = int(spl_line[0]) neg = int(spl_line[1]) cur_weight = pos + coefficient * neg w1 = spl_line[2].strip(' \n\uefef') w2 = spl_line[3].strip(' \n\uefef')
class ResourceGraph(object):
    """Graph of named resources (vertices) joined by named links (edges).

    Keeps name-to-vertex and edge-id-to-edge lookup tables alongside the
    underlying graph so resources and links can be queried by name.
    """

    # Fill colours used when drawing vertices.
    v3_color = '#1C366B'
    v2_color = '#1DACE8'
    special_color = '#C4CFD0'

    def __init__(self):
        self.graph = Graph()
        graph = self.graph
        self.v_names = graph.new_vertex_property("string")
        self.v_colors = graph.new_vertex_property("string")
        self.e_names = graph.new_edge_property("string")
        self.vertices = {}
        self.edges = {}

    def add_resource(self, resource_name, is_v3=True, is_special=False):
        """Add a vertex for ``resource_name`` and record its name and colour."""
        new_vertex = self.graph.add_vertex()
        self.vertices[resource_name] = new_vertex
        self.v_names[new_vertex] = resource_name
        self.v_colors[new_vertex] = self.get_color(is_v3, is_special)

    def get_color(self, is_v3, is_special):
        """Return the fill colour; ``is_special`` wins over ``is_v3``."""
        palette = self.__class__
        if is_special:
            return palette.special_color
        if is_v3:
            return palette.v3_color
        return palette.v2_color

    def has_resource(self, resource_name):
        """Tell whether a vertex was registered under ``resource_name``."""
        return resource_name in self.vertices

    def add_link(self, source_name, destination_name, name):
        """Add an edge named ``name`` between two registered resources."""
        tail = self.vertices[source_name]
        head = self.vertices[destination_name]
        new_edge = self.graph.add_edge(tail, head)
        key = self.edge_id(source_name, destination_name, name)
        self.edges[key] = new_edge
        self.e_names[new_edge] = name

    def has_link(self, source_name, destination_name, name):
        """Tell whether a link with this name and these end points exists."""
        key = self.edge_id(source_name, destination_name, name)
        return key in self.edges

    def edge_id(self, source_name, destination_name, name):
        """Return the lookup key identifying a link."""
        return '{} -- {} -- {}'.format(name, source_name, destination_name)

    def draw(self):
        """Render the graph with a radial tree layout rooted at vertex 0."""
        graph = self.graph
        layout = radial_tree_layout(graph, graph.vertex(0))
        graph_draw(
            graph,
            pos=layout,
            vertex_text=self.v_names,
            vertex_fill_color=self.v_colors,
            edge_text=self.e_names,
            output_size=(2000, 1300),
            fit_view=True,
            vertex_font_size=10,
            vertex_pen_width=1,
            vertex_halo=False,
            edge_pen_width=3,
        )
def phylomemetic_graph(steps, communities, min_size=3, max_size=50,
                       parent_limit=2, workers='auto', chunksize='auto',
                       method='fast', min_backwards_containment=0,
                       min_forward_containment=0):
    '''Build a directed graph linking communities of consecutive periods.

    Each period's communities are first filtered with ``filter_by_size``.
    Every remaining community becomes one vertex; vertices are numbered
    period by period. For each pair of consecutive periods, ``find_links``
    is run in a process pool over the communities of the later period, and
    the links it returns become the edges of the graph.

    Parameters
    ----------
    steps : :obj:`iter` of :obj:`int`
        One step value per period, stored on that period's vertices.
    communities : :obj:`iter` of :obj:`iter` of :obj:`int`
        One collection of communities per period.
    min_size : :obj:`int`
        Passed to ``filter_by_size``.
    max_size : :obj:`int`
        Passed to ``filter_by_size``.
    parent_limit : :obj:`int`
        Passed through to ``find_links``.
    workers : :obj:`int`
        Number of pool processes. ``'auto'`` uses one fewer than the CPU
        count, but never less than one.
    chunksize : :obj:`int`
        ``chunksize`` for ``Pool.map``. ``'auto'`` splits each period's
        work evenly across the workers.
    method : :obj:`str`
        Currently unused.
    min_backwards_containment : :obj:`float`
        Currently unused.
    min_forward_containment : :obj:`float`
        Currently unused.

    Returns
    -------
    g : :obj:`graph_tool.Graph`
    group_link_strength : :obj:`graph_tool.EdgePropertyMap`
    single_link_strength : :obj:`graph_tool.EdgePropertyMap`
    vertex_steps : :obj:`graph_tool.VertexPropertyMap`
    element_vertex_map : :obj:`dict`
    '''
    if workers == 'auto':
        # On a single-CPU host ``cpu_count() - 1`` is 0, which Pool rejects
        # and which would also divide by zero in the chunk size below.
        workers = max(1, cpu_count() - 1)

    communities_filt = []
    communities_lengths = []
    element_community_mappings = []
    for sequences in communities:
        s_filt = list(filter_by_size(sequences, min_size, max_size))
        communities_filt.append(s_filt)
        communities_lengths.append(len(s_filt))
        element_community_mappings.append(reverse_index(s_filt))

    # (start, end) vertex index range occupied by each period.
    communities_offsets = []
    cumsum_lengths = np.cumsum(communities_lengths)
    for length, count in zip(communities_lengths, cumsum_lengths):
        communities_offsets.append((count - length, count))

    # int(): np.sum of an empty list is the float 0.0, not a vertex count.
    n_communities = int(np.sum(communities_lengths))

    # len() of the filtered list also works when ``communities`` is an
    # iterable without a length.
    n_periods = len(communities_filt) - 1
    phylomemetic_links = []
    for i, (cps, cfs) in enumerate(window(communities_filt, 2)):
        n_cf = len(cfs)
        logger.info(f'Processing {i+1} of {n_periods} periods')

        if chunksize == 'auto':
            chunksize_i = max(1, int(np.ceil((1 / workers) * n_cf)))
        else:
            chunksize_i = chunksize

        with Pool(workers) as pool:
            phylomemetic_links.append(
                pool.map(
                    find_links,
                    zip(
                        cfs,
                        range(0, len(cfs)),
                        repeat(cps, n_cf),
                        repeat(communities_offsets[i], n_cf),
                        repeat(element_community_mappings[i], n_cf),
                        repeat(parent_limit, n_cf),
                    ),
                    chunksize=chunksize_i,
                ))
            # Shut the workers down gracefully before the context manager
            # terminates the pool.
            pool.close()
            pool.join()

    g = Graph(directed=True)
    g.add_vertex(n_communities)
    group_link_strength = g.new_edge_property('float')
    single_link_strength = g.new_edge_property('float')

    # Links arrive nested per period and per community; flatten twice to
    # get one flat edge list.
    phylomemetic_links = flatten(flatten(phylomemetic_links))
    g.add_edge_list(phylomemetic_links,
                    eprops=[group_link_strength, single_link_strength])

    element_vertex_map = reverse_index_communities(flatten(communities_filt))

    vertex_steps = g.new_vertex_property('int')
    for (start, end), step in zip(communities_offsets, steps):
        vertex_steps.a[start:end] = step

    return (g, group_link_strength, single_link_strength, vertex_steps,
            element_vertex_map)
def gen_graph(repo_events):
    """Build a graph of a repository's actors from its events.

    The single argument is a ``(repo, events)`` pair. It is unpacked in the
    body instead of in the signature because tuple parameters are a syntax
    error on Python 3; callers still pass one positional tuple.

    Events are processed in ``created_at`` order and only the first event
    of each actor is used. That event adds a vertex carrying the event and
    the actor name. An edge is then added from the new vertex to every
    previously recorded actor listed in the event's ``'actor-following'``,
    weighted ``1 / fib(days + 2)`` where ``days`` is the number of days
    between the two actors' first events.

    The repository name and ``events[0]['language']`` (read before sorting)
    are stored as graph properties, so an empty ``events`` raises
    ``IndexError``.

    Returns the populated graph.
    """
    repo, events = repo_events
    graph = Graph()

    repo_on_graph = graph.new_graph_property('string')
    repo_on_graph[graph] = repo
    graph.graph_properties['repo_on_graph'] = repo_on_graph

    language_on_graph = graph.new_graph_property('string')
    language_on_graph[graph] = events[0]['language']
    graph.graph_properties['language_on_graph'] = language_on_graph

    events_on_vertices = graph.new_vertex_property('object')
    graph.vertex_properties['events_on_vertices'] = events_on_vertices

    actors_on_vertices = graph.new_vertex_property('string')
    graph.vertex_properties['actors_on_vertices'] = actors_on_vertices

    weights_on_edges = graph.new_edge_property('long double')
    graph.edge_properties['weights_on_edges'] = weights_on_edges

    # First event and vertex of every actor recorded so far.
    pre_events_map = {}
    pre_vertices_map = {}

    for event in sorted(events, key=lambda x: x['created_at']):
        actor = event['actor']
        if actor in pre_events_map:
            continue

        created_at = event['created_at']
        vertex = graph.add_vertex()
        events_on_vertices[vertex] = event
        actors_on_vertices[vertex] = actor

        if 'actor-following' not in event:
            # NOTE(review): the actor gets a vertex but is not recorded in
            # the two maps, so a later event by the same actor adds another
            # vertex. Confirm this is intended.
            continue

        following = set(event['actor-following'])
        commons = following.intersection(pre_vertices_map.keys())

        for pre_actor in commons:
            edge = graph.add_edge(vertex, pre_vertices_map[pre_actor])
            interval = (
                created_at - pre_events_map[pre_actor]['created_at']).days
            weights_on_edges[edge] = 1.0 / fib(interval + 2)

        pre_events_map[actor] = event
        pre_vertices_map[actor] = vertex

    return graph
def gen_graph(repo_events):
    """Build a graph of a repository's actors, rooted at the repo owner.

    The single argument is a ``(repo, events)`` pair. It is unpacked in the
    body instead of in the signature because tuple parameters are a syntax
    error on Python 3; callers still pass one positional tuple.

    The owner (the part of ``repo`` before the first ``/``) gets the first
    vertex, with a placeholder event dated ``events[0]['repo-created_at']``.
    Events are then processed in ``created_at`` order and only the first
    event of each actor is used. That event adds a vertex and an edge to
    every previously recorded actor listed in its ``'actor-following'``.
    Edges to the owner weigh ``1.0``; other edges weigh
    ``1 / fib(days + 2)``, where ``days`` is the number of days between the
    two actors' first events. Pairs with a negative day interval get no
    edge.

    ``events[0]`` is read before sorting, so an empty ``events`` raises
    ``IndexError``; an event without ``'actor-following'`` raises
    ``KeyError``.

    Returns the populated graph.
    """
    repo, events = repo_events
    graph = Graph()

    repo_on_graph = graph.new_graph_property('string')
    repo_on_graph[graph] = repo
    graph.graph_properties['repo_on_graph'] = repo_on_graph

    language_on_graph = graph.new_graph_property('string')
    language_on_graph[graph] = events[0]['language']
    graph.graph_properties['language_on_graph'] = language_on_graph

    events_on_vertices = graph.new_vertex_property('object')
    graph.vertex_properties['events_on_vertices'] = events_on_vertices

    actors_on_vertices = graph.new_vertex_property('string')
    graph.vertex_properties['actors_on_vertices'] = actors_on_vertices

    weights_on_edges = graph.new_edge_property('long double')
    graph.edge_properties['weights_on_edges'] = weights_on_edges

    # First event and vertex of every actor recorded so far.
    pre_events_map = {}
    pre_vertices_map = {}

    # Seed the graph with the owner so later actors can link to them.
    owner_vertex = graph.add_vertex()
    owner = repo.split('/')[0]
    dummy_event = {'created_at': events[0]['repo-created_at']}
    actors_on_vertices[owner_vertex] = owner
    events_on_vertices[owner_vertex] = dummy_event
    pre_vertices_map[owner] = owner_vertex
    pre_events_map[owner] = dummy_event

    for event in sorted(events, key=lambda x: x['created_at']):
        actor = event['actor']
        if actor in pre_events_map:
            continue

        created_at = event['created_at']
        vertex = graph.add_vertex()
        events_on_vertices[vertex] = event
        actors_on_vertices[vertex] = actor

        following = set(event['actor-following'])
        commons = following.intersection(pre_vertices_map.keys())

        for pre_actor in commons:
            interval = (
                created_at - pre_events_map[pre_actor]['created_at']).days
            if interval < 0:
                continue
            edge = graph.add_edge(vertex, pre_vertices_map[pre_actor])
            if pre_actor == owner:
                weight = 1.0
            else:
                weight = 1.0 / fib(interval + 2)
            weights_on_edges[edge] = weight

        pre_events_map[actor] = event
        pre_vertices_map[actor] = vertex

    return graph
def load_instance(filepath):
    """Load an undirected graph instance from a text file.

    File layout, as read by this function:

    * first line: ``n m`` (vertex count and edge count);
    * ``m`` lines ``v1 v2 w1 w2 w3``: an edge between vertex indices ``v1``
      and ``v2`` with three weights;
    * vertex costs, either all ``n`` on one line or one per line.

    Fields may be separated by any run of whitespace.

    The returned graph carries these internal properties:
    ``vp.is_upgraded`` (all ``False``), ``vp.cost``, ``ep.weight``,
    ``ep.weight_2``, ``ep.weight_3`` and ``gp.total_cost`` (the sum of all
    vertex costs).

    Raises ``AssertionError`` when the header or an edge line has the wrong
    number of fields, and ``ValueError`` when a number cannot be parsed or
    the vertex-cost line is missing.
    """
    g = Graph(directed=False)

    node_upgraded = g.new_vertex_property("bool")
    node_cost = g.new_vertex_property("float")
    edge_weight = g.new_edge_property("float")
    edge_weight_lv2 = g.new_edge_property("float")
    edge_weight_lv3 = g.new_edge_property("float")
    graph_total_cost = g.new_graph_property("float")

    with open(filepath, "r") as f:
        tokens = f.readline().split()
        assert len(tokens) == 2, "header must be 'n m'"
        n = int(tokens[0])
        m = int(tokens[1])
        g.add_vertex(n)

        for _ in range(m):
            tokens = f.readline().split()
            assert len(tokens) == 5, "edge line must be 'v1 v2 w1 w2 w3'"
            e = g.add_edge(int(tokens[0]), int(tokens[1]))
            edge_weight[e] = float(tokens[2])
            edge_weight_lv2[e] = float(tokens[3])
            edge_weight_lv3[e] = float(tokens[4])

        # The number of values on the next line tells which of the two
        # vertex-cost layouts the file uses. split() without an argument
        # drops empty fields, so trailing or repeated blanks do not end up
        # as '' tokens that float() cannot parse.
        v_cost = [float(x) for x in f.readline().split()]
        if not v_cost:
            raise ValueError("missing vertex costs in %r" % (filepath,))

        if len(v_cost) > 1:
            # All costs on a single line.
            for i in range(n):
                v = g.vertex(i)
                node_cost[v] = v_cost[i]
                node_upgraded[v] = False
        else:
            # One cost per line; the first one has already been read.
            v = g.vertex(0)
            node_cost[v] = v_cost[0]
            node_upgraded[v] = False

            vertices = g.vertices()
            next(vertices)  # skip vertex 0, handled above
            for v in vertices:
                node_cost[v] = float(f.readline())
                node_upgraded[v] = False

    g.vp.is_upgraded = node_upgraded
    g.vp.cost = node_cost
    g.ep.weight = edge_weight
    g.ep.weight_2 = edge_weight_lv2
    g.ep.weight_3 = edge_weight_lv3

    graph_total_cost[g] = sum(node_cost[v] for v in g.vertices())
    g.gp.total_cost = graph_total_cost

    return g