def post_parse(grammar):
    """Inline productions of ``grammar`` in place and report what is left.

    A dependency graph is built with one node per key of ``grammar.prods``
    and an edge ``lhs -> tok`` for every token of a production that is itself
    a key of ``grammar.prods``.  Starting from every node reached by a
    breadth-first walk from ``grammar.start``, edges are inlined one at a
    time: the production of the edge's target is spliced into the production
    of its source (followed by a fresh ``Token.alter()`` marker) and the graph
    is rewired accordingly.  An edge is left alone when its target has a
    self-loop or when, with the source removed, the target still belongs to a
    strongly connected component of more than one node.

    :param grammar: object exposing ``prods`` (mapping) and ``start``;
        ``grammar.prods`` is modified in place
    :return: the set of nodes that still have an incoming edge, i.e. the
        nonterminals that could not be expanded
    """
    # Create `G`: one node per production, one edge per nonterminal reference
    G = DiGraph()
    G.add_nodes_from(grammar.prods)
    G.add_edges_from(
        (lhs, tok)
        for lhs, rhs in grammar.prods.items()
        for tok in rhs
        if tok in grammar.prods
    )

    # DEBUG: from ._debug import Drawer # DEBUG
    # DEBUG: drawer = Drawer(G, grammar.start) # DEBUG

    def can_inline(src, dst):
        """Tell whether the edge ``src -> dst`` may be inlined."""
        # A target with a self-loop can never be expanded away
        if G.has_edge(dst, dst):
            return False
        # With `src` taken out, `dst` must form a strongly connected
        # component on its own
        without_src = G.subgraph(n for n in G.nodes_iter() if n != src)
        component = next(c for c in sccs(without_src) if dst in c)
        return len(component) == 1

    # Inlining (the walk is materialised because `G` is mutated below)
    for root, _ in list(bfs_edges(G, grammar.start)):
        while True:
            reachable = [root] + [dst for _, dst in bfs_edges(G, root)]
            candidates = [
                edge for node in reachable for edge in G.edges([node])
            ]

            for ns, nd in reversed(candidates):
                # DEBUG: drawer.draw(G, (ns, nd)) # DEBUG
                if not can_inline(ns, nd):
                    continue

                # Update grammar: every occurrence of `nd` in the production
                # of `ns` is followed by the production of `nd` and the marker
                alter = Token.alter()
                expanded = []
                for tok in grammar.prods[ns]:
                    expanded.append(tok)
                    if tok == nd:
                        expanded.extend(grammar.prods[nd])
                        expanded.append(alter)
                grammar.prods[ns] = Expr(expanded)

                # Update G: `ns` inherits the outgoing edges of `nd`
                G.remove_edge(ns, nd)
                G.add_edges_from((ns, dst) for _, dst in G.edges_iter(nd))

                # DEBUG: drawer.draw(G) # DEBUG
                break  # Recompute the candidate edges for this root
            else:
                # DEBUG: drawer.draw(G) # DEBUG
                break  # Nothing left to inline; go on to the next root

    # Unexpanded nonterminals
    return {nd for _, nd in G.edges_iter()}
def create_island_to_island_graph(g: nx.DiGraph) -> nx.DiGraph:
    """Build a graph whose nodes are the reciprocated edges of ``g``.

    For every edge ``(a, b)`` of ``g`` that also exists in the opposite
    direction, the result gets the nodes ``(a, b)`` and ``(b, a)`` joined by
    an edge ``(a, b) -> (b, a)``.  Node ``(a, b)`` receives a ``'coord'``
    attribute holding the ``(x, y)`` of the centroid of the ``'region'``
    attribute of edge ``(a, b)`` in ``g``.

    :param g: directed graph whose reciprocated edges carry a ``'region'``
        attribute exposing ``.centroid.x`` / ``.centroid.y``
        (presumably a shapely geometry -- TODO confirm)
    :return: the new directed graph; ``g`` is not modified
    """
    GG = nx.DiGraph()
    for node_a, node_b in g.edges_iter():
        # Only edges that have a counterpart in the other direction are kept
        if node_a in g.edge[node_b]:
            GG.add_edge((node_a, node_b), (node_b, node_a))
            cab = g.edge[node_a][node_b]['region'].centroid
            GG.node[(node_a, node_b)]['coord'] = (cab.x, cab.y)
    # NOTE: a stray argument-less ``nx.write_shp()`` call used to sit here.
    # ``write_shp`` requires a graph and an output directory, so the call
    # raised TypeError and the function could never return.  Callers that
    # want a shapefile should call ``nx.write_shp(result, outdir)`` themselves.
    return GG
def reverse_weights(g:nx.DiGraph, weight='weight'):
    """Return a reversed copy of ``g`` with one edge attribute negated.

    :param g: the directed graph to reverse; it is left untouched because
        ``g.reverse()`` is called with its default arguments
    :param weight: name of the edge attribute whose sign is flipped; every
        edge must carry it
    :return: the reversed graph with ``weight`` negated on every edge
    """
    flipped = g.reverse()
    for _, _, attrs in flipped.edges_iter(data=True):
        attrs[weight] = -attrs[weight]
    return flipped
def edges_iter(self, nbunch=None):
    """Iterate over edges, always asking the base class for edge data.

    Delegates to ``DiGraph.edges_iter`` with ``data=True`` and yields
    whatever it produces, unchanged.

    :param nbunch: forwarded as-is to ``DiGraph.edges_iter``
    """
    yield from DiGraph.edges_iter(self, nbunch, data=True)
class TaskGraph(ValidatorMixin):
    """
    A task graph builder. Builds an operations flow graph.

    Tasks are stored as nodes of ``self.graph`` keyed by ``task.id`` with the
    task object under the ``'task'`` node attribute.  A dependency of
    ``source`` on ``target`` is stored as the edge ``source.id -> target.id``.
    Attributes that are not found on the ``TaskGraph`` itself are looked up
    on the underlying graph.
    """

    def __init__(self, name):
        """
        :param name: the name of this task graph
        """
        self.name = name
        self.id = str(uuid4())
        self.graph = DiGraph()

    def __getattr__(self, attr):
        """
        Fall back to the wrapped graph for attributes missing on ``self``.

        :raise AttributeError: if neither this object nor the graph has it
        """
        # ``__getattr__`` only runs when normal lookup failed.  If ``graph``
        # itself is missing (e.g. on a half-built instance during copy or
        # unpickling), touching ``self.graph`` below would re-enter this
        # method forever, so fail fast instead.
        if attr == 'graph':
            raise AttributeError(attr)
        try:
            return getattr(self.graph, attr)
        except AttributeError:
            # Raises AttributeError with a message that names this class
            return super(TaskGraph, self).__getattribute__(attr)

    def __repr__(self):
        return '{name}(id={self.id}, name={self.name}, graph={self.graph!r})'.format(
            name=self.__class__.__name__, self=self)

    @property
    def tasks(self):
        """
        An iterator on tasks added to the graph
        """
        for _, data in self.graph.nodes_iter(data=True):
            yield data['task']

    @property
    def leaf_tasks(self):
        """
        An iterator, in topological order, on the tasks that have no
        predecessors in the graph (i.e. no other task depends on them)
        """
        for task in self.tasks_in_order():
            if not self.graph.predecessors(task.id):
                yield task

    def task_tree(self, reverse=False):
        """
        Iterates over the tasks to be executed in topological order and their
        dependencies. Yields ``(task, dependencies_iterator)`` pairs.

        :param reverse: reverse the order
        """
        for task in self.tasks_in_order(reverse=reverse):
            yield task, self.task_dependencies(task)

    def tasks_in_order(self, reverse=False):
        """
        Iterates over the tasks to be executed in topological order

        :param reverse: reverse the order
        """
        for task_id in topological_sort(self.graph, reverse=reverse):
            yield self.graph.node[task_id]['task']

    def has_dependencies(self, task):
        """
        Tell whether the task depends on at least one other task

        :param task: a task that was added to this graph
        :rtype: bool
        """
        # ``task_dependencies`` is a generator, so it has no ``len()``;
        # probe for a first element instead.
        return any(True for _ in self.task_dependencies(task))

    def task_dependencies(self, task):
        """
        Iterates over the task dependencies

        :param task: a task that was added to this graph
        """
        # Each item is an edge tuple; every id in it other than the task's
        # own id is a dependency.
        for task_ids in self.graph.edges_iter(task.id):
            for task_id in task_ids:
                if task.id != task_id:
                    yield self.get_task(task_id)

    def add_task(self, task):
        """
        Add a task to this graph

        :param WorkflowTask|TaskGraph task: The task
        """
        self.graph.add_node(task.id, task=task)

    def get_task(self, task_id):
        """
        Get a task instance that was inserted to this graph by its id

        :param basestring task_id: the task id
        :return: requested task
        :rtype: WorkflowTask|TaskGraph
        :raise: TaskNotFoundError if no task found with given id
        """
        try:
            data = self.graph.node[task_id]
            return data['task']
        except KeyError:
            raise TaskNotFoundError('Task id: {0}'.format(task_id))

    def remove_task(self, task):
        """
        Remove the provided task from the graph

        :param WorkflowTask|graph task: The task
        """
        self.graph.remove_node(task.id)

    def dependency(self, source_task, after):
        """
        Add a dependency between tasks.
        The source task will only be executed after the target task terminates.
        A source task may depend on several tasks,
        in which case it will only be executed after all its target tasks
        terminate.

        tasks flow order:
        after -> source_task

        Targets are processed in order; edges to the targets preceding a
        missing one have already been added when the error is raised.

        :param WorkflowTask|TaskGraph source_task: The source task
        :type source_task: WorkflowTask
        :param list after: The target tasks
        :raise TaskNotInGraphError: if the source or a target is not in the graph
        """
        if not self.graph.has_node(source_task.id):
            raise TaskNotInGraphError(
                'source task {0!r} is not in graph (task id: {0.id})'.format(
                    source_task))
        for target_task in after:
            if not self.graph.has_node(target_task.id):
                raise TaskNotInGraphError(
                    'target task {0!r} is not in graph (task id: {0.id})'.
                    format(target_task))
            self.graph.add_edge(source_task.id, target_task.id)

    # workflow creation helper methods
    def chain(self, tasks, after=()):
        """
        create a chain of tasks.
        tasks will be added to the graph with a dependency between
        the tasks by order.

        tasks flow order:
        if tasks = (task0, task1, ..., taskn)
        after -> task0 -> task1 -> ... -> taskn

        :param tasks: list of WorkflowTask instances.
        :param after: target to the sequence
        """
        for source_task in tasks:
            self.add_task(source_task)
            self.dependency(source_task, after=after)
            # The next task in the chain depends on the one just added
            after = (source_task, )

    def fan_out(self, tasks, after=()):
        """
        create a fan-out.
        tasks will be added to the graph with a dependency to
        the target task.

        tasks flow order:
        if tasks = (task0, task1, ..., taskn)
        after +-> task0
              +-> task1
              +-> ...
              +-> taskn

        :param tasks: list of WorkflowTask instances.
        :param after: target to the tasks
        """
        for source_task in tasks:
            self.add_task(source_task)
            self.dependency(source_task, after=after)

    def fan_in(self, source_task, after=None):
        """
        create a fan-in.
        source task will be added to the graph with a dependency to
        the tasks.

        tasks flow order:
        if after = (task0, task1, ..., taskn)
        task0 --+
        task1 --+-> source_task
        ...   --+
        taskn --+

        :param source_task: source to the tasks
        :param after: list of WorkflowTask instances; ``None`` (the default)
            means no dependencies
        """
        self.add_task(source_task)
        # ``None`` is not iterable; treat it as "no targets"
        self.dependency(source_task, after=after or ())
class FamilyTree:
    '''
    Representation of the family tree. Each key is a string and each node is
    a dictionary containing 'entity', which stores a TreeEntity for the
    corresponding key. TreeEntities may also be Members.

    The edges between Members are the big-little relationships. These are
    created from the parent fields when Members are loaded into the tree.
    Non-Members may have edges, but these do not represent big-little
    relationships.
    '''

    @logged
    def __init__(self, members, seed=0):
        '''
        Create the tree and load the given members into it.

        `seed` is stored and later used to seed the RNG that shuffles the
        connected components in `ordered_items`.
        '''
        self.graph = DiGraph()
        self.seed = seed
        self.add_members(members)

    ###########################################################################
    #### Members                                                           ####
    ###########################################################################

    def add_members(self, members):
        '''
        Add the members to the tree, along with their relationships and
        families.
        '''
        self.add_entities(members)
        self.add_member_relationships()
        self.mark_families()

    def add_member_relationships(self):
        '''
        Connect all Members in the tree, based on the value of their parent
        field, by adding the edge (parent, member). Parents must also be
        Members (to add non-Members as parent nodes, use custom edges).
        '''
        for member in self.members():
            if member.parent:
                self.add_member_relationship(member)

    def add_member_relationship(self, member):
        '''
        Add an edge for the member and its parent to the tree. Ensure that
        the parent actually exists in the tree already and that the parent is
        on the same rank or on a rank before the member.

        Raises TreeError (PARENT_UNKNOWN) if the parent key is not in the
        tree, and TreeError (PARENT_NOT_PRIOR) if both ranks are set and the
        member's rank is lower than the parent's.
        '''
        ckey = member.key
        pkey = member.parent

        if pkey not in self:
            code = TreeErrorCode.PARENT_UNKNOWN
            msg = 'member {ckey!r} has unknown parent: {pkey!r}'.format(
                ckey=ckey, pkey=pkey)
            raise TreeError(code, msg)

        parent = self[pkey]['entity']

        # The rank check is skipped when either rank is falsy (unset)
        if member.rank and parent.rank and member.rank < parent.rank:
            code = TreeErrorCode.PARENT_NOT_PRIOR
            msg = 'rank {rank!r} of member {ckey!r} cannot be prior to rank of parent {pkey!r}: {parent_rank!r}'.format(
                rank=member.rank, ckey=ckey, pkey=pkey, parent_rank=parent.rank)
            raise TreeError(code, msg)

        self.add_edge(pkey, ckey)

    def mark_families(self):
        '''
        Mark all families in the tree by adding a 'family' attribute to each
        Member node, which is a dictionary shared by all members of that
        family.
        '''
        # Families are weakly connected components of the members-only graph
        members_only = self.member_subgraph()
        families = weakly_connected_components(members_only)

        # Add a pointer to each member's (initially empty) family dictionary
        for family in families:
            family_dict = {}
            for key in family:
                self[key]['family'] = family_dict

    ###########################################################################
    #### Entities                                                          ####
    ###########################################################################

    def add_entities(self, entities):
        '''
        Add the entities to the tree.
        '''
        for entity in entities:
            self.add_entity(entity)

    def add_entity(self, entity, **attributes):
        '''
        Add the entity with the given extra attributes to the tree. Catch
        any duplicates.

        Raises TreeError (DUPLICATE_ENTITY) if the entity's key is already in
        the tree.
        '''
        key = entity.key
        if key in self:
            code = TreeErrorCode.DUPLICATE_ENTITY
            msg = 'duplicate entity key: {key!r}'.format(key=key)
            raise TreeError(code, msg)
        self.graph.add_node(key, entity=entity, **attributes)

    def get_rank_bounds(self):
        '''
        Find and return the values of the highest and lowest ranks in use,
        as the tuple (min_rank, max_rank). Entities with a falsy rank are
        ignored; if no entity has a rank, (inf, -inf) is returned.
        '''
        min_rank, max_rank = float('inf'), float('-inf')
        for entity in self._iter('entity', keys=False):
            rank = entity.rank
            if rank and min_rank > rank:
                min_rank = rank
            if rank and max_rank < rank:
                max_rank = rank
        return min_rank, max_rank

    ###########################################################################
    #### Iterators                                                         ####
    ###########################################################################

    def _iter(self, *attributes, keys=True, nodes=False):
        '''
        Iterates over the nodes of the graph, yielding tuples of the form:

            (<KEY>, <ATTRIBUTE0>, <ATTRIBUTE1>, ..., <NODE>)

        By default, returns only the keys. Setting keys=False will remove the
        keys, passing attribute names as positional arguments will include
        those attributes (in the order they are written), and setting
        nodes=True will include the entire node attribute dictionary at the
        end.

        If only one element will be in the yielded tuples, that element will
        be yielded directly (i.e., not as a singleton tuple).
        '''
        for key, node in self.graph.nodes_iter(data=True):
            yielded = (*((key, ) if keys else ()),
                       *tuple([node[attr] for attr in attributes]),
                       *((node, ) if nodes else ()))
            yield yielded[0] if len(yielded) == 1 else yielded

    def keys(self):
        '''
        Yields all the keys in the tree.
        '''
        yield from self._iter()

    def nodes(self):
        '''
        Yields all the nodes in the tree.
        '''
        yield from self._iter(keys=False, nodes=True)

    def items(self):
        '''
        Yields all tree's keys and their nodes.
        '''
        yield from self._iter(nodes=True)

    def members(self):
        '''
        Yields all the Member objects in the tree's nodes.
        '''
        for entity in self._iter('entity', keys=False):
            if isinstance(entity, Member):
                yield entity

    def orphans(self):
        '''
        Yields all Members in the tree that have no parent nodes.
        '''
        for key, in_degree in self.graph.in_degree().items():
            entity = self[key]['entity']
            if in_degree == 0 and isinstance(entity, Member):
                yield entity

    def singletons(self):
        '''
        Yields all Members that neither have parent nodes nor child nodes.
        '''
        for key, degree in self.graph.degree_iter():
            entity = self[key]['entity']
            if degree == 0 and isinstance(entity, Member):
                yield entity

    def edges(self):
        '''
        Yields all the edge dictionaries in the tree.
        '''
        for _, _, edge in self.graph.edges_iter(data=True):
            yield edge

    def member_subgraph(self):
        '''
        Returns a subgraph consisting only of members.
        '''
        member_keys = (member.key for member in self.members())
        return self.graph.subgraph(member_keys)

    ###########################################################################
    #### Ordered Iterators                                                 ####
    ###########################################################################

    def ordered_items(self):
        '''
        Yields this graph's nodes and keys in a guaranteed consistent order.

        First:

        The (weakly) connected components of the graph are put in a list. The
        components are then sorted by the minimum (lexicographically-speaking)
        key they contain. Then, the tree's RNG seed is used to shuffle the
        connected components. A few notes:

            + The components are randomized because strange behavior might
            occur if the nodes are left to be in the same order produced by
            normal iteration. That would make the resulting graph look ugly.

            + The user can set the tree's seed field to help obtain the same
            result every time the program is run on the same input. The user
            can also change the seed to change the layout instead of fiddling
            around with the drawn tree itself.

            + The components are sorted even though they are then immediately
            shuffled again. This is to ensure the rng.shuffle function
            produces the same result for the same seed.

        Second:

        The keys and node dictionaries for all of the nodes in all of the
        components are then returned in lexicographical order.
        '''
        components = weakly_connected_components(self.graph)
        components = sorted(components, key=lambda component: min(component, key=str))
        rng = random.Random(self.seed)
        rng.shuffle(components)

        for component in components:
            for key in sorted(component, key=str):
                yield key, self[key]

    def ordered_edges(self):
        '''
        Yields this graph's edges in a guaranteed consistent order.

        Each result returned is a tuple containing a parent key, a child key,
        and the edge dictionary of the edge between them. The edges are
        sorted first by parent key, then child key, then the string form of
        the edge's attribute dictionary.
        '''
        edges = self.graph.edges(data=True)

        def sort_key(arg):
            parent_key, child_key, edge_dict = arg
            return (parent_key, child_key, str(edge_dict))

        yield from sorted(edges, key=sort_key)

    ###########################################################################
    #### Miscellaneous                                                     ####
    ###########################################################################

    def add_edges(self, edges, **attributes):
        '''
        Add all the given edges to the graph, each with the given attributes.
        '''
        self.graph.add_edges_from(edges, **attributes)

    def add_edge(self, key, pkey, **attributes):
        '''
        Add the edge key -> pkey to the graph with the given attributes.

        Note that the arguments are passed to the graph in the order given:
        the first is the edge's source and the second its destination
        (add_member_relationship passes the parent key first).
        '''
        self.graph.add_edge(key, pkey, **attributes)

    def remove(self, key_or_keys):
        '''
        Remove a single key, or every key of an iterable of keys, from the
        tree.

        A string (or bytes) argument is always treated as ONE key, never as
        an iterable of one-character keys.
        '''
        is_single_key = isinstance(key_or_keys, (str, bytes))
        if isinstance(key_or_keys, Iterable) and not is_single_key:
            self.graph.remove_nodes_from(key_or_keys)
        else:
            self.graph.remove_node(key_or_keys)

    def __contains__(self, key):
        return key in self.graph

    def __getitem__(self, key):
        '''
        Return the attribute dictionary of the node with the given key.
        '''
        return self.graph.node[key]
def edges_iter(self, nbunch=None):
    """Iterate over edges, always asking the base class for edge data.

    Delegates to ``DiGraph.edges_iter`` with ``data=True`` and yields
    whatever it produces, unchanged.

    :param nbunch: forwarded as-is to ``DiGraph.edges_iter``
    """
    yield from DiGraph.edges_iter(self, nbunch, data=True)
class Codebase:
    """
    An evolving Java code base.

    Two directed graphs describe it:

    * ``_inheritance_graph``: one node per class name (the class declaration
      is stored under the ``'class'`` node attribute); ``create_class`` adds
      an edge from a subclass to its superclass.
    * ``_method_call_graph``: one node per method name with the attributes
      ``'method'`` (the declaration), ``'class_name'`` and ``'fitness'``;
      ``add_method_call`` adds an edge from the caller to the callee.

    Every mutating operation returns the size of the change it made (some
    also return the name of what they created); ``commit`` records such a
    size as a revision and ``save`` writes everything to disk.
    """

    def __init__(self):
        """Load ``../App.java`` (relative to this module) as initial state."""
        # Also used to generate unique names and literal values
        self.counter = 0
        self._revisions = []
        with open(path.join(path.dirname(__file__), '..', 'App.java')) as java_file:
            parser = Parser()
            tree = parser.parse_file(java_file)
            initial_classes = tree.type_declarations
        self._inheritance_graph = DiGraph()
        self._method_call_graph = DiGraph()
        for c in initial_classes:
            self._inheritance_graph.add_node(c.name, {'class': c})
            for m in c.body:
                if isinstance(m, MethodDeclaration):
                    self._method_call_graph.add_node(m.name,
                                                     {'method': m,
                                                      'class_name': c.name,
                                                      'fitness': random()
                                                      })

    def get_class_name(self, method_name):
        """
        :return: the name of the class that holds the method
        """
        return self._method_call_graph.node[method_name]['class_name']

    def size_of(self, method_name):
        """
        :return: the number of statements in the method body
        """
        return len(self._method_call_graph.node[method_name]['method'].body)

    def number_of_methods(self):
        return len(self._method_call_graph)

    def number_of_classes(self):
        return len(self._inheritance_graph)

    def has_method(self, method_name):
        return self._method_call_graph.has_node(method_name)

    def choose_random_method(self):
        """
        Choose a random method, weighted by its size
        :return: the method name
        """
        # The +1 keeps empty methods selectable
        return sample([(method_name, len(data['method'].body) + 1)
                       for method_name, data in self._method_call_graph.nodes_iter(True)])

    def choose_random_class(self):
        """
        Choose a random class, weighted by its size
        :return: the class name
        """
        # The +1 keeps empty classes selectable
        return sample([(class_name, len(data['class'].body) + 1)
                       for class_name, data in self._inheritance_graph.nodes_iter(data=True)])

    def least_fit_methods(self, n=1):
        """
        :param n: how many method names to return at most
        :return: a list with the names of the ``n`` methods that have the
            smallest fitness values, least fit first
        """
        return nsmallest(n, self._method_call_graph,
                         key=lambda method_name: self._method_call_graph.node[method_name]['fitness'])

    def choose_random_neighbor(self, method_name):
        """
        :return: the name of a random method called by ``method_name``, or
            None if it calls no method
        """
        neighbors = self._method_call_graph.neighbors(method_name)
        num_neighbors = len(neighbors)
        if num_neighbors > 0:
            return neighbors[floor(random() * num_neighbors)]
        else:
            return None

    def caller_names(self, method_name):
        """
        :param method_name:
        :return: caller method names iterator
        """
        return self._method_call_graph.predecessors_iter(method_name)

    def method_invocations(self, method_name):
        """
        Generator for MethodInvocation instances of method_name
        (only top-level expression statements of the callers are inspected)
        :param method_name:
        :return:
        """
        for caller_name in self._method_call_graph.predecessors_iter(method_name):
            caller = self._method_call_graph.node[caller_name]['method']
            for stmt in caller.body:
                if Codebase.is_invocation(stmt, method_name):
                    yield stmt.expression

    def create_method(self, class_name):
        """
        The created methods are static methods for now
        :param class_name: the class that receives the new, empty method
        :return: a tuple (change size, name of the new method)
        """
        klass = self._inheritance_graph.node[class_name]['class']
        method = MethodDeclaration(
            'method' + str(self.counter),
            body=[], modifiers=['static'])
        self.counter += 1
        klass.body.append(method)
        method_info = {'method': method,
                       'class_name': class_name,
                       'fitness': random()}
        self._method_call_graph.add_node(method.name, method_info)
        return 1, method.name

    def delete_method(self, method_name):
        """
        Delete the method and update callers.
        If the owning class is left with an empty body, it is deleted too and
        its subclasses stop extending it.
        :param method_name:
        :return: the change size: the size of the deleted method, plus the
            invocations removed from callers, plus one per detached subclass
            and one for a deleted class
        """
        # remove method invocation
        method_info = self._method_call_graph.node[method_name]
        method = method_info['method']
        change_size = len(method.body)
        for caller_name in self._method_call_graph.predecessors_iter(method_name):
            caller_info = self._method_call_graph.node[caller_name]
            caller = caller_info['method']
            old_size = len(caller.body)
            caller.body = [stmt for stmt in caller.body
                           if not Codebase.is_invocation(stmt, method_name)
                           ]
            change_size += old_size - len(caller.body)
            caller_info['fitness'] = random()
        class_name = method_info['class_name']
        klass = self._inheritance_graph.node[class_name]['class']
        klass.body.remove(method)
        self._method_call_graph.remove_node(method_name)
        if len(klass.body) == 0:
            # remove inheritance from all subclasses
            # (edges run subclass -> superclass, so predecessors are subclasses)
            for subclass_name in self._inheritance_graph.predecessors_iter(class_name):
                subclass = self._inheritance_graph.node[subclass_name]['class']
                subclass.extends = None
                change_size += 1
            self._inheritance_graph.remove_node(class_name)
            change_size += 1
        return change_size

    def create_class(self, superclass_name):
        """
        Create an empty class.
        :param superclass_name: name of the class to extend; a falsy value
            creates a class without a superclass
        :return: a tuple (change size, name of the new class)
        """
        klass = ClassDeclaration('Class' + str(self.counter), [])
        if superclass_name:
            klass.extends = Type(Name(superclass_name))
        self.counter += 1
        self._inheritance_graph.add_node(klass.name, {'class': klass})
        if superclass_name:
            self._inheritance_graph.add_edge(klass.name, superclass_name)
        return 1, klass.name

    def add_method_call(self, caller_name, callee_name):
        """
        Append to the caller's body a statement that invokes the callee.
        Arguments are taken from the caller's parameters first, then from
        the variables it declares; any that are still missing are filled
        with integer literals.
        :param caller_name:
        :param callee_name:
        :return: the change size (always 1)
        """
        callee_info = self._method_call_graph.node[callee_name]
        caller_info = self._method_call_graph.node[caller_name]
        caller = caller_info['method']
        num_params = len(callee_info['method'].parameters)
        # trying to find enough variables for the method arguments
        arguments = []
        for p in caller.parameters:
            if len(arguments) >= num_params:
                break
            else:
                arguments.append(Name(p.variable.name))
        for s in caller.body:
            if len(arguments) >= num_params:
                break
            if isinstance(s, VariableDeclaration):
                for vd in s.variable_declarators:
                    # A declaration may hold several declarators; stop as
                    # soon as there are enough arguments so the call never
                    # gets more arguments than the callee has parameters.
                    if len(arguments) >= num_params:
                        break
                    arguments.append(Name(vd.variable.name))
        while len(arguments) < num_params:
            arguments.append(Literal(self.counter))
        target_name = callee_info['class_name']
        ref = MethodInvocation(callee_name, arguments, target=Name(target_name))
        caller.body.append(ExpressionStatement(ref))
        self._method_call_graph.add_edge(caller_name, callee_name)
        caller_info['fitness'] = random()
        return 1

    def add_statement(self, method_name):
        """
        Append a new variable declaration to the method body
        :param method_name:
        :return: the change size (always 1)
        """
        method_info = self._method_call_graph.node[method_name]
        method = method_info['method']
        stmt = self.create_variable_declaration()
        method.body.append(stmt)
        method_info['fitness'] = random()
        return 1

    def add_parameter(self, method_name):
        """
        Add a parameter to the method, and update all its callers
        :param method_name:
        :return: the change size: one, plus one per updated invocation
        """
        method_info = self._method_call_graph.node[method_name]
        method = method_info['method']
        parameters = method.parameters
        parameters.append(FormalParameter(Variable('param%d' % len(parameters)), Type(Name('int'))))
        change_size = 1
        for caller_name in self._method_call_graph.predecessors_iter(method_name):
            caller_info = self._method_call_graph.node[caller_name]
            caller = caller_info['method']
            # Names usable as the extra argument: the caller's parameters
            # plus every variable declared before the invocation
            local_variables = [p.variable.name for p in caller.parameters]
            for s in caller.body:
                if isinstance(s, VariableDeclaration):
                    for vd in s.variable_declarators:
                        local_variables.append(vd.variable.name)
                elif Codebase.is_invocation(s, method_name):
                    if len(local_variables) > 0:
                        s.expression.arguments.append(Name(local_variables[-1]))
                    else:
                        s.expression.arguments.append(Literal(self.counter))
                    change_size += 1
            caller_info['fitness'] = random()
        return change_size

    def move_method(self, method_name, to_class_name):
        """
        Move the method to another class and retarget its invocations
        :param method_name:
        :param to_class_name:
        :return: the change size: 0 if the method is already in that class,
            otherwise the method size plus one per updated invocation
        """
        method_info = self._method_call_graph.node[method_name]
        from_class_name = method_info['class_name']
        if from_class_name == to_class_name:
            return 0
        method = method_info['method']
        from_class_body = self._inheritance_graph.node[from_class_name]['class'].body
        from_class_body.remove(method)
        to_class_body = self._inheritance_graph.node[to_class_name]['class'].body
        to_class_body.append(method)
        method_info['class_name'] = to_class_name
        change_size = len(method.body)
        # update references
        for method_invocation in self.method_invocations(method_name):
            method_invocation.target = Name(to_class_name)
            change_size += 1
        return change_size

    def rename_method(self, method_name):
        """
        Give the method a fresh generated name and update its invocations
        :param method_name:
        :return: a tuple (change size, new method name)
        """
        new_name = 'method%d' % self.counter
        self.counter += 1
        method_info = self._method_call_graph.node[method_name]
        method_info['method'].name = new_name
        change_size = 1
        for inv in self.method_invocations(method_name):
            inv.name = new_name
            change_size += 1
        relabel_nodes(self._method_call_graph, {method_name: new_name}, copy=False)
        method_info['fitness'] = random()
        return change_size, new_name

    def create_variable_declaration(self):
        """
        :return: the new variable declaration
        """
        var = VariableDeclaration(
            type='int',
            variable_declarators=[VariableDeclarator(
                variable=Variable(
                    name='var' + str(self.counter)
                ),
                initializer=Literal(self.counter)
            )]
        )
        self.counter += 1
        return var

    def save(self, output_dir, save_src):
        """
        Write the recorded revisions and the current state to ``output_dir``:
        ``commits.csv``, ``methods.csv``, ``methods.json``, ``classes.csv``
        and ``classes.json``.
        :param output_dir: an existing directory
        :param save_src: when true, also write each class as
            ``src/<class name>.java`` (the ``src`` directory is created if
            it does not exist)
        """
        with open(path.join(output_dir, 'commits.csv'), 'w', newline='') as commits_file:
            writer = csv.DictWriter(commits_file, ['min_fitness', 'change_size'])
            writer.writeheader()
            writer.writerows(self._revisions)
        with open(path.join(output_dir, 'methods.csv'), 'w', newline='') as methods_file:
            writer = csv.DictWriter(methods_file, ['method', 'class', 'ref_count'])
            writer.writeheader()
            for method_name, in_degree in self._method_call_graph.in_degree_iter():
                writer.writerow({
                    'method': method_name,
                    'class': self._method_call_graph.node[method_name]['class_name'],
                    'ref_count': in_degree
                })
        with open(path.join(output_dir, 'methods.json'), 'w') as methods_file:
            data = json_graph.node_link_data(self._method_call_graph)
            # Values json cannot encode (the declarations) are written as null
            json.dump(data, methods_file, skipkeys=True, default=lambda d: None)
        # Undirected class-level graph: two classes are associated when a
        # method of one calls a method of the other, or one extends the other
        association_graph = Graph()
        for e in self._method_call_graph.edges_iter():
            association_graph.add_edge(
                self._method_call_graph.node[e[0]]['class_name'],
                self._method_call_graph.node[e[1]]['class_name'])
        for e in self._inheritance_graph.edges_iter():
            association_graph.add_edge(*e)
        if save_src:
            # Function-scope import: only ``path`` is known to be in scope
            from os import makedirs
            makedirs(path.join(output_dir, 'src'), exist_ok=True)
        with open(path.join(output_dir, 'classes.csv'), 'w', newline='') as classes_file:
            writer = csv.DictWriter(classes_file, ['class', 'subclasses', 'lines', 'degree'])
            writer.writeheader()
            for class_name, in_degree in self._inheritance_graph.in_degree_iter():
                klass = self._inheritance_graph.node[class_name]['class']
                java_printer = JavaPrinter()
                klass.accept(java_printer)
                writer.writerow({'class': class_name,
                                 'subclasses': in_degree,
                                 'lines': java_printer.result.count('\n') + 1,
                                 'degree': association_graph.degree(class_name)
                                 if class_name in association_graph else 0
                                 })
                if save_src:
                    with open(path.join(output_dir, 'src', class_name + '.java'), 'w') as java_file:
                        java_file.write(java_printer.result)
        with open(path.join(output_dir, 'classes.json'), 'w') as classes_file:
            data = json_graph.node_link_data(association_graph)
            json.dump(data, classes_file, skipkeys=True)

    def commit(self, change_size):
        """
        Record a revision: the given change size together with the smallest
        fitness over all current methods.
        :param change_size:
        :raise ValueError: if the code base currently has no method
        """
        self._revisions.append({
            'min_fitness': min(self._method_call_graph.node[method_name]['fitness']
                               for method_name in self._method_call_graph),
            'change_size': change_size
        })

    @staticmethod
    def is_invocation(stmt, method_name):
        """
        :return: True if ``stmt`` is an expression statement whose expression
            is an invocation of a method named ``method_name``
        """
        return isinstance(stmt, ExpressionStatement) and \
               isinstance(stmt.expression, MethodInvocation) and \
               stmt.expression.name == method_name
class extract:
    """Build a scholars/keywords graph from a SQLite database and export it.

    The instance keeps one SQLite connection/cursor and accumulates state
    across calls:

    * ``scholars``        -- ``"D::<id>"`` -> dict of scholar columns
    * ``terms_array``     -- term id -> ``{'id', 'occurrences', 'term'}``
    * ``scholars_colors`` -- stripped login -> number of rows in ``jobs``
    * ``terms_colors``    -- term id -> number of rows in ``jobs2terms``
    * ``Graph``           -- directed graph with ``"D::<id>"`` (scholar) and
      ``"N::<id>"`` (term) nodes
    * ``unique_id``       -- ``{queried unique_id: "D::<id>"}`` of the last
      ``getScholarsList("unique_id", ...)`` call
    """

    def __init__(self, dbpath):
        """Open the SQLite database at *dbpath* and initialise empty state."""
        self.connection = connect(dbpath)
        print("sqlite3 connected:", self.connection)
        # Row factory gives access to columns by name (row['id']).
        self.connection.row_factory = Row
        self.cursor = self.connection.cursor()
        print("sqlite3 gotcursor:", self.cursor)
        self.scholars = {}
        self.scholars_colors = {}
        self.terms_colors = {}
        self.Graph = DiGraph()
        self.min_num_friends = 0
        self.imsize = 80
        self.terms_array = {}
        self.unique_id = {}

    def jaccard(self, occ1, occ2, cooc):
        """Return ``cooc**2 / (occ1 * occ2)``, or 0 if either count is 0."""
        if occ1 == 0 or occ2 == 0:
            return 0
        return cooc * cooc / float(occ1 * occ2)

    def getRealSize(self, array):
        """Return a rough "amount of information" score for a result row.

        Integers count for 1, sized values (strings...) count for their
        length, empty strings and ``None`` count for nothing.  Used to pick
        the most complete record among duplicates.
        """
        total = 0
        for value in array:
            if value is None or value == "":
                continue
            if isinstance(value, int):
                total += 1
                continue
            try:
                total += len(value)
            except TypeError:
                # Unsized scalar (e.g. float): counts like an integer.
                total += 1
        return total

    @staticmethod
    def _as_sql_id(raw):
        """Return *raw* as an int when it parses as one, else unchanged.

        Keeps the numeric comparison the former unquoted SQL literal had
        while still allowing the value to be bound as a parameter.
        """
        try:
            return int(raw)
        except (TypeError, ValueError):
            return raw

    def getScholarsList(self, qtype, query):
        """Return the scholars selected by *query* as ``{unique_id: 1}``.

        :param qtype: ``"unique_id"`` -- *query* is a scholar unique_id; the
            result holds every scholar sharing one of its keywords and
            ``self.unique_id`` is updated.
            ``"filter"`` -- *query* is a complete SQL statement whose first
            result column is a scholar unique_id.
        :param query: see *qtype*.
        :return: a dict ``{unique_id: 1}``; ``[]`` when the unique_id is
            unknown; ``None`` when *qtype* is not recognised or a database
            error occurred (the error is printed, not raised).
        """
        scholar_array = {}
        sql1 = None
        sql2 = None
        if qtype == "unique_id":
            try:
                # Bound parameter: `query` comes from the caller.
                sql1 = "SELECT * FROM scholars where unique_id=?"
                STR_keywords_ids = ""
                self.cursor.execute(sql1, (query,))
                results = self.cursor.fetchall()
                if len(results) == 0:
                    return []
                if len(results) == 1:
                    self.unique_id = {query: "D::" + str(results[0]["id"])}
                    STR_keywords_ids = results[0]["keywords_ids"]
                else:
                    # Duplicated unique_id: keep the most complete record.
                    # candidate = [integer id, real size, keywords ids]
                    candidates = [
                        [res1["id"], self.getRealSize(res1),
                         res1["keywords_ids"]]
                        for res1 in results
                    ]
                    candidates = sorted(candidates,
                                        key=lambda candit: candit[1],
                                        reverse=True)
                    print("candidates:", candidates)
                    self.unique_id = {query: "D::" + str(candidates[0][0])}
                    STR_keywords_ids = candidates[0][2]

                sql2 = "SELECT * FROM scholars2terms where term_id=?"
                for keywords_id in STR_keywords_ids.split(','):
                    if keywords_id == "":
                        continue
                    try:
                        self.cursor.execute(
                            sql2, (self._as_sql_id(keywords_id),))
                        res2 = self.cursor.fetchone()
                        while res2 is not None:
                            scholar_array[res2['scholar']] = 1
                            res2 = self.cursor.fetchone()
                    except Exception as error:
                        # Best effort: one bad keyword must not abort the
                        # whole lookup.
                        print("sql2:\t" + sql2 + "\t" + repr(keywords_id))
                        print(error)
                return scholar_array
            except Exception as error:
                if sql1 is not None:
                    print("sql1:\t" + sql1 + "\t" + repr(query))
                if sql2 is not None:
                    print("sql2:\t" + sql2)
                print(error)

        if qtype == "filter":
            try:
                # SECURITY NOTE: `query` is executed verbatim.  It must be
                # built by trusted code, never taken from user input.
                self.cursor.execute(query)
                for unique_id in self.cursor.fetchall():
                    scholar_array[unique_id[0]] = 1
                return scholar_array
            except Exception as error:
                print(error)

    def extract(self, scholar_array):
        """Load the given scholars and (re)build ``self.Graph``.

        :param scholar_array: iterable of scholar unique_ids (for example
            the dict returned by ``getScholarsList``).

        Fills ``self.scholars``, ``self.scholars_colors``,
        ``self.terms_array`` and ``self.terms_colors``, then adds term nodes
        (``"N::<id>"``), scholar nodes (``"D::<id>"``) and three kinds of
        edges to ``self.Graph``: ``bipartite`` (scholar -> term),
        ``nodes2`` (term -> term) and ``nodes1`` (scholar -> scholar).
        """
        for scholar_id in scholar_array:
            self._load_scholar(scholar_id)

        termsMatrix = self._count_term_cooccurrences()
        self._count_jobs_per_scholar()
        self._load_terms(termsMatrix)
        self._count_jobs_per_term()
        scholarsMatrix = self._count_scholar_cooccurrences()
        self._add_scholar_nodes(scholarsMatrix)
        self._add_edges(termsMatrix, scholarsMatrix)

    def _load_scholar(self, scholar_id):
        """Store the scholar with this unique_id in ``self.scholars``."""
        sql3 = 'SELECT * FROM scholars where unique_id=?'
        try:
            self.cursor.execute(sql3, (scholar_id,))
            res3 = self.cursor.fetchall()
            # unique_ids may be duplicated in the DB: only the last row is
            # used (raises IndexError, reported below, when there is none).
            row = res3[-1]
            ide = "D::" + str(row['id'])
            info = {
                'id': ide,
                'unique_id': row['unique_id'],
                'photo_url': row['photo_url'],
                'first_name': row['first_name'],
                'initials': row['initials'],
                'last_name': row['last_name'],
                'nb_keywords': row['nb_keywords'],
                'css_voter': row['css_voter'],
                'css_member': row['css_member'],
                'keywords_ids': row['keywords_ids'].split(','),
                'keywords': row['keywords'],
                'country': row['country'],
                'ACR': row['affiliation_acronym'],
                'homepage': row['homepage'],
                'lab': row['lab'],
                'affiliation': row['affiliation'],
                'lab2': row['lab2'],
                'affiliation2': row['affiliation2'],
                'title': row['title'],
                'position': row['position'],
                'job_market': row['job_market'],
                'login': row['login'],
            }
            # Scholars without keywords cannot be linked to anything.
            if info['nb_keywords'] > 0:
                self.scholars[ide] = info
        except Exception as error:
            # Best effort: a missing or malformed record is reported and
            # skipped so the remaining scholars are still loaded.
            print("sql3:\t" + sql3 + "\t" + repr(scholar_id))
            print(error)

    def _count_term_cooccurrences(self):
        """Return ``{term_id: {'occ': n, 'cooc': {other_term_id: n}}}``.

        Also resets the job counter of every loaded scholar.  A term
        co-occurs with itself; empty keyword ids are ignored on both sides
        (an empty id would otherwise become a bogus "N::" node).
        """
        termsMatrix = {}
        for scholar in self.scholars.values():
            self.scholars_colors[scholar['login'].strip()] = 0
            scholar_keywords = scholar['keywords_ids']
            for kw_k in scholar_keywords:
                if kw_k is None or kw_k == "":
                    continue
                entry = termsMatrix.setdefault(kw_k, {'occ': 0, 'cooc': {}})
                entry['occ'] += 1
                cooc = entry['cooc']
                for kw_l in scholar_keywords:
                    if kw_l is None or kw_l == "":
                        continue
                    cooc[kw_l] = cooc.get(kw_l, 0) + 1
        return termsMatrix

    def _count_jobs_per_scholar(self):
        """Count, per loaded scholar login, the rows of table ``jobs``."""
        for res in self.cursor.execute('select login from jobs'):
            login = res['login'].strip()
            if login in self.scholars_colors:
                self.scholars_colors[login] += 1

    def _load_terms(self, termsMatrix):
        """Fill ``self.terms_array`` with the terms present in *termsMatrix*."""
        # NOTE(review): the id list is still interpolated into the SQL text.
        # The ids come from the scholars.keywords_ids column, and binding
        # them individually could exceed SQLite's bound-parameter limit for
        # large term sets.
        query = "SELECT term,id,occurrences FROM terms WHERE id IN "
        conditions = ' (' + ','.join(sorted(termsMatrix)) + ')'
        for res in self.cursor.execute(query + conditions):
            idT = res['id']
            self.terms_array[idT] = {
                'id': idT,
                'occurrences': res['occurrences'],
                'term': res['term'],
            }

    def _count_jobs_per_term(self):
        """Count, per known term, the rows of table ``jobs2terms``."""
        for term in self.terms_array:
            self.terms_colors[term] = 0
        for row in self.cursor.execute('select term_id from jobs2terms'):
            if row['term_id'] in self.terms_colors:
                self.terms_colors[row['term_id']] += 1

    def _count_scholar_cooccurrences(self):
        """Return ``{"D::<id>": {'occ': n, 'cooc': {"D::<id>": n}}}``.

        Two scholars co-occur once per term they share; only scholars
        loaded in ``self.scholars`` are recorded as neighbours.  A node
        ``"N::<term>"`` is added to the graph for every known term.
        """
        sql = ("SELECT scholars.id FROM scholars,scholars2terms "
               "where term_id=? "
               "and scholars.unique_id=scholars2terms.scholar")
        scholarsMatrix = {}
        for term in self.terms_array:
            # Bound as text, like the quoted literal it replaces.
            term_scholars = [
                "D::" + str(row['id'])
                for row in self.cursor.execute(sql, (str(term),))
            ]
            for sch_k in term_scholars:
                entry = scholarsMatrix.setdefault(sch_k,
                                                  {'occ': 0, 'cooc': {}})
                entry['occ'] += 1
                cooc = entry['cooc']
                for sch_l in term_scholars:
                    if sch_l in self.scholars:
                        cooc[sch_l] = cooc.get(sch_l, 0) + 1
            self.Graph.add_node("N::" + str(term))
        return scholarsMatrix

    def _add_scholar_nodes(self, scholarsMatrix):
        """Add a node for each scholar with enough co-occurring scholars."""
        for scholar in self.scholars:
            if scholar not in scholarsMatrix:
                continue
            if len(scholarsMatrix[scholar]['cooc']) >= self.min_num_friends:
                self.Graph.add_node(str(scholar))

    def _add_edges(self, termsMatrix, scholarsMatrix):
        """Add bipartite, term-term and scholar-scholar edges to the graph."""
        # scholar -> term ("bipartite")
        for scholar in self.scholars:
            if scholar not in scholarsMatrix:
                continue
            if len(scholarsMatrix[scholar]['cooc']) < 1:
                continue
            for keyword in self.scholars[scholar]['keywords_ids']:
                if keyword:
                    self.Graph.add_edge(str(scholar), "N::" + str(keyword),
                                        weight=1, type="bipartite")

        # term -> term ("nodes2"): co-occurrences / occurrences of the source
        for term in self.terms_array:
            nodeId1 = self.terms_array[term]['id']
            if str(nodeId1) not in termsMatrix:
                continue
            neighbors = termsMatrix[str(nodeId1)]['cooc']
            for neigh in neighbors:
                if neigh == str(term):
                    continue
                weight = neighbors[str(neigh)] / float(
                    self.terms_array[term]['occurrences'])
                self.Graph.add_edge("N::" + str(term), "N::" + neigh,
                                    weight=weight, type="nodes2")

        # scholar -> scholar ("nodes1"): weighted with self.jaccard
        for scholar in self.scholars:
            if str(scholar) not in scholarsMatrix:
                continue
            neighbors = scholarsMatrix[str(scholar)]['cooc']
            for neigh in neighbors:
                if neigh == str(scholar):
                    continue
                weight = self.jaccard(scholarsMatrix[scholar]['occ'],
                                      scholarsMatrix[neigh]['occ'],
                                      neighbors[str(neigh)])
                self.Graph.add_edge(str(scholar), str(neigh),
                                    weight=weight, type="nodes1")

    def toHTML(self, string):
        """Return *string* escaped, with non-ASCII as XML char references."""
        escaped = escape(string).encode("ascii", "xmlcharrefreplace").decode()
        print(type(escaped))
        return escaped

    def buildJSON_sansfa2(self, graph, coordsRAW=None):
        """Serialise the graph as plain dicts (no layout computation).

        :param graph: graph whose nodes are exported.  Edges are always
            taken from ``self.Graph``.
        :param coordsRAW: optional iterable of ``{'sID', 'x', 'y'}`` dicts
            giving node coordinates; when given, every exported node must
            have an entry.
        :return: ``{'nodes': {...}, 'links': {...}, 'stats': {...},
            'ID': self.unique_id}``.
        """
        inst = CountryConverter("", "", "", "")
        ISO = inst.getCountries("countries_ISO3166.txt")
        Alternatives = inst.getCountries("countries_alternatives.txt")
        inst.createInvertedDicts(ISO, Alternatives)

        coords = None
        if coordsRAW:
            coords = {}
            for item in coordsRAW:
                coords[item['sID']] = {'x': item['x'], 'y': item['y']}

        nodesA = 0   # scholars ("Document" nodes)
        nodesB = 0   # keywords ("NGram" nodes)
        nodes = {}
        for idNode in graph.nodes_iter():
            if idNode[0] == "N":
                node = self._ngram_node(idNode)
                if coords is not None:
                    node["x"] = str(coords[idNode]['x'])
                    node["y"] = str(coords[idNode]['y'])
                nodes[idNode] = node
                nodesB += 1
            if idNode[0] == 'D':
                node = self._scholar_node(idNode, inst)
                if coords is not None:
                    node["x"] = str(coords[idNode]['x'])
                    node["y"] = str(coords[idNode]['y'])
                node["content"] = str(
                    self.toHTML(self._scholar_content(self.scholars[idNode])))
                nodes[idNode] = node
                nodesA += 1

        # Collapse the directed edges into an undirected graph; when both
        # directions exist the weights are averaged and the first type wins.
        GG = Graph()
        for s, t, data in self.Graph.edges_iter(data=True):
            w = float(data['weight'])
            if GG.has_edge(s, t):
                GG[s][t]['weight'] = (GG[s][t]['weight'] + w) / 2
            else:
                GG.add_edge(s, t, weight=w, type=data['type'])

        edgesA = 0
        edgesB = 0
        edgesAB = 0
        edges = {}
        for index, (s, t, data) in enumerate(GG.edges_iter(data=True)):
            # Use the fewest decimals (2 to 9) that keep the weight non-zero.
            origw = data['weight']
            wr = 0.0
            for digits in range(2, 10):
                wr = round(origw, digits)
                if wr > 0.0:
                    break
            edges[str(index)] = {"s": s, "t": t, "w": str(wr)}
            if data['type'] == "nodes1":
                edgesA += 1
            if data['type'] == "nodes2":
                edgesB += 1
            if data['type'] == "bipartite":
                edgesAB += 1

        result = {
            "nodes": nodes,
            "links": edges,
            "stats": {
                "sch": nodesA,
                "kw": nodesB,
                "n1": edgesA,
                "n2": edgesB,
                "nbi": edgesAB,
            },
            "ID": self.unique_id,
        }
        pprint(result["stats"])
        return result

    def _ngram_node(self, idNode):
        """Return the export dict of the term node *idNode* ("N::<id>")."""
        numID = int(idNode.split("::")[1])
        try:
            nodeLabel = self.terms_array[numID]['term'].replace("&", " and ")
            colorg = max(0, 180 - (100 * self.terms_colors[numID]))
            term_occ = self.terms_array[numID]['occurrences']
        except KeyError:
            print("WARN: couldn't find label and meta for term " + str(numID))
            nodeLabel = "UNKNOWN"
            colorg = 0
            term_occ = 1
        return {
            "type": "NGram",
            "label": nodeLabel,
            "color": "19," + str(colorg) + ",244",
            "term_occ": term_occ,
        }

    def _scholar_node(self, idNode, converter):
        """Return the export dict (without content) of scholar *idNode*."""
        scholar = self.scholars[idNode]
        # scholars_colors is keyed by the *stripped* login.
        if self.scholars_colors[scholar['login'].strip()] == 1:
            color = '243,183,19'
        elif scholar['job_market'] == "Yes":
            color = '139,28,28'
        else:
            color = '78,193,127'

        node = {
            "type": "Document",
            "label": (scholar['title'] + " " + scholar['first_name'] + " " +
                      scholar['initials'] + " " + scholar['last_name']),
            "color": color,
        }
        code = converter.searchCode(scholar["country"])  # country code
        node["CC"] = code if code else "-"
        node["ACR"] = scholar["ACR"]
        if node["ACR"] == "":
            node["ACR"] = "-"
        node["term_occ"] = "12"
        return node

    def _scholar_content(self, scholar):
        """Return the (unescaped) HTML description of a scholar record."""
        content = ""
        photo_url = scholar['photo_url']
        if photo_url != "":
            content += ('<img src=http://main.csregistry.org/' + photo_url +
                        ' width=' + str(self.imsize) +
                        'px style=float:left;margin:5px>')
        else:
            if len(self.scholars) < 2000:
                im_id = int(floor(randint(0, 11)))
                content += ('<img src=http://communityexplorer.org/img/' +
                            str(im_id) + '.png width=' + str(self.imsize) +
                            'px style=float:left;margin:5px>')

        content += '<b>Country: </b>' + scholar['country'] + '</br>'

        if scholar['position'] != "":
            content += ('<b>Position: </b>' +
                        scholar['position'].replace("&", " and ") + '</br>')

        affiliation = ""
        if scholar['lab'] != "":
            affiliation += scholar['lab'] + ','
        if scholar['affiliation'] != "":
            affiliation += scholar['affiliation']
        if scholar['affiliation'] != "" or scholar['lab'] != "":
            content += ('<b>Affiliation: </b>' +
                        affiliation.replace("&", " and ") + '</br>')

        if len(scholar['keywords']) > 3:
            content += ('<b>Keywords: </b>' +
                        scholar['keywords'][:-2].replace(",", ", ") +
                        '.</br>')

        homepage = scholar['homepage']
        if homepage[0:3] == "www":
            content += ('[ <a href=http://' + homepage.replace("&", " and ") +
                        ' target=blank > View homepage </a ><br/>]')
        elif homepage[0:4] == "http":
            content += ('[ <a href=' + homepage.replace("&", " and ") +
                        ' target=blank > View homepage </a >]<br/>')
        return content
class DBGraph(object):
    """Directed JOIN graph between the tables of a database.

    Nodes are Table instances.  An edge ``u -> v`` carries, in its
    ``relations`` attribute, whatever ``u.get_relations(v, self)`` returned
    when the edge was created.
    """

    def __init__(self, tables, map_method_capabilities):
        """
        Build the graph by appending every table in turn.

        Args:
            tables: An iterable of Table instances.
            map_method_capabilities: A mapping indexed by
                (platform, method) tuples (see get_capabilities).
        """
        self.graph = DiGraph()
        for table in tables:
            self.append(table)
        self.map_method_capabilities = map_method_capabilities

    def get_capabilities(self, platform, method):
        """
        Returns:
            The capabilities registered for (platform, method).
        Raises:
            KeyError: if this pair is unknown.
        """
        return self.map_method_capabilities[(platform, method)]

    def get_key(self, method):
        """
        Returns:
            The keys (Table.get_keys()) of the table named "method".
        """
        # The result used to be computed and then dropped (missing return).
        return self.find_node(method).get_keys()

    def make_arc(self, u, v):
        """
        Connect a "u" Table to a "v" Table (if necessary) in the DBGraph.

        Args:
            u: The source node (Table instance).
            v: The target node (Table instance).
        """
        #-----------------------------------------------------------------------
        # NOTE: make_predicate is not called anymore: it belongs to a
        # previous implementation which stored a single
        # Relation(type, predicate) per arc.
        def make_predicate(fields_u, key_v):
            """
            Compute the Predicate to JOIN a "u" Table with "v" Table.

            Args:
                fields_u: The set of Field of u required to JOIN with v.
                key_v: The Key of v involved in the JOIN. You may pass None
                    if v has no key.
            Returns:
                - either None iif u embeds a set of v instances
                - either a Predicate instance which indicates how to join
                  u and v
            """
            if len(fields_u) == 1 and list(fields_u)[0].is_array():
                # u embed an array of element of type v, so there is
                # no JOIN and thus no Predicate.
                # Note that v do not even require to have a key
                return None

            # u and v can be joined
            # This code only support Key made of only one Field
            assert key_v, "Can't join with None key"
            assert len(fields_u) == len(key_v), "Can't join fields = %r with key = %r" % (fields_u, key_v)
            assert len(key_v) == 1, "Composite key not supported: key = %r" % key_v
            return Predicate(
                "%s" % list(fields_u)[0].get_name(),
                "==",
                "%s" % list(key_v)[0].get_name()
            )
        #-----------------------------------------------------------------------

        if u == v:
            return

        relations = u.get_relations(v, self)
        if relations:
            self.graph.add_edge(u, v, relations=relations)
            Log.debug("NEW EDGE %s" % self.print_arc(u, v))

    def append(self, u):
        """
        Add a table node not yet in the DB graph and build the arcs
        to connect this node to the existing node.
        There are 3 types of arcs (determines, includes, provides),
        see also manifold.util.table.py

        Args:
            u: The Table instance we are adding to the graph.
        Raises:
            ValueError: if u is already in the graph.
        """
        # Adding the node u in the graph (if not yet in the graph)
        if u in self.graph.nodes():
            raise ValueError("%r is already in the graph" % u)
        self.graph.add_node(u)

        # For each node v != u in the graph, check whether we can connect
        # u to v and v to u (make_arc ignores the u == v case)
        for v, _ in self.graph.nodes(True):
            self.make_arc(u, v)
            self.make_arc(v, u)

    def print_arc(self, u, v):
        """
        Format a (u, v) arc.

        Args:
            u: The source node (Table instance).
            v: The target node (Table instance).
        Returns:
            A String "u -> v : relations".
        """
        relations = self.get_relations(u, v)
        relation_str = ', '.join("%r" % r for r in relations)
        return "%r -> %r : %s" % (u, v, relation_str)

    def plot(self):
        """
        Draw the graph of this DBGraph with graphviz and show it.
        """
        DBGraph.plot_graph(self.graph)

    @staticmethod
    def plot_graph(graph):
        """
        Draw a graph with graphviz and show it.

        Args:
            graph: The graph to draw (plot() passes its networkx graph).
        """
        import matplotlib.pyplot as plt
        draw_graphviz(graph)
        plt.show()

    def find_node(self, table_name, get_parent=True):
        """
        Search a Table instance in the DBGraph for a given table name.

        Args:
            table_name: A String value (the name of the table).
            get_parent: If True and a predecessor of the matching table has
                the same name, this predecessor is returned instead.
        Returns:
            The corresponding Table instance, None if not found.
        """
        for table in self.graph.nodes(False):
            if table.get_name() != table_name:
                continue
            if get_parent:
                # We need to check whether it has a parent with the same name
                for parent, _ in self.graph.in_edges(table):
                    if parent.get_name() == table_name:
                        return parent
            return table
        return None

    def get_table_names(self):
        """
        Retrieve the list of Table names belonging to this DBGraph.

        Returns:
            A list of String instances.
        """
        return [table.get_name() for table in self.graph.nodes(False)]

    def is_parent(self, table_or_table_name):
        """
        Returns:
            True iif the table has no parent (see get_parent).
        """
        return not bool(self.get_parent(table_or_table_name))

    def get_parent(self, table_or_table_name):
        """
        Args:
            table_or_table_name: A Table instance or a table name.
        Returns:
            The predecessor of this table having the same name,
            None if there is no such table.
        """
        if not isinstance(table_or_table_name, Table):
            table_or_table_name = self.find_node(table_or_table_name, get_parent=False)
        for parent, _ in self.graph.in_edges(table_or_table_name):
            if parent.get_name() == table_or_table_name.get_name():
                return parent
        return None

    def get_announce_tables(self):
        """
        Returns:
            A list of new Table instances, one per table of the graph
            having no parent of the same name, carrying the fields
            returned by get_fields() and the keys of the original table.
        """
        tables = []
        for table in self.graph.nodes(False):
            # Ignore child tables with the same name as parents
            has_homonym_parent = any(
                parent.get_name() == table.get_name()
                for parent, _ in self.graph.in_edges(table)
            )
            if not has_homonym_parent:
                tables.append(Table(None, None, table.get_name(), set(self.get_fields(table)), table.get_keys()))
        return tables

    # Let's do a DFS by maintaining a prefix
    def get_fields(self, root, prefix=''):
        """
        Yield copies of the fields of root and of the tables reachable
        from root (depth-first search), skipping 1..N links.

        Args:
            root: The Table instance the search starts from.
            prefix: A String prepended to each field name.
        """

        def table_fields(table, prefix):
            out = []
            for f in table.fields.values():
                # We will modify the fields of the Field object, hence we need
                # to make a copy not to affect the original one
                g = deepcopy(f)
                g.field_name = "%s%s" % (prefix, f.get_name())
                out.append(g)
            return out

        visited = set()

        for f in table_fields(root, prefix):
            yield f
        visited.add(root)
        stack = [(root, self.graph.edges_iter(root, data=True), prefix)]

        # iterate considering edges ...
        while stack:
            _, children, prefix = stack[-1]
            try:
                _, child, data = next(children)
            except StopIteration:
                # Every out-edge of the table on top of the stack was seen
                stack.pop()
                continue

            # NOTE(review): make_arc stores what looks like a collection of
            # relations under this key (print_arc iterates over it), while
            # get_type() is called on it as on a single relation -- confirm.
            relation = data['relations']
            if child in visited:
                continue
            if relation.get_type() in [Relation.types.LINK_1N, Relation.types.LINK_1N_BACKWARDS]:
                # 1..N links are not expanded (the recursive call which
                # prefixed the child fields with its name is disabled)
                continue
            # Normal JOINed table
            for f in table_fields(child, prefix):
                yield f
            visited.add(child)
            stack.append((child, self.graph.edges_iter(child, data=True), prefix))

    def get_relations(self, u, v):
        """
        Args:
            u: The source node (Table instance or table name).
            v: The target node (Table instance or table name).
        Returns:
            The "relations" attribute of the u --> v arc.
        Raises:
            KeyError: if there is no such arc.
        """
        if isinstance(u, StringTypes):
            u = self.find_node(u)
        if isinstance(v, StringTypes):
            v = self.find_node(v)
        return self.graph[u][v]['relations']

    def get_field_type(self, table, field_name):
        """
        Returns:
            The type of the field "field_name" of the table named "table".
        """
        return self.find_node(table).get_field_type(field_name)