from collections import defaultdict

import numpy as np


def rank(self, nodes, edges) -> None:
    """Score nodes with a degree-based TextRank variant; the resulting
    weights are written back onto the node objects in place."""
    d = 0.85  # damping coefficient, conventionally 0.85
    min_diff = 1e-2  # convergence threshold
    steps = 1000  # maximum number of iterations
    self.weight_default = 1.0 / (len(nodes) or 1)
    self.score_dict = {k: self.weight_default for k in nodes}
    self.indegree_dict = {k: self.indegree_nodes(k, edges) for k in nodes}
    self.outdegree_dict = {
        k: len(self.outdegree_nodes(k, edges)) for k in nodes
    }
    # Track the previous and current total score; start them far apart so
    # the convergence check cannot fire before the first iteration.
    step_tuple = (1000, 0)
    for step in range(steps):
        for node in nodes:
            self.score(node, d)
        step_tuple = (step_tuple[1], sum(self.score_dict.values()))
        if abs(step_tuple[1] - step_tuple[0]) <= min_diff:
            break
    # Standardize, then normalize, so that all scores end up non-negative.
    if self.score_dict:
        self.score_dict = utils.standardize_dict(self.score_dict)
        self.score_dict = utils.normalize_dict(self.score_dict)
    assert np.min(list(self.score_dict.values())) >= 0
    for node in nodes.values():
        node.weight = self.score_dict[node.name]
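# NOTE: utils.standardize_dict and utils.normalize_dict are project helpers
# whose implementation is not shown in this excerpt. Below is a minimal,
# assumed sketch (z-score standardization followed by min-max normalization,
# one combination that would satisfy the non-negativity assertion above);
# the project's real utils module may differ.
import types

import numpy as np


def standardize_dict(score_dict: dict) -> dict:
    """Z-score every value: subtract the mean, divide by the std (assumed)."""
    values = np.array(list(score_dict.values()), dtype=float)
    std = float(values.std()) or 1.0  # guard against zero variance
    mean = float(values.mean())
    return {k: (v - mean) / std for k, v in score_dict.items()}


def normalize_dict(score_dict: dict) -> dict:
    """Min-max scale every value into [0, 1] (assumed)."""
    values = list(score_dict.values())
    lo, hi = min(values), max(values)
    span = (hi - lo) or 1.0  # guard against all-equal values
    return {k: (v - lo) / span for k, v in score_dict.items()}


# Stand-in namespace so the excerpts in this listing can resolve `utils`.
utils = types.SimpleNamespace(
    standardize_dict=standardize_dict, normalize_dict=normalize_dict)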
def equalize_term_and_entities(graph):
    """Normalize named-entity weights and tf-idf term weights separately so
    that the two kinds of node weight become directly comparable."""
    # Dummy entries keep the dictionaries non-empty and anchor a zero weight
    # for the normalization; they are removed again afterwards.
    term_weights = {'dummy0000': 0.0}
    entity_weights = {'dummy0000': 0.0}
    for node in graph.nodes.values():
        if node.node_type == 'term':
            term_weights[node.name] = node.weight
        else:
            entity_weights[node.name] = node.weight
    normalized_term_weights = utils.normalize_dict(term_weights)
    normalized_entity_weights = utils.normalize_dict(entity_weights)
    del normalized_term_weights['dummy0000']
    del normalized_entity_weights['dummy0000']
    for node in graph.nodes.values():
        if node.node_type == 'term':
            node.weight = normalized_term_weights[node.name]
        else:
            node.weight = normalized_entity_weights[node.name]
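# Minimal usage sketch for equalize_term_and_entities(). Graph and its nodes
# are hypothetical stand-ins built from SimpleNamespace; only the attributes
# the function actually reads (.nodes, .name, .node_type, .weight) are assumed.
from types import SimpleNamespace

graph = SimpleNamespace(nodes={
    'cat': SimpleNamespace(name='cat', node_type='term', weight=0.9),
    'dog': SimpleNamespace(name='dog', node_type='term', weight=0.3),
    'Berlin': SimpleNamespace(name='Berlin', node_type='entity', weight=3.2),
})
equalize_term_and_entities(graph)
# Term and entity weights now share one normalized scale, so a term score of
# 1.0 and an entity score of 1.0 carry comparable meaning in the graph.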
def rank(self, nodes, edges) -> None:
    """Score nodes with an edge-weighted TextRank; the resulting weights are
    written back onto the node objects in place."""
    d = 0.85  # damping coefficient, conventionally 0.85
    min_diff = 1e-5  # convergence threshold
    steps = 1000  # maximum number of iterations
    weight_default = 1.0 / (len(nodes) or 1)
    nodeweight_dict = defaultdict(float)  # node name -> current score
    outsum_node_dict = defaultdict(float)  # node name -> summed edge weight
    for node in nodes.values():
        # Initialize every node with a uniform weight.
        nodeweight_dict[node.name] = weight_default
        # Sum the weights of all edges incident to this node.
        outsum_node_dict[node.name] = sum(
            weight for edge_key, weight in edges.items()
            if node.name in edge_key)
    # Fix an iteration order over node names.
    sorted_keys = sorted(nodes.keys())
    step_dict = [0]
    for step in range(1, steps):
        new_weights = defaultdict(float)
        # Each node distributes its current score to its neighbours in
        # proportion to the weight of the connecting edge.
        for edge, weight in edges.items():
            node_a, node_b = edge
            new_weights[node_a] += (
                weight / outsum_node_dict[node_b] * nodeweight_dict[node_b])
            new_weights[node_b] += (
                weight / outsum_node_dict[node_a] * nodeweight_dict[node_a])
        for node in sorted_keys:
            nodeweight_dict[node] = (1 - d) + d * new_weights[node]
        step_dict.append(sum(nodeweight_dict.values()))
        if abs(step_dict[step] - step_dict[step - 1]) <= min_diff:
            break
    # Standardize, then normalize, so that all scores end up non-negative.
    if nodeweight_dict:
        nodeweight_dict = utils.standardize_dict(nodeweight_dict)
        nodeweight_dict = utils.normalize_dict(nodeweight_dict)
    assert np.min(list(nodeweight_dict.values())) >= 0, nodeweight_dict
    for node in nodes.values():
        node.weight = nodeweight_dict[node.name]
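# Minimal usage sketch for the edge-weighted rank() above. Node and Ranker are
# hypothetical stand-ins; the class that owns rank() in the project is not
# shown in this excerpt. Edges are keyed by (node_a, node_b) tuples mapping to
# a float edge weight, matching how rank() unpacks them.
class Node:
    def __init__(self, name: str):
        self.name = name
        self.weight = 0.0


class Ranker:
    rank = rank  # attach the function above as a method (sketch only)


nodes = {name: Node(name) for name in ('a', 'b', 'c')}
edges = {('a', 'b'): 1.0, ('b', 'c'): 2.0, ('a', 'c'): 0.5}
Ranker().rank(nodes, edges)
for node in nodes.values():
    print(node.name, node.weight)  # standardized + min-max-normalized scores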
# Build (initialize) graph nodes and edges.
candidate_graph.build(**build_arguments)
candidate_graph.trim(d_core_k, d_core_l)

# Recalculate node weights using TextRank.
if args.textrank:
    candidate_graph.rank()

relevance, diversity_type = candidate_graph.compare(
    query_graph, args.novelty, args.node_edge_l)
ranking[docid] = relevance
addition_types[docid] = diversity_type

# Sort retrieved documents according to the new similarity score.
sorted_ranking = utils.normalize_dict({
    k: v
    for k, v in sorted(
        ranking.items(), key=lambda item: item[1], reverse=True)
})

# Diversify.
if args.diversify:
    nr_types = len(
        np.unique([
            item for sublist in addition_types.values() for item in sublist
        ]))
    present_types = []
    to_delete_docids = []
    for key in sorted_ranking.keys():
        if len(present_types) == nr_types:
            break
        if len(addition_types[key]) > 1: