def build_graph(self, weight_calculator):
    """Build a relation graph connecting sufficiently related articles.

    One vertex is added per entry of ``self.articles`` (labelled with the
    article's ``name``). Every unordered pair of articles is then scored
    with ``weight_calculator.calculate_weight``; when the score is strictly
    greater than ``weight_calculator.getThreshold()`` an edge carrying that
    score is added in both directions between the two vertices.

    Returns:
        The populated ``RelationGraph``.
    """
    relation_graph = RelationGraph()

    # Article name -> vertex object, so edges can be attached by name later.
    vertices_by_name = {
        article.name: relation_graph.add_vertex(article.name)
        for article in self.articles
    }

    article_count = len(self.articles)
    print(len(relation_graph.vertices))

    # Visit each unordered pair exactly once: the second index always runs
    # strictly above the first.
    for first_index, first_article in enumerate(self.articles):
        for second_index in range(first_index + 1, article_count):
            second_article = self.articles[second_index]

            weight = weight_calculator.calculate_weight(
                first_article, second_article)
            first_vertex = vertices_by_name[first_article.name]
            second_vertex = vertices_by_name[second_article.name]

            if not (weight > weight_calculator.getThreshold()):
                continue

            print(" ".join((
                "Weight between", first_article.name,
                "and", second_article.name,
                "is", str(weight),
            )))
            # The relation is symmetric, so store it in each direction.
            relation_graph.add_edge(weight, first_vertex, second_vertex)
            relation_graph.add_edge(weight, second_vertex, first_vertex)

    return relation_graph
def buildDirectedGraph(self):
    """Build a directed relation graph from ``self.primaryNodes``.

    ``self.primaryNodes`` is read as a two-level mapping
    ``source -> target -> value``. A vertex is created for every top-level
    key. For each source, the values of its inner mapping are summed; every
    non-zero entry then becomes an edge ``source -> target`` weighted
    ``(value + 0.00001) / (row_sum + 1)``. The small offsets are there to
    avoid division-by-zero problems.

    Returns:
        The populated ``RelationGraph``.
    """
    node_table = self.primaryNodes
    vertices = {}
    graph = RelationGraph()

    for name in node_table.keys():
        vertices[name] = graph.add_vertex(name)

    for source, neighbours in node_table.items():
        # Per-source normaliser: the sum of everything this source points at.
        row_sum = 0
        for value in neighbours.values():
            row_sum += value

        for target, value in neighbours.items():
            source_vertex = vertices[source]
            target_vertex = vertices[target]
            if value == 0:
                continue
            edge_weight = (value + 0.00001) / (row_sum + 1)
            graph.add_edge(edge_weight, source_vertex, target_vertex)
            print("Adding edge: ", source, target, ': ', edge_weight)

    return graph
def buildUndirectedGraph(self):
    """Build a relation graph from ``self.primaryNodes`` with global normalisation.

    ``self.primaryNodes`` is read as a two-level mapping
    ``source -> target -> value``. A vertex is created for every top-level
    key, and all inner values are added up into one grand total. Every
    non-zero entry then becomes an edge ``source -> target`` weighted
    ``value / grand_total``.

    NOTE(review): edges are only added in the direction listed in the
    mapping; presumably ``primaryNodes`` already holds both directions of
    each relation -- confirm against the code that fills it.

    Returns:
        The populated ``RelationGraph``.
    """
    node_table = self.primaryNodes
    print("Number of primary nodes: ", len(node_table))

    grand_total = 0
    vertices = {}
    graph = RelationGraph()

    # Single pass: register each vertex and accumulate the global normaliser.
    for name, neighbours in node_table.items():
        vertices[name] = graph.add_vertex(name)
        for value in neighbours.values():
            grand_total += value

    for source, neighbours in node_table.items():
        for target, value in neighbours.items():
            source_vertex = vertices[source]
            target_vertex = vertices[target]
            if value == 0:
                continue
            edge_weight = value / grand_total
            graph.add_edge(edge_weight, source_vertex, target_vertex)
            print("Adding edge: ", source, target, ': ', edge_weight)

    return graph
def build_graph(self):
    """Build the relation graph from the collected article-to-article data.

    Runs ``self.run()`` first when ``self.ran`` is false. ``self.data`` is
    read as a two-level mapping ``source -> target -> relation`` where each
    relation exposes a ``sum`` attribute. A vertex is created for every key
    of ``self.articles``.

    For every ``source -> target`` entry the edge weight is::

        relation.sum / (total[source] + AtoA.BIAS)
        + data[target][source].sum / ((total[target] + AtoA.BIAS) * 2)

    where ``total[x]`` is the sum of ``relation.sum`` over all of ``x``'s
    entries. The edge is added only when that combined weight is non-zero.

    Returns:
        The populated ``RelationGraph``.
    """
    if not self.ran:
        self.run()

    graph = RelationGraph()

    # Per-source totals, used as the normaliser for the edge weights.
    relation_sums = {}
    for name, relations in self.data.items():
        running_total = relation_sums.get(name, 0)
        for relation in relations.values():
            running_total += relation.sum
        relation_sums[name] = running_total

    # Vertex name -> vertex object.
    vertex_map = {name: graph.add_vertex(name) for name in self.articles.keys()}

    # add_edge(weight, source, target) creates the edge in one direction
    # only; the reverse direction is produced when the loop reaches the
    # mirrored entry of self.data.
    for source, relations in self.data.items():
        for target, relation in relations.items():
            source_vertex = vertex_map[source]
            target_vertex = vertex_map[target]

            forward = (relation.sum) / (relation_sums[source] + AtoA.BIAS)
            # The reverse relation contributes at half strength.
            backward = self.data[target][source].sum / (
                (relation_sums[target] + AtoA.BIAS) * 2)

            combined = forward + backward
            if combined != 0:
                graph.add_edge(combined, source_vertex, target_vertex)

    return graph
def _print_graph(graph_to_print):
    """Print each vertex label followed by one line per outgoing edge.

    Each edge line has the form ``<source>  ==  <weight>  ==> <target>`` and
    every vertex section is terminated by a blank line.
    """
    for vertex in graph_to_print.iter_vertex():
        print(vertex.prop)
        for edge in vertex.iter_edge():
            # A single print with the default " " separator yields exactly
            # the same text as the chained end=" " prints it replaces.
            print(edge.source.prop, " == ", edge.prop, " ==>", edge.target.prop)
        print()


# Dump the graph that was built earlier in the script.
_print_graph(graph)

# Build the topic-locality graph (buildUndirectedGraph logs the edges it adds).
tl = TopicLocality(topics, articles)
g = tl.buildUndirectedGraph()

# Reload a previously saved model and dump it as well.
graph2 = RelationGraph()
graph2.load_graph('RelationModels/MiniCorpusUnigram.txt')
# %d, not %f: the vertex count is an integer and should not print as "N.000000".
print("number of vertices %d" % len(graph2.vertices))
_print_graph(graph2)
def main():
    """Command-line entry point: load a relation model and query it.

    Arguments are read from ``sys.argv``::

        <model> <source> [<target> [<corpus>]]

    * ``model``  -- one of ``u``, ``b``, ``t``, ``a``, ``lu``, ``ld``; selects
      which file under ``RelationModels/`` is loaded.
    * ``source`` -- a vertex name, or ``all`` to pass the whole graph to
      ``run_all``.
    * ``target`` -- vertex name to search for; required unless ``source`` is
      ``all``.
    * ``corpus`` -- ``Corpus`` (default) or ``MiniCorpus``. It is read from
      ``sys.argv[4]``, so a third argument must be present to supply it.

    Prints the vertices on the found path followed by the path weight.
    Invalid input prints a message and returns; too few arguments prints
    the usage text and exits with status 1.
    """
    if len(sys.argv) < 3:
        usage()
        sys.exit(1)

    # Model flag -> file-name suffix of the saved model.
    model_suffixes = {
        'u': 'Unigram.txt',
        'b': 'Bigram.txt',
        't': 'Trigram.txt',
        'a': 'AtoA.txt',
        'ld': 'TopicLocalityDirected.txt',
        'lu': 'TopicLocalityUndirected.txt',
    }

    model = sys.argv[1]
    if model not in model_suffixes:
        # The topic-locality flags are 'lu' and 'ld'; a bare 'l' is rejected,
        # so the help text must list the real flags.
        print(
            "Please input a valid model.\nu-unigram\nb-bigram\nt-trigram\n"
            "a-Article to Article\nlu-Topic Locality (undirected)\n"
            "ld-Topic Locality (directed)"
        )
        return

    corpus = sys.argv[4] if len(sys.argv) > 4 else 'Corpus'
    if corpus not in ('Corpus', 'MiniCorpus'):
        print('Please input a valid corpus (Corpus or MiniCorpus).')
        return

    model_file = 'RelationModels/' + corpus + model_suffixes[model]
    graph = RelationGraph()
    graph.load_graph(model_file)

    if len(graph.vertices) == 0:
        # TODO check if corpus exists and construct it if needed instead of quitting
        print("This model needs to be constructed first: " + model_file + ".")
        return

    source = sys.argv[2]
    if source == 'all':
        run_all(graph)
        return

    if graph.find_vertex(source) is None:
        print("Source not in model.")
        return

    # A target is mandatory for a single-source search; without this check a
    # missing argument surfaced as an IndexError on sys.argv[3].
    if len(sys.argv) < 4:
        usage()
        sys.exit(1)

    target = sys.argv[3]
    if graph.find_vertex(target) is None:
        print("Target not in model.")
        return

    path, path_weight = graph.search(source, target)
    for vertex in path:
        print(vertex.prop)
    print("Weight", path_weight)