コード例 #1
0
    def build_graph(self, weight_calculator):
        """Build a relation graph linking sufficiently related article pairs.

        One vertex is created per article name.  Every unordered pair of
        articles is scored with ``weight_calculator.calculate_weight``; when
        the score is strictly greater than ``weight_calculator.getThreshold()``
        an edge carrying that score is added in both directions.

        Returns the populated ``RelationGraph``.
        """
        relation_graph = RelationGraph()

        # Vertex objects keyed by article name so edges can be attached later.
        vertices_by_name = {
            entry.name: relation_graph.add_vertex(entry.name)
            for entry in self.articles
        }

        article_count = len(self.articles)
        print(len(relation_graph.vertices))

        # Visit each unordered pair (i, j) with i < j exactly once.
        for i in range(article_count):
            left = self.articles[i]
            for j in range(i + 1, article_count):
                right = self.articles[j]
                score = weight_calculator.calculate_weight(left, right)

                if score > weight_calculator.getThreshold():
                    print(f"Weight between {left.name} and {right.name} is {score!s}")
                    left_vertex = vertices_by_name[left.name]
                    right_vertex = vertices_by_name[right.name]
                    # add_edge is one-directional, so register both directions.
                    relation_graph.add_edge(score, left_vertex, right_vertex)
                    relation_graph.add_edge(score, right_vertex, left_vertex)

        return relation_graph
コード例 #2
0
 def buildDirectedGraph(self):
     """Build a directed graph from the ``self.primaryNodes`` nested mapping.

     ``self.primaryNodes[a][b]`` is read as a numeric value for the pair
     (a, b).  Each non-zero value becomes an edge a -> b whose weight is
     ``(value + 0.00001) / (row_total + 1)``, where ``row_total`` is the sum
     of all values stored under ``a``.

     Returns the populated ``RelationGraph``.
     """
     adjacency = self.primaryNodes
     g = RelationGraph()

     # One vertex per top-level key.
     vertex_of = {}
     for name in adjacency:
         vertex_of[name] = g.add_vertex(name)

     for source_name, neighbours in adjacency.items():
         # Total of every value stored under this source.
         row_total = 0
         for value in neighbours.values():
             row_total += value

         for target_name, value in neighbours.items():
             source_vertex = vertex_of[source_name]
             target_vertex = vertex_of[target_name]
             if value != 0:
                 # The +1 keeps the denominator non-zero (division-by-zero guard).
                 edge_weight = (value + 0.00001) / (row_total + 1)
                 g.add_edge(edge_weight, source_vertex, target_vertex)
                 print("Adding edge: ", source_name, target_name, ': ',
                       edge_weight)
     return g
コード例 #3
0
    def buildUndirectedGraph(self):
        """Build a graph from ``self.primaryNodes`` using one global normaliser.

        ``self.primaryNodes[a][b]`` is read as a numeric value for the pair
        (a, b).  Each non-zero value becomes an edge a -> b whose weight is
        the value divided by the sum of every value in the whole mapping.

        Returns the populated ``RelationGraph``.
        """
        adjacency = self.primaryNodes
        print("Number of primary nodes: ", len(adjacency))

        g = RelationGraph()
        vertex_of = {}
        grand_total = 0

        # Create the vertices and accumulate the global total in one pass.
        for name, neighbours in adjacency.items():
            vertex_of[name] = g.add_vertex(name)
            for value in neighbours.values():
                grand_total += value

        for source_name, neighbours in adjacency.items():
            for target_name, value in neighbours.items():
                source_vertex = vertex_of[source_name]
                target_vertex = vertex_of[target_name]
                if value != 0:
                    edge_weight = value / grand_total
                    g.add_edge(edge_weight, source_vertex, target_vertex)
                    print("Adding edge: ", source_name, target_name, ': ',
                          edge_weight)
        return g
コード例 #4
0
    def build_graph(self):
        """Build the relation graph from ``self.data``.

        Calls ``self.run()`` first if ``self.ran`` is falsy.  A vertex is
        created for every key of ``self.articles``.  For each pair (a, b) in
        ``self.data`` the edge weight is the share of a's total held by the
        a -> b entry, plus half of the share of b's total held by the b -> a
        entry; ``AtoA.BIAS`` is added to each total before dividing.  Edges
        whose combined weight is zero are not added.

        Returns the populated ``RelationGraph``.
        """
        if not self.ran:
            self.run()

        g = RelationGraph()

        # Per-source totals of the ``.sum`` attribute, used as normalisers.
        relation_sums = {}
        for source_name, relations in self.data.items():
            running_total = relation_sums.get(source_name, 0)
            for relation in relations.values():
                running_total += relation.sum
            relation_sums[source_name] = running_total

        # Map every vertex name to the vertex object created for it.
        vertex_map = {}
        for name in self.articles:
            vertex_map[name] = g.add_vertex(name)

        # add_edge(weight, source, target) creates the edge in one direction
        # only; the opposite direction is added when the outer loop reaches
        # the other endpoint.
        for source_name, relations in self.data.items():
            for target_name, relation in relations.items():
                source_vertex = vertex_map[source_name]
                target_vertex = vertex_map[target_name]

                forward_share = relation.sum / (
                    relation_sums[source_name] + AtoA.BIAS)
                reverse_half_share = self.data[target_name][source_name].sum / (
                    (relation_sums[target_name] + AtoA.BIAS) * 2)

                combined = forward_share + reverse_half_share
                if combined != 0:
                    g.add_edge(combined, source_vertex, target_vertex)
        return g
コード例 #5
0
def _print_graph(relation_graph):
    """Print every vertex of *relation_graph* followed by its edges.

    Each vertex's ``prop`` is printed on its own line, then one line per
    edge in the form ``<source>  ==  <weight>  ==> <target>``, then a blank
    line separating it from the next vertex.
    """
    for vertex in relation_graph.iter_vertex():
        print(vertex.prop)
        for edge in vertex.iter_edge():
            # The default single-space separator reproduces the spacing of
            # the original sequence of print(..., end=" ") calls exactly.
            print(edge.source.prop, " == ", edge.prop, " ==>",
                  edge.target.prop)
        print()


_print_graph(graph)
# path = graph.search('agriculture', 'plants')
# print(path)

tl = TopicLocality(topics, articles)
g = tl.buildUndirectedGraph()

graph2 = RelationGraph()
graph2.load_graph('RelationModels/MiniCorpusUnigram.txt')
# A vertex count is an integer: use %d (the previous %f printed "3.000000").
print("number of vertices %d" % len(graph2.vertices))
_print_graph(graph2)

# print("Number of unigrams in common ", bovine.calculateNumberOfUnigramsInCommon(cattle))
# print("Number of bigrams in common: ", bovine.calculateNumberOfBigramsInCommon(cattle))
# print("Number of trigrams in common: ", bovine.calculateNumberOfTrigramsInCommon(cattle))
コード例 #6
0
ファイル: search.py プロジェクト: compSci91/nlp-project
def main():
    """Command-line entry point: load a relation model and search it.

    Arguments are read from ``sys.argv``:
        1. model  -- one of ``u``, ``b``, ``t``, ``a``, ``lu``, ``ld``
        2. source -- a vertex name, or ``all`` to call ``run_all`` on the graph
        3. target -- a vertex name; required unless source is ``all``
        4. corpus -- optional, ``Corpus`` (default) or ``MiniCorpus``

    With fewer than two arguments, or with a source other than ``all`` and
    no target, the usage text is shown and the process exits with status 1.
    Invalid models/corpora, a model file that yields no vertices, and unknown
    source/target names print a message and return without searching.
    Otherwise the path from source to target is printed one vertex per line,
    followed by its weight.
    """
    if len(sys.argv) < 3:
        usage()
        # sys.exit works even where the site-provided exit() builtin is absent.
        sys.exit(1)

    # Model flag -> suffix of the model file name.
    model_suffixes = {
        'u': 'Unigram.txt',
        'b': 'Bigram.txt',
        't': 'Trigram.txt',
        'a': 'AtoA.txt',
        'ld': 'TopicLocalityDirected.txt',
        'lu': 'TopicLocalityUndirected.txt',
    }
    model = sys.argv[1]
    if model not in model_suffixes:
        # List the flags that are actually accepted (lu / ld, not "l").
        print(
            "Please input a valid model.\nu-unigram\nb-bigram\nt-trigram\n"
            "a-Article to Article\nlu-Topic Locality (undirected)\n"
            "ld-Topic Locality (directed)"
        )
        return

    if len(sys.argv) > 4:
        corpus = sys.argv[4]
        if corpus not in ['Corpus', 'MiniCorpus']:
            print('Please input a valid corpus (Corpus or MiniCorpus).')
            return
    else:
        corpus = 'Corpus'

    graph = RelationGraph()
    model_file = 'RelationModels/' + corpus + model_suffixes[model]
    graph.load_graph(model_file)

    if len(graph.vertices) == 0:
        #TODO check if corpus exists and construct it if needed instead of quitting
        print("This model needs to be constructed first: " + model_file + ".")
        return

    source = sys.argv[2]
    if source == 'all':
        run_all(graph)
        return

    if graph.find_vertex(source) is None:
        print("Source not in model.")
        return

    # A single-pair search needs a target; without this guard, reading
    # sys.argv[3] raised IndexError when only model and source were given.
    if len(sys.argv) < 4:
        usage()
        sys.exit(1)

    target = sys.argv[3]
    if graph.find_vertex(target) is None:
        print("Target not in model.")
        return

    path, path_weight = graph.search(source, target)
    for t in path:
        print(t.prop)

    print("Weight", path_weight)