예제 #1
0
    def build_tag_graph(self, tagged_graph_path, threshold=None):
        """Build the tag graph from a tagged-graph file and save it to disk.

        Loads tag data through ``Tags``, writes the tag frequencies to
        ``<tagged_graph_path>.tags``, adds one node per tag (paired with its
        frequency) and one weighted edge per tag pair to the internal graph,
        records the corpus size, and saves the graph to
        ``<tagged_graph_path>.tags.graph``.

        Edge weights are inverted, min-max normalised distances: the smallest
        distance maps to 1.0 and the largest to 0.0. When every distance is
        identical (including the single-edge case) all edges get weight 1.0,
        because the normalisation range is empty. When there are no distances
        at all, the graph is saved with nodes only.

        Args:
            tagged_graph_path: Path of the input file handed to ``Tags``;
                also used as the prefix of both output files.
            threshold: Forwarded unchanged as the second argument of
                ``Tags`` (presumably a bound on tag distance -- confirm
                against ``Tags``).
        """
        # NOTE(review): the third argument presumably switches on distance
        # calculation inside Tags -- confirm against its constructor.
        tags = Tags(tagged_graph_path, threshold, True)
        tags.save_tag_freqs(tagged_graph_path + '.tags')

        dist_dict = tags.get_tag_dist()
        tag_freq = tags.get_tag_freq()
        tag_set = tags.get_tag_set()

        # Single-argument parenthesised print behaves the same with the
        # Python 2 print statement and the Python 3 print function.
        print('tag set with distance: %d' % len(tag_set))
        print('tag edges with distance: %d' % len(dist_dict))

        nodes = [(tag, tag_freq[tag]) for tag in tag_set]

        # Use opposite (inverted, normalised) distances as edge weights.
        edges = []
        if dist_dict:
            max_dist = max(dist_dict.values())
            min_dist = min(dist_dict.values())
            # float() forces true division even when distances are ints
            # (plain "/" truncates for ints under Python 2).
            span = float(max_dist - min_dist)
            for pair, dist in dist_dict.items():
                if span:
                    weight = (max_dist - dist) / span
                else:
                    # All distances are equal: nothing to normalise against.
                    weight = 1.0
                edges.append((pair[0], pair[1], weight))

        self.__graph.add_nodes_from(nodes)
        self.__graph.add_edges_from(edges)

        self.set_corpus_size(tags.get_corpus_size())
        self.save(tagged_graph_path + '.tags.graph')