Example #1
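# SentenceGraph, LSITransformation, Synonyms, ConnectedNodes and TextProcessor
# are defined elsewhere in this project; their imports are omitted here.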
class Summarizer:
    DISSIMILARITY_THRESHOLD = 0.8

    def __init__(self, transformation=LSITransformation):
        self.dissimilar_sentences = SentenceGraph()
        self.transformation = transformation

    def semantic_summary(self, tokenised_sentence_dict):
        print("Building Dissimilar Sentences Graph")
        self.build_dissimilarity_graph_using_wordnet(tokenised_sentence_dict)
        print("Dissimilar Sentences Graph build complete")
        cliques = self.dissimilar_sentences.multilevel_communities()
        print(cliques)
        # TODO Return Sentences
        return list(cliques)[0]

    def community_summary(self, tokenised_sentence_dict):
        self.build_dissimilarity_graph(tokenised_sentence_dict)
        community_levels = self.dissimilar_sentences.multilevel_communities()
        best_community = self.best_community(community_levels, tokenised_sentence_dict)
        return best_community.vs["name"]

    def build_dissimilarity_graph_using_wordnet(self, tokenised_sentence_dict):
        sentence_keys = list(tokenised_sentence_dict)
        synonyms = Synonyms(list(tokenised_sentence_dict.values()))
        self.dissimilar_sentences.add_nodes(sentence_keys)
        connections = ConnectedNodes()
        edges = []
        # Score every not-yet-connected pair of sentences; pairs that are
        # dissimilar enough become weighted edges in the graph.
        for every_key, tokens in tokenised_sentence_dict.items():
            for other_key, other_tokens in tokenised_sentence_dict.items():
                if connections.not_connected(every_key, other_key):
                    score = synonyms.dissimilarity_score(tokens, other_tokens)
                    if score > self.DISSIMILARITY_THRESHOLD:
                        edges.append((every_key, other_key, {"weight": score}))
                        connections.add((every_key, other_key))
        self.dissimilar_sentences.add_edges(edges)

    def build_dissimilarity_graph(self, tokenised_sentence_dict):
        print("Building Dissimilar Sentences Graph")
        sentence_keys = list(tokenised_sentence_dict)
        transformed_sentences = self.transformation(tokenised_sentence_dict)
        self.dissimilar_sentences.add_nodes(sentence_keys)
        connections = ConnectedNodes()
        edges = []
        # Same pairwise scoring as above, but dissimilarity comes from the
        # configured transformation (LSI by default) instead of WordNet.
        for every_key, tokens in tokenised_sentence_dict.items():
            for other_key, other_tokens in tokenised_sentence_dict.items():
                if connections.not_connected(every_key, other_key):
                    score = transformed_sentences.dissimilarity_score(tokens, other_tokens)
                    if score > self.DISSIMILARITY_THRESHOLD:
                        edges.append((every_key, other_key, {"weight": score}))
                        connections.add((every_key, other_key))
        self.dissimilar_sentences.add_edges(edges)
        print("Dissimilar Sentences Graph build complete")

    def best_community(self, community_levels, tokenised_sentences_dict):
        best_communities = self.dissimilar_sentences.find_best_community_level(community_levels)
        communities_subgraphs = best_communities.subgraphs()
        best_community_id = 0
        best_community_index = 0.0
        text_processor = TextProcessor()
        for community_id, community in enumerate(communities_subgraphs):
            vertices = community.vs["name"]
            # Average information index of the sentences in this community.
            sigma_info_index = 0.0
            for vertex in vertices:
                sentence = tokenised_sentences_dict[vertex]
                sigma_info_index += text_processor.information_index(sentence)
            sigma_info_index /= len(vertices)
            if best_community_index < sigma_info_index:
                best_community_index = sigma_info_index
                best_community_id = community_id

        return communities_subgraphs[best_community_id]
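
A minimal usage sketch, assuming the project-internal helpers above behave as the class expects; the sentence ids and token lists below are illustrative inputs, not taken from the original project:

# Hedged sketch: the keys and token lists are made-up; in the real project
# the dict would come from its own tokenisation pipeline.
tokenised_sentences = {
    "s1": ["cats", "sleep", "often"],
    "s2": ["dogs", "bark", "loudly"],
    "s3": ["felines", "nap", "frequently"],
}

summarizer = Summarizer()  # defaults to LSITransformation
summary_sentence_ids = summarizer.community_summary(tokenised_sentences)
print(summary_sentence_ids)  # vertex names of the chosen community

Here community_summary returns the vertex names of the community whose sentences have the highest average information index.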