Exemple #1
0
    def __init__(self, snippets=None):
        """Set up empty clustering state and optionally index initial snippets.

        Args:
            snippets: optional list of strings where every element is a news
                snippet. When omitted (or None) the instance starts with its
                own fresh empty list, and snippets can be added later:

                    STC = SuffixTreeClustering()
                    STC.add_strings(snippet)

                A list passed in explicitly is stored by reference (not
                copied), and if it is non-empty it is forwarded to
                ``self.add_strings`` right away.
        """
        # A literal ``[]`` default would be created once at function
        # definition time and shared by every instance constructed without
        # arguments; allocate a new list per instance instead.
        if snippets is None:
            snippets = []

        self.snippets = snippets
        self.final_phrases = {}
        self.cluster_document = {}  # base cluster -> documents it covers
        self.phrases = {}  # phrases for each base cluster
        self.scores = {}  # scores for base clusters
        self.sorted_clusters = []  # base clusters sorted by score
        self.final_clusters = []  # final merged clusters
        self.top_final_clusters = []  # top n final clusters

        # The tree must exist before add_strings is called below.
        self.suffix_tree = SuffixTree()
        if len(snippets) > 0:
            self.add_strings(snippets)