def __init__(self, snippets=None):
    """Set up empty clustering state and optionally ingest initial snippets.

    Args:
        snippets: optional list of strings where every element is a news
            snippet. It may be omitted and the strings supplied later:

                STC = SuffixTreeClustering()
                STC.add_strings(snippet)

            When a non-empty list is given, it is passed to
            ``self.add_strings`` once the instance state is initialised.
    """
    # A literal ``[]`` default would be created once and shared by every
    # instance constructed without arguments; build a fresh list instead.
    if snippets is None:
        snippets = []

    self.snippets = snippets
    self.final_phrases = {}
    self.cluster_document = {}   # base cluster -> documents it covers
    self.phrases = {}            # phrases for each base cluster
    self.scores = {}             # scores for base clusters
    self.sorted_clusters = []    # base clusters sorted by score
    self.final_clusters = []     # final merged clusters
    self.top_final_clusters = []  # top n final clusters
    self.suffix_tree = SuffixTree()

    # Only feed the tree when there is something to add.
    if len(snippets) > 0:
        self.add_strings(snippets)