def restemming(word, num_snippets):
    """Return the first token, from the selected snippets, that contains ``word``.

    The snippets are visited in the order given by ``num_snippets``. Each one
    is passed through ``tokenize_and_stem`` with ``stem=0`` and its tokens are
    checked, in order, for ``word`` as a substring.

    ``self`` is not a parameter here; it is resolved from the surrounding
    scope.

    Args:
        word: Substring to look for inside each token.
        num_snippets: Iterable of indices into ``self.snippets``.

    Returns:
        The first token containing ``word``, or ``''`` when no token matches.
    """
    for index in num_snippets:
        tokens = tokenize_and_stem(self.snippets[index], stem=0)
        # str.find yields -1 on a miss and a non-negative offset on a hit.
        hit = next((token for token in tokens if token.find(word) >= 0), None)
        if hit is not None:
            return hit
    return ''
def restemming(word, num_snippets):
    """Find the first token of the raw snippets that has ``word`` inside it.

    Every index in ``num_snippets`` selects an entry of ``self.rawsnippets``;
    that entry is run through ``tokenize_and_stem`` with ``stem=0`` and the
    resulting tokens are searched in order.

    ``self`` is taken from the enclosing scope rather than passed in.

    Args:
        word: Substring searched for in each token.
        num_snippets: Iterable of indices into ``self.rawsnippets``.

    Returns:
        The first matching token, or ``""`` if nothing matches.
    """
    for idx in num_snippets:
        raw_text = self.rawsnippets[idx]
        for candidate in tokenize_and_stem(raw_text, stem=0):
            if candidate.find(word) == -1:
                # ``word`` does not occur in this token; keep scanning.
                continue
            return candidate
    return ""
# Feed snippets into ``self.suffix_tree``: every entry of ``strings`` that is
# not None is passed through ``tokenize_and_stem`` and the result is handed to
# ``self.suffix_tree.append_string``. ``self.suffix_tree.fix_input_string()``
# is also invoked on the tree.
# NOTE(review): this definition is collapsed onto a single line, so the
# original nesting is not visible -- confirm whether ``fix_input_string()``
# is meant to run once after the loop or after each appended string before
# reformatting this method.
def add_strings(self, strings): """ strings - strings (snippets) to add to suffix tree """ for string in strings: if string is not None: self.suffix_tree.append_string(tokenize_and_stem(string)) self.suffix_tree.fix_input_string()
def __init__(self, rawsnippets):
    """Tokenize the snippets and collect their distinct words.

    Each snippet is passed through ``tokenize_and_stem`` and the result is
    stored in ``self.snippets``. Every word seen for the first time is
    appended to ``self.attrs`` and recorded in ``self.attrib`` under its
    position in ``self.attrs``.

    Args:
        rawsnippets: Iterable of snippets; kept unchanged as
            ``self.rawsnippets``.
    """
    self.snippets = []
    self.attrs = []
    self.attrib = {}
    self.objs = []
    self.context = None
    self.lattice = None
    self.concept_system = None
    self.rawsnippets = rawsnippets

    # Track already-registered words in a set: scanning
    # ``self.attrib.values()`` for every word is linear per lookup and makes
    # the whole pass quadratic in the vocabulary size.
    seen = set()
    for snippet in rawsnippets:
        tokens = tokenize_and_stem(snippet)
        self.snippets.append(tokens)
        for word in tokens:
            if word in seen:
                continue
            seen.add(word)
            # The next free index always equals the current number of
            # registered words, so no separate counter is needed.
            self.attrib[len(self.attrs)] = word
            self.attrs.append(word)