def process(self, ind, path):
    """Tokenize the HTML document at ``path`` and add it to the index.

    The document's title (or ``u'Untitled'`` when the tokenizer reports a
    falsy title) is appended to ``self._titles``.  For every token the
    tokenizer counted, an ``[ind, weight]`` pair is appended to
    ``self.globalht[token]``, where ``weight`` is the token's count divided
    by the number of distinct tokens in the document.

    Args:
        ind: Identifier stored alongside each weight (presumably the
            document's index -- confirm against the caller).
        path: Location handed unchanged to ``Tokenizer.tokenize_html``.
    """
    tok = Tokenizer()
    tok.tokenize_html(path)
    self._titles.append(tok.title or u'Untitled')

    # Hoisted out of the loop because it is invariant.  Converted to float so
    # the normalization is a true division even on interpreters where ``/``
    # between two ints truncates (Python 2 without
    # ``from __future__ import division``); there every weight would
    # otherwise collapse to 0 for most tokens.
    unique_tokens = float(len(tok._counter))

    # NOTE(review): assumes ``self.globalht[token]`` yields a list even for
    # unseen tokens (e.g. a defaultdict(list)) -- confirm where it is built.
    for token, freq in tok._counter.items():
        # Normalize by unique tokens.
        self.globalht[token].append([ind, freq / unique_tokens])
def process(self, path):
    """Tokenize the HTML document at ``path`` and record its tokens.

    The per-document token counts are merged into ``self._counter`` and the
    document's distinct tokens are passed on to ``self.write_tokens``
    together with ``path``.

    Args:
        path: Location handed unchanged to ``Tokenizer.tokenize_html`` and
            to ``self.write_tokens``.
    """
    tokenizer = Tokenizer()
    tokenizer.tokenize_html(path)

    # Counts gathered for this one document only.
    document_counts = tokenizer._counter

    # Fold this document into the running totals, then hand its distinct
    # tokens to the writer.
    self._counter.update(document_counts)
    self.write_tokens(path, document_counts.keys())