Exemple #1
0
 def process(self, ind, path):
     """Tokenize one HTML document and add its tokens to the global index.

     Appends the document title (or ``u'Untitled'`` when the tokenizer
     reports none) to ``self._titles``, then appends one
     ``[ind, weight]`` posting to ``self.globalht[token]`` for every
     token found, where ``weight`` is the token's frequency divided by
     the number of unique tokens in the document.

     Args:
         ind: Identifier stored in each posting for this document.
         path: Passed straight to ``Tokenizer.tokenize_html``
             (presumably the location of the HTML file -- confirm
             against Tokenizer).
     """
     tok = Tokenizer()
     tok.tokenize_html(path)
     self._titles.append(tok.title or u'Untitled')

     counts = tok._counter
     # Loop-invariant, so compute it once instead of once per token.
     unique_count = len(counts)
     for token, freq in counts.items():
         # Normalize by unique tokens. float() forces true division so
         # the weight is not truncated to an integer on interpreters
         # where int / int is floor division (Python 2).
         self.globalht[token].append([ind, float(freq) / unique_count])
Exemple #2
0
 def process(self, path):
     """Tokenize one HTML document and merge its counts into this object.

     The per-document token counts are folded into ``self._counter``
     and the document's distinct tokens are handed to
     ``self.write_tokens`` together with ``path``.

     Args:
         path: Passed to ``Tokenizer.tokenize_html`` and to
             ``self.write_tokens``.
     """
     tokenizer = Tokenizer()
     tokenizer.tokenize_html(path)

     doc_counts = tokenizer._counter
     # Accumulate this document's counts into the running totals.
     self._counter.update(doc_counts)
     distinct_tokens = doc_counts.keys()
     self.write_tokens(path, distinct_tokens)