class Indexer(Task):
    """Task that marks a page as indexed and stores its index entries.

    Calling the instance queues a ``MakeIndexMessage`` for the given page;
    ``handle_MakeIndexMessage`` does the actual work (presumably dispatched
    by ``Task`` based on the message type -- confirm against ``Task``).
    """

    def __init__(self, splitter):
        """Create the storage clients and remember the splitter.

        Args:
            splitter: Callable applied to ``page_info.text``; it must return
                an iterable whose items can be passed to ``"".join``.
        """
        # NOTE(review): ``self`` is passed explicitly as an extra positional
        # argument to the parent initializer -- confirm that
        # ``Task.__init__`` really expects it.
        super(Indexer, self).__init__(self)
        self.page_storage = PageStorageClient()
        self.index_storage = IndexStorageClient()
        self.splitter = splitter

    def __call__(self, page_info):
        """Queue an indexing request for ``page_info``."""
        request = MakeIndexMessage(page_info)
        self.put_message(request)

    def handle_MakeIndexMessage(self, page_info):
        """Flag the page as indexed, persist it, and store its index entries.

        Each index entry has the form ``(joined_item, (page_info.id, pos))``
        where ``pos`` is the item's position in the splitter output.
        """
        self.show_timestamped_message("Indexing ... %s" % page_info.url)

        # Set the INDEXED bit and persist the page before building the index.
        page_info.status |= PageInfo.INDEXED
        self.page_storage.set_page(page_info)

        entries = []
        for position, pieces in enumerate(self.splitter(page_info.text)):
            key = "".join(pieces)
            entries.append((key, (page_info.id, position)))

        self.index_storage.set_index(entries)
def __init__(self, splitter):
    """Create the storage clients and remember the splitter.

    NOTE(review): this definition repeats the constructor body of
    ``Indexer`` at module level -- it looks like a leftover duplicate;
    confirm whether it is still needed.

    Args:
        splitter: Object stored on the instance as ``self.splitter``.
    """
    # NOTE(review): ``self`` is passed explicitly as an extra positional
    # argument to the parent initializer -- confirm that the parent
    # ``__init__`` really expects it.
    super(Indexer, self).__init__(self)
    self.page_storage = PageStorageClient()
    self.index_storage = IndexStorageClient()
    self.splitter = splitter