def __init__(self, path, tokenizer, search, write):
    """Run a ``CorpusReader`` over ``path`` and print a short report.

    The report consists of the elapsed time, the on-disk size of
    ``path + '.bin'``, whatever ``metrics`` emits for the indexer, and
    the vocabulary count together with its total character length.

    Args:
        path: Handed to ``CorpusReader``; also used as the prefix of the
            ``.bin`` file that is stat'ed and of the ``_indexer.txt``
            dump.
        tokenizer: Forwarded unchanged to ``CorpusReader``.
        search: Query passed to ``indexer.search``; the empty string
            skips the search entirely.
        write: When truthy, the indexer is asked to write itself to
            ``f"{path}_indexer.txt"``.

    Raises:
        OSError: Propagated from ``os.stat`` if ``path + '.bin'`` cannot
            be stat'ed.
    """
    started_at = time.time()
    reader = CorpusReader(path, tokenizer)
    # NOTE(review): presumably this builds the index and produces the
    # ``.bin`` file measured below -- confirm against CorpusReader.
    reader.processFile()
    elapsed = time.time() - started_at

    # Performance metrics.
    print("Index time: {:.2f} seconds".format(elapsed))

    index_stat = os.stat(path + '.bin')
    print("Index Size on disk :", sizeof_fmt(index_stat.st_size))

    # Keys are fetched before ``metrics`` runs, as in the original ordering.
    words = reader.index.keys()
    metrics(reader.indexer)
    print(
        f"Vocabulary: {len(words)} words, size: {sizeof_fmt(len(''.join(words)))}"
    )

    # Optional follow-up actions requested by the caller.
    if search != '':
        hits = reader.indexer.search(search)
        print(hits)

    if write:
        dump_path = f"{path}_indexer.txt"
        reader.indexer.writeIndexToFile(dump_path)