def create_index(args):
    """Build a vocabulary from a document stream and dump it.

    ``args`` is an indexable sequence laid out as:

    * ``args[0]`` -- passed as the first argument to ``dump`` (presumably
      the output destination; confirm against ``dump``).
    * ``args[1]`` -- compressor name, either ``'varbyte'`` or ``'simple9'``.
    * ``args[2:]`` -- forwarded unchanged to ``DocumentStreamReader``.

    Raises:
        AssertionError: if ``args[1]`` is not one of the accepted
            compressor names.
    """
    # The reader is constructed before the compressor name is validated.
    reader = DocumentStreamReader(args[2:])

    compressor_name = args[1]
    if compressor_name not in ('varbyte', 'simple9'):
        raise AssertionError('Expected varbyte|simple9 as a compressor')

    # NOTE(review): both accepted names currently select Simple9, so
    # 'varbyte' does not pick a different compressor. This looks like a
    # copy-paste slip, but it may be a deliberate fallback -- confirm
    # whether a dedicated varbyte compressor class exists and should be
    # used for the 'varbyte' case.
    vocabulary = Vocabulary(Simple9)

    # Register every extracted word together with the URL of its document.
    for doc in reader:
        for word in extract_words(doc.text):
            vocabulary.append(word, doc.url)

    dump(args[0], vocabulary)