コード例 #1
0
def create_index(args):
    """Build a vocabulary from a stream of documents and dump it.

    ``args`` is read positionally:

    * ``args[0]`` -- forwarded to ``dump`` as its first argument.
    * ``args[1]`` -- compressor name, ``'varbyte'`` or ``'simple9'``.
    * ``args[2:]`` -- forwarded to ``DocumentStreamReader``.

    Raises:
        AssertionError: if ``args[1]`` is neither of the two accepted names.
    """
    # The reader is constructed before the compressor name is validated.
    documents = DocumentStreamReader(args[2:])

    compressor_name = args[1]
    if compressor_name not in ('varbyte', 'simple9'):
        raise AssertionError('Expected varbyte|simple9 as a compressor')

    # NOTE(review): both accepted names currently select Simple9. The
    # 'varbyte' option presumably should use a separate varbyte codec --
    # TODO confirm and wire it in if such a class exists.
    vocabulary = Vocabulary(Simple9)

    # Register every extracted word together with the URL of its document.
    for document in documents:
        for token in extract_words(document.text):
            vocabulary.append(token, document.url)

    dump(args[0], vocabulary)