コード例 #1
0
def extract_vocabulary(options, dir_name):
    """Build a vocabulary from the files in one sub-directory of the input dir.

    Every regular file found directly inside
    ``os.path.join(options.input_dir, dir_name)`` is read as UTF-8 text,
    split on whitespace, and the resulting tokens are passed to
    ``Vocabulary.expand_vocab`` (one call per file). Sub-directories are
    skipped; the walk is not recursive.

    Args:
        options: Object exposing an ``input_dir`` attribute (base directory).
        dir_name: Name of the sub-directory of ``options.input_dir`` to scan.

    Returns:
        The ``Vocabulary`` instance after it has been fed all files.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    vocabulary = Vocabulary()
    dir_path = os.path.join(options.input_dir, dir_name)

    # os.listdir() yields entries in arbitrary, platform-dependent order.
    # Sorting makes the order in which files are fed to the vocabulary
    # reproducible from run to run and machine to machine.
    for entry in sorted(os.listdir(dir_path)):
        file_path = os.path.join(dir_path, entry)
        if not os.path.isfile(file_path):
            continue

        with io.open(file_path, 'r', encoding='utf-8') as handle:
            raw_text = handle.read()

        # The file is already closed here; only the text is still needed.
        vocabulary.expand_vocab(raw_text.split())

    return vocabulary