def extract_vocabulary(options, dir_name):
    """Build a vocabulary from the files directly inside one input directory.

    The directory scanned is ``options.input_dir`` joined with ``dir_name``.
    The scan is not recursive: only entries that are regular files are read;
    anything else (for example a sub-directory) is skipped.

    Args:
        options: Object exposing an ``input_dir`` attribute, the base path
            that ``dir_name`` is resolved against.
        dir_name: Name of the directory under ``options.input_dir`` to scan.

    Returns:
        A ``Vocabulary`` whose ``expand_vocab`` has been called once per file
        with that file's whitespace-separated tokens.
    """
    vocab = Vocabulary()
    source_dir = os.path.join(options.input_dir, dir_name)

    for entry in os.listdir(source_dir):
        entry_path = os.path.join(source_dir, entry)

        # Guard clause: ignore sub-directories and other non-file entries.
        if not os.path.isfile(entry_path):
            continue

        with io.open(entry_path, 'r', encoding='utf-8') as handle:
            # The whole file is read at once; split() with no arguments
            # breaks on any run of whitespace, including newlines.
            tokens = handle.read().split()
            vocab.expand_vocab(tokens)

    return vocab