def exe_extract_topic_words(argv):
    """Write the distinct lower-cased tokens of every topic to a word-list file.

    The topics are loaded with ``StandardFormat().read(topic_path)``; each
    topic text is tokenized with NLTK's ``word_tokenize``, lower-cased, and
    collected into a set.  The unique words are then written to
    ``word_list_path``, one per line, in sorted order, with no trailing
    newline.

    Args:
        argv: Sequence of exactly two items, ``(topic_path, word_list_path)``.
            ``topic_path`` is passed to ``StandardFormat().read``;
            ``word_list_path`` is opened in ``'w'`` mode, so any existing
            file at that path is overwritten.

    Raises:
        ValueError: If ``argv`` does not unpack into exactly two items.
    """
    # Kept as a function-local import so NLTK is loaded only when this
    # function actually runs.
    from nltk.tokenize import word_tokenize

    topic_path, word_list_path = argv

    # Read and tokenize everything BEFORE opening the output file: opening in
    # 'w' mode truncates it, and a failure while reading the topics should not
    # leave an empty word list behind.
    # NOTE(review): `read` presumably returns a mapping of topic id -> topic
    # text; only `.items()` is relied upon here — confirm against StandardFormat.
    topics = StandardFormat().read(topic_path)

    word_set = set()
    for _topic_id, topic_text in topics.items():
        word_set.update(token.lower() for token in word_tokenize(topic_text))

    # The context manager guarantees the handle is closed even if the write
    # fails.  Sorting makes the output deterministic; raw set iteration order
    # is arbitrary and may differ between runs.
    with open(word_list_path, 'w') as word_list_file:
        word_list_file.write('\n'.join(sorted(word_set)))