def create_speeches_by_person(input_speeches_file,
                              input_subjects_file,
                              output_person_input_for_word_cloud_directory,
                              output_person_word_cloud):
    """Run the per-person pipeline and write its output files.

    Steps, in order:
      1. ``MalletParser.createInputForMallet()`` is invoked first.
      2. Speeches are loaded with ``read_speeches_by_person`` and subjects
         with ``read_subjects``.
      3. ``create_key_fd_dict`` is applied to the loaded speeches, and its
         result is combined with the subjects by
         ``create_person_subjects_dict``.
      4. The combined result is handed to ``write_persons_output_files``
         together with both output arguments.

    Args:
        input_speeches_file: Passed to ``read_speeches_by_person``.
        input_subjects_file: Passed to ``read_subjects``.
        output_person_input_for_word_cloud_directory: First argument to
            ``write_persons_output_files``.
        output_person_word_cloud: Second argument to
            ``write_persons_output_files``.

    Returns:
        None. Results are delivered through ``write_persons_output_files``.
    """
    MalletParser.createInputForMallet()

    # Load both inputs; speeches are read before subjects.
    speeches_by_person = read_speeches_by_person(input_speeches_file)
    subjects = read_subjects(input_subjects_file)

    # NOTE(review): "fd" presumably stands for frequency distribution;
    # confirm against create_key_fd_dict.
    fd_by_person = create_key_fd_dict(speeches_by_person)
    subjects_by_person = create_person_subjects_dict(subjects, fd_by_person)

    write_persons_output_files(
        output_person_input_for_word_cloud_directory,
        output_person_word_cloud,
        subjects_by_person,
    )
def create_speeches_by_year(input_speeches_file,
                            output_person_input_for_word_cloud_directory,
                            output_year_word_cloud):
    """Run the per-year pipeline and write its output files.

    Steps, in order:
      1. ``MalletParser.createInputForMallet()`` is invoked first.
      2. Speeches are loaded with ``read_speeches_by_year``.
      3. ``create_key_fd_dict`` and then ``calculate_tf_idf`` are applied to
         the loaded speeches.
      4. For every key/value pair produced by ``create_key_fd_dict``,
         ``get_most_common_words(value, tf_idf)`` is stored under that key.
      5. The resulting mapping is handed to ``write_by_year_output_files``
         together with both output arguments.

    Args:
        input_speeches_file: Passed to ``read_speeches_by_year``.
        output_person_input_for_word_cloud_directory: First argument to
            ``write_by_year_output_files``. The name says "person", but
            this function passes it to the by-year writer.
        output_year_word_cloud: Second argument to
            ``write_by_year_output_files``.

    Returns:
        None. Results are delivered through ``write_by_year_output_files``.
    """
    MalletParser.createInputForMallet()

    speeches_by_year = read_speeches_by_year(input_speeches_file)

    # NOTE(review): "fd" presumably stands for frequency distribution;
    # confirm against create_key_fd_dict.
    fd_by_year = create_key_fd_dict(speeches_by_year)
    tf_idf_scores = calculate_tf_idf(speeches_by_year)

    # One entry per key of fd_by_year, in that mapping's iteration order.
    frequent_words_by_year = {
        year: get_most_common_words(fd, tf_idf_scores)
        for year, fd in fd_by_year.items()
    }

    write_by_year_output_files(
        output_person_input_for_word_cloud_directory,
        output_year_word_cloud,
        frequent_words_by_year,
    )