Ejemplo n.º 1
0
def create_speeches_by_person(
    input_speeches_file,
    input_subjects_file,
    output_person_input_for_word_cloud_directory,
    output_person_word_cloud,
):
    """Run the per-person word-cloud pipeline and write its output files.

    Steps, in order:
      1. ``MalletParser.createInputForMallet()`` is invoked (no arguments).
      2. Speeches are loaded with ``read_speeches_by_person`` and subjects
         with ``read_subjects``.
      3. ``create_key_fd_dict`` is applied to the loaded speeches.
      4. ``create_person_subjects_dict`` combines the subjects with the
         result of step 3.
      5. ``write_persons_output_files`` receives both output arguments and
         the combined mapping.

    Args:
        input_speeches_file: Passed unchanged to ``read_speeches_by_person``.
        input_subjects_file: Passed unchanged to ``read_subjects``.
        output_person_input_for_word_cloud_directory: First argument handed
            to ``write_persons_output_files``.
        output_person_word_cloud: Second argument handed to
            ``write_persons_output_files``.

    Returns:
        None.
    """
    MalletParser.createInputForMallet()

    # Keep the speeches-before-subjects read order of the original.
    speeches_by_person = read_speeches_by_person(input_speeches_file)
    subjects = read_subjects(input_subjects_file)

    # NOTE(review): presumably one frequency distribution per person --
    # confirm against create_key_fd_dict.
    fd_by_person = create_key_fd_dict(speeches_by_person)
    subjects_by_person = create_person_subjects_dict(subjects, fd_by_person)

    write_persons_output_files(
        output_person_input_for_word_cloud_directory,
        output_person_word_cloud,
        subjects_by_person,
    )
Ejemplo n.º 2
0
def create_speeches_by_year(
    input_speeches_file,
    output_person_input_for_word_cloud_directory,
    output_year_word_cloud,
):
    """Run the per-year word-cloud pipeline and write its output files.

    Steps, in order:
      1. ``MalletParser.createInputForMallet()`` is invoked (no arguments).
      2. Speeches are loaded with ``read_speeches_by_year``.
      3. ``create_key_fd_dict`` is applied to the loaded speeches.
      4. ``calculate_tf_idf`` is computed once over the loaded speeches.
      5. For every key/value pair from step 3, ``get_most_common_words`` is
         called with the value and the tf-idf result.
      6. ``write_by_year_output_files`` receives both output arguments and
         the mapping built in step 5.

    Args:
        input_speeches_file: Passed unchanged to ``read_speeches_by_year``.
        output_person_input_for_word_cloud_directory: First argument handed
            to ``write_by_year_output_files``.
        output_year_word_cloud: Second argument handed to
            ``write_by_year_output_files``.

    Returns:
        None.
    """
    MalletParser.createInputForMallet()

    speeches_by_year = read_speeches_by_year(input_speeches_file)
    fd_by_year = create_key_fd_dict(speeches_by_year)

    # Computed once up front and shared by every per-key lookup below.
    tf_idf_scores = calculate_tf_idf(speeches_by_year)

    frequent_words_by_year = {
        year: get_most_common_words(freq_dist, tf_idf_scores)
        for year, freq_dist in fd_by_year.items()
    }

    write_by_year_output_files(
        output_person_input_for_word_cloud_directory,
        output_year_word_cloud,
        frequent_words_by_year,
    )