def different_dictionaries_experiment(in_text_name, in_text_handler):
    """Run the frequency-dictionary experiment for every configured dictionary.

    For each dictionary in ``dict_config.DICTS`` this builds a frequency
    dictionary from the text's sentences, computes the model critical
    frequency and the empirical critical frequency, writes two plots and a
    words chart to per-dictionary output directories, and — for the
    ``'fs_dict'`` dictionary only — accumulates global statistics about
    dominant lexemes and the critical-frequency error.

    Args:
        in_text_name: Base name of the text; used to name output files.
        in_text_handler: Parsed text object exposing ``sentences`` and a
            ``text_info`` mapping with at least ``'sentences'`` and
            ``'words'`` counts.

    Side effects: writes plot/chart files to disk and appends to the
    module-level GLOBAL_* statistics containers.
    """
    for dictionary in dict_config.DICTS:
        freq_dict = freq_dictionary.build_freq_dictionary(in_text_handler.sentences, dictionary)
        # Theoretical (model-based) vs. empirical critical frequency.
        critical_freq = text_model.calculate_critical_frequency(freq_dict)
        empirical_critical_freq = critical_frequency_analysis.get_empirical_critical_frequency(freq_dict.values())

        # Plot with the single (model) critical frequency.
        plot_filename = os.path.join('./plots_w_cr_%s' % dictionary, in_text_name + '.png')
        plotting.make_plot_with_critical_frequency(plot_filename,
                                                   in_text_handler.text_info,
                                                   freq_dict.values(),
                                                   critical_freq)

        # Plot comparing model and empirical critical frequencies.
        two_w_cr_plot_filename = os.path.join('./plots_two_w_cr_%s' % dictionary, in_text_name + '.png')
        plotting.make_plot_with_two_critical_frequencies(two_w_cr_plot_filename,
                                                         in_text_handler.text_info,
                                                         freq_dict.values(),
                                                         critical_freq,
                                                         empirical_critical_freq)

        # Textual chart of words relative to the model critical frequency.
        chart_filename = os.path.join('./charts_%s' % dictionary, in_text_name + '.txt')
        write_words_chart(chart_filename, freq_dict, critical_freq)

        if dictionary == 'fs_dict':
            # Lexemes whose frequency exceeds the critical threshold.
            dominant_lexemes = [lexeme for lexeme in freq_dict if freq_dict[lexeme] > critical_freq]
            GLOBAL_SENTENCES_STAT['sentence_counts'].append(int(in_text_handler.text_info['sentences']))
            GLOBAL_SENTENCES_STAT['dominant_counts'].append(len(dominant_lexemes))
            GLOBAL_WORDS_STAT['word_counts'].append(int(in_text_handler.text_info['words']))
            GLOBAL_WORDS_STAT['dominant_counts'].append(len(dominant_lexemes))
            # NOTE(review): the original one-line source is ambiguous about
            # whether the error statistic belongs inside the fs_dict branch;
            # statement order suggests it does — confirm against history.
            critical_frequency_error = critical_frequency_analysis.get_critical_frequency_error(freq_dict)
            GLOBAL_CRITICAL_FREQUENCY_ERROR_STAT.append(critical_frequency_error)
def empirical_critical_frequency_experiment(in_text_name, in_text_handler):
    """Write word charts using the empirical critical frequency only.

    For each of the two dictionaries (``'fs_dict'`` and
    ``'assoc_power_dict'``) this builds a frequency dictionary from the
    text's sentences, derives the empirical critical frequency from the
    observed frequencies, and writes a words chart into the corresponding
    ``charts_empirical_<dictionary>`` directory.

    Args:
        in_text_name: Base name of the text; used to name the chart file.
        in_text_handler: Parsed text object exposing ``sentences``.

    Side effects: writes one chart file per dictionary to disk.
    """
    for dictionary in ['fs_dict', 'assoc_power_dict']:
        freq_dict = freq_dictionary.build_freq_dictionary(in_text_handler.sentences, dictionary)
        # Unlike the model-based experiment, the threshold here is derived
        # purely from the observed frequency distribution.
        critical_freq = critical_frequency_analysis.get_empirical_critical_frequency(freq_dict.values())
        chart_filename = os.path.join('./charts_empirical_%s' % dictionary, in_text_name + '.txt')
        write_words_chart(chart_filename, freq_dict, critical_freq)