def frequency(words, alphabet):
    """Return ``alphabet`` with each letter's tally turned into a relative frequency.

    Each distinct entry of ``words`` is weighted by
    ``word_count.count(words, entry.word)``.  That weight is added to the
    tally of every character of the entry's word, and to a running total.
    Finally each of the 26 lowercase letters is divided by the total.

    :param words: dictionary of named tuples containing the names, years, and
        times used; every value must expose a ``word`` attribute
    :param alphabet: dictionary of the alphabet as keys and 0 as the values;
        it is updated in place
    :return: the same ``alphabet`` object with its values replaced by
        frequencies, or with the raw (all-zero-weight) tallies left untouched
        when nothing was counted
    :raises KeyError: if a word contains a character that is not a key of
        ``alphabet``, or if ``alphabet`` lacks one of the lowercase letters
    """
    # A list (not a set) so entries only need to support ==, not hashing.
    used = []
    occurences = 0
    for entry in words.values():
        if entry in used:
            # An equal entry has already been weighted; do not count it twice.
            continue
        counter = word_count.count(words, entry.word)
        used.append(entry)
        for char in entry.word:
            alphabet[char] += counter
            # Total is accumulated per letter so the frequencies sum to 1.
            occurences += counter

    if occurences == 0:
        # Nothing was counted (empty input or only zero weights): dividing
        # would raise ZeroDivisionError, so hand back the tallies as they are.
        return alphabet

    for char in "abcdefghijklmnopqrstuvwxyz":
        alphabet[char] = alphabet[char] / occurences
    return alphabet
def test_word_3(self):
    """The empty string is expected to produce a count of 0."""
    result = word_count.count("")
    self.assertEqual(result, 0)
def test_word(self):
    """A five-word, space-separated sentence is expected to produce 5."""
    sentence = "Hello from the other side"
    result = word_count.count(sentence)
    self.assertEqual(result, 5)
def test_false2(self):
    """The count for this sentence must differ from the deliberately wrong value 6."""
    wrong_total = 6
    result = word_count.count("All the worlds a stage")
    self.assertNotEqual(result, wrong_total)
def test_true1(self):
    """Repeated words are each counted: the sentence is expected to produce 6."""
    sentence = "To be or not to be"
    self.assertEqual(
        word_count.count(sentence),
        6,
    )
def test_count_single_letter_word():
    """A lone letter is expected to produce a count of 1."""
    actual = word_count.count("A")
    expected = 1
    assert actual == expected
def test_true3(self):
    """Two space-separated words are expected to produce 2."""
    greeting = "Hello there"
    result = word_count.count(greeting)
    self.assertEqual(result, 2)
def test_non_letters(self):
    """Commas (with or without a following space) separate words in the tally."""
    tally = count('word,test, word')
    expected = {'word': 2, 'test': 1}
    self.assertEqual(tally, expected)
def test_double_barrelled(self):
    """A hyphenated pair is expected to be tallied as one single word."""
    tally = count('word-count')
    expected = {'word-count': 1}
    self.assertEqual(tally, expected)
def test_multi_spaces(self):
    """A run of several spaces between two words must act as one separator.

    The input deliberately contains more than one consecutive space so that
    the case named by this test is actually exercised; no empty-string key
    may appear in the tally.
    """
    tally = count('word   test')
    self.assertEqual(tally, {'word': 1, 'test': 1})
def test_newline(self):
    """A newline between two words is expected to separate them."""
    tally = count('word\ntest')
    expected = {'word': 1, 'test': 1}
    self.assertEqual(tally, expected)
def test_spaces(self):
    """Whitespace-only input is expected to produce an empty tally."""
    tally = count(' ')
    self.assertEqual(tally, {})
def test_containing_words(self):
    """A word that is a prefix of another is tallied separately from it."""
    tally = count('test tested')
    expected = {'test': 1, 'tested': 1}
    self.assertEqual(tally, expected)
def test_case_insensitive(self):
    """Differently-cased spellings are expected to share one lowercase key."""
    tally = count('word Word')
    expected = {'word': 2}
    self.assertEqual(tally, expected)
def test_two_words(self):
    """A repeated word is tallied twice alongside a word that occurs once."""
    tally = count('word word test')
    expected = {'word': 2, 'test': 1}
    self.assertEqual(tally, expected)
def test_triple_barrelled(self):
    """A word with two hyphens is expected to be tallied as one single word."""
    tally = count('over-the-top')
    expected = {'over-the-top': 1}
    self.assertEqual(tally, expected)
def test_count_sentence():
    """A twelve-token sentence containing an apostrophe is expected to produce 12."""
    sentence = "I was wondering after all these years if you'd like to meet"
    actual = word_count.count(sentence)
    assert actual == 12
def test_empty_string(self):
    """The empty string is expected to produce an empty tally."""
    tally = count("")
    self.assertEqual(tally, {})
def test_true2(self):
    """Counting the empty string is expected to give 0."""
    empty = ""
    self.assertEqual(
        word_count.count(empty),
        0,
    )
def test_one_word(self):
    """A single word is expected to be tallied exactly once."""
    tally = count('word')
    expected = {'word': 1}
    self.assertEqual(tally, expected)
def test_false1(self):
    """The count for a single word must differ from the deliberately wrong value 2."""
    wrong_total = 2
    result = word_count.count("Hello")
    self.assertNotEqual(result, wrong_total)
def test_it_counts_the_words_and_stores_results_in_a_list():
    """``wc.count`` applied to ``data['word_list']`` must equal ``data['word_dict']``."""
    actual = wc.count(data['word_list'])
    # Looked up after the call, in the same order the comparison needs them.
    expected = data['word_dict']
    assert actual == expected
def test_false3(self):
    """The count for this phrase must differ from the deliberately wrong value 3."""
    wrong_total = 3
    result = word_count.count("Me Myself and I")
    self.assertNotEqual(result, wrong_total)
def test_answer6():
    """``word_count.count`` is expected to return 2 for the digit string below."""
    text = "12 33 21"
    actual = word_count.count(text)
    assert actual == 2
def test_add2(self):
    """``word_count.count`` is expected to return 4 for a sentence ending in a period."""
    sentence = "I like pie."
    result = word_count.count(sentence)
    self.assertEqual(result, 4)
def test_answer2():
    """``word_count.count`` is expected to return 2 for the string ``"aba"``."""
    actual = word_count.count("aba")
    expected = 2
    assert actual == expected
def test_word_4(self):
    """Two words joined by a comma with no space are expected to produce 2."""
    joined = "Hello,There"
    result = word_count.count(joined)
    self.assertEqual(result, 2)
def test_count_not_a_word():
    """A lone underscore is expected to produce a count of 0."""
    actual = word_count.count("_")
    expected = 0
    assert actual == expected
def test_word_2(self):
    """A single one-letter word is expected to produce a count of 1."""
    result = word_count.count("I")
    self.assertEqual(result, 1)
def test_count_comma_no_space():
    """Two words separated only by a comma are expected to produce 2."""
    text = "General,Kenobi"
    actual = word_count.count(text)
    assert actual == 2
# Script: load the bug reports, clean them, and derive per-report word
# counts, extractive-summary dataframes and length features.  The helpers
# get_bugs, clean, count, get_summary, summary and length_features are not
# defined in this section.
import pprint

# Input file names, passed to get_bugs and get_summary respectively.
bug_xml = 'bugreports.xml'
summary_xml = 'annotation.xml'

# get_bugs yields both the reports and a structure object that get_summary
# needs later; the reports are then replaced by their cleaned version.
bug_reports, report_structure = get_bugs(bug_xml)
bug_reports = clean(bug_reports)

# Pretty-printer kept around for the ad-hoc inspection calls commented out below.
pp = pprint.PrettyPrinter(indent=4)
# pp.pprint(report_structure[0])
# pp.pprint(bug_reports[20])

# word_count has the word count of the bug report
# sentence_word_count has the word count of each sentence
word_count, sentence_word_count = count(bug_reports)
# pp.pprint(sentence_word_count[10])

# df_summary contains a dataframe for extractive summary (GSS) of each bug report
ext_summary = get_summary(summary_xml, report_structure)
df_summary = summary(ext_summary)
# pp.pprint(df_summary[20])
# pp.pprint(df_summary[35])

# length_features creates a dataframe for each bug report
df_len = length_features(bug_reports)
# pp.pprint(df_len[11])