def test_count_words(self):
    # Two fixtures: a fruit list with one repeated word and a language list
    # dominated by a single word.
    fruit_counts = count_words(["apple", "banana", "apple", "pie"])
    self.assertEqual(fruit_counts, {'apple': 2, 'pie': 1, 'banana': 1})

    language_counts = count_words(["python", "python", "python", "ruby"])
    self.assertEqual(language_counts, {'ruby': 1, 'python': 3})
def test_if_count_words_returns_corect_data(self):
    # Each case pairs the expected word -> occurrences mapping with its input.
    cases = (
        ({'apple': 2, 'pie': 1, 'banana': 1},
         ["apple", "banana", "apple", "pie"]),
        ({'ruby': 1, 'python': 3},
         ["python", "python", "python", "ruby"]),
    )
    for expected, words in cases:
        self.assertEqual(expected, count_words(words))
def test_count_words_2():
    """Check the three leading entries for mixed-case, punctuated input."""
    text_lines = ["aa bB, Bb bb A a.", "chin chin chin!. BB"]
    top_three = count_words(text_lines)[:3]
    assert top_three == [('bb', 4), ('chin', 3), ('a', 2)]
def test_count_words():
    """Check the three leading entries for two lines of the nursery rhyme."""
    rhyme = [
        "Little pig, let me come in.",
        "No, no, no, no, not by the hair on my little chin chin.",
    ]
    top_three = count_words(rhyme)[:3]
    assert top_three == [('no', 4), ('chin', 2), ('little', 2)]
def flesch(s):
    """Return ``k1 + k2 * (words / sentences) + k3 * (syllables / words)``.

    The sentence count comes from ``count_words.count_sentances(s)``; the
    word and syllable counts are items 0 and 1 of the value returned by
    ``count_words.count_words(s)``. The coefficients ``k1``, ``k2`` and
    ``k3`` are read from the enclosing scope.
    """
    sentence_total = count_words.count_sentances(s)
    counts = count_words.count_words(s)
    word_total, syllable_total = counts[0], counts[1]
    return (k1
            + k2 * (word_total / sentence_total)
            + k3 * (syllable_total / word_total))
def main():
    """Print and plot word counts gathered from files under ``DATA_DIR_STR``.

    At most ten directory entries are processed, in the order
    ``os.listdir`` returns them. Every line of each file is echoed, passed
    through ``clean_text`` and handed to ``count_words`` together with the
    running tally. The tally is then printed in descending order of count
    and plotted with the 1-based rank on the x axis.
    """
    # NOTE(review): the checker is constructed but not used below; it is kept
    # in case constructing it has side effects that are relied upon - confirm.
    lt = LanguageTool('en-US')
    data_dir = os.fsencode(DATA_DIR_STR)
    count = {}
    for entry in os.listdir(data_dir)[:10]:
        path = os.path.join(data_dir, entry)
        print(path)
        with open(path) as f:
            # Iterate the file lazily instead of loading every line up front.
            for line in f:
                print(line)
                count = count_words(clean_text(line), count)

    words = sorted(count.items(), key=lambda item: item[1], reverse=True)
    for item in words:
        print(item)

    ranks = list(range(1, len(words) + 1))
    frequencies = [freq for _, freq in words]
    plt.plot(ranks, frequencies)
    plt.show()
def test_unique_words(self):
    # Three distinct words: every count must be exactly one.
    self.assertEqual(
        {'apple': 1, 'orange': 1, 'pineapple': 1},
        count_words(['apple', 'orange', 'pineapple']),
    )
def unique_words_count(arr):
    """Return how many items iterating over ``count_words(arr)`` yields."""
    return sum(1 for _ in count_words(arr))
def test_repetitive_words(self):
    # Repeated words are interleaved, so counting must not rely on adjacency.
    self.assertEqual(
        {'apple': 4, 'strawberry': 2, 'plum': 3},
        count_words([
            'apple', 'apple', 'plum', 'strawberry', 'apple',
            'strawberry', 'plum', 'apple', 'plum',
        ]),
    )
def post(self):
    """Analyse the submitted ``text`` field and render ``main_page.html``.

    The template receives the word dictionary, its ordered form, the total
    and distinct word counts, the size of
    ``count_words.most_common_words(dictionary, 80)`` and that size as a
    percentage of the distinct words (0 when there are none).
    """
    text = self.request.get('text')
    # An empty submission is analysed as an empty dictionary.
    dictionary = count_words.count_words(text) if text != '' else {}

    ordered_list = count_words.order(dictionary)
    total_words = count_words.count_all_words(dictionary)
    different_words = len(dictionary)
    most_common = len(count_words.most_common_words(dictionary, 80))

    # Guard against dividing by zero when no words were found.
    if different_words:
        percentage = round(100.0 * most_common / different_words, 2)
    else:
        percentage = 0

    context = {
        'text': text,
        'dictionary': dictionary,
        'ordered_list': ordered_list,
        'total_words': total_words,
        'different_words': different_words,
        'most_common': most_common,
        'percentage': percentage,
    }
    self.render("main_page.html", **context)
def work_json():
    """Append lower-cased news descriptions to ``temp.txt`` and print the counts.

    Reads ``files/newsafr.json``, looks at the ``'channel'`` mapping of each
    top-level value and, for every list found there, collects the
    ``'description'`` of each entry. The collected text is appended to
    ``temp.txt`` and the result of ``c_w.count_words()`` is printed.
    """
    with open("files/newsafr.json", encoding='utf-8') as datafile:
        json_data = json.load(datafile)

    descriptions = []
    for headers in json_data.values():
        channel = headers['channel']
        for item in channel.values():
            if not isinstance(item, list):
                continue
            for dct_news in item:
                description = dct_news.get('description')
                if description is None:
                    # Entries without a description have nothing to count.
                    continue
                descriptions.append(description.lower())

    # Only touch temp.txt when there is something to append; the context
    # manager closes the file even if the write fails.
    if descriptions:
        with open("temp.txt", "a", encoding='utf-8') as temp_file:
            temp_file.writelines(descriptions)

    # NOTE(review): presumably c_w.count_words() reads temp.txt - confirm.
    output_message = c_w.count_words()
    print(output_message)
def work_xml():
    """Append lower-cased item descriptions to ``temp.txt`` and print the counts.

    Parses ``files/newsafr.xml``, collects the text of every ``description``
    element under each ``channel/item``, appends the collected text to
    ``temp.txt`` and prints the result of ``c_w.count_words()``.
    """
    parser = ET.XMLParser(encoding="utf-8")
    tree = ET.parse("files/newsafr.xml", parser)
    root = tree.getroot()

    descriptions = []
    for xmli in root.findall("channel/item"):
        for node in xmli.findall("description"):
            if node.text is None:
                # An empty <description/> carries no text to count.
                continue
            descriptions.append(node.text.lower())

    # Only touch temp.txt when there is something to append; the context
    # manager closes the file even if the write fails.
    if descriptions:
        with open("temp.txt", "a", encoding='utf-8') as tmp_file:
            tmp_file.writelines(descriptions)

    # NOTE(review): presumably c_w.count_words() reads temp.txt - confirm.
    output_message = c_w.count_words()
    print(output_message)
def get_tf(dir):
    """Build per-file, per-word log-scaled frequency ratios for a directory.

    Args:
        dir (str): directory of the files

    Returns:
        dir_tf (dict): one key per entry that ``read_dir`` put into the file
        list. Each value is the mapping ``count_words`` returned for that
        entry, with every count replaced by ``log2(total / count)``, where
        ``total`` is the sum of all counts in that mapping.
    """
    from math import log

    paths = []
    read_dir(dir, paths)

    dir_tf = {}
    for path in paths:
        counts = count_words(path)
        total = float(sum(counts.values()))
        # Values are overwritten in place; the key set does not change.
        for word, freq in counts.items():
            counts[word] = log(total / freq, 2)
        dir_tf[path] = counts
    return dir_tf
def get_idf(dir):
    """Return, for every word, the fraction of files in which it occurs.

    Args:
        dir (str): directory of the files

    Returns:
        dir_idf (dict): maps each word to the number of entries in the file
        list whose ``count_words`` result contains it, divided by the length
        of the file list.
    """
    paths = []
    read_dir(dir, paths)
    doc_num = len(paths)

    dir_idf = {}
    for path in paths:
        for word in count_words(path):
            dir_idf[word] = dir_idf.get(word, 0) + 1

    for word, seen_in in dir_idf.items():
        dir_idf[word] = float(seen_in) / doc_num
    return dir_idf
def test_two(self):
    # 'python' appears three times, 'ruby' once.
    expected = {'ruby': 1, 'python': 3}
    words = ["python", "python", "python", "ruby"]
    self.assertEqual(expected, count_words(words))
def test_if_count_words_returns_corect_data(self):
    # First fixture: one duplicated fruit among three distinct words.
    fruit_expected = {'apple': 2, 'pie': 1, 'banana': 1}
    fruit_actual = count_words(["apple", "banana", "apple", "pie"])
    self.assertEqual(fruit_expected, fruit_actual)

    # Second fixture: one word repeated three times plus a single other word.
    language_expected = {'ruby': 1, 'python': 3}
    language_actual = count_words(["python", "python", "python", "ruby"])
    self.assertEqual(language_expected, language_actual)
def test_empty_list(self):
    # No input words means no entries in the result.
    no_words = []
    self.assertEqual({}, count_words(no_words))
def test_if_list_is_empty(self):
    # Counting an empty list must give an empty dictionary.
    expected = {}
    actual = count_words([])
    self.assertEqual(expected, actual)
def test_with_3_variables(self):
    # Three distinct words, one of which occurs twice.
    self.assertEqual(
        count_words(["apple", "banana", "apple", "pie"]),
        {'apple': 2, 'pie': 1, 'banana': 1},
    )
def test_with_3_variables1(self):
    # Two distinct words, one of which occurs three times.
    self.assertEqual(
        count_words(["python", "python", "python", "ruby"]),
        {'ruby': 1, 'python': 3},
    )
def test_if_no_words(self):
    # An empty input list is expected to give an empty dictionary.
    self.assertEqual({}, count_words([]))
def test_empty():
    """An empty string produces an empty mapping."""
    counts = count_words('')
    assert counts == {}
def test_upper_case():
    """'Hello' and 'hello' are counted under the same lower-case key."""
    counts = count_words('Hello world hello')
    expected = {'hello': 2, 'world': 1}
    assert counts == expected
def test_one(self):
    # 'apple' appears twice; the other two words once each.
    expected = {'apple': 2, 'pie': 1, 'banana': 1}
    words = ["apple", "banana", "apple", "pie"]
    self.assertEqual(expected, count_words(words))
def test_unique_words(self):
    # Every word occurs once, so every count is one.
    self.assertEqual(
        {'apple': 1, 'pie': 1, 'banana': 1},
        count_words(["apple", "banana", "pie"]),
    )
def test_count_one_word(self):
    # A single-element list yields a single entry with count one.
    single_word = ["apple"]
    expected = {"apple": 1}
    self.assertEqual(expected, count_words(single_word))
def test_count_words(self):
    # Only the number of distinct words is checked here, not the counts.
    cases = (
        (3, ["apple", "banana", "apple", "pie"]),
        (2, ["python", "python", "python", "ruby"]),
    )
    for distinct, words in cases:
        self.assertEqual(distinct, len(count_words(words)))
def test_count_words():
    """The first five results for ``words.md`` match the expected words."""
    textfile = 'words.md'
    n = 5
    top_words = count_words(textfile, n)
    assert top_words[:5] == ['the', 'a', 'of', 'and', 'file']
def test_punctuation():
    """A trailing ',' or '!' does not create a separate key for 'hello'."""
    counts = count_words('hello, world hello!')
    expected = {'hello': 2, 'world': 1}
    assert counts == expected
def invoke_result(num):
    """Call ``cw.count_words`` on a fixed sentence, passing *num* through."""
    sentence = "betty bought a bit of butter but the butter was bitter"
    return cw.count_words(sentence, num)
def test_hello_world():
    """Two different words are each counted once."""
    counts = count_words('hello world')
    assert counts == {'hello': 1, 'world': 1}
def test_output(self):
    # Each case is (input words, expected counts); the last one is the
    # empty-input edge case.
    cases = (
        (["apple", "banana", "apple", "pie"],
         {'apple': 2, 'pie': 1, 'banana': 1}),
        (["python", "python", "python", "ruby"],
         {'ruby': 1, 'python': 3}),
        ([], {}),
    )
    for words, expected in cases:
        self.assertEqual(count_words(words), expected)
def test_count_words(self):
    # Keys are expected to keep the casing of the input words.
    self.assertEqual(
        {"heaSS": 1, "ASAS": 1, "A": 2},
        count_words(["heaSS", "ASAS", "A", "A"]),
    )
def test_with_no_variables(self):
    # A list holding one empty string counts that empty string once.
    self.assertEqual(count_words([""]), {'': 1})
def unique_words_count(arr):
    """Return the length of the value ``count_words`` produces for *arr*."""
    return len(count_words(arr))
def test_if_the_word_count_is_true(self):
    # Two words occur twice and one occurs once.
    words = ["apple", "banana", "apple", "pie", "banana"]
    expected = {"apple": 2, "banana": 2, "pie": 1}
    self.assertEqual(expected, count_words(words))
def test_if_word_is_added(self):
    # A single word must appear in the result with a count of one.
    expected = {'kaluf': 1}
    self.assertEqual(expected, count_words(["kaluf"]))
def test_if_the_list_is_not_empty(self):
    # Non-empty input must not produce an empty dictionary.
    words = ["apple", "banana", "apple", "pie", "banana"]
    self.assertNotEqual({}, count_words(words))
def test_two_words_if_added(self):
    # Two different words are both recorded, each with a count of one.
    expected = {'kaluf': 1, "4u6ka": 1}
    actual = count_words(["kaluf", "4u6ka"])
    self.assertEqual(expected, actual)
def test_no_word(self):
    # With nothing to count the result is an empty dictionary.
    counted = count_words([])
    self.assertEqual({}, counted)
def test_count_more_different_word(self):
    # Two distinct words, one occurrence each.
    words = ["apple", "ass"]
    expected = {"apple": 1, "ass": 1}
    self.assertEqual(expected, count_words(words))
def test_if_word_second_added(self):
    # The same word given twice is stored once with a count of two.
    repeated = ["kaluf", "kaluf"]
    self.assertEqual({'kaluf': 2}, count_words(repeated))
def test_arr(self):
    # One word repeated twice gives a single entry with count two.
    self.assertEqual(count_words(["python", "python"]), {"python": 2})
def test_zero_dictionary(self):
    # Empty input list -> empty dictionary.
    empty_input = []
    expected = {}
    self.assertEqual(expected, count_words(empty_input))
def unique_words_count(arr):
    """Return the number of entries in ``count_words(arr)``."""
    word_counts = count_words(arr)
    return len(word_counts)
def test_empty_arr(self):
    # An empty array is expected to produce an empty dictionary.
    self.assertEqual({}, count_words([]))
def test_multiple_words():
    """A word that occurs twice in the string gets a count of two."""
    counts = count_words('hello world hello')
    expected = {'hello': 2, 'world': 1}
    assert counts == expected