def test_count_words(self):
     """count_words maps each distinct word of a list to its occurrence count."""
     cases = [
         (["apple", "banana", "apple", "pie"],
          {'apple': 2, 'pie': 1, 'banana': 1}),
         (["python", "python", "python", "ruby"],
          {'ruby': 1, 'python': 3}),
     ]
     for words, expected in cases:
         self.assertEqual(count_words(words), expected)
Exemple #2
0
 def test_if_count_words_returns_corect_data(self):
     """Repeated words in a list are tallied per distinct word."""
     fruit_counts = count_words(["apple", "banana", "apple", "pie"])
     self.assertEqual({'apple': 2, 'pie': 1, 'banana': 1}, fruit_counts)

     language_counts = count_words(["python", "python", "python", "ruby"])
     self.assertEqual({'ruby': 1, 'python': 3}, language_counts)
def test_count_words_2():
    """The first three entries for mixed-case, punctuated lines are as expected."""
    text_lines = ["aa bB, Bb bb A a.", "chin chin chin!. BB"]
    top_three = count_words(text_lines)[:3]
    assert top_three == [('bb', 4), ('chin', 3), ('a', 2)]
def test_count_words():
    """The first three (word, count) entries for the sample lines are as expected."""
    first_line = "Little pig, let me come in."
    second_line = "No, no, no, no, not by the hair on my little chin chin."
    top_three = count_words([first_line, second_line])[:3]
    assert top_three == [('no', 4), ('chin', 2), ('little', 2)]
Exemple #5
0
def flesch(s):
    """Return ``k1 + k2 * (words / sentences) + k3 * (syllables / words)`` for *s*.

    The sentence count comes from ``count_words.count_sentances(s)``; the word
    and syllable counts are items 0 and 1 of ``count_words.count_words(s)``.
    ``k1``, ``k2`` and ``k3`` are coefficients defined outside this function.
    """
    sentence_total = count_words.count_sentances(s)
    counts = count_words.count_words(s)
    # Index rather than unpack so a result with more than two items still works.
    word_total, syllable_total = counts[0], counts[1]
    words_per_sentence = word_total / sentence_total
    syllables_per_word = syllable_total / word_total
    return k1 + k2 * words_per_sentence + k3 * syllables_per_word
Exemple #6
0
def test_count_words():
    """Check the three leading (word, count) pairs returned for two sample lines."""
    sample = [
        "Little pig, let me come in.",
        "No, no, no, no, not by the hair on my little chin chin.",
    ]
    result = count_words(sample)
    assert result[:3] == [('no', 4), ('chin', 2), ('little', 2)]
def main():
    """Count word frequencies over up to ten data files and plot them.

    Takes the first ten entries ``os.listdir`` returns for ``DATA_DIR_STR``,
    passes every line through ``clean_text`` and accumulates the result with
    ``count_words``. It then prints the (word, count) pairs in descending
    count order and plots count against rank.
    """
    # NOTE(review): grammar checking via lt.check(line) is currently disabled,
    # so this instance is unused; it is kept in case construction is relied on.
    lt = LanguageTool('en-US')

    # Named data_dir (not dir) to avoid shadowing the builtin.
    data_dir = os.fsencode(DATA_DIR_STR)
    count = {}

    for entry in os.listdir(data_dir)[:10]:
        path = os.path.join(data_dir, entry)
        print(path)
        with open(path) as f:
            # Iterate lazily instead of loading every line with readlines().
            for line in f:
                print(line)
                count = count_words(clean_text(line), count)

    words = sorted(count.items(), key=lambda item: item[1], reverse=True)
    for item in words:
        print(item)

    ranks = list(range(1, len(words) + 1))
    frequencies = [frequency for _, frequency in words]
    plt.plot(ranks, frequencies)
    plt.show()
Exemple #8
0
    def test_unique_words(self):
        """Every word that occurs once is reported with a count of one."""
        test_words = ['apple', 'orange', 'pineapple']
        correct = {word: 1 for word in test_words}
        self.assertEqual(correct, count_words(test_words))
def unique_words_count(arr):
    """Return how many items iterating over ``count_words(arr)`` yields."""
    return sum(1 for _ in count_words(arr))
Exemple #10
0
    def test_repetitive_words(self):
        """Interleaved repeats are tallied per distinct word."""
        test_words = [
            'apple', 'apple', 'plum', 'strawberry', 'apple',
            'strawberry', 'plum', 'apple', 'plum',
        ]
        correct = dict(apple=4, strawberry=2, plum=3)
        self.assertEqual(correct, count_words(test_words))
Exemple #11
0
 def post(self):
     """Compute word statistics for the submitted text and render the page.

     Reads the ``text`` request field, counts its words when it is not the
     empty string, and passes the statistics to ``main_page.html``.
     """
     text = self.request.get('text')
     word_counts = count_words.count_words(text) if text != '' else {}

     ranked = count_words.order(word_counts)
     total = count_words.count_all_words(word_counts)
     distinct = len(word_counts)
     top_count = len(count_words.most_common_words(word_counts, 80))
     # Guard the division: with no words at all the percentage stays 0.
     share = round(100.0 * top_count / distinct, 2) if distinct else 0

     self.render(
         "main_page.html",
         text=text,
         dictionary=word_counts,
         ordered_list=ranked,
         total_words=total,
         different_words=distinct,
         most_common=top_count,
         percentage=share,
     )
Exemple #12
0
def work_json():
    """Append lower-cased item descriptions from the JSON feed to temp.txt.

    Loads ``files/newsafr.json``, walks every list found under each top-level
    value's ``channel`` mapping, and collects the ``description`` of each
    entry in lower case. The collected text is appended to ``temp.txt``,
    after which ``c_w.count_words()`` is called and its result printed.
    """
    with open("files/newsafr.json", encoding='utf-8') as datafile:
        json_data = json.load(datafile)

    descriptions = []
    for headers in json_data.values():
        for item in headers['channel'].values():
            if not isinstance(item, list):
                continue
            for dct_news in item:
                description = dct_news.get('description')
                # An entry without a description has nothing to contribute;
                # calling .lower() on None would raise AttributeError.
                if description is None:
                    continue
                descriptions.append(description.lower())

    # Open the output once, and only when there is something to write, so the
    # handle is always closed and no empty file is created.
    if descriptions:
        with open("temp.txt", "a", encoding='utf-8') as temp_file:
            temp_file.writelines(descriptions)

    # NOTE(review): presumably count_words() reads temp.txt — confirm in c_w.
    output_message = c_w.count_words()
    print(output_message)
Exemple #13
0
def work_xml():
    """Append lower-cased item descriptions from the XML feed to temp.txt.

    Parses ``files/newsafr.xml``, collects the text of every ``description``
    element under each ``channel/item`` in lower case, and appends it to
    ``temp.txt``. Then ``c_w.count_words()`` is called and its result printed.
    """
    parser = ET.XMLParser(encoding="utf-8")
    tree = ET.parse("files/newsafr.xml", parser)
    root = tree.getroot()

    descriptions = []
    for xml_item in root.findall("channel/item"):
        for description in xml_item.findall("description"):
            # An element with no text content has text None; skip it rather
            # than fail on None.lower().
            if description.text is None:
                continue
            descriptions.append(description.text.lower())

    # Open the output once, and only when there is something to write, so the
    # handle is always closed and no empty file is created.
    if descriptions:
        with open("temp.txt", "a", encoding='utf-8') as tmp_file:
            tmp_file.writelines(descriptions)

    # NOTE(review): presumably count_words() reads temp.txt — confirm in c_w.
    output_message = c_w.count_words()
    print(output_message)
Exemple #14
0
def get_tf(dir):
    """Compute a per-file weight for every word of the files in a directory.

    ``read_dir`` is given *dir* and a list to fill. For each entry of that
    list, the counts returned by ``count_words`` are replaced in place by
    ``log2(total / count)``, where ``total`` is the sum of that entry's counts.

    Args:
        dir (str): directory of the files

    Returns:
        dict: maps each entry to its dict of word -> computed value
    """
    import math

    paths = []
    read_dir(dir, paths)
    weights_by_file = {}
    for path in paths:
        counts = count_words(path)
        total = float(sum(counts.values()))
        # Only values are reassigned, so the dict size stays fixed while iterating.
        for word, occurrences in counts.items():
            counts[word] = math.log(total / occurrences, 2)
        weights_by_file[path] = counts
    return weights_by_file
Exemple #15
0
def get_idf(dir):
    """Return, for every word, the fraction of files in which it occurs.

    ``read_dir`` is given *dir* and a list to fill. Each key of
    ``count_words(entry)`` adds one to that word's tally, and every tally is
    finally divided by the number of entries.

    Args:
        dir (str): directory of the files

    Returns:
        dict: maps each word to (entries containing it) / (number of entries)
    """
    paths = []
    read_dir(dir, paths)
    doc_num = len(paths)

    doc_freq = {}
    for path in paths:
        for word in count_words(path).keys():
            doc_freq[word] = doc_freq.get(word, 0) + 1

    return {word: float(seen) / doc_num for word, seen in doc_freq.items()}
 def test_two(self):
     """Three 'python' and one 'ruby' are counted as 3 and 1."""
     expected = {'ruby': 1, 'python': 3}
     actual = count_words(["python", "python", "python", "ruby"])
     self.assertEqual(expected, actual)
 def test_if_count_words_returns_corect_data(self):
     """count_words returns the expected tallies for two sample lists."""
     samples = (
         ({'apple': 2, 'pie': 1, 'banana': 1},
          ["apple", "banana", "apple", "pie"]),
         ({'ruby': 1, 'python': 3},
          ["python", "python", "python", "ruby"]),
     )
     for expected, words in samples:
         self.assertEqual(expected, count_words(words))
 def test_empty_list(self):
     """An empty list of words yields an empty dict."""
     no_words = []
     self.assertEqual({}, count_words(no_words))
 def test_if_list_is_empty(self):
     """Counting the words of an empty list gives an empty dict."""
     counted = count_words([])
     self.assertEqual({}, counted)
Exemple #20
0
 def test_with_3_variables(self):
     """Three distinct words, one of them repeated, are tallied correctly."""
     self.assertEqual(
         count_words(["apple", "banana", "apple", "pie"]),
         {'apple': 2, 'pie': 1, 'banana': 1},
     )
Exemple #21
0
 def test_with_3_variables1(self):
     """Three 'python' and one 'ruby' are tallied as 3 and 1."""
     self.assertEqual(
         count_words(["python", "python", "python", "ruby"]),
         {'ruby': 1, 'python': 3},
     )
Exemple #22
0
 def test_if_no_words(self):
     """With no words to count, the result is an empty dict."""
     self.assertEqual({}, count_words([]))
Exemple #23
0
def test_empty():
    """An empty string yields an empty dict."""
    counts = count_words('')
    assert counts == {}
Exemple #24
0
def test_upper_case():
    """'Hello' and 'hello' are counted as the same lower-case word."""
    counts = count_words('Hello world hello')
    assert counts == {'hello': 2, 'world': 1}
 def test_one(self):
     """A list with one repeated word is tallied per distinct word."""
     expected = {'apple': 2, 'pie': 1, 'banana': 1}
     actual = count_words(["apple", "banana", "apple", "pie"])
     self.assertEqual(expected, actual)
 def test_unique_words(self):
     """Words that each occur once all get a count of one."""
     self.assertEqual(
         {'apple': 1, 'pie': 1, 'banana': 1},
         count_words(["apple", "banana", "pie"]),
     )
 def test_count_one_word(self):
     """A single-word list produces a single entry with count one."""
     single = ["apple"]
     self.assertEqual({"apple": 1}, count_words(single))
 def test_count_words(self):
     """The result has one entry per distinct word."""
     samples = (
         (3, ["apple", "banana", "apple", "pie"]),
         (2, ["python", "python", "python", "ruby"]),
     )
     for distinct, words in samples:
         self.assertEqual(distinct, len(count_words(words)))
Exemple #29
0
def test_count_words():
    """The first five entries returned for 'words.md' with n=5 are as expected."""
    top_words = count_words('words.md', 5)
    assert top_words[:5] == ['the', 'a', 'of', 'and', 'file']
Exemple #30
0
def test_punctuation():
    """Trailing ',' and '!' do not produce separate words."""
    counts = count_words('hello, world hello!')
    assert counts == {'hello': 2, 'world': 1}
Exemple #31
0
 def invoke_result(num):
     """Call cw.count_words on a fixed sentence, passing *num* as second argument."""
     sentence = "betty bought a bit of butter but the butter was bitter"
     return cw.count_words(sentence, num)
Exemple #32
0
def test_hello_world():
    """Two distinct words each get a count of one."""
    counts = count_words('hello world')
    assert counts == {'hello': 1, 'world': 1}
 def test_output(self):
     """count_words returns the expected tallies, including for an empty list."""
     cases = [
         (["apple", "banana", "apple", "pie"],
          {'apple': 2, 'pie': 1, 'banana': 1}),
         (["python", "python", "python", "ruby"],
          {'ruby': 1, 'python': 3}),
         ([], {}),
     ]
     for words, expected in cases:
         self.assertEqual(count_words(words), expected)
Exemple #34
0
 def test_count_words(self):
     """Mixed-case words keep their case as keys and are tallied per spelling."""
     self.assertEqual(
         {"heaSS": 1, "ASAS": 1, "A": 2},
         count_words(["heaSS", "ASAS", "A", "A"]),
     )
 def test_empty_list(self):
     """No words in, empty dict out."""
     expected = {}
     self.assertEqual(expected, count_words([]))
Exemple #36
0
 def test_with_no_variables(self):
     """A list holding only the empty string counts that string once."""
     self.assertEqual(count_words([""]), {'': 1})
def unique_words_count(arr):
    """Return the number of entries in ``count_words(arr)``."""
    return len(count_words(arr))
 def test_if_the_word_count_is_true(self):
     """Two repeated words and one single word are tallied correctly."""
     words = ["apple", "banana", "apple", "pie", "banana"]
     expected = {"apple": 2, "banana": 2, "pie": 1}
     self.assertEqual(expected, count_words(words))
Exemple #39
0
 def test_if_word_is_added(self):
     """A single word appears in the result with a count of one."""
     counted = count_words(["kaluf"])
     self.assertEqual({'kaluf': 1}, counted)
 def test_if_the_list_is_not_empty(self):
     """A non-empty word list does not produce an empty dict."""
     words = ["apple", "banana", "apple", "pie", "banana"]
     self.assertNotEqual({}, count_words(words))
Exemple #41
0
 def test_two_words_if_added(self):
     """Two different words each get their own entry with count one."""
     expected = {'kaluf': 1, "4u6ka": 1}
     counted = count_words(["kaluf", "4u6ka"])
     self.assertEqual(expected, counted)
 def test_no_word(self):
     """An empty input list gives an empty result."""
     result = count_words([])
     self.assertEqual({}, result)
 def test_count_more_different_word(self):
     """Two distinct words are each counted once."""
     words = ["apple", "ass"]
     expected = {"apple": 1, "ass": 1}
     self.assertEqual(expected, count_words(words))
Exemple #44
0
 def test_if_word_second_added(self):
     """The same word given twice is counted as two."""
     counted = count_words(["kaluf", "kaluf"])
     self.assertEqual({'kaluf': 2}, counted)
	def test_arr(self):
		"""The same word given twice is counted as two."""
		self.assertEqual(count_words(["python", "python"]), {"python": 2})
Exemple #46
0
 def test_zero_dictionary(self):
     """Counting an empty list yields a dict with no entries."""
     empty_input = []
     self.assertEqual({}, count_words(empty_input))
Exemple #47
0
def unique_words_count(arr):
    """Return the number of entries in the result of ``count_words(arr)``."""
    distinct = count_words(arr)
    return len(distinct)
	def test_empty_arr(self):
		"""An empty list yields an empty dict."""
		self.assertEqual({}, count_words([]))
Exemple #49
0
def test_multiple_words():
    """A repeated word in a space-separated string is counted twice."""
    counts = count_words('hello world hello')
    assert counts == {'hello': 2, 'world': 1}