def test_support_of_custom_tokenizer(self):
    """word_count must delegate splitting to a caller-supplied tokenizer."""
    text = 'a a b b c'

    def split_on_whitespace(sample: str) -> list:
        # Trivial tokenizer: whitespace split, no normalization.
        return sample.split()

    counts = word_count(text, tokenizer=split_on_whitespace)
    self.assertDictEqual({'a': 2, 'b': 2, 'c': 1}, counts)
def summarize_text(text: AnyStr, n_common_words: int = 10) -> str:
    """Return a sentence built from the most common words of the given text.

    :param text: textual data to summarize
    :type text: str
    :param n_common_words: number of most frequent words to include
    :return: summary of the text
    :rtype: str
    """
    def tokenizer(sample):
        # Raw string: "[\w']+" as a plain literal is an invalid escape
        # sequence and raises SyntaxWarning on modern CPython.
        return regexp_tokenize(sample, r"[\w']+")

    # word_count is expected to return a Counter-like mapping with
    # most_common() — presumably collections.Counter; defined elsewhere.
    word_frequencies = word_count(text, tokenizer)
    common = [word for word, _ in word_frequencies.most_common(n_common_words)]
    return " ".join(common).capitalize() + "."
def test_best_case(self):
    """With no tokenizer argument, word_count counts whitespace-separated words."""
    counts = word_count('a a b b c')
    self.assertDictEqual({'a': 2, 'b': 2, 'c': 1}, counts)
args = parser.parse_args() task_num = args.task fib_count = args.fib_count path = args.path data = args.data except Exception: print("Incorrect args") sys.exit() try: if task_num == 1: if path: text = open_as_text(path) else: text = data word_counts = word_count(text) print_word_count_result(word_counts) elif task_num == 2: if path: text = open_as_text(path) else: text = data sentence = summarize_text(text) print(sentence) elif task_num == 3: if path: nums = open_as_list_of_ints(path) else: nums = [int(s_i) for s_i in data.split(' ')] sorted_nums = quick_sort(nums) print_list_of_ints(sorted_nums)