def test_check_calculate_tf_idf_no_tf(self):
    """
    Run calculate() with an empty tf_values list and a populated idf_values dict.

    The resulting tf_idf_values is expected to contain no entries.
    """
    # The three distinct logarithm values that appear in the IDF fixture.
    log_3_over_1 = math.log(3 / 1)
    log_3_over_2 = math.log(3 / 2)
    log_3_over_3 = math.log(3 / 3)

    calculator = TfIdfCalculator([])
    calculator.tf_values = []
    calculator.idf_values = {
        'this': log_3_over_2,
        'is': log_3_over_3,
        'an': log_3_over_1,
        'example': log_3_over_1,
        'of': log_3_over_1,
        'test': log_3_over_2,
        'text': log_3_over_2,
        'contains': log_3_over_1,
        'two': log_3_over_1,
        'sentences': log_3_over_1,
        'written': log_3_over_1,
        'on': log_3_over_1,
        'english': log_3_over_1,
        'simple': log_3_over_1,
        'third': log_3_over_1,
        'one': log_3_over_1,
        'there': log_3_over_1,
        'no': log_3_over_1,
        'much': log_3_over_1,
        'sense': log_3_over_1,
    }

    calculator.calculate()

    self.assertCountEqual(calculator.tf_idf_values, [])
def test_check_calculate_tf_idf_none_tf(self):
    """
    Run calculate() after tf_values has been set to None.

    The resulting tf_idf_values is expected to contain no entries.
    """
    calculator = TfIdfCalculator([])
    calculator.tf_values = None

    calculator.calculate()

    self.assertCountEqual(calculator.tf_idf_values, [])
def test_report_on_word_not_in_tfidf(self):
    """
    Ask report_on() about the word 'wtf', which occurs in neither document.

    An empty tuple is the expected answer.
    """
    first_doc = [
        'this', 'is', 'an', 'example', 'of', 'test',
        'text', 'it', 'contains', 'two', 'sentences',
    ]
    second_doc = [
        'das', 'ist', 'ein', 'testtext', 'es',
        'ist', 'auf', 'deutsch', 'geschrieben',
    ]

    calculator = TfIdfCalculator([first_doc, second_doc])
    # NOTE(review): tf_idf_values is reset to None right before calculate();
    # whether calculate() repopulates it is not visible from this test.
    calculator.tf_idf_values = None
    calculator.calculate()

    self.assertEqual(calculator.report_on('wtf', 0), ())
def test_report_on_index_bigger(self):
    """
    Ask report_on() for document index 2 when only two documents were supplied.

    An empty tuple is the expected answer.
    """
    first_doc = [
        'this', 'is', 'an', 'example', 'of', 'test',
        'text', 'it', 'contains', 'two', 'sentences',
    ]
    second_doc = [
        'das', 'ist', 'ein', 'testtext', 'es',
        'ist', 'auf', 'deutsch', 'geschrieben',
    ]

    calculator = TfIdfCalculator([first_doc, second_doc])
    # NOTE(review): calculate() runs after this preset; whether it overwrites
    # tf_idf_values is not visible from this test.
    calculator.tf_idf_values = [{'this': 10, 'that': 9, 'another': 5}]
    calculator.calculate()

    self.assertEqual(calculator.report_on('this', 2), ())
def test_check_calculate_tf_idf_no_idf(self):
    """
    Run calculate() with populated tf_values and an empty idf_values dict.

    The resulting tf_idf_values is expected to contain no entries.
    """
    first_doc_tf = {
        'this': 1 / 11,
        'is': 1 / 11,
        'an': 1 / 11,
        'example': 1 / 11,
        'of': 1 / 11,
        'test': 1 / 11,
        'text': 2 / 11,
        'contains': 1 / 11,
        'two': 1 / 11,
        'sentences': 1 / 11,
    }
    second_doc_tf = {
        'this': 1 / 12,
        'is': 3 / 12,
        'test': 1 / 12,
        'text': 3 / 12,
        'written': 1 / 12,
        'on': 1 / 12,
        'english': 1 / 12,
        'simple': 1 / 12,
    }
    third_doc_tf = {
        'there': 1 / 5,
        'is': 1 / 5,
        'no': 1 / 5,
        'much': 1 / 5,
        'sense': 1 / 5,
    }

    calculator = TfIdfCalculator([])
    calculator.tf_values = [first_doc_tf, second_doc_tf, third_doc_tf]
    calculator.idf_values = {}

    calculator.calculate()

    self.assertCountEqual(calculator.tf_idf_values, [])
def test_check_calculate_tf_idf_ideal(self):
    """
    Run calculate() with both tf_values and idf_values populated.

    For every document, each word's expected score is its TF value
    multiplied by the IDF value of the same word.
    """
    # The three distinct logarithm values that appear in the IDF fixture.
    log_3_over_1 = math.log(3 / 1)
    log_3_over_2 = math.log(3 / 2)
    log_3_over_3 = math.log(3 / 3)

    first_doc_tf = {
        'this': 1 / 11,
        'is': 1 / 11,
        'an': 1 / 11,
        'example': 1 / 11,
        'of': 1 / 11,
        'test': 1 / 11,
        'text': 2 / 11,
        'contains': 1 / 11,
        'two': 1 / 11,
        'sentences': 1 / 11,
    }
    second_doc_tf = {
        'this': 1 / 12,
        'is': 3 / 12,
        'test': 1 / 12,
        'text': 3 / 12,
        'written': 1 / 12,
        'on': 1 / 12,
        'english': 1 / 12,
        'simple': 1 / 12,
    }
    third_doc_tf = {
        'there': 1 / 5,
        'is': 1 / 5,
        'no': 1 / 5,
        'much': 1 / 5,
        'sense': 1 / 5,
    }

    calculator = TfIdfCalculator([])
    calculator.tf_values = [first_doc_tf, second_doc_tf, third_doc_tf]
    calculator.idf_values = {
        'this': log_3_over_2,
        'is': log_3_over_3,
        'an': log_3_over_1,
        'example': log_3_over_1,
        'of': log_3_over_1,
        'test': log_3_over_2,
        'text': log_3_over_2,
        'contains': log_3_over_1,
        'two': log_3_over_1,
        'sentences': log_3_over_1,
        'written': log_3_over_1,
        'on': log_3_over_1,
        'english': log_3_over_1,
        'simple': log_3_over_1,
        'third': log_3_over_1,
        'one': log_3_over_1,
        'there': log_3_over_1,
        'no': log_3_over_1,
        'much': log_3_over_1,
        'sense': log_3_over_1,
    }

    # Expected scores are spelled out explicitly, one dict per document.
    expected_scores = [
        {
            'this': (1 / 11) * log_3_over_2,
            'is': (1 / 11) * log_3_over_3,
            'an': (1 / 11) * log_3_over_1,
            'example': (1 / 11) * log_3_over_1,
            'of': (1 / 11) * log_3_over_1,
            'test': (1 / 11) * log_3_over_2,
            'text': (2 / 11) * log_3_over_2,
            'contains': (1 / 11) * log_3_over_1,
            'two': (1 / 11) * log_3_over_1,
            'sentences': (1 / 11) * log_3_over_1,
        },
        {
            'this': 1 / 12 * log_3_over_2,
            'is': 3 / 12 * log_3_over_3,
            'test': 1 / 12 * log_3_over_2,
            'text': 3 / 12 * log_3_over_2,
            'written': 1 / 12 * log_3_over_1,
            'on': 1 / 12 * log_3_over_1,
            'english': 1 / 12 * log_3_over_1,
            'simple': 1 / 12 * log_3_over_1,
        },
        {
            'there': 1 / 5 * log_3_over_1,
            'is': 1 / 5 * log_3_over_3,
            'no': 1 / 5 * log_3_over_1,
            'much': 1 / 5 * log_3_over_1,
            'sense': 1 / 5 * log_3_over_1,
        },
    ]

    calculator.calculate()

    self.assertCountEqual(calculator.tf_idf_values, expected_scores)
from lab_4.main import TfIdfCalculator

# Demo input: two already-tokenized documents.
clean_texts = [
    ['this', 'is', 'an', 'example', 'of', 'test', 'text', 'it', 'contains', 'two', 'sentences'],
    ['das', 'ist', 'ein', 'testtext', 'es', 'ist', 'auf', 'deutsch', 'geschrieben'],
]

tf_instance = TfIdfCalculator(clean_texts)

# Preset scores before calculate() runs.
# NOTE(review): whether calculate() keeps or overwrites this value is not
# visible from this script.
tf_instance.tf_idf_values = [
    {'this': 10, 'that': 9, 'another': 5},
]
tf_instance.calculate()

# Print whatever report_on() returns for the word 'this' in document 0.
res = tf_instance.report_on('this', 0)
print(res)