def test_term_document_matrix2(self):
    """Check the leading rows of the 'term_doc_mtx' output of doc_term_mtx.

    A model is obtained from ``bow`` on the 'words' column of ``self.df``
    and handed to ``doc_term_mtx``; the first five rows of the resulting
    ``out_table`` are compared against fixed expectations.
    """
    frame = self.df
    bow_model = bow(
        table=frame,
        input_col='words',
        add_words=None,
        no_below=1,
        no_above=0.8,
        keep_n=10000,
    )['model']
    result = doc_term_mtx(
        table=frame,
        model=bow_model,
        input_col='words',
        result_type='term_doc_mtx',
    )
    print(result['out_table'])
    rows = result['out_table'].values.tolist()

    # Each expected row is a term followed by ten integer columns.
    expected_rows = [
        ['What', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ['a', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ['life', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ['wonderful', 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ['You', 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
    ]
    # Indexing (rather than zipping) keeps an IndexError if rows are missing.
    for position, expected in enumerate(expected_rows):
        self.assertListEqual(rows[position], expected)
def test_term_document_matrix1(self):
    """Check the leading rows of the 'doc_to_bow_token' output of doc_term_mtx.

    A model is obtained from ``bow`` on the 'words' column of ``self.df``
    and handed to ``doc_term_mtx``; the first five rows of ``out_table``
    (a document label plus a stringified token list) are compared against
    fixed expectations.
    """
    frame = self.df
    bow_model = bow(
        table=frame,
        input_col='words',
        add_words=None,
        no_below=1,
        no_above=0.8,
        keep_n=10000,
    )['model']
    result = doc_term_mtx(
        table=frame,
        model=bow_model,
        input_col='words',
        result_type='doc_to_bow_token',
    )
    print(result['out_table'])
    rows = result['out_table'].values.tolist()

    # The second column is compared as a plain string, so the expected
    # text must match character for character.
    expected_rows = [
        ['doc_0', "['(What, 1)', '(a, 1)', '(life, 1)', '(wonderful, 1)']"],
        ['doc_1', "['(You, 1)', '(about, 1)', '(cried, 1)', '(know, 1)', '(may, 1)', '(reason, 1)', '(she, 1)', '(the, 1)', '(why, 1)']"],
        ['doc_2', "['(I, 1)', '(like, 1)', '(stop, 1)', '(to, 1)', '(working, 1)', '(would, 1)']"],
        ['doc_3', "['(the, 1)', '(I, 2)', '(could, 1)', '(hear, 1)', '(not, 1)', '(voice, 1)', '(wish, 1)']"],
        ['doc_4', "['(would, 1)', '(It, 1)', '(be, 1)', '(can, 1)', '(help, 1)', '(if, 1)', '(me, 1)', '(nice, 1)', '(this, 1)', '(with, 1)', '(you, 1)']"],
    ]
    # Indexing (rather than zipping) keeps an IndexError if rows are missing.
    for position, expected in enumerate(expected_rows):
        self.assertListEqual(rows[position], expected)
def test_bag_of_words2(self):
    """Check the first five rows of bow's ``out_table`` with tighter filters.

    ``bow`` is called on the 'words' column of ``self.df`` with
    ``no_below=2``, ``no_above=0.7`` and ``keep_n=10``; each compared row
    is a two-element list of a word and an integer.
    """
    frame = self.df
    out_table = bow(
        table=frame,
        input_col='words',
        add_words=None,
        no_below=2,
        no_above=0.7,
        keep_n=10,
    )['out_table']
    print(out_table)
    rows = out_table.values.tolist()

    expected_rows = [
        ['know', 2],
        ['the', 2],
        ['I', 3],
        ['would', 2],
        ['me', 4],
    ]
    # Indexing (rather than zipping) keeps an IndexError if rows are missing.
    for position, expected in enumerate(expected_rows):
        self.assertListEqual(rows[position], expected)
def test_bag_of_words1(self):
    """Spot-check selected rows of bow's ``out_table`` with loose filters.

    ``bow`` is called on the 'words' column of ``self.df`` with
    ``no_below=1``, ``no_above=0.8`` and ``keep_n=10000``; rows 0, 1, 7,
    13 and 14 of the result are compared against fixed two-element lists.
    """
    frame = self.df
    out_table = bow(
        table=frame,
        input_col='words',
        add_words=None,
        no_below=1,
        no_above=0.8,
        keep_n=10000,
    )['out_table']
    print(out_table)
    rows = out_table.values.tolist()

    # (row index, expected row) pairs; the indices are not contiguous.
    spot_checks = [
        (0, ['What', 1]),
        (1, ['a', 1]),
        (7, ['know', 2]),
        (13, ['I', 3]),
        (14, ['like', 1]),
    ]
    for position, expected in spot_checks:
        self.assertListEqual(rows[position], expected)
def test_document_document_matrix1(self):
    """Check the leading rows of the 'sparse' output of doc_doc_mtx.

    A model is obtained from ``bow`` on the 'words' column of ``self.df``
    and handed to ``doc_doc_mtx``; the first five rows of ``out_table``
    are compared against fixed three-integer lists.
    """
    frame = self.df
    bow_model = bow(
        table=frame,
        input_col='words',
        add_words=None,
        no_below=1,
        no_above=0.8,
        keep_n=10000,
    )['model']
    result = doc_doc_mtx(
        table=frame,
        model=bow_model,
        input_col='words',
        result_type='sparse',
    )
    print(result['out_table'])
    rows = result['out_table'].values.tolist()

    expected_rows = [
        [1, 3, 1],
        [1, 5, 1],
        [2, 4, 1],
        [2, 6, 1],
        [2, 3, 1],
    ]
    # Indexing (rather than zipping) keeps an IndexError if rows are missing.
    for position, expected in enumerate(expected_rows):
        self.assertListEqual(rows[position], expected)
def test_document_document_matrix2(self):
    """Check the leading rows of the 'dense' output of doc_doc_mtx.

    A model is obtained from ``bow`` on the 'words' column of ``self.df``
    and handed to ``doc_doc_mtx``; the first five rows of ``out_table``
    (a document label followed by ten integers) are compared against
    fixed expectations.
    """
    frame = self.df
    bow_model = bow(
        table=frame,
        input_col='words',
        add_words=None,
        no_below=1,
        no_above=0.8,
        keep_n=10000,
    )['model']
    result = doc_doc_mtx(
        table=frame,
        model=bow_model,
        input_col='words',
        result_type='dense',
    )
    print(result['out_table'])
    rows = result['out_table'].values.tolist()

    expected_rows = [
        ['doc_0', 4, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ['doc_1', 0, 9, 0, 1, 0, 1, 0, 0, 0, 0],
        ['doc_2', 0, 0, 6, 1, 1, 0, 1, 0, 0, 0],
        ['doc_3', 0, 1, 1, 7, 0, 0, 1, 0, 0, 0],
        ['doc_4', 0, 0, 1, 0, 11, 2, 1, 2, 0, 1],
    ]
    # Indexing (rather than zipping) keeps an IndexError if rows are missing.
    for position, expected in enumerate(expected_rows):
        self.assertListEqual(rows[position], expected)