def test_chain_union(self):
    '''Exercise WordChain.union on chains taken from a freshly built index.

    The index is built from test_data1.txt .. test_data3.txt under
    DATA_FILE_DIR, processed with document ids 1..3. Every union result is
    compared through its string form, and a union of a chain with itself
    is expected to raise ValueError.
    '''
    # Paths are resolved eagerly; the documents are only processed when
    # build_sitable consumes the generator.
    paths = [
        os.path.join(DATA_FILE_DIR, 'test_data{0:d}.txt'.format(doc_id))
        for doc_id in range(1, 4)
    ]
    docs = (
        process_doc(path, doc_id)
        for doc_id, path in enumerate(paths, start=1)
    )
    iitable = build_iitable(build_sitable(docs))

    # Two chains that both come from the index.
    self.assertEqual(
        str(iitable['a'].union(iitable['in'])),
        '(a OR in, freq:3) * --> 1 --> 2 --> 3',
    )
    self.assertEqual(
        str(iitable['a'].union(iitable['deepen'])),
        '(a OR deepen, freq:3) * --> 1 --> 2 --> 3',
    )

    # A chain created directly via WordChain('test'), on either side.
    self.assertEqual(
        str(iitable['a'].union(WordChain('test'))),
        '(a OR test, freq:3) * --> 1 --> 2 --> 3',
    )
    self.assertEqual(
        str(WordChain('test').union(iitable['a'])),
        '(test OR a, freq:3) * --> 1 --> 2 --> 3',
    )

    # Operand order is reflected in the label of the result.
    self.assertEqual(
        str(iitable['nature'].union(iitable['a'])),
        '(nature OR a, freq:3) * --> 1 --> 2 --> 3',
    )

    # A chain united with itself must be rejected.
    with self.assertRaises(ValueError):
        iitable['a'].union(iitable['a'])
def get_iitable():
    '''Construct the inverted index table for documents 1 through 3.

    Document locations come from doc_loc; each document is processed with
    its 1-based id and the results are fed through build_sitable and
    build_iitable.

    Returns:
        The iitable produced by build_iitable.
    '''
    # Locations are resolved up front; processing stays lazy until
    # build_sitable iterates over the generator.
    paths = [doc_loc(doc_id) for doc_id in range(1, 4)]
    docs = (
        process_doc(path, doc_id)
        for doc_id, path in enumerate(paths, start=1)
    )
    return build_iitable(build_sitable(docs))
def test_build_iitable(self):
    '''Check word and freq of two entries produced by build_iitable.

    The index is built from test_data1.txt .. test_data3.txt under
    DATA_FILE_DIR, processed with document ids 1..3.
    '''
    # Paths are resolved eagerly; the documents are only processed when
    # build_sitable consumes the generator.
    paths = [
        os.path.join(DATA_FILE_DIR, 'test_data{0:d}.txt'.format(doc_id))
        for doc_id in range(1, 4)
    ]
    docs = (
        process_doc(path, doc_id)
        for doc_id, path in enumerate(paths, start=1)
    )
    iitable = build_iitable(build_sitable(docs))

    # The entry for 'a' is expected to report a frequency of 3.
    entry_a = iitable['a']
    self.assertEqual('a', entry_a.word)
    self.assertEqual(entry_a.freq, 3)

    # The entry for 'deepen' is expected to report a frequency of 1.
    entry_deepen = iitable['deepen']
    self.assertEqual(entry_deepen.word, 'deepen')
    self.assertEqual(entry_deepen.freq, 1)