def test_most_similar(self):
    """Check ``sv.most_similar`` for sentence 0, with and without an indexable.

    An ``Average`` model built from ``W2V`` is trained on the line document
    for ``CORPUS``. The query is then issued three times: bare, with the line
    document as ``indexable``, and with an ``IndexedList`` of ``SENTENCES``
    as ``indexable``.
    """
    indexed_sentences = IndexedList(SENTENCES)
    corpus_lines = IndexedLineDocument(CORPUS)

    model = Average(W2V)
    model.train(corpus_lines)

    # Without an indexable, the first field of each hit is compared to an int.
    hits = model.sv.most_similar(positive=0)
    top_hit, second_hit = hits[0], hits[1]
    self.assertEqual(45, top_hit[0])
    self.assertEqual(35, second_hit[0])

    # With the line document, the first field of the top hit is the raw line.
    hits = model.sv.most_similar(positive=0, indexable=corpus_lines)
    self.assertEqual("Looks good and fits snug", hits[0][0])

    # With the IndexedList, the first field holds the token list at index 0.
    hits = model.sv.most_similar(positive=0, indexable=indexed_sentences)
    expected_tokens = "Looks good and fits snug".split()
    self.assertEqual(expected_tokens, hits[0][0][0])
def _train(self):
    """Wrap ``self.concepts`` in an IndexedList and train a SIF model on it.

    Stores the IndexedList on ``self.sens`` and the SIF instance, built from
    ``self.w2v_model``, on ``self.se``.
    """
    indexed_concepts = IndexedList(self.concepts)
    self.sens = indexed_concepts

    print('training SIF...')

    sif_model = SIF(self.w2v_model)
    self.se = sif_model
    sif_model.train(indexed_concepts)
def test__str(self):
    """``str()`` of an IndexedList built from ``list_a`` equals the expected text."""
    expected = "[\'the dog is good\', \"it's nice and comfy\"]"
    indexed = IndexedList(self.list_a)
    self.assertEqual(expected, str(indexed))
def test_getitem_presplitted(self):
    """Item access on a pre-split IndexedList returns the stored token list.

    Builds an IndexedList from ``self.list_c`` (already tokenised) with
    ``pre_splitted=True`` and checks the ``words`` of its first element.
    """
    presplit = IndexedList(self.list_c, pre_splitted=True)
    # Assert against the list built here rather than the shared ``self.il``
    # fixture; otherwise the pre-split code path is never exercised.
    self.assertEqual(["the", "dog", "is", "good"], presplit[0].words)
def test_init_multiple_splits(self):
    """Passing ``split_func`` together with ``pre_splitted=True`` raises RuntimeError."""
    self.assertRaises(
        RuntimeError,
        IndexedList,
        self.list_a,
        split_func=self.list_a,
        pre_splitted=True,
    )
def test__len(self):
    """``len()`` of an IndexedList built from ``list_a`` is 2."""
    indexed = IndexedList(self.list_a)
    self.assertEqual(2, len(indexed))
def test_init_dict(self):
    """Constructing an IndexedList from a dict raises TypeError."""
    mapping = {0: "hello there"}
    self.assertRaises(TypeError, IndexedList, mapping)
def test_init_multiple_args(self):
    """Passing ``split=True`` together with a ``split_func`` raises RuntimeError."""
    self.assertRaises(
        RuntimeError,
        IndexedList,
        self.list_a,
        split=True,
        split_func=self.list_a,
    )
def test_init_multiple_list(self):
    """An IndexedList built from ``list_a`` and ``list_b`` has length 4."""
    combined = IndexedList(self.list_a, self.list_b)
    self.assertEqual(4, len(combined))
def test_init_set(self):
    """Smoke test: constructing an IndexedList from ``set_a`` must not raise."""
    IndexedList(self.set_a)
def test_init_list(self):
    """Smoke test: constructing an IndexedList from ``list_a`` must not raise."""
    IndexedList(self.list_a)
def setUp(self):
    """Create the shared fixtures used by the tests.

    ``list_a`` and ``list_b`` are lists of raw strings, ``list_c`` is
    ``list_a`` with every string whitespace-split, ``set_a`` is a set of
    strings, and ``il`` is an IndexedList over ``list_a``, ``list_b`` and
    ``set_a`` created with ``split=True``.
    """
    raw_sentences = ["the dog is good", "it's nice and comfy"]
    self.list_a = raw_sentences
    self.list_b = ["lorem ipsum dolor", "si amet"]
    self.list_c = [sentence.split() for sentence in raw_sentences]
    self.set_a = {"hello there", "its a set"}
    self.il = IndexedList(
        self.list_a,
        self.list_b,
        self.set_a,
        split=True,
    )
class TestIndexedListFuncs(unittest.TestCase):
    """Unit tests for construction and sequence operations of IndexedList."""

    def setUp(self):
        """Create the string lists, pre-split list, set and combined IndexedList."""
        self.list_a = ["the dog is good", "it's nice and comfy"]
        self.list_b = ["lorem ipsum dolor", "si amet"]
        self.list_c = [s.split() for s in self.list_a]
        self.set_a = {"hello there", "its a set"}
        # Six items in total: two from each of list_a, list_b and set_a.
        self.il = IndexedList(self.list_a, self.list_b, self.set_a, split=True)

    def test_init_list(self):
        """Smoke test: construction from a single list must not raise."""
        IndexedList(self.list_a)

    def test_init_multiple_list(self):
        """Two lists passed positionally are combined into one IndexedList."""
        combined = IndexedList(self.list_a, self.list_b)
        self.assertEqual(4, len(combined))

    def test_init_set(self):
        """Smoke test: construction from a set must not raise."""
        IndexedList(self.set_a)

    def test_init_dict(self):
        """Construction from a dict raises TypeError."""
        tmp = {0: "hello there"}
        with self.assertRaises(TypeError):
            IndexedList(tmp)

    def test_init_multiple_args(self):
        """``split=True`` together with a ``split_func`` raises RuntimeError."""
        with self.assertRaises(RuntimeError):
            IndexedList(self.list_a, split=True, split_func=self.list_a)

    def test_init_multiple_splits(self):
        """``split_func`` together with ``pre_splitted=True`` raises RuntimeError."""
        with self.assertRaises(RuntimeError):
            IndexedList(self.list_a, split_func=self.list_a, pre_splitted=True)

    def test__len(self):
        """``len()`` of an IndexedList built from ``list_a`` is 2."""
        indexed = IndexedList(self.list_a)
        self.assertEqual(2, len(indexed))

    def test__str(self):
        """``str()`` of an IndexedList built from ``list_a`` equals the expected text."""
        target = "[\'the dog is good\', \"it's nice and comfy\"]"
        self.assertEqual(target, str(IndexedList(self.list_a)))

    def test_getitem(self):
        """``__getitem__`` yields an object exposing ``words`` and ``index``."""
        first = self.il.__getitem__(0)
        self.assertEqual(["the", "dog", "is", "good"], first.words)
        self.assertEqual(0, first.index)

    def test_getitem_presplitted(self):
        """Item access on a pre-split IndexedList returns the stored token list."""
        presplit = IndexedList(self.list_c, pre_splitted=True)
        # Assert against the list built here rather than the shared ``self.il``
        # fixture; otherwise the pre-split code path is never exercised.
        self.assertEqual(
            ["the", "dog", "is", "good"], presplit.__getitem__(0).words
        )

    def test_delitem(self):
        """Deleting one item shrinks the six-item fixture to five."""
        self.il.__delitem__(0)
        self.assertEqual(5, len(self.il))

    def test_setitem(self):
        """Assigning a string at index 0 replaces the item's words."""
        self.il.__setitem__(0, "is it me?")
        self.assertEqual(["is", "it", "me?"], self.il[0].words)

    def test_setitem_wrong_dtype(self):
        """Assigning a list to the ``split=True`` fixture raises TypeError."""
        with self.assertRaises(TypeError):
            self.il.__setitem__(0, ["is it me?"])

    def test_append(self):
        """An appended string becomes the last item."""
        self.il.append("is it me?")
        self.assertEqual(["is", "it", "me?"], self.il[-1].words)

    def test_extend(self):
        """Extending the six-item fixture with two two-item lists yields ten."""
        self.il.extend(self.list_a, self.list_b)
        self.assertEqual(10, len(self.il))
def test_init(self):
    """Smoke test: constructing an IndexedList from ``list_a`` must not raise."""
    IndexedList(self.list_a)
def setUp(self):
    """Create the shared fixtures used by the tests.

    ``list_a`` holds raw strings, ``list_b`` holds the same strings
    whitespace-split, and ``il`` is an IndexedList built from both.
    """
    raw_sentences = ["the dog is good", "it's nice and comfy"]
    self.list_a = raw_sentences
    self.list_b = [sentence.split() for sentence in raw_sentences]
    self.il = IndexedList(self.list_a, self.list_b)