class Test(unittest.TestCase): def setUp(self): self.dir_ = data_dir + "/space_test_resources/" self.init_test_cases = [(DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], ["feat1", "feat2"], {"man":1, "car":0}, {"feat1":0, "feat2":1}, [ScalingOperation(EpmiWeighting())]), (DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], [], {"man":1, "car":0}, {}, [ScalingOperation(EpmiWeighting())])] self.m1 = np.array([[1,2,3]]) self.row1 = ["a"] self.row2 = ["a", "b", "c"] self.ft1 = ["f1","f2","f3"] self.space1 = Space(DenseMatrix(self.m1),self.row1, self.ft1) self.x = np.mat([[1,2,3],[2,4,6],[4,675,43]]) self.us = np.mat([[ 2.19272110e+00, 3.03174768e+00], [ 4.38544220e+00, 6.06349536e+00], [ 6.76369708e+02, -4.91431927e-02]]) self.space2 = Space(DenseMatrix(self.x), self.row2, self.ft1) def test_init1(self): for (m, id2row, id2col, row2id, col2id, ops) in self.init_test_cases: space_ = Space(m, id2row, id2col) self.assertIs(m, space_.cooccurrence_matrix) self.assertIs(id2row, space_.id2row) self.assertIs(id2col, space_.id2column) self.assertDictEqual(row2id, space_.row2id) self.assertDictEqual(col2id, space_.column2id) self.assertListEqual([], space_.operations) def test_init2(self): for (m, id2row, id2col, row2id, col2id, ops) in self.init_test_cases: space_ = Space(m, id2row, id2col, row2id, col2id) self.assertIs(m, space_.cooccurrence_matrix) self.assertIs(id2row, space_.id2row) self.assertIs(id2col, space_.id2column) self.assertIs(row2id, space_.row2id) self.assertIs(col2id, space_.column2id) self.assertListEqual([], space_.operations) def test_init3(self): for (m, id2row, id2col, row2id, col2id, ops) in self.init_test_cases: space_ = Space(m, id2row, id2col, operations = ops) self.assertIs(m, space_.cooccurrence_matrix) self.assertIs(id2row, space_.id2row) self.assertIs(id2col, space_.id2column) self.assertDictEqual(row2id, space_.row2id) self.assertDictEqual(col2id, space_.column2id) self.assertListEqual(ops, space_.operations) def test_init4(self): for (m, id2row, id2col, row2id, col2id, ops) in self.init_test_cases: space_ = Space(m, id2row, id2col, row2id, col2id, operations = ops) self.assertIs(m, space_.cooccurrence_matrix) self.assertIs(id2row, space_.id2row) self.assertIs(id2col, space_.id2column) self.assertIs(row2id, space_.row2id) self.assertIs(col2id, space_.column2id) self.assertIs(ops, space_.operations) def test_init_raise(self): test_cases = [(DenseMatrix(np.array([[1,2],[3,4],[5,6]])), ["car", "man"], ["feat1", "feat2"], {"man":1, "car":0}, {"feat1":0, "feat2":1}), (DenseMatrix(np.array([[1,2],[3,4]])), [], ["feat1", "feat2"], {"man":1, "car":0}, {"feat1":0, "feat2":1}), (DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], ["feat1", "feat2"], {}, {"feat1":0, "feat2":1}), (DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], ["feat1"], {"man":1, "car":0}, {"feat1":0, "feat2":1}), (DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], ["feat1"], {"man":1, "car":0}, {"feat1":0, "feat2":1}), (DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], ["feat1","feat2"], {"man":1, "car":0}, {"feat1":0}), (DenseMatrix(np.array([[1,2],[3,4]])), ["car", "man"], ["feat1","feat2"], {"man":1, "car":0}, {"feat1":1,"feat2":0}) ] for (m, id2row, id2col, row2id, col2id) in test_cases: self.assertRaises(ValueError, Space, m, id2row, id2col, row2id, col2id) def test_apply_weighting_operation(self): test_cases = [(self.space1, np.array([[1,1,1]]))] w = EpmiWeighting() for in_s, expected_mat in test_cases: out_s = in_s.apply(w) np.testing.assert_array_almost_equal(expected_mat, out_s.cooccurrence_matrix.mat, 7) self.assertListEqual(out_s.id2row, in_s.id2row) self.assertListEqual(out_s.id2column, in_s.id2column) self.assertDictEqual(out_s.row2id, in_s.row2id) self.assertDictEqual(out_s.column2id, in_s.column2id) self.assertEqual(1, len(out_s.operations)) def test_get_sim(self): test_cases = [(self.space2, None, "a", "b", 28), (self.space2, None, "a", "none", 0), (self.space2, self.space2, "a", "b", 28), (self.space2, self.space1, "a", "a", 14), (self.space2, self.space1, "a", "none", 0), (self.space2, self.space1, "none", "a", 0) ] for space, space2, w1, w2, expected_sim in test_cases: if space2 is None: sim = space.get_sim(w1, w2, DotProdSimilarity()) else: sim = space.get_sim(w1, w2, DotProdSimilarity(), space2) self.assertEqual(sim, expected_sim) def test_get_sim_raises(self): space3 = self.space2.apply(Svd(2)) self.assertRaises(ValueError, self.space2.get_sim, "a", "b", DotProdSimilarity(), space3) def test_get_neighbours(self): test_cases = [(self.space2, None, "a", 0,[]), (self.space2, None, "a", 1,[("c",1483)]), (self.space2, None, "a", 2,[("c",1483),("b",28)]), (self.space2, None, "a", 3,[("c",1483),("b",28),("a",14)]), (self.space2, None, "a", 4,[("c",1483),("b",28),("a",14)]), (self.space2, self.space1, "a", 1,[("a",14)]), (self.space2, self.space1, "a", 3,[("a",14)]) #(self.space2, self.space1, "none", 4,[]) ] for space, space2, word, no_neigh, expected in test_cases: if space2 is None: neighbours = space.get_neighbours(word, no_neigh, DotProdSimilarity()) else: neighbours = space.get_neighbours(word, no_neigh, DotProdSimilarity(), space2) self.assertListEqual(expected, neighbours) def test_get_neighbours_raises(self): space3 = self.space2.apply(Svd(2)) self.assertRaises(ValueError, self.space2.get_neighbours, "a", 2, DotProdSimilarity(), space3) def test_vstack(self): space3 = Space(DenseMatrix(self.x[0:2,:]), ["e","f"], self.ft1) space4 = Space(DenseMatrix(np.vstack((self.x, self.x[0:2,:]))), ["a", "b", "c", "e","f"], self.ft1) test_cases = [(self.space2, space3, space4)] for space1, space2, expected_space in test_cases: outcome = space1.vstack(space1, space2) np.testing.assert_array_equal(expected_space.cooccurrence_matrix.mat, outcome.cooccurrence_matrix.mat) self.assertListEqual(outcome.id2column, space1.id2column) self.assertListEqual(outcome.id2column, expected_space.id2column) self.assertDictEqual(outcome.column2id, space1.column2id) self.assertDictEqual(outcome.column2id, expected_space.column2id) self.assertListEqual(outcome.id2row, expected_space.id2row) self.assertDictEqual(outcome.row2id, expected_space.row2id) self.assertListEqual([], outcome.operations) def test_vstack_raises(self): space3 = Space(DenseMatrix(self.x[0:2,0:1]), ["e","f"], self.ft1[0:1]) space4 = Space(DenseMatrix(self.x[0:2,:]), ["a","f"], self.ft1) space5 = Space(DenseMatrix(self.x[0:2,:]), ["e","f"], []) space6 = Space(DenseMatrix(self.x[0:2,:]), ["e","f"], ["f1","f2","f4"]) test_cases = [(self.space2, space3), (self.space2, space4), (self.space2, space5), (self.space2, space6) ] for space1, space2 in test_cases: self.assertRaises(ValueError, space1.vstack, space1, space2) def test_apply_svd_reduction(self): test_cases = [(self.space2, self.us)] red = Svd(2) for in_s, expected_mat in test_cases: out_s = in_s.apply(red) np.testing.assert_array_almost_equal(expected_mat, out_s.cooccurrence_matrix.mat, 2) self.assertListEqual(out_s.id2row, in_s.id2row) self.assertListEqual(out_s.id2column, []) self.assertDictEqual(out_s.row2id, in_s.row2id) self.assertDictEqual(out_s.column2id, {}) self.assertEqual(1, len(out_s.operations)) def test_apply_nmf_reduction(self): test_cases = [(self.space2, self.us)] red = Nmf(2) for in_s, expected_mat in test_cases: out_s = in_s.apply(red) self.assertListEqual(out_s.id2row, in_s.id2row) self.assertListEqual(out_s.id2column, []) self.assertDictEqual(out_s.row2id, in_s.row2id) self.assertDictEqual(out_s.column2id, {}) self.assertEqual(1, len(out_s.operations)) def test_build_data(self): test_cases = [("data1",["red", "blue"], ["car", "man"], np.mat([[3,5],[0,10]]), np.mat([[3,5],[0,10]])), ("data2",["red"], ["car"], np.mat([[3]]), np.mat([[3]])), ("data3",["red", "blue"], ["car", "man"], np.mat([[15,0],[0,6]]), np.mat([[5,0],[0,6]])), ("data7",["red"], ["car"], np.mat([[0]]), np.mat([[0]])), ("data9",["man"], ["car"], np.mat([[4]]), None), ] for data_file, rows, cols, smat, dmat in test_cases: data_file1 = self.dir_ + data_file + ".sparse" sp = Space.build(data=data_file1, cols= self.dir_ + data_file + ".cols", format="sm") self.assertListEqual(rows, sp.id2row) self.assertListEqual(cols, sp.id2column) self.assertIsInstance(sp.cooccurrence_matrix, SparseMatrix) np.testing.assert_array_equal(smat, sp.cooccurrence_matrix.mat.todense()) data_file2 = self.dir_ + data_file + ".dense" if not dmat is None: sp = Space.build(data=data_file2, format="dm") self.assertListEqual(rows, sp.id2row) self.assertListEqual([], sp.id2column) self.assertIsInstance(sp.cooccurrence_matrix, DenseMatrix) np.testing.assert_array_equal(dmat, sp.cooccurrence_matrix.mat) def test_build_data_row(self): test_cases = [("data1", "row1.row", ["red"], ["car", "man"], np.mat([[3,5]]), np.mat([[3,5]])), ("data2", "row1.row",["red"], ["car"], np.mat([[3]]), np.mat([[3]])), ("data3", "row2.row", ["blue", "red"], ["car", "man"], np.mat([[0,6],[15,0]]), np.mat([[0,6],[5,0]])), ("data3", "row3.row", ["blue", "red"], ["car", "man"], np.mat([[0,6],[15,0]]), np.mat([[0,6],[5,0]])), ("data7", "row2.row", ["blue", "red"], ["car"], np.mat([[0],[0]]), np.mat([[0],[0]])), ] for data_file, row_file, rows, cols, smat, dmat in test_cases: row_file = self.dir_ + row_file data_file1 = self.dir_ + data_file + ".sparse" sp = Space.build(data=data_file1, rows= row_file, cols= self.dir_ + data_file + ".cols", format="sm") self.assertListEqual(rows, sp.id2row) self.assertListEqual(cols, sp.id2column) self.assertIsInstance(sp.cooccurrence_matrix, SparseMatrix) np.testing.assert_array_equal(smat, sp.cooccurrence_matrix.mat.todense()) data_file2 = self.dir_ + data_file + ".dense" sp = Space.build(data=data_file2, rows= row_file, format="dm") self.assertListEqual(rows, sp.id2row) self.assertListEqual([], sp.id2column) self.assertIsInstance(sp.cooccurrence_matrix, DenseMatrix) np.testing.assert_array_equal(dmat, sp.cooccurrence_matrix.mat) def test_build_data_row_col(self): test_cases = [("data1", "row1.row", "col1.col", ["red"], ["man", "car"], np.mat([[5,3]]), np.mat([[3,5]])), ("data1", "row1.row", "col5.col", ["red"], ["man", "car"], np.mat([[5,3]]), np.mat([[3,5]])), ("data3", "row2.row", "col2.col", ["blue", "red"], ["car"], np.mat([[0],[15]]), None), ("data2", "row1.row","col1.col", ["red"], ["man","car"], np.mat([[0,3]]), None), ("data3", "row3.row", "col3.col", ["blue", "red"], ["man", "car"], np.mat([[6,0],[0,15]]), np.mat([[0,6],[5,0]])), ("data7", "row2.row", "col3.col", ["blue", "red"], ["man", "car"], np.mat([[0,0],[0,0]]), None), ("data3", "row2.row", "col4.col", ["blue", "red"], ["airplane"], np.mat([[0],[0]]), None) ] for data_file, row_file, col_file, rows, cols, smat, dmat in test_cases: row_file = self.dir_ + row_file col_file = self.dir_ + col_file data_file1 = self.dir_ + data_file + ".sparse" if smat is None: self.assertRaises(ValueError, Space.build, data=data_file1, rows= row_file, cols=col_file, format="sm") else: sp = Space.build(data=data_file1, rows= row_file, cols=col_file, format="sm") self.assertListEqual(rows, sp.id2row) self.assertListEqual(cols, sp.id2column) self.assertIsInstance(sp.cooccurrence_matrix, SparseMatrix) np.testing.assert_array_equal(smat, sp.cooccurrence_matrix.mat.todense()) data_file2 = self.dir_ + data_file + ".dense" if dmat is None: self.assertRaises(ValueError, Space.build, data=data_file2, rows= row_file, cols=col_file, format="dm") else: sp = Space.build(data=data_file2, rows= row_file, cols=col_file, format="dm") self.assertListEqual(rows, sp.id2row) self.assertListEqual(cols, sp.id2column) self.assertIsInstance(sp.cooccurrence_matrix, DenseMatrix) np.testing.assert_array_equal(dmat, sp.cooccurrence_matrix.mat) def test_build_raises(self): dense_test_cases = ["data10.dense", "data9.dense", "data8.dense"] for data_file in dense_test_cases: data_file = self.dir_ + data_file self.assertRaises(ValueError, Space.build, data=data_file, format="dm") sparse_test_cases = ["data8.sparse", "data10.sparse"] for data_file in sparse_test_cases: data_file = self.dir_ + data_file self.assertRaises(ValueError, Space.build, data=data_file, format="sm") def reset_export_files(self, out_file): with open(out_file + ".dm", "w"): pass with open(out_file + ".row", "w"): pass with open(out_file + ".col", "w"): pass def test_export(self): out_file = self.dir_ + "tmp" mat1 = np.mat([[1,2],[3,0]]) mat1row, mat1col = ["a","b"], ["f1","f2"] mat2 = np.mat([[0,0]]) mat2row, mat2col = ["a"], [] test_cases = [(Space(DenseMatrix(mat1), mat1row, mat1col), Space(SparseMatrix(mat1), mat1row, mat1col)), (Space(DenseMatrix(mat2), mat2row, mat1col), Space(SparseMatrix(mat2), mat2row, mat1col))] #3 cases allowed at the moment for sp_d, sp_s in test_cases: self.reset_export_files(out_file) sp_d.export(out_file, format="dm") new_sp = Space.build(data=out_file + ".dm", rows=out_file + ".rows", cols=out_file + ".cols", format="dm") self._test_equal_spaces_dense(sp_d, new_sp) self.reset_export_files(out_file) sp_d.export(out_file, format="sm") new_sp = Space.build(data=out_file + ".sm", rows=out_file + ".rows", cols=out_file + ".cols", format="sm") self._test_equal_spaces_sparse(sp_s, new_sp) self.reset_export_files(out_file) sp_s.export(out_file, format="sm") new_sp = Space.build(data=out_file + ".sm", rows=out_file + ".rows", cols=out_file + ".cols", format="sm") self._test_equal_spaces_sparse(sp_s, new_sp) self.reset_export_files(out_file) sp_s.export(out_file, format="dm") new_sp = Space.build(data=out_file + ".dm", rows=out_file + ".rows", cols=out_file + ".cols", format="dm") self._test_equal_spaces_dense(sp_d, new_sp) test_cases = [(Space(DenseMatrix(mat2), mat2row, mat2col), Space(SparseMatrix(mat2), mat2row, mat2col))] for sp_d, sp_s in test_cases: self.reset_export_files(out_file) sp_d.export(out_file, format="dm") new_sp = Space.build(data=out_file + ".dm", rows=out_file + ".rows", format="dm") self._test_equal_spaces_dense(sp_d, new_sp) self.reset_export_files(out_file) sp_s.export(out_file, format="dm") new_sp = Space.build(data=out_file + ".dm", rows=out_file + ".rows", format="dm") self._test_equal_spaces_dense(sp_d, new_sp) def _test_equal_spaces_structs(self, sp, new_sp): self.assertListEqual(sp.id2row, new_sp.id2row) self.assertListEqual(sp.id2column, new_sp.id2column) self.assertDictEqual(sp.row2id, new_sp.row2id) self.assertDictEqual(sp.column2id, new_sp.column2id) def _test_equal_spaces_dense(self, sp, new_sp): self._test_equal_spaces_structs(sp, new_sp) np.testing.assert_array_equal(sp.cooccurrence_matrix.mat, new_sp.cooccurrence_matrix.mat) def _test_equal_spaces_sparse(self, sp, new_sp): self._test_equal_spaces_structs(sp, new_sp) np.testing.assert_array_equal(sp.cooccurrence_matrix.mat.todense(), new_sp.cooccurrence_matrix.mat.todense())