def test_space_compose_dense(self):
    """Compose with explicitly supplied A and B matrices on the dense fixtures.

    Each scenario lists the (arg1, arg2, phrase) triples to compose, the
    argument space, the space holding the expected result, and the A / B
    matrices handed to FullAdditive. The composed space must match the
    expected space in matrix values and row bookkeeping, must carry no
    column labels, and the model must report no intercept.
    """
    scenarios = [
        (
            [("a", "b", "a_b")],
            self.space4,
            self.space5,
            DenseMatrix.identity(2),
            DenseMatrix.identity(2),
        ),
        (
            [("a", "b", "a_b")],
            self.space4,
            self.space6,
            np.mat([[0, 0], [0, 0]]),
            np.mat([[0, 0], [0, 0]]),
        ),
        (
            [("a", "b", "a_b"), ("a", "b", "a_a")],
            self.space4,
            self.space7,
            DenseMatrix.identity(2),
            DenseMatrix.identity(2),
        ),
    ]

    for triples, source_space, expected_space, weights_a, weights_b in scenarios:
        model = FullAdditive(A=weights_a, B=weights_b)
        composed = model.compose(triples, source_space)

        actual_mat = composed.cooccurrence_matrix.mat
        expected_mat = expected_space.cooccurrence_matrix.mat
        np.testing.assert_array_almost_equal(actual_mat, expected_mat, decimal=10)

        # The composed space is expected to have no column labels at all.
        self.assertListEqual(composed.id2column, [])
        self.assertDictEqual(composed.column2id, {})

        # Row labels must line up with the expected phrase space.
        self.assertListEqual(composed.id2row, expected_space.id2row)
        self.assertDictEqual(composed.row2id, expected_space.row2id)

        self.assertFalse(model._has_intercept)
def test_space_compose_sparse(self):
    """Compose with explicitly supplied A and B matrices on the sparse fixtures.

    Only the matrix values are compared; both sides are densified with
    ``todense()`` before the comparison.
    """
    # NOTE(review): open question left by the original author -- the model
    # parameters are given as dense matrices while the input data is sparse.
    # It is unclear what the intended behaviour is in that mixed case.
    scenarios = [
        (
            [("a", "b", "a_b")],
            self.space1,
            self.space2,
            DenseMatrix.identity(2),
            DenseMatrix.identity(2),
        ),
        (
            [("a", "b", "a_b")],
            self.space1,
            self.space3,
            np.mat([[0, 0], [0, 0]]),
            np.mat([[0, 0], [0, 0]]),
        ),
    ]

    for triples, source_space, expected_space, weights_a, weights_b in scenarios:
        model = FullAdditive(A=weights_a, B=weights_b)
        composed = model.compose(triples, source_space)

        actual_dense = composed.cooccurrence_matrix.mat.todense()
        expected_dense = expected_space.cooccurrence_matrix.mat.todense()
        np.testing.assert_array_almost_equal(actual_dense, expected_dense, decimal=10)
def test_dense_lstsq_regression(self):
    """Check Linalg.lstsq_regression against the pinv_test_cases fixtures.

    For every (matrix, expected) pair, the matrix is regressed onto an
    identity matrix with as many rows as the input, and the solution must
    equal ``expected`` to 7 decimal places.
    """
    for raw_matrix, expected in self.pinv_test_cases:
        dense_input = DenseMatrix(raw_matrix)
        identity_target = DenseMatrix.identity(dense_input.shape[0])

        solution = Linalg.lstsq_regression(dense_input, identity_target)

        np.testing.assert_array_almost_equal(solution.mat, expected, decimal=7)
def test_space_compose_dense(self):
    """Verify FullAdditive.compose on dense spaces with preset A and B.

    Every case supplies the composition triples, the argument space, the
    reference phrase space and the two parameter matrices. The result is
    compared with the reference on matrix values (10 decimals) and row
    indexing; column indexing is expected to be empty and the model is
    expected to have no intercept.
    """
    cases = [
        ([("a", "b", "a_b")],
         self.space4, self.space5,
         DenseMatrix.identity(2), DenseMatrix.identity(2)),
        ([("a", "b", "a_b")],
         self.space4, self.space6,
         np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
        ([("a", "b", "a_b"), ("a", "b", "a_a")],
         self.space4, self.space7,
         DenseMatrix.identity(2), DenseMatrix.identity(2)),
    ]

    for case in cases:
        phrase_triples, args, reference, param_a, param_b = case

        additive = FullAdditive(A=param_a, B=param_b)
        result = additive.compose(phrase_triples, args)

        np.testing.assert_array_almost_equal(
            result.cooccurrence_matrix.mat,
            reference.cooccurrence_matrix.mat,
            decimal=10,
        )

        # Column indexing of the composed space must be empty.
        self.assertListEqual(result.id2column, [])
        self.assertDictEqual(result.column2id, {})

        # Row indexing must agree with the reference phrase space.
        self.assertListEqual(result.id2row, reference.id2row)
        self.assertDictEqual(result.row2id, reference.row2id)

        self.assertFalse(additive._has_intercept)
def test_space_compose_sparse(self):
    """Verify FullAdditive.compose on the sparse fixtures with preset A and B.

    Both the composed and the reference matrices are converted with
    ``todense()`` and compared to 10 decimal places.
    """
    # NOTE(review): the original author flagged this test as unresolved --
    # parameters are passed as dense matrices while the input data is
    # sparse, and the intended handling of that combination is not settled.
    cases = [
        ([("a", "b", "a_b")],
         self.space1, self.space2,
         DenseMatrix.identity(2), DenseMatrix.identity(2)),
        ([("a", "b", "a_b")],
         self.space1, self.space3,
         np.mat([[0, 0], [0, 0]]), np.mat([[0, 0], [0, 0]])),
    ]

    for case in cases:
        phrase_triples, args, reference, param_a, param_b = case

        additive = FullAdditive(A=param_a, B=param_b)
        result = additive.compose(phrase_triples, args)

        np.testing.assert_array_almost_equal(
            result.cooccurrence_matrix.mat.todense(),
            reference.cooccurrence_matrix.mat.todense(),
            decimal=10,
        )
def test_dense_ridge_regression(self):
    """Compare ridge regression (parameter 1) with least squares on pinv_test_cases.

    For each (matrix, expected) pair the least-squares solution must equal
    ``expected`` to 7 decimals. The test then checks that "error + solution
    norm" for least squares is at least as large as the same quantity for
    the first element returned by Linalg.ridge_regression.
    """
    for raw_matrix, expected in self.pinv_test_cases:
        dense_input = DenseMatrix(raw_matrix)
        identity_target = DenseMatrix.identity(dense_input.shape[0])

        lstsq_solution = Linalg.lstsq_regression(dense_input, identity_target)
        np.testing.assert_array_almost_equal(lstsq_solution.mat, expected, decimal=7)

        ridge_solution = Linalg.ridge_regression(dense_input, identity_target, 1)[0]

        # NOTE(review): both errors are measured against DenseMatrix(expected)
        # rather than against the regression target (the identity matrix).
        # That may be why the stricter check below did not hold -- confirm
        # before changing, since it alters what the test asserts.
        lstsq_error = (dense_input * lstsq_solution - DenseMatrix(expected)).norm()
        ridge_error = (dense_input * ridge_solution - DenseMatrix(expected)).norm()

        lstsq_total = lstsq_error + lstsq_solution.norm()
        ridge_total = ridge_error + ridge_solution.norm()

        # Disabled by the original author ("should hold, but does not, maybe
        # rounding error?"): assertGreaterEqual(ridge_error, lstsq_error).
        self.assertGreaterEqual(lstsq_total, ridge_total)
def train(self, matrix_a, matrix_b=None):
    """Return an identity DenseMatrix sized by the column count of matrix_a.

    Args:
        matrix_a: object exposing ``shape``; ``shape[1]`` sets the size of
            the returned identity matrix.
        matrix_b: accepted for signature compatibility but ignored.

    Returns:
        The result of ``DenseMatrix.identity(matrix_a.shape[1])``.
    """
    return DenseMatrix.identity(matrix_a.shape[1])