def test_nmf(self):
    test_cases = [np.mat([[1, 2, 3], [2, 4, 6], [4, 17, 13]], dtype=np.double),
                  np.mat([[1, 0, 0]], dtype=np.double)]

    for in_mat in test_cases:
        red = Nmf(2)
        d_mat = DenseMatrix(in_mat)
        #wd_init, hd_init = red.random_init(d_mat)
        wd_init, hd_init = red.v_col_init(d_mat)

        s_mat = SparseMatrix(in_mat)
        ws_init = SparseMatrix(wd_init)
        hs_init = SparseMatrix(hd_init)

        wd_mat, hd_mat = Linalg.nmf(d_mat, wd_init, hd_init)
        ws_mat, hs_mat = Linalg.nmf(s_mat, ws_init, hs_init)

        # Tested against the MATLAB implementation -- all good.
        #print wd_mat.mat
        #print hd_mat.mat
        #print ws_mat.mat.todense()
        #print hs_mat.mat.todense()
        #print "V:", in_mat
        #print "WH:", (ws_mat * hs_mat).mat.todense()

        np.testing.assert_array_almost_equal(wd_mat.mat,
                                             ws_mat.mat.todense(), 2)
        np.testing.assert_array_almost_equal(hd_mat.mat,
                                             hs_mat.mat.todense(), 2)
def train(self, matrix_a, matrix_b):
    """
    If cross-validation is set to True, generalized cross-validation
    is performed (Hastie, Tibshirani and Friedman, 2nd edition, page 244).
    """
    if not self._crossvalidation:
        return Linalg.ridge_regression(matrix_a, matrix_b, self._param,
                                       self._intercept)[0]

    min_err_param = 0
    min_err = np.Inf
    gcv_err = np.Inf
    N = matrix_a.shape[0]
    for param in self._param_range:
        mat_x, S_trace, err1 = Linalg.ridge_regression(matrix_a, matrix_b,
                                                       param, self._intercept)
        denom = pow(1 - S_trace / N, 2) * N
        if denom != 0:
            gcv_err = (err1 * err1) / denom

        if gcv_err < min_err:
            min_err = gcv_err
            min_err_param = param

    #print "lambda:", min_err_param
    return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param,
                                   self._intercept)[0]
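# The GCV loop above minimizes RSS(lambda) / (N * (1 - tr(S_lambda)/N)**2),
# where S_lambda = A (A^T A + lambda*I)^-1 A^T is the ridge "hat" matrix
# (Hastie et al., page 244). A minimal numpy sketch of the same criterion;
# gcv_select and its names are hypothetical, not part of the toolkit:
import numpy as np

def gcv_select(A, B, params):
    N = A.shape[0]
    best_param, best_err = None, np.inf
    for lam in params:
        # Closed-form ridge solution: X = (A^T A + lam*I)^-1 A^T B
        G = np.linalg.inv(A.T.dot(A) + lam * np.eye(A.shape[1]))
        X = G.dot(A.T).dot(B)
        S_trace = np.trace(A.dot(G).dot(A.T))      # tr(S_lambda)
        rss = np.linalg.norm(A.dot(X) - B) ** 2    # squared residual norm
        denom = N * (1 - S_trace / N) ** 2
        if denom != 0 and rss / denom < best_err:
            best_err, best_param = rss / denom, lam
    return best_param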
def apply(self, matrix_):
    matrix_.assert_positive()
    #w_init, h_init = self.nndsvd_init(matrix_)
    w_init, h_init = self.v_col_init(matrix_)
    #w_init, h_init = self.random_init(matrix_)
    w, h = Linalg.nmf(matrix_, w_init, h_init)
    return w, Linalg.pinv(h)
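# Linalg.nmf's internals are not shown in this section. A common choice for
# factoring V ~ W*H with non-negativity constraints (and one consistent with
# the MATLAB cross-check in test_nmf) is Lee and Seung's multiplicative
# updates; a minimal standalone sketch under that assumption:
import numpy as np

def nmf_sketch(V, W, H, iters=100, eps=1e-9):
    # Multiplicative updates for min ||V - W H||_F with W, H >= 0.
    # V, W, H: non-negative float arrays; eps guards against division by zero.
    for _ in range(iters):
        H *= W.T.dot(V) / (W.T.dot(W).dot(H) + eps)
        W *= V.dot(H.T) / (W.dot(H).dot(H.T) + eps)
    return W, H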
def test_pinv(self):
    test_cases = self.pinv_test_cases
    for in_mat, expected_out in test_cases:
        out_mat = Linalg.pinv(DenseMatrix(in_mat))
        np.testing.assert_array_almost_equal(out_mat.mat, expected_out, 7)

        out_mat = Linalg.pinv(SparseMatrix(in_mat))
        np.testing.assert_array_almost_equal(out_mat.mat.todense(),
                                             expected_out, 7)
def test_sparse_svd(self):
    test_cases = self.svd_test_cases
    for x, u_expected, s_expected, v_expected in test_cases:
        for dim in [2, 3, 6]:
            u, s, v = Linalg.svd(SparseMatrix(x), dim)
            # Singular vectors are only defined up to sign, hence np.abs.
            np.testing.assert_array_almost_equal(np.abs(u.mat.todense()),
                                                 np.abs(u_expected), 2)
            np.testing.assert_array_almost_equal(np.abs(s),
                                                 np.abs(s_expected), 2)
            np.testing.assert_array_almost_equal(np.abs(v.mat.todense()),
                                                 np.abs(v_expected), 2)

        u, s, v = Linalg.svd(SparseMatrix(x), 1)
        np.testing.assert_array_almost_equal(np.abs(u.mat.todense()),
                                             np.abs(u_expected[:, 0:1]), 2)
        np.testing.assert_array_almost_equal(np.abs(s),
                                             np.abs(s_expected[0:1]), 2)
        np.testing.assert_array_almost_equal(np.abs(v.mat.todense()),
                                             np.abs(v_expected[:, 0:1]), 2)
def test_dense_svd(self):
    test_cases = self.svd_test_cases
    for x, u_expected, s_expected, v_expected in test_cases:
        for dim in [2, 3, 6]:
            u, s, v = Linalg.svd(DenseMatrix(x), dim)
            np.testing.assert_array_almost_equal(u.mat, u_expected, 2)
            np.testing.assert_array_almost_equal(s, s_expected, 2)
            np.testing.assert_array_almost_equal(v.mat, v_expected, 2)

        u, s, v = Linalg.svd(DenseMatrix(x), 1)
        np.testing.assert_array_almost_equal(u.mat, u_expected[:, 0:1], 2)
        np.testing.assert_array_almost_equal(s, s_expected[0:1], 2)
        np.testing.assert_array_almost_equal(v.mat, v_expected[:, 0:1], 2)
def test_intercept_lstsq_regression(self):
    a = DenseMatrix(np.matrix([[1, 1], [2, 3], [4, 6]]))
    b = DenseMatrix(np.matrix([[12, 15, 18], [21, 27, 33], [35, 46, 57]]))
    res = DenseMatrix(np.matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

    res1 = Linalg.lstsq_regression(a, b)
    res2 = Linalg.lstsq_regression(a, b, intercept=True)
    np.testing.assert_array_almost_equal(res2.mat[:-1, :], res[0:2, :].mat, 6)
    np.testing.assert_array_almost_equal(res2.mat[-1, :], res[2:3, :].mat, 6)

    new_a = a.hstack(DenseMatrix(np.ones((a.shape[0], 1))))
    self.assertGreater(((a * res1) - b).norm(),
                       ((new_a * res2) - b).norm())
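# The intercept=True path fits A*X plus an intercept by appending a column
# of ones to A, so the last row of the solution plays the role of the
# intercept -- exactly what the test above checks with new_a. A minimal
# standalone numpy illustration of the same trick (not the toolkit API):
import numpy as np

A = np.array([[1., 1.], [2., 3.], [4., 6.]])
B = np.array([[12., 15., 18.], [21., 27., 33.], [35., 46., 57.]])
A1 = np.hstack([A, np.ones((A.shape[0], 1))])   # append bias column
X, _, _, _ = np.linalg.lstsq(A1, B, rcond=None)
# X[:-1, :] are the weights, X[-1, :] is the intercept row;
# here X recovers [[1, 2, 3], [4, 5, 6], [7, 8, 9]] exactly.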
def test_sparse_lstsq_regression(self):
    test_cases = self.pinv_test_cases
    for m, m_inv in test_cases:
        m1 = SparseMatrix(m)
        id_ = SparseMatrix.identity(m1.shape[0])

        res = Linalg.lstsq_regression(m1, id_)
        np.testing.assert_array_almost_equal(res.mat.todense(), m_inv, 7)
        approx1 = (m1 * res).mat.todense()

        # The intercept variant is only smoke-tested: no assertion follows.
        res2 = Linalg.lstsq_regression(m1, id_, intercept=True)
        new_a = m1.hstack(SparseMatrix(np.ones((m1.shape[0], 1))))
        approx2 = (new_a * res2).mat.todense()
def nndsvd_init(self, matrix_):
    """
    NNDSVD initialization for NMF (Boutsidis and Gallopoulos, 2008).
    """
    def matrix_abs(mat_):
        mat_p = mat_.get_non_negative()
        mat_n_abs = mat_p - mat_
        return mat_p + mat_n_abs

    def pad_zeros(matrix_, axis, thickness):
        matrix_type = type(matrix_)
        if axis == 0:
            append_mat = matrix_type(np.zeros((thickness, matrix_.shape[1])))
            return matrix_.vstack(append_mat)
        elif axis == 1:
            append_mat = matrix_type(np.zeros((matrix_.shape[0], thickness)))
            return matrix_.hstack(append_mat)

    u, s, v = Linalg.svd(matrix_, self._reduced_dimension)

    rank = u.shape[1]
    w = [[]] * rank
    h = [[]] * rank

    vt = v.transpose()
    w[0] = sqrt(s[0]) * matrix_abs(u[:, 0])
    h[0] = sqrt(s[0]) * matrix_abs(vt[0, :])

    for i in range(1, rank):
        # Split each singular vector pair into its positive and negative
        # parts and keep the pair with the larger product of norms.
        uu = u[:, i]
        vv = vt[i, :]
        uup = uu.get_non_negative()
        uun = uup - uu
        vvp = vv.get_non_negative()
        vvn = vvp - vv
        n_uup = uup.norm()
        n_uun = uun.norm()
        n_vvp = vvp.norm()
        n_vvn = vvn.norm()
        termp = n_uup * n_vvp
        termn = n_uun * n_vvn
        if termp >= termn:
            w[i] = sqrt(s[i] * termp) * uup / n_uup
            h[i] = sqrt(s[i] * termp) * vvp / n_vvp
        else:
            w[i] = sqrt(s[i] * termn) * uun / n_uun
            h[i] = sqrt(s[i] * termn) * vvn / n_vvn

    w = matrix_.nary_hstack(w)
    h = matrix_.nary_vstack(h)

    w.remove_small_values(1e-10)
    h.remove_small_values(1e-10)

    if rank < self._reduced_dimension:
        w = pad_zeros(w, 1, self._reduced_dimension - rank)
        h = pad_zeros(h, 0, self._reduced_dimension - rank)

    return w, h
def train(self, matrix_a, matrix_b=None):
    """
    If cross-validation is set to True, generalized cross-validation
    is performed (Hastie, Tibshirani and Friedman, 2nd edition, page 244).
    TODO: not yet!
    """
    pid = str(getpid())
    logdir = 'convergence/' + pid
    call(['mkdir', '-p', logdir])  # mkdir -p convergence/pid
    self._trainId += 1  # for logging purposes

    if not self._crossvalidation:
        if self._projector:
            matrix_b = matrix_a
        W, costs = Linalg.tracenorm_regression(matrix_a, matrix_b,
                                               self._param, self._iterations,
                                               self._intercept)
        print_tuple_list(costs, logdir + '/' + str(self._trainId) +
                         '-lmbd-' + str(self._param))
        return W
    elif matrix_b is None:
        raise ValueError("Unable to perform cross-validation "
                         "without a phrase space")
    else:
        # Falls back to the ridge GCV criterion; trace norm
        # cross-validation is not implemented yet (see TODO above).
        min_err_param = 0
        min_err = np.Inf
        gcv_err = np.Inf
        N = matrix_a.shape[0]
        for param in self._param_range:
            mat_x, S_trace, err1 = Linalg.ridge_regression(matrix_a,
                                                           matrix_b, param,
                                                           self._intercept)
            denom = pow(1 - S_trace / N, 2) * N
            if denom != 0:
                gcv_err = (err1 * err1) / denom

            if gcv_err < min_err:
                min_err = gcv_err
                min_err_param = param

        #print "lambda:", min_err_param
        return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param,
                                       self._intercept)[0]
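# Linalg.tracenorm_regression's internals are not shown in this section. A
# common way to solve the trace norm (nuclear norm) regularized problem
# min ||A X - B||_F^2 + lam * ||X||_* is proximal gradient descent, whose
# prox step soft-thresholds the singular values. A minimal standalone
# sketch under that assumption (not necessarily what the toolkit does):
import numpy as np

def tracenorm_regression_sketch(A, B, lam, iters=200):
    X = np.zeros((A.shape[1], B.shape[1]))
    step = 1.0 / (2 * np.linalg.norm(A, 2) ** 2)  # 1 / Lipschitz constant
    for _ in range(iters):
        G = 2 * A.T.dot(A.dot(X) - B)             # gradient of the loss
        U, s, Vt = np.linalg.svd(X - step * G, full_matrices=False)
        s = np.maximum(s - step * lam, 0)         # soft-threshold spectrum
        X = (U * s).dot(Vt)
    return X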
def test_dense_lstsq_regression(self):
    test_cases = self.pinv_test_cases
    for m, m_inv in test_cases:
        m1 = DenseMatrix(m)
        id_ = DenseMatrix.identity(m1.shape[0])
        res = Linalg.lstsq_regression(m1, id_)
        np.testing.assert_array_almost_equal(res.mat, m_inv, 7)
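# The test above relies on the fact that the least-squares solution of
# A X ~ I is the Moore-Penrose pseudoinverse of A (uniquely so when A has
# full column rank). A quick standalone numpy check of the same identity:
import numpy as np

A = np.array([[1., 0.], [0., 1.], [1., 1.]])
X, _, _, _ = np.linalg.lstsq(A, np.eye(A.shape[0]), rcond=None)
np.testing.assert_array_almost_equal(X, np.linalg.pinv(A), 7)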
def test_sparse_ridge_regression(self):
    test_cases = self.pinv_test_cases
    for m, m_inv in test_cases:
        m1 = SparseMatrix(m)
        id_ = SparseMatrix.identity(m1.shape[0])

        res1 = Linalg.lstsq_regression(m1, id_)
        np.testing.assert_array_almost_equal(res1.mat.todense(), m_inv, 7)

        res2 = Linalg.ridge_regression(m1, id_, 1)[0]

        error1 = (m1 * res1 - SparseMatrix(m_inv)).norm()
        error2 = (m1 * res2 - SparseMatrix(m_inv)).norm()
        #print "err", error1, error2

        norm1 = error1 + res1.norm()
        norm2 = error2 + res2.norm()
        #print "norm", norm1, norm2

        # Ridge trades fitting error for a smaller solution norm, so the
        # unregularized fit should be at least as accurate. This should
        # hold, but may fail due to rounding error:
        #self.assertGreaterEqual(error2, error1)
        self.assertGreaterEqual(norm1, norm2)
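# Ridge regression has the closed form X = (A^T A + lam*I)^-1 A^T B, which
# shrinks the solution norm at the cost of a larger residual -- exactly the
# trade-off the test above checks. A minimal standalone numpy sketch (not
# the Linalg.ridge_regression signature, which also returns the hat-matrix
# trace and residual norm):
import numpy as np

def ridge_sketch(A, B, lam):
    d = A.shape[1]
    return np.linalg.solve(A.T.dot(A) + lam * np.eye(d), A.T.dot(B))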
def apply(self, matrix_):
    u, s, v = Linalg.svd(matrix_, self._reduced_dimension)
    return u.scale_columns(s), v
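# The reduced representation is U_k * diag(s_k): rows of the input live in
# the k-dimensional latent space, and V_k maps new data into it. A small
# standalone numpy illustration of the same decomposition:
import numpy as np

X = np.array([[1., 2., 3.], [2., 4., 6.], [4., 17., 13.]])
U, s, Vt = np.linalg.svd(X, full_matrices=False)
k = 2
reduced = U[:, :k] * s[:k]        # the equivalent of u.scale_columns(s)
# Projecting X through V_k recovers the same latent coordinates:
np.testing.assert_array_almost_equal(reduced, X.dot(Vt[:k].T))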
def train(self, matrix_a, matrix_b):
    return Linalg.lstsq_regression(matrix_a, matrix_b, self._intercept)
def train(self, matrix_a, matrix_b=None):
    """
    matrix_b is ignored.
    """
    W = Linalg.kronecker_product(matrix_a)
    return W
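# Linalg.kronecker_product is called with a single argument here, and its
# exact semantics are not shown in this section. For reference, the
# standard Kronecker product of two matrices tiles scaled copies of the
# second inside the first, as numpy's np.kron demonstrates (standalone):
import numpy as np

a = np.array([[1, 2], [3, 4]])
b = np.array([[0, 1], [1, 0]])
# np.kron(a, b)[i*p + k, j*q + l] == a[i, j] * b[k, l] for b of shape (p, q)
print(np.kron(a, b))
# [[0 1 0 2]
#  [1 0 2 0]
#  [0 3 0 4]
#  [3 0 4 0]]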