def test_nmf(self):
        test_cases = [np.mat([[1,2,3],[2,4,6],[4,17,13]], dtype = np.double),
                      np.mat([[1,0,0]], dtype = np.double)]

        for in_mat in test_cases:
            red = Nmf(2)
            d_mat = DenseMatrix(in_mat)
            #wd_init, hd_init = red.random_init(d_mat)
            wd_init, hd_init = red.v_col_init(d_mat)

            s_mat = SparseMatrix(in_mat)
            ws_init = SparseMatrix(wd_init)
            hs_init = SparseMatrix(hd_init)

            wd_mat, hd_mat = Linalg.nmf(d_mat, wd_init, hd_init)
            ws_mat, hs_mat = Linalg.nmf(s_mat, ws_init, hs_init)

            #TESTED IT AGAINST MATLAB IMPLEMENTATION - ALL GOOD
            #print wd_mat.mat
            #print hd_mat.mat
            #print ws_mat.mat.todense()
            #print hs_mat.mat.todense()
            print "V:", in_mat
            print "WH:", (ws_mat*hs_mat).mat.todense()

            np.testing.assert_array_almost_equal(wd_mat.mat,
                                                 ws_mat.mat.todense(), 2)
            np.testing.assert_array_almost_equal(hd_mat.mat,
                                                 hs_mat.mat.todense(), 2)
    def train(self, matrix_a, matrix_b):
        """
        If cross validation is set to True, it performs generalized
        cross validation. (Hastie, Tibshirani and Friedman, Second edition,
        page 244).
        """

        if not self._crossvalidation:
            return Linalg.ridge_regression(matrix_a, matrix_b, self._param,
                                           self._intercept)[0]

        else:
            min_err_param = 0
            min_err = np.Inf
            gcv_err = np.Inf

            N = matrix_a.shape[0]
            for param in self._param_range:

                mat_x, S_trace, err1 = Linalg.ridge_regression(matrix_a, matrix_b, param,
                                                               self._intercept)

                # GCV denominator: N * (1 - trace(S)/N)^2
                denom = pow(1 - S_trace / N, 2) * N
                if denom != 0:
                    gcv_err = (err1 * err1) / denom

                if gcv_err < min_err:
                    min_err = gcv_err
                    min_err_param = param

            #print "lambda:", min_err_param
            return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param,
                                           self._intercept)[0]
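
For reference, the score minimised in the loop above is the generalized cross-validation criterion GCV(lambda) = ||A X_lambda - B||^2 / (N (1 - trace(S_lambda)/N)^2), where S_lambda = A (A^T A + lambda I)^-1 A^T is the ridge smoother matrix. Below is a minimal NumPy-only sketch of the same selection rule; the helper name and the dense-array, no-intercept assumptions are mine, not the toolkit's.

import numpy as np

def gcv_ridge_param(A, B, params):
    # Pick the ridge parameter with the smallest GCV score
    # (dense 2-d arrays, no intercept column).
    N = A.shape[0]
    best_param, best_score = params[0], np.inf
    for lam in params:
        # Ridge solution X = (A^T A + lam*I)^-1 A^T B
        G = A.T.dot(A) + lam * np.eye(A.shape[1])
        X = np.linalg.solve(G, A.T.dot(B))
        # Smoother matrix S = A (A^T A + lam*I)^-1 A^T; only its trace is needed
        S_trace = np.trace(A.dot(np.linalg.solve(G, A.T)))
        residual = np.linalg.norm(A.dot(X) - B)
        denom = N * (1.0 - S_trace / N) ** 2
        if denom != 0:
            score = residual ** 2 / denom
            if score < best_score:
                best_score, best_param = score, lam
    return best_param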
Example #6
    def apply(self, matrix_):

        matrix_.assert_positive()
        #w_init, h_init = self.nndsvd_init(matrix_)
        w_init, h_init = self.v_col_init(matrix_)
        #w_init, h_init = self.random_init(matrix_)
        w, h = Linalg.nmf(matrix_, w_init, h_init)
        return w, Linalg.pinv(h)
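
For intuition about what a call like Linalg.nmf(matrix_, w_init, h_init) computes, here is a minimal NumPy sketch of non-negative matrix factorization with Lee/Seung-style multiplicative updates. This is an assumption about the general algorithm family, not the toolkit's actual implementation, which has its own update rule, stopping criterion and matrix types.

import numpy as np

def nmf_sketch(V, W, H, iterations=100, eps=1e-9):
    # Approximate V (m x n) by W (m x k) . H (k x n) with non-negative factors;
    # eps guards against division by zero in the multiplicative updates.
    for _ in range(iterations):
        H = H * W.T.dot(V) / (W.T.dot(W).dot(H) + eps)
        W = W * V.dot(H.T) / (W.dot(H).dot(H.T) + eps)
    return W, H

# Tiny usage example with a random non-negative initialisation
V = np.array([[1.0, 2.0, 3.0], [2.0, 4.0, 6.0], [4.0, 17.0, 13.0]])
rng = np.random.RandomState(0)
W, H = nmf_sketch(V, rng.rand(3, 2), rng.rand(2, 3))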
Example #7
    def test_pinv(self):
        test_cases = self.pinv_test_cases

        for in_mat, expected_out in test_cases:
            out_mat = Linalg.pinv(DenseMatrix(in_mat))
            np.testing.assert_array_almost_equal(out_mat.mat, expected_out, 7)

            out_mat = Linalg.pinv(SparseMatrix(in_mat))
            np.testing.assert_array_almost_equal(out_mat.mat.todense(),
                                                 expected_out, 7)
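
Linalg.pinv is checked here against precomputed expected outputs; as an independent sanity check, NumPy's Moore-Penrose pseudoinverse behaves the same way (the matrix below is illustrative, not one of pinv_test_cases):

import numpy as np

# For an invertible matrix the pseudoinverse coincides with the inverse,
# so m.dot(pinv(m)) is the identity up to rounding.
m = np.array([[1.0, 2.0], [3.0, 4.0]])
np.testing.assert_array_almost_equal(np.linalg.pinv(m), np.linalg.inv(m), 7)
np.testing.assert_array_almost_equal(m.dot(np.linalg.pinv(m)), np.eye(2), 7)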
Example #9
    def test_sparse_svd(self):
        test_cases = self.svd_test_cases

        for x, u_expected, s_expected, v_expected in test_cases:
            for dim in [2, 3, 6]:
                u, s, v = Linalg.svd(SparseMatrix(x), dim)
                np.testing.assert_array_almost_equal(np.abs(u.mat.todense()),
                                                     np.abs(u_expected), 2)
                np.testing.assert_array_almost_equal(np.abs(s),
                                                     np.abs(s_expected), 2)
                np.testing.assert_array_almost_equal(np.abs(v.mat.todense()),
                                                     np.abs(v_expected), 2)

            u, s, v = Linalg.svd(SparseMatrix(x), 1)
            np.testing.assert_array_almost_equal(np.abs(u.mat.todense()),
                                                 np.abs(u_expected[:, 0:1]), 2)
            np.testing.assert_array_almost_equal(np.abs(s),
                                                 np.abs(s_expected[0:1]), 2)
            np.testing.assert_array_almost_equal(np.abs(v.mat.todense()),
                                                 np.abs(v_expected[:, 0:1]), 2)
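
The np.abs comparisons are needed because singular vectors are only determined up to sign. For orientation, a standalone truncated SVD of a sparse matrix with scipy.sparse.linalg.svds, which is presumably the kind of routine Linalg.svd wraps for SparseMatrix (an assumption, not confirmed by this listing); svds returns the k largest triplets in ascending order, hence the reordering:

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds

x = sp.csr_matrix([[1.0, 2.0, 3.0], [2.0, 4.0, 6.0], [4.0, 17.0, 13.0]])
u, s, vt = svds(x, k=2)                  # k must be smaller than min(x.shape)
order = np.argsort(s)[::-1]              # put singular values in descending order
u, s, vt = u[:, order], s[order], vt[order, :]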
Example #10
    def test_dense_svd(self):
        test_cases = self.svd_test_cases

        for x, u_expected, s_expected, v_expected in test_cases:
            for dim in [2,3,6]:
                u, s, v = Linalg.svd(DenseMatrix(x),dim)
                np.testing.assert_array_almost_equal(u.mat, u_expected, 2)
                np.testing.assert_array_almost_equal(s, s_expected, 2)
                np.testing.assert_array_almost_equal(v.mat, v_expected, 2)

            u, s, v = Linalg.svd(DenseMatrix(x),1)
            np.testing.assert_array_almost_equal(u.mat, u_expected[:,0:1], 2)
            np.testing.assert_array_almost_equal(s, s_expected[0:1], 2)
            np.testing.assert_array_almost_equal(v.mat, v_expected[:, 0:1], 2)
Example #11
    def test_intercept_lstsq_regression(self):

        a = DenseMatrix(np.matrix([[1, 1],[2, 3],[4, 6]]))
        b = DenseMatrix(np.matrix([[12, 15, 18],[21, 27, 33],[35, 46, 57]]))
        res = DenseMatrix(np.matrix([[1, 2, 3],[4, 5, 6],[7, 8, 9]]))

        res1 = Linalg.lstsq_regression(a, b)
        res2 = Linalg.lstsq_regression(a, b, intercept=True)

        np.testing.assert_array_almost_equal(res2.mat[:-1,:], res[0:2,:].mat, 6)
        np.testing.assert_array_almost_equal(res2.mat[-1,:], res[2:3,:].mat, 6)

        new_a = a.hstack(DenseMatrix(np.ones((a.shape[0], 1))))
        self.assertGreater(((a * res1) - b).norm(), ((new_a * res2) - b).norm())
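
The intercept variant corresponds to appending a column of ones to the input, which is exactly what the test does when it builds new_a. The same idea with plain NumPy least squares, reusing the a, b and expected res values from the test:

import numpy as np

a = np.array([[1.0, 1.0], [2.0, 3.0], [4.0, 6.0]])
b = np.array([[12.0, 15.0, 18.0], [21.0, 27.0, 33.0], [35.0, 46.0, 57.0]])

# Append a ones column so the last row of the solution acts as the intercept.
a1 = np.hstack([a, np.ones((a.shape[0], 1))])
coef = np.linalg.lstsq(a1, b, rcond=None)[0]
np.testing.assert_array_almost_equal(
    coef, np.array([[1.0, 2, 3], [4, 5, 6], [7, 8, 9]]), 6)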
Example #13
    def test_sparse_lstsq_regression(self):

        test_cases = self.pinv_test_cases
        for m, m_inv in test_cases:
            m1 = SparseMatrix(m)
            id_ = SparseMatrix.identity(m1.shape[0])

            res = Linalg.lstsq_regression(m1, id_)
            np.testing.assert_array_almost_equal(res.mat.todense(), m_inv, 7)

            approx1 = (m1 * res).mat.todense()

            res2 = Linalg.lstsq_regression(m1, id_, intercept=True)
            new_a = m1.hstack(SparseMatrix(np.ones((m1.shape[0], 1))))

            approx2 = (new_a * res2).mat.todense()
Example #15
    def nndsvd_init(self, matrix_):
        def matrix_abs(mat_):
            mat_p = mat_.get_non_negative()
            mat_n_abs = mat_p - mat_
            return mat_p + mat_n_abs

        def padd_zeros(matrix_, axis, thickness):
            matrix_type = type(matrix_)
            if axis == 0:
                append_mat = matrix_type(
                    np.zeros((thickness, matrix_.shape[1])))
                return matrix_.vstack(append_mat)
            elif axis == 1:
                append_mat = matrix_type(
                    np.zeros((matrix_.shape[0], thickness)))
                return matrix_.hstack(append_mat)

        u, s, v = Linalg.svd(matrix_, self._reduced_dimension)

        rank = u.shape[1]
        w = [[]] * rank
        h = [[]] * rank

        vt = v.transpose()

        w[0] = sqrt(s[0]) * matrix_abs(u[:, 0])
        h[0] = sqrt(s[0]) * matrix_abs(vt[0, :])

        for i in range(1, rank):
            uu = u[:, i]
            vv = vt[i, :]
            uup = uu.get_non_negative()
            uun = uup - uu
            vvp = vv.get_non_negative()
            vvn = vvp - vv

            n_uup = uup.norm()
            n_uun = uun.norm()
            n_vvp = vvp.norm()
            n_vvn = vvn.norm()

            termp = n_uup * n_vvp
            termn = n_uun * n_vvn
            if (termp >= termn):
                w[i] = sqrt(s[i] * termp) * uup / n_uup
                h[i] = sqrt(s[i] * termp) * vvp / n_vvp
            else:
                w[i] = sqrt(s[i] * termn) * uun / n_uun
                h[i] = sqrt(s[i] * termn) * vvn / n_vvn

        w = matrix_.nary_hstack(w)
        h = matrix_.nary_vstack(h)

        w.remove_small_values(1e-10)
        h.remove_small_values(1e-10)

        if (rank < self._reduced_dimension):
            w = padd_zeros(w, 1, self._reduced_dimension - rank)
            h = padd_zeros(h, 0, self._reduced_dimension - rank)
        return w, h
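
The loop above implements the NNDSVD initialisation of Boutsidis and Gallopoulos: each singular pair is split into its non-negative and negative parts, and the half with the larger product of norms seeds column i of w and row i of h. The splitting step in isolation, on a plain NumPy vector:

import numpy as np

uu = np.array([0.5, -0.2, 0.1])
uup = np.maximum(uu, 0.0)   # non-negative part, as get_non_negative() above
uun = uup - uu              # magnitude of the negative part, so uu = uup - uun
# The branch in the loop keeps whichever of (uup, vvp) or (uun, vvn) has the
# larger product of norms, rescaled by sqrt(s[i] * term).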
Example #16
    def train(self, matrix_a, matrix_b=None):
        """
        If cross validation is set to True, it performs generalized
        cross validation. (Hastie, Tibshirani and Friedman, Second edition,
        page 244).

        TODO: not yet!
        """

        # Create a per-process log directory: convergence/<pid>
        pid = str(getpid())
        logdir = 'convergence/' + pid
        call(['mkdir', '-p', logdir])

        self._trainId += 1 # For logging purposes

        if not self._crossvalidation:
            if self._projector:
                matrix_b = matrix_a
            W, costs = Linalg.tracenorm_regression(matrix_a, matrix_b,
                                                   self._param,
                                                   self._iterations,
                                                   self._intercept)
            print_tuple_list(costs, logdir + '/' + str(self._trainId) +
                             '-lmbd-' + str(self._param))
            return W
        elif matrix_b is None:
            raise ValueError("Unable to perform cross-validation without "
                             "a phrase space")
        else:
            min_err_param = 0
            min_err = np.Inf
            gcv_err = np.Inf

            N = matrix_a.shape[0]
            for param in self._param_range:

                mat_x, S_trace, err1 = Linalg.ridge_regression(matrix_a, matrix_b, param,
                                                               self._intercept)

                # GCV denominator: N * (1 - trace(S)/N)^2
                denom = pow(1 - S_trace / N, 2) * N
                if denom != 0:
                    gcv_err = (err1 * err1) / denom

                if gcv_err < min_err:
                    min_err = gcv_err
                    min_err_param = param

            #print "lambda:", min_err_param
            return Linalg.ridge_regression(matrix_a, matrix_b, min_err_param,
                                           self._intercept)[0]
Example #18
    def test_dense_lstsq_regression(self):

        test_cases = self.pinv_test_cases
        for m, m_inv in test_cases:
            m1 = DenseMatrix(m)
            id_ = DenseMatrix.identity(m1.shape[0])

            res = Linalg.lstsq_regression(m1, id_)
            np.testing.assert_array_almost_equal(res.mat, m_inv, 7)
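
Solving m1 * X = id_ in the least-squares sense yields the pseudoinverse of m1, which is what the assertion against m_inv relies on. The same check with plain NumPy (illustrative matrix, not one of pinv_test_cases):

import numpy as np

m = np.array([[3.0, 1.0], [1.0, 2.0]])
x = np.linalg.lstsq(m, np.eye(2), rcond=None)[0]
np.testing.assert_array_almost_equal(x, np.linalg.pinv(m), 7)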
Example #22
    def test_sparse_ridge_regression(self):
        test_cases = self.pinv_test_cases
        for m, m_inv in test_cases:
            m1 = SparseMatrix(m)
            id_ = SparseMatrix.identity(m1.shape[0])

            res1 = Linalg.lstsq_regression(m1, id_)
            np.testing.assert_array_almost_equal(res1.mat.todense(), m_inv, 7)

            res2 = Linalg.ridge_regression(m1, id_, 1)[0]

            error1 = (m1 * res1 - SparseMatrix(m_inv)).norm()
            error2 = (m1 * res2 - SparseMatrix(m_inv)).norm()

            #print "err", error1, error2

            norm1 = error1 + res1.norm()
            norm2 = error2 + res2.norm()

            #print "norm", norm1, norm2

            # This should also hold; maybe it fails due to rounding error?
            #self.assertGreaterEqual(error2, error1)
            self.assertGreaterEqual(norm1, norm2)
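
For reference, ridge regression has the closed form X = (A^T A + lambda*I)^-1 A^T B, and increasing lambda shrinks the norm of the solution, which is the effect the norms above probe. A dense NumPy sketch of that closed form (no intercept handling):

import numpy as np

def ridge_solution(A, B, lam):
    # X = (A^T A + lam*I)^-1 A^T B; lam = 0 reduces to ordinary least squares.
    k = A.shape[1]
    return np.linalg.solve(A.T.dot(A) + lam * np.eye(k), A.T.dot(B))

A = np.array([[3.0, 1.0], [1.0, 2.0]])
res1 = ridge_solution(A, np.eye(2), 0.0)   # essentially the (pseudo)inverse of A
res2 = ridge_solution(A, np.eye(2), 1.0)   # shrunk towards zero
assert np.linalg.norm(res2) <= np.linalg.norm(res1)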
Example #24
    def apply(self, matrix_):

        u, s, v = Linalg.svd(matrix_, self._reduced_dimension)
        return u.scale_columns(s), v
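
The reduction keeps the top reduced_dimension singular triplets and represents the rows of the input as u scaled by the singular values; v is presumably returned so that other matrices can later be projected into the same reduced space. An equivalent dense NumPy sketch:

import numpy as np

X = np.array([[1.0, 2.0, 3.0], [2.0, 4.0, 6.0], [4.0, 17.0, 13.0]])
U, s, Vt = np.linalg.svd(X, full_matrices=False)
k = 2
reduced = U[:, :k] * s[:k]    # rows of X expressed in the k-dimensional space
components = Vt[:k, :].T      # analogous to the v returned above
# reduced.dot(components.T) is the best rank-k approximation of X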
Example #25
    def train(self, matrix_a, matrix_b):
        return Linalg.lstsq_regression(matrix_a, matrix_b, self._intercept)
Example #26
    def train(self, matrix_a, matrix_b=None):
        """
        matrix_b is ignored.
        """
        W = Linalg.kronecker_product(matrix_a)
        return W
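
What Linalg.kronecker_product does with a single argument is specific to the toolkit (note that matrix_b is ignored), so the snippet below only shows NumPy's generic Kronecker product for orientation; it is not a reimplementation of this train method.

import numpy as np

# Generic Kronecker product: every entry of the first array scales a full
# copy of the second, giving a (2*2) x (2*2) result here.
a = np.array([[1.0, 2.0], [3.0, 4.0]])
b = np.eye(2)
k = np.kron(a, b)   # shape (4, 4)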