コード例 #1
0
    def export(self):
        """Build an SVD object from the files SVDLIBC wrote for this prefix.

        Reads ``<prefix>-Ut``, ``<prefix>-Vt`` and ``<prefix>-S`` (falling back
        to ``<prefix>-U`` / ``<prefix>-V`` when a transposed file cannot be
        loaded), plus the ``<prefix>.ids.rows`` and ``<prefix>.ids.cols`` label
        files, where ``<prefix>`` is ``self._svd_prefix``.

        Returns:
            An ``SVD`` instance whose ``_U``, ``_S``, ``_V``,
            ``_matrix_similarity`` and ``_matrix_reconstructed`` are populated.

        Raises:
            AssertionError: if the row or column IDs contain duplicates.
            IOError: if a required file cannot be opened.
        """
        # http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html
        # only importing default 'dt' S, Ut and Vt (dense text output matrices)

        def load_dense(path):
            # Parse with csv (much faster than numpy.loadtxt()); the first
            # line is skipped, the rest is converted to a float array.
            # The file is closed as soon as its rows have been read.
            with open(path) as handle:
                rows = list(csv.reader(handle, delimiter=' '))[1:]
            return array(rows).astype('float')

        def load_ids(path):
            # One ID per line. IDs are returned as ints when *all* of them
            # parse as ints, otherwise as the stripped strings.
            with open(path) as handle:
                tokens = [line.strip() for line in handle]
            try:
                return [int(token) for token in tokens]
            except ValueError:
                return tokens

        PREFIX = self._svd_prefix
        file_Ut = PREFIX + '-Ut'
        file_Vt = PREFIX + '-Vt'
        file_S = PREFIX + '-S'
        # Fallbacks, read only when the transposed file cannot be loaded:
        file_U = PREFIX + '-U'
        file_V = PREFIX + '-V'

        # Any ordinary failure on the transposed file (missing, malformed)
        # triggers the fallback; KeyboardInterrupt/SystemExit still propagate.
        try:
            U = load_dense(file_Ut).transpose()
        except Exception:
            U = load_dense(file_U)
        try:
            V = load_dense(file_Vt).transpose()
        except Exception:
            V = load_dense(file_V)

        # Flatten the singular values into a 1-D vector.
        _S = load_dense(file_S)
        S = _S.reshape(_S.shape[0], )

        PREFIX_INDEXES = PREFIX + '.ids.'
        file_U_idx = PREFIX_INDEXES + 'rows'
        file_V_idx = PREFIX_INDEXES + 'cols'
        U_idx = load_ids(file_U_idx)
        V_idx = load_ids(file_V_idx)

        # Check no duplicated IDs!!!
        assert (len(U_idx) == len(OrderedSet(U_idx)))
        assert (len(V_idx) == len(OrderedSet(V_idx)))

        # Create SVD
        svd = SVD()
        svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
        svd._S = S
        svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
        svd._matrix_similarity = svd._reconstruct_similarity()
        svd._matrix_reconstructed = svd._reconstruct_matrix()

        return svd
コード例 #2
0
ファイル: svdlibc.py プロジェクト: Maksymdelta/python-recsys
 def export(self):
     """Build an SVD object from the files SVDLIBC wrote for this prefix.

     Reads ``<prefix>-Ut``, ``<prefix>-Vt`` and ``<prefix>-S`` (falling back
     to ``<prefix>-U`` / ``<prefix>-V`` when a transposed file cannot be
     loaded), plus the ``<prefix>.ids.rows`` and ``<prefix>.ids.cols`` label
     files, where ``<prefix>`` is ``self._svd_prefix``.

     Returns:
         An ``SVD`` instance whose ``_U``, ``_S``, ``_V``,
         ``_matrix_similarity`` and ``_matrix_reconstructed`` are populated.

     Raises:
         AssertionError: if the row or column IDs contain duplicates.
         IOError: if a required file cannot be opened.
     """
     # http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html
     # only importing default 'dt' S, Ut and Vt (dense text output matrices)

     def load_dense(path):
         # Parse with csv (much faster than numpy.loadtxt()); the first
         # line is skipped, the rest is converted to a float array.
         # The file is closed as soon as its rows have been read.
         with open(path) as handle:
             rows = list(csv.reader(handle, delimiter=' '))[1:]
         return array(rows).astype('float')

     def load_ids(path):
         # One ID per line. IDs are returned as ints when *all* of them
         # parse as ints, otherwise as the stripped strings.
         with open(path) as handle:
             tokens = [line.strip() for line in handle]
         try:
             return [int(token) for token in tokens]
         except ValueError:
             return tokens

     PREFIX = self._svd_prefix
     file_Ut = PREFIX + '-Ut'
     file_Vt = PREFIX + '-Vt'
     file_S = PREFIX + '-S'
     # Fallbacks, read only when the transposed file cannot be loaded:
     file_U = PREFIX + '-U'
     file_V = PREFIX + '-V'

     # Any ordinary failure on the transposed file (missing, malformed)
     # triggers the fallback; KeyboardInterrupt/SystemExit still propagate.
     try:
         U = load_dense(file_Ut).transpose()
     except Exception:
         U = load_dense(file_U)
     try:
         V = load_dense(file_Vt).transpose()
     except Exception:
         V = load_dense(file_V)

     # Flatten the singular values into a 1-D vector.
     _S = load_dense(file_S)
     S = _S.reshape(_S.shape[0], )

     PREFIX_INDEXES = PREFIX + '.ids.'
     file_U_idx = PREFIX_INDEXES + 'rows'
     file_V_idx = PREFIX_INDEXES + 'cols'
     U_idx = load_ids(file_U_idx)
     V_idx = load_ids(file_V_idx)

     # Check no duplicated IDs!!!
     assert(len(U_idx) == len(OrderedSet(U_idx)))
     assert(len(V_idx) == len(OrderedSet(V_idx)))

     # Create SVD
     svd = SVD()
     svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
     svd._S = S
     svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
     svd._matrix_similarity = svd._reconstruct_similarity()
     svd._matrix_reconstructed = svd._reconstruct_matrix()

     return svd
コード例 #3
0
    def export(self):
        """Build an SVD object from the files SVDLIBC wrote for this prefix.

        Reads ``<prefix>-Ut``, ``<prefix>-Vt`` and ``<prefix>-S`` (falling back
        to ``<prefix>-U`` / ``<prefix>-V`` when a transposed file cannot be
        loaded), plus the ``<prefix>.ids.rows`` and ``<prefix>.ids.cols`` label
        files, where ``<prefix>`` is ``self._svd_prefix``. Progress messages
        are written to stdout when the module-level ``VERBOSE`` is true.

        Returns:
            An ``SVD`` instance whose ``_U``, ``_S``, ``_V``,
            ``_matrix_similarity`` and ``_matrix_reconstructed`` are populated.
            When a matrix has more than 2**21 vectors, ``_file_row_ids`` /
            ``_file_col_ids`` are also set to the corresponding ID file path.

        Raises:
            AssertionError: if the row or column IDs contain duplicates or
                their count differs from the matching matrix's row count.
            IOError: if a required file cannot be opened.
        """
        # http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html
        # only importing default 'dt' S, Ut and Vt (dense text output matrices)

        def load_dense(path):
            # Parse with csv (much faster than numpy.loadtxt()); the first
            # line is skipped, the rest is converted to a float array.
            # The file is closed as soon as its rows have been read.
            with open(path) as handle:
                rows = list(csv.reader(handle, delimiter=' '))[1:]
            return array(rows).astype('float')

        def load_ids(path):
            # One ID per line. IDs are returned as ints when *all* of them
            # parse as ints, otherwise as the stripped strings.
            with open(path) as handle:
                tokens = [line.strip() for line in handle]
            try:
                return [int(token) for token in tokens]
            except ValueError:
                return tokens

        PREFIX = self._svd_prefix
        file_Ut = PREFIX + '-Ut'
        file_Vt = PREFIX + '-Vt'
        file_S = PREFIX + '-S'
        # Fallbacks, read only when the transposed file cannot be loaded:
        file_U = PREFIX + '-U'
        file_V = PREFIX + '-V'

        if VERBOSE:
            sys.stdout.write('Reading files: %s, %s, %s\n' %
                             (file_Ut, file_Vt, file_S))
        # Any ordinary failure on the transposed file (missing, malformed)
        # triggers the fallback; KeyboardInterrupt/SystemExit still propagate.
        try:
            U = load_dense(file_Ut).transpose()
        except Exception:
            U = load_dense(file_U)
        try:
            V = load_dense(file_Vt).transpose()
        except Exception:
            V = load_dense(file_V)

        # Flatten the singular values into a 1-D vector.
        _S = load_dense(file_S)
        S = _S.reshape(_S.shape[0], )

        PREFIX_INDEXES = PREFIX + '.ids.'
        file_U_idx = PREFIX_INDEXES + 'rows'
        file_V_idx = PREFIX_INDEXES + 'cols'
        if VERBOSE:
            sys.stdout.write('Reading index files: %s, %s\n' %
                             (file_U_idx, file_V_idx))
        U_idx = load_ids(file_U_idx)
        V_idx = load_ids(file_V_idx)

        # Check no duplicated IDs, and that there is one ID per matrix row.
        assert (len(U_idx) == len(
            OrderedSet(U_idx))), 'There are duplicated row IDs!'
        assert (len(U_idx) == U.shape[0]
                ), 'There are duplicated (or empty) row IDs!'
        assert (len(V_idx) == len(
            OrderedSet(V_idx))), 'There are duplicated col IDs!'
        assert (len(V_idx) == V.shape[0]
                ), 'There are duplicated (or empty) col IDs'

        # Create SVD
        if VERBOSE:
            sys.stdout.write('Creating SVD() class\n')
        svd = SVD()
        svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
        svd._S = S
        svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
        svd._matrix_similarity = svd._reconstruct_similarity()
        svd._matrix_reconstructed = svd._reconstruct_matrix()

        # If save_model, then use row and col ids from SVDLIBC
        MAX_VECTORS = 2**21
        if len(svd._U) > MAX_VECTORS:
            svd._file_row_ids = file_U_idx
        if len(svd._V) > MAX_VECTORS:
            svd._file_col_ids = file_V_idx

        return svd
コード例 #4
0
ファイル: svdlibc.py プロジェクト: 1060460048/python-recsys
    def export(self):
        """Build an SVD object from the files SVDLIBC wrote for this prefix.

        Reads ``<prefix>-Ut``, ``<prefix>-Vt`` and ``<prefix>-S`` (falling back
        to ``<prefix>-U`` / ``<prefix>-V`` when a transposed file cannot be
        loaded), plus the ``<prefix>.ids.rows`` and ``<prefix>.ids.cols`` label
        files, where ``<prefix>`` is ``self._svd_prefix``. Progress messages
        are written to stdout when the module-level ``VERBOSE`` is true.

        Returns:
            An ``SVD`` instance whose ``_U``, ``_S``, ``_V``,
            ``_matrix_similarity`` and ``_matrix_reconstructed`` are populated.
            When a matrix has more than 2**21 vectors, ``_file_row_ids`` /
            ``_file_col_ids`` are also set to the corresponding ID file path.

        Raises:
            AssertionError: if the row or column IDs contain duplicates or
                their count differs from the matching matrix's row count.
            IOError: if a required file cannot be opened.
        """
        # http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html
        # only importing default 'dt' S, Ut and Vt (dense text output matrices)

        def load_dense(path):
            # Parse with csv (much faster than numpy.loadtxt()); the first
            # line is skipped, the rest is converted to a float array.
            # The file is closed as soon as its rows have been read.
            with open(path) as handle:
                rows = list(csv.reader(handle, delimiter=' '))[1:]
            return array(rows).astype('float')

        def load_ids(path):
            # One ID per line. IDs are returned as ints when *all* of them
            # parse as ints, otherwise as the stripped strings.
            with open(path) as handle:
                tokens = [line.strip() for line in handle]
            try:
                return [int(token) for token in tokens]
            except ValueError:
                return tokens

        PREFIX = self._svd_prefix
        file_Ut = PREFIX + '-Ut'
        file_Vt = PREFIX + '-Vt'
        file_S = PREFIX + '-S'
        # Fallbacks, read only when the transposed file cannot be loaded:
        file_U = PREFIX + '-U'
        file_V = PREFIX + '-V'

        if VERBOSE:
            sys.stdout.write('Reading files: %s, %s, %s\n' % (file_Ut, file_Vt, file_S))
        # Any ordinary failure on the transposed file (missing, malformed)
        # triggers the fallback; KeyboardInterrupt/SystemExit still propagate.
        try:
            U = load_dense(file_Ut).transpose()
        except Exception:
            U = load_dense(file_U)
        try:
            V = load_dense(file_Vt).transpose()
        except Exception:
            V = load_dense(file_V)

        # Flatten the singular values into a 1-D vector.
        _S = load_dense(file_S)
        S = _S.reshape(_S.shape[0], )

        PREFIX_INDEXES = PREFIX + '.ids.'
        file_U_idx = PREFIX_INDEXES + 'rows'
        file_V_idx = PREFIX_INDEXES + 'cols'
        if VERBOSE:
            sys.stdout.write('Reading index files: %s, %s\n' % (file_U_idx, file_V_idx))
        U_idx = load_ids(file_U_idx)
        V_idx = load_ids(file_V_idx)

        # Check no duplicated IDs, and that there is one ID per matrix row.
        assert(len(U_idx) == len(OrderedSet(U_idx))), 'There are duplicated row IDs!'
        assert(len(U_idx) == U.shape[0]), 'There are duplicated (or empty) row IDs!'
        assert(len(V_idx) == len(OrderedSet(V_idx))), 'There are duplicated col IDs!'
        assert(len(V_idx) == V.shape[0]), 'There are duplicated (or empty) col IDs'

        # Create SVD
        if VERBOSE:
            sys.stdout.write('Creating SVD() class\n')
        svd = SVD()
        svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
        svd._S = S
        svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
        svd._matrix_similarity = svd._reconstruct_similarity()
        svd._matrix_reconstructed = svd._reconstruct_matrix()

        # If save_model, then use row and col ids from SVDLIBC
        MAX_VECTORS = 2**21
        if len(svd._U) > MAX_VECTORS:
            svd._file_row_ids = file_U_idx
        if len(svd._V) > MAX_VECTORS:
            svd._file_col_ids = file_V_idx

        return svd