def export(self):
    """Load the SVDLIBC output files for this run and build an ``SVD`` object.

    Only the default dense-text ('dt') outputs are imported: ``-Ut``,
    ``-Vt`` and ``-S`` under ``self._svd_prefix`` (format described at
    http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html), plus the
    ``.ids.rows`` / ``.ids.cols`` index files. If a transposed matrix file
    cannot be read or parsed, the non-transposed ``-U`` / ``-V`` file is
    tried instead.

    Returns:
        An ``SVD`` instance with ``_U``, ``_S``, ``_V``,
        ``_matrix_similarity`` and ``_matrix_reconstructed`` set.

    Raises:
        IOError: if a required file (and its fallback, where one exists)
            cannot be opened.
        AssertionError: if the row or column ids contain duplicates.
    """
    # Failures that trigger the -U / -V fallback: the file is missing or
    # unreadable, or its contents cannot be converted to floats.
    read_errors = (IOError, OSError, ValueError)

    def _read_dense(path):
        # csv is used on purpose: it is much faster than numpy.loadtxt().
        # The handle is closed as soon as the rows have been materialised.
        with open(path) as handle:
            rows = list(csv.reader(handle, delimiter=' '))
        # The first line is skipped (presumably the dimensions header of the
        # 'dt' format -- confirm against the SVDLIBC documentation).
        return array(rows[1:]).astype('float')

    def _read_ids(path):
        # One id per line. Ids are returned as ints only when *every* line
        # parses as an int; otherwise all of them are kept as strings.
        with open(path) as handle:
            tokens = [line.strip() for line in handle]
        try:
            return [int(token) for token in tokens]
        except ValueError:
            return tokens

    prefix = self._svd_prefix

    try:
        U = _read_dense(prefix + '-Ut').transpose()
    except read_errors:
        # Not really used: plain (non-transposed) U as a fallback.
        U = _read_dense(prefix + '-U')

    try:
        V = _read_dense(prefix + '-Vt').transpose()
    except read_errors:
        # Not really used: plain (non-transposed) V as a fallback.
        V = _read_dense(prefix + '-V')

    # Flatten the values read from the S file into a 1-D vector.
    singular = _read_dense(prefix + '-S')
    S = singular.reshape(singular.shape[0], )

    ids_prefix = prefix + '.ids.'
    U_idx = _read_ids(ids_prefix + 'rows')
    V_idx = _read_ids(ids_prefix + 'cols')

    # Check no duplicated IDs!!!
    # Kept as assert statements so callers still see AssertionError.
    assert (len(U_idx) == len(OrderedSet(U_idx)))
    assert (len(V_idx) == len(OrderedSet(V_idx)))

    # Create SVD
    svd = SVD()
    svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
    svd._S = S
    svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
    svd._matrix_similarity = svd._reconstruct_similarity()
    svd._matrix_reconstructed = svd._reconstruct_matrix()
    return svd
def export(self):
    """Load the SVDLIBC output files for this run and build an ``SVD`` object.

    Only the default dense-text ('dt') outputs are imported: ``-Ut``,
    ``-Vt`` and ``-S`` under ``self._svd_prefix`` (format described at
    http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html), plus the
    ``.ids.rows`` / ``.ids.cols`` index files. If a transposed matrix file
    cannot be read or parsed, the non-transposed ``-U`` / ``-V`` file is
    tried instead.

    Returns:
        An ``SVD`` instance with ``_U``, ``_S``, ``_V``,
        ``_matrix_similarity`` and ``_matrix_reconstructed`` set.

    Raises:
        IOError: if a required file (and its fallback, where one exists)
            cannot be opened.
        AssertionError: if the row or column ids contain duplicates.
    """
    # Failures that trigger the -U / -V fallback: the file is missing or
    # unreadable, or its contents cannot be converted to floats.
    read_errors = (IOError, OSError, ValueError)

    def _read_dense(path):
        # csv is used on purpose: it is much faster than numpy.loadtxt().
        # The handle is closed as soon as the rows have been materialised.
        with open(path) as handle:
            rows = list(csv.reader(handle, delimiter=' '))
        # The first line is skipped (presumably the dimensions header of the
        # 'dt' format -- confirm against the SVDLIBC documentation).
        return array(rows[1:]).astype('float')

    def _read_ids(path):
        # One id per line. Ids are returned as ints only when *every* line
        # parses as an int; otherwise all of them are kept as strings.
        with open(path) as handle:
            tokens = [line.strip() for line in handle]
        try:
            return [int(token) for token in tokens]
        except ValueError:
            return tokens

    prefix = self._svd_prefix

    try:
        U = _read_dense(prefix + '-Ut').transpose()
    except read_errors:
        # Not really used: plain (non-transposed) U as a fallback.
        U = _read_dense(prefix + '-U')

    try:
        V = _read_dense(prefix + '-Vt').transpose()
    except read_errors:
        # Not really used: plain (non-transposed) V as a fallback.
        V = _read_dense(prefix + '-V')

    # Flatten the values read from the S file into a 1-D vector.
    singular = _read_dense(prefix + '-S')
    S = singular.reshape(singular.shape[0], )

    ids_prefix = prefix + '.ids.'
    U_idx = _read_ids(ids_prefix + 'rows')
    V_idx = _read_ids(ids_prefix + 'cols')

    # Check no duplicated IDs!!!
    # Kept as assert statements so callers still see AssertionError.
    assert(len(U_idx) == len(OrderedSet(U_idx)))
    assert(len(V_idx) == len(OrderedSet(V_idx)))

    # Create SVD
    svd = SVD()
    svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
    svd._S = S
    svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
    svd._matrix_similarity = svd._reconstruct_similarity()
    svd._matrix_reconstructed = svd._reconstruct_matrix()
    return svd
def export(self):
    """Load the SVDLIBC output files for this run and build an ``SVD`` object.

    Only the default dense-text ('dt') outputs are imported: ``-Ut``,
    ``-Vt`` and ``-S`` under ``self._svd_prefix`` (format described at
    http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html), plus the
    ``.ids.rows`` / ``.ids.cols`` index files. If a transposed matrix file
    cannot be read or parsed, the non-transposed ``-U`` / ``-V`` file is
    tried instead. Progress messages go to stdout when ``VERBOSE`` is set.

    Returns:
        An ``SVD`` instance with ``_U``, ``_S``, ``_V``,
        ``_matrix_similarity`` and ``_matrix_reconstructed`` set. When a
        factor has more than ``2**21`` vectors, the path of the matching
        ids file is also stored on ``_file_row_ids`` / ``_file_col_ids``.

    Raises:
        IOError: if a required file (and its fallback, where one exists)
            cannot be opened.
        AssertionError: if the row or column ids contain duplicates, or
            their count differs from the first dimension of U / V.
    """
    # Failures that trigger the -U / -V fallback: the file is missing or
    # unreadable, or its contents cannot be converted to floats.
    read_errors = (IOError, OSError, ValueError)

    def _read_dense(path):
        # csv is used on purpose: it is much faster than numpy.loadtxt().
        # The handle is closed as soon as the rows have been materialised.
        with open(path) as handle:
            rows = list(csv.reader(handle, delimiter=' '))
        # The first line is skipped (presumably the dimensions header of the
        # 'dt' format -- confirm against the SVDLIBC documentation).
        return array(rows[1:]).astype('float')

    def _read_ids(path):
        # One id per line. Ids are returned as ints only when *every* line
        # parses as an int; otherwise all of them are kept as strings.
        with open(path) as handle:
            tokens = [line.strip() for line in handle]
        try:
            return [int(token) for token in tokens]
        except ValueError:
            return tokens

    prefix = self._svd_prefix
    file_Ut = prefix + '-Ut'
    file_Vt = prefix + '-Vt'
    file_S = prefix + '-S'

    if VERBOSE:
        sys.stdout.write('Reading files: %s, %s, %s\n' % (file_Ut, file_Vt, file_S))

    try:
        U = _read_dense(file_Ut).transpose()
    except read_errors:
        # Not really used: plain (non-transposed) U as a fallback.
        U = _read_dense(prefix + '-U')

    try:
        V = _read_dense(file_Vt).transpose()
    except read_errors:
        # Not really used: plain (non-transposed) V as a fallback.
        V = _read_dense(prefix + '-V')

    # Flatten the values read from the S file into a 1-D vector.
    singular = _read_dense(file_S)
    S = singular.reshape(singular.shape[0], )

    ids_prefix = prefix + '.ids.'
    file_U_idx = ids_prefix + 'rows'
    file_V_idx = ids_prefix + 'cols'

    if VERBOSE:
        sys.stdout.write('Reading index files: %s, %s\n' % (file_U_idx, file_V_idx))

    U_idx = _read_ids(file_U_idx)
    V_idx = _read_ids(file_V_idx)

    # Check no duplicated IDs!!!
    # Kept as assert statements so callers still see AssertionError.
    assert (len(U_idx) == len(OrderedSet(U_idx))), 'There are duplicated row IDs!'
    assert (len(U_idx) == U.shape[0]), 'There are duplicated (or empty) row IDs!'
    assert (len(V_idx) == len(OrderedSet(V_idx))), 'There are duplicated col IDs!'
    assert (len(V_idx) == V.shape[0]), 'There are duplicated (or empty) col IDs'

    # Create SVD
    if VERBOSE:
        sys.stdout.write('Creating SVD() class\n')
    svd = SVD()
    svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
    svd._S = S
    svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
    svd._matrix_similarity = svd._reconstruct_similarity()
    svd._matrix_reconstructed = svd._reconstruct_matrix()

    # If save_model, then use row and col ids from SVDLIBC
    # NOTE(review): the consumer of _file_row_ids / _file_col_ids is not
    # visible here -- presumably the model-saving code; confirm.
    MAX_VECTORS = 2**21
    if len(svd._U) > MAX_VECTORS:
        svd._file_row_ids = file_U_idx
    if len(svd._V) > MAX_VECTORS:
        svd._file_col_ids = file_V_idx
    return svd
def export(self):
    """Load the SVDLIBC output files for this run and build an ``SVD`` object.

    Only the default dense-text ('dt') outputs are imported: ``-Ut``,
    ``-Vt`` and ``-S`` under ``self._svd_prefix`` (format described at
    http://tedlab.mit.edu/~dr/SVDLIBC/SVD_F_DT.html), plus the
    ``.ids.rows`` / ``.ids.cols`` index files. If a transposed matrix file
    cannot be read or parsed, the non-transposed ``-U`` / ``-V`` file is
    tried instead. Progress messages go to stdout when ``VERBOSE`` is set.

    Returns:
        An ``SVD`` instance with ``_U``, ``_S``, ``_V``,
        ``_matrix_similarity`` and ``_matrix_reconstructed`` set. When a
        factor has more than ``2**21`` vectors, the path of the matching
        ids file is also stored on ``_file_row_ids`` / ``_file_col_ids``.

    Raises:
        IOError: if a required file (and its fallback, where one exists)
            cannot be opened.
        AssertionError: if the row or column ids contain duplicates, or
            their count differs from the first dimension of U / V.
    """
    # Failures that trigger the -U / -V fallback: the file is missing or
    # unreadable, or its contents cannot be converted to floats.
    read_errors = (IOError, OSError, ValueError)

    def _read_dense(path):
        # csv is used on purpose: it is much faster than numpy.loadtxt().
        # The handle is closed as soon as the rows have been materialised.
        with open(path) as handle:
            rows = list(csv.reader(handle, delimiter=' '))
        # The first line is skipped (presumably the dimensions header of the
        # 'dt' format -- confirm against the SVDLIBC documentation).
        return array(rows[1:]).astype('float')

    def _read_ids(path):
        # One id per line. Ids are returned as ints only when *every* line
        # parses as an int; otherwise all of them are kept as strings.
        with open(path) as handle:
            tokens = [line.strip() for line in handle]
        try:
            return [int(token) for token in tokens]
        except ValueError:
            return tokens

    prefix = self._svd_prefix
    file_Ut = prefix + '-Ut'
    file_Vt = prefix + '-Vt'
    file_S = prefix + '-S'

    if VERBOSE:
        sys.stdout.write('Reading files: %s, %s, %s\n' % (file_Ut, file_Vt, file_S))

    try:
        U = _read_dense(file_Ut).transpose()
    except read_errors:
        # Not really used: plain (non-transposed) U as a fallback.
        U = _read_dense(prefix + '-U')

    try:
        V = _read_dense(file_Vt).transpose()
    except read_errors:
        # Not really used: plain (non-transposed) V as a fallback.
        V = _read_dense(prefix + '-V')

    # Flatten the values read from the S file into a 1-D vector.
    singular = _read_dense(file_S)
    S = singular.reshape(singular.shape[0], )

    ids_prefix = prefix + '.ids.'
    file_U_idx = ids_prefix + 'rows'
    file_V_idx = ids_prefix + 'cols'

    if VERBOSE:
        sys.stdout.write('Reading index files: %s, %s\n' % (file_U_idx, file_V_idx))

    U_idx = _read_ids(file_U_idx)
    V_idx = _read_ids(file_V_idx)

    # Check no duplicated IDs!!!
    # Kept as assert statements so callers still see AssertionError.
    assert(len(U_idx) == len(OrderedSet(U_idx))), 'There are duplicated row IDs!'
    assert(len(U_idx) == U.shape[0]), 'There are duplicated (or empty) row IDs!'
    assert(len(V_idx) == len(OrderedSet(V_idx))), 'There are duplicated col IDs!'
    assert(len(V_idx) == V.shape[0]), 'There are duplicated (or empty) col IDs'

    # Create SVD
    if VERBOSE:
        sys.stdout.write('Creating SVD() class\n')
    svd = SVD()
    svd._U = DenseMatrix(U, OrderedSet(U_idx), None)
    svd._S = S
    svd._V = DenseMatrix(V, OrderedSet(V_idx), None)
    svd._matrix_similarity = svd._reconstruct_similarity()
    svd._matrix_reconstructed = svd._reconstruct_matrix()

    # If save_model, then use row and col ids from SVDLIBC
    # NOTE(review): the consumer of _file_row_ids / _file_col_ids is not
    # visible here -- presumably the model-saving code; confirm.
    MAX_VECTORS = 2**21
    if len(svd._U) > MAX_VECTORS:
        svd._file_row_ids = file_U_idx
    if len(svd._V) > MAX_VECTORS:
        svd._file_col_ids = file_V_idx
    return svd