def __init__(self, inputFile, outputFile):
    """
    Read a large coo_matrix file, filter its entries, and cluster the rows.

    :param inputFile: file containing the coo_matrix
    :param outputFile: name of the text file in which to store the cluster labels
    """
    print(mminfo(inputFile))
    data = mmread(inputFile)
    x, y = data.shape
    newRow = []
    newCol = []
    newData = []
    # keep only entries in the first 2500 columns, binarizing the values
    # (loop variable renamed from 'data' to 'val' so it no longer shadows the matrix)
    for row, col, val in zip(data.row, data.col, data.data):
        if col < 2500:
            newRow.append(row)
            newCol.append(col)
            newData.append(1)
    cleaned_data = coo_matrix((newData, (newRow, newCol)), shape=(x, 2500))
    del data
    kmeans = KMeans(n_clusters=45).fit(cleaned_data)
    np.savetxt(outputFile, kmeans.labels_, fmt='%d', delimiter='\n', newline='\n')
    print('finish')
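A minimal round trip of the same read/cluster pattern, as a sketch with an illustrative file name and a small synthetic matrix (mmwrite/mmread/mminfo from scipy.io, KMeans from sklearn); not the original class's method:

import numpy as np
from scipy.io import mmwrite, mmread, mminfo
from scipy.sparse import coo_matrix
from sklearn.cluster import KMeans

# write a small random COO matrix, then read it back the way the
# constructor above does; 'tiny.mtx' and n_clusters=2 are illustrative
m = coo_matrix(np.random.rand(20, 5))
mmwrite('tiny.mtx', m)
print(mminfo('tiny.mtx'))  # (rows, cols, nnz, 'coordinate', 'real', 'general')
labels = KMeans(n_clusters=2, n_init=10).fit(mmread('tiny.mtx').tocsr()).labels_
np.savetxt('labels.txt', labels, fmt='%d')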
def check_read(self, example, a, info):
    f = open(self.fn, 'w')
    f.write(example)
    f.close()
    assert_equal(mminfo(self.fn), info)
    b = mmread(self.fn).toarray()
    assert_array_almost_equal(a, b)
def test_simple_pattern(self):
    a = scipy.sparse.csr_matrix([[0, 1.5], [3.0, 2.5]])
    p = np.zeros_like(a.toarray())
    p[a.toarray() > 0] = 1
    info = (2, 2, 3, 'coordinate', 'pattern', 'general')
    mmwrite(self.fn, a, field='pattern')
    assert_equal(mminfo(self.fn), info)
    b = mmread(self.fn)
    assert_array_almost_equal(p, b.toarray())
def check_read(self, example, a, info, dense, over32, over64):
    with open(self.fn, 'w') as f:
        f.write(example)
    assert_equal(mminfo(self.fn), info)
    if (over32 and (np.intp(0).itemsize < 8)) or over64:
        assert_raises(OverflowError, mmread, self.fn)
    else:
        b = mmread(self.fn)
        if not dense:
            b = b.toarray()
        assert_equal(a, b)
def test_empty_write_read(self):
    # https://github.com/scipy/scipy/issues/1410 (Trac #883)
    b = scipy.sparse.coo_matrix((10, 10))
    mmwrite(self.fn, b)
    assert_equal(mminfo(self.fn),
                 (10, 10, 0, 'coordinate', 'real', 'symmetric'))
    a = b.toarray()
    b = mmread(self.fn).toarray()
    assert_array_almost_equal(a, b)
def preProcessBenchmark(benchDirPath):
    entries = []
    for f in os.listdir(benchDirPath):
        info = io.mminfo(os.path.join(benchDirPath, f))
        if info[0] == info[1]:
            # square matrix: record [name, cols, nnz, format, field, symmetry, nnz/cols]
            info = list(info[1:])
            info.append(info[1] / info[0])
            info.insert(0, f.replace(r'.mtx', ''))
            info[1] = int(info[1])
            info[2] = int(info[2])
            entries.append(info)
    return sorted(entries, key=lambda x: x[-1], reverse=True)
def test_real_write_read(self):
    I = array([0, 0, 1, 2, 3, 3, 3, 4])
    J = array([0, 3, 1, 2, 1, 3, 4, 4])
    V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])
    b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))
    mmwrite(self.fn, b)
    assert_equal(mminfo(self.fn),
                 (5, 5, 8, 'coordinate', 'real', 'general'))
    a = b.toarray()
    b = mmread(self.fn).toarray()
    assert_array_almost_equal(a, b)
def read_mm():
    # ... read the matrix file
    file_in_a = '../data/retangulo_dif_4.mtx'
    file_in_b = '../data/retangulo_dif_4_b.mtx'
    # file_in_a = '../data/cilindro1_1_1.mtx'
    # file_in_b = '../data/cilindro1_1_1_b.mtx'
    # file_in_a = '../data/sist3.mtx'
    # file_in_b = '../data/sist3_b.mtx'
    aCoo = io.mmread(file_in_a)
    nla = int(io.mminfo(file_in_a)[0])
    nca = int(io.mminfo(file_in_a)[1])
    if nla != nca:
        print('Coefficient matrix has different numbers of rows and columns')
        print('Number of rows = {0}\nNumber of columns = {1}'.format(nla, nca))
        exit(0)
    neq = nla
    # ... read the force-vector file
    b = io.mmread(file_in_b)
    nlb = int(io.mminfo(file_in_b)[0])
    b = b.reshape((nlb,))
    if nlb != neq:
        print('Incompatible number of rows in the force vector')
        print('Number of rows = {0}\n'.format(nlb))
        exit(0)
    print('**************************')
    print('number of equations = {0}'.format(neq))
    print('**************************')
    # convert to a dense matrix
    aDense = coo_matrix(aCoo, shape=(neq, neq)).toarray()
    return aDense, b, neq
def __init__(self, mat_path):
    """ """
    #print("WorkerIterativeLinearSystemSolver works good")
    Worker.__init__(self)
    self._hist_list = []
    if mat_path == "NeedMatGeneration":
        """ Need to generate a matrix """
        print("please call obj.matrix_generation(dim, left_semi_bw, right_semi_bw, val_min, val_max)")
    else:
        self._mat_coo = io.mmread(mat_path)
        self._mat = self._mat_coo.tocsr()
        self._mat_info = io.mminfo(mat_path)
        print("Done reading matrix {}, Row:{}, Col:{}".format(
            mat_path, self._mat.shape[0], self._mat.shape[1]))
        print("mminfo:{}".format(self._mat_info))
        if self._mat.getformat() == "csr":
            print("Yeah, it is CSR")
def test_complex_write_read(self):
    I = array([0, 0, 1, 2, 3, 3, 3, 4])
    J = array([0, 3, 1, 2, 1, 3, 4, 4])
    V = array([1.0 + 3j, 6.0 + 2j, 10.50 + 0.9j, 0.015 - 4.4j,
               250.5 + 0j, -280.0 + 5j, 33.32 + 6.4j, 12.00 + 0.8j])
    b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))
    mmwrite(self.fn, b)
    assert_equal(mminfo(self.fn),
                 (5, 5, 8, 'coordinate', 'complex', 'general'))
    a = b.toarray()
    b = mmread(self.fn).toarray()
    assert_array_almost_equal(a, b)
def __init__(self, mat_path):
    """ """
    #print("WorkerIterativeLinearSystemSolver works good")
    Worker.__init__(self)
    self._hist_list = []
    if mat_path == "":
        """ Need to generate a matrix """
        print("calling self._matrix_generation")
        #self._mat = self._matrix_generation()
    else:
        self._mat_coo = io.mmread(mat_path)
        self._mat = self._mat_coo.tocsr()
        self._mat_info = io.mminfo(mat_path)
        print("Done reading matrix {}, Row:{}, Col:{}".format(
            mat_path, self._mat.shape[0], self._mat.shape[1]))
        print("mminfo:{}".format(self._mat_info))
        if self._mat.getformat() == "csr":
            print("Yeah, it is CSR")
def process_file(file):
    """
    Load a Matrix Market file.

    Arguments:
        file: a Matrix Market file of type .mtx

    Returns:
        adjacency_matrix: a matrix of shape (number of nodes, number of nodes)
        graph: networkx graph object of the adjacency matrix
    """
    print("Reading file...")
    sparse_matrix = mmread(file)
    print("File read.")
    print("File info: rows: {}, cols: {}, nonzeros: {}, format: {}, field: {}, symmetry: {}"
          .format(*mminfo(file)))
    adjacency_matrix = sparse_matrix.toarray()
    graph = construct_graph(adjacency_matrix)
    return adjacency_matrix, graph
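construct_graph is not defined in this snippet; a plausible minimal sketch, assuming it simply wraps networkx's adjacency-array constructor (the real helper may do more):

import networkx as nx
import numpy as np

def construct_graph(adjacency_matrix):
    # hypothetical stand-in: nonzero entries become weighted edges
    return nx.from_numpy_array(np.asarray(adjacency_matrix))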
def run_scipy(files):
    exp_times = {}
    log_dir = os.path.join(os.getcwd(), args.out_dir, args.exp,
                           datetime.now().strftime("%Y%m%d-%H%M%S"))
    for f in files:
        print("Processing :", f)
        fsparse = mmread(f)
        finfo = mminfo(f)
        print(finfo)
        if finfo[3] != 'coordinate':
            continue
        csr = fsparse.tocsr()
        # the vector must match the column count (finfo[1]) for csr.dot(v);
        # these benchmark matrices are square, so rows == cols
        v = np.random.uniform(size=finfo[1])
        iter_times = []
        for i in range(args.iters):
            start = time.time_ns()
            res = csr.dot(v)
            delta = time.time_ns() - start
            iter_times.append(delta / (10**6))  # ns -> ms
        exp_times[os.path.basename(f)] = iter_times
    df = pd.DataFrame.from_dict(exp_times, orient="index")
    df.to_csv(os.path.join(log_dir, "results.csv"))
def time_spsolve(mat_path, rpt, num):
    '''Time spsolve on the MatrixMarket matrix at mat_path using timeit.'''
    # Preliminary steps for spsolve.
    # A and B must be global for timeit to see them, since timeit only sees
    # variables from __main__. A lambda or partial would also work, but this
    # is simpler.
    global A, B
    # import the sparse matrix from MatrixMarket format
    matmark = sio.mmread(mat_path)
    A = ss.csr_matrix(matmark)
    # nice formatting
    info = sio.mminfo(mat_path)
    info_arr = np.array(info).flatten()
    print(mat_path, '\t\t',
          '{: <8} {: <8} {: <10} {: <15} {: <8} {: <10}'.format(*info_arr),
          end='\t')
    # create an array with random values, to be used as B in the system AX=B
    B = sst.uniform.rvs(size=A.shape[0])
    # time spsolve with timeit
    setup_code = '''from __main__ import A, B
from scipy.sparse.linalg import spsolve'''
    test_code = 'spsolve(A, B)'
    # both the number of executions <number> and the number of times <repeat>
    # those executions are repeated can be configured
    bench = timeit.Timer(setup=setup_code, stmt=test_code)
    times = bench.repeat(repeat=rpt, number=num)
    # The most sensible value to report is the minimum, since all the others
    # are larger due to interference from other processes on the system.
    # The average processing time is therefore estimated by dividing the
    # minimum test value by the number of executions per test.
    print(min(times) / num)
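A sketch of driving time_spsolve end to end, assuming the aliases used above (sio, ss, sst) plus numpy and timeit are imported at module level and the script runs as __main__; the test matrix and file name are illustrative:

import scipy.io as sio
from scipy.sparse import identity, random as sparse_random

# build a nonsingular, diagonally dominated test system and write it
# in MatrixMarket format; 'test_system.mtx' is a made-up name
M = sparse_random(200, 200, density=0.02, format='csr') + 10 * identity(200)
sio.mmwrite('test_system.mtx', M)

# 3 repeats of 5 spsolve runs each; prints the estimated time per solve
time_spsolve('test_system.mtx', rpt=3, num=5)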
    if args.her:
        supdiag = -1.0 - 2.j
        rhs_mid = 0.0 + 0.j
        rhs_1 = -1.0 + 3.j
    else:
        supdiag = -1. + 2.j
        rhs_mid = -4.0 + 4.j
        rhs_1 = 3.0 - 1.j
        rhs_n = -1. + 3.j
else:
    parser.print_help()
    raise

# definition of the sparse matrix
if args.matrix:
    (n, m, nnz, form, arith, issym) = mminfo(args.matrix)
    n = int(n)
    localn = n // nprocs  # integer division: localn indexes into range()
    first = 1 + myrank * localn + min(myrank, n % nprocs)
    if n % nprocs < myrank:
        localn = localn + 1
    loc2glob = np.array([x + first for x in range(localn)],
                        dtype=pastix.integerType)
    A = mmread(args.matrix)
    rhs = [1] * n
    rhs = A * rhs
    print(arith)
def main(argv):
    myArgv = ['-autovalor', '-autovalorI', '-condNumber', '-norm',
              '-matvec', '-svd', '-svdI']
    # check the arguments
    nArgs = len(argv)
    if nArgs < 4:
        sys.stderr.write("Usage: %s\nOptions:\n" % argv[0])
        for arg in myArgv:
            print(arg, "FileIn", "FileOut")
        return 1
    fileIn = argv[2]
    fileOut = argv[3]
    # ... read the file
    aCoo = io.mmread(fileIn)
    nl = int(io.mminfo(fileIn)[0])
    nc = int(io.mminfo(fileIn)[1])
    a = coo_matrix(aCoo, shape=(4, 4)).todense()
    # choose the execution option
    # eigenvalues
    if argv[1] == '-autovalor':
        timeIn = tm.time()
        MyM.autovalor(a, 'false', fileOut)
        timeOut = tm.time()
        print(timeOut - timeIn)
    # eigenvalues (inverse)
    elif argv[1] == '-autovalorI':
        timeIn = tm.time()
        MyM.autovalor(a, 'true', fileOut)
        timeOut = tm.time()
        print(timeOut - timeIn)
    # condition number
    elif argv[1] == '-condNumber':
        timeIn = tm.time()
        MyM.condNumber(a, 'true', fileOut)
        timeOut = tm.time()
        print('time:', timeOut - timeIn)
    # matrix norm
    elif argv[1] == '-norm':
        timeIn = tm.time()
        MyM.norm(a, 2, nl, 'false', fileOut)
        timeOut = tm.time()
        print(timeOut - timeIn)
    # matvec
    elif argv[1] == '-matvec':
        timeIn = tm.time()
        MyM.matVec(a, nl, 'true', fileOut)
        timeOut = tm.time()
        print(timeOut - timeIn)
    # svd
    elif argv[1] == '-svd':
        timeIn = tm.time()
        MyM.svd(a, 'false', fileOut)
        timeOut = tm.time()
        print(timeOut - timeIn)
    # svdI
    elif argv[1] == '-svdI':
        timeIn = tm.time()
        MyM.svd(a, 'true', fileOut)
        timeOut = tm.time()
        print('time:', timeOut - timeIn)
    else:
        sys.stderr.write("Usage: %s\nOptions:\n" % argv[0])
        for arg in myArgv:
            print(arg, "FileIn", "FileOut")
        return 1
def readMatrix(filename, form, showProgress=False):
    HERCMATRIX = libHercMatrix.hercMatrix()
    logging.info("reading matrix {0} in format {1}".format(filename, form))
    if (form == 'hercm') or (form == 'bxf'):
        # TODO: exception handling
        HERCMATRIX = libBXF.read(filename)
    elif form == 'mtx':
        from scipy import io
        from scipy.sparse import csr_matrix
        from numpy import array
        # read in an MTX file and convert it to hercm
        try:
            if showProgress:
                print("reading data from file...")
            rawMatrix = scipy.sparse.coo_matrix(scipy.io.mmread(filename))
            if 'symmetric' in io.mminfo(filename):
                HERCMATRIX.symmetry = "SYM"
            else:
                HERCMATRIX.symmetry = "ASYM"
            hercm = {}  # needed to generate verification
            if showProgress:
                print("generating header data..")
            hercm['val'] = rawMatrix.data
            hercm['col'] = rawMatrix.col.tolist()
            hercm['row'] = rawMatrix.row.tolist()
            (matrixWidth, matrixHeight) = rawMatrix.shape
            HERCMATRIX.height = int(matrixHeight)
            HERCMATRIX.width = int(matrixWidth)
            vs = libBXF.generateVerificationSum(hercm)
            HERCMATRIX.verification = vs
            HERCMATRIX.remarks = []
            # I'm not sure why this has to be hard...
            # http://stackoverflow.com/questions/26018781/numpy-is-it-possible-to-preserve-the-dtype-of-columns-when-using-column-stack
            if showProgress:
                print("preparing matrix data...")
            val = numpy.asarray(hercm['val'], dtype='float64')
            col = numpy.asarray(hercm['col'], dtype='int32')
            row = numpy.asarray(hercm['row'], dtype='int32')
            val = numpy.rec.array(val, dtype=[(('val'), numpy.float64)])
            col = numpy.rec.array(col, dtype=[(('col'), numpy.int32)])
            row = numpy.rec.array(row, dtype=[(('row'), numpy.int32)])
            HERCMATRIX.elements = append_fields(row, 'col', col,
                                                usemask=False,
                                                dtypes=[numpy.int32])
            HERCMATRIX.elements = append_fields(HERCMATRIX.elements, 'val',
                                                val, usemask=False,
                                                dtypes=[numpy.float64])
            HERCMATRIX.nzentries = len(HERCMATRIX.elements['val'])
            HERCMATRIX.verification = libBXF.generateVerificationSum(
                HERCMATRIX)
            if showProgress:
                print("finished reading matrix")
        except IOError as e:
            # make sure the file exists and is readable
            logging.warning("(lsc-480) could not open matrix file")
            raise IOError("could not open matrix file for reading...", str(e))
    elif form == 'mat':
        # MATLAB matrices
        from scipy import io
        from scipy import sparse
        from numpy import array
        try:
            rawMatrix = scipy.sparse.coo_matrix(
                scipy.io.loadmat(filename)['matrix'])
            hercm = {}  # needed to generate verification
            hercm['val'] = rawMatrix.data
            hercm['col'] = rawMatrix.col.tolist()
            hercm['row'] = rawMatrix.row.tolist()
            (matrixWidth, matrixHeight) = rawMatrix.shape
            HERCMATRIX.height = int(matrixHeight)
            HERCMATRIX.width = int(matrixWidth)
            vs = libBXF.generateVerificationSum(hercm)
            HERCMATRIX.verification = vs
            HERCMATRIX.remarks = []
            # I'm not sure why this has to be hard...
            # http://stackoverflow.com/questions/26018781/numpy-is-it-possible-to-preserve-the-dtype-of-columns-when-using-column-stack
            val = numpy.asarray(hercm['val'], dtype='float64')
            col = numpy.asarray(hercm['col'], dtype='int32')
            row = numpy.asarray(hercm['row'], dtype='int32')
            val = numpy.rec.array(val, dtype=[(('val'), numpy.float64)])
            col = numpy.rec.array(col, dtype=[(('col'), numpy.int32)])
            row = numpy.rec.array(row, dtype=[(('row'), numpy.int32)])
            HERCMATRIX.elements = append_fields(row, 'col', col,
                                                usemask=False,
                                                dtypes=[numpy.int32])
            HERCMATRIX.elements = append_fields(HERCMATRIX.elements, 'val',
                                                val, usemask=False,
                                                dtypes=[numpy.float64])
            HERCMATRIX.nzentries = len(HERCMATRIX.elements['val'])
            if HERCMATRIX.checkSymmetry():
                HERCMATRIX.symmetry = 'SYM'
            HERCMATRIX.verification = libBXF.generateVerificationSum(
                HERCMATRIX)
        except IOError as e:
            # make sure the file exists and is readable
            logging.warning("(lsc-536) could not open matrix file")
            raise IOError("could not open matrix file for reading...", str(e))
    elif form == 'valcol':
        HERCMATRIX = libValcolIO.read(filename)
    else:
        logging.warning("(lsc-545) format {0} is not valid".format(form))

    if showProgress:
        print("converting matrix to row-major...")
    logging.info("converting matrix to row-major")
    HERCMATRIX.makeRowMajor()
    if showProgress:
        print("matrix is now row major")

    if HERCMATRIX.symmetry == 'SYM':
        logging.info("matrix is symmetric, truncating upper triangle")
        if showProgress:
            print("matrix is symmetric, truncating upper triangle...")
        HERCMATRIX.makeSymmetrical('truncate')
        if showProgress:
            print("upper triangle truncated")

    return HERCMATRIX
import numpy as np
import scipy.linalg as spla
help(spla.det)
import scipy.io as spio
help(spio.mminfo)
%cd "C:\Users\Administrador\Desktop\Clase2"
spio.mminfo("mahindas.mtx")
mahindas = spio.mmread("mahindas.mtx")
type(mahindas)
mahindas.shape
mahindas_Densa = mahindas.todense()
mahindas_Densa[1:10, 1:10]
spla.inv(mahindas_Densa)
spla.det(mahindas_Densa)

def gen_ex(d0):
    x = np.random.randn(d0, d0)
    return x.T + x

mat1 = gen_ex(10**3)
#print(sys.argv)
print("Reading graph!")
filename = sys.argv[1]
G = mmread(filename)
graph = nx.Graph(G)
print("Done!")
#random.seed(1)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

if sys.argv[2] == "1":
    print("Running native...", mminfo(filename)[0])
    X = readEmbeddings(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
elif sys.argv[2] == "3":
    print("Running native...", mminfo(filename)[0])
    X = readEmbeddingsHOPE(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
elif sys.argv[2] == "4":
    X = readEmbeddingsROLX(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
elif sys.argv[2] == "5":
    X = readEmbeddingsHARP(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
else:
    print("Running binnative...", mminfo(filename)[0])
    X = readBinEmbeddings(sys.argv[3], int(sys.argv[4]))

truelabfile = sys.argv[5]
print("Making prediction data!!")
        # convergence test
        if np.linalg.norm(new_x_diff) <= (rtol * np.linalg.norm(old_x) + atol):
            break
        old_x += new_x_diff
    return times, old_x

# read a Matrix Market format file
# mtx_filename = 'bcsstm22'  # n = 138
mtx_filename = 'memplus'  # n = 17758
mtx_filename_full = mtx_filename + '.mtx'
spmat_info = scio.mminfo(mtx_filename_full)
print('spmat_info = ', spmat_info)
row_dim = spmat_info[0]
col_dim = spmat_info[1]

# COO format
spmat_a_coo = scio.mmread(mtx_filename_full)
print('spmat_a = ', spmat_a_coo)

# COO -> CSR
spmat_a = spmat_a_coo.tocsr()

# x = [1, 2, ..., n]
true_x = np.array(np.arange(1, col_dim + 1))
print('true_x = ', true_x)
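The loop above stops once the update is small in a mixed relative/absolute sense; the same test in isolation, with illustrative values:

import numpy as np

rtol, atol = 1.0e-10, 1.0e-12
old_x = np.ones(5)
new_x_diff = np.full(5, 1.0e-13)

# the update is negligible relative to old_x, so this prints True
print(np.linalg.norm(new_x_diff) <= rtol * np.linalg.norm(old_x) + atol)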
# -*- coding: utf-8 -*-
import scipy.io as sio
import numpy as np
import sys

argvs = sys.argv
arglen = len(argvs)
if arglen != 2:
    print('Usage: python %s [MatrixMarketfile]' % argvs[0])
    quit()

target = argvs[1]
(sizex, sizey, nnz, fmt, field, symmetry) = sio.mminfo(target)
mat = sio.mmread(target)
ftype = mat.getformat()
min_all = mat.min()
max_all = mat.max()
nnz = mat.getnnz()
nnz_row = mat.getnnz(1)
min_nnz = min(nnz_row)
max_nnz = max(nnz_row)
ave_nnz = np.average(nnz_row)
zero = float(sizex * sizex - nnz) / float(sizex * sizex) * 100.0

print("symmetry = %s" % symmetry)
print("storage format = %s" % ftype)
if symmetry == 'symmetric':
    print("dimension = %d" % (sizex * sizex))
else:
    print("X size = %d" % sizex)
    print("Y size = %d" % sizey)
        supdiag = -1.0 - 2.j
        rhs_mid = 0.0 + 0.j
        rhs_1 = -1.0 + 3.j
    else:
        supdiag = -1. + 2.j
        rhs_mid = -4.0 + 4.j
        rhs_1 = 3.0 - 1.j
        rhs_n = -1. + 3.j
else:
    parser.print_help()
    raise

# definition of the sparse matrix
if args.matrix:
    (n, m, nnz, form, arith, issym) = mminfo(args.matrix)
    n = int(n)
    A = mmread(args.matrix)
    rhs = [1] * n
    rhs = A * rhs
    dtype = A.dtype  # fixed: np.dtype(A) would fail on a sparse matrix
    if issym == u"symmetric":
        args.sym = True
    elif issym == u"hermitian":
        args.her = True
    else:
        args.sym = False
else:
    A = lil_matrix((n, n), dtype=dtype)
    A[0, 0] = diag
plt.axis('off')
plt.savefig(algo1 + '_vis.pdf')

filename = sys.argv[1]
G = mmread(filename)
graph = nx.Graph(G)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn import metrics

if sys.argv[2] == "1":
    print("Running native...")
    X = readEmbeddings(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
else:
    X = readEmbeddingsHARP(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))

labs, l, gy = readgroundtruth(sys.argv[5], mminfo(filename)[0])
algoname = sys.argv[6]

print("Running TSNE")
if int(sys.argv[4]) == 2:
    print("Direct 2D Visualization")
    X_f = X
else:
    print("Visualization by TSNE")
    X_embedded = TSNE(n_components=2).fit_transform(X)
    X_f = X_embedded
def check(self, a, info):
    mmwrite(self.fn, a)
    assert_equal(mminfo(self.fn), info)
    b = mmread(self.fn)
    assert_array_almost_equal(a, b)
def check_exact(self, a, info):
    mmwrite(self.fn, a)
    assert_equal(mminfo(self.fn), info)
    b = mmread(self.fn)
    assert_equal(a, b)
import numpy as np
import scipy.io as sio
import math
import subprocess
np.set_printoptions(threshold=np.inf)
from PIL import Image

matrices = open("matrices_square.txt", 'r')
for line in matrices:
    name = line.rstrip().split(' ')
    #directory = "mcca.mtx"
    directory = "/home/matrices/mtx/" + name[0] + "/" + name[1] + "/" + name[1] + ".mtx"
    mtx = sio.mmread(directory)
    mtx_row = sio.mminfo(directory)[0]
    mtx_col = sio.mminfo(directory)[1]
    print(name[1])
    # if mtx_row <= 1000:
    #     coef = 1
    # elif 1000 < mtx_row <= 5000:
    #     coef = 5
    # elif 5000 < mtx_row <= 10000:
    #     coef = 10
    # elif 10000 < mtx_row <= 50000:
    #     coef = 50
    # elif 50000 < mtx_row <= 100000:
    #     coef = 100
    # elif 100000 < mtx_row <= 500000:
    #     coef = 500
    # elif 500000 < mtx_row <= 1000000:
def Dataset_Load(file_name):
    dataset_info = spio.mminfo(file_name)
    print('Load dataset_info:', dataset_info)
    dataset_coomatrix = spio.mmread(file_name)
    return dataset_coomatrix
def main(argv):
    # check the arguments
    nArgs = len(argv)
    if nArgs < 3:
        sys.stderr.write("Usage: %s\nOptions:\n" % argv[0])
        print("FileIn", "FileOut")
        return 1
    fileIn = argv[1]
    fileOut = argv[2]
    # ... read the file
    aCoo = io.mmread(fileIn)
    nl = int(io.mminfo(fileIn)[0])
    nc = int(io.mminfo(fileIn)[1])
    # aDense = aCoo.todense()
    # evalsAll, evecsAll = eigh(aDense)
    # print(evalsAll)
    # return
    # convert COO to CSR
    timeConv = tm.time()
    aCsr = aCoo.tocsr()
    timeConv = tm.time() - timeConv

    maxit = 3500
    tol = 1.e-15
    lancozVector = 20

    # largest eigenvalue
    eighLarge = tm.time()
    evalsLarge, evecsLarge = eigsh(aCsr, 1, ncv=lancozVector, which='LA',
                                   tol=tol, maxiter=maxit)
    eighLarge = tm.time() - eighLarge
    print(evalsLarge)
    # erro = abs(evalsLarge[0] - evalsAll[nl-1]) / evalsAll[nl-1]
    # print(evalsLarge[0], evalsAll[nl-1], erro)

    # smallest eigenvalue (shift-invert around sigma=0)
    eighSmall = tm.time()
    evalsSmall, evecsSmall = eigsh(aCsr, 1, ncv=lancozVector, which='LA',
                                   sigma=0, tol=tol, maxiter=maxit)
    eighSmall = tm.time() - eighSmall
    print(evalsSmall)
    # erro = abs(evalsSmall[0] - evalsAll[0]) / evalsAll[0]
    # print(evalsSmall[0], evalsAll[0], erro)

    print("Number of equations %d" % nl)
    print("Largest eigenvalue %0.6e" % evalsLarge[0])
    print("Smallest eigenvalue %0.6e" % evalsSmall[0])
    print("Condition number %0.6e" % (evalsLarge[0] / evalsSmall[0]))
    print("Time(largest): ", eighLarge)
    print("Time(smallest): ", eighSmall)

    f = open(fileOut, "w")
    f.write("Number of equations %d\n" % nl)
    f.write("Largest eigenvalue %.6e\n" % evalsLarge[0])
    f.write("Smallest eigenvalue %.6e\n" % evalsSmall[0])
    f.write("Condition number %.6e\n" % (evalsLarge[0] / evalsSmall[0]))
    f.close()
def create_subsampled_matrix(large_matrix_location, gene_file=None,
                             MAX_CELLS_COUNT=MAX_CELLS_COUNT):
    mtx_info = mminfo(large_matrix_location)
    num_rows = mtx_info[0]
    num_cols = mtx_info[1]
    num_cells = num_cols
    if gene_file is not None:
        gene_list = load_genes(
            gene_file,
            delimiter='\t' if gene_file.endswith('tsv') else None,
            skip_rows=1 if gene_file.endswith('tsv') else 0)
        if num_rows == len(gene_list):
            bioblocks_log('Genes are rows, Cells are cols')
            num_cells = num_cols
            sample_rows = False
        else:
            bioblocks_log('Cells are rows, Genes are cols')
            num_cells = num_rows
            sample_rows = True
    else:
        gene_list = ['']
        sample_rows = False  # fixed: previously unbound when no gene file was given

    if num_cells > MAX_CELLS_COUNT:
        # sample only when there are more cells than the cap; otherwise
        # random.sample would raise ValueError
        subsampled_indices = random.sample(range(1, num_cells + 1),
                                           MAX_CELLS_COUNT)
        subsampled_indices.sort()
        subsampled_matrix_location = '{}_sub.mtx'.format(
            large_matrix_location[0:len(large_matrix_location) - 4])
        with open(subsampled_matrix_location, 'w') as subsampled_matrix:
            with open(large_matrix_location) as large_matrix:
                matrix_header = large_matrix.readline()
                num_entries = 0
                output_lines = []
                line = large_matrix.readline()  # dimensions line, rewritten below
                while line:
                    line = large_matrix.readline()
                    matrix_index = -1
                    if sample_rows is True and line.split(' ')[0].isnumeric():
                        matrix_index = int(line.split(' ')[0])
                        num_entries += 1
                    elif len(line.split(' ')) >= 2 and line.split(' ')[1].isnumeric():
                        matrix_index = int(line.split(' ')[1])
                        num_entries += 1
                    bisect_index = bisect_left(subsampled_indices, matrix_index)
                    if (bisect_index != len(subsampled_indices)
                            and subsampled_indices[bisect_index] == matrix_index):
                        output_lines.append(line)
                if sample_rows is True:
                    matrix_header = '{}{}'.format(
                        '{} {} {}'.format(MAX_CELLS_COUNT, len(gene_list),
                                          num_entries),
                        matrix_header)
                else:
                    matrix_header = '{}{}'.format(
                        matrix_header,
                        '{} {} {}'.format(len(gene_list), MAX_CELLS_COUNT,
                                          num_entries))
                subsampled_matrix.write(matrix_header)
                for line in output_lines:
                    subsampled_matrix.write(line)
        os.remove(large_matrix_location)
        os.rename(subsampled_matrix_location, large_matrix_location)
def SSOR(filename):
    fileinfo = sio.mminfo(filename)
    A = ssp.csr_matrix(sio.mmread(filename))
    print(fileinfo)
    b = random.rand(A.shape[0])

    starttime = datetime.datetime.now()
    x_direct, info_direct = linalg.cg(A, b)
    endtime_d = datetime.datetime.now()
    print("the direct time cost is %f" % ((endtime_d - starttime).microseconds / 1000))
    print("the iterations is %d" % info_direct)

    D = ssp.diags(A.diagonal(), 0)
    # print("===================D")
    # print(D)
    w = 0.5
    D_like = (1 / w) * D
    L = ssp.tril(A)
    D_inv = D_like
    for i in range(0, len(D_inv.data)):
        D_inv.data[i] = 1 / D_inv.data[i]
    # print("===================D_inv")
    # print(D_inv)
    tmp = D_inv * L
    D_inv2 = D_inv
    for i in range(0, len(D_inv.data)):
        D_inv2.data[i] **= 0.5
    # one
    K = math.sqrt(2 - w) * (D_inv2 - D_inv2 * L * D_inv)
    # two
    # K = math.sqrt(2 - w) * (D_inv2 - D_inv2 * L * D_inv + D_inv2 * L * D_inv * L * D_inv)
    K_T = K.T
    M = K_T * K

    a = 0.5
    print("when a=%f," % a)
    for row in range(0, len(M.indptr) - 1):
        maxA = abs(A.data[A.indptr[row]])
        for indexA in range(A.indptr[row], A.indptr[row + 1]):
            if abs(A.data[indexA]) > maxA:
                maxA = abs(A.data[indexA])
        for indexM in range(M.indptr[row], M.indptr[row + 1]):
            if abs(M.data[indexM]) <= (1 - a) * maxA:
                M.data[indexM] = 0

    A_like = A * M
    x, info = linalg.cg(A_like, b)
    starttime_p = datetime.datetime.now()
    x, info = linalg.cg(A_like, b)
    endtime = datetime.datetime.now()
    print("the SSOR time cost is %f" % (endtime - starttime_p).seconds)
    print("the iterations is %d" % info)
def check_issparse(f):
    finfo = mminfo(f)
    return finfo[3] == 'coordinate'
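A quick sanity check of check_issparse, assuming files written with scipy.io.mmwrite (file names are illustrative): sparse matrices are stored as 'coordinate', dense arrays as 'array':

import numpy as np
from scipy.io import mmwrite
from scipy.sparse import coo_matrix

mmwrite('sparse_example.mtx', coo_matrix(np.eye(3)))  # 'coordinate' storage
mmwrite('dense_example.mtx', np.eye(3))               # 'array' storage
print(check_issparse('sparse_example.mtx'))  # True
print(check_issparse('dense_example.mtx'))   # False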
    print(len(Xd), len(Yd), count)
    return Xd, Yd

#print(sys.argv)
filename = sys.argv[1]
G = mmread(filename)
graph = nx.Graph(G)
#random.seed(1)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

if sys.argv[2] == "1":
    print("Running native...")
    X = readEmbeddings(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
else:
    X = readBinEmbeddings(sys.argv[3], int(sys.argv[4]))

truelabfile = sys.argv[5]

Xt, Yt = makeLinkPredictionData(graph, X, "hadamard")
ltrainfrac = [0.50]
for tf in ltrainfrac:
    CV = int(len(Yt) * tf)
    trainX = Xt[0:CV]
    testX = Xt[CV:]
    trainY = Yt[0:CV]
    testY = Yt[CV:]
    modelLR = LogisticRegression().fit(trainX, trainY)
    predictedY = modelLR.predict(testX)
def run_spring_analysis(dataset_dir, dataset_id, dataset, MAX_CELLS_COUNT, tmp_dir):
    mtx_file = '{}/matrix/matrix.mtx'.format(dataset_dir)
    gene_file = '{}/matrix/genes.tsv'.format(dataset_dir)
    bioblocks_log('mtx_file = {}'.format(mtx_file))
    mtx_info = mminfo(mtx_file)
    num_rows = mtx_info[0]
    num_cols = mtx_info[1]
    bioblocks_log(mtx_info)
    gene_list = load_genes(gene_file,
                           delimiter='\t' if gene_file.endswith('tsv') else None,
                           skip_rows=1 if gene_file.endswith('tsv') else 0)
    if num_rows == len(gene_list):
        bioblocks_log('Genes are rows, Cells are cols')
        num_cells = num_cols
        sample_rows = False
    else:
        bioblocks_log('Cells are rows, Genes are cols')
        num_cells = num_rows
        sample_rows = True

    if num_cells > MAX_CELLS_COUNT:
        bioblocks_log('mtx_file: {}'.format(mtx_file))
        create_subsampled_matrix(mtx_file, gene_file, MAX_CELLS_COUNT)
        num_cells = MAX_CELLS_COUNT

    subsample_ranges = get_cell_subsample_ranges(num_cells)
    bioblocks_log('Attempting to run SPRING with subsample ranges {}'.format(
        subsample_ranges))
    for subsample_range in subsample_ranges:
        analysis_id = str(uuid.uuid4())
        start_time = datetime.utcnow()
        bioblocks_log('Starting SPRING analysis \'{}\' for dataset \'{}\''.format(
            analysis_id, dataset_id))
        main_dir = '{}/analyses/{}'.format(dataset_dir, analysis_id)
        spring_load_preprocess.run_spring_preprocessing(
            mtx_file=mtx_file,
            gene_file=gene_file,
            cell_labels_file='{}/matrix/cells.tsv'.format(dataset_dir),
            main_dir=main_dir,
            subplot_name=dataset['name'],
            sample_rows=sample_rows,
            subsample_range=subsample_range,
            num_cells=num_cells)
        try:
            dataset['_etag'] = patch_matrix_info_for_dataset(dataset, mtx_info, mtx_file)
            analysis = {
                '_id': analysis_id,
                'process_type': 'SPRING',
                'name': '{} - {}'.format(dataset['name'],
                                         get_numeric_shorthand_suffix(subsample_range)),
            }
            post_bioblocks_analysis(analysis)
            dataset['_etag'] = patch_analysis_for_dataset(dataset, analysis_id)
        except Exception as e:
            bioblocks_log('Error with compression of matrix file: {}'.format(e))
            return
        try:
            # bioblocks_log('Compressing file: {}'.format(mtx_file))
            # with open(mtx_file, 'rb') as f_in:
            #     with gzip.open('{}.gz'.format(mtx_file), 'wb') as f_out:
            #         shutil.copyfileobj(f_in, f_out)
            # bioblocks_log('Finished compressing file: {}'.format(mtx_file))
            delete_directory('{}/{}'.format(dataset_dir, tmp_dir))
            os.remove(mtx_file)
        except Exception as e:
            bioblocks_log('Error with cleanup of matrix file: {}'.format(e))
        end_time = datetime.utcnow()
        bioblocks_log('Finished SPRING analysis \'{}\' for dataset \'{}\'. Duration: {}'.format(
            analysis_id, dataset_id, end_time - start_time))
from collections import OrderedDict
import pandas as pd
import networkx as nx
import scipy.io as sio
from bokeh.plotting import *
from math import floor
from stacked_graph import *

try:
    import matplotlib.pyplot as plt
except ImportError:
    raise

############ Read in matrix, find basic stats ###########
output_file("read.html")
mat_name = 'ca-AstroPh.mtx'
mat_info = sio.mminfo(mat_name)
mat_n = mat_info[0]
mat_nnz = mat_info[2]
print("matrix: {}".format(mat_name))
print("size: {} x {}".format(mat_n, mat_n))
print("nonzeros: {} (density: {})".format(
    mat_nnz, round(float(mat_nnz) / (mat_n * mat_n), 6)))
print("nnz type: {} {}".format(mat_info[5], mat_info[4]))
assert mat_n == mat_info[1]

print("reading matrix ...", end=' ')
A = sio.mmread(mat_name)
print("done\nconverting matrix to csr ...", end=' ')
B = A
A = A.tocsr()
print("done")

G = nx.to_networkx_graph(A)
numsamples = mat_n  # number of samples to take