Example #1
    def __init__(self, inputFile, outputFile):
        """
        Read a large coo_matrix file, filter and binarise its entries, and cluster the rows into groups with KMeans.

        :param inputFile: file with coo_matrix
        :param outputFile: name of text file to store the labels of clusters
        """

        print(mminfo(inputFile))
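        # mminfo returns the Matrix Market header info: (rows, cols, entries, format, field, symmetry)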
        data = mmread(inputFile)
        x, y = data.shape
        newRow = []
        newCol = []
        newData = []
        # keep only entries in the first 2500 columns and binarise their values
        for row, col, val in zip(data.row, data.col, data.data):
            if col < 2500:
                newRow.append(row)
                newCol.append(col)
                newData.append(1)
        cleaned_data = coo_matrix((newData, (newRow, newCol)), shape=(x, 2500))
        del data
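        # note: KMeans accepts the sparse matrix directly (scikit-learn converts it to CSR internally)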
        kmeans = KMeans(n_clusters=45).fit(cleaned_data)
        np.savetxt(outputFile,
                   kmeans.labels_,
                   fmt='%d',
                   delimiter='\n',
                   newline='\n')
        print('finish')
Example #2
 def check_read(self, example, a, info):
     f = open(self.fn, 'w')
     f.write(example)
     f.close()
     assert_equal(mminfo(self.fn), info)
     b = mmread(self.fn).toarray()
     assert_array_almost_equal(a, b)
Example #3
 def test_simple_pattern(self):
     a = scipy.sparse.csr_matrix([[0, 1.5], [3.0, 2.5]])
     p = np.zeros_like(a.toarray())
     p[a.toarray() > 0] = 1
     info = (2, 2, 3, 'coordinate', 'pattern', 'general')
     mmwrite(self.fn, a, field='pattern')
     assert_equal(mminfo(self.fn), info)
     b = mmread(self.fn)
     assert_array_almost_equal(p, b.toarray())
Example #4
 def check_read(self, example, a, info, dense, over32, over64):
     with open(self.fn, 'w') as f:
         f.write(example)
     assert_equal(mminfo(self.fn), info)
     if (over32 and (np.intp(0).itemsize < 8)) or over64:
         assert_raises(OverflowError, mmread, self.fn)
     else:
         b = mmread(self.fn)
         if not dense:
             b = b.toarray()
         assert_equal(a, b)
Example #5
    def test_empty_write_read(self):
        # https://github.com/scipy/scipy/issues/1410 (Trac #883)

        b = scipy.sparse.coo_matrix((10, 10))
        mmwrite(self.fn, b)

        assert_equal(mminfo(self.fn),
                     (10, 10, 0, 'coordinate', 'real', 'symmetric'))
        a = b.toarray()
        b = mmread(self.fn).toarray()
        assert_array_almost_equal(a, b)
Example #6
def preProcessBenchmark(benchDirPath):
    entries = []
    for f in os.listdir(benchDirPath):
        info = io.mminfo(os.path.join(benchDirPath, f))
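        # io.mminfo parses just the Matrix Market header: (rows, cols, entries, format, field, symmetry)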
        if info[0] == info[1]:
            info = list(info[1:])
        info.append(info[1] / info[0])
        info.insert(0, f.replace(r'.mtx', ''))
        info[1] = int(info[1])
        info[2] = int(info[2])
        entries.append(info)
    return sorted(entries, key=lambda x: x[-1], reverse=True)
Example #7
def preProcessBenchmark(benchDirPath):
  entries = []
  for f in os.listdir(benchDirPath):
    info = io.mminfo(os.path.join(benchDirPath, f))
    if info[0] == info[1]:
      info = list(info[1:])
    info.append(info[1] / info[0])
    info.insert(0, f.replace(r'.mtx', ''))
    info[1] = int(info[1])
    info[2] = int(info[2])
    entries.append(info)
  return sorted(entries, key=lambda x : x[-1], reverse=True)
Example #8
    def test_real_write_read(self):
        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])
        V = array([1.0, 6.0, 10.5, 0.015, 250.5, -280.0, 33.32, 12.0])

        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))

        mmwrite(self.fn, b)

        assert_equal(mminfo(self.fn),
                     (5, 5, 8, 'coordinate', 'real', 'general'))
        a = b.toarray()
        b = mmread(self.fn).toarray()
        assert_array_almost_equal(a, b)
Example #9
def read_mm():

  # ... read the coefficient matrix file
  file_in_a = '../data/retangulo_dif_4.mtx'
  file_in_b = '../data/retangulo_dif_4_b.mtx'
#  file_in_a = '../data/cilindro1_1_1.mtx'
#  file_in_b = '../data/cilindro1_1_1_b.mtx'
#  file_in_a = '../data/sist3.mtx'
#  file_in_b = '../data/sist3_b.mtx'

  aCoo  = io.mmread(file_in_a)
  nla    = int(io.mminfo(file_in_a)[0])
  nca    = int(io.mminfo(file_in_a)[1])
  if nla != nca:
    print('Numero de linhas e colunas diferentes na matriz de coeficientes')
    print('Numero de linhas = {0}\nNumero de colunas = {1}'.format(nla,nca))
    exit(0)
  neq = nla

  # ... read the force vector file
  b      = io.mmread(file_in_b)
  nlb    = int(io.mminfo(file_in_b)[0])
  b      = b.reshape((nlb,))
  if nlb != neq:
    print('Numero de linhas no vetor de forcas incompativel')
    print('Numero de linhas = {0}\n'.format(nlb))
    exit(0)

  print('**************************')
  print('numero de equacoes = {0}'.format(neq))
  print('**************************')

  # convert to a dense (full) matrix
  aDense = coo_matrix(aCoo, shape=(neq, neq)).toarray()

  return aDense, b, neq
Example #10
    def __init__(self, mat_path):
        """ """
        #print ("WorkerIterativeLinearSystemSolver works good")
        Worker.__init__(self)
        self._hist_list = []

        if mat_path == "NeedMatGeneration":
            """ Need to generatre matrix """
            print("please call obj.matrix_generation(dim ,left_semi_bw, right_semi_bw, val_min, val_max)")
        else:
            self._mat_coo = io.mmread(mat_path)
            self._mat = self._mat_coo.tocsr()
            self._mat_info = io.mminfo(mat_path)
            print("Done reading matrix {}, Row:{}, Col:{}".format( mat_path, self._mat.shape[0], self._mat.shape[1]))
            print("mminfo:{}".format(self._mat_info))
            if self._mat.getformat() == "csr":
                print("Yeah, it is CSR")
Example #12
    def test_complex_write_read(self):
        I = array([0, 0, 1, 2, 3, 3, 3, 4])
        J = array([0, 3, 1, 2, 1, 3, 4, 4])
        V = array([
            1.0 + 3j, 6.0 + 2j, 10.50 + 0.9j, 0.015 + -4.4j, 250.5 + 0j,
            -280.0 + 5j, 33.32 + 6.4j, 12.00 + 0.8j
        ])

        b = scipy.sparse.coo_matrix((V, (I, J)), shape=(5, 5))

        mmwrite(self.fn, b)

        assert_equal(mminfo(self.fn),
                     (5, 5, 8, 'coordinate', 'complex', 'general'))
        a = b.toarray()
        b = mmread(self.fn).toarray()
        assert_array_almost_equal(a, b)
Example #13
def process_file(file):
    """
    Load a Matrix Market file.
    Arguments:
        file: a matrix market file of type .mtx
    Returns:
        adjacency_matrix: dense adjacency matrix of the graph
        graph: networkx graph object built from the adjacency matrix
    """

    print("Reading file...")
    sparse_matrix = mmread(file)
    print("File read.")
    print(
        "File info: \n rows : {}, cols: {}, nonzeros: {}, format: {}, field: {}, symmetry: {}"
        .format(*mminfo(file)))
    adjacency_matrix = sparse_matrix.toarray()
    graph = construct_graph(adjacency_matrix)
    return adjacency_matrix, graph
Example #14
    def __init__(self, mat_path):
        """ """
        #print ("WorkerIterativeLinearSystemSolver works good")
        Worker.__init__(self)
        self._hist_list = []

        if mat_path == "":
            """ Need to generatre matrix """
            print("calling self._matrix_generation")
            #self._mat = self._matrix_generation()
        else:
            self._mat_coo = io.mmread(mat_path)
            self._mat = self._mat_coo.tocsr()
            self._mat_info = io.mminfo(mat_path)
            print("Done reading matrix {}, Row:{}, Col:{}".format( mat_path, self._mat.shape[0], self._mat.shape[1]))
            print("mminfo:{}".format(self._mat_info))
            if self._mat.getformat() == "csr":
                print("Yeah, it is CSR")
Example #15
def process_file(file):
    """
    Load a Matrix Market file.
    Arguments:
        file: a matrix market file of type .mtx
    Returns:
        adjacency_matrix: a dense adjacency matrix of shape (number of nodes, number of nodes)
        graph: networkx graph object of the adjacency matrix
    """

    print("Reading file...")
    sparse_matrix = mmread(file)
    print("File read.")
    print(
        "File info: rows : {}, cols: {}, nonzeros: {}, format: {}, field: {}, symmetry: {}"
        .format(*mminfo(file)))
    adjacency_matrix = sparse_matrix.toarray()
    graph = construct_graph(adjacency_matrix)
    return adjacency_matrix, graph
Example #16
def run_scipy(files):
    exp_times = {}
    log_dir = os.path.join(os.getcwd(), args.out_dir, args.exp,
                           datetime.now().strftime("%Y%m%d-%H%M%S"))
    for f in files:
        print("Processing :", f)
        fsparse = mmread(f)
        finfo = mminfo(f)
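        # finfo = (rows, cols, entries, format, field, symmetry); finfo[3] is 'coordinate' or 'array'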
        print(finfo)
        if (finfo[3] != 'coordinate'):
            continue
        csr = fsparse.tocsr()
        v = np.random.uniform(size=finfo[0])  # random vector of length rows (finfo[0])
        iter_times = []
        for i in range(args.iters):
            start = time.time_ns()
            res = csr.dot(v)
            delta = (time.time_ns() - start)
            iter_times.append(delta / (10**6))
        exp_times[os.path.basename(f)] = iter_times
        df = pd.DataFrame.from_dict(exp_times, orient="index")
        df.to_csv(os.path.join(log_dir, "results.csv"))
Example #17
def time_spsolve(mat_path, rpt, num):
    '''Time scipy.sparse.linalg.spsolve on the MatrixMarket matrix at mat_path.'''

    # Preliminary steps for spsolve
    # A and B should be global for timeit to see them, since timeit only sees
    # variables from __main__. It is possible to use lambda or partial, but this
    # is simpler
    global A, B
    #import the sparse matrix from MatrixMarket format
    matmark = sio.mmread(mat_path)
    A = ss.csr_matrix(matmark)

    # Nice formatting
    info = sio.mminfo(mat_path)
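    # info holds the six mminfo fields (rows, cols, entries, format, field, symmetry) for the formatted printout below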
    info_arr = np.array(info).flatten()
    print(
        mat_path,
        '\t\t',
        '{: <8} {: <8} {: <10} {: <15} {: <8} {: <10}'.format(*info_arr),
        end='\t')

    # Create an array with random values, to be used as B in the system AX=B
    B = sst.uniform.rvs(size=A.shape[0])

    # time spsolve with timeit
    setup_code = '''from __main__ import A, B
from scipy.sparse.linalg import spsolve'''
    test_code = 'spsolve(A, B)'

    # the number of executions <number> and times <repeat> these executions are
    # repeated can be configured.
    bench = timeit.Timer(setup=setup_code, stmt=test_code)
    times = bench.repeat(repeat=rpt, number=num)

    # The most sensible value to show is the minimum result, since all the rest
    # are greater due to interference from other processes on the system.
    # Therefore, the average processing time is estimated by dividing the
    # minimum test value by the number of executions per test.
    print(min(times)/num)
Example #18
    if args.her:
        supdiag = -1.0 - 2.j
        rhs_mid = 0.0 + 0.j
        rhs_1 = -1.0 + 3.j
    else:
        supdiag = -1. + 2.j
        rhs_mid = -4.0 + 4.j
        rhs_1 = 3.0 - 1.j
    rhs_n = -1. + 3.j
else:
    parser.print_help()
    raise

#definition of the sparse matrix
if args.matrix:
    (n, m, nnz, form, arith, issym) = mminfo(args.matrix)
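    # arith is the Matrix Market field ('real', 'complex', 'integer' or 'pattern'); issym is the symmetry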
    n = int(n)
    localn = n // nprocs

    first = 1 + myrank * localn + min(myrank, n % nprocs)

    if myrank < n % nprocs:
        localn = localn + 1

    loc2glob = np.array([x + first for x in range(localn)],
                        dtype=pastix.integerType)
    n = int(n)
    A = mmread(args.matrix)
    rhs = [1] * n
    rhs = A * rhs
    print(arith)
Example #19
def main(argv):
    myArgv=(['-autovalor','-autovalorI','-condNumber'
            ,'-norm'     ,'-matvec'     ,'-svd'
            ,'-svdI'])
# check the arguments
    nArgs = len(argv)
    if nArgs < 4:
        sys.stderr.write("Usage: %s\nOpcoes:\n" % argv[0])
        for arg in myArgv:
            print(arg, "FileIn", "FileOut")
        return 1
#......................................................................
 
    fileIn  = argv[2]
    fileOut = argv[3]
# ... read the input file
    aCoo = io.mmread(fileIn)
    nl = int(io.mminfo(fileIn)[0])
    nc = int(io.mminfo(fileIn)[1])
    a    = coo_matrix(aCoo, shape=(nl, nc)).todense()
# choose the execution option

# eigenvalue (-autovalor)
    if argv[1] == '-autovalor':
      timeIn  = tm.time()
# ... 
      MyM.autovalor(a,'false',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print(timeOut - timeIn)
# ...........................................................

# eigenvalue (-autovalorI)
    elif argv[1] == '-autovalorI':
      timeIn  = tm.time()
# ... 
      MyM.autovalor(a,'true',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print(timeOut - timeIn)
    
# condition number
    elif  argv[1] == '-condNumber':
      timeIn       = tm.time()
# ... 
      MyM.condNumber(a,'true',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print('time:', timeOut - timeIn)
# ...........................................................

# matrix norm
    elif  argv[1] == '-norm':
      timeIn  = tm.time()
# ... 
      MyM.norm(a,2,nl,'false',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print(timeOut - timeIn)
#......................................................................

#matvec    
    elif  argv[1] == '-matvec':
      timeIn  = tm.time()
# ... 
      MyM.matVec(a,nl,'true',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print(timeOut - timeIn)
#......................................................................

#svd       
    elif  argv[1] == '-svd':
      timeIn  = tm.time()
# ... 
      MyM.svd(a,'false',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print(timeOut - timeIn)
#......................................................................

#svdI       
    elif  argv[1] == '-svdI':
      timeIn  = tm.time()
# ... 
      MyM.svd(a,'true',fileOut)
# ...........................................................
      timeOut      = tm.time()
      print('time:', timeOut - timeIn)
#......................................................................

    else:
      sys.stderr.write("Usage: %s\nOpcoes:\n" % argv[0])
      for arg in myArgv:
        print(arg, "FileIn", "FileOut")
      return 1
Example #20
def readMatrix(filename, form, showProgress=False):
    HERCMATRIX = libHercMatrix.hercMatrix()

    logging.info("reading matrix {0} in format {1}".format(filename, form))

    if (form == 'hercm') or (form == 'bxf'):
        # TODO: exception handling 
        HERCMATRIX = libBXF.read(filename)

    elif form == 'mtx':
        from scipy import io
        from scipy.sparse import csr_matrix
        from numpy import array

        # reads in an MTX file and converts it to hercm

        try:
            if showProgress:
                print("reading data from file...")

            rawMatrix = scipy.sparse.coo_matrix(scipy.io.mmread(filename))
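            # mminfo's last entry is the symmetry: 'general', 'symmetric', 'skew-symmetric' or 'hermitian'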

            if 'symmetric' in io.mminfo(filename):
                HERCMATRIX.symmetry = "SYM"
            else:
                HERCMATRIX.symmetry = "ASYM"

            hercm = {}  # needed to generate verification

            if showProgress:
                print("generating header data..")
            hercm['val'] = rawMatrix.data
            hercm['col'] = rawMatrix.col.tolist()
            hercm['row'] = rawMatrix.row.tolist()
            (matrixWidth, matrixHeight) = rawMatrix.shape
            HERCMATRIX.height = int(matrixHeight)
            HERCMATRIX.width = int(matrixWidth)
            vs = libBXF.generateVerificationSum(hercm)
            HERCMATRIX.verification = vs
            HERCMATRIX.remarks = []

            # I'm not sure why this has to be hard...
            # http://stackoverflow.com/questions/26018781/numpy-is-it-possible-to-preserve-the-dtype-of-columns-when-using-column-stack

            if showProgress:
                print("preparing matrix data...")
            val = numpy.asarray(hercm['val'], dtype='float64')
            col = numpy.asarray(hercm['col'], dtype='int32')
            row = numpy.asarray(hercm['row'], dtype='int32')

            val = numpy.rec.array(val, dtype=[(('val'), numpy.float64)])
            col = numpy.rec.array(col, dtype=[(('col'), numpy.int32)])
            row = numpy.rec.array(row, dtype=[(('row'), numpy.int32)])

            HERCMATRIX.elements = append_fields(row,
                    'col',
                    col,
                    usemask=False,
                    dtypes=[numpy.int32])

            HERCMATRIX.elements = append_fields(HERCMATRIX.elements,
                    'val',
                    val,
                    usemask=False,
                    dtypes=[numpy.float64])

            HERCMATRIX.nzentries = len(HERCMATRIX.elements['val'])

            HERCMATRIX.verification = libBXF.generateVerificationSum(
                HERCMATRIX)

            if showProgress:
                print("finished reading matrix")

        except IOError as e:  # make sure the file exists and is readable
            logging.warning("(lsc-480) could not open matrix file")
            raise IOError("could not open matrix file for writing...",
                          str(e))

    elif form == 'mat':  # matlab matrices
        from scipy import io
        from scipy import sparse
        from numpy import array

        try:

            rawMatrix = scipy.sparse.coo_matrix(
                scipy.io.loadmat(filename)['matrix'])

            hercm = {}  # needed to generate verification

            hercm['val'] = rawMatrix.data
            hercm['col'] = rawMatrix.col.tolist()
            hercm['row'] = rawMatrix.row.tolist()
            (matrixWidth, matrixHeight) = rawMatrix.shape
            HERCMATRIX.height = int(matrixHeight)
            HERCMATRIX.width = int(matrixWidth)
            vs = libBXF.generateVerificationSum(hercm)
            HERCMATRIX.verification = vs
            HERCMATRIX.remarks = []

            # I'm not sure why this has to be hard...
            # http://stackoverflow.com/questions/26018781/numpy-is-it-possible-to-preserve-the-dtype-of-columns-when-using-column-stack

            val = numpy.asarray(hercm['val'], dtype='float64')
            col = numpy.asarray(hercm['col'], dtype='int32')
            row = numpy.asarray(hercm['row'], dtype='int32')

            val = numpy.rec.array(val, dtype=[(('val'), numpy.float64)])
            col = numpy.rec.array(col, dtype=[(('col'), numpy.int32)])
            row = numpy.rec.array(row, dtype=[(('row'), numpy.int32)])

            HERCMATRIX.elements = append_fields(row,
                    'col',
                    col,
                    usemask=False,
                    dtypes=[numpy.int32])

            HERCMATRIX.elements = append_fields(HERCMATRIX.elements,
                    'val', val, usemask=False, dtypes=[numpy.float64])

            HERCMATRIX.nzentries = len(HERCMATRIX.elements['val'])

            if HERCMATRIX.checkSymmetry():
                HERCMATRIX.symmetry = 'SYM'

            HERCMATRIX.verification = libBXF.generateVerificationSum(
                HERCMATRIX)

        except IOError as e:  # make sure the file exists and is readable
            logging.warning("(lsc-536)could not open matrix file")
            raise IOError("could not open matrix file for writing...",
                          str(e))

    elif form == 'valcol':
        HERCMATRIX = libValcolIO.read(filename)

    else:
        logging.warning("(lsc-545) format {0} is not valid".format(form))

    if showProgress:
        print("converting matrix to row-major...")
        
    logging.info("converting matrix to row-major")
    HERCMATRIX.makeRowMajor()

    if showProgress:
        print("matrix is now row major")

    if HERCMATRIX.symmetry == 'SYM':
        logging.info("matrix is symmetric, truncating upper triangle")
        if showProgress:
            print("matrix is symmetric, truncating upper triangle...")
        HERCMATRIX.makeSymmetrical('truncate')
        if showProgress:
            print("upper triangle truncated")

    return HERCMATRIX
Example #21
import numpy as np

import scipy.linalg as spla

help(spla.det)


import scipy.io as spio
help(spio.mminfo)


%cd "C:\Users\Administrador\Desktop\Clase2"

spio.mminfo("mahindas.mtx")

mahindas = spio.mmread("mahindas.mtx")
type(mahindas)
mahindas.shape

mahindas_Densa=mahindas.todense()
mahindas_Densa[1:10,1:10]
spla.inv(mahindas_Densa)
spla.det(mahindas_Densa)


def gen_ex(d0):
    x = np.random.randn(d0,d0)
    return x.T + x

mat1 =gen_ex(10**3)
Example #22

#print(sys.argv)
print("Reading graph!")
filename = sys.argv[1]
G = mmread(filename)
graph = nx.Graph(G)
print("Done!")

#random.seed(1)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

if sys.argv[2] == "1":
    print("Running native...", mminfo(filename)[0])
    X = readEmbeddings(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
elif sys.argv[2] == "3":
    print("Running native...", mminfo(filename)[0])
    X = readEmbeddingsHOPE(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
elif sys.argv[2] == "4":
    X = readEmbeddingsROLX(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
elif sys.argv[2] == "5":
    X = readEmbeddingsHARP(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
else:
    print("Running binnative...", mminfo(filename)[0])
    X = readBinEmbeddings(sys.argv[3], int(sys.argv[4]))

truelabfile = sys.argv[5]

print("Making prediction data!!")
Example #23
        # convergence check
        if np.linalg.norm(new_x_diff) <= (rtol * np.linalg.norm(old_x) + atol):
            break

        old_x += new_x_diff

    return times, old_x


# read the Matrix Market file
# mtx_filename = 'bcsstm22'  # n = 138
mtx_filename = 'memplus'   # n = 17758

mtx_filename_full = mtx_filename + '.mtx'
spmat_info = scio.mminfo(mtx_filename_full)
print('spmat_info = ', spmat_info)

row_dim = spmat_info[0]
col_dim = spmat_info[1]

# COO format
spmat_a_coo = scio.mmread(mtx_filename_full)
print('spmat_a = ', spmat_a_coo)

# COO -> CSR
spmat_a = spmat_a_coo.tocsr()

# x = [1, 2, ..., n]
true_x = np.array(np.arange(1, col_dim + 1))
print('true_x = ', true_x)
Example #24
File: info.py Project: Jie211/tools
# -*- coding: utf-8 -*-
import scipy.io as sio
import numpy as np
import sys

argvs = sys.argv
arglen = len(argvs)
if arglen != 2:
    print('Usage: python %s [MatrixMarketfile]' % argvs[0])
    quit()
target = argvs[1]
(sizex, sizey, nnz, format, field, symmetry) = sio.mminfo(target)
mat = sio.mmread(target)

ftype = mat.getformat()
min_all = mat.min()
max_all = mat.max()
nnz = mat.getnnz()
nnz_row = mat.getnnz(1)
min_nnz = min(nnz_row)
max_nnz = max(nnz_row)
ave_nnz = np.average(nnz_row)
zero = float(sizex*sizex-nnz)/float(sizex*sizex)*100.0

print "対称性 = %s" % (symmerty)
print "格納形式 = %s" % (ftype)
if symmerty == 'symmetric':
	print "次元数 = %d" % (sizex*sizex)
else :
	print "X-サイズ = %d" % (sizex)
	print "Y-サイズ = %d" % (sizey)
Example #25
        supdiag = -1.0 - 2.j
        rhs_mid = 0.0 + 0.j
        rhs_1 = -1.0 + 3.j
    else:
        supdiag = -1. + 2.j
        rhs_mid = -4.0 + 4.j
        rhs_1 = 3.0 - 1.j
    rhs_n = -1. + 3.j
else:
    parser.print_help()
    raise


#definition of the sparse matrix
if args.matrix:
    (n, m, nnz, form, arith, issym) = mminfo(args.matrix)
    n = int(n)
    A = mmread(args.matrix)
    rhs = [1] * n
    rhs = A*rhs
    dtype = A.dtype

    if issym == u"symmetric":
        args.sym = True
    elif issym == u"hermitian":
        args.her = True
    else:
        args.sym = False
else:
    A = lil_matrix((n, n), dtype=dtype)
    A[0,0] =  diag
Example #26
    plt.axis('off')
    plt.savefig(algo1 + '_vis.pdf')


filename = sys.argv[1]
G = mmread(filename)
graph = nx.Graph(G)

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn import metrics

if sys.argv[2] == "1":
    print("Running native...")
    X = readEmbeddings(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))
else:
    X = readEmbeddingsHARP(sys.argv[3], mminfo(filename)[0], int(sys.argv[4]))

labs, l, gy = readgroundtruth(sys.argv[5], mminfo(filename)[0])
algoname = sys.argv[6]
print("Running TSNE")

if int(sys.argv[4]) == 2:
    print("Direct 2D Visualization")
    X_f = X
else:
    print("Visualization by TSNE")
    X_embedded = TSNE(n_components=2).fit_transform(X)
    X_f = X_embedded
Example #27
 def check(self, a, info):
     mmwrite(self.fn, a)
     assert_equal(mminfo(self.fn), info)
     b = mmread(self.fn)
     assert_array_almost_equal(a, b)
Example #28
 def check_exact(self, a, info):
     mmwrite(self.fn, a)
     assert_equal(mminfo(self.fn), info)
     b = mmread(self.fn)
     assert_equal(a, b)
Example #29
import numpy as np
import scipy.io as sio
import math
import subprocess
np.set_printoptions(threshold=np.inf)
from PIL import Image

matrices = open("matrices_square.txt", 'r')
for line in matrices:
    name = line.rstrip().split(' ')
    #directory = "mcca.mtx"
    directory = "/home/matrices/mtx/" + name[0] + "/" + name[1] + "/" + name[
        1] + ".mtx"
    mtx = sio.mmread(directory)
    mtx_row = sio.mminfo(directory)[0]
    mtx_col = sio.mminfo(directory)[1]
    print(name[1])

    # if mtx_row <= 1000:
    #     coef = 1
    # elif 1000 < mtx_row <= 5000:
    #     coef = 5
    # elif 5000 < mtx_row <= 10000:
    #     coef = 10
    # elif 10000 < mtx_row <= 50000:
    #     coef = 50
    # elif 50000 < mtx_row <= 100000:
    #     coef = 100
    # elif 100000 < mtx_row <= 500000:
    #     coef = 500
    # elif 500000 < mtx_row <= 1000000:
Example #30
def Dataset_Load(file_name):
    dataset_info = spio.mminfo(file_name)
    print('Load dataset_info:', dataset_info)
    dataset_coomatrix = spio.mmread(file_name)
    return dataset_coomatrix
Example #31
def main(argv):
    # check the arguments
    nArgs = len(argv)
    if nArgs < 3:
        sys.stderr.write("Usage: %s\nOpecoes:\n" % argv[0])
        print("FileIn", "FileOut")
        return 1
#......................................................................

    fileIn = argv[1]
    fileOut = argv[2]
    # ... read the input file
    aCoo = io.mmread(fileIn)
    nl = int(io.mminfo(fileIn)[0])
    nc = int(io.mminfo(fileIn)[1])

    #    aDense = aCoo.todense()
    #   aDense = aCoo.todense()
    #   evalsAll, evecsAll = eigh(aDense)
    #   print evalsAll
    #   return
    # convert COO to CSR
    timeConv = tm.time()
    aCsr = aCoo.tocsr()
    timeConv = tm.time() - timeConv
    #......................................................................

    maxit = 3500
    tol = 1.e-15
    lancozVector = 20
    # largest eigenvalue
    eighLarge = tm.time()
    evalsLarge, evecsLarge = eigsh(aCsr,
                                   1,
                                   ncv=lancozVector,
                                   which='LA',
                                   tol=tol,
                                   maxiter=maxit)
    eighLarge = tm.time() - eighLarge
    print(evalsLarge)
    #    erro = abs(evalsLarge[0] - evalsAll[nl-1])/evalsAll[nl-1]
    #    print evalsLarge[0],evalsAll[nl-1],erro
    #......................................................................

    # smallest eigenvalue
    eighSmall = tm.time()
    evalsSmall, evecsSmall = eigsh(aCsr,
                                   1,
                                   ncv=lancozVector,
                                   which='LA',
                                   sigma=0,
                                   tol=tol,
                                   maxiter=maxit)
    eighSmall = tm.time() - eighSmall
    print(evalsSmall)
    #    erro = abs(evalsSmall[0] - evalsAll[0])/evalsAll[0]
    #     print evalsSmall[0],evalsAll[0],erro
    #......................................................................
    print("NUmero de equacoes %d" % nl)
    print("Maior autovalor %0.6e" % evalsLarge[0])
    print("Menor autovalor %0.6e" % evalsSmall[0])
    print("Condicio number %0.6e" % (evalsLarge[0] / evalsSmall[0]))
    print("Time(Maior): ", eighLarge)
    print("Time(Menor): ", eighSmall)
    # .....................................................................
    f = open(fileOut, "w")
    f.write("Numero de equacoes %d\n" % nl)
    f.write("Maior autovalor %.6e\n" % evalsLarge[0])
    f.write("Menor autovalor %.6e\n" % evalsSmall[0])
    f.write("Condicio number %.6e\n" % (evalsLarge[0] / evalsSmall[0]))
    f.close()
Example #32
def create_subsampled_matrix(large_matrix_location,
                             gene_file=None,
                             MAX_CELLS_COUNT=MAX_CELLS_COUNT):
    mtx_info = mminfo(large_matrix_location)
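    # mminfo only reads the .mtx header, so this stays cheap even for very large matrices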
    num_rows = mtx_info[0]
    num_cols = mtx_info[1]
    num_cells = num_cols

    if gene_file is not None:
        gene_list = load_genes(
            gene_file,
            delimiter='\t' if gene_file.endswith('tsv') else None,
            skip_rows=1 if gene_file.endswith('tsv') else 0)

        if num_rows == len(gene_list):
            bioblocks_log('Genes are rows, Cells are cols')
            num_cells = num_cols
            sample_rows = False
        else:
            bioblocks_log('Cells are rows, Genes are cols')
            num_cells = num_rows
            sample_rows = True
    else:
        gene_list = ['']

    subsampled_indices = random.sample(range(1, num_cells + 1),
                                       MAX_CELLS_COUNT)
    subsampled_indices.sort()
    if (num_cells > MAX_CELLS_COUNT):
        subsampled_matrix_location = '{}_sub.mtx'.format(
            large_matrix_location[0:len(large_matrix_location) - 4])
        with open(subsampled_matrix_location, 'w') as subsampled_matrix:
            with open(large_matrix_location) as large_matrix:
                matrix_header = large_matrix.readline()
                num_entries = 0
                output_lines = []
                line = large_matrix.readline()
                while line:
                    line = large_matrix.readline()
                    matrix_index = -1
                    if sample_rows is True and line.split(' ')[0].isnumeric():
                        matrix_index = int(line.split(' ')[0])
                        num_entries += 1
                    elif len(line.split(' ')) >= 2 and line.split(
                            ' ')[1].isnumeric():
                        matrix_index = int(line.split(' ')[1])
                        num_entries += 1

                    bisect_index = bisect_left(subsampled_indices,
                                               matrix_index)

                    if bisect_index != len(
                            subsampled_indices
                    ) and subsampled_indices[bisect_index] == matrix_index:
                        output_lines.append(line)

                if sample_rows is True:
                    matrix_header = '{}{}'.format(
                        '{} {} {}'.format(MAX_CELLS_COUNT, len(gene_list),
                                          num_entries), matrix_header)
                else:
                    matrix_header = '{}{}'.format(
                        matrix_header,
                        '{} {} {}'.format(len(gene_list), MAX_CELLS_COUNT,
                                          num_entries))
                subsampled_matrix.write(matrix_header)
                for line in output_lines:
                    subsampled_matrix.write(line)
        os.remove(large_matrix_location)
        os.rename(subsampled_matrix_location, large_matrix_location)
Example #33
def SSOR(filename):


    fileinfo = sio.mminfo(filename)
    A = ssp.csr_matrix(sio.mmread(filename))
    print(fileinfo)

    b = random.rand(A.shape[0])



    starttime = datetime.datetime.now()

    x_direct, info_direct = linalg.cg(A, b)

    endtime_d = datetime.datetime.now()

    print("the directing time cost is %f" % ((endtime_d - starttime).microseconds/1000))
    print("the iterations is %d" % info_direct)


    D = ssp.diags(A.diagonal(), 0)
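    # D is a sparse diagonal matrix holding A's main diagonal (the D in the SSOR splitting)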


    # print("===================D")
    # print(D)


    w = 0.5

    D_like = (1/w)*D

    L = ssp.tril(A)


    D_inv = D_like
    for i in range(0, len(D_inv.data)):
        D_inv.data[i] = 1/D_inv.data[i]
    # print("===================D_inv")
    # print(D_inv)

    tmp = D_inv*L


    D_inv2 = D_inv
    for i in range(0, len(D_inv.data)):
        D_inv2.data[i] **= 0.5


    # one
    K = math.sqrt(2-w)*(D_inv2 - D_inv2*L*D_inv)

    # two
    # K = math.sqrt(2-w)*(D_inv2 - D_inv2*L*D_inv + D_inv2*L*D_inv*L*D_inv)


    K_T = K.T
    M = K_T*K

    a = 0.5

    print("when a=%f," % a)
    for row in range(0, len(M.indptr)-1):
            maxA = abs(A.data[A.indptr[row]])

            for indexA in range(A.indptr[row], A.indptr[row+1]):
                if abs(A.data[indexA]) > maxA:
                    maxA = abs(A.data[indexA])

            for indexM in range(M.indptr[row], M.indptr[row+1]):
                if abs(M.data[indexM]) <= (1-a)*maxA:
                    M.data[indexM] = 0

    A_like = A*M

    x, info = linalg.cg(A_like, b)


    starttime_p = datetime.datetime.now()

    x, info = linalg.cg(A_like, b)


    endtime = datetime.datetime.now()

    print("the SSOR time cost is %f" % (endtime - starttime_p).seconds)
    print("the iterations is %d" % info)
Example #34
def check_issparse(f):
    finfo = mminfo(f)
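    # mminfo()[3] is 'coordinate' for sparse Matrix Market files and 'array' for dense ones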
    return finfo[3] == 'coordinate'
Example #35
    print(len(Xd), len(Yd), count)
    return Xd, Yd

#print(sys.argv)
filename = sys.argv[1]
G = mmread(filename)
graph = nx.Graph(G)

#random.seed(1)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

if sys.argv[2] == "1":
    print("Running native...")
    X = readEmbeddings(sys.argv[3],  mminfo(filename)[0], int(sys.argv[4]))
else:
    X = readBinEmbeddings(sys.argv[3], int(sys.argv[4]))

truelabfile = sys.argv[5]

Xt, Yt = makeLinkPredictionData(graph, X, "hadamard")
ltrainfrac = [0.50]
for tf in ltrainfrac:
    CV = int(len(Yt) * tf)
    trainX = Xt[0:CV]
    testX = Xt[CV:]
    trainY = Yt[0:CV]
    testY = Yt[CV:]
    modelLR = LogisticRegression().fit(trainX, trainY)
    predictedY = modelLR.predict(testX)
Example #36
def run_spring_analysis(dataset_dir, dataset_id, dataset, MAX_CELLS_COUNT, tmp_dir):
    mtx_file = '{}/matrix/matrix.mtx'.format(dataset_dir)
    gene_file = '{}/matrix/genes.tsv'.format(dataset_dir)
    bioblocks_log('mtx_file = {}'.format(mtx_file))
    mtx_info = mminfo(mtx_file)
    num_rows = mtx_info[0]
    num_cols = mtx_info[1]
    bioblocks_log(mtx_info)

    gene_list = load_genes(gene_file,
                           delimiter='\t' if gene_file.endswith('tsv') else None,
                           skip_rows=1 if gene_file.endswith('tsv') else 0)

    if num_rows == len(gene_list):
        bioblocks_log('Genes are rows, Cells are cols')
        num_cells = num_cols
        sample_rows = False
    else:
        bioblocks_log('Cells are rows, Genes are cols')
        num_cells = num_rows
        sample_rows = True

    if num_cells > MAX_CELLS_COUNT:
        bioblocks_log('mtx_file: {}'.format(mtx_file))
        create_subsampled_matrix(mtx_file, gene_file, MAX_CELLS_COUNT)
        num_cells = MAX_CELLS_COUNT

    subsample_ranges = get_cell_subsample_ranges(num_cells)

    bioblocks_log('Attempting to run SPRING with subsample ranges {}'.format(subsample_ranges))

    for subsample_range in subsample_ranges:
        analysis_id = str(uuid.uuid4())
        start_time = datetime.utcnow()
        bioblocks_log('Starting SPRING analysis \'{}\' for dataset \'{}\''.format(
            analysis_id, dataset_id))

        main_dir = '{}/analyses/{}'.format(dataset_dir, analysis_id)

        spring_load_preprocess.run_spring_preprocessing(
            mtx_file=mtx_file,
            gene_file=gene_file,
            cell_labels_file='{}/matrix/cells.tsv'.format(
                dataset_dir),
            main_dir=main_dir,
            subplot_name=dataset['name'],
            sample_rows=sample_rows,
            subsample_range=subsample_range,
            num_cells=num_cells
        )

        try:
            dataset['_etag'] = patch_matrix_info_for_dataset(dataset, mtx_info, mtx_file)
            analysis = {
                '_id': analysis_id,
                'process_type': 'SPRING',
                'name': '{} - {}'.format(dataset['name'], get_numeric_shorthand_suffix(subsample_range))
            }
            post_bioblocks_analysis(analysis)
            dataset['_etag'] = patch_analysis_for_dataset(dataset, analysis_id)
        except Exception as e:
            bioblocks_log('Error with compression of matrix file: {}'.format(e))
            return

    try:
        # bioblocks_log('Compressing file: {}'.format(mtx_file))
        # with open(mtx_file, 'rb') as f_in:
        #     with gzip.open('{}.gz'.format(mtx_file), 'wb') as f_out:
        #         shutil.copyfileobj(f_in, f_out)
        # bioblocks_log('Finished compressing file: {}'.format(mtx_file))

        delete_directory('{}/{}'.format(dataset_dir, tmp_dir))
        os.remove(mtx_file)

    except Exception as e:
        bioblocks_log('Error with cleanup of matrix file: {}'.format(e))

    end_time = datetime.utcnow()
    bioblocks_log('Finished SPRING analysis \'{}\' for dataset \'{}\'. Duration: {}'.format(
        analysis_id, dataset_id, end_time - start_time))
Example #37
from collections import OrderedDict
import pandas as pd
import networkx as nx
import scipy.io as sio
from bokeh.plotting import *
from math import floor
from stacked_graph import *

try:
    import matplotlib.pyplot as plt
except:
    raise

############ Read in matrix, find basic stats ###########
output_file("read.html")
mat_name = 'ca-AstroPh.mtx'
mat_info = sio.mminfo(mat_name)
mat_n = mat_info[0]
mat_nnz = mat_info[2]
print("matrix: {}".format(mat_name))
print("size: {} x {}".format(mat_n, mat_n))
print("nonzeros: {} (density: {})".format(mat_nnz, round(float(mat_nnz) / (mat_n * mat_n),6)))
print("nnz type: {} {}".format(mat_info[5], mat_info[4]))
assert(mat_n == mat_info[1])
print("reading matrix ..."),
A = sio.mmread(mat_name)
print("done\nconverting matrix to csr ..."),
B = A
A = A.tocsr()
print("done")
G = nx.to_networkx_graph(A)
numsamples = mat_n  #number of samples to take