def process_lmat(infile, mat, filename=None):
    """Read a labeled matrix and expose it as an index-matrix iterator.

    Every entry produced by ``matrixlib.iter_lmat(infile)`` is read into
    memory and split into three parallel sequences: row labels, column
    labels and values (presumably each entry is a
    ``(rowlabel, collabel, value)`` triple -- confirm against
    ``matrixlib.iter_lmat``).  The unique row and column labels are stored
    on ``mat.rowlabels`` and ``mat.collabels``.  When ``mat.order`` is not
    None, both label lists are sorted by each label's position in the
    strings read from ``mat.order``.

    Args:
        infile: Input handed to ``matrixlib.iter_lmat``.
        mat: Matrix object.  ``mat.order`` is read here and
            ``mat.rowlabels`` / ``mat.collabels`` are set.  The returned
            generator later reads ``mat.rows``, ``mat.cols`` and
            ``mat.vals`` and assigns into ``mat[rowlabel][collabel]``.
        filename: Optional name used only for timing output.  When truthy,
            the read is bracketed by ``util.tic`` / ``util.toc``.

    Returns:
        A tuple ``(nrows, ncols, nnz, imat)``: the number of unique row
        labels, the number of unique column labels, the number of entries
        read, and a generator yielding the entries produced by
        ``matrixlib.ilmat2imat``.
    """
    if filename:
        util.tic("reading '%s'" % filename)

    try:
        rows, cols, vals = matrixlib.transpose(
            list(matrixlib.iter_lmat(infile)))

        # determine labels
        rowlabels = util.unique(rows)
        collabels = util.unique(cols)

        # determine order: rows and columns share the same ordering file,
        # so a single lookup serves both sorts
        if mat.order is not None:
            lookup = util.list2lookup(util.read_strings(mat.order))

            def position(label):
                return lookup[label]

            rowlabels.sort(key=position)
            collabels.sort(key=position)

        mat.rowlabels = rowlabels
        mat.collabels = collabels
    finally:
        # Close the timer even when reading fails, mirroring the error
        # handling in load_matrix, so tic/toc calls stay paired.
        if filename:
            util.toc()

    # iterate with an imat, then post process
    def func():
        ilmat = itertools.izip(rows, cols, vals)
        for entry in matrixlib.ilmat2imat(ilmat, rowlabels, collabels):
            yield entry

        # also store entries by label
        # This part only runs once the consumer has exhausted the entries
        # above; mat.rows/cols/vals are expected to have been filled in by
        # then (presumably by load_matrix -- confirm against the caller).
        for i, j, v in itertools.izip(mat.rows, mat.cols, mat.vals):
            mat[rowlabels[i]][collabels[j]] = v

    return len(rowlabels), len(collabels), len(vals), func()
def load_matrix(nrows, ncols, nnz, imat, mat,
                loadvals=False, minval=-util.INF,
                sample=False, rowsample=False, colsample=False,
                filename=None):
    """Load matrix from an index matrix iterator.

    ``mat`` is prepared with ``mat.setup`` and its ``rows``, ``cols`` and
    ``vals`` lists are emptied in place.  Each ``(i, j, v)`` entry from
    ``imat`` that passes the filters is then appended to them.

    Args:
        nrows: Row count, passed to ``mat.setup`` and logged.
        ncols: Column count, passed to ``mat.setup`` and logged.
        nnz: Non-zero count, passed to ``mat.setup`` and logged.
        imat: Iterable of ``(i, j, v)`` entries.
        mat: Matrix object providing ``setup``, ``rows``, ``cols``,
            ``vals``, ``rshow`` and ``cshow``.
        loadvals: When true, each kept value is also stored as
            ``mat[i][j]``.
        minval: Entries with ``v < minval`` are dropped.
        sample: When truthy, an entry is dropped whenever
            ``random.random() > sample``.
        rowsample: Forwarded to ``mat.setup``.
        colsample: Forwarded to ``mat.setup``.
        filename: Optional name used for timing and log output.

    Raises:
        Exception: Anything raised while iterating ``imat`` is re-raised
            after the timer opened for ``filename`` has been closed.
    """
    mat.setup(nrows, ncols, nnz, rowsample=rowsample, colsample=colsample)
    rows, cols, vals = (mat.rows, mat.cols, mat.vals)

    # clear matrix (in place, so mat keeps referring to the same lists)
    rows[:] = []
    cols[:] = []
    vals[:] = []

    if filename:
        util.tic("reading '%s'" % filename)
        util.log("%s: %d nrows, %d ncols, %d non-zeros" %
                 (filename, nrows, ncols, nnz))

    try:
        for i, j, v in imat:
            # filtering: 1. random sample
            #            2. row/col filtering
            #            3. value cutoff
            if ((sample and random.random() > sample) or
                    i not in mat.rshow or j not in mat.cshow or
                    v < minval):
                continue

            rows.append(i)
            cols.append(j)
            vals.append(v)

            if loadvals:
                mat[i][j] = v
    except Exception:
        if filename:
            util.toc()
        # A bare raise keeps the original traceback; "raise e" would
        # replace it with this line under Python 2.
        raise
cols.append(j) vals.append(v) if loadvals: mat[i][j] = v except Exception, e: if filename: util.toc() raise e mat.nnz = len(vals) mat.maxval = max(vals) mat.minval = min(vals) if filename: util.toc() def load_dmat(dmat, mat, loadvals=False, minval=-util.INF, sample=False, rowsample=False, colsample=False): """Load dense matrix""" assert util.equal(* map(len, mat)), "matrix has unequal row sizes" nrows, ncols = len(dmat), len(dmat[0]) nnz = nrows * ncols imat = matrixlib.dmat2imat(dmat) load_matrix(nrows, ncols, nnz, imat, mat, loadvals=loadvals, minval=minval,