def dump_sample(input_dir):
    """Print the column-0 value of every row of a count matrix.

    The matrix is opened with ``BigCountMatrix.open(input_dir)``.  One line
    is written per row index in ``range(shape[0])``: the row name, a space,
    and the value stored at column 0.  A NaN value is printed as ``NA``.

    :param input_dir: location handed to ``BigCountMatrix.open``
    """
    matrix = BigCountMatrix.open(input_dir)
    num_rows = matrix.shape[0]
    for row_idx in xrange(num_rows):
        value = matrix.counts[row_idx, 0]
        shown = 'NA' if np.isnan(value) else str(value)
        # A single pre-formatted argument prints identically whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('%s %s' % (matrix.rownames[row_idx], shown))
def main(argv=None):
    '''Command line options.

    Opens the count matrix found in ``matrix_dir`` and prints one JSON
    object per cell, of the form
    ``{"t_id": <row index>, "s_id": <column index>, "value": <float>}``.

    Without ``-in`` every row/column listed in the matrix's ``rownames`` /
    ``colnames`` is emitted.  With ``-in`` only the leading ``--rows`` by
    ``--cols`` cells are emitted (both default to 1).

    :param argv: list of argument strings *without* the program name.
        ``None`` (the default) makes argparse read ``sys.argv[1:]``.
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("matrix_dir",
                        help="directory containing matrix file")
    # type=int lets argparse reject a non-numeric count with a usage error
    # instead of a ValueError traceback from a later int() call.
    parser.add_argument("-nr", "--rows", dest='num_rows', type=int,
                        default=1,
                        help='number of rows in matrix to grab')
    parser.add_argument("-nc", "--cols", dest='num_cols', type=int,
                        default=1,
                        help='number of columns in matrix to grab')
    parser.add_argument("-in", "--input_number", dest="in_num",
                        action="store_true", default=False,
                        help="import defined number of rows/cols")
    # Honour the caller-supplied argument list; it used to be ignored.
    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    bcm = BigCountMatrix.open(args.matrix_dir)
    matrix = bcm.counts
    if args.in_num:
        num_rows = args.num_rows
        num_cols = args.num_cols
    else:
        num_rows = len(bcm.rownames)
        num_cols = len(bcm.colnames)

    # One shared loop replaces the two copies that differed only in bounds.
    for row in xrange(num_rows):
        for col in xrange(num_cols):
            d = {
                't_id': row,
                's_id': col,
                'value': float(matrix[row, col])
            }
            print(json.dumps(d))
def search_for_all_nan(input_dir):
    """Report matrix rows that contain at least one non-finite value.

    The matrix is opened with ``BigCountMatrix.open(input_dir)``.  For each
    row whose number of finite entries is smaller than its length, one line
    is printed: the row index, the count of finite entries, and the row
    length, separated by spaces.

    NOTE(review): despite the name, a row is reported as soon as *any*
    entry is NaN or infinite, not only when every entry is NaN.

    :param input_dir: location handed to ``BigCountMatrix.open``
    """
    logging.debug("Opening matrix memmap files")
    bm = BigCountMatrix.open(input_dir)
    for i in xrange(bm.shape[0]):
        # Builtin ``float`` is the dtype the ``np.float`` alias referred to;
        # the alias itself no longer exists in current NumPy releases.
        a = np.array(bm.counts[i, :], dtype=float)
        # Count the finite entries from the mask instead of materialising
        # a filtered copy of the row just to take its length.
        num_finite = int(np.isfinite(a).sum())
        if num_finite < len(a):
            print('%d %d %d' % (i, num_finite, len(a)))
def normalize_count_data(input_dir,
                         fig_path_fmt='/home/mkiyer/Documents/t%d.png',
                         max_rows=101):
    """Plot original versus resampled counts for the leading matrix rows.

    For each processed row the function:

    * copies the row and passes the copy to ``normalize_counts`` with
      resampling and noise enabled (presumably modifying the copy in place,
      since the return value is not used -- confirm against the helper);
    * computes ``log2(original / size_factors + 1)`` and
      ``log2(copy + 1)``, both ordered by descending original value;
    * plots the two series, saves the figure, shows it, and prints the
      min, max, mean and median of each series.

    :param input_dir: location handed to ``BigCountMatrix.open``
    :param fig_path_fmt: ``%``-format string taking the row index and
        yielding the path the figure is saved to.  The default is the
        path that used to be hard-coded.
    :param max_rows: maximum number of rows to process; ``None`` means all
        rows.  The default of 101 matches the former ``i == 100`` cutoff.
    """
    logging.debug("Opening matrix memmap files")
    bm = BigCountMatrix.open(input_dir)
    r = RandomState()
    num_rows = bm.shape[0]
    if max_rows is not None:
        num_rows = min(num_rows, max_rows)
    for i in xrange(num_rows):
        # Builtin ``float`` is the dtype the ``np.float`` alias referred to;
        # the alias itself no longer exists in current NumPy releases.
        a = np.array(bm.counts[i, :], dtype=float)
        b = a.copy()
        normalize_counts(b, bm.size_factors, r,
                         resample=True,
                         add_noise=True,
                         noise_loc=1.0,
                         noise_scale=1.0)
        logging.debug(str(i))
        y1 = np.log2((a / bm.size_factors) + 1)
        y2 = np.log2(b + 1)
        # Sort both series by descending original (size-factor scaled) value.
        order = y1.argsort()[::-1]
        y1 = y1[order]
        y2 = y2[order]
        x = np.arange(len(y1))
        f = plt.figure()
        try:
            plt.plot(x, y2, 'ro', ms=1, mew=0, label='Resampled counts')
            plt.plot(x, y1, 'b-', lw=3, label='Original counts')
            plt.legend(markerscale=5, numpoints=3)
            plt.xlabel('Samples sorted by counts')
            plt.ylabel('log2(counts + 1)')
            plt.savefig(fig_path_fmt % (i))
            plt.show()
        finally:
            # Release the figure; otherwise one figure per row stays
            # registered with pyplot for the life of the process.
            plt.close(f)
        print('y1 %s %s %s %s'
              % (y1.min(), y1.max(), y1.mean(), np.median(y1)))
        print('y2 %s %s %s %s'
              % (y2.min(), y2.max(), y2.mean(), np.median(y2)))
def rpkm_versus_count(input_dir):
    """Print, for every column, the sum of its finite values.

    The matrix is opened with ``BigCountMatrix.open(input_dir)``.  For each
    index ``j`` in ``range(shape[1])`` the vector ``counts_t[j, :]`` is read
    (presumably the transposed counts, i.e. column ``j`` -- confirm), its
    non-finite entries are dropped, and the column name followed by the sum
    of the remaining entries is printed.

    :param input_dir: location handed to ``BigCountMatrix.open``
    """
    matrix = BigCountMatrix.open(input_dir)
    num_cols = matrix.shape[1]
    for col_idx in xrange(num_cols):
        column = matrix.counts_t[col_idx, :]
        finite_mask = np.isfinite(column)
        finite_values = column[finite_mask]
        # A single pre-formatted argument prints identically whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('%s %s' % (matrix.colnames[col_idx], finite_values.sum()))