Beispiel #1
0
def dump_sample(input_dir):
    """Print the column-0 value of every row of the matrix.

    Opens the matrix stored under ``input_dir`` and writes one line per row
    in the form ``<row name> <value>``. NaN values are written as the
    literal string ``NA``.
    """
    matrix = BigCountMatrix.open(input_dir)
    for row in xrange(matrix.shape[0]):
        value = matrix.counts[row, 0]
        # NaN entries are reported as 'NA' instead of 'nan'.
        label = 'NA' if np.isnan(value) else str(value)
        print('%s %s' % (matrix.rownames[row], label))
Beispiel #2
0
def dump_sample(input_dir):
    """Write ``<row name> <value>`` for the first column of each row.

    The value is read from ``counts[row, 0]`` of the matrix opened from
    ``input_dir``; a NaN value is replaced by the text ``NA``.
    """
    bcm = BigCountMatrix.open(input_dir)
    total_rows = bcm.shape[0]
    for idx in xrange(total_rows):
        count = bcm.counts[idx, 0]
        if np.isnan(count):
            text = 'NA'
        else:
            text = str(count)
        print('%s %s' % (bcm.rownames[idx], text))
def main(argv=None):
    """Dump matrix cells to stdout as JSON documents, one per line.

    Every cell is written as
    ``{"t_id": <row index>, "s_id": <column index>, "value": <float>}``.
    Without ``-in`` the whole matrix is written; with ``-in`` only the
    leading ``--rows`` x ``--cols`` block is written.

    Args:
        argv: list of command line arguments, without the program name.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse on ``--help`` or invalid arguments.
    """
    # Setup argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument("matrix_dir", help="directory containing matrix file")
    parser.add_argument("-nr",
                        "--rows",
                        dest='num_rows',
                        type=int,
                        default=1,
                        help='number of rows in matrix to grab')
    parser.add_argument("-nc",
                        "--cols",
                        dest='num_cols',
                        type=int,
                        default=1,
                        help='number of columns in matrix to grab')
    parser.add_argument("-in",
                        "--input_number",
                        dest="in_num",
                        action="store_true",
                        default=False,
                        help="import defined number of rows/cols")
    # Honour the caller-supplied argument list instead of silently
    # falling back to sys.argv.
    args = parser.parse_args(argv)

    # setup logging
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    bcm = BigCountMatrix.open(args.matrix_dir)
    matrix = bcm.counts
    num_rows = len(bcm.rownames)
    num_cols = len(bcm.colnames)
    if args.in_num:
        # Clamp the requested block to the labelled extent of the matrix so
        # an oversized request cannot fail with IndexError mid-output.
        num_rows = min(num_rows, args.num_rows)
        num_cols = min(num_cols, args.num_cols)

    for row in xrange(num_rows):
        for col in xrange(num_cols):
            d = {
                't_id': row,
                's_id': col,
                'value': float(matrix[row, col])
            }
            # NOTE: json.dumps writes NaN values as the bare token NaN.
            print(json.dumps(d))
Beispiel #4
0
def search_for_all_nan(input_dir):
    """Report rows of the count matrix that contain non-finite values.

    Despite its name, a row is reported as soon as at least one entry is
    NaN or infinite, not only when every entry is. For each such row one
    line is printed: ``<row index> <finite value count> <row length>``.

    Args:
        input_dir: directory passed to ``BigCountMatrix.open``.
    """
    logging.debug("Opening matrix memmap files")
    bm = BigCountMatrix.open(input_dir)
    for i in xrange(bm.shape[0]):
        # Builtin float replaces the np.float alias, which was only ever an
        # alias for it and no longer exists in NumPy >= 1.24.
        row = np.array(bm.counts[i, :], dtype=float)
        num_finite = int(np.isfinite(row).sum())
        if num_finite < len(row):
            print('%d %d %d' % (i, num_finite, len(row)))
Beispiel #5
0
def search_for_all_nan(input_dir):
    """Print every matrix row that has at least one NaN or infinite entry.

    The function name suggests all-NaN rows only, but the check fires
    whenever the number of finite entries is smaller than the row length.
    Output per reported row: ``<row index> <finite value count> <row length>``.

    Args:
        input_dir: directory passed to ``BigCountMatrix.open``.
    """
    logging.debug("Opening matrix memmap files")
    bm = BigCountMatrix.open(input_dir)
    for i in xrange(bm.shape[0]):
        # dtype=float is the portable spelling of the removed np.float alias.
        values = np.array(bm.counts[i, :], dtype=float)
        finite = values[np.isfinite(values)]
        if len(finite) < len(values):
            print('%d %d %d' % (i, len(finite), len(values)))
Beispiel #6
0
def main(argv=None):
    """Write the cells of a count matrix to stdout as JSON, one per line.

    Each line has the form
    ``{"t_id": <row index>, "s_id": <column index>, "value": <float>}``.
    The full matrix is written unless ``-in`` is given, in which case only
    the first ``--rows`` rows and ``--cols`` columns are written.

    Args:
        argv: command line arguments without the program name. When
            ``None``, argparse uses ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse on ``--help`` or invalid arguments.
    """
    # Setup argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument("matrix_dir",
                        help="directory containing matrix file")
    parser.add_argument("-nr", "--rows", dest='num_rows',
                        type=int, default=1,
                        help='number of rows in matrix to grab')
    parser.add_argument("-nc", "--cols", dest='num_cols',
                        type=int, default=1,
                        help='number of columns in matrix to grab')
    parser.add_argument("-in", "--input_number", dest="in_num",
                        action="store_true", default=False,
                        help="import defined number of rows/cols")
    # Pass argv through so programmatic callers are not ignored.
    args = parser.parse_args(argv)

    # setup logging
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

    bcm = BigCountMatrix.open(args.matrix_dir)
    matrix = bcm.counts
    row_limit = len(bcm.rownames)
    col_limit = len(bcm.colnames)
    if args.in_num:
        # Restrict to the requested block, but never beyond the labelled
        # matrix extent (avoids IndexError on oversized requests).
        row_limit = min(row_limit, args.num_rows)
        col_limit = min(col_limit, args.num_cols)

    for row in xrange(row_limit):
        for col in xrange(col_limit):
            d = {'t_id': row,
                 's_id': col,
                 'value': float(matrix[row, col])}
            # NOTE: NaN values are written by json.dumps as the bare token NaN.
            print(json.dumps(d))
Beispiel #7
0
def normalize_count_data(input_dir,
                         output_dir='/home/mkiyer/Documents',
                         max_rows=101):
    """Plot original versus resampled counts for the leading matrix rows.

    For each processed row, a copy of the counts is passed to
    ``normalize_counts`` (with resampling and noise enabled) and compared
    against the size-factor-scaled original on a log2 scale. The plot is
    saved as ``t<row index>.png`` in ``output_dir`` and shown, and summary
    statistics (min, max, mean, median) of both series are printed.

    Args:
        input_dir: directory passed to ``BigCountMatrix.open``.
        output_dir: directory receiving the PNG files. Defaults to the
            location that was previously hard-coded.
        max_rows: maximum number of rows to process. The default of 101
            matches the previous behaviour of stopping after row 100.
    """
    import os

    # setup matrix
    logging.debug("Opening matrix memmap files")
    bm = BigCountMatrix.open(input_dir)
    r = RandomState()

    for i in xrange(min(bm.shape[0], max_rows)):
        # Builtin float replaces the np.float alias (removed in NumPy 1.24).
        a = np.array(bm.counts[i, :], dtype=float)
        b = a.copy()
        # The return value is ignored; b is presumably modified in place
        # by normalize_counts -- confirm against its definition.
        normalize_counts(b,
                         bm.size_factors,
                         r,
                         resample=True,
                         add_noise=True,
                         noise_loc=1.0,
                         noise_scale=1.0)
        logging.debug(str(i))

        y1 = np.log2((a / bm.size_factors) + 1)
        y2 = np.log2(b + 1)
        # Sort both series by descending original value so the points of
        # the two curves stay aligned.
        order = y1.argsort()[::-1]
        y1 = y1[order]
        y2 = y2[order]
        x = np.arange(len(y1))
        fig = plt.figure()
        plt.plot(x, y2, 'ro', ms=1, mew=0, label='Resampled counts')
        plt.plot(x, y1, 'b-', lw=3, label='Original counts')
        plt.legend(markerscale=5, numpoints=3)
        plt.xlabel('Samples sorted by counts')
        plt.ylabel('log2(counts + 1)')
        plt.savefig(os.path.join(output_dir, 't%d.png' % (i)))
        plt.show()
        # Release the figure; otherwise one figure per row stays alive.
        plt.close(fig)
        print('y1 %s %s %s %s' % (y1.min(), y1.max(), y1.mean(),
                                  np.median(y1)))
        print('y2 %s %s %s %s' % (y2.min(), y2.max(), y2.mean(),
                                  np.median(y2)))
Beispiel #8
0
def normalize_count_data(input_dir,
                         output_dir='/home/mkiyer/Documents',
                         max_rows=101):
    """Compare raw and resampled counts row by row and plot the result.

    Each processed row is copied, handed to ``normalize_counts`` with
    resampling and noise switched on, and plotted against the original
    counts divided by the size factors (both as ``log2(x + 1)``). The
    figure is written to ``<output_dir>/t<row index>.png`` and displayed,
    and min/max/mean/median of both series are printed.

    Args:
        input_dir: directory passed to ``BigCountMatrix.open``.
        output_dir: target directory for the PNG files. The default is the
            path that used to be hard-coded.
        max_rows: upper bound on the number of rows processed. The default
            of 101 reproduces the former "stop after row 100" behaviour.
    """
    import os

    # setup matrix
    logging.debug("Opening matrix memmap files")
    bm = BigCountMatrix.open(input_dir)
    r = RandomState()

    num_rows = min(bm.shape[0], max_rows)
    for i in xrange(num_rows):
        # dtype=float is the portable spelling of the removed np.float alias.
        a = np.array(bm.counts[i, :], dtype=float)
        b = a.copy()
        # normalize_counts' return value is unused; it presumably updates b
        # in place -- verify against its definition.
        normalize_counts(b, bm.size_factors, r,
                         resample=True,
                         add_noise=True,
                         noise_loc=1.0,
                         noise_scale=1.0)
        logging.debug(str(i))

        y1 = np.log2((a / bm.size_factors) + 1)
        y2 = np.log2(b + 1)
        # Descending order of the original values, applied to both series.
        order = y1.argsort()[::-1]
        y1 = y1[order]
        y2 = y2[order]
        x = np.arange(len(y1))
        fig = plt.figure()
        plt.plot(x, y2, 'ro', ms=1, mew=0, label='Resampled counts')
        plt.plot(x, y1, 'b-', lw=3, label='Original counts')
        plt.legend(markerscale=5, numpoints=3)
        plt.xlabel('Samples sorted by counts')
        plt.ylabel('log2(counts + 1)')
        plt.savefig(os.path.join(output_dir, 't%d.png' % (i)))
        plt.show()
        # Close the figure so figures do not accumulate across iterations.
        plt.close(fig)
        print('y1 %s %s %s %s' % (y1.min(), y1.max(), y1.mean(),
                                  np.median(y1)))
        print('y2 %s %s %s %s' % (y2.min(), y2.max(), y2.mean(),
                                  np.median(y2)))
Beispiel #9
0
def rpkm_versus_count(input_dir):
    """Print the sum of the finite values for each column of the matrix.

    For every index ``j`` below ``shape[1]``, row ``j`` of ``counts_t`` is
    read, non-finite entries are dropped, and ``<column name> <sum>`` is
    written on its own line.
    """
    matrix = BigCountMatrix.open(input_dir)
    for col in xrange(matrix.shape[1]):
        values = matrix.counts_t[col, :]
        # Keep only finite entries so NaN/inf do not poison the total.
        finite_values = values[np.isfinite(values)]
        print('%s %s' % (matrix.colnames[col], finite_values.sum()))
Beispiel #10
0
def rpkm_versus_count(input_dir):
    """Write ``<column name> <sum of finite values>`` for every column.

    Values are taken from ``counts_t[j, :]`` of the matrix opened from
    ``input_dir``; NaN and infinite entries are excluded from the sum.
    """
    bcm = BigCountMatrix.open(input_dir)
    num_columns = bcm.shape[1]
    for idx in xrange(num_columns):
        column = bcm.counts_t[idx, :]
        finite_mask = np.isfinite(column)
        print('%s %s' % (bcm.colnames[idx], column[finite_mask].sum()))