Beispiel #1
0
def getregpot(bw, tss, output, alpha, exclude):
    """Run ``_bw.getrp`` and rewrite ``output`` in place, one row per TSS line.

    Args:
        bw: Passed through unchanged as the first argument of ``_bw.getrp``.
        tss: Path of a whitespace-delimited text file. It is passed to
            ``_bw.getrp`` and then re-read here; column 4 must have the
            form ``<a>:<b>``.
        output: Path handed to ``_bw.getrp`` and afterwards read back
            (presumably ``_bw.getrp`` writes its result there -- confirm
            against the extension). The file is then overwritten with the
            merged table described below.
        alpha: Passed through unchanged as the fourth argument of
            ``_bw.getrp``.
        exclude: String ``"left,right"`` holding two integers. When both are
            equal, ``0, 0`` is passed to ``_bw.getrp``; when
            ``left < right`` the pair is passed as given.

    Raises:
        SystemExit: With exit code 1 (after a message on stderr) when
            ``left > right``.

    The rewritten ``output`` holds, per line of ``tss``, the tab-joined
    fields: column 1, column 2, column 3, ``<a>``, the value looked up from
    the ``_bw.getrp`` result, ``<b>`` and the last column of the TSS line.
    """
    bounds = exclude.split(',')
    left = int(bounds[0])
    right = int(bounds[1])
    if left > right:
        # Written with .write() instead of the Python 2 only
        # ``print >> stream`` form, so the message and exit code are
        # produced on Python 3 as well.
        sys.stderr.write("--exclude left should be smaller than right\n")
        sys.exit(1)

    if left == right:
        _bw.getrp(bw, tss, output, alpha, 0, 0)
    else:
        _bw.getrp(bw, tss, output, alpha, left, right)

    # Index column 5 of the getrp result by its first four columns so it can
    # be matched against the identical four leading columns of the TSS file.
    rp = {}
    with open(output) as inf:
        for line in inf:
            fields = line.strip().split()
            rp['\t'.join(fields[:4])] = fields[4]

    # The result file is fully loaded above, so it is safe to overwrite it.
    # The context manager guarantees the rewritten table is flushed and
    # closed even if a lookup fails part-way through.
    with open(output, 'w') as fout, open(tss) as inf:
        for line in inf:
            fields = line.strip().split()
            name_parts = fields[3].split(':')
            fout.write('\t'.join([
                fields[0], fields[1], fields[2], name_parts[0],
                rp['\t'.join(fields[:4])], name_parts[1], fields[-1]
            ]) + '\n')
# Use _bw to get (or reuse) the per-file RP table for each factor, counting
# the non-empty tables in shape2 and collecting their names in files.
status = 0
shape2 = 0
files = []
for key in factors:
    fin = glob.glob(os.path.join(directory[int(key)], '*treat*bw'))
    if not fin:
        # No matching bigWig in this factor's directory.
        continue
    bigwig = fin[0]
    print(bigwig)
    rp_table = os.path.basename(bigwig) + '_1kb.txt'
    if not os.path.exists(rp_table):
        # Table not produced yet: only build it when bigWigInfo exits
        # cleanly on the bigWig.
        status = os.system('bigWigInfo %s' % bigwig)
        if status != 0:
            continue
        _bw.getrp(bigwig, 'histonerp/test/hg38.tss', rp_table, 1e3, 0, 0)
    # Freshly built or already present: keep the table only if it has content.
    if os.path.getsize(rp_table) > 0:
        shape2 += 1
        files.append(rp_table)

f = h5py.File('margeRP_%s_1kb.h5' % factor, 'a')
if not ('RP' in f.keys()):
    RP = f.create_dataset("RP",
                          dtype=np.float32,
                          shape=(l, shape2),
Beispiel #3
0
import h5py, _bw
import numpy as np
import os
from pkg_resources import resource_filename
import gzip

import subprocess

# Resolve the "<species>.tss" resource shipped inside the _bw package and
# run _bw.getrp on the input bigWig, targeting snakemake.output.rp.
meta = resource_filename("_bw", "%s.tss" % (snakemake.params.sp))
_bw.getrp(snakemake.input.bw, meta, snakemake.output.rp, 1e4, 0, 0)

# single sample RP to HDF5
# Column 5 of every line is the value stored; the text file is closed
# before the HDF5 store is opened.
with open(snakemake.output.rp) as inf:
    tmp = np.array([float(line.split()[4]) for line in inf], dtype='float32')
n = len(tmp)

with h5py.File(snakemake.output.h5, 'a') as store:
    # One column, one row per line of the RP file.
    RP = store.create_dataset("RP",
                              dtype=np.float32,
                              shape=(n, 1),
                              compression='gzip',
                              shuffle=True,
                              fletcher32=True)
    RP[:, 0] = tmp
    store.flush()

# Run bigWigAverageOverBed without a shell: arguments are passed as a list,
# so paths containing spaces or shell metacharacters are handled, and a
# non-zero exit status raises CalledProcessError instead of silently leaving
# a partial snakemake.output.ct behind. The literal "stdout" output argument
# is kept from the original command line; the tool's standard output is
# captured into the count file.
with open(snakemake.output.ct, 'w') as ct_out:
    subprocess.check_call(
        ['bigWigAverageOverBed',
         str(snakemake.input.bw),
         str(snakemake.params.bin1kb),
         'stdout'],
        stdout=ct_out)
Beispiel #4
0
            # check status
            status = os.system('bigWigInfo %s' % fin[0])
            if status == 0:
                # without promoter +/- 1000bp
                #if factors[key] == 'H3K4me3':
                #    _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e3, -1000, 1000)
                #else:
                #    _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e4, -1000, 1000)

                # with promoter +/- 1000bp
                #if factors[key] == 'H3K4me3':
                #    _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e3, 0, 0)
                #else:
                #    _bw.getrp(fin[0], 'histonerp/test/mm10.tss', os.path.basename(fin[0])+'_rdown1kb.txt', 1e4, 0, 0)

                _bw.getrp(fin[0], '../MARGEData/histonerp/test/mm10.tss',
                          os.path.basename(fin[0]) + '.txt', 1e4, 0, 0)

                if os.path.getsize(os.path.basename(fin[0]) + '.txt') > 0:
                    shape2 += 1
                    files.append(os.path.basename(fin[0]) + '.txt')
        else:
            # get shape 2
            if os.path.getsize(os.path.basename(fin[0]) + '.txt') > 0:
                shape2 += 1
                files.append(os.path.basename(fin[0]) + '.txt')

f = h5py.File('margeRP_%s_mm.h5' % factor, 'a')
if not ('RP' in f.keys()):
    RP = f.create_dataset("RP",
                          dtype=np.float32,
                          shape=(l, shape2),