Ejemplo n.º 1
0
Archivo: csf.py Proyecto: kepbod/stuff
def build_index(fa, chrom, site, strand, rlen, thread, out_dir, seq, seq_flag):
    '''
    Prepare the sgRNA reference FASTA and build a bowtie2 index for it

    The reference is written to (or linked at) ``<out_dir>/sgRNA.fa`` and the
    bowtie2 index is built with that same path as its prefix.

    Parameters:
        fa: object exposing ``fetch(chrom, start, end)``; only used when
            ``seq_flag`` is false (presumably an indexed FASTA handle --
            confirm against the caller).
        chrom, site, strand: location of the site; ``strand`` equal to '+'
            selects one window layout, any other value selects the mirrored
            one.
        rlen: read length used to size the window around ``site``.
        thread: value passed to ``bowtie2-build --threads``.
        out_dir: directory that receives ``sgRNA.fa`` and the index files.
        seq: path of a ready-made reference, used only when ``seq_flag`` is
            true.
        seq_flag: when true, link ``seq`` instead of fetching the region.

    Returns:
        ``(index_path, offset)`` where ``offset`` is the distance between the
        window start and ``site``.

    Exits the program if ``bowtie2-build`` cannot be found.
    '''
    print('Build index...')
    if strand == '+':
        start = site - (rlen - 10)
        end = site + (rlen - 20)
        offset = rlen - 10
    else:
        start = site - (rlen - 20)
        end = site + (rlen - 10)
        offset = rlen - 20
    index_path = os.path.join(out_dir, 'sgRNA.fa')
    if seq_flag:
        # A leftover file/link from an earlier run would make os.symlink
        # raise; the fetch branch below overwrites, so do the same here.
        if os.path.lexists(index_path):
            os.remove(index_path)
        # Link to the absolute path: a relative target is resolved against
        # the directory holding the link, not the current working directory,
        # and would dangle whenever out_dir differs from the cwd.
        os.symlink(os.path.abspath(seq), index_path)
    else:
        # fetch sgRNA region sequence
        with open(index_path, 'w') as out:
            out.write('>sgRNA_region\n')
            out.write(
                dna_to_rna(fa.fetch(chrom, start, end), strand=strand) + '\n')
    # build index
    if which('bowtie2-build'):
        command = 'bowtie2-build -q --threads %s %s %s'
        command = command % (thread, index_path, index_path)
        run_command(command, 'Error: cannot build index for sgRNA!')
    else:
        sys.exit('Error: no bowtie2-build installed!')
    return index_path, offset
Ejemplo n.º 2
0
def main():
    '''
    Command-line entry point: extract junctions from a BAM file

    Parses the docopt options, writes the junction BED file through
    ``fetch_juncfile`` and, when ``--bb`` is set and ``bedToBigBed`` is
    available, converts that BED file into ``<prefix>_junc.bb`` in the output
    directory. Exits the program if the conversion fails.
    '''
    # local import: only needed for the bedToBigBed call below
    import subprocess

    # parse options
    options = docopt(__doc__, version=__version__)
    # check output_dir
    if options['-o'] == './':
        out_dir = os.getcwd()
    else:
        out_dir = check_dir(options['-o'])
    # fetch junction bed file
    junc_f = fetch_juncfile(options['<bam>'],
                            url=options['--url'],
                            dir=out_dir,
                            uniq=options['--uniq'],
                            stranded=options['--stranded'],
                            min=int(options['--min-reads']))
    # create junction bigbed file in case
    if options['--bb'] and which('bedToBigBed') is not None:
        prefix = os.path.splitext(os.path.split(options['<bam>'])[-1])[0]
        bb_path = os.path.join(out_dir, prefix + '_junc.bb')
        # Text mode ('w+') is required: the default mode is binary and
        # rejects str on Python 3. Both handles are context-managed so they
        # are released even when sys.exit() is raised below.
        with pysam.AlignmentFile(options['<bam>'], 'rb') as bamf, \
                tempfile.NamedTemporaryFile('w+') as chrom_size:
            for seq in bamf.header['SQ']:
                chrom_size.write('%s\t%s\n' % (seq['SN'], seq['LN']))
            # make sure bedToBigBed sees the complete chrom-size table
            chrom_size.flush()
            # Argument list instead of a shell string: paths with spaces
            # stay intact, and a tool killed by a signal yields a non-zero
            # code (os.system(...) >> 8 reported that case as success).
            return_code = subprocess.call(['bedToBigBed', '-type=bed12',
                                           str(junc_f), chrom_size.name,
                                           bb_path])
            if return_code:
                sys.exit('Error: cannot convert bed to BigBed!')
Ejemplo n.º 3
0
def fseq(options):
    '''
    Run F-seq on both strand-specific 5' end files and merge the peaks

    Peaks from the plus and minus strand runs are concatenated into
    ``rampage_peaks.txt`` inside the RAMPAGE folder. When the total read
    count stored in ``total_counts.txt`` is non-zero, an RPM column computed
    from field 10 of every peak line is appended; otherwise lines are copied
    unchanged.
    '''
    # F-seq has to be available before anything else is attempted
    fseq_found = which('fseq')
    if not fseq_found:
        sys.exit('Error: No F-seq installed!')
    # collect settings
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    wig_flag = options['--wig']
    percent = float(options['-p'])
    count_path = os.path.join(folder, 'total_counts.txt')
    with open(count_path, 'r') as count_f:
        total = int(count_f.read().rstrip())
    # one 5' end bed file per strand
    strand_beds = {'+': 'rampage_plus_5end.bed',
                   '-': 'rampage_minus_5end.bed'}
    merged_path = os.path.join(folder, 'rampage_peaks.txt')
    with open(merged_path, 'w') as merged:
        for strand, bed_name in strand_beds.items():
            peak_path = run_fseq(folder, bed_name, strand, flength, wig_flag,
                                 percent)
            with open(peak_path, 'r') as peaks:
                for record in peaks:
                    if not total:
                        # no library size available: pass the line through
                        merged.write(record)
                        continue
                    # normalise the read count to reads per million
                    stripped = record.rstrip()
                    reads = int(stripped.split()[9])
                    rpm = reads * 1000000.0 / total
                    merged.write('%s\t%f\n' % (stripped, rpm))
Ejemplo n.º 4
0
def assemble(options):
    '''
    Assemble transcripts from one or more BAM files with StringTie

    A single BAM file is assembled and converted directly. Any other number
    of BAM files is assembled one by one, merged under the ``--prefix`` name
    and the merged GTF is converted with ``merge=True``.
    '''
    # StringTie is mandatory
    if not which('stringtie'):
        sys.exit('Error: No StringTie installed!')
    # collect settings
    gtf = options['--gtf']
    thread = options['--thread']
    bamf = options['<bam>']
    dir_path = options['--dir']
    if len(bamf) != 1:
        # replicates: assemble each sample, then merge the assemblies
        per_sample_gtf = [run_stringtie(bam, gtf, dir_path, thread)
                          for bam in bamf]
        merged_prefix = options['--prefix']
        merged_gtf = merge_stringtie(per_sample_gtf, gtf, dir_path,
                                     merged_prefix, thread)
        # GTF -> GenePred for the merged assembly
        convert_gtf(merged_gtf, merge=True)
        return
    # single sample: assemble and convert straight away
    single_gtf = run_stringtie(bamf[0], gtf, dir_path, thread)
    convert_gtf(single_gtf)
Ejemplo n.º 5
0
Archivo: csf.py Proyecto: kepbod/stuff
def bowtie2_align(index, read, thread, out_dir):
    '''
    Align reads with bowtie2 and keep only the mapped alignments

    bowtie2 writes its SAM output to a temporary file; every alignment that
    is not flagged as unmapped is then copied into ``<out_dir>/cs.bam``.

    Parameters:
        index: bowtie2 index prefix (``-x``).
        read: reads passed to bowtie2 ``-U``.
        thread: value passed to bowtie2 ``-p``.
        out_dir: directory that receives ``cs.bam``.

    Returns:
        Path of the BAM file that was written.

    Exits the program if ``bowtie2`` cannot be found.
    '''
    print('Align reads...')
    if not which('bowtie2'):
        sys.exit('Error: no bowtie2 installed!')
    bam = os.path.join(out_dir, 'cs.bam')
    # The context manager guarantees the temporary SAM file is closed (and
    # thereby removed) even if the alignment or the conversion fails.
    with tempfile.NamedTemporaryFile('w+') as sam:
        command = 'bowtie2 --quiet --end-to-end -p %s -x %s -U %s -S %s'
        command = command % (thread, index, read, sam.name)
        run_command(command, 'Error in bowtie2 alignment!')
        # The SAM file is reopened by name, so the handle held above never
        # needs repositioning.
        with pysam.AlignmentFile(sam.name, 'r') as sam_f, \
                pysam.AlignmentFile(bam, 'wb', template=sam_f) as bam_f:
            # distinct loop name so the ``read`` parameter is not shadowed
            for alignment in sam_f:
                if not alignment.is_unmapped:
                    bam_f.write(alignment)
    return bam
Ejemplo n.º 6
0
def fseq(options):
    '''
    Run F-seq on both strand-specific 5' end files over a fixed threshold set

    Peaks from the plus and minus strand runs are concatenated into the file
    given by ``-o``. When the total read count stored in ``total_counts.txt``
    is non-zero, that total is appended to every peak line as an extra
    column; otherwise lines are copied unchanged.
    '''
    # F-seq has to be available before anything else is attempted
    fseq_found = which('fseq')
    if not fseq_found:
        sys.exit('Error: No F-seq installed!')
    # collect settings
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    percent = [0.95, 0.9, 0.85, 0.8, 0.75, 0.7]
    count_path = os.path.join(folder, 'total_counts.txt')
    with open(count_path, 'r') as count_f:
        total = int(count_f.read().rstrip())
    # one 5' end bed file per strand
    strand_beds = {'+': 'rampage_plus_5end.bed',
                   '-': 'rampage_minus_5end.bed'}
    merged_path = options['-o']
    with open(merged_path, 'w') as merged:
        for strand, bed_name in strand_beds.items():
            peak_path = run_fseq(folder, bed_name, strand, flength, percent)
            with open(peak_path, 'r') as peaks:
                for record in peaks:
                    if not total:
                        # no library size available: pass the line through
                        merged.write(record)
                        continue
                    # tag the peak with the library size
                    merged.write('%s\t%d\n' % (record.rstrip(), total))
Ejemplo n.º 7
0
def check_pattern(options):
    '''
    Score every distinct site listed in the junction file against coverage

    Coverage comes either from ``--bam`` (converted to bigwig through
    bedGraph, optionally per strand) or from an existing ``--bigwig`` file.
    Each distinct (chrom, site, strand) combination found in ``--junc`` is
    passed to ``cal_ratio`` in a worker pool. Junction lines whose site
    yields a p-value are written to ``<rs_pattern>`` followed by the fold
    and p-value columns.

    Exits the program when bedGraphToBigWig is missing, when the bigwig or
    junction file does not exist, or when ``--stranded`` is requested
    without ``--bam`` (per-strand tracks are only produced from a BAM file).
    '''
    # parse options
    strand_flag = options['--stranded']
    remote_flag = options['--remote']
    thread = int(options['--thread'])
    # check bigwig
    bw = plus_bw = minus_bw = None
    if options['--bam']:
        if which('bedGraphToBigWig') is None:
            sys.exit('Could not find bedGraphToBigWig!')
        if strand_flag:
            plus_bg, minus_bg = bam_to_bedgraph(options['--bam'],
                                                url=remote_flag,
                                                stranded=True)
            plus_bw = bg_to_bw(plus_bg, options['--chrom-size'])
            minus_bw = bg_to_bw(minus_bg, options['--chrom-size'])
        else:
            bg = bam_to_bedgraph(options['--bam'], url=remote_flag)
            bw = bg_to_bw(bg, options['--chrom-size'])
    else:
        bigwig = options['--bigwig']
        # Fail early with a clear message instead of hitting an undefined
        # coverage track once the first junction is processed.
        if not bigwig or not os.path.isfile(bigwig):
            sys.exit('Wrong bigwig file: %s' % bigwig)
        if strand_flag:
            sys.exit('Error: --stranded requires --bam!')
        bw = bigwig
    # parse junction file
    if os.path.isfile(options['--junc']):
        junc_path = options['--junc']
    else:
        sys.exit('Wrong junc file: %s' % options['--junc'])
    # check pattern
    junc_info = defaultdict(list)
    p = Pool(thread)
    result = []
    with open(junc_path, 'r') as junc_f:
        for junc in junc_f:
            info = junc.split()
            chrom = info[1]
            strand = info[4]
            rs_site = info[6].split('|')[0]
            rs_info = '\t'.join([chrom, rs_site, strand])
            # submit each distinct site only once; later junctions sharing
            # the site reuse its result when the output is written
            if rs_info not in junc_info:
                if strand_flag:
                    track = plus_bw if strand == '+' else minus_bw
                else:
                    track = bw
                result.append(
                    p.apply_async(cal_ratio, args=(track, rs_info, options)))
            junc_info[rs_info].append(info)
        p.close()
        p.join()
    with open(options['<rs_pattern>'], 'w') as out:
        for r in result:
            pvalue, fold, rs_info = r.get()
            # sites without a p-value are left out of the report
            if pvalue is not None:
                for junc in junc_info[rs_info]:
                    out.write('\t'.join(junc))
                    out.write('\t%f\t%f\n' % (fold, pvalue))