def build_index(fa, chrom, site, strand, rlen, thread, out_dir, seq,
                seq_flag):
    '''
    Build a bowtie2 index for the sgRNA region.

    The reference FASTA is written (or linked) to ``<out_dir>/sgRNA.fa`` and
    indexed in place with ``bowtie2-build``.

    Parameters:
        fa: object exposing ``fetch(chrom, start, end)``; only used when
            seq_flag is false (presumably a pysam FASTA handle -- confirm
            against the caller)
        chrom, site, strand: location of the target site; strand '+' and
            any other value select mirrored windows around the site
        rlen: read length used to size the window around the site
        thread: value passed to ``bowtie2-build --threads``
        out_dir: directory receiving sgRNA.fa and the index files
        seq: path of a user-supplied FASTA, used when seq_flag is true
        seq_flag: if true, link ``seq`` instead of fetching from ``fa``

    Returns:
        (index_path, offset), where offset is ``rlen - 10`` on the '+'
        strand and ``rlen - 20`` otherwise (distance from window start to
        the site).

    Exits via sys.exit if bowtie2-build is not installed.
    '''
    print('Build index...')
    if strand == '+':
        start = site - (rlen - 10)
        end = site + (rlen - 20)
        offset = rlen - 10
    else:
        start = site - (rlen - 20)
        end = site + (rlen - 10)
        offset = rlen - 20
    index_path = os.path.join(out_dir, 'sgRNA.fa')
    # Remove leftovers from a previous run first: os.symlink() refuses to
    # overwrite an existing entry, and opening a stale symlink for writing
    # would truncate the file it points to (the user's own sequence file).
    if os.path.lexists(index_path):
        os.remove(index_path)
    if seq_flag:
        # A symlink target is resolved relative to the link's directory,
        # not the current working directory, so link to an absolute path.
        os.symlink(os.path.abspath(seq), index_path)
    else:
        # fetch sgRNA region sequence
        with open(index_path, 'w') as out:
            out.write('>sgRNA_region\n')
            out.write(
                dna_to_rna(fa.fetch(chrom, start, end), strand=strand) + '\n')
    # build index
    if which('bowtie2-build'):
        command = 'bowtie2-build -q --threads %s %s %s'
        command = command % (thread, index_path, index_path)
        run_command(command, 'Error: cannot build index for sgRNA!')
    else:
        sys.exit('Error: no bowtie2-build installed!')
    return index_path, offset
def main():
    '''
    Command-line entry point: extract junctions from a BAM file and
    optionally convert the resulting BED file to BigBed.

    Options are parsed from the module docstring with docopt. When ``--bb``
    is set and ``bedToBigBed`` is available, ``<prefix>_junc.bb`` is written
    to the output directory, where prefix is the BAM file name without its
    extension.

    Exits via sys.exit if the BigBed conversion fails.
    '''
    # Local import so this function does not depend on the module's
    # top-level import list.
    import subprocess
    # parse options
    options = docopt(__doc__, version=__version__)
    # check output_dir
    if options['-o'] == './':
        out_dir = os.getcwd()
    else:
        out_dir = check_dir(options['-o'])
    # fetch junction bed file
    junc_f = fetch_juncfile(options['<bam>'], url=options['--url'],
                            dir=out_dir, uniq=options['--uniq'],
                            stranded=options['--stranded'],
                            min=int(options['--min-reads']))
    # create junction bigbed file in case
    if options['--bb'] and which('bedToBigBed') is not None:
        prefix = os.path.splitext(os.path.split(options['<bam>'])[-1])[0]
        bb_path = os.path.join(out_dir, prefix + '_junc.bb')
        # Text mode ('w+') is required to write str on Python 3; both
        # handles are released even if an error interrupts the conversion.
        with pysam.AlignmentFile(options['<bam>'], 'rb') as bamf, \
                tempfile.NamedTemporaryFile('w+') as chrom_size:
            # chromosome sizes come from the BAM header
            for seq in bamf.header['SQ']:
                chrom_size.write('%s\t%s\n' % (seq['SN'], seq['LN']))
            # make the buffered lines visible to the external tool
            chrom_size.flush()
            # An argument list avoids shell quoting problems with paths
            # containing spaces; a non-zero code also covers death by signal.
            return_code = subprocess.call(['bedToBigBed', '-type=bed12',
                                           str(junc_f), chrom_size.name,
                                           bb_path])
        if return_code:
            sys.exit('Error: cannot convert bed to BigBed!')
def fseq(options):
    '''
    Run F-seq on both strand-specific 5'-end BED files and merge the peaks.

    Peaks of both strands are written to ``rampage_peaks.txt`` inside the
    rampage directory. When the total read count stored in
    ``total_counts.txt`` is non-zero, each peak line gets an extra RPM
    column computed from its 10th whitespace-separated field; otherwise
    lines are copied unchanged.
    '''
    # F-seq must be available before anything else is attempted
    if not which('fseq'):
        sys.exit('Error: No F-seq installed!')
    # parse options
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    wig_flag = options['--wig']
    percent = float(options['-p'])
    counts_path = os.path.join(folder, 'total_counts.txt')
    with open(counts_path, 'r') as counts:
        total = int(counts.read().rstrip())
    # run F-seq, one strand at a time
    flist = {'+': 'rampage_plus_5end.bed',
             '-': 'rampage_minus_5end.bed'}
    all_peak_f = os.path.join(folder, 'rampage_peaks.txt')
    with open(all_peak_f, 'w') as out:
        for strand, bed_name in flist.items():
            peak_f = run_fseq(folder, bed_name, strand, flength, wig_flag,
                              percent)
            with open(peak_f, 'r') as peaks:
                for record in peaks:
                    if not total:
                        # no library size available: pass the line through
                        out.write(record)
                        continue
                    # calculate RPM
                    stripped = record.rstrip()
                    reads = int(stripped.split()[9])
                    rpm = reads * 1000000.0 / total
                    out.write(stripped + '\t%f\n' % rpm)
def assemble(options):
    '''
    Assemble transcripts from RNA-seq alignments with StringTie.

    A single BAM file is assembled and converted directly; several BAM
    files (replicates) are assembled one by one, merged, and the merged
    GTF is converted with ``merge=True``.
    '''
    # StringTie is mandatory
    if not which('stringtie'):
        sys.exit('Error: No StringTie installed!')
    # parse options
    gtf = options['--gtf']
    thread = options['--thread']
    bamf = options['<bam>']
    dir_path = options['--dir']
    if len(bamf) != 1:
        # have replicates: assemble each, then merge
        gtf_list = [run_stringtie(bam, gtf, dir_path, thread)
                    for bam in bamf]
        merged_prefix = options['--prefix']
        merged_gtf = merge_stringtie(gtf_list, gtf, dir_path, merged_prefix,
                                     thread)
        # convert GTF to GenePred
        convert_gtf(merged_gtf, merge=True)
        return
    # no replicates: assemble the only BAM file
    single_gtf = run_stringtie(bamf[0], gtf, dir_path, thread)
    # convert GTF to GenePred
    convert_gtf(single_gtf)
def bowtie2_align(index, read, thread, out_dir):
    '''
    Align reads to the index with bowtie2 and keep only mapped reads.

    Parameters:
        index: bowtie2 index prefix (passed to ``-x``)
        read: reads file (passed to ``-U``)
        thread: value passed to ``-p``
        out_dir: directory receiving the output BAM

    Returns:
        Path of ``<out_dir>/cs.bam``, which contains every alignment that
        is not flagged as unmapped.

    Exits via sys.exit if bowtie2 is not installed.
    '''
    print('Align reads...')
    if not which('bowtie2'):
        sys.exit('Error: no bowtie2 installed!')
    bam = os.path.join(out_dir, 'cs.bam')
    # The temporary SAM file is only a hand-over between bowtie2 and pysam
    # (both access it by name); the context manager releases it even when
    # alignment or conversion fails.
    with tempfile.NamedTemporaryFile('w+') as sam:
        command = 'bowtie2 --quiet --end-to-end -p %s -x %s -U %s -S %s'
        command = command % (thread, index, read, sam.name)
        run_command(command, 'Error in bowtie2 alignment!')
        with pysam.AlignmentFile(sam.name, 'r') as sam_f:
            with pysam.AlignmentFile(bam, 'wb', template=sam_f) as bam_f:
                # 'aln' rather than 'read' so the parameter is not shadowed
                for aln in sam_f:
                    if not aln.is_unmapped:
                        bam_f.write(aln)
    return bam
def fseq(options):
    '''
    Run F-seq on both strand-specific 5'-end BED files and merge the peaks.

    Peaks of both strands are written to the file given by ``-o``. When the
    total read count stored in ``total_counts.txt`` is non-zero, it is
    appended to every peak line as an extra column; otherwise lines are
    copied unchanged.
    '''
    # F-seq must be available before anything else is attempted
    if not which('fseq'):
        sys.exit('Error: No F-seq installed!')
    # parse options
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    percent = [0.95, 0.9, 0.85, 0.8, 0.75, 0.7]
    counts_path = os.path.join(folder, 'total_counts.txt')
    with open(counts_path, 'r') as counts:
        total = int(counts.read().rstrip())
    # run F-seq, one strand at a time
    flist = {'+': 'rampage_plus_5end.bed',
             '-': 'rampage_minus_5end.bed'}
    all_peak_f = options['-o']
    with open(all_peak_f, 'w') as out:
        for strand, bed_name in flist.items():
            peak_f = run_fseq(folder, bed_name, strand, flength, percent)
            with open(peak_f, 'r') as peaks:
                for record in peaks:
                    if not total:
                        # no library size available: pass the line through
                        out.write(record)
                        continue
                    out.write(record.rstrip() + '\t%d\n' % total)
def check_pattern(options):
    '''
    Evaluate the signal pattern at each junction site and write the result.

    Coverage comes either from ``--bam`` (converted to bedGraph and then
    BigWig, per strand when ``--stranded`` is set) or from an existing
    ``--bigwig`` file. For every distinct (chrom, site, strand) key taken
    from columns 2, 5 and the first '|'-separated item of column 7 of the
    junction file, ``cal_ratio`` is run once in a worker pool. All junction
    lines sharing a key are written to ``<rs_pattern>`` with the fold and
    p-value appended; keys for which ``cal_ratio`` returns a p-value of
    None are dropped.

    Exits via sys.exit when bedGraphToBigWig is missing, when no usable
    coverage input is given, or when the junction file does not exist.
    '''
    # parse options
    strand_flag = options['--stranded']
    remote_flag = options['--remote']
    thread = int(options['--thread'])
    # check bigwig
    if options['--bam']:
        if which('bedGraphToBigWig') is None:
            sys.exit('Could not find bedGraphToBigWig!')
        if strand_flag:
            plus_bg, minus_bg = bam_to_bedgraph(options['--bam'],
                                                url=remote_flag,
                                                stranded=True)
            plus_bw = bg_to_bw(plus_bg, options['--chrom-size'])
            minus_bw = bg_to_bw(minus_bg, options['--chrom-size'])
        else:
            bg = bam_to_bedgraph(options['--bam'], url=remote_flag)
            bw = bg_to_bw(bg, options['--chrom-size'])
    else:
        bigwig = options['--bigwig']
        # Fail here with a clear message instead of hitting an unbound
        # variable once the first junction is processed.
        if not bigwig or not os.path.isfile(bigwig):
            sys.exit('Wrong bigwig file: %s' % bigwig)
        if strand_flag:
            # Strand-specific BigWigs are only ever produced from --bam.
            sys.exit('Error: --stranded requires --bam!')
        bw = bigwig
    # parse junction file
    if os.path.isfile(options['--junc']):
        junc_path = options['--junc']
    else:
        sys.exit('Wrong junc file: %s' % options['--junc'])
    # check pattern
    junc_info = defaultdict(list)
    p = Pool(thread)
    result = []
    with open(junc_path, 'r') as junc_f:
        for junc in junc_f:
            info = junc.split()
            chrom = info[1]
            strand = info[4]
            rs_site = info[6].split('|')[0]
            rs_info = '\t'.join([chrom, rs_site, strand])
            # submit each distinct site only once
            if rs_info not in junc_info:
                if strand_flag:
                    target_bw = plus_bw if strand == '+' else minus_bw
                else:
                    target_bw = bw
                result.append(p.apply_async(cal_ratio,
                                            args=(target_bw, rs_info,
                                                  options)))
            junc_info[rs_info].append(info)
    p.close()
    p.join()
    with open(options['<rs_pattern>'], 'w') as out:
        for r in result:
            pvalue, fold, rs_info = r.get()
            if pvalue is not None:
                for junc in junc_info[rs_info]:
                    out.write('\t'.join(junc))
                    out.write('\t%f\t%f\n' % (fold, pvalue))