def fseq(options):
    ''' Call peaks using F-seq '''
    # parse options
    if not which('fseq'):
        sys.exit('Error: No F-seq installed!')
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    wig_flag = options['--wig']
    percent = float(options['-p'])
    with open(os.path.join(folder, 'total_counts.txt'), 'r') as f:
        total = int(f.read().rstrip())
    # run F-seq
    flist = {'+': 'rampage_plus_5end.bed', '-': 'rampage_minus_5end.bed'}
    all_peak_f = os.path.join(folder, 'rampage_peaks.txt')
    with open(all_peak_f, 'w') as out:
        for strand in flist:
            peak_f = run_fseq(folder, flist[strand], strand, flength,
                              wig_flag, percent)
            with open(peak_f, 'r') as f:
                for line in f:
                    if total:
                        # calculate RPM
                        reads = int(line.rstrip().split()[9])
                        rpm = reads * 1000000.0 / total
                        out.write(line.rstrip() + '\t%f\n' % rpm)
                    else:
                        out.write(line)
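
# NOTE: `which` is not defined in this snippet; a minimal sketch, assuming it
# is a thin wrapper around the standard library (hypothetical helper):
import shutil

def which(program):
    ''' Return the full path of `program` if it is on PATH, else None '''
    return shutil.which(program)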
def main():
    # parse options
    options = docopt(__doc__, version=__version__)
    # check output_dir
    if options['-o'] == './':
        dir = os.getcwd()
    else:
        dir = check_dir(options['-o'])
    # fetch junction bed file
    junc_f = fetch_juncfile(options['<bam>'], url=options['--url'], dir=dir,
                            uniq=options['--uniq'],
                            stranded=options['--stranded'],
                            min=int(options['--min-reads']))
    # create junction bigbed file if requested and bedToBigBed is available
    if options['--bb'] and which('bedToBigBed') is not None:
        prefix = os.path.splitext(os.path.split(options['<bam>'])[-1])[0]
        bamf = pysam.AlignmentFile(options['<bam>'], 'rb')
        # open in text mode ('w+') so chromosome sizes are written as text
        # that bedToBigBed can read back
        with tempfile.NamedTemporaryFile(mode='w+') as chrom_size:
            for seq in bamf.header['SQ']:
                chrom_size.write('%s\t%s\n' % (seq['SN'], seq['LN']))
            chrom_size.seek(0)
            bb_path = os.path.join(dir, prefix + '_junc.bb')
            return_code = os.system('bedToBigBed -type=bed12 %s %s %s'
                                    % (junc_f, chrom_size.name,
                                       bb_path)) >> 8
            if return_code:
                sys.exit('Error: cannot convert bed to BigBed!')
        bamf.close()
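
# `fetch_juncfile` is defined elsewhere; a minimal sketch of the core idea,
# assuming it collects splice junctions from 'N' CIGAR operations in an
# indexed BAM (function name, signature, and output are assumptions):
import pysam
from collections import Counter

def fetch_junctions(bam_path):
    ''' Count (chrom, start, end) splice junctions in an indexed BAM '''
    juncs = Counter()
    with pysam.AlignmentFile(bam_path, 'rb') as bamf:
        for read in bamf.fetch():
            pos = read.reference_start
            for op, length in read.cigartuples or []:
                if op == 3:  # N: skipped region, i.e. a splice junction
                    juncs[(read.reference_name, pos, pos + length)] += 1
                if op in (0, 2, 3, 7, 8):  # ops that consume the reference
                    pos += length
    return juncs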
def entropy(options):
    ''' Calculate entropy for each cluster '''
    # parse options
    folder = check_dir(options['<rampagedir>'])
    link_f = check_bed(os.path.join(folder, 'rampage_link.bed'),
                       return_handle=False)
    threads = int(options['--thread'])
    with open(os.path.join(folder, 'rampage_peaks.txt'), 'r') as peak:
        result = Parallel(n_jobs=threads)(delayed(cal_entropy)(line, link_f)
                                          for line in peak)
    with open(os.path.join(folder, 'rampage_entropy.txt'), 'w') as out:
        for r in result:
            out.write(r)
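
# `cal_entropy` is defined elsewhere; a minimal sketch of one plausible
# implementation, assuming `link_f` is a bgzipped, tabix-indexed BED file and
# entropy is Shannon entropy over link start positions inside the peak
# (the field layout and formula here are assumptions):
import math
import pysam
from collections import Counter

def cal_entropy(line, link_f):
    chrom, start, end = line.split()[:3]
    counts = Counter()
    with pysam.TabixFile(link_f) as links:
        for link in links.fetch(chrom, int(start), int(end)):
            counts[link.split()[1]] += 1  # tally links by start coordinate
    total = sum(counts.values())
    if not total:
        return '%s\t0.0\n' % line.rstrip()
    ent = -sum(float(n) / total * math.log(float(n) / total, 2)
               for n in counts.values())
    return '%s\t%f\n' % (line.rstrip(), ent)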
def main():
    # parse options
    options = docopt(__doc__, version=__version__)
    if options['--seq']:
        if not os.path.isfile(options['--seq']):
            sys.exit('Error: wrong seq file!')
        seq = os.path.abspath(options['--seq'])
        seq_flag = True
    else:
        seq = None
        seq_flag = False
    fa = check_fasta(options['--genome'])
    chrom = options['--chrom']
    site = int(options['--site'])
    strand = '+' if options['--strand'] == '1' else '-'
    rlen = int(options['--read-length'])
    alen = int(options['--region-length'])
    clen = int(options['--check-region-length'])
    thread = options['--thread']
    skip_flag = options['--skip-alignment']
    # check output directory
    if not skip_flag:  # not skip alignment
        out_dir = create_dir(options['<out_dir>'])
    else:  # skip alignment
        out_dir = check_dir(options['<out_dir>'])
    # build index for sgRNA
    index_path, offset = build_index(fa, chrom, site, strand, rlen, thread,
                                     out_dir, seq, seq_flag)
    if not skip_flag:  # not skip alignment
        # deal with reads file
        reads = tempfile.NamedTemporaryFile(mode='w+')
        if options['-R']:
            fq_lst = options['-R'].split(',')
            convert_read(reads, single=fq_lst)
        else:
            fq1_lst = options['-1'].split(',')
            fq2_lst = options['-2'].split(',')
            convert_read(reads, fq1=fq1_lst, fq2=fq2_lst)
        reads.seek(0)
        read_path = reads.name
        # map reads with bowtie2
        bam = bowtie2_align(index_path, read_path, thread, out_dir)
        # remove tempfile
        reads.close()
    else:
        bam = os.path.join(out_dir, 'cs.bam')
    # fetch cleavage site reads
    fetch_reads(index_path, offset, alen, clen, bam, out_dir)
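
# The option names parsed above imply a docopt interface roughly like the
# following usage string (a hypothetical reconstruction, not the original):
_usage_sketch = '''
Usage:
    cleavage_site [options] <out_dir>

Options:
    --genome FASTA             Reference genome FASTA file.
    --chrom CHROM              Chromosome of the cleavage site.
    --site POS                 Cleavage site coordinate.
    --strand STRAND            Strand ('1' for plus, anything else for minus).
    --read-length LEN          Read length.
    --region-length LEN        Length of the region to extract.
    --check-region-length LEN  Length of the check region.
    --seq FILE                 Optional sequence file for the sgRNA.
    --thread N                 Number of alignment threads.
    --skip-alignment           Reuse <out_dir>/cs.bam instead of aligning.
    -R FQ                      Comma-separated single-end FASTQ files.
    -1 FQ1                     Comma-separated paired-end mate-1 files.
    -2 FQ2                     Comma-separated paired-end mate-2 files.
'''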
def assign_peak(options):
    ''' Assign rampage peaks to genes '''
    # parse options
    if options['--ref']:
        db = options['--ref']
        ref_flag = True
    elif options['--db']:
        import gffutils
        db = gffutils.FeatureDB(options['--db'])
        ref_flag = False
    else:
        import gffutils
        gtf_f = options['--gtf']
        prefix = os.path.splitext(os.path.basename(gtf_f))[0]
        db = gffutils.create_db(gtf_f, prefix + '.db', force=True,
                                disable_infer_transcripts=True)
        ref_flag = False
    folder = check_dir(options['<rampagepeak>'])
    rampage = check_bed(os.path.join(folder, 'rampage_link.bed'),
                        return_handle=False)
    rampage_peak = check_bed(os.path.join(folder, 'rampage_peaks.txt'),
                             return_handle=False)
    prom = int(options['--promoter'])
    # align and filter candidate peaks
    pool = Pool(int(options['--thread']))
    results = []
    for gene_info, gpromoter in parse_gene(db, ref_flag, prom):
        results.append(pool.apply_async(assign_peak_to_gene,
                                        args=(rampage, rampage_peak,
                                              gene_info, gpromoter, prom)))
    pool.close()
    pool.join()
    # output results
    with open(os.path.join(folder, 'rampage_assigned_peaks.txt'), 'w') as outf:
        for r in results:
            gene_info, peak = r.get()
            if gene_info:
                for p in peak:
                    outf.write('%s\t%s\n' % (p, gene_info))
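
# `parse_gene` is defined elsewhere; a minimal sketch of what it might yield
# for the gffutils branch (`ref_flag` False), assuming `gene_info` is a
# tab-separated gene description and `gpromoter` the promoter interval around
# the TSS (all of this is an assumption, not the original implementation):
def parse_gene(db, ref_flag, prom):
    if ref_flag:
        return  # `db` is a plain annotation file here; parsing omitted
    for gene in db.features_of_type('gene'):
        tss = gene.start if gene.strand == '+' else gene.end
        gene_info = '\t'.join([gene.id, gene.seqid, str(gene.start),
                               str(gene.end), gene.strand])
        gpromoter = (gene.seqid, max(0, tss - prom), tss + prom)
        yield gene_info, gpromoter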
def fseq(options):
    ''' Call peaks using F-seq '''
    # parse options
    if not which('fseq'):
        sys.exit('Error: No F-seq installed!')
    folder = check_dir(options['<rampagedir>'])
    flength = options['-l']
    percent = [0.95, 0.9, 0.85, 0.8, 0.75, 0.7]
    with open(os.path.join(folder, 'total_counts.txt'), 'r') as f:
        total = int(f.read().rstrip())
    # run F-seq
    flist = {'+': 'rampage_plus_5end.bed', '-': 'rampage_minus_5end.bed'}
    all_peak_f = options['-o']
    with open(all_peak_f, 'w') as out:
        for strand in flist:
            peak_f = run_fseq(folder, flist[strand], strand, flength, percent)
            with open(peak_f, 'r') as f:
                for line in f:
                    if total:
                        # append the library total to each peak line
                        out.write(line.rstrip() + '\t%d\n' % total)
                    else:
                        out.write(line)
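
# `run_fseq` is defined elsewhere; a minimal sketch of the F-seq call it
# presumably wraps, using F-seq's -l (feature length), -of (output format),
# and -o (output directory) options; the per-strand output layout and the
# handling of `percent` are assumptions:
import os
import sys

def run_fseq(folder, bed_f, strand, flength, percent):
    out_dir = os.path.join(folder,
                           'fseq_plus' if strand == '+' else 'fseq_minus')
    os.makedirs(out_dir, exist_ok=True)
    length_opt = '-l %s ' % flength if flength else ''
    cmd = 'fseq %s-of bed -o %s %s' % (length_opt, out_dir,
                                       os.path.join(folder, bed_f))
    if os.system(cmd) >> 8:
        sys.exit('Error: F-seq failed!')
    # peak filtering by the `percent` thresholds would happen here (omitted)
    return os.path.join(out_dir, 'peaks.bed')  # hypothetical merged output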
def dbloci(options):
    ''' Fetch bidirectionally transcribed loci '''
    # parse options
    folder = check_dir(options['<rampagedir>'])
    size = int(options['-l'])
    filter_flag = options['--filter']
    if filter_flag == 'rpm':
        cutoff = float(options['--rpm'])
    elif filter_flag == 'height':
        cutoff = int(options['--height'])
    # fetch rampage pairs
    peak_f = os.path.join(folder, 'rampage_peaks.txt')
    peak_bed = check_bed(peak_f)
    up_cluster = {}
    down_cluster = {}
    pairs = defaultdict(list)
    with open(peak_f, 'r') as peak:
        for ucluster in peak:
            # parse upstream cluster
            chrom, _, _, _, _, ustrand, upos = ucluster.split()[:7]
            if ustrand == '+':  # upstream cluster should be minus
                continue
            if filter_cluster(ucluster, filter_flag, cutoff):
                continue
            uheight = int(ucluster.split()[7])
            up_id = '\t'.join([chrom, upos, ustrand])
            up_cluster[up_id] = uheight
            # parse downstream cluster
            start = int(upos)
            end = start + size * 2
            for dcluster in peak_bed.fetch(chrom, start, end):
                dstrand, dpos = dcluster.split()[5:7]
                if dstrand == '-':  # downstream cluster should be plus
                    continue
                if filter_cluster(dcluster, filter_flag, cutoff):
                    continue
                dheight = int(dcluster.split()[7])
                down_id = '\t'.join([chrom, dpos, dstrand])
                down_cluster[down_id] = dheight
                # construct pairs
                pairs[up_id].append(down_id)
                pairs[down_id].append(up_id)
    # output enhancers
    outf = os.path.join(folder, 'enhancers.txt')
    with open(outf, 'w') as out:
        for pair_set in fetch_pair(pairs):
            up_site, down_site = 0, 0
            up_height, down_height = 0, 0
            for site_id in pair_set:
                chrom, site, strand = site_id.split()
                if strand == '-':  # upstream
                    height = up_cluster[site_id]
                    if height > up_height:
                        up_site = int(site)
                        up_height = height
                else:  # downstream
                    height = down_cluster[site_id]
                    if height > down_height:
                        down_site = int(site)
                        down_height = height
            middle_site = int((up_site + down_site) / 2)
            forward_plus = cal_density(folder, chrom, middle_site,
                                       middle_site + size, 'plus')
            forward_minus = cal_density(folder, chrom, middle_site,
                                        middle_site + size, 'minus')
            reverse_plus = cal_density(folder, chrom, middle_site - size,
                                       middle_site, 'plus')
            reverse_minus = cal_density(folder, chrom, middle_site - size,
                                        middle_site, 'minus')
            # require plus-strand signal downstream and minus-strand upstream
            if forward_minus >= forward_plus or reverse_plus >= reverse_minus:
                continue
            forward = forward_plus
            reverse = reverse_minus
            forward_dis = fetch_dis(folder, chrom, middle_site,
                                    middle_site + size, '+')
            reverse_dis = fetch_dis(folder, chrom, middle_site - size,
                                    middle_site, '-')
            fold = (forward - reverse) * 1.0 / (forward + reverse)
            start = middle_site - size
            end = middle_site + size
            out_format = '%s\t%d\t%d\tenhancer\t0\t+\t%d\t%d\t%d\t%d\t%d\t%f\n'
            out.write(out_format % (chrom, start, end, middle_site,
                                    reverse_dis, forward_dis, reverse,
                                    forward, fold))
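
# `fetch_pair` is defined elsewhere; a minimal sketch, assuming it yields each
# connected component of the up/down pairing graph built above (every node in
# an adjacency list is also a key, since pairs are added symmetrically):
def fetch_pair(pairs):
    visited = set()
    for site_id in pairs:
        if site_id in visited:
            continue
        # depth-first search collects one bidirectional cluster
        component, stack = set(), [site_id]
        while stack:
            node = stack.pop()
            if node in component:
                continue
            component.add(node)
            stack.extend(pairs[node])
        visited |= component
        yield component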