def generate_wiggle(reads, ofps, num_threads=1, contig=None):
    """Run populate_cvg_array_for_contig for every contig/strand of *reads*.

    One job is queued per (contig, strand) pair. When *ofps* holds exactly
    two streams they are looked up under the keys '+' and '-' and every
    contig is processed once per strand; otherwise a single pass per contig
    is made with the stream stored under the key None.

    Args:
        reads: object exposing parallel ``lengths`` and ``references``
            sequences; it is handed to every job unchanged.
        ofps: mapping from strand key to an open output stream. Every stream
            in it is closed before this function returns.
        num_threads: number of worker slots. With 1 the jobs run in the
            calling process; with any other value each job runs in its own
            ``multiprocessing.Process``, at most ``num_threads`` at a time.
        contig: when not None, only contigs whose cleaned name equals the
            cleaned value of *contig* are processed.
    """
    strands = ['+', '-'] if len(ofps) == 2 else [None,]

    # Contigs are visited in ascending (length, name) order; jobs are later
    # consumed from the end of the list, i.e. longest contig first.
    jobs = []
    for chrm_length, chrm in sorted(izip(reads.lengths, reads.references)):
        selected = (
            contig is None
            or clean_chr_name(chrm) == clean_chr_name(contig))
        if not selected:
            continue
        for strand in strands:
            job = (ofps[strand], reads, chrm, chrm_length, strand)
            assert job not in jobs
            jobs.append(job)

    if num_threads == 1:
        for job in jobs[::-1]:
            populate_cvg_array_for_contig(*job)
    else:
        workers = [None] * num_threads
        while jobs:
            # Hand the next job to the first slot that is empty or whose
            # process has finished, then poll again after a short pause.
            for slot, worker in enumerate(workers):
                if worker is not None and worker.is_alive():
                    continue
                replacement = multiprocessing.Process(
                    target=populate_cvg_array_for_contig,
                    args=jobs.pop())
                workers[slot] = replacement
                replacement.start()
                break
            time.sleep(0.1)

        # Wait for whatever is still running.
        for worker in workers:
            if worker is not None:
                worker.join()

    for stream in ofps.values():
        stream.close()
def write_array_to_opstream(ofp, buffer, buff_start, chrm, chrm_length, strand):
    """Write *buffer* to *ofp* as run-length encoded, tab-separated lines.

    Consecutive equal values are merged into one
    ``chrm<TAB>start<TAB>end<TAB>value`` line (value printed with two
    decimals). *buff_start* is the genomic coordinate of ``buffer[0]``.

    Args:
        ofp: output stream; only its ``write`` method is used.
        buffer: indexable, sliceable sequence of numeric values.
        buff_start: genomic offset added to every buffer index.
        chrm: contig name; passed through clean_chr_name and fix_chrm_name
            before being written.
        chrm_length: scanning stops once a position reaches this length and
            the current run is ended at ``chrm_length``.
        strand: when equal to '-', values are written negated.

    Runs whose value is not greater than 1e-12 are skipped.

    NOTE(review): when the loop runs to completion the closing run ends at
    ``buff_start + len(buffer) - 1``, so the last element is compared but its
    own position is not part of the emitted span -- presumably callers pass
    buffers that overlap by one position; confirm against the caller.
    NOTE(review): an empty buffer raises IndexError at ``buffer[0]``, and a
    one-element buffer whose value exceeds 1e-12 reaches the final write with
    no end coordinate bound and raises.
    """
    chrm = fix_chrm_name(clean_chr_name(chrm))

    def write_run(first, end, value):
        # `first`/`end` are buffer-relative, `end` is exclusive.
        signed = -value if strand == '-' else value
        record = "%s\t%i\t%i\t%.2f" % (
            chrm, buff_start + first, buff_start + end, signed)
        ofp.write(record + "\n")

    run_first, run_value = 0, buffer[0]
    # `end` is the buffer index of `value`, which is also the exclusive end
    # of the run that precedes it.
    for end, value in enumerate(buffer[1:], 1):
        # make sure this doesn't extend past the end of the chromosome
        if buff_start + end >= chrm_length:
            end = chrm_length - buff_start
            break
        if value != run_value:
            if run_value > 1e-12:
                write_run(run_first, end, run_value)
            run_first, run_value = end, value

    # flush the run that was still open when the scan stopped
    if run_value > 1e-12:
        write_run(run_first, end, run_value)
def parse_arguments():
    """Parse the command line into a genomic region and an open input file.

    Two positional arguments are expected: a region string of the form
    ``contig:strand:start-stop`` (commas inside the coordinates are dropped
    before conversion to int) and the path of the file to read.

    Returns:
        A ``(GenomicInterval, file object)`` tuple. The interval is built
        from the cleaned contig name, the strand string as given, and the
        integer start and stop; the file object is open for reading.

    Exits via argparse (usage message on stderr, status 2) when the input
    file cannot be opened or the region string does not have the expected
    shape.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Extract elements from a bed or gtf file.')
    parser.add_argument(
        'region', help='A region string of the form contig:strand:start-stop')
    # argparse.FileType is used instead of the Python 2-only `file` builtin:
    # it still yields an open, readable file object, but an unreadable path
    # becomes a normal argparse usage error rather than an IOError traceback.
    parser.add_argument(
        'input_file', type=argparse.FileType('r'),
        help='An bed file to extract elements from')
    args = parser.parse_args()

    try:
        chrm, strand, poss = args.region.strip().split(":")
        start, stop = [int(x) for x in poss.replace(",", "").split('-')]
    except ValueError:
        # Wrong number of ':' or '-' separated fields, or a non-integer
        # coordinate: report it like any other bad argument.
        parser.error(
            "region must be of the form contig:strand:start-stop, got %r"
            % (args.region,))

    return (
        GenomicInterval(clean_chr_name(chrm), strand, start, stop),
        args.input_file)
def parse_arguments():
    """Parse the command line into a genomic region and an open input file.

    Two positional arguments are expected: a region string of the form
    ``contig:strand:start-stop`` (commas inside the coordinates are dropped
    before conversion to int) and the path of the file to read.

    Returns:
        A ``(GenomicInterval, file object)`` tuple. The interval is built
        from the cleaned contig name, the strand string as given, and the
        integer start and stop; the file object is open for reading.

    Exits via argparse (usage message on stderr, status 2) when the input
    file cannot be opened or the region string does not have the expected
    shape.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Extract elements from a bed or gtf file.')
    parser.add_argument(
        'region', help='A region string of the form contig:strand:start-stop')
    # argparse.FileType is used instead of the Python 2-only `file` builtin:
    # it still yields an open, readable file object, but an unreadable path
    # becomes a normal argparse usage error rather than an IOError traceback.
    parser.add_argument(
        'input_file', type=argparse.FileType('r'),
        help='An bed file to extract elements from')
    args = parser.parse_args()

    try:
        chrm, strand, poss = args.region.strip().split(":")
        start, stop = [int(x) for x in poss.replace(",", "").split('-')]
    except ValueError:
        # Wrong number of ':' or '-' separated fields, or a non-integer
        # coordinate: report it like any other bad argument.
        parser.error(
            "region must be of the form contig:strand:start-stop, got %r"
            % (args.region,))

    return (
        GenomicInterval(clean_chr_name(chrm), strand, start, stop),
        args.input_file)
def fix_chr_name(x):
    """Return the clean_chr_name() form of *x* with "chr" prepended."""
    cleaned = clean_chr_name(x)
    return "chr" + cleaned