def main():
    """Run the BAM conversion over a set of windows with a process pool.

    Steps, in order:

    1. Parse the command line with ``parseArgs()``.
    2. Choose the windows to process: the single ``args.window``, the
       entries of ``args.regions`` (each passed through ``bed_to_coord``),
       or, when neither is given, windows derived from the BAM file via
       ``get_windows_from_bam(args.bam, 100000)``.
    3. Start a ``multiprocessing`` pool plus a ``listener`` task that
       receives results through a manager queue.
    4. Submit one ``convertBam`` job per window and wait for all of them.
    5. Put the ``'kill'`` message on the queue, wait for the queue to be
       fully processed, and close the pool.

    When ``args.fasta`` is set, the upper-cased sequence of the window's
    chromosome is handed to each job; otherwise ``0`` is passed in its
    place.
    """
    args = parseArgs()
    # debug mode
    if args.debug:
        args.verbose = "debug"
    # NOTE(review): the original indentation was lost, so it is unclear
    # whether this wrapping was meant to happen only in debug mode. It is
    # applied unconditionally here -- confirm against the intended behaviour.
    sys.stderr = Unbuffered(sys.stderr)

    if args.window:
        windows = [args.window]
    elif args.regions:
        windows = [bed_to_coord(x) for x in args.regions]
    else:
        if args.verbose:
            print("converting the whole genome", file=sys.stderr)
        windows = get_windows_from_bam(args.bam, 100000)
    if args.verbose:
        print("{} regions to parse".format(len(windows)), file=sys.stderr)

    # read in fasta
    if args.fasta:
        fasta = pysam.FastaFile(args.fasta)

    # initialize mp
    manager = mp.Manager()
    q = manager.Queue()
    pool = mp.Pool(processes=args.threads)
    if args.verbose:
        print("using {} parallel processes".format(args.threads),
              file=sys.stderr)

    # watcher for output
    watcher = pool.apply_async(listener,
                               (q, args.bam, args.out, args.verbose))

    # which convert function
    if args.gpc is None:
        converter = convert_cpg
    else:
        converter = convert_nome

    # start processing
    jobs = []
    if args.fasta:
        # Consecutive windows usually sit on the same chromosome, so keep
        # the most recently fetched sequence instead of re-reading and
        # upper-casing the whole chromosome for every window. Only one
        # sequence is cached at a time.
        cached_chrom = None
        seq = None
        try:
            for win in windows:
                chrom, _, _ = coord_to_bed(win)
                if chrom != cached_chrom:
                    seq = fasta.fetch(reference=chrom).upper()
                    cached_chrom = chrom
                jobs.append(
                    pool.apply_async(convertBam,
                                     args=(args.bam, seq, converter,
                                           args.cpg, args.gpc, win,
                                           args.remove_poor, args.verbose,
                                           q)))
        finally:
            # The jobs carry plain strings, so the handle is no longer
            # needed once submission is over (or has failed).
            fasta.close()
    else:
        for win in windows:
            jobs.append(
                pool.apply_async(convertBam,
                                 args=(args.bam, 0, converter, args.cpg,
                                       args.gpc, win, args.remove_poor,
                                       args.verbose, q)))

    # Block until every job has finished; get() also re-raises any
    # exception raised inside a worker.
    for job in jobs:
        job.get()

    # done
    q.put('kill')
    q.join()
    pool.close()
    if args.verbose:
        # start_time is not assigned in this function; it is expected to be
        # defined at module level.
        print("time elapsed : {} seconds".format(time.time() - start_time),
              file=sys.stderr)
def __init__(self, regline):
    """Build a region from one tab-separated line.

    The line is kept verbatim in ``regline`` and split on tabs into
    ``regfields``. ``coord`` comes from ``bed_to_coord(regline)`` and
    ``chrom``/``start``/``end`` from ``coord_to_bed(coord)``.

    ``title`` depends on how many fields the line has (fields 3, 5 and 6
    are presumably the BED name, strand and an extra label column --
    confirm against the input files):

    * 7 or more: ``"<field 6> <coord> (<field 5>)"``
    * 6:         ``"<field 3> <coord> (<field 5>)"``
    * 4 or 5:    ``"<field 3> <coord>"``
    * 3 or less: ``coord`` alone

    Lines with 4 or 5 fields used to raise ``IndexError`` because field 5
    was read without being present; they now get a title without the
    parenthesised part.

    ``center`` is ``start`` plus the floored half-length of the region,
    and ``metharrays`` / ``totreads`` start out empty / zero.
    """
    self.regline = regline
    self.regfields = regline.strip().split("\t")
    self.coord = bed_to_coord(regline)
    self.chrom, self.start, self.end = coord_to_bed(self.coord)

    nfields = len(self.regfields)
    if nfields > 6:
        self.title = "{} {} ({})".format(self.regfields[6], self.coord,
                                         self.regfields[5])
    elif nfields > 5:
        self.title = "{} {} ({})".format(self.regfields[3], self.coord,
                                         self.regfields[5])
    elif nfields > 3:
        # Field 5 does not exist here, so only field 3 is shown.
        self.title = "{} {}".format(self.regfields[3], self.coord)
    else:
        self.title = self.coord

    self.center = self.start + np.floor((self.end - self.start) / 2)
    self.metharrays = []
    self.totreads = 0