def main():
    """Run ``convertBam`` over a set of genomic windows in a process pool.

    Steps, in order:

    1. Parse command-line arguments with ``parseArgs``; ``--debug`` forces
       ``args.verbose`` to ``"debug"``.
    2. Choose the windows: the single ``args.window``, the converted
       ``args.regions`` entries, or windows of 100000 derived from the BAM
       by ``get_windows_from_bam``.
    3. Start a pool of ``args.threads`` processes and submit ``listener``
       as a long-running task that is handed the shared queue, the input
       BAM and the output path.
    4. Submit one ``convertBam`` task per window and wait for all of them.
    5. Put the ``'kill'`` sentinel on the queue, wait for the queue to be
       drained, and close the pool.

    When ``args.fasta`` is given, each task receives the upper-cased
    sequence of the window's whole chromosome; otherwise it receives ``0``
    in that position.
    """
    args = parseArgs()
    # debug mode
    if args.debug:
        args.verbose = "debug"
    sys.stderr = Unbuffered(sys.stderr)
    if args.window:
        windows = [args.window]
    elif args.regions:
        windows = [bed_to_coord(x) for x in args.regions]
    else:
        if args.verbose:
            print("converting the whole genome", file=sys.stderr)
        windows = get_windows_from_bam(args.bam, 100000)
    if args.verbose:
        print("{} regions to parse".format(len(windows)), file=sys.stderr)
    # read in fasta (opened this early so a bad path fails before any
    # worker process is started)
    fasta = pysam.FastaFile(args.fasta) if args.fasta else None
    # initialize mp
    manager = mp.Manager()
    q = manager.Queue()
    pool = mp.Pool(processes=args.threads)
    if args.verbose:
        print("using {} parallel processes".format(args.threads),
              file=sys.stderr)
    # watcher for output; its AsyncResult is not needed because completion
    # is tracked through the queue (q.join() below)
    pool.apply_async(listener, (q, args.bam, args.out, args.verbose))
    # which convert function
    converter = convert_cpg if args.gpc is None else convert_nome
    # start processing
    if fasta is not None:
        jobs = []
        # Consecutive windows usually share a chromosome, so keep the most
        # recently fetched sequence instead of re-reading and upper-casing
        # the whole chromosome for every window. Only one sequence is kept
        # so memory use does not grow with the number of chromosomes.
        last_chrom = None
        seq = None
        try:
            for win in windows:
                chrom, _, _ = coord_to_bed(win)
                if chrom != last_chrom:
                    seq = fasta.fetch(reference=chrom).upper()
                    last_chrom = chrom
                jobs.append(
                    pool.apply_async(convertBam,
                                     args=(args.bam, seq, converter, args.cpg,
                                           args.gpc, win, args.remove_poor,
                                           args.verbose, q)))
        finally:
            # The tasks carry plain strings, so the reference file can be
            # released as soon as every sequence has been fetched.
            fasta.close()
    else:
        jobs = [
            pool.apply_async(convertBam,
                             args=(args.bam, 0, converter, args.cpg, args.gpc,
                                   win, args.remove_poor, args.verbose, q))
            for win in windows
        ]
    # Block until every conversion task has finished; .get() also re-raises
    # any exception raised inside a worker.
    for job in jobs:
        job.get()
    # done
    q.put('kill')
    q.join()
    pool.close()
    if args.verbose:
        # NOTE(review): start_time is not assigned in this function;
        # presumably a module-level timestamp - confirm.
        print("time elapsed : {} seconds".format(time.time() - start_time),
              file=sys.stderr)
Beispiel #2
0
    def __init__(self, regline):
        """Parse one tab-separated region line and set up empty accumulators.

        Args:
            regline: a single region record as text. It is passed unchanged
                to ``bed_to_coord`` and is also split on tabs into
                ``self.regfields``.

        Sets ``regline``, ``regfields``, ``coord``, ``chrom``, ``start``,
        ``end``, ``title``, ``center``, ``metharrays`` (empty list) and
        ``totreads`` (0).

        The title depends on how many tab-separated fields are present
        (presumably BED-style columns, with a name at index 3 and a strand
        at index 5 - confirm against the input files):

        * more than 6 fields: ``"<field 6> <coord> (<field 5>)"``
        * exactly 6 fields:   ``"<field 3> <coord> (<field 5>)"``
        * 4 or 5 fields:      ``"<coord> (<field 3>)"``
        * 3 or fewer fields:  the coordinate string alone
        """
        self.regline = regline
        self.regfields = regline.strip().split("\t")
        self.coord = bed_to_coord(regline)
        self.chrom, self.start, self.end = coord_to_bed(self.coord)
        nfields = len(self.regfields)
        if nfields > 6:
            self.title = "{} {} ({})".format(self.regfields[6], self.coord,
                                             self.regfields[5])
        elif nfields > 5:
            # Index 5 is only valid with at least six fields; the previous
            # "> 4" guard raised IndexError on five-field lines.
            self.title = "{} {} ({})".format(self.regfields[3], self.coord,
                                             self.regfields[5])
        elif nfields > 3:
            # Four or five fields: index 5 does not exist here (the old code
            # always raised IndexError), so label with the field at index 3.
            self.title = "{} ({})".format(self.coord, self.regfields[3])
        else:
            self.title = self.coord

        # Midpoint of the region; np.floor keeps the fractional half from
        # odd-length regions out of the result.
        self.center = self.start + np.floor((self.end - self.start) / 2)
        self.metharrays = []
        self.totreads = 0
def main():
    """Run ``split_bed_haplotypes`` over a set of windows in a process pool.

    The windows are the single ``args.window``, the converted
    ``args.regions`` entries, or windows of 100000 derived from the BAM by
    ``get_windows_from_bam``. ``listener`` is submitted to the pool first
    with the shared queue and ``args.pre``; then one task per window is
    submitted and awaited. Finally the ``'kill'`` sentinel is queued, the
    queue is joined and the pool is closed.
    """
    args = parseArgs()
    verbose = args.verbose

    # Decide which windows to process.
    if args.window:
        windows = [args.window]
    elif args.regions:
        windows = list(map(bed_to_coord, args.regions))
    else:
        if verbose:
            print("extracting all reads in the bam file", file=sys.stderr)
        windows = get_windows_from_bam(args.bam, 100000)

    if verbose:
        print("{} regions to parse".format(len(windows)), file=sys.stderr)
        if args.random:
            print("separating reads randomly", file=sys.stderr)

    # Shared queue plus worker pool.
    manager = mp.Manager()
    q = manager.Queue()
    pool = mp.Pool(processes=args.threads)
    if verbose:
        print("using {} parallel processes".format(args.threads),
              file=sys.stderr)

    # The output watcher occupies one pool slot and reads from the queue.
    pool.apply_async(listener, args=(q, args.pre, verbose))

    # Fan out one task per window.
    jobs = []
    for win in windows:
        task_args = (args.bam, args.bed, win, args.random, verbose, q)
        jobs.append(pool.apply_async(split_bed_haplotypes, args=task_args))

    # Wait for every task in submission order; .get() re-raises worker
    # exceptions.
    for job in jobs:
        job.get()

    # Tell the watcher to stop, wait for the queue to drain, then shut down.
    q.put('kill')
    q.join()
    pool.close()

    if verbose:
        # NOTE(review): start_time is not assigned in this function;
        # presumably a module-level timestamp - confirm.
        elapsed = time.time() - start_time
        print("time elapsed : {} seconds".format(elapsed), file=sys.stderr)