def do_realign(dataHub, sample):
    """Realign this sample's reads against both the ref and alt alleles.

    Args:
        dataHub: carries the variant under test and the command-line args
            (``dataHub.args.processes`` selects the worker count).
        sample: provides ``sample.reads`` (the reads to remap) and
            ``sample.name`` (used to label the remap jobs).

    Returns:
        A list of AlignmentSetCollection, one per read key, each holding the
        "ref" and "alt" alignment sets for that read.
    """
    processes = dataHub.args.processes
    if processes is None or processes == 0:
        # we don't really gain from using virtual cores, so try to figure out
        # how many physical cores we have
        processes = misc.cpu_count_physical()

    variant = dataHub.variant
    reads = sample.reads
    # "{}:{{}}" leaves one placeholder for the allele name filled in below
    name = "{}:{{}}".format(sample.name[:15])

    t0 = time.time()
    refalignments = do1remap(variant.chromParts("ref"), reads, processes,
                             jobName=name.format("ref"))
    altalignments = do1remap(variant.chromParts("alt"), reads, processes,
                             jobName=name.format("alt"))
    t1 = time.time()

    logging.debug(" Time to realign: {:.1f}s".format(t1-t0))

    # Compare key *sets*: dict.keys() returns an order-sensitive list on
    # Python 2, where the previous direct comparison could fail spuriously.
    # This also matches the set-based check used by the other variants of
    # this routine.
    assert set(refalignments.keys()) == set(altalignments.keys())

    alnCollections = []
    for key in refalignments:
        alnCollection = AlignmentSetCollection(key)
        alnCollection.addSet(refalignments[key], "ref")
        alnCollection.addSet(altalignments[key], "alt")
        alnCollections.append(alnCollection)

    return alnCollections
def do_realign(dataHub, sample):
    """Remap the sample's reads against the ref and alt alleles of the
    variant and bundle each read's two alignment sets into an
    AlignmentSetCollection.

    Returns a list of AlignmentSetCollection, one per read key.
    """
    workerCount = dataHub.args.processes
    if workerCount is None or workerCount == 0:
        # Virtual cores buy us nothing for this workload, so fall back to
        # the number of physical cores.
        workerCount = misc.cpu_count_physical()

    # One placeholder remains for the allele label, filled in per job below.
    jobLabel = "{}:{{}}".format(sample.name[:15])

    startTime = time.time()
    refalignments = do1remap(dataHub.variant.chromParts("ref"), sample.reads,
                             workerCount, jobName=jobLabel.format("ref"))
    altalignments = do1remap(dataHub.variant.chromParts("alt"), sample.reads,
                             workerCount, jobName=jobLabel.format("alt"))
    elapsed = time.time() - startTime
    logging.debug(" Time to realign: {:.1f}s".format(elapsed))

    # Both remaps must have produced results for exactly the same reads.
    assert refalignments.keys() == altalignments.keys()

    collections = []
    for readKey in refalignments:
        bundle = AlignmentSetCollection(readKey)
        bundle.addSet(refalignments[readKey], "ref")
        bundle.addSet(altalignments[readKey], "alt")
        collections.append(bundle)
    return collections
def multimap(namesToReferences, seqs):
    """Remap every sequence in seqs against the given references in parallel.

    The worker pool is created lazily on first call and cached as a function
    attribute (``multimap.pool``) so subsequent calls reuse the same worker
    processes instead of paying pool start-up cost each time.

    Args:
        namesToReferences: the reference mapping handed to ``remaps`` for
            each sequence.
        seqs: iterable of sequences to remap.

    Returns:
        dict built from the (key, value) pairs produced by ``remaps``.
    """
    if not hasattr(multimap, "pool"):
        multimap.pool = multiprocessing.Pool(processes=misc.cpu_count_physical())
    pool = multimap.pool

    # map_async(...).get(timeout) instead of plain map() — presumably so the
    # main process stays interruptible (e.g. Ctrl-C) while waiting; the huge
    # timeout is effectively "wait forever". TODO(review): confirm intent.
    results = dict(
        pool.map_async(remaps,
                       [(namesToReferences, seq) for seq in seqs]).get(999999))
    return results
def do_realign(dataHub, sample):
    """Realign the sample's reads against both alleles, dropping reads that
    failed to align on either side.

    Args:
        dataHub: carries the variant, plus args ``processes`` (worker count)
            and ``fast`` (passed through as ``tryExact``).
        sample: provides ``sample.reads`` and ``sample.name``.

    Returns:
        A list of AlignmentSetCollection, one per surviving read key, each
        holding the "ref" and "alt" alignment sets.
    """
    processes = dataHub.args.processes
    if processes is None or processes == 0:
        # we don't really gain from using virtual cores, so try to figure out
        # how many physical cores we have
        processes = misc.cpu_count_physical()

    variant = dataHub.variant
    reads = sample.reads
    # One placeholder is left for the allele label, filled in per job below.
    name = "{}:{{}}".format(sample.name[:15])

    t0 = time.time()
    refalignments, badReadsRef = do1remap(
        variant.chromParts("ref"), reads, processes,
        jobName=name.format("ref"), tryExact=dataHub.args.fast)
    altalignments, badReadsAlt = do1remap(
        variant.chromParts("alt"), reads, processes,
        jobName=name.format("alt"), tryExact=dataHub.args.fast)
    t1 = time.time()
    logging.debug(" Time to realign: {:.1f}s".format(t1 - t0))

    badReads = badReadsRef.union(badReadsAlt)
    if len(badReads) > 0:
        # Fix: report the *count* of failed reads — the original formatted the
        # set itself into a message that reads "...with {} reads".  Also use
        # logging.warning; logging.warn is a deprecated alias.
        logging.warning(
            " Alignment failed with {} reads (this is a known issue)".format(
                len(badReads)))
    # A failed read must be removed from both sides so the key sets stay equal.
    for badRead in badReads:
        refalignments.pop(badRead, None)
        altalignments.pop(badRead, None)

    # On failure, show the symmetric difference — the keys unique to one side.
    assert set(refalignments.keys()) == set(altalignments.keys()), \
        set(refalignments.keys()) ^ set(altalignments.keys())

    alnCollections = []
    for key in refalignments:
        alnCollection = AlignmentSetCollection(key)
        alnCollection.addSet(refalignments[key], "ref")
        alnCollection.addSet(altalignments[key], "alt")
        alnCollections.append(alnCollection)

    return alnCollections
def do_realign(dataHub, sample):
    """Realign the sample's reads against both alleles, dropping reads that
    failed to align on either side.

    Args:
        dataHub: carries the variant, plus args ``processes`` (worker count)
            and ``fast`` (passed through as ``tryExact``).
        sample: provides ``sample.reads`` and ``sample.name``.

    Returns:
        A list of AlignmentSetCollection, one per surviving read key, each
        holding the "ref" and "alt" alignment sets.
    """
    processes = dataHub.args.processes
    if processes is None or processes == 0:
        # we don't really gain from using virtual cores, so try to figure out
        # how many physical cores we have
        processes = misc.cpu_count_physical()

    variant = dataHub.variant
    reads = sample.reads
    # One placeholder is left for the allele label, filled in per job below.
    name = "{}:{{}}".format(sample.name[:15])

    t0 = time.time()
    refalignments, badReadsRef = do1remap(
        variant.chromParts("ref"), reads, processes,
        jobName=name.format("ref"), tryExact=dataHub.args.fast)
    altalignments, badReadsAlt = do1remap(
        variant.chromParts("alt"), reads, processes,
        jobName=name.format("alt"), tryExact=dataHub.args.fast)
    t1 = time.time()
    logging.debug(" Time to realign: {:.1f}s".format(t1-t0))

    badReads = badReadsRef.union(badReadsAlt)
    if len(badReads) > 0:
        # Fix: report the *count* of failed reads — the original formatted the
        # set itself into a message that reads "...with {} reads".  Also use
        # logging.warning; logging.warn is a deprecated alias.
        logging.warning(
            " Alignment failed with {} reads (this is a known issue)".format(
                len(badReads)))
    # A failed read must be removed from both sides so the key sets stay equal.
    for badRead in badReads:
        refalignments.pop(badRead, None)
        altalignments.pop(badRead, None)

    # On failure, show the symmetric difference — the keys unique to one side.
    assert set(refalignments.keys()) == set(altalignments.keys()), \
        set(refalignments.keys()) ^ set(altalignments.keys())

    alnCollections = []
    for key in refalignments:
        alnCollection = AlignmentSetCollection(key)
        alnCollection.addSet(refalignments[key], "ref")
        alnCollection.addSet(altalignments[key], "alt")
        alnCollections.append(alnCollection)

    return alnCollections
def do_realign(variant, reads, processes=None):
    """Realign reads against the ref and alt alleles of variant.

    Args:
        variant: provides ``chromParts("ref")`` / ``chromParts("alt")``.
        reads: the reads to remap.
        processes: worker count; None or 0 means "use the physical core
            count".

    Returns:
        A list of AlignmentSetCollection, one per read key, each holding
        both the "ref" and "alt" alignment sets.
    """
    if processes is None or processes == 0:
        # Virtual cores don't speed this workload up; count physical cores.
        processes = misc.cpu_count_physical()

    began = time.time()
    # Remap against each allele in turn ("ref" first, then "alt").
    byAllele = {allele: do1remap(variant.chromParts(allele), reads, processes)
                for allele in ("ref", "alt")}
    ended = time.time()
    logging.debug(" time for realigning:{}".format(ended - began))

    # Both remaps must have produced results for exactly the same reads.
    assert byAllele["ref"].keys() == byAllele["alt"].keys()

    collections = []
    for readKey in byAllele["ref"]:
        bundle = AlignmentSetCollection(readKey)
        bundle.addSet(byAllele["ref"][readKey], "ref")
        bundle.addSet(byAllele["alt"][readKey], "alt")
        collections.append(bundle)
    return collections