def __init__(self, region, alignDistance, fasta):
    """Initialise an inversion covering ``region``.

    Two plus-strand loci are built, one whose start and end both equal
    ``region.start()`` and one whose start and end both equal
    ``region.end()``. They are handed to the parent initialiser as the
    breakpoint list together with ``alignDistance`` and ``fasta``, and
    the region itself is stored on ``self.region``.
    """
    leftEdge = Locus(region.chr(), region.start(), region.start(), "+")
    rightEdge = Locus(region.chr(), region.end(), region.end(), "+")

    super(Inversion, self).__init__([leftEdge, rightEdge], alignDistance, fasta)

    self.region = region
def getReads(variant, bam, minmapq, pair_minmapq, searchDistance, single_ended=False,
             include_supplementary=False, max_reads=None, sample_reads=None):
    """Collect the reads found around a variant.

    The search regions come from ``variant.searchRegions(searchDistance)``
    and are passed to ``_getreads`` together with the remaining arguments.

    If ``_getreads`` raises ``ValueError``, the lookup is retried once with
    the alternative chromosome naming: a leading ``"chr"`` is stripped when
    the first region's chromosome has one, otherwise ``"chr"`` is prepended.
    If the retry raises ``ValueError`` too, the *original* error is raised.

    Returns:
        The reads returned by ``_getreads``.

    Raises:
        ValueError: when both the original and the renamed lookup fail.
    """
    t0 = time.time()
    searchRegions = variant.searchRegions(searchDistance)

    # This kludge tries the chromosomes as given ('chr4' or '4') and if that
    # doesn't work tries to switch to the other variation ('4' or 'chr4')
    try:
        reads, supplementaryAlignmentsFound = _getreads(
            searchRegions, bam, minmapq, pair_minmapq, single_ended,
            include_supplementary, max_reads, sample_reads)
    except ValueError as e:
        oldchrom = searchRegions[0].chr()

        try:
            if oldchrom.startswith("chr"):
                # Strip only the leading prefix; a blanket replace() would
                # also mangle names containing "chr" somewhere else.
                newchrom = oldchrom[len("chr"):]
                searchRegions = [
                    Locus(region.chr()[len("chr"):] if region.chr().startswith("chr") else region.chr(),
                          region.start(), region.end(), region.strand())
                    for region in searchRegions
                ]
            else:
                newchrom = "chr{}".format(oldchrom)
                searchRegions = [
                    Locus("chr{}".format(region.chr()),
                          region.start(), region.end(), region.strand())
                    for region in searchRegions
                ]

            logging.warning(
                "  Couldn't find reads on chromosome '{}'; trying instead '{}'"
                .format(oldchrom, newchrom))

            reads, supplementaryAlignmentsFound = _getreads(
                searchRegions, bam, minmapq, pair_minmapq, single_ended,
                include_supplementary, max_reads, sample_reads)
        except ValueError:
            # Report the failure for the name the user actually supplied.
            raise e

    t1 = time.time()

    if supplementaryAlignmentsFound:
        logging.warning(
            "  ** Supplementary alignments found: these alignments (with sam flag 0x800) **\n"
            "  ** are poorly documented among mapping software and may result in missing **\n"
            "  ** portions of reads; consider using the --include-supplementary          **\n"
            "  ** command line argument if you think this is happening                   **"
        )

    logging.debug("  time to find reads and mates:{:.1f}s".format(t1 - t0))
    logging.info("  number of reads found: {}".format(len(reads)))

    return reads
def searchRegions(self, searchDistance):
    """Return the plus-strand loci in which to look for reads.

    When ``len(self.region)`` is at least ``2 * searchDistance`` two
    windows are returned, each extending ``searchDistance`` to either side
    of one end of the region. Otherwise a single window covering the whole
    region plus ``searchDistance`` of padding on both sides is returned.
    Window starts are passed through ``nonNegative``.
    """
    chrom = self.chrom()
    region = self.region

    if len(region) >= 2 * searchDistance:
        # Two windows, one around each end of the inversion.
        aroundStart = Locus(chrom,
                            nonNegative(region.start() - searchDistance),
                            region.start() + searchDistance,
                            "+")
        aroundEnd = Locus(chrom,
                          nonNegative(region.end() - searchDistance),
                          region.end() + searchDistance,
                          "+")
        return [aroundStart, aroundEnd]

    # The two windows would meet, so use one window over the whole region.
    whole = Locus(chrom,
                  nonNegative(region.start() - searchDistance),
                  region.end() + searchDistance,
                  "+")
    return [whole]
def searchRegions(self, searchDistance):
    """Return a single plus-strand locus spanning the whole event.

    The window runs from ``searchDistance`` before the start of the first
    breakpoint to ``searchDistance`` past the end of the last breakpoint,
    on the chromosome of the first breakpoint.

    The window start is clamped at zero, because an event closer than
    ``searchDistance`` to the beginning of the chromosome would otherwise
    produce a negative coordinate.
    """
    firstBreakpoint = self.breakpoints[0]
    lastBreakpoint = self.breakpoints[-1]

    chrom = firstBreakpoint.chr()
    windowStart = max(0, firstBreakpoint.start() - searchDistance)
    windowEnd = lastBreakpoint.end() + searchDistance

    deletionRegion = Locus(chrom, windowStart, windowEnd, "+")
    return [deletionRegion]
def searchRegions(self, searchDistance):
    """Return one search window per breakpoint.

    Each window keeps the chromosome and strand of its breakpoint and
    extends ``searchDistance`` before the breakpoint's start (passed
    through ``nonNegative``) and ``searchDistance`` past its end.
    """
    return [
        Locus(bp.chr(),
              nonNegative(bp.start() - searchDistance),
              bp.end() + searchDistance,
              bp.strand())
        for bp in self.breakpoints
    ]
def getReads(variant, bam, minmapq, pair_minmapq, searchDistance, single_ended=False,
             include_supplementary=False):
    """Collect the reads found around a variant.

    The search regions come from ``variant.searchRegions(searchDistance)``
    and are passed to ``_getreads`` together with the remaining arguments.

    If ``_getreads`` raises ``ValueError``, the lookup is retried once with
    the alternative chromosome naming: a leading ``"chr"`` is stripped when
    the first region's chromosome has one, otherwise ``"chr"`` is prepended.
    If the retry raises ``ValueError`` too, the *original* error is raised.

    Returns:
        The reads returned by ``_getreads``.

    Raises:
        ValueError: when both the original and the renamed lookup fail.
    """
    t0 = time.time()
    searchRegions = variant.searchRegions(searchDistance)

    # This kludge tries the chromosomes as given ('chr4' or '4') and if that
    # doesn't work tries to switch to the other variation ('4' or 'chr4')
    try:
        reads, supplementaryAlignmentsFound = _getreads(
            searchRegions, bam, minmapq, pair_minmapq, single_ended,
            include_supplementary)
    except ValueError as e:  # "as" form parses on both Python 2.6+ and 3
        oldchrom = searchRegions[0].chr()

        try:
            if oldchrom.startswith("chr"):
                # Strip only the leading prefix; a blanket replace() would
                # also mangle names containing "chr" somewhere else.
                newchrom = oldchrom[len("chr"):]
                searchRegions = [
                    Locus(region.chr()[len("chr"):] if region.chr().startswith("chr") else region.chr(),
                          region.start(), region.end(), region.strand())
                    for region in searchRegions
                ]
            else:
                newchrom = "chr{}".format(oldchrom)
                searchRegions = [
                    Locus("chr{}".format(region.chr()),
                          region.start(), region.end(), region.strand())
                    for region in searchRegions
                ]

            logging.warning(
                "  Couldn't find reads on chromosome '{}'; trying instead '{}'"
                .format(oldchrom, newchrom))

            reads, supplementaryAlignmentsFound = _getreads(
                searchRegions, bam, minmapq, pair_minmapq, single_ended,
                include_supplementary)
        except ValueError:
            # Report the failure for the name the user actually supplied.
            raise e

    t1 = time.time()

    if supplementaryAlignmentsFound:
        logging.warning(
            "  ** Supplementary alignments found: these alignments (with sam flag 0x800) **\n"
            "  ** are poorly documented among mapping software and may result in missing **\n"
            "  ** portions of reads; consider using the --include-supplementary          **\n"
            "  ** command line argument if you think this is happening                   **"
        )

    logging.debug("  time to find reads and mates:{:.1f}s".format(t1 - t0))
    logging.info("  number of reads found: {}".format(len(reads)))

    # Hand the collected reads back to the caller instead of discarding them.
    return reads
def chromParts(self, allele):
    """Build the chromosome parts describing ``allele`` for this event.

    For every breakpoint two plus-strand segments of ``self.alignDistance``
    are created: one ending just before the breakpoint start and one
    beginning at it. Segment ids are numbered 0..3 in that order.

    For the ``"ref"`` and ``"amb"`` alleles, one part is returned per
    breakpoint, each holding that breakpoint's two segments; the second
    part's name gets a ``"b"`` suffix when both breakpoints share a
    chromosome. For any other allele a single part is returned that joins
    one segment from each breakpoint, chosen (and possibly reversed with
    ``antisense()``) according to the breakpoint strands.

    Raises:
        Exception: if any two of the four segments overlap.
    """
    bpA = self.breakpoints[0]
    bpB = self.breakpoints[1]

    segments = []
    for index, bp in enumerate(self.breakpoints):
        before = Segment(bp.chr(),
                         bp.start() - self.alignDistance,
                         bp.start() - 1,
                         "+", 2 * index)
        segments.append(before)
        after = Segment(bp.chr(),
                        bp.start(),
                        bp.start() + self.alignDistance,
                        "+", 2 * index + 1)
        segments.append(after)

        # assert bp.strand() == "+", bp

    # TODO: disambiguate reads mapping to multiple parts with the same
    # alignment scores but different orientations

    # Every pair of segments must be disjoint.
    loci = [Locus(seg.chrom, seg.start, seg.end, "+") for seg in segments]
    for position, locus in enumerate(loci):
        for laterLocus in loci[position + 1:]:
            if locus.overlaps(laterLocus):
                raise Exception(
                    "Not yet implemented - breakend-breakpoints near one another"
                )

    if allele not in ["ref", "amb"]:
        # Alternate allele: stitch one side of each breakpoint together.
        left = segments[0] if bpA.strand() == "+" else segments[1].antisense()
        right = segments[3] if bpB.strand() == "+" else segments[2].antisense()
        altName = "alt_{}/{}".format(bpA.chr(), bpB.chr())
        return ChromPartsCollection(
            [ChromPart(altName, [left, right], self.sources)])

    # Reference-like allele: one part per breakpoint.
    firstName = "ref_{}".format(bpA.chr())
    firstPart = ChromPart(firstName, [segments[0], segments[1]], self.sources)

    secondName = "ref_{}".format(bpB.chr())
    if bpA.chr() == bpB.chr():
        # Keep the two part names distinct on a shared chromosome.
        secondName += "b"
    secondPart = ChromPart(secondName, [segments[2], segments[3]], self.sources)

    return ChromPartsCollection([firstPart, secondPart])
def getVariant(dataHub):
    """Build the variant object described by ``dataHub.args``.

    ``dataHub.args.type`` (case-insensitive) selects the event class and
    ``dataHub.args.breakpoints`` supplies its coordinates:

    * ``del...``               -- chrom, start, end
    * ``ldel``/``largedeletion`` -- chrom, start, end
    * ``ins...``               -- chrom, pos, [end,] seq
    * ``inv...``               -- chrom, start, end
    * ``mei...``               -- mobile-element fasta, chrom, pos, element
      name, then optional strand, start and end of the element
    * ``tra...``               -- chrom1, pos1, chrom2, pos2, orientation
    * ``bkend``/``breakend``   -- chrom1, pos1, strand1, chrom2, pos2, strand2

    For inversions, translocations and breakends, ``dataHub.args.min_mapq``
    is set to ``-1`` when it is ``None``.

    Returns:
        The constructed variant (also logged at INFO level).

    Raises:
        AssertionError: if the number of breakpoint fields does not match
            the event type, or a deletion has ``start >= end``.
        Exception: if the event type is not recognised.
    """
    args = dataHub.args
    eventType = args.type.lower()
    breakpoints = args.breakpoints

    if eventType.startswith("del"):
        assert len(breakpoints) == 3, getBreakpointFormatsStr("del")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        assert start < end
        # Coordinates are shifted down by one before building the deletion
        # (presumably 1-based input to 0-based internal -- confirm).
        variant = Deletion.from_breakpoints(
            chrom, start - 1, end - 1, dataHub.alignDistance, dataHub.genome)

    elif eventType in ["ldel", "largedeletion"]:
        assert len(breakpoints) == 3, getBreakpointFormatsStr("ldel")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        assert start < end
        variant = LargeDeletion.from_breakpoints(
            chrom, start - 1, end - 1, dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("ins"):
        assert len(breakpoints) in [3, 4], getBreakpointFormatsStr("ins")
        chrom = breakpoints[0]
        pos = int(breakpoints[1])
        if len(breakpoints) == 3:
            # No explicit end given: the insertion sits at a single position.
            seq = breakpoints[2]
            end = pos
        else:
            end = int(breakpoints[2])
            seq = breakpoints[3]
        variant = Insertion(
            Locus(chrom, pos, end, "+"), seq, dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("inv"):
        assert len(breakpoints) == 3, getBreakpointFormatsStr("inv")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        if args.min_mapq is None:
            args.min_mapq = -1
        variant = Inversion(
            Locus(chrom, start, end, "+"), dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("mei"):
        assert len(breakpoints) >= 4, getBreakpointFormatsStr("mei")
        # Field 0 is the mobile-element fasta; fields 1-2 locate the insertion.
        insertionBreakpoint = Locus(breakpoints[1], breakpoints[2], breakpoints[2], "+")

        meName = breakpoints[3]
        meStrand = getListDefault(breakpoints, 4, "+")
        meStart = getListDefault(breakpoints, 5, 0)
        meEnd = getListDefault(breakpoints, 6, 1e100)
        meCoords = Locus(meName, meStart, meEnd, meStrand)

        meFasta = genomesource.FastaGenomeSource(breakpoints[0])

        variant = MobileElementInsertion(
            insertionBreakpoint, meCoords, meFasta, dataHub.alignDistance, dataHub.genome)

    elif eventType.startswith("tra"):
        assert len(breakpoints) == 5, getBreakpointFormatsStr("tra")
        chrom1 = breakpoints[0]
        start1 = int(breakpoints[1])
        chrom2 = breakpoints[2]
        start2 = int(breakpoints[3])
        orientation = breakpoints[4]
        if args.min_mapq is None:
            args.min_mapq = -1
        variant = Translocation(
            Locus(chrom1, start1, start1, "+"),
            Locus(chrom2, start2, start2, orientation),
            dataHub.alignDistance, dataHub.genome)

    elif eventType in ["bkend", "breakend"]:
        assert len(breakpoints) == 6, getBreakpointFormatsStr("bkend")
        chrom1 = breakpoints[0]
        start1 = int(breakpoints[1])
        strand1 = breakpoints[2]
        chrom2 = breakpoints[3]
        start2 = int(breakpoints[4])
        strand2 = breakpoints[5]
        if args.min_mapq is None:
            args.min_mapq = -1
        variant = Breakend(
            Locus(chrom1, start1, start1, strand1),
            Locus(chrom2, start2, start2, strand2),
            dataHub.alignDistance, dataHub.genome)

    else:
        # List every type handled above so the message matches the code.
        raise Exception(
            "only accept event types of deletion, largedeletion, insertion, "
            "inversion, mei, translocation or breakend")

    logging.info(" Variant: {}".format(variant))

    return variant
def from_breakpoints(class_, chrom, first, second, alignDistance, fasta):
    """Alternate constructor taking two positions on one chromosome.

    ``first`` and ``second`` are each turned into a plus-strand locus on
    ``chrom`` whose start and end are both that position. The two loci are
    passed to ``class_`` as separate positional arguments, followed by
    ``alignDistance`` and ``fasta``.
    """
    firstLocus, secondLocus = [
        Locus(chrom, position, position, "+") for position in (first, second)
    ]
    return class_(firstLocus, secondLocus, alignDistance, fasta)
def searchRegions(self, searchDistance):
    """Return a single plus-strand window covering the whole event.

    The window starts ``searchDistance`` before the first breakpoint's
    start (passed through ``nonNegative``) and ends ``searchDistance``
    past the last breakpoint's end, on the first breakpoint's chromosome.
    """
    firstBreakpoint = self.breakpoints[0]
    chrom = firstBreakpoint.chr()

    windowStart = nonNegative(firstBreakpoint.start() - searchDistance)
    windowEnd = self.breakpoints[-1].end() + searchDistance

    return [Locus(chrom, windowStart, windowEnd, "+")]
def from_breakpoints(class_, chrom, first, second, alignDistance, fasta):
    """Alternate constructor taking two positions on one chromosome.

    ``first`` and ``second`` are each turned into a plus-strand locus on
    ``chrom`` whose start and end are both that position. The loci are
    passed to ``class_`` as a single two-element list, followed by
    ``alignDistance`` and ``fasta``.
    """
    startLocus = Locus(chrom, first, first, "+")
    endLocus = Locus(chrom, second, second, "+")
    return class_([startLocus, endLocus], alignDistance, fasta)