def parseInsertion(record, dataHub):
    """Build an insertion variant from a VCF record.

    Two kinds of record are understood:

    * the ALT field is a literal sequence made up only of A/C/G/T/N
      (case-insensitive), which yields a ``variants.Insertion``;
    * the INFO field carries a ``MEINFO`` entry of the comma-separated form
      ``NAME[,START[,END[,STRAND]]]``, which yields a
      ``variants.MobileElementInsertion``.

    Parameters:
        record: parsed VCF record; this function reads ``chrom``, ``start``,
            ``end``, ``alt`` and ``info`` from it.
        dataHub: supplies ``alignDistance`` and ``genome`` for the variant,
            and is handed to ``getMobileElementFasta``.

    Returns:
        The constructed variant object.

    Raises:
        VCFParserError: if ALT is not a plain nucleotide sequence and no
            ``MEINFO`` entry is present.
        ValueError: if a supplied MEINFO start/end is not an integer.
    """
    # NOTE(review): a second, identical definition of parseInsertion appears
    # right next to this one in the file; if both live in the same module
    # scope only the later one is used -- confirm which copy is intended.
    insertSeq = record.alt.upper()
    insertionSite = utilities.Locus(record.chrom, record.start, record.end, "+")

    if set(insertSeq) <= set("ACGTN"):
        return variants.Insertion(
            insertionSite, insertSeq, dataHub.alignDistance, dataHub.genome)

    if "MEINFO" not in record.info:
        raise VCFParserError("Unknown insertion sequence")

    meinfo = record.info["MEINFO"].split(",")
    meName = meinfo[0]

    # Fields produced by split() are strings whereas the fall-back values are
    # numbers; convert supplied coordinates to int so the locus always gets
    # numeric start/end (string coordinates would order lexicographically,
    # and cannot be compared with the numeric defaults at all on Python 3).
    meStart = utilities.getListDefault(meinfo, 1, None)
    meStart = 0 if meStart is None else int(meStart)
    meEnd = utilities.getListDefault(meinfo, 2, None)
    # 1e100 acts as an open-ended "until the end of the element" sentinel.
    meEnd = 1e100 if meEnd is None else int(meEnd)
    meStrand = utilities.getListDefault(meinfo, 3, "+")

    meLocus = utilities.Locus(meName, meStart, meEnd, meStrand)
    return variants.MobileElementInsertion(
        insertionSite, meLocus, getMobileElementFasta(dataHub),
        dataHub.alignDistance, dataHub.genome)
def parseInsertion(record, dataHub):
    """Translate one VCF insertion record into a variant object.

    If the record's ALT allele consists solely of the letters A, C, G, T and N
    (upper- or lower-case) it is treated as the inserted sequence and a
    ``variants.Insertion`` is returned. Otherwise the record must carry a
    ``MEINFO`` INFO entry (``NAME[,START[,END[,STRAND]]]``) describing a
    mobile element, and a ``variants.MobileElementInsertion`` is returned.

    Parameters:
        record: VCF record exposing ``chrom``, ``start``, ``end``, ``alt``
            and a dict-like ``info``.
        dataHub: provides ``alignDistance`` and ``genome``; also passed to
            ``getMobileElementFasta`` to locate the mobile element sequences.

    Returns:
        The variant built from the record.

    Raises:
        VCFParserError: when neither a literal ALT sequence nor ``MEINFO``
            is available.
        ValueError: when a MEINFO start/end value is present but not an
            integer.
    """
    # NOTE(review): parseInsertion is defined twice back to back in this file
    # with the same logic; assuming both are at module scope, this later copy
    # is the one that takes effect -- consider removing the duplicate.
    altSequence = record.alt.upper()
    site = utilities.Locus(record.chrom, record.start, record.end, "+")

    isLiteralSequence = set(altSequence) <= set("ACGTN")
    if isLiteralSequence:
        variant = variants.Insertion(
            site, altSequence, dataHub.alignDistance, dataHub.genome)
    elif "MEINFO" in record.info:
        fields = record.info["MEINFO"].split(",")
        meName = fields[0]

        # split() yields strings but the defaults below are numeric; coerce
        # any coordinate that was actually supplied so that start and end
        # always reach Locus as numbers of a comparable type.
        rawStart = utilities.getListDefault(fields, 1, None)
        rawEnd = utilities.getListDefault(fields, 2, None)
        meStart = int(rawStart) if rawStart is not None else 0
        # 1e100 is kept as the "no upper bound given" placeholder.
        meEnd = int(rawEnd) if rawEnd is not None else 1e100
        meStrand = utilities.getListDefault(fields, 3, "+")

        meLocus = utilities.Locus(meName, meStart, meEnd, meStrand)
        variant = variants.MobileElementInsertion(
            site, meLocus, getMobileElementFasta(dataHub),
            dataHub.alignDistance, dataHub.genome)
    else:
        raise VCFParserError("Unknown insertion sequence")

    return variant
def _requireBreakpointFormat(isValid, formatKey):
    """Raise AssertionError with the usage text for formatKey unless isValid."""
    # An explicit raise is used instead of an ``assert`` statement because
    # asserts are stripped under ``python -O``, which would silently skip
    # validation of user-supplied command-line arguments.
    if not isValid:
        raise AssertionError(getBreakpointFormatsStr(formatKey))


def _parseDeletionInterval(breakpoints, formatKey):
    """Return (chrom, start, end) parsed from a 'chrom start end' triple."""
    _requireBreakpointFormat(len(breakpoints) == 3, formatKey)
    chrom = breakpoints[0]
    start = int(breakpoints[1])
    end = int(breakpoints[2])
    if not start < end:
        raise AssertionError(
            "start ({}) must be smaller than end ({})".format(start, end))
    return chrom, start, end


def _defaultMinMapq(args):
    """Set args.min_mapq to -1 when the user left it unspecified."""
    # NOTE(review): -1 presumably disables mapping-quality filtering for
    # event types whose reads often map ambiguously -- confirm with the
    # code that consumes min_mapq.
    if args.min_mapq is None:
        args.min_mapq = -1


def getVariant(dataHub):
    """Construct the variant described by the command-line arguments.

    ``dataHub.args.type`` (case-insensitive) selects the event type and
    ``dataHub.args.breakpoints`` holds its positional description:

    * ``del...``: ``chrom start end`` -> ``Deletion``
    * ``ldel`` / ``largedeletion``: ``chrom start end`` -> ``LargeDeletion``
    * ``ins...``: ``chrom pos seq`` or ``chrom pos end seq`` -> ``Insertion``
    * ``inv...``: ``chrom start end`` -> ``Inversion``
    * ``mei...``: ``meFasta chrom pos meName [meStrand [meStart [meEnd]]]``
      -> ``MobileElementInsertion``
    * ``tra...``: ``chrom1 pos1 chrom2 pos2 orientation`` -> ``Translocation``
    * ``bkend`` / ``breakend``: ``chrom1 pos1 strand1 chrom2 pos2 strand2``
      -> ``Breakend``

    For inversions, translocations and breakends, ``dataHub.args.min_mapq``
    is set to -1 if it was ``None`` (a side effect on ``dataHub.args``).

    Parameters:
        dataHub: provides ``args`` (``type``, ``breakpoints``, ``min_mapq``),
            ``alignDistance`` and ``genome``.

    Returns:
        The constructed variant; it is also reported via ``logging.info``.

    Raises:
        AssertionError: if the number of breakpoint fields does not match the
            event type, or a deletion has ``start >= end``.
        ValueError: if a coordinate field is not an integer.
        Exception: if the event type is not recognised.
    """
    args = dataHub.args
    eventType = args.type.lower()
    breakpoints = args.breakpoints
    alignDistance = dataHub.alignDistance
    genome = dataHub.genome

    if eventType.startswith("del"):
        chrom, start, end = _parseDeletionInterval(breakpoints, "del")
        # NOTE(review): the -1 shift looks like a 1-based to 0-based
        # coordinate conversion -- confirm against Deletion.from_breakpoints.
        variant = Deletion.from_breakpoints(
            chrom, start - 1, end - 1, alignDistance, genome)
    elif eventType in ["ldel", "largedeletion"]:
        chrom, start, end = _parseDeletionInterval(breakpoints, "ldel")
        variant = LargeDeletion.from_breakpoints(
            chrom, start - 1, end - 1, alignDistance, genome)
    elif eventType.startswith("ins"):
        _requireBreakpointFormat(len(breakpoints) in [3, 4], "ins")
        chrom = breakpoints[0]
        pos = int(breakpoints[1])
        if len(breakpoints) == 3:
            # Short form: the insertion happens at a single position.
            seq = breakpoints[2]
            end = pos
        else:
            end = int(breakpoints[2])
            seq = breakpoints[3]
        variant = Insertion(Locus(chrom, pos, end, "+"), seq, alignDistance, genome)
    elif eventType.startswith("inv"):
        _requireBreakpointFormat(len(breakpoints) == 3, "inv")
        chrom = breakpoints[0]
        start = int(breakpoints[1])
        end = int(breakpoints[2])
        _defaultMinMapq(args)
        variant = Inversion(Locus(chrom, start, end, "+"), alignDistance, genome)
    elif eventType.startswith("mei"):
        _requireBreakpointFormat(len(breakpoints) >= 4, "mei")
        # Coordinates are converted to int here, as in every other branch,
        # so Locus never receives raw command-line strings mixed with the
        # numeric defaults below.
        insertionPos = int(breakpoints[2])
        insertionBreakpoint = Locus(breakpoints[1], insertionPos, insertionPos, "+")

        meName = breakpoints[3]
        meStrand = getListDefault(breakpoints, 4, "+")
        meStart = getListDefault(breakpoints, 5, None)
        meStart = 0 if meStart is None else int(meStart)
        meEnd = getListDefault(breakpoints, 6, None)
        # 1e100 stands in for "no end given" (open-ended element range).
        meEnd = 1e100 if meEnd is None else int(meEnd)
        meCoords = Locus(meName, meStart, meEnd, meStrand)

        # The first positional field is the fasta file with the mobile
        # element sequences.
        meFasta = genomesource.FastaGenomeSource(breakpoints[0])
        variant = MobileElementInsertion(
            insertionBreakpoint, meCoords, meFasta, alignDistance, genome)
    elif eventType.startswith("tra"):
        _requireBreakpointFormat(len(breakpoints) == 5, "tra")
        chrom1 = breakpoints[0]
        start1 = int(breakpoints[1])
        chrom2 = breakpoints[2]
        start2 = int(breakpoints[3])
        orientation = breakpoints[4]
        _defaultMinMapq(args)
        variant = Translocation(
            Locus(chrom1, start1, start1, "+"),
            Locus(chrom2, start2, start2, orientation),
            alignDistance, genome)
    elif eventType in ["bkend", "breakend"]:
        _requireBreakpointFormat(len(breakpoints) == 6, "bkend")
        chrom1 = breakpoints[0]
        start1 = int(breakpoints[1])
        strand1 = breakpoints[2]
        chrom2 = breakpoints[3]
        start2 = int(breakpoints[4])
        strand2 = breakpoints[5]
        _defaultMinMapq(args)
        variant = Breakend(
            Locus(chrom1, start1, start1, strand1),
            Locus(chrom2, start2, start2, strand2),
            alignDistance, genome)
    else:
        # List every type handled above (the previous message left out
        # inversions and large deletions).
        raise Exception(
            "only accept event types of deletion, largedeletion, insertion, "
            "inversion, mei, translocation or breakend")

    logging.info(" Variant: {}".format(variant))
    return variant