for dup5to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) minBpOffset = min(abs(s2 - s1), abs(e2 - e1)) maxBpOffset = max(abs(s2 - s1), abs(e2 - e1)) cc = carrierConcordance(nonRefHap, dup5to3['hap']) if (nestedO >= minNestedOverlap) and ( minBpOffset < maxInsertionOffset) and ( maxBpOffset > minDuplicationLength) and ( cc >= minCarrierConcordance): rdRatio = rdAltRefRatio( ((s1, e1), (s2, e2)), (nonRefHap, dup5to3['hap']), (rc, dup5to3['rc'])) if validRdRatio(recO / nestedO, rdRatio, args.readDepth)[0]: score = float(min(peCount, dup5to3['pe'])) * float(cc) if score > dupInfo['score']: dupInfo = { 'id': dup5to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score } if dupInfo['score'] >= 0: if not dupRegion.has_key(record.CHROM): dupRegion[record.CHROM] = banyan.SortedDict( key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
nonRefHap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')] support += call['DV'] if len(nonRefHap): # Collect overlapping calls s1 = record.POS e1 = record.INFO['END'] invInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1} for s2, e2 in sv[record.CHROM].overlap((s1, e1)): for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) minBpOffset = min(abs(s2-s1), abs(e2-e1)) maxBpOffset = max(abs(s2-s1), abs(e2-e1)) cc = carrierConcordance(nonRefHap, inv3to3['hap']) if (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (cc >= minCarrierConcordance): rdRatio = rdAltRefRatio(((s1, e1), (s2, e2)), (nonRefHap, inv3to3['hap']), (rc, inv3to3['rc'])) valid, updSVType = validRdRatio(recO/nestedO, rdRatio) if valid: if (record.INFO['SVTYPE'] != 'INV') and (updSVType != "DUP"): continue if (updSVType != 'DUP') or (maxBpOffset > minDuplicationLength): score = float(min(support, inv3to3['sup'])) * float(cc) if score > invInfo['score']: invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score} if invInfo['score'] >= 0: if not invRegion.has_key(record.CHROM): invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) G.add_node((record.ID, invInfo['id'])) G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score'] for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])): (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)] if overlapValid((invInfo['start'], invInfo['end']), (invIStart, invIEnd), 0.1, 10000):
if (call.gt_type != 0) and (call['DV'] > 0): nonRefHap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')] peCount += call['DV'] if len(nonRefHap): # Collect overlapping calls s1 = record.POS e1 = record.INFO['END'] invInfo = {'id': "NA", 'start': 0, 'end': 0, 'score': -1} for s2, e2 in sv[record.CHROM].overlap((s1, e1)): for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) minBpOffset = min(abs(s2-s1), abs(e2-e1)) cc = carrierConcordance(nonRefHap, inv3to3['hap']) if (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (cc >= minCarrierConcordance): rdRatio = rdAltRefRatio(((s1, e1), (s2, e2)), (nonRefHap, inv3to3['hap']), (rc, inv3to3['rc'])) if validRdRatio(recO/nestedO, rdRatio, args.readDepth)[0]: score = float(min(peCount, inv3to3['pe'])) * float(cc) if score > invInfo['score']: invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score} if invInfo['score'] >= 0: if not invRegion.has_key(record.CHROM): invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) G.add_node((record.ID, invInfo['id'])) G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score'] for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])): (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)] if overlapValid((invInfo['start'], invInfo['end']), (invIStart, invIEnd), 0.1, 10000): G.add_edge((record.ID, invInfo['id']), (id1, id2)) invRegion[record.CHROM][(invInfo['start'], invInfo['end'])] = (record.ID, invInfo['id']) # Pick best pair of inversions out of all overlapping calls
# Walk every interval stored in sv[record.CHROM] that overlaps the current
# record's span (s1, e1) and remember the best-scoring partner in invInfo.
for s2, e2 in sv[record.CHROM].overlap((s1, e1)):
    # Candidates for this interval: the list kept in svDups for the same
    # coordinates, followed by the entry stored under (s2, e2) itself.
    for inv3to3 in (svDups[(record.CHROM, s2, e2)]
                    + [sv[record.CHROM][(s2, e2)]]):
        (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics(
            (s1, e1), (s2, e2))
        # Smaller and larger of the start-to-start / end-to-end distances.
        minBpOffset = min(abs(s2 - s1), abs(e2 - e1))
        maxBpOffset = max(abs(s2 - s1), abs(e2 - e1))
        cc = carrierConcordance(nonRefHap, inv3to3['hap'])

        # Skip the candidate unless all three thresholds are met.
        if not ((nestedO >= minNestedOverlap)
                and (minBpOffset < maxInsertionOffset)
                and (cc >= minCarrierConcordance)):
            continue

        rdRatio = rdAltRefRatio(
            ((s1, e1), (s2, e2)),
            (nonRefHap, inv3to3['hap']),
            (rc, inv3to3['rc']))
        # validRdRatio yields a (flag, SV type) pair; only flagged pairs go on.
        valid, updSVType = validRdRatio(recO / nestedO, rdRatio)
        if not valid:
            continue

        # A record whose SVTYPE is not 'INV' is kept only for a "DUP" result.
        if (record.INFO['SVTYPE'] != 'INV') and (updSVType != "DUP"):
            continue

        # A 'DUP' result additionally needs the larger breakpoint offset to
        # exceed minDuplicationLength; any other type passes unconditionally.
        if (updSVType != 'DUP') or (maxBpOffset > minDuplicationLength):
            # Score = smaller of the two 'sup' counts, weighted by concordance.
            score = float(min(support, inv3to3['sup'])) * float(cc)
            if score > invInfo['score']:
                invInfo = {
                    'id': inv3to3['id'],
                    'start': min(s1, s2),
                    'end': max(e1, e2),
                    'score': score,
                }