# NOTE(review): whitespace-collapsed fragment. It starts inside a scope that is
# not visible here (s1, e1, nonRefHap, peCount, rc, hap, invInfo and record are
# bound earlier) and stops on a `for` header whose body is not shown.
#
# Visible logic, in statement order:
#   * Query sv[record.CHROM] for intervals overlapping (s1, e1) widened by
#     spacerLength on both sides (left edge clamped at 0).
#   * For each hit, walk its svDups entries plus the interval's own entry,
#     skipping the entry whose 'id' equals record.ID.
#   * Compute spacer (smaller of |e1-s2| and |e2-s1|), overlapMetrics,
#     carrierConcordance and delLength (recomputed from the breakpoint offsets
#     when SVTYPE is "INV").
#   * A candidate passing the spacer / concordance / deletionLength /
#     maxReciprocalOverlap thresholds gets two read-depth ratios (each stays
#     1.0 when the numeric ID is not a key of `control`) and is printed.
#   * Unless args.readDepth is set and a ratio is >= 0.8, the candidate is
#     scored as min(peCount, candidate 'pe') * cc; the highest score seen is
#     kept in invInfo together with the merged start/end.
#   * When invInfo['score'] >= 0, a node (record.ID, invInfo['id']) carrying
#     that Score is added to G and invRegion[record.CHROM] is created on demand.
# Nesting above is presumed from statement order -- confirm against the
# properly indented original.
# NOTE(review): has_key() exists only on Python 2 dicts; `in` is the portable
# spelling if invRegion is a plain dict -- verify before changing.
padding = spacerLength for s2, e2 in sv[record.CHROM].overlap((max(s1 - padding, 0), e1 + padding)): for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]: if inv3to3['id'] == record.ID: continue spacer = min(abs(e1-s2), abs(e2-s1)) (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2)) cc = carrierConcordance(nonRefHap, inv3to3['nonRefHap']) delLength = min(e1-s1, e2-s2) if record.INFO['SVTYPE'] == "INV": delLength = min(max(s1,s2)-min(s1,s2), max(e1,e2)-min(e1,e2)) if (spacer <= spacerLength) and (cc >= minCarrierConcordance) and (min(spacer, delLength) >= deletionLength) and (recO <= maxReciprocalOverlap): svControlID1 = re.sub(r"^[A-Z0]*","", record.ID) rdRatio1 = 1.0 if int(svControlID1) in control.keys(): rdRatio1 = altRefReadDepthRatio(rc, control[int(svControlID1)], hap) svControlID2 = re.sub(r"^[A-Z0]*","", inv3to3['id']) rdRatio2 = 1.0 if int(svControlID2) in control.keys(): rdRatio2 = altRefReadDepthRatio(inv3to3['rc'], control[int(svControlID2)], inv3to3['hap']) print(record.CHROM, record.POS, record.INFO['END'], record.ID, record.CHROM, s2, e2, inv3to3['id'], spacer, delLength, cc, rdRatio1, rdRatio2) if (not args.readDepth) or ((rdRatio1<0.8) and (rdRatio2<0.8)): score = float(min(peCount, inv3to3['pe'])) * float(cc) if score > invInfo['score']: invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score} if invInfo['score'] >= 0: if not invRegion.has_key(record.CHROM): invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) G.add_node((record.ID, invInfo['id'])) G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score'] for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])):
continue if (not args.siteFilter) or (len(record.FILTER) == 0): hap = dict() rc = collections.defaultdict(int) peCount = 0 for call in record.samples: if call.called: rc[call.sample] = call['RC'] hap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')] if (call.gt_type != 0) and (call['DV'] > 0): peCount += call['DV'] if len(hap): svStart = record.POS svEnd = record.INFO['END'] svControlID = re.sub(r"^[A-Z0]*","", record.ID) rdRatio = altRefReadDepthRatio(rc, sv[int(svControlID)], hap) #print(record.CHROM, svStart, svEnd, record.ID, rdRatio, sep="\t") if rdRatio is not None: if ((record.INFO['SVTYPE'] == "DEL") and (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP") and (rdRatio >= 1.3) and (rdRatio <= 1.75)): # Valid Call if not cnvRegion.has_key(record.CHROM): cnvRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) G.add_node(record.ID) G.node[record.ID]['Score'] = peCount for cnvIStart, cnvIEnd in cnvRegion[record.CHROM].overlap((svStart, svEnd)): otherID = cnvRegion[record.CHROM][(cnvIStart, cnvIEnd)] if overlapValid((svStart, svEnd), (cnvIStart, cnvIEnd), 0.1, 10000): G.add_edge(record.ID, otherID) cnvRegion[record.CHROM][(svStart, svEnd)] = record.ID # Pick best deletion/duplication for all overlapping calls
# NOTE(review): whitespace-collapsed fragment that is cut off mid-expression
# (it ends at `cnvRegion[`); `record`, `sv`, `cnvRegion` and `G` are bound
# outside this view.
#
# Visible logic, in statement order:
#   * For every called sample, store its 'RC' value in rc and its genotype
#     (the 'GT' string split on '/' and converted to ints) in hap; 'DV' is
#     added to peCount when gt_type != 0 and DV > 0.
#   * When hap is non-empty, strip the leading [A-Z0]* prefix from record.ID,
#     use the remaining integer as the key into `sv`, and obtain rdRatio from
#     altRefReadDepthRatio.
#   * A non-None rdRatio marks a valid call for DEL when < 0.8 and for DUP
#     when within 1.3 .. 1.75 inclusive; the per-chromosome banyan.SortedDict
#     is created on demand and record.ID is added to G with Score = peCount.
#   * The overlap loop that follows is truncated and not documented here.
# Nesting above is presumed from statement order -- confirm against the
# properly indented original.
hap = dict() rc = collections.defaultdict(int) peCount = 0 for call in record.samples: if call.called: rc[call.sample] = call['RC'] hap[call.sample] = [ int(gVal) for gVal in call['GT'].split('/') ] if (call.gt_type != 0) and (call['DV'] > 0): peCount += call['DV'] if len(hap): svStart = record.POS svEnd = record.INFO['END'] svControlID = re.sub(r"^[A-Z0]*", "", record.ID) rdRatio = altRefReadDepthRatio(rc, sv[int(svControlID)], hap) #print(record.CHROM, svStart, svEnd, record.ID, rdRatio, sep="\t") if rdRatio is not None: if ((record.INFO['SVTYPE'] == "DEL") and (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP") and (rdRatio >= 1.3) and (rdRatio <= 1.75)): # Valid Call if not cnvRegion.has_key(record.CHROM): cnvRegion[record.CHROM] = banyan.SortedDict( key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator) G.add_node(record.ID) G.node[record.ID]['Score'] = peCount for cnvIStart, cnvIEnd in cnvRegion[