Example #1
0
     # Widen the search window by the maximum allowed spacer length.
     padding = spacerLength
 # Visit every stored interval on this chromosome that overlaps the padded
 # window around (s1, e1); the lower bound of the window is clamped at 0.
 for s2, e2 in sv[record.CHROM].overlap((max(s1 - padding, 0), e1 + padding)):
      # Candidates are the entries kept in svDups for this interval plus the
      # entry stored in sv itself.
      # NOTE(review): the `+` assumes svDups yields a list for every key
      # (e.g. a defaultdict(list)) -- confirm where svDups is built.
      for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]:
         # Never pair a record with itself.
         if inv3to3['id'] == record.ID:
             continue
         # Smaller of the two end-to-start distances between the intervals.
         spacer = min(abs(e1-s2), abs(e2-s1))
         # Only recO is used in this excerpt; the other four values are unused here.
         (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2))
         cc = carrierConcordance(nonRefHap, inv3to3['nonRefHap'])
         # Default: length of the shorter interval.
         delLength = min(e1-s1, e2-s2)
         if record.INFO['SVTYPE'] == "INV":
             # For inversions: the smaller of the start-to-start and
             # end-to-end offsets between the two intervals.
             delLength = min(max(s1,s2)-min(s1,s2), max(e1,e2)-min(e1,e2))
         # A candidate must satisfy all four thresholds: spacer, carrier
         # concordance, minimum size and maximum reciprocal overlap.
         if (spacer <= spacerLength) and (cc >= minCarrierConcordance) and (min(spacer, delLength) >= deletionLength) and (recO <= maxReciprocalOverlap):
             # Strip leading capital letters and zeros from the ID to obtain
             # the integer key used by `control`.
             # NOTE(review): int() raises ValueError if nothing is left after
             # stripping (an ID made only of capitals and zeros).
             svControlID1 = re.sub(r"^[A-Z0]*","", record.ID)
             # The ratio stays at 1.0 when no control entry exists for this ID.
             rdRatio1 = 1.0
             if int(svControlID1) in control.keys():
                 rdRatio1 = altRefReadDepthRatio(rc, control[int(svControlID1)], hap)
             # Same lookup for the partner record.
             svControlID2 = re.sub(r"^[A-Z0]*","", inv3to3['id'])
             rdRatio2 = 1.0
             if int(svControlID2) in control.keys():
                 rdRatio2 = altRefReadDepthRatio(inv3to3['rc'], control[int(svControlID2)], inv3to3['hap'])
             # Report the candidate pair and its metrics on stdout.
             print(record.CHROM, record.POS, record.INFO['END'], record.ID, record.CHROM, s2, e2, inv3to3['id'], spacer, delLength, cc, rdRatio1, rdRatio2)
             # With args.readDepth set, both ratios must be below 0.8.
             if (not args.readDepth) or ((rdRatio1<0.8) and (rdRatio2<0.8)):
                 # Score = smaller of the two 'pe'/peCount values, weighted by concordance.
                 score = float(min(peCount, inv3to3['pe'])) * float(cc)
                 # Keep only the best-scoring partner; the stored span covers
                 # both intervals.
                 if score > invInfo['score']:
                     invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
 # NOTE(review): presumably invInfo is initialised with a negative score before
 # this excerpt, so this test means "a partner was found" -- confirm.
 if invInfo['score'] >= 0:
     # Create the per-chromosome interval tree on first use.
     # NOTE(review): has_key() exists only on Python 2 dicts; the portable
     # spelling is `record.CHROM not in invRegion`.
     if not invRegion.has_key(record.CHROM):
         invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
     # The graph node is keyed by the (record ID, partner ID) pair and carries the score.
     G.add_node((record.ID, invInfo['id']))
     G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score']
     # Walk the regions already registered that overlap the combined span
     # (the loop body lies outside this excerpt).
     for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])):
Example #2
0
            continue
        # Process the record when site filtering is off or its FILTER list is empty.
        if (not args.siteFilter) or (len(record.FILTER) == 0):
            # Per-sample genotype alleles, keyed by sample name.
            hap = dict()
            # Per-sample 'RC' values; samples never stored read back as 0.
            rc = collections.defaultdict(int)
            # Running sum of 'DV' over the calls selected below.
            peCount = 0
            for call in record.samples:
                if call.called:
                    rc[call.sample] = call['RC']
                    # NOTE(review): assumes '/'-separated integer alleles; a
                    # phased 'a|b' genotype would make int() raise ValueError.
                    hap[call.sample] = [int(gVal) for gVal in call['GT'].split('/')]
                    # Count DV only for calls with gt_type != 0 and a positive DV
                    # (presumably non-reference genotypes -- confirm against the
                    # VCF reader in use).
                    if (call.gt_type != 0) and (call['DV'] > 0):
                        peCount += call['DV']
            # Continue only if at least one sample was called.
            if len(hap):
                svStart = record.POS
                svEnd = record.INFO['END']
                # Strip leading capital letters and zeros from the ID to get the
                # integer key used to index `sv`.
                # NOTE(review): int() raises ValueError if nothing is left.
                svControlID = re.sub(r"^[A-Z0]*","", record.ID)
                rdRatio = altRefReadDepthRatio(rc, sv[int(svControlID)], hap)
                #print(record.CHROM, svStart, svEnd, record.ID, rdRatio, sep="\t")
                # A ratio of None is skipped.
                if rdRatio is not None:
                    # Accept DEL when ratio < 0.8, DUP when 1.3 <= ratio <= 1.75.
                    if ((record.INFO['SVTYPE'] == "DEL") and (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP") and (rdRatio >= 1.3) and (rdRatio <= 1.75)):
                        # Valid Call
                        # Create the per-chromosome interval tree on first use.
                        # NOTE(review): has_key() exists only on Python 2 dicts;
                        # the portable spelling is `record.CHROM not in cnvRegion`.
                        if not cnvRegion.has_key(record.CHROM):
                            cnvRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                        # One graph node per accepted call, scored by peCount.
                        G.add_node(record.ID)
                        G.node[record.ID]['Score'] = peCount
                        # Add an edge to each previously registered call whose
                        # interval overlaps this one and passes overlapValid
                        # (the meaning of 0.1 and 10000 is defined by that helper,
                        # which is not visible here).
                        for cnvIStart, cnvIEnd in cnvRegion[record.CHROM].overlap((svStart, svEnd)):
                            otherID = cnvRegion[record.CHROM][(cnvIStart, cnvIEnd)]
                            if overlapValid((svStart, svEnd), (cnvIStart, cnvIEnd), 0.1, 10000):
                                G.add_edge(record.ID, otherID)
                        # Register this interval only after the scan above, so it
                        # is never compared with itself.
                        cnvRegion[record.CHROM][(svStart, svEnd)] = record.ID

# Pick best deletion/duplication for all overlapping calls
Example #3
0
     # The search window is extended on both sides by the maximum spacer length.
     padding = spacerLength
 # Iterate over stored intervals on this chromosome overlapping the padded
 # window around (s1, e1); the window start is clamped at 0.
 for s2, e2 in sv[record.CHROM].overlap((max(s1 - padding, 0), e1 + padding)):
      # Candidate partners: everything in svDups for this interval, followed by
      # the entry held in sv.
      # NOTE(review): the list concatenation assumes svDups returns a list for
      # any key -- confirm where svDups is populated.
      for inv3to3 in svDups[(record.CHROM, s2, e2)] + [sv[record.CHROM][(s2, e2)]]:
         # A record is not a partner of itself.
         if inv3to3['id'] == record.ID:
             continue
         # Minimum of |e1 - s2| and |e2 - s1|.
         spacer = min(abs(e1-s2), abs(e2-s1))
         # Of the five returned metrics, only recO is read in this excerpt.
         (recO, nestedO, recUnion, bpOffset, oLen) = overlapMetrics((s1, e1), (s2, e2))
         cc = carrierConcordance(nonRefHap, inv3to3['nonRefHap'])
         # Length of the shorter of the two intervals ...
         delLength = min(e1-s1, e2-s2)
         if record.INFO['SVTYPE'] == "INV":
             # ... replaced for inversions by the smaller of the distance
             # between the starts and the distance between the ends.
             delLength = min(max(s1,s2)-min(s1,s2), max(e1,e2)-min(e1,e2))
         # All four thresholds must hold for the pair to be considered.
         if (spacer <= spacerLength) and (cc >= minCarrierConcordance) and (min(spacer, delLength) >= deletionLength) and (recO <= maxReciprocalOverlap):
             # Remove the leading run of capitals/zeros from the ID; the
             # remainder is converted to int and used as the key into `control`.
             # NOTE(review): an empty remainder makes int() raise ValueError.
             svControlID1 = re.sub(r"^[A-Z0]*","", record.ID)
             # 1.0 is kept when `control` has no entry for this ID.
             rdRatio1 = 1.0
             if int(svControlID1) in control.keys():
                 rdRatio1 = altRefReadDepthRatio(rc, control[int(svControlID1)], hap)
             # Repeat the lookup for the partner.
             svControlID2 = re.sub(r"^[A-Z0]*","", inv3to3['id'])
             rdRatio2 = 1.0
             if int(svControlID2) in control.keys():
                 rdRatio2 = altRefReadDepthRatio(inv3to3['rc'], control[int(svControlID2)], inv3to3['hap'])
             # Print the pair and its metrics to stdout.
             print(record.CHROM, record.POS, record.INFO['END'], record.ID, record.CHROM, s2, e2, inv3to3['id'], spacer, delLength, cc, rdRatio1, rdRatio2)
             # The read-depth requirement (both ratios < 0.8) applies only when
             # args.readDepth is set.
             if (not args.readDepth) or ((rdRatio1<0.8) and (rdRatio2<0.8)):
                 # min(peCount, partner 'pe') multiplied by the concordance.
                 score = float(min(peCount, inv3to3['pe'])) * float(cc)
                 # Remember the highest-scoring partner and the span covering
                 # both intervals.
                 if score > invInfo['score']:
                     invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
 # NOTE(review): looks like invInfo starts with a negative sentinel score set
 # outside this excerpt, making this a "partner found" check -- confirm.
 if invInfo['score'] >= 0:
     # Lazily create the interval tree for this chromosome.
     # NOTE(review): has_key() is Python 2 only; `record.CHROM not in invRegion`
     # works on both Python 2 and 3.
     if not invRegion.has_key(record.CHROM):
         invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
     # Node identity is the (record ID, partner ID) tuple; its 'Score' is the pair score.
     G.add_node((record.ID, invInfo['id']))
     G.node[(record.ID, invInfo['id'])]['Score'] = invInfo['score']
     # Iterate over already-registered regions overlapping the combined span
     # (the body of this loop is not part of the excerpt).
     for invIStart, invIEnd in invRegion[record.CHROM].overlap((invInfo['start'], invInfo['end'])):
Example #4
0
 # Per-sample genotype alleles, keyed by sample name.
 hap = dict()
 # Per-sample 'RC' values; a sample that was never stored reads back as 0.
 rc = collections.defaultdict(int)
 # Running sum of 'DV' over the calls selected below.
 peCount = 0
 for call in record.samples:
     if call.called:
         rc[call.sample] = call['RC']
         # NOTE(review): assumes '/'-separated integer alleles; a phased
         # 'a|b' genotype would make int() raise ValueError.
         hap[call.sample] = [
             int(gVal) for gVal in call['GT'].split('/')
         ]
         # Only calls with gt_type != 0 and a positive DV contribute
         # (presumably non-reference genotypes -- confirm against the VCF reader).
         if (call.gt_type != 0) and (call['DV'] > 0):
             peCount += call['DV']
 # Continue only if at least one sample was called.
 if len(hap):
     svStart = record.POS
     svEnd = record.INFO['END']
     # Strip leading capital letters and zeros from the ID to get the integer
     # key used to index `sv`.
     # NOTE(review): int() raises ValueError if nothing is left after stripping.
     svControlID = re.sub(r"^[A-Z0]*", "", record.ID)
     rdRatio = altRefReadDepthRatio(rc, sv[int(svControlID)], hap)
     #print(record.CHROM, svStart, svEnd, record.ID, rdRatio, sep="\t")
     # A ratio of None is skipped.
     if rdRatio is not None:
         # Accept DEL when ratio < 0.8, DUP when 1.3 <= ratio <= 1.75.
         if ((record.INFO['SVTYPE'] == "DEL") and
             (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP")
                                  and (rdRatio >= 1.3) and
                                  (rdRatio <= 1.75)):
             # Valid Call
             # Create the per-chromosome interval tree on first use.
             # NOTE(review): has_key() exists only on Python 2 dicts; the
             # portable spelling is `record.CHROM not in cnvRegion`.
             if not cnvRegion.has_key(record.CHROM):
                 cnvRegion[record.CHROM] = banyan.SortedDict(
                     key_type=(int, int),
                     alg=banyan.RED_BLACK_TREE,
                     updator=banyan.OverlappingIntervalsUpdator)
             # One graph node per accepted call, scored by peCount.
             G.add_node(record.ID)
             G.node[record.ID]['Score'] = peCount
             # The statement below is cut off at the end of this excerpt.
             for cnvIStart, cnvIEnd in cnvRegion[