コード例 #1
0
ファイル: somaticFilter.py プロジェクト: macressler/delly
                for cStart, cEnd in sv[chrName].overlap((start, end)):
                    for cSvID, cScore in svDups[(chrName, cStart, cEnd)] + [
                            sv[chrName][(cStart, cEnd)]
                    ]:
                        if record.ID != cSvID:
                            if (cScore > record.INFO['PE']) or (
                                (cScore == record.INFO['PE']) and
                                (cSvID < record.ID)):
                                countBetterHits += 1
            if countBetterHits > 2:
                foundBetterHit = True
        else:
            for cStart, cEnd in sv[record.CHROM].overlap(
                (record.POS, record.INFO['END'])):
                for cSvID, cScore in svDups[(record.CHROM, cStart, cEnd)] + [
                        sv[record.CHROM][(cStart, cEnd)]
                ]:
                    if (record.ID != cSvID) and (overlapValid(
                        (record.POS, record.INFO['END']), (cStart, cEnd))):
                        if (cScore > record.INFO['PE']) or (
                            (cScore == record.INFO['PE']) and
                            (cSvID < record.ID)):
                            foundBetterHit = True
                            break
                if foundBetterHit:
                    break
        if not foundBetterHit:
            record.INFO['RDRATIO'] = rdRat[record.ID]
            record.INFO['SOMATIC'] = True
            vcf_writer.write_record(record)
コード例 #2
0
    vcf_writer = vcf.Writer(open(args.outFile, 'w'),
                            vcf_reader,
                            lineterminator='\n')
    for record in vcf_reader:
        # Only records whose (POS, END) interval is present in the
        # per-chromosome index are considered; all others are not written.
        if record.CHROM not in sv:
            continue
        svInterval = (record.POS, record.INFO['END'])
        if svInterval not in sv[record.CHROM]:
            continue
        foundBetterHit = False
        if not args.keepOverlap:
            # The interval query is issued once, here; the former unused
            # `overlapList` pre-query was dropped.
            for cStart, cEnd in sv[record.CHROM].overlap(svInterval):
                if foundBetterHit:
                    break
                # Candidates: the svDups entries for this interval plus the
                # entry stored in the index itself.
                for cSvID, cScore, cCt in svDups[
                    (record.CHROM, cStart,
                     cEnd)] + [sv[record.CHROM][(cStart, cEnd)]]:
                    if record.ID == cSvID:
                        continue
                    # A competitor wins with a higher score, or with an equal
                    # score and the smaller ID (deterministic tie-break).
                    if (cScore > record.INFO['PE']) or (
                        (cScore == record.INFO['PE']) and
                        (cSvID < record.ID)):
                        if overlapValid(svInterval, (cStart, cEnd), 0.1,
                                        10000000):
                            foundBetterHit = True
                            break

        # Output VCF record
        if not foundBetterHit:
            vcf_writer.write_record(record)
コード例 #3
0
                            rdRatio2 = 1.0
                            if int(svControlID2) in control.keys():
                                rdRatio2 = altRefReadDepthRatio(inv3to3['rc'], control[int(svControlID2)], inv3to3['hap'])
                            print(record.CHROM, record.POS, record.INFO['END'], record.ID, record.CHROM, s2, e2, inv3to3['id'], spacer, delLength, cc, rdRatio1, rdRatio2)
                            if (not args.readDepth) or ((rdRatio1<0.8) and (rdRatio2<0.8)):
                                score = float(min(peCount, inv3to3['pe'])) * float(cc)
                                if score > invInfo['score']:
                                    invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
                # Register the selected partner only when a non-negative score was recorded
                if invInfo['score'] >= 0:
                    # Create the chromosome's interval tree on first use; the `in`
                    # operator replaces dict.has_key(), which Python 3 removed.
                    if record.CHROM not in invRegion:
                        invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                    pairKey = (record.ID, invInfo['id'])
                    pairSpan = (invInfo['start'], invInfo['end'])
                    G.add_node(pairKey)
                    G.node[pairKey]['Score'] = invInfo['score']
                    # Link this pair to every previously stored pair whose interval passes overlapValid
                    for invIStart, invIEnd in invRegion[record.CHROM].overlap(pairSpan):
                        (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)]
                        if overlapValid(pairSpan, (invIStart, invIEnd), 0.1, 10000):
                            G.add_edge(pairKey, (id1, id2))
                    # Stored after the query so the pair never links to itself
                    invRegion[record.CHROM][pairSpan] = pairKey

# Keep the highest-scoring inversion pair of each connected component
# (ties keep the node that is iterated first, because the comparison is strict)
idPairs = {}
for component in networkx.connected_component_subgraphs(G):
    bestScore = -1.0
    for candidate, attrs in component.nodes_iter(data=True):
        candidateScore = attrs['Score']
        if candidateScore > bestScore:
            bestSVs, bestScore = candidate, candidateScore
    idPairs[bestSVs] = bestScore

# Extract selected calls
selectedSVs = dict()
コード例 #4
0
ファイル: ddelClassifier.py プロジェクト: nashera/delly
                            rdRatio2 = 1.0
                            if int(svControlID2) in control.keys():
                                rdRatio2 = altRefReadDepthRatio(inv3to3['rc'], control[int(svControlID2)], inv3to3['hap'])
                            print(record.CHROM, record.POS, record.INFO['END'], record.ID, record.CHROM, s2, e2, inv3to3['id'], spacer, delLength, cc, rdRatio1, rdRatio2)
                            if (not args.readDepth) or ((rdRatio1<0.8) and (rdRatio2<0.8)):
                                score = float(min(peCount, inv3to3['pe'])) * float(cc)
                                if score > invInfo['score']:
                                    invInfo = {'id': inv3to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
                # Register the selected partner only when a non-negative score was recorded
                if invInfo['score'] >= 0:
                    # Create the chromosome's interval tree on first use; the `in`
                    # operator replaces dict.has_key(), which Python 3 removed.
                    if record.CHROM not in invRegion:
                        invRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                    pairKey = (record.ID, invInfo['id'])
                    pairSpan = (invInfo['start'], invInfo['end'])
                    G.add_node(pairKey)
                    G.node[pairKey]['Score'] = invInfo['score']
                    # Link this pair to every previously stored pair whose interval passes overlapValid
                    for invIStart, invIEnd in invRegion[record.CHROM].overlap(pairSpan):
                        (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)]
                        if overlapValid(pairSpan, (invIStart, invIEnd), 0.1, 10000):
                            G.add_edge(pairKey, (id1, id2))
                    # Stored after the query so the pair never links to itself
                    invRegion[record.CHROM][pairSpan] = pairKey

# Keep the highest-scoring inversion pair of each connected component
# (ties keep the node that is iterated first, because the comparison is strict)
idPairs = {}
for component in networkx.connected_component_subgraphs(G):
    bestScore = -1.0
    for candidate, attrs in component.nodes_iter(data=True):
        candidateScore = attrs['Score']
        if candidateScore > bestScore:
            bestSVs, bestScore = candidate, candidateScore
    idPairs[bestSVs] = bestScore

# Extract selected calls
selectedSVs = dict()
コード例 #5
0
ファイル: cnvClassifier.py プロジェクト: rpucheq/delly
                        rdRatio = numpy.median(numpy.array(hetRC))/numpy.median(numpy.array(refRC))
                        if ((record.INFO['SVTYPE'] == "DEL") and (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP") and (rdRatio >= 1.3) and (rdRatio <= 1.75)):
                            validRdRatio = True
                else:
                    validRdRatio = True

                # Check quality
                #print(record.CHROM, svStart, svEnd, record.ID, qIndex, numpy.percentile(ratioRef, 99), altgq, refgq, altratio, sep="\t")
                # A call enters the graph only if the read-depth check passed, its quality
                # index exceeds the threshold and the 99th percentile of ratioRef is small enough
                if (validRdRatio) and (qIndex > quality) and (numpy.percentile(ratioRef, 99) <= args.maxRefRatio):
                    # Create the chromosome's interval tree on first use; the `in`
                    # operator replaces dict.has_key(), which Python 3 removed.
                    if record.CHROM not in cnvRegion:
                        cnvRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                    G.add_node(record.ID)
                    G.node[record.ID]['Score'] = support
                    # Connect to previously stored calls: insertions link to every interval
                    # returned by the query, other types only when overlapValid agrees
                    for cnvIStart, cnvIEnd in cnvRegion[record.CHROM].overlap((svStart, svEnd)):
                        otherID = cnvRegion[record.CHROM][(cnvIStart, cnvIEnd)]
                        if (record.INFO['SVTYPE'] == "INS") or (overlapValid((svStart, svEnd), (cnvIStart, cnvIEnd), 0.1, 10000)):
                            G.add_edge(record.ID, otherID)
                    cnvRegion[record.CHROM][(svStart - 15, svEnd + 15)] = record.ID  # padding for PRECISE insertion

# Pick best deletion/duplication for all overlapping calls
selectedSVs = set()
for H in networkx.connected_component_subgraphs(G):
    # Reset the winner for every component. Without this the None check
    # below could never fail, and a winner from a previous component (or an
    # unbound name on the first one) would be used when no score beats -1.0.
    bestSV = None
    bestScore = -1.0
    for n, d in H.nodes_iter(data=True):
        # Strict comparison: on equal scores the node iterated first is kept
        if d['Score'] > bestScore:
            bestScore = d['Score']
            bestSV = n
    if bestSV is not None:
        selectedSVs.add(bestSV)

# Extract selected calls
コード例 #6
0
ファイル: populationFilter.py プロジェクト: rpucheq/delly
                        # Create the chromosome's interval tree on first use; the `in`
                        # operator replaces dict.has_key(), which Python 3 removed.
                        if record.CHROM not in sv:
                            sv[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                        svKey = (record.POS, record.INFO['END'])
                        svEntry = (record.ID, record.INFO['PE'], record.INFO['CT'])
                        # The first record seen for an interval goes into the tree; later
                        # records with the same interval are appended to svDups instead
                        if svKey not in sv[record.CHROM]:
                            sv[record.CHROM][svKey] = svEntry
                        else:
                            svDups[(record.CHROM, record.POS, record.INFO['END'])].append(svEntry)

# Output vcf records
if args.vcfFile:
    # One Reader construction instead of two duplicated branches; `compressed`
    # is True exactly when the file name ends in '.gz', as before.
    # NOTE(review): the positional 'r' is passed through unchanged; it looks like a
    # leftover file-mode argument -- confirm which vcf.Reader parameter it binds to.
    vcf_reader = vcf.Reader(open(args.vcfFile), 'r', compressed=args.vcfFile.endswith('.gz'))
    vcf_writer = vcf.Writer(open(args.outFile, 'w'), vcf_reader, lineterminator='\n')
    for record in vcf_reader:
        # Only records whose (POS, END) interval is present in the
        # per-chromosome index are considered; all others are not written.
        if record.CHROM not in sv:
            continue
        svInterval = (record.POS, record.INFO['END'])
        if svInterval not in sv[record.CHROM]:
            continue
        foundBetterHit = False
        if not args.keepOverlap:
            # The interval query is issued once, here; the former unused
            # `overlapList` pre-query was dropped.
            for cStart, cEnd in sv[record.CHROM].overlap(svInterval):
                if foundBetterHit:
                    break
                # Candidates: the svDups entries for this interval plus the
                # entry stored in the index itself.
                for cSvID, cScore, cCt in svDups[(record.CHROM, cStart, cEnd)] + [sv[record.CHROM][(cStart, cEnd)]]:
                    if record.ID == cSvID:
                        continue
                    # A competitor wins with a higher score, or with an equal
                    # score and the smaller ID (deterministic tie-break).
                    if (cScore > record.INFO['PE']) or ((cScore == record.INFO['PE']) and (cSvID < record.ID)):
                        if overlapValid(svInterval, (cStart, cEnd), 0.1, 10000000):
                            foundBetterHit = True
                            break

        # Output VCF record
        if not foundBetterHit:
            vcf_writer.write_record(record)
コード例 #7
0
ファイル: pdupClassifier.py プロジェクト: macressler/delly
                                        'score': score
                                    }
                # Register the selected partner only when a non-negative score
                # was recorded
                if dupInfo['score'] >= 0:
                    # Create the chromosome's interval tree on first use; the
                    # `in` operator replaces dict.has_key(), which Python 3
                    # removed.
                    if record.CHROM not in dupRegion:
                        dupRegion[record.CHROM] = banyan.SortedDict(
                            key_type=(int, int),
                            alg=banyan.RED_BLACK_TREE,
                            updator=banyan.OverlappingIntervalsUpdator)
                    pairKey = (record.ID, dupInfo['id'])
                    pairSpan = (dupInfo['start'], dupInfo['end'])
                    G.add_node(pairKey)
                    G.node[pairKey]['Score'] = dupInfo['score']
                    # Link this pair to every previously stored pair whose
                    # interval passes overlapValid
                    for dupIStart, dupIEnd in dupRegion[record.CHROM].overlap(
                            pairSpan):
                        (id1, id2) = dupRegion[record.CHROM][(dupIStart,
                                                              dupIEnd)]
                        if overlapValid(pairSpan, (dupIStart, dupIEnd), 0.1,
                                        10000):
                            G.add_edge(pairKey, (id1, id2))
                    # Stored after the query so the pair never links to itself
                    dupRegion[record.CHROM][pairSpan] = pairKey

# Keep the highest-scoring pair of each connected component
# (ties keep the node that is iterated first, because the comparison is strict)
idPairs = {}
for component in networkx.connected_component_subgraphs(G):
    bestScore = -1.0
    for candidate, attrs in component.nodes_iter(data=True):
        candidateScore = attrs['Score']
        if candidateScore > bestScore:
            bestSVs, bestScore = candidate, candidateScore
    idPairs[bestSVs] = bestScore
コード例 #8
0
ファイル: cnvClassifier.py プロジェクト: jiaolongsun/delly
            # Only evaluate the record when `hap` is non-empty
            if len(hap):
                svStart = record.POS
                svEnd = record.INFO['END']
                # Strip the leading run of upper-case letters and zeros from the ID;
                # the remainder is used as an integer key into `sv`
                svControlID = re.sub(r"^[A-Z0]*","", record.ID)
                rdRatio = altRefReadDepthRatio(rc, sv[int(svControlID)], hap)
                #print(record.CHROM, svStart, svEnd, record.ID, rdRatio, sep="\t")
                if rdRatio is not None:
                    # Deletions need a ratio below 0.8, duplications a ratio within [1.3, 1.75]
                    if ((record.INFO['SVTYPE'] == "DEL") and (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP") and (rdRatio >= 1.3) and (rdRatio <= 1.75)):
                        # Valid Call
                        # Create the chromosome's interval tree on first use; the `in`
                        # operator replaces dict.has_key(), which Python 3 removed.
                        if record.CHROM not in cnvRegion:
                            cnvRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                        G.add_node(record.ID)
                        G.node[record.ID]['Score'] = peCount
                        # Link to every previously stored call whose interval passes overlapValid
                        for cnvIStart, cnvIEnd in cnvRegion[record.CHROM].overlap((svStart, svEnd)):
                            otherID = cnvRegion[record.CHROM][(cnvIStart, cnvIEnd)]
                            if overlapValid((svStart, svEnd), (cnvIStart, cnvIEnd), 0.1, 10000):
                                G.add_edge(record.ID, otherID)
                        cnvRegion[record.CHROM][(svStart, svEnd)] = record.ID

# Pick best deletion/duplication for all overlapping calls
selectedSVs = set()
for H in networkx.connected_component_subgraphs(G):
    # Reset the winner for every component. Without this the None check
    # below could never fail, and a winner from a previous component (or an
    # unbound name on the first one) would be used when no score beats -1.0.
    bestSV = None
    bestScore = -1.0
    for n, d in H.nodes_iter(data=True):
        # Strict comparison: on equal scores the node iterated first is kept
        if d['Score'] > bestScore:
            bestScore = d['Score']
            bestSV = n
    if bestSV is not None:
        selectedSVs.add(bestSV)

# Extract selected calls
コード例 #9
0
ファイル: cnvClassifier.py プロジェクト: macressler/delly
                        (rdRatio < 0.8)) or ((record.INFO['SVTYPE'] == "DUP")
                                             and (rdRatio >= 1.3) and
                                             (rdRatio <= 1.75)):
                        # Valid Call
                        if not cnvRegion.has_key(record.CHROM):
                            cnvRegion[record.CHROM] = banyan.SortedDict(
                                key_type=(int, int),
                                alg=banyan.RED_BLACK_TREE,
                                updator=banyan.OverlappingIntervalsUpdator)
                        G.add_node(record.ID)
                        G.node[record.ID]['Score'] = peCount
                        for cnvIStart, cnvIEnd in cnvRegion[
                                record.CHROM].overlap((svStart, svEnd)):
                            otherID = cnvRegion[record.CHROM][(cnvIStart,
                                                               cnvIEnd)]
                            if overlapValid((svStart, svEnd),
                                            (cnvIStart, cnvIEnd), 0.1, 10000):
                                G.add_edge(record.ID, otherID)
                        cnvRegion[record.CHROM][(svStart, svEnd)] = record.ID

# Pick best deletion/duplication for all overlapping calls
selectedSVs = set()
for H in networkx.connected_component_subgraphs(G):
    # Reset the winner for every component. Without this the None check
    # below could never fail, and a winner from a previous component (or an
    # unbound name on the first one) would be used when no score beats -1.0.
    bestSV = None
    bestScore = -1.0
    for n, d in H.nodes_iter(data=True):
        # Strict comparison: on equal scores the node iterated first is kept
        if d['Score'] > bestScore:
            bestScore = d['Score']
            bestSV = n
    if bestSV is not None:
        selectedSVs.add(bestSV)

# Extract selected calls
コード例 #10
0
ファイル: somaticFilter.py プロジェクト: youkefan18/delly
        if record.ID not in validRecordID:
            continue
        # Judge wether overlapping calls are better
        foundBetterHit = False
        if args.svType == "TRA":
            countBetterHits = 0
            for (chrName, start, end) in [
                (record.CHROM, record.POS - traWindow, record.POS + traWindow),
                (record.INFO["CHR2"], record.INFO["END"] - traWindow, record.INFO["END"] + traWindow),
            ]:
                for cStart, cEnd in sv[chrName].overlap((start, end)):
                    for cSvID, cScore in svDups[(chrName, cStart, cEnd)] + [sv[chrName][(cStart, cEnd)]]:
                        if record.ID != cSvID:
                            if (cScore > record.INFO["PE"]) or ((cScore == record.INFO["PE"]) and (cSvID < record.ID)):
                                countBetterHits += 1
            if countBetterHits > 2:
                foundBetterHit = True
        else:
            for cStart, cEnd in sv[record.CHROM].overlap((record.POS, record.INFO["END"])):
                for cSvID, cScore in svDups[(record.CHROM, cStart, cEnd)] + [sv[record.CHROM][(cStart, cEnd)]]:
                    if (record.ID != cSvID) and (overlapValid((record.POS, record.INFO["END"]), (cStart, cEnd))):
                        if (cScore > record.INFO["PE"]) or ((cScore == record.INFO["PE"]) and (cSvID < record.ID)):
                            foundBetterHit = True
                            break
                if foundBetterHit:
                    break
        if not foundBetterHit:
            record.INFO["RDRATIO"] = rdRat[record.ID]
            record.INFO["SOMATIC"] = True
            vcf_writer.write_record(record)
コード例 #11
0
                # Check quality
                #print(record.CHROM, svStart, svEnd, record.ID, qIndex, numpy.percentile(ratioRef, 99), altgq, refgq, altratio, sep="\t")
                # A call enters the graph only if the read-depth check passed,
                # its quality index exceeds the threshold and the 99th
                # percentile of ratioRef is small enough
                if (validRdRatio) and (qIndex > quality) and (numpy.percentile(
                        ratioRef, 99) <= args.maxRefRatio):
                    # Create the chromosome's interval tree on first use; the
                    # `in` operator replaces dict.has_key(), which Python 3
                    # removed.
                    if record.CHROM not in cnvRegion:
                        cnvRegion[record.CHROM] = banyan.SortedDict(
                            key_type=(int, int),
                            alg=banyan.RED_BLACK_TREE,
                            updator=banyan.OverlappingIntervalsUpdator)
                    G.add_node(record.ID)
                    G.node[record.ID]['Score'] = support
                    # Connect to previously stored calls: insertions link to
                    # every interval returned by the query, other types only
                    # when overlapValid agrees
                    for cnvIStart, cnvIEnd in cnvRegion[record.CHROM].overlap(
                        (svStart, svEnd)):
                        otherID = cnvRegion[record.CHROM][(cnvIStart, cnvIEnd)]
                        if (record.INFO['SVTYPE'] == "INS") or (overlapValid(
                            (svStart, svEnd),
                            (cnvIStart, cnvIEnd), 0.1, 10000)):
                            G.add_edge(record.ID, otherID)
                    cnvRegion[record.CHROM][(
                        svStart - 15, svEnd +
                        15)] = record.ID  # padding for PRECISE insertion

# Pick best deletion/duplication for all overlapping calls
selectedSVs = set()
for H in networkx.connected_component_subgraphs(G):
    bestScore = -1.0
    for n, d in H.nodes_iter(data=True):
        if d['Score'] > bestScore:
            bestScore = d['Score']
            bestSV = n
    if bestSV is not None:
コード例 #12
0
ファイル: pdupClassifier.py プロジェクト: jiaolongsun/delly
                        maxBpOffset = max(abs(s2-s1), abs(e2-e1))
                        cc = carrierConcordance(nonRefHap, dup5to3['hap'])
                        if (nestedO >= minNestedOverlap) and (minBpOffset < maxInsertionOffset) and (maxBpOffset > minDuplicationLength) and (cc >= minCarrierConcordance):
                            rdRatio = rdAltRefRatio(((s1, e1), (s2, e2)), (nonRefHap, dup5to3['hap']), (rc, dup5to3['rc']))
                            if validRdRatio(recO/nestedO, rdRatio, args.readDepth)[0]:
                                score = float(min(peCount, dup5to3['pe'])) * float(cc)
                                if score > dupInfo['score']:
                                    dupInfo = {'id': dup5to3['id'], 'start': min(s1, s2), 'end': max(e1, e2), 'score': score}
                # Register the selected partner only when a non-negative score was recorded
                if dupInfo['score'] >= 0:
                    # Create the chromosome's interval tree on first use; the `in`
                    # operator replaces dict.has_key(), which Python 3 removed.
                    if record.CHROM not in dupRegion:
                        dupRegion[record.CHROM] = banyan.SortedDict(key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator)
                    pairKey = (record.ID, dupInfo['id'])
                    pairSpan = (dupInfo['start'], dupInfo['end'])
                    G.add_node(pairKey)
                    G.node[pairKey]['Score'] = dupInfo['score']
                    # Link this pair to every previously stored pair whose interval passes overlapValid
                    for dupIStart, dupIEnd in dupRegion[record.CHROM].overlap(pairSpan):
                        (id1, id2) = dupRegion[record.CHROM][(dupIStart, dupIEnd)]
                        if overlapValid(pairSpan, (dupIStart, dupIEnd), 0.1, 10000):
                            G.add_edge(pairKey, (id1, id2))
                    # Stored after the query so the pair never links to itself
                    dupRegion[record.CHROM][pairSpan] = pairKey

# Keep the highest-scoring pair of each connected component
# (ties keep the node that is iterated first, because the comparison is strict)
idPairs = {}
for component in networkx.connected_component_subgraphs(G):
    bestScore = -1.0
    for candidate, attrs in component.nodes_iter(data=True):
        candidateScore = attrs['Score']
        if candidateScore > bestScore:
            bestSVs, bestScore = candidate, candidateScore
    idPairs[bestSVs] = bestScore

# Extract selected calls
selectedSVs = dict()
コード例 #13
0
                                        invInfo = {
                                            "id": inv3to3["id"],
                                            "start": min(s1, s2),
                                            "end": max(e1, e2),
                                            "score": score,
                                        }
                # Register the selected partner only when a non-negative score was recorded
                if invInfo["score"] >= 0:
                    # Create the chromosome's interval tree on first use; the `in`
                    # operator replaces dict.has_key(), which Python 3 removed.
                    if record.CHROM not in invRegion:
                        invRegion[record.CHROM] = banyan.SortedDict(
                            key_type=(int, int), alg=banyan.RED_BLACK_TREE, updator=banyan.OverlappingIntervalsUpdator
                        )
                    pairKey = (record.ID, invInfo["id"])
                    pairSpan = (invInfo["start"], invInfo["end"])
                    G.add_node(pairKey)
                    G.node[pairKey]["Score"] = invInfo["score"]
                    # Link this pair to every previously stored pair whose interval passes overlapValid
                    for invIStart, invIEnd in invRegion[record.CHROM].overlap(pairSpan):
                        (id1, id2) = invRegion[record.CHROM][(invIStart, invIEnd)]
                        if overlapValid(pairSpan, (invIStart, invIEnd), 0.1, 10000):
                            G.add_edge(pairKey, (id1, id2))
                    # Stored after the query so the pair never links to itself
                    invRegion[record.CHROM][pairSpan] = pairKey

# Keep the highest-scoring inversion pair of each connected component
# (ties keep the node that is iterated first, because the comparison is strict)
idPairs = {}
for component in networkx.connected_component_subgraphs(G):
    bestScore = -1.0
    for candidate, attrs in component.nodes_iter(data=True):
        candidateScore = attrs["Score"]
        if candidateScore > bestScore:
            bestSVs, bestScore = candidate, candidateScore
    idPairs[bestSVs] = bestScore

# Extract selected calls
selectedSVs = dict()