コード例 #1
0
def trimMatchOverlapsInBoth(inpfile, outfile, trim_subtype):
    """Run the two-axis overlap-trimming pipeline from inpfile to outfile.

    The stages run in this order: MatchRecord.sortInXorderAP,
    coalesceMatches, trimMatchOverlapsInX, MatchRecord.sortInYorderAP and
    trimMatchOverlapsInY.  Every stage reads the file written by the stage
    before it and writes a fresh MyFile.myfile() scratch file, except the
    last stage, which writes outfile.  trim_subtype is forwarded unchanged
    to both trimming stages.  Returns None.
    """
    # Third argument of coalesceMatches: true only for subtypes 'x' and 'u'.
    coalesceFlag = trim_subtype in ('x', 'u')

    # Two scratch names are rebound alternately, so each rebinding drops
    # the reference to a scratch file that no later stage reads.
    scratchA = MyFile.myfile()
    MatchRecord.sortInXorderAP(inpfile, scratchA)

    # The following coalescing assumes perfect runs.
    scratchB = MyFile.myfile()
    coalesceMatches(scratchA, scratchB, coalesceFlag)

    scratchA = MyFile.myfile()
    trimMatchOverlapsInX(scratchB, scratchA, trim_subtype)

    scratchB = MyFile.myfile()
    MatchRecord.sortInYorderAP(scratchA, scratchB)

    trimMatchOverlapsInY(scratchB, outfile, trim_subtype)
コード例 #2
0
def trimMatchOverlapsInBoth(inpfile, outfile, trim_subtype):
    """Trim match overlaps along the X axis and then along the Y axis.

    Pipeline, in order: MatchRecord.sortInXorderAP, coalesceMatches,
    trimMatchOverlapsInX, MatchRecord.sortInYorderAP,
    trimMatchOverlapsInY.  Intermediate results live in MyFile.myfile()
    scratch files; only the final stage writes to outfile.  trim_subtype
    is passed through as-is to the two trimming stages.  Returns None.
    """
    # coalesceMatches receives a true flag only for subtypes 'x' and 'u'.
    coalesceFlag = trim_subtype in ('x', 'u')

    # Stage 1: order the input along X.
    stageIn = MyFile.myfile()
    MatchRecord.sortInXorderAP(inpfile, stageIn)

    # Stage 2: coalesce.  The following coalescing assumes perfect runs.
    stageOut = MyFile.myfile()
    coalesceMatches(stageIn, stageOut, coalesceFlag)

    # Stage 3: trim along X.  Rebinding stageIn releases the stage-1 file.
    stageIn = MyFile.myfile()
    trimMatchOverlapsInX(stageOut, stageIn, trim_subtype)

    # Stage 4: reorder along Y.  Rebinding stageOut releases the stage-2 file.
    stageOut = MyFile.myfile()
    MatchRecord.sortInYorderAP(stageIn, stageOut)

    # Stage 5: trim along Y straight into the caller's output file.
    trimMatchOverlapsInY(stageOut, outfile, trim_subtype)
コード例 #3
0
ファイル: UniqueFilter.py プロジェクト: peterhj/wgs-assembler
def applyBothKeepMasks(inpfile, outfile):
    """Apply a keep mask along the first axis, then along the second axis.

    For each axis in turn this function
      1. calls findCoverageIntervals on inpfile to fill a fresh keep mask
         file for that axis,
      2. sorts the current matches with MatchRecord.sortInXorderAP (first
         axis) or MatchRecord.sortInYorderAP (second axis), and
      3. hands the sorted matches and the keep mask to applyOneKeepMask.

    The first pass reads inpfile and writes a scratch file; the second
    pass reads that scratch file and writes outfile.  Note that the
    second-axis keep mask is also computed from inpfile, not from the
    output of the first pass.

    Both inpfile and outfile are rewound with seek(0) before any work is
    done.  Returns None.

    Design notes carried over from the original author:
    Maybe we can think of a masking implementation where each ATAC match
    is treated atomically.  Assume that the keep mask intervals are
    sorted by start position.  Assume that the ATAC matches are sorted by
    start position.  Assert that all keep mask intervals are
    non-overlapping and were cut from only one ATAC match.  Thus the
    mapping from keep mask intervals is a function.  Note that this
    requires that we do not coalesce abutting keep mask intervals that
    originate from multiple matches.  Note this still allows an ATAC
    match to overlap more than one keep mask interval.  Ignore all keep
    mask intervals with zero length; their creation has tie breaking
    problems.  See notes on 2003 Jul 29.
    """
    debug = 0      # set to a true value to dump every intermediate file
    debugnum = 0   # suffix of the most recently written debugfile.<n>
    inpfile.seek(0)
    outfile.seek(0)

    # ---- First axis: build its keep mask, sort in X order, apply. ----
    processFirstAxis = 1
    keepMaskFile = MyFile.myfile()
    sortedMatches = MyFile.myfile()
    firstAxisResult = MyFile.myfile()

    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        # NOTE(review): unlike the other dumps, the keep mask file is not
        # rewound before being dumped; kept exactly as in the original.
        debugnum += 1
        _dumpDebugFile(keepMaskFile, debugnum)

    MatchRecord.sortInXorderAP(inpfile, sortedMatches)
    if debug:
        sortedMatches.seek(0)
        debugnum += 1
        _dumpDebugFile(sortedMatches, debugnum)

    applyOneKeepMask(sortedMatches, firstAxisResult, keepMaskFile,
                     processFirstAxis)
    if debug:
        firstAxisResult.seek(0)
        debugnum += 1
        _dumpDebugFile(firstAxisResult, debugnum)

    # ---- Second axis: build its keep mask, sort in Y order, apply. ----
    processFirstAxis = 0
    keepMaskFile = MyFile.myfile()
    sortedMatches = MyFile.myfile()

    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        # NOTE(review): not rewound before dumping, as in the first pass.
        debugnum += 1
        _dumpDebugFile(keepMaskFile, debugnum)

    MatchRecord.sortInYorderAP(firstAxisResult, sortedMatches)
    if debug:
        sortedMatches.seek(0)
        debugnum += 1
        _dumpDebugFile(sortedMatches, debugnum)

    applyOneKeepMask(sortedMatches, outfile, keepMaskFile, processFirstAxis)
    if debug:
        outfile.seek(0)
        debugnum += 1
        _dumpDebugFile(outfile, debugnum)


def _dumpDebugFile(srcfile, debugnum):
    """Copy the lines iterated from srcfile into the file debugfile.<debugnum>.

    Iteration starts at srcfile's current position; callers that want the
    whole file must rewind it first.  The debug file is always closed,
    even if the copy fails part-way.
    """
    debugfile = open("debugfile.%d" % debugnum, "w")
    try:
        for line in srcfile:
            print >> debugfile, line,
    finally:
        debugfile.close()
コード例 #4
0
def applyBothKeepMasks( inpfile, outfile ):
    """Apply a keep mask along the first axis, then along the second axis.

    For each axis in turn this function
      1. calls findCoverageIntervals on inpfile to fill a fresh keep mask
         file for that axis,
      2. sorts the current matches with MatchRecord.sortInXorderAP (first
         axis) or MatchRecord.sortInYorderAP (second axis), and
      3. hands the sorted matches and the keep mask to applyOneKeepMask.

    The first pass reads inpfile and writes a scratch file; the second
    pass reads that scratch file and writes outfile.  Note that the
    second-axis keep mask is also computed from inpfile, not from the
    output of the first pass.

    Both inpfile and outfile are rewound with seek(0) before any work is
    done.  Returns None.

    Design notes carried over from the original author:
    Maybe we can think of a masking implementation where each ATAC match
    is treated atomically.  Assume that the keep mask intervals are
    sorted by start position.  Assume that the ATAC matches are sorted by
    start position.  Assert that all keep mask intervals are
    non-overlapping and were cut from only one ATAC match.  Thus the
    mapping from keep mask intervals is a function.  Note that this
    requires that we do not coalesce abutting keep mask intervals that
    originate from multiple matches.  Note this still allows an ATAC
    match to overlap more than one keep mask interval.  Ignore all keep
    mask intervals with zero length; their creation has tie breaking
    problems.  See notes on 2003 Jul 29.
    """
    debug = 0      # set to a true value to dump every intermediate file
    debugnum = 0   # suffix of the most recently written debugfile.<n>
    inpfile.seek(0)
    outfile.seek(0)

    # ---- First axis: build its keep mask, sort in X order, apply. ----
    processFirstAxis = 1
    keepMaskFile = MyFile.myfile()
    sortedMatches = MyFile.myfile()
    firstAxisResult = MyFile.myfile()

    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        # NOTE(review): unlike the other dumps, the keep mask file is not
        # rewound before being dumped; kept exactly as in the original.
        debugnum += 1
        _dumpDebugFile(keepMaskFile, debugnum)

    MatchRecord.sortInXorderAP(inpfile, sortedMatches)
    if debug:
        sortedMatches.seek(0)
        debugnum += 1
        _dumpDebugFile(sortedMatches, debugnum)

    applyOneKeepMask(sortedMatches, firstAxisResult, keepMaskFile,
                     processFirstAxis)
    if debug:
        firstAxisResult.seek(0)
        debugnum += 1
        _dumpDebugFile(firstAxisResult, debugnum)

    # ---- Second axis: build its keep mask, sort in Y order, apply. ----
    processFirstAxis = 0
    keepMaskFile = MyFile.myfile()
    sortedMatches = MyFile.myfile()

    findCoverageIntervals(inpfile, keepMaskFile, processFirstAxis)
    if debug:
        # NOTE(review): not rewound before dumping, as in the first pass.
        debugnum += 1
        _dumpDebugFile(keepMaskFile, debugnum)

    MatchRecord.sortInYorderAP(firstAxisResult, sortedMatches)
    if debug:
        sortedMatches.seek(0)
        debugnum += 1
        _dumpDebugFile(sortedMatches, debugnum)

    applyOneKeepMask(sortedMatches, outfile, keepMaskFile, processFirstAxis)
    if debug:
        outfile.seek(0)
        debugnum += 1
        _dumpDebugFile(outfile, debugnum)


def _dumpDebugFile(srcfile, debugnum):
    """Copy the lines iterated from srcfile into the file debugfile.<debugnum>.

    Iteration starts at srcfile's current position; callers that want the
    whole file must rewind it first.  The debug file is always closed,
    even if the copy fails part-way.
    """
    debugfile = open("debugfile.%d" % debugnum, "w")
    try:
        for line in srcfile:
            print >> debugfile, line,
    finally:
        debugfile.close()