def filterChimericReads(IOstream, distance):
    """Write to stdout the alignments that are not part of a split mapping.

    The first line to process is whatever ``common.outputHeader(IOstream)``
    returns (presumably the first non-header line -- confirm in ``common``);
    the remaining lines are pulled with ``IOstream.readline()``.

    Consecutive lines sharing the same first whitespace-separated field are
    treated as one group, so the input must already be grouped by that field.
    Within a group every pair of lines is compared using the begin/end values
    that ``common.beg_end_of_seq`` derives from the sixth field (presumably
    the CIGAR string of a SAM record -- confirm). When one line of the pair
    begins no more than ``distance`` before the other one ends, both lines
    are flagged and left out of the output.

    Args:
        IOstream: readable text stream providing ``readline()``.
        distance: tolerance used in the begin/end comparison.
    """
    current = common.outputHeader(IOstream)
    while True:
        if current == "":
            break

        # Collect the run of consecutive lines that share the first field.
        group_id = current.split()[0]
        group = [current]
        current = IOstream.readline()
        while not (current == "" or current.split()[0] != group_id):
            group.append(current)
            current = IOstream.readline()

        # Flag both members of every pair that looks like a split mapping.
        flagged = set()
        for first_idx, first in enumerate(group):
            first_fields = first.split()
            for second in group[first_idx + 1:]:
                second_fields = second.split()
                first_beg, first_end = common.beg_end_of_seq(first_fields[5])
                second_beg, second_end = common.beg_end_of_seq(
                    second_fields[5])
                if (second_beg - first_end >= -distance
                        or first_beg - second_end >= -distance):
                    flagged.update((first, second))

        # Emit the unflagged lines in their original order.
        for kept in (aln for aln in group if aln not in flagged):
            sys.stdout.write(kept)
# Example no. 2
# 0
def filterByQuality(IOstream, quality):
    """Write to stdout every line whose fifth field is at least ``quality``.

    The first line to process is whatever ``common.outputHeader(IOstream)``
    returns (presumably the first non-header line -- confirm in ``common``);
    the remaining lines are pulled with ``IOstream.readline()`` until it
    returns an empty string.

    Args:
        IOstream: readable text stream providing ``readline()``.
        quality: minimum accepted value of the fifth whitespace-separated
            field, which is parsed with ``int`` (presumably the SAM MAPQ
            column -- confirm).
    """
    record = common.outputHeader(IOstream)
    while True:
        if record == "":
            return
        fields = record.split()
        score = int(fields[4])
        if score >= quality:
            sys.stdout.write(record)
        record = IOstream.readline()
def outputMultimappedReads(IOstream, multimappedReadsSet):
    """Write to stdout every line whose first field is in the given set.

    The stream is rewound to its start before reading, so it must support
    ``seek``. The first line to process is whatever
    ``common.outputHeader(IOstream)`` returns (presumably the first
    non-header line -- confirm in ``common``); the remaining lines are pulled
    with ``IOstream.readline()``.

    Args:
        IOstream: seekable text stream providing ``readline()``.
        multimappedReadsSet: container of read identifiers; membership is
            tested with ``in`` against the first whitespace-separated field
            of each line.
    """
    IOstream.seek(0)
    record = common.outputHeader(IOstream)
    while True:
        if record == '':
            break
        identifier = record.split()[0]
        if identifier in multimappedReadsSet:
            sys.stdout.write(record)
        record = IOstream.readline()
def filterByReference(IOstream):
    """Write to stdout the lines that match another line of the same group.

    The first line to process is whatever ``common.outputHeader(IOstream)``
    returns (presumably the first non-header line -- confirm in ``common``);
    the remaining lines are pulled with ``IOstream.readline()``.

    Consecutive lines sharing the same first whitespace-separated field are
    treated as one group, so the input must already be grouped by that field.
    Within a group, a line is written out only when
    ``common.compareReadIDReferences`` returns 0 for it and at least one
    other line of the group (presumably meaning both map to the same
    reference -- confirm in ``common``).

    Args:
        IOstream: readable text stream providing ``readline()``.
    """
    current = common.outputHeader(IOstream)
    while True:
        if current == "":
            break

        # Collect the run of consecutive lines that share the first field.
        group_id = current.split()[0]
        group = [current]
        current = IOstream.readline()
        while not (current == "" or current.split()[0] != group_id):
            group.append(current)
            current = IOstream.readline()

        # Remember both members of every pair the comparator reports as equal.
        matched = set()
        for first_idx, first in enumerate(group):
            for second in group[first_idx + 1:]:
                if common.compareReadIDReferences(first, second) == 0:
                    matched.update((first, second))

        # Emit the matched lines in their original order.
        for kept in (aln for aln in group if aln in matched):
            sys.stdout.write(kept)
# Example no. 5
# 0
import functools
import argparse

if __name__=='__main__':

    parser = argparse.ArgumentParser(description="Sort a SAM file by several criteria, for now you cannot sort by pos or ref only, only id, id/pos, id/ref, id/pos/ref. The order used between read IDs is the alphabetic order. For sorting using samtools use the sortSam2 script.")

    parser.add_argument("-id", dest="id", action="store_true", help="Allows sorting of SAM file by read ID")
    parser.add_argument("-pos", dest="pos", action="store_true", help="Allows sorting of SAM file by position number")
    parser.add_argument("-ref", dest="ref", action="store_true", help="Allows sorting of SAM file by reference number")

    args = parser.parse_args()

    toBeSorted = list()

    line = common.outputHeader(sys.stdin)

    while line!="":
        toBeSorted.append(line)
        line = sys.stdin.readline()

    if args.pos and args.ref and args.id:
        toBeSorted.sort(key=functools.cmp_to_key(common.compareReadIDReferencesPositions))

    elif args.id and args.pos:
        toBeSorted.sort(key=functools.cmp_to_key(common.compareReadIDPositions))

    elif args.id and args.ref:
        toBeSorted.sort(key=functools.cmp_to_key(common.compareReadIDReferences))

    elif args.id: