Beispiel #1
0
def _print_selected_mappings(sam_path, selected_lines):
    """Print the read ID, position and CIGAR of selected lines of a SAM file.

    The file is scanned line by line, starting from the line returned by
    ``common.skipAts`` (presumably the first line after the '@' header
    lines -- confirm against ``common``). A line is reported when the raw
    line, as read from the file, is a member of *selected_lines*.

    Args:
        sam_path: Path of the SAM file to scan.
        selected_lines: Container of raw lines to report.
    """
    # The context manager releases the handle even when a malformed line
    # (fewer than six whitespace-separated fields) raises IndexError.
    with open(sam_path, 'r') as sam_file:
        line = common.skipAts(sam_file)
        while line != "":
            if line in selected_lines:
                # Split once and reuse the fields for the three columns.
                fields = line.split()
                print("READ ID : "+fields[0])
                # NOTE(review): "RED" is probably a typo for "REF"; the
                # label is kept unchanged so existing output stays stable.
                print("POS ON RED : "+fields[3])
                print("CIGAR : "+fields[5])
            line = sam_file.readline()


def output_diff(sam1,sam2,reads,diff_1_2,diff_2_1,diff_1_2_reads,diff_2_1_reads):
    """Print the mappings exclusive to each SAM file, then the matching reads.

    For each direction (sam1 only, then sam2 only) this prints a header,
    the read ID / position / CIGAR of every exclusive mapping line, and
    finally delegates to ``common.output_selected_reads`` to output the
    corresponding reads.

    Args:
        sam1: Path of the first SAM file.
        sam2: Path of the second SAM file.
        reads: Passed through to ``common.output_selected_reads``
            (presumably the reads file -- confirm against ``common``).
        diff_1_2: Raw lines of sam1 whose mapping is absent from sam2.
        diff_2_1: Raw lines of sam2 whose mapping is absent from sam1.
        diff_1_2_reads: Reads selection matching ``diff_1_2``.
        diff_2_1_reads: Reads selection matching ``diff_2_1``.
    """
    print("These mappings are only seen in "+sam1+" and not in "+sam2+" : ")
    _print_selected_mappings(sam1, diff_1_2)
    print("Now outputting the relevent reads : ")
    # Pass the reads selection (not the mapping lines), as
    # output_intersection does with its inter_reads argument.
    common.output_selected_reads(diff_1_2_reads,reads)

    print("These mappings are only seen in "+sam2+" and not in "+sam1+" : ")
    _print_selected_mappings(sam2, diff_2_1)
    print("Now outputting the relevent reads : ")
    common.output_selected_reads(diff_2_1_reads,reads)
Beispiel #2
0
def output_intersection(sam1,sam2,reads,inter,inter_reads):
    """Print the mappings common to both SAM files, then the matching reads.

    Only *sam1* is scanned: every line of it that is a member of *inter*
    has its read ID, position and CIGAR printed. The reads are then output
    through ``common.output_selected_reads``.

    Args:
        sam1: Path of the SAM file that is scanned.
        sam2: Unused here; kept so the signature stays unchanged.
        reads: Passed through to ``common.output_selected_reads``
            (presumably the reads file -- confirm against ``common``).
        inter: Container of raw sam1 lines that are common to both files.
        inter_reads: Reads selection handed to
            ``common.output_selected_reads``.
    """
    print("These are the common mappings between both files : ")
    # The context manager releases the handle even when a malformed line
    # (fewer than six whitespace-separated fields) raises IndexError.
    with open(sam1,'r') as sam_file:
        # common.skipAts supplies the first line to examine (presumably the
        # first one after the '@' header lines -- confirm against common).
        line = common.skipAts(sam_file)
        while line != "":
            if line in inter:
                # Split once and reuse the fields for the three columns.
                fields = line.split()
                print("READ ID : "+fields[0])
                # NOTE(review): "RED" is probably a typo for "REF"; the
                # label is kept unchanged so existing output stays stable.
                print("POS ON RED : "+fields[3])
                print("CIGAR : "+fields[5])
            line = sam_file.readline()
    print("Now outputting the relevent reads : ")
    common.output_selected_reads(inter_reads,reads)
def extractMultiMappings(IOstream):
    """Return the set of read IDs that occur on more than one line.

    The stream is rewound to its start, the leading '@' lines are skipped
    via ``common.skipAts``, and the first whitespace-separated field of
    every remaining line is taken as the read ID.

    Args:
        IOstream: Seekable text stream positioned anywhere; it is rewound
            before reading and left at end-of-file afterwards.

    Returns:
        A set containing each read ID seen at least twice.
    """
    IOstream.seek(0)
    seen_ids = set()
    repeated_ids = set()
    # The helper consumes the '@' lines and hands back the first other line.
    current = common.skipAts(IOstream)
    while current != '':
        read_id = current.split()[0]
        if read_id in seen_ids:
            # Second or later sighting: the read is multi-mapped.
            repeated_ids.add(read_id)
        else:
            seen_ids.add(read_id)
        current = IOstream.readline()
    return repeated_ids
Beispiel #4
0
    parser.add_argument("samFile1", help="One of the SAM files to be studied")
    parser.add_argument("samFile2", help="The second SAM file to be studied")
    parser.add_argument("outfileDifferences1", help="Name of the file where the mappings in file1 but not in file2 will be outputted")
    parser.add_argument("outfileDifferences2", help="Name of the file where the mappings in file2 but not in file1 will be outputted")
    parser.add_argument("outfileIntersection", help="Name of the file where the intersection will be outputted")

    args = parser.parse_args()

    o_diff1, o_diff2 , o_inter = open(args.outfileDifferences1,"w+") , open(args.outfileDifferences2,"w+") , open(args.outfileIntersection,"w+")
    o_diff1.write("Mappings found in "+args.samFile1+" and not in "+args.samFile2+"\n")
    o_diff2.write("Mapping found in "+args.samFile1+" and not in "+args.samFile2+"\n")
    o_inter.write("This file will contain the common mappings between both SAM files "+args.samFile1+" and "+args.samFile2+"\n")

    f1 , f2 = open(args.samFile1,'r') , open(args.samFile2, 'r')
    line1 , line2 = common.skipAts(f1) , common.skipAts(f2)

    while line1 !="" or line2 != "":
        read1 , read2 = line1.split()[0] , line2.split()[0]
        if read1 < read2:
            splitted = line1.split()
            o_diff1.write("Mappings with read ID"+splitted[0]+" :\n\n")
            while line1!="" and splitted[0] == read1:
                o_diff1.write("POS ON REF : "+splitted[3]+"\n")
                o_diff1.write("CIGAR : "+splitted[5]+"\n")
                line1 = f1.readline()
                splitted = line1.split()
            o_diff1.write("\n\n")
        elif read2 < read1:
            splitted = line2.split()
            o_diff2.write("Mappings with read ID "+read2+" :\n\n")
                        help="SAM file obtained with the -k option")
    parser.add_argument(
        "k",
        type=int,
        help="The value of k used when producing SAMobtainedByK")
    parser.add_argument("SAMobtainedByA",
                        help="SAM file obtained with the -a option")

    args = parser.parse_args()

    fk, fa = open(args.SAMobtainedByK, 'r'), open(args.SAMobtainedByA, 'r')

    occK, occA = common.count_occurences(
        args.SAMobtainedByK), common.count_occurences(args.SAMobtainedByA)

    ligne_k, ligne_a = common.skipAts(fk), common.skipAts(fa)

    while ligne_k != "" and ligne_a != "":

        if ligne_a.split()[0] not in occK:
            read_a = ligne_a.split()[0]
            print("Alignement(s) produit(s) par -a mais pas par -k")
            while ligne_a != "" and ligne_a.split()[0] == read_a:
                print(ligne_a)

        else:
            if occK[ligne_k.split()[0]] < occA[ligne_a.split()[0]]:
                read_id_k = ligne_k.split()[0]
                lignes_set_k = set()
                #We add every line from the file obtained with k to the set
                while ligne_k.split()[0] == read_id_k and ligne_k != "":