def _print_mappings_in(sam_path, selected):
    """Print read ID, position and CIGAR for each line of ``sam_path`` in ``selected``.

    ``selected`` is tested with ``line in selected`` against the raw line as
    read from the file (trailing newline included), so it must contain whole
    lines, not read IDs.
    """
    # `with` guarantees the handle is released even if a line is malformed
    # and the field indexing below raises.
    with open(sam_path, 'r') as sam_file:
        # common.skipAts is defined elsewhere; presumably it consumes the
        # '@' header lines and returns the first alignment line -- TODO confirm.
        line = common.skipAts(sam_file)
        while line != "":
            if line in selected:
                # Split once; whitespace-separated fields 0, 3 and 5 are
                # printed as the read ID, the position and the CIGAR string.
                fields = line.split()
                print("READ ID : "+fields[0])
                # NOTE(review): label reads "RED"; probably meant "REF".
                # Kept unchanged so existing output stays identical.
                print("POS ON RED : "+fields[3])
                print("CIGAR : "+fields[5])
            line = sam_file.readline()


def output_diff(sam1,sam2,reads,diff_1_2,diff_2_1,diff_1_2_reads,diff_2_1_reads):
    """Print the mappings that appear in only one of two SAM files.

    For each direction (sam1-only, then sam2-only) the matching alignment
    lines are printed, followed by a call to ``common.output_selected_reads``
    for the corresponding reads.

    Parameters:
        sam1, sam2: paths of the two SAM files.
        reads: passed through unchanged to ``common.output_selected_reads``.
        diff_1_2: collection of raw lines of ``sam1`` that are absent from ``sam2``.
        diff_2_1: collection of raw lines of ``sam2`` that are absent from ``sam1``.
        diff_1_2_reads, diff_2_1_reads: read selections forwarded to
            ``common.output_selected_reads`` for each direction.
    """
    print("These mappings are only seen in "+sam1+" and not in "+sam2+" : ")
    _print_mappings_in(sam1, diff_1_2)
    print("Now outputting the relevent reads : ")
    # The *_reads arguments were previously ignored and the mapping lines were
    # passed instead; output_intersection passes its *_reads argument here, so
    # do the same for consistency.
    common.output_selected_reads(diff_1_2_reads,reads)

    print("These mappings are only seen in "+sam2+" and not in "+sam1+" : ")
    _print_mappings_in(sam2, diff_2_1)
    print("Now outputting the relevent reads : ")
    common.output_selected_reads(diff_2_1_reads,reads)
def output_intersection(sam1,sam2,reads,inter,inter_reads):
    """Print the mappings common to both SAM files, then output their reads.

    Only ``sam1`` is scanned: every line of it that is a member of ``inter``
    is reported. ``sam2`` is accepted for interface symmetry but not read here.

    Parameters:
        sam1: path of the SAM file that is scanned.
        sam2: path of the other SAM file (unused in this function).
        reads: passed through unchanged to ``common.output_selected_reads``.
        inter: collection of raw lines (trailing newline included) shared by
            both files; membership is tested with ``line in inter``.
        inter_reads: read selection forwarded to ``common.output_selected_reads``.
    """
    print("These are the common mappings between both files : ")
    # `with` closes the file even if a malformed line makes the indexing raise.
    with open(sam1,'r') as sam_file:
        # common.skipAts is defined elsewhere; presumably it consumes the
        # '@' header lines and returns the first alignment line -- TODO confirm.
        line = common.skipAts(sam_file)
        while line != "":
            if line in inter:
                # Split once instead of once per printed field.
                fields = line.split()
                print("READ ID : "+fields[0])
                # NOTE(review): label reads "RED"; probably meant "REF".
                # Kept unchanged so existing output stays identical.
                print("POS ON RED : "+fields[3])
                print("CIGAR : "+fields[5])
            line = sam_file.readline()
    print("Now outputting the relevent reads : ")
    common.output_selected_reads(inter_reads,reads)
def extractMultiMappings(IOstream):
    """Return the set of read IDs that occur on more than one alignment line.

    The stream is rewound to its start, the header is skipped through
    ``common.skipAts``, and the first whitespace-separated field of every
    following line is taken as the read ID.
    """
    IOstream.seek(0)
    seen_ids = set()
    repeated_ids = set()
    # Skip the '@' lines; the helper hands back the first line after them.
    record = common.skipAts(IOstream)
    while record != '':
        read_id = record.split()[0]
        if read_id not in seen_ids:
            seen_ids.add(read_id)
        else:
            # Second or later sighting: the read is multi-mapped.
            repeated_ids.add(read_id)
        record = IOstream.readline()
    return repeated_ids
parser.add_argument("samFile1", help="One of the SAM files to be studied") parser.add_argument("samFile2", help="The second SAM file to be studied") parser.add_argument("outfileDifferences1", help="Name of the file where the mappings in file1 but not in file2 will be outputted") parser.add_argument("outfileDifferences2", help="Name of the file where the mappings in file2 but not in file1 will be outputted") parser.add_argument("outfileIntersection", help="Name of the file where the intersection will be outputted") args = parser.parse_args() o_diff1, o_diff2 , o_inter = open(args.outfileDifferences1,"w+") , open(args.outfileDifferences2,"w+") , open(args.outfileIntersection,"w+") o_diff1.write("Mappings found in "+args.samFile1+" and not in "+args.samFile2+"\n") o_diff2.write("Mapping found in "+args.samFile1+" and not in "+args.samFile2+"\n") o_inter.write("This file will contain the common mappings between both SAM files "+args.samFile1+" and "+args.samFile2+"\n") f1 , f2 = open(args.samFile1,'r') , open(args.samFile2, 'r') line1 , line2 = common.skipAts(f1) , common.skipAts(f2) while line1 !="" or line2 != "": read1 , read2 = line1.split()[0] , line2.split()[0] if read1 < read2: splitted = line1.split() o_diff1.write("Mappings with read ID"+splitted[0]+" :\n\n") while line1!="" and splitted[0] == read1: o_diff1.write("POS ON REF : "+splitted[3]+"\n") o_diff1.write("CIGAR : "+splitted[5]+"\n") line1 = f1.readline() splitted = line1.split() o_diff1.write("\n\n") elif read2 < read1: splitted = line2.split() o_diff2.write("Mappings with read ID "+read2+" :\n\n")
help="SAM file obtained with the -k option") parser.add_argument( "k", type=int, help="The value of k used when producing SAMobtainedByK") parser.add_argument("SAMobtainedByA", help="SAM file obtained with the -a option") args = parser.parse_args() fk, fa = open(args.SAMobtainedByK, 'r'), open(args.SAMobtainedByA, 'r') occK, occA = common.count_occurences( args.SAMobtainedByK), common.count_occurences(args.SAMobtainedByA) ligne_k, ligne_a = common.skipAts(fk), common.skipAts(fa) while ligne_k != "" and ligne_a != "": if ligne_a.split()[0] not in occK: read_a = ligne_a.split()[0] print("Alignement(s) produit(s) par -a mais pas par -k") while ligne_a != "" and ligne_a.split()[0] == read_a: print(ligne_a) else: if occK[ligne_k.split()[0]] < occA[ligne_a.split()[0]]: read_id_k = ligne_k.split()[0] lignes_set_k = set() #We add every line from the file obtained with k to the set while ligne_k.split()[0] == read_id_k and ligne_k != "":