# Final reporting stage: write the per-strain table, the list of empty input
# files and the two SNP summary tables, then print the execution summary.
#
# NOTE(review): the line structure below was restored from a
# whitespace-collapsed line. The loop body is taken to be the two statements
# that use `key`, and the single `output_handle.write` is taken to follow the
# loop -- confirm against the original layout.

# Rows are emitted for the entries of listKeepSouche (ordered with
# sort_human), not for every key of dicoOutputTxt.
rows = [
    "%s\t%s\n" % (key, "\t".join(dicoOutputTxt[key]))
    for key in sorted(listKeepSouche, key=sort_human)
]
# outputTxt already exists at this point; the rows are appended to it and the
# whole text is written in one call, as before.
outputTxt += "".join(rows)
output_handle.write(outputTxt)

# One empty-file name per line. The handle used to be opened without ever
# being closed; the context manager guarantees flush and close.
with open(basename + "_outputListEmpty.txt", "w") as outputListEmpty:
    outputListEmpty.writelines(
        fileEmptyName + "\n" for fileEmptyName in listFileEmpty)

# Nested-dict table rendered by dictDict2txt.
with open(basename + "_LenAlign_nbSNP.txt", "w") as output1:
    output1.write(dictDict2txt(dicoFileCountSNP))

# Flat-dict table rendered by dict2txt.
with open(basename + "_nbSNPallFile.txt", "w") as output2:
    output2.write(dict2txt(dicoFilenbSNP))

print("\n\nExecution summary:")

# The continuation lines of this literal must stay at column 0: any leading
# whitespace would become part of the printed message.
# NOTE(review): the message names "outputListEmpty.txt" while the file written
# above is basename + "_outputListEmpty.txt" -- confirm the intended wording.
print(" - Outputting \n\
Il y a au final %i Motif dans tout les MGG\n\
Il y a %i fichiers vides\n\
les sequences sont ajouter dans le fichier %s\n\
la liste des fichiers vides est dans le fichier outputListEmpty.txt" % (nbMotifTotal, fileEmpty, outputfilename))
print("\nStop time: ", strftime("%d-%m-%Y_%H:%M:%S", localtime()))
print("#################################################################")
print("# End of execution #")
print("#################################################################")
# NOTE(review): the line below holds many statements whose line breaks and
# indentation were lost; it starts inside a loop over `record` whose header is
# not visible and ends inside the `for line in tabFileRead` loop. The original
# layout must be restored before it can run.
#
# First part (per `record`):
#   * stores record.strand under geneID in geneIDsens;
#   * appends geneID to keepValidList when it is not already there;
#   * increments recordCount;
#   * for every posIter in range(record.start, record.end) maps posIter to
#     geneID in dicoGenesKeepPosOnScaff[record.seqid], creating the inner dict
#     on first use. record.end itself is excluded by range().
#     NOTE(review): whether the end coordinate is meant to be inclusive cannot
#     be told from here -- confirm against the record source.
# Then prints the record total and geneIDsens rendered by dict2txt.
#
# Second part ("Parcours du fichier de SNP" = walk through the SNP file):
#   * nblignetotal is a hard-coded line total (74215263) used only for the
#     progress percentage. NOTE(review): presumably specific to one input file
#     -- confirm or compute it.
#   * reads the tab-separated header of tabFile and keeps the columns from
#     index 3 onward as soucheIndice;
#   * for each following line, writes a "\r"-prefixed progress message to
#     stdout when ctr is a non-zero multiple of 10000 or equals nblignetotal.
#     The rest of the loop body, including where ctr is incremented, is
#     outside this view.
sensDNA = record.strand geneIDsens[geneID] = sensDNA if geneID not in keepValidList: keepValidList.append(geneID) recordCount += 1 for posIter in range(record.start, record.end): if record.seqid not in dicoGenesKeepPosOnScaff.keys(): dicoGenesKeepPosOnScaff[record.seqid] = {posIter: geneID} else: dicoGenesKeepPosOnScaff[record.seqid].update( {posIter: geneID}) print("\nTotal records: %d" % recordCount) print(dict2txt(geneIDsens)) # Parcours du fichier de SNP nblignetotal, ctr = 74215263, 0 dicoSeqBuild = {} with open(tabFile, "r") as tabFileRead: header = tabFileRead.readline().rstrip().split("\t") soucheIndice = header[3:] #print(header) for line in tabFileRead: if ((ctr % 10000 == 0) and (ctr != 0)) or (float(ctr) == nblignetotal): percent = (float(ctr) / float(nblignetotal)) * 100 sys.stdout.write("\rProcessed up to %0.2f %%..." % percent)
# NOTE(review): the line below holds many statements whose line breaks and
# indentation were lost. It starts in the middle of a call (the leading
# `namesouche2)` closes an expression that begins outside this view) and ends
# on a `for` header whose body is not visible. How the `if` statements nest
# cannot be recovered from here.
#
# Statements, in order of appearance:
#   * when `ref` is contained in namesouche1, souche_contig2 is appended to
#     dico_ortho[souche_contig1] (the list is created on first use);
#   * namesouche2 is appended to listSouches if it is not already there;
#   * the same pair of steps is repeated with the roles of 1 and 2 swapped;
#   * listSouchessort is listSouches sorted with sort_human;
#   * three debug prints, each guarded by args.debug == "True" (a comparison
#     against the string "True", not a boolean);
#   * "ecriture des correspondence othologue 1/1" = writing the 1-to-1
#     orthologue correspondences: nbOrtho1_1 is reset to 0 and listKeep to [];
#   * for each (souche_contig1, listcorresp) in dico_ortho: the text before the
#     first "_" of every name is taken as its strain name; souche_contig1 is
#     added to listKeep when listcorresp has as many entries as
#     listSouchessort and the sorted strain names of both lists are equal.
#     listSouchessort is already sorted but is sorted again in the comparison.
#   * namesouche1 is assigned inside that loop but not used in the visible
#     part.
namesouche2) # DEBUG if ref in namesouche1: dico_ortho.setdefault(souche_contig1, []).append(souche_contig2) if namesouche2 not in listSouches: listSouches.append(namesouche2) if ref in namesouche2: dico_ortho.setdefault(souche_contig2, []).append(souche_contig1) if namesouche1 not in listSouches: listSouches.append(namesouche1) listSouchessort = sorted(listSouches, key=sort_human) if args.debug == "True": print(dict2txt(dico_ortho)) if args.debug == "True": print(listSouchessort) if args.debug == "True": print(len(listSouchessort)) # ecriture des correspondence othologue 1/1: nbOrtho1_1 = 0 listKeep = [] for souche_contig1, listcorresp in dico_ortho.items(): namesouche1 = souche_contig1.split("_")[0] tabsoucheFind = [souche.split("_")[0] for souche in listcorresp] if len(listcorresp) == len(listSouchessort) and sorted( listSouchessort, key=sort_human) == sorted(tabsoucheFind, key=sort_human): listKeep.append(souche_contig1) for soucheFind in listcorresp:
# NOTE(review): the line below holds many statements whose line breaks and
# indentation were lost, and it ends inside an unterminated dict literal. The
# original layout must be restored before it can run.
#
# Statements, in order of appearance:
#   * registers the required option -po/--pathout on `files` (converted with
#     `directory`, stored as pathDirectoryOut) and parses the command line;
#   * prints the welcome banner with `version` and `start_time`;
#   * "Récupère le fichier de conf passer en argument" = retrieve the
#     configuration passed as argument: copies pathDirectoryIn,
#     pathDirectoryOut and pathSampleIn out of args;
#   * calls nbSeqInFile2dict on str(pathSampleIn.pathDirectory) and prints the
#     two returned dicts rendered by dict2txt;
#   * "ajoute à la variable current_dir le chemin ou est executer le script" =
#     stores in current_dir the directory that contains this script file;
#   * "Ouverture du fichier OrthoMCL pour correspondance famille nom gene" =
#     opens the OrthoMCL file to map family name to gene name: reads
#     args.csvFileParam line by line, splits each line on tabs, takes column 1
#     as geneName and column 2 as famillyName, and for a family not yet in
#     dictNameFamilly starts an entry with the keys "MGG" and "BR32" set to
#     empty strings. The rest of that entry is outside this view.
files.add_argument('-po', '--pathout', metavar="<path/to/fileout>",type = directory, required=True, dest = 'pathDirectoryOut', help = 'path to fasta files Out') args = parser.parse_args() #Welcome message print("#################################################################") print("# Welcome in addSample (Version " + version + ") #") print("#################################################################") print('Start time: ', start_time,'\n') # Récupère le fichier de conf passer en argument pathDirectoryIn = args.pathDirectoryIn pathDirectoryOut = args.pathDirectoryOut pathSampleIn = args.pathSampleIn dicoNbSeqInFiles,dicoNbFilesNbSouche = nbSeqInFile2dict(str(pathSampleIn.pathDirectory)) print((dict2txt(dicoNbSeqInFiles))) print(dict2txt(dicoNbFilesNbSouche)) # ajoute à la variable current_dir le chemin ou est executer le script current_dir = os.path.dirname(os.path.abspath(__file__)) # Ouverture du fichier OrthoMCL pour correspondance famille nom gene dictNameFamilly={} with open(args.csvFileParam,"r") as fichier: for ligne in fichier: ltab = ligne.rstrip().split("\t") geneName=ltab[1] famillyName=ltab[2] if famillyName not in dictNameFamilly.keys(): dictNameFamilly[famillyName]={ "MGG":"", "BR32":""
# NOTE(review): the line below holds many statements whose line breaks and
# indentation were lost. It starts inside a loop whose header is not visible,
# and where that loop ends cannot be recovered from this view. The original
# layout must be restored before it can run.
#
# Per-record part, in order of appearance:
#   * souche is the third "_"-separated field of geneId;
#   * "ouverture du fichier de sortie" = opening the output file: opens
#     outputfilePath + "orthologue/" + MGGName + "_Orthologue.fasta" in append
#     mode and keeps the handle in dicoOpenFile[MGGName];
#   * record.id is set to souche (the preceding `record.id = ""` is
#     immediately overwritten) and record.name is cleared;
#   * `seq = record.seq` is assigned but not used in the visible part;
#   * record.upper() is written in FASTA format with SeqIO.write, the handle
#     is closed, and ctr is incremented.
#   NOTE(review): the handle is opened and closed by hand with no `with` or
#   try/finally, so it stays open if SeqIO.write raises -- consider a context
#   manager once the layout is restored.
#
# Closing part:
#   * calls nbSeqInFile2dict on the "orthologue/" output directory and prints
#     the second returned dict rendered by dict2txt;
#   * prints len(toRM) and len(mggKeep) side by side; the message asks the
#     reader to check that they match the table above;
#   * prints the stop time and the end-of-execution banner.
souche = geneId.split("_")[2] #print(souche) # ouverture du fichier de sortie dicoOpenFile[MGGName] = open( outputfilePath + "orthologue/" + MGGName + "_Orthologue.fasta", "a") #with open(outputfilePath+"orthologue/"+MGGName+"_Orthologue.fasta", "a") as output_handle: #output_handle = open(outputfilePath+"orthologue/"+MGGName+"_Orthologue.fasta", "a") new_record_name = souche record.id = "" record.id = new_record_name record.name = "" seq = record.seq SeqIO.write(record.upper(), dicoOpenFile[MGGName], "fasta") dicoOpenFile[MGGName].close() #output_handle.close() ctr += 1 dico1, dico2 = nbSeqInFile2dict(outputfilePath + "orthologue/") print("check if NBsouche and sequences are correctly extract:\n") print(dict2txt(dico2)) print("\n\nIf up are same below OK\n\n%i\t%i" % (len(toRM), len(mggKeep))) #print(" - Outputting \n\ #Il y a au final %i Sequences garder\n\ #les sequences sont ajouter dans le fichier %s" %(nbKeep,outputfilePath)) print("\nStop time: ", strftime("%d-%m-%Y_%H:%M:%S", localtime())) print("#################################################################") print("# End of execution #") print("#################################################################")