# Final output stage: write the main table, the list of empty files and the
# two SNP count tables, then print the execution summary.

# One line per key of listKeepSouche (ordered with the sort_human key): the
# key followed by its tab-separated values from dicoOutputTxt.
outputTxt = "".join(
    "%s\t%s\n" % (str(key), "\t".join(dicoOutputTxt[key]))
    for key in sorted(listKeepSouche, key=sort_human)
)
# The handle is managed by `with` so it is flushed and closed; the original
# opened it with a bare open() and never closed it.
with open(outputfilename, "w") as output_handle:
    output_handle.write(outputTxt)

# One empty-file name per line (same leak fix as above).
with open(basename + "_outputListEmpty.txt", "w") as outputListEmpty:
    for fileEmptyName in listFileEmpty:
        outputListEmpty.write(fileEmptyName + "\n")

with open(basename + "_LenAlign_nbSNP.txt", "w") as output1:
    txt1 = dictDict2txt(dicoFileCountSNP)
    output1.write(txt1)

with open(basename + "_nbSNPallFile.txt", "w") as output2:
    txt1 = dict2txt(dicoFilenbSNP)
    output2.write(txt1)

print("\n\nExecution summary:")

# NOTE(review): the original literal used backslash line continuations; the
# indentation of its continuation lines could not be recovered from this
# chunk, so each continued line is assumed to start at column 0.
# NOTE(review): the message names "outputListEmpty.txt" while the file written
# above is basename + "_outputListEmpty.txt" -- confirm which is intended.
print(" - Outputting \n"
      "Il y a au final %i Motif dans tout les MGG\n"
      "Il y a %i fichiers vides\n"
      "les sequences sont ajouter dans le fichier %s\n"
      "la liste des fichiers vides est dans le fichier outputListEmpty.txt"
      % (nbMotifTotal, fileEmpty, outputfilename))
print("\nStop time: ", strftime("%d-%m-%Y_%H:%M:%S", localtime()))
#"nbSNP": len(x['siteIndices'])}
# NOTE(review): `name` and `filein` are not assigned in the visible fragment;
# presumably the three statements below run once per alignment file inside a
# loop whose header is above this chunk -- confirm and re-indent accordingly.
align = Align(filein)
x = Align.polymorphism(align)
# Keep only the two fields of the polymorphism result that are used below.
dicoOutput[name] = {
    "singletons": x['singletons'],
    "siteIndices": x['siteIndices'],
}

# Dump the per-file summary as text.
with open(infoFile, "w") as output_handle:
    output_handle.write(dictDict2txt(dicoOutput))

# Collect the files to report in noInfoFile: those with an empty
# "siteIndices" list, or whose singleton/siteIndices intersection is as long
# as "siteIndices" itself.
# NOTE(review): compare_intersect is assumed to be a pure helper returning the
# common elements of both lists; its result is now computed once and reused
# (the original computed it, ignored it, and called the helper a second time).
listFileRM = []
for fileName, dico in dicoOutput.items():
    listSingleton = list(dico["singletons"])
    listSiteIndices = list(dico["siteIndices"])
    intersection = compare_intersect(listSingleton, listSiteIndices)
    if not listSiteIndices or len(intersection) == len(listSiteIndices):
        listFileRM.append(fileName)

# NOTE(review): the statement that writes `txt` to outputListNoInfo is not
# visible in this chunk; it presumably follows inside this `with` block.
with open(noInfoFile, "w") as outputListNoInfo:
    txt = "\n".join(listFileRM)
"Theta_allSNPs": stats['thetaW'], "Theta_scaffold": stats['thetaW'] / int(dictSizes[scaffold]) } dicoMeanTheta = {} sommeTheta, sommeSize = 0, 0 for scaffold, dico in dictThetaInfo.iteritems(): sommeTheta += dico["Theta_allSNPs"] sommeSize += int(dictSizes[scaffold]) thetaCoreGenome = sommeTheta / sommeSize with open( workingObjDir.pathDirectory + basename + "/" + basename + "_ThetaValues.tab", "w") as ThetaTab: ThetaTab.write(dictDict2txt(dictThetaInfo)) ThetaTab.write("\nthetaCoreGenome\t%.4f" % thetaCoreGenome) #MAKE sh script to run LDhat objDir = directory(workingObjDir.pathDirectory + basename) # list all directory and files in the path #nbInd = 13 #thetaCoreGenome = 0.007 cmdLoadR = "module load compiler/gcc/4.9.2 bioinfo/geos/3.4.2 bioinfo/gdal/1.9.2 mpi/openmpi/1.6.5 bioinfo/R/3.2.2" cmdLookTable = completeLDhatPATH + " -n " + str( nbInd) + " -rhomax 100 -n_pts 201 -theta " + str( thetaCoreGenome) + " -prefix " + objDir.pathDirectory + basename with open(
# NOTE(review): `souche_contig1` is not assigned in the visible fragment; the
# two loops below presumably run inside an enclosing loop over the keys of
# dico_ortho whose header is above this chunk, and are read here as siblings
# (not nested) -- confirm and re-indent accordingly.

# Start every entry of listSouchessort at zero, both in the row of
# souche_contig1 and in the "ZERO-NB" summary row.
for souche in listSouchessort:
    dicoCountNB[souche_contig1][souche] = 0
    dicoCountNB["ZERO-NB"][souche] = 0
# Tally the entries of dico_ortho[souche_contig1] by the text before their
# first underscore.
for souche_contig2 in dico_ortho[souche_contig1]:
    namesouche2 = souche_contig2.split("_")[0]
    dicoCountNB[souche_contig1][namesouche2] += 1

# For each column, count the rows whose value is zero and store the result in
# the "ZERO-NB" row.
for gene, dico in dicoCountNB.items():
    # The summary row is not a data row: iterating it would add one to
    # nbOrthoTotal and could increment its own (still zero) entries.
    if gene == "ZERO-NB":
        continue
    nbOrthoTotal += 1
    for souche, nbOrtho in dico.items():
        if nbOrtho == 0:
            dicoCountNB["ZERO-NB"][souche] += 1

tabFileOut.write(dictDict2txt(dicoCountNB, ref))

# Report each column's zero count and its percentage of all rows, highest
# count first, coloured by percentage band.
for key, value in sorted(dicoCountNB["ZERO-NB"].items(),
                         key=lambda x: x[1],
                         reverse=True):
    # Multiply by 100.0 first so the division is a float division on Python 2
    # as well; guard against an empty table.
    percent = 100.0 * value / nbOrthoTotal if nbOrthoTotal else 0.0
    line = "%s\t%s\t%.2f" % (key, value, percent)
    if percent < 20:
        printCol.purple(line)
    elif percent > 30:
        printCol.red(line)
    else:
        # 20 <= percent <= 30; the original tested `> 20` here, so a value of
        # exactly 20 matched no branch and was never printed.
        printCol.yellow(line)