Esempio n. 1
0
    #for key in sorted(dicoOutputTxt.keys()):
    for key in sorted(listKeepSouche, key=sort_human):
        value = "\t".join(dicoOutputTxt[key])
        outputTxt += "%s\t%s\n" % (str(key), str(value))
    output_handle.write(outputTxt)

    # Write each entry of listFileEmpty on its own line. The context manager
    # guarantees the handle is flushed and closed, even if a write fails
    # (the previous bare open() was never closed).
    with open(basename + "_outputListEmpty.txt", "w") as outputListEmpty:
        for fileEmptyName in listFileEmpty:
            outputListEmpty.write(fileEmptyName + "\n")

    with open(basename + "_LenAlign_nbSNP.txt", "w") as output1:
        txt1 = dictDict2txt(dicoFileCountSNP)
        output1.write(txt1)

    with open(basename + "_nbSNPallFile.txt", "w") as output2:
        txt1 = dict2txt(dicoFilenbSNP)
        output2.write(txt1)

    print("\n\nExecution summary:")

    print("  - Outputting \n\
	Il y a au final %i Motif dans tout les MGG\n\
	Il y a %i fichiers vides\n\
	les sequences sont ajouter dans le fichier %s\n\
	la liste des fichiers vides est dans le fichier outputListEmpty.txt" %
          (nbMotifTotal, fileEmpty, outputfilename))
    print("\nStop time: ", strftime("%d-%m-%Y_%H:%M:%S", localtime()))
    print("#################################################################")
    print("#                        End of execution                       #")
    print("#################################################################")
Esempio n. 2
0
            sensDNA = record.strand
            geneIDsens[geneID] = sensDNA

            if geneID not in keepValidList:
                keepValidList.append(geneID)
                recordCount += 1
            # Map every position covered by the record (start inclusive,
            # end exclusive) to its gene ID, grouped by record.seqid.
            # setdefault creates the per-seqid dict on first use, so no
            # explicit membership test is needed.
            for posIter in range(record.start, record.end):
                dicoGenesKeepPosOnScaff.setdefault(record.seqid,
                                                   {})[posIter] = geneID

    print("\nTotal records: %d" % recordCount)

    print(dict2txt(geneIDsens))

    # Iterate over the SNP file
    nblignetotal, ctr = 74215263, 0
    dicoSeqBuild = {}
    with open(tabFile, "r") as tabFileRead:
        header = tabFileRead.readline().rstrip().split("\t")
        soucheIndice = header[3:]
        #print(header)

        for line in tabFileRead:

            if ((ctr % 10000 == 0) and
                (ctr != 0)) or (float(ctr) == nblignetotal):
                percent = (float(ctr) / float(nblignetotal)) * 100
                sys.stdout.write("\rProcessed up to %0.2f %%..." % percent)
                          namesouche2)  # DEBUG

                if ref in namesouche1:
                    dico_ortho.setdefault(souche_contig1,
                                          []).append(souche_contig2)
                    if namesouche2 not in listSouches:
                        listSouches.append(namesouche2)

                if ref in namesouche2:
                    dico_ortho.setdefault(souche_contig2,
                                          []).append(souche_contig1)
                    if namesouche1 not in listSouches:
                        listSouches.append(namesouche1)

    listSouchessort = sorted(listSouches, key=sort_human)
    if args.debug == "True": print(dict2txt(dico_ortho))
    if args.debug == "True": print(listSouchessort)
    if args.debug == "True": print(len(listSouchessort))

    # Write the 1:1 ortholog correspondences:
    nbOrtho1_1 = 0
    listKeep = []
    for souche_contig1, listcorresp in dico_ortho.items():
        namesouche1 = souche_contig1.split("_")[0]
        tabsoucheFind = [souche.split("_")[0] for souche in listcorresp]

        if len(listcorresp) == len(listSouchessort) and sorted(
                listSouchessort, key=sort_human) == sorted(tabsoucheFind,
                                                           key=sort_human):
            listKeep.append(souche_contig1)
            for soucheFind in listcorresp:
Esempio n. 4
0
	files.add_argument('-po', '--pathout', metavar="<path/to/fileout>",type = directory, required=True, dest = 'pathDirectoryOut', help = 'path to fasta files Out')
	args = parser.parse_args()

	#Welcome message
	print("#################################################################")
	print("#            Welcome in addSample (Version " + version + ")              #")
	print("#################################################################")
	print('Start time: ', start_time,'\n')

	# Retrieve the input/output paths passed as command-line arguments
	pathDirectoryIn = args.pathDirectoryIn
	pathDirectoryOut = args.pathDirectoryOut
	pathSampleIn = args.pathSampleIn

	dicoNbSeqInFiles,dicoNbFilesNbSouche = nbSeqInFile2dict(str(pathSampleIn.pathDirectory))
	print((dict2txt(dicoNbSeqInFiles)))
	print(dict2txt(dicoNbFilesNbSouche))

	# Store in current_dir the directory containing this script
	current_dir = os.path.dirname(os.path.abspath(__file__))

	# Open the OrthoMCL file to map family names to gene names
	dictNameFamilly={}
	with open(args.csvFileParam,"r") as fichier:
		for ligne in fichier:
			ltab = ligne.rstrip().split("\t")
			geneName=ltab[1]
			famillyName=ltab[2]
			if famillyName not in dictNameFamilly.keys():
				dictNameFamilly[famillyName]={	"MGG":"",
												"BR32":""
            souche = geneId.split("_")[2]
            #print(souche)
            # open the output file
            dicoOpenFile[MGGName] = open(
                outputfilePath + "orthologue/" + MGGName + "_Orthologue.fasta",
                "a")
            #with open(outputfilePath+"orthologue/"+MGGName+"_Orthologue.fasta", "a") as output_handle:
            #output_handle = open(outputfilePath+"orthologue/"+MGGName+"_Orthologue.fasta", "a")
            new_record_name = souche
            record.id = ""
            record.id = new_record_name
            record.name = ""
            seq = record.seq
            SeqIO.write(record.upper(), dicoOpenFile[MGGName], "fasta")
            dicoOpenFile[MGGName].close()
            #output_handle.close()
        ctr += 1

    dico1, dico2 = nbSeqInFile2dict(outputfilePath + "orthologue/")
    print("check if NBsouche and sequences are correctly extract:\n")
    print(dict2txt(dico2))
    print("\n\nIf up are same below OK\n\n%i\t%i" % (len(toRM), len(mggKeep)))

    #print("  - Outputting \n\
    #Il y a au final %i Sequences garder\n\
    #les sequences sont ajouter dans le fichier %s" %(nbKeep,outputfilePath))
    print("\nStop time: ", strftime("%d-%m-%Y_%H:%M:%S", localtime()))
    print("#################################################################")
    print("#                        End of execution                       #")
    print("#################################################################")