Esempio n. 1
0
def buildLabelFile(labelFile, pathDir):
    """Write the numbered ``label`` and ``labelsAtop`` files from a label table.

    The input is read line by line; every non-blank line must hold exactly
    two tab-separated fields, unpacked as ``souche`` (first column) and
    ``hote`` (second column).  Records are numbered from 1 in file order.

    Two files are created (or overwritten) under ``pathDir``:

    * ``label``      -- ``<index>\\t<souche>`` per record;
    * ``labelsAtop`` -- ``<index> <hote>`` per record, with spaces in
      ``hote`` replaced by underscores.

    :param labelFile: path of the two-column table; it is resolved through
        ``relativeToAbsolutePath`` before being opened.
    :param pathDir: output directory prefix.  It is concatenated as-is with
        the file names, so it must already end with a path separator.
    :return: tuple ``(label path, labelsAtop path, orderlist)`` where
        ``orderlist`` holds the first-column values in file order.
    :raises ValueError: if a non-blank line does not contain exactly two
        tab-separated fields.
    """
    labelFile = relativeToAbsolutePath(labelFile)
    labelPath = pathDir + "label"
    labelAtopPath = pathDir + "labelsAtop"
    orderlist = []
    with open(labelFile, "r") as labelFileOpen, \
            open(labelAtopPath, "w") as labelAtopOut, \
            open(labelPath, "w") as labelOut:
        for lineNumber, line in enumerate(labelFileOpen, start=1):
            line = line.rstrip()
            # Blank lines (typically a trailing newline at end of file)
            # carry no record; skip them instead of failing on the unpack.
            if not line:
                continue
            fields = line.split("\t")
            if len(fields) != 2:
                raise ValueError(
                    "%s line %i: expected 2 tab-separated fields, found %i" %
                    (labelFile, lineNumber, len(fields)))
            souche, hote = fields
            orderlist.append(souche)
            # Number by records kept, so indices stay consecutive even when
            # blank lines were skipped.
            index = len(orderlist)
            labelAtopOut.write("%i %s\n" % (index, hote.replace(" ", "_")))
            labelOut.write("%i\t%s\n" % (index, souche))
    return labelPath, labelAtopPath, orderlist
Esempio n. 2
0
        required=False,
        dest='tableFileOut',
        help='Name of table file out (default tablein_extractedIDs.tab)')

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#            Welcome in extractColfromList (Version " + version +
          ")              #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    #get arguments
    tableFile = relativeToAbsolutePath(args.tableFile)
    tableFileOut = args.tableFileOut
    IDlist = args.IDlist

    if tableFileOut == "":
        tableFileOut = tableFile.split(".")[0] + "_extractedIDs.tab"

    #loading IDs to be kept in a list
    listNameKeep = loadInList(IDlist)

    if ".gz" in tableFile:
        fichier = gzip.open(tableFile, "rb")
    else:
        fichier = open(tableFile, "rb")

    #loading column IDs in a list
Esempio n. 3
0
                          dest='paramfile',
                          help='tab file with SNP')

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#            Welcome in getdistrgenotypeTAB (Version " + version +
          ")              #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Get the file passed as an argument
    basename = args.paramfile.split(".")[0]
    tabFile = relativeToAbsolutePath(args.paramfile)
    workingDir = "/".join(tabFile.split("/")[:-1])
    print("\t - Working directory is: %s" % workingDir)

    nblignetotal = float(
        os.popen("wc -l " + tabFile).read().rstrip().split(" ")[0])

    print("Parse tab file with %i lines" % nblignetotal)

    # Store in current_dir the directory containing this script
    current_dir = os.path.dirname(os.path.abspath(__file__))

    # Utilisation du VCF
    # lecture et ecriture du header dans le fichier de output

    ctr = 0
Esempio n. 4
0
    #Welcome message
    print("#################################################################")
    print("#         Welcome in make_structure_dir (Version " + version +
          ")           #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments
    nbRepiParam = int(args.nbRepiParam)
    nbRepmParam = int(args.nbRepmParam)
    nbpopiParam = int(args.nbpopiParam)
    nbpopmParam = int(args.nbpopmParam)
    nbIndivParam = args.nbIndivParam
    nbMarkerParam = args.nbMarkerParam
    inputFile = relativeToAbsolutePath(args.inputFile)
    outputFile = args.outputFile

    if outputFile == None:
        outputFile = inputFile.split("/")[-1].split(".")[0]

    workingDir = "/".join(inputFile.split("/")[:-1]) + "/" + outputFile + "/"

    outputSHDir = workingDir + "sh/"
    outputTrashDir = workingDir + "trash/"
    SGENameFile = outputSHDir + "submitQsubstructure.sge"

    print(" - Intput Info:")
    print("\t - Input matrice is: %s" % inputFile)
    print("\t - Output prefix name is: %s" % outputFile)
    print("\t - You want %s < K < %s and %s < Repetition < %s" %
Esempio n. 5
0
                          required=True,
                          dest='pathOut',
                          help='Name of output file directory')

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#          Welcome in buildSNPtoFasta (Version " + version +
          ")          #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the arguments
    gffFile = relativeToAbsolutePath(args.gffFile)
    listKeepFile = relativeToAbsolutePath(args.listKeepFile)
    tabFile = relativeToAbsolutePath(args.tabFile)
    fastaPath = args.fastaPath
    pathFileOut = args.pathOut

    #fastaFile = relativeToAbsolutePath(args.fastaFile)

    print("\t - Input GFF is: %s" % gffFile)
    print("\t - Input listKeppFile is: %s" % listKeepFile)
    print("\t - Input tabFile is: %s" % tabFile)
    print("\t - Input fasta files is: %s" % fastaPath.pathDirectory)
    print("\t - Output fasta files is: %s" % pathFileOut.pathDirectory)

    listKeepID = [
        ID.replace("Mycfi_gene", "gene_") for ID in loadInList(listKeepFile)
Esempio n. 6
0
                       default="L",
                       choices=["L", "C"],
                       dest='flag',
                       help='L for CO (default), C pour gene conversion')

    # check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#          Welcome in make_ldhatfiles (Version " + version +
          ")            #")
    print("#################################################################")
    # get arguments
    workingObjDir = args.workdir
    tabFile = relativeToAbsolutePath(args.tabFile)
    sizeTab = relativeToAbsolutePath(args.sizeTab)
    dataType = args.datatype
    intervalLDhatPATH = args.methode
    flag = args.flag

    print("\t - Workink Directory: %s" % workingObjDir.pathDirectory)
    print("\t - Input Path matrice is: %s" % tabFile)
    print("\t - Input Path size is: %s" % sizeTab)
    print("\t - dataType is : %s" % dataType)
    print("\t - Working with : %s" % intervalLDhatPATH)
    print("\t - flag is: %s\n\n" % flag)

    #exit()
    ##
    # code
Esempio n. 7
0
	parser.add_argument('-v', '--version', action='version', version='You are using %(prog)s version: ' + version,
						help='display make_hapmapfile.py version number and exit')

	filesreq = parser.add_argument_group('Input mandatory infos for running')
	filesreq.add_argument('-f', '--fileIn', metavar="<filename>", type = existant_file, required = True,  dest = 'fileIn', help = 'Table of SNPs without FU')

	files = parser.add_argument_group('Input infos for running with default values')
	files.add_argument('-o', '--out', metavar="<filename>", default="outfile.hapmap", required = False, dest = 'fileOut', help = 'Name of hapmap file (default outfile.hapmap)')
	files.add_argument('-w', '--window', metavar="<int>", type = int, default=1, required = False, dest = 'window', help = 'Minimal window by which SNPs have to be separated, in bp (default = 1 - keep everything)')
	files.add_argument('-c', '--chrom', action='store_true', dest = 'chrom', help = 'If used, hapmap files will be produced by chromosomes')

	# Check parameters
	args = parser.parse_args()

	# get arguments
	fileIn = relativeToAbsolutePath(args.fileIn)
	workdir = "/".join(fileIn.split("/")[:-1])+"/"
	fileOut = workdir+args.fileOut
	window = args.window
	chrom = args.chrom

	if window > 1:
		fileOut = fileOut.split(".")[0]+"_window"+str(window)+".hapmap"
	if window == 0:
		window = 1

	print("#### STARTING")
	print("File will be written in same directory as original file: %s" %workdir)
	print("\n#### CREATING FILES")

	headerHapmap = "rs\talleles\tchrom\tpos\tstrand\tassembly\tcenter\tprotLSID\tassayLSID\tpanelLSID\tQCcode\t"
Esempio n. 8
0
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#           Welcome in run_multiblast (Version " + version +
          ")            #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the arguments
    pathFastaFile = args.fastaFileDir
    pathFileOut = args.pathOut

    # defaults option
    typeBlast = args.typeBlast
    dbPath = relativeToAbsolutePath(args.dbPath)
    outfmtValue = args.outfmtValue
    blastOptionValue = " ".join(args.blastOptionValue)
    nbThreads = args.nbThreads

    outputBlastResDir = pathFileOut.pathDirectory + "blastRes/"
    outputSHDir = pathFileOut.pathDirectory + "sh/"
    outputTrashDir = pathFileOut.pathDirectory + "trash/"
    SGENameFile = outputSHDir + "submitQsubBLAST.sge"

    if len(outfmtValue) > 1:
        outfmtValue = "'" + outfmtValue + "'"

    # resume value to user
    print(" - Intput Info:")
    print("\t - Working in directory: %s" % pathFileOut.pathDirectory)
Esempio n. 9
0
    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#           Welcome in struc2runClumpak (Version " + version +
          ")           #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the information passed as arguments

    # création de l'objet directory
    workingObjDir = args.dirPath
    clumpakObjDir = args.dirPathClumpak
    labelFileParam = relativeToAbsolutePath(args.labelFileParam)

    # Write a default drawparams file if none was provided
    if args.drawparamsParam != None:
        drawparamsFile = relativeToAbsolutePath(args.drawparamsParam)
    else:
        drawparamsFile = relativeToAbsolutePath(workingObjDir.pathDirectory +
                                                "drawparams")
        with open(drawparamsFile, "w") as drawparamsFileWrite:
            drawparamsFileWrite.write(drawparams)
    # build color if not add
    if args.colorParam != None:
        colorParamFile = relativeToAbsolutePath(args.colorParam)
    else:
        colorParamFile = relativeToAbsolutePath(workingObjDir.pathDirectory +
                                                "colorsfile")
    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#      Welcome in extractFromProteineOrtho (Version " + version +
          ")        #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments

    ref = args.refName
    workingDir = "/".join(
        relativeToAbsolutePath(args.proteineOrthoFile).split("/")[:-1]) + "/"
    correspondingCDSDir = workingDir + "correspondingCDS-contig" + args.suffixParam + "/"

    print("\t - Suffix is: %s" % args.suffixParam)
    print("\t - Ref strain is : %s" % ref)
    print("\t - Working directory is: %s" % workingDir)
    print("\t - Corresonding CDS ref/strain directory is: %s\n\n" %
          correspondingCDSDir)

    # List of all the strains in the proteinOrtho file
    listSouches = []

    # Dictionary of orthologous proteins
    dico_ortho = {}
    exist = 0
    # Create the directory holding the correspondences between the reference and the strains
Esempio n. 11
0
        help=
        'choice keep sequences size greater than -l (g/greater) or keep lower (l/lower)'
    )

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#        Welcome in extractSeqFastaFromLen (Version " + version +
          ")          #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments
    fastaFile = relativeToAbsolutePath(args.fastaFile)
    outputfilename = relativeToAbsolutePath(args.paramoutfile)
    lenSize = args.lenSize
    keepValue = args.keepValue
    output_handle = open(relativeToAbsolutePath(outputfilename), "w")

    dicoSize = lenSeq2dict(fastaFile)
    dicoFasta = fasta2dict(fastaFile)

    nbKeep = 0
    nbTotal = len(dicoFasta.keys())

    for ID in sorted(dicoSize.keys(), key=sort_human):
        lenSeq = dicoSize[ID]
        if keepValue in ["g", "greater"]:
            if lenSeq >= lenSize:
Esempio n. 12
0
                          metavar="<filename>",
                          required=True,
                          dest='outputFileParam',
                          help='Name of output figure file')

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print "#################################################################"
    print "#          Welcome in plot_barplot_DAPC (Version " + version + ")           #"
    print "#################################################################"
    print 'Start time: ', start_time, '\n'

    # Retrieve the values passed as arguments
    dirPath = relativeToAbsolutePath(args.dirPath)
    labelFileParam = relativeToAbsolutePath(args.labelFileParam)
    outputFileParam = args.outputFileParam

    fileListCSV = lsExtInDirToList(dirPath, "csv")

    #read membership data and store in dictionary with keys=line numbers, which corresponds to the 'count' key in the tags dictionary
    memberships = {}
    listK = []
    for file in fileListCSV:
        K = int((file.replace(dirPath + "/",
                              '').replace('K', '')).replace('.csv', ''))
        if K not in listK:
            listK.append(K)
        IN = open(file, 'r')
        IN.readline()  # remove header
Esempio n. 13
0
                          required=True,
                          dest='pathOut',
                          help='Name of output file directory')

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#        Welcome in splitMultiFasta (Version " + version +
          ")          #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments
    fastaFile = relativeToAbsolutePath(args.fastaFile)
    pathFileOut = args.pathOut

    print("\t - Input Path is: %s" % pathFileOut.pathDirectory)
    print("\t - fasta file is : %s" % fastaFile)

    dicoFasta = fasta2dict(fastaFile)

    for name, sequence in dicoFasta.items():
        with open(pathFileOut.pathDirectory + name + ".fasta",
                  "w") as output_handle:
            SeqIO.write(sequence, output_handle, "fasta")

    #print("\n\nExecution summary:")

    #print("  - Outputting \n\
Esempio n. 14
0
        choices=["yes", "y", "no", "n"],
        help='choise keep (y/yes) or not keep (n/no) sequences in list file')

    # Check parameters
    args = parser.parse_args()
    checkParameters(args)

    #Welcome message
    print("#################################################################")
    print("#        Welcome in extractSeqFasta (Version " + version +
          ")          #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments
    fastaFile = relativeToAbsolutePath(args.fastaFile)
    outputfilename = relativeToAbsolutePath(args.paramoutfile)
    listFile = relativeToAbsolutePath(args.listFile)
    keepValue = args.keepValue

    output_handle = open(outputfilename, "w")

    if keepValue in ["no", "n"]:
        dico_keep, nbTotal = extractInverseListFromFasta(fastaFile, listFile)
    elif keepValue in ["yes", "y"]:
        dico_keep, nbTotal = extractListFromFasta(fastaFile, listFile)

    nbKeep = len(dico_keep.keys())
    for geneId, sequence in dico_keep.items():
        SeqIO.write(sequence, output_handle, "fasta")
    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#     Welcome in extractSeqFastaCorresponding (Version " + version +
          ")     #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments
    fastaFile = args.fastaFile
    listFile = args.listFile
    outputfilePath = args.paramoutfile
    mggFileKeep = relativeToAbsolutePath(args.mggFileKeep)

    print("\t - Path with fasta is: %s" % fastaFile.pathDirectory)
    print("\t - Path with corresponding Orthologues is  : %s" %
          listFile.pathDirectory)
    print("\t - MGG list keep are in file: %s\n" % mggFileKeep)

    print("\t - Output Orthologues fasta is: %s\n\n" % outputfilePath)

    # Retrieve the list of complete CDS files
    listCDSfiles = fastaFile.lsExtInDirToList(["fasta", "fas", "fa"])
    print("\n".join(listCDSfiles))

    # Load the list of MGG to keep
    mggKeepall = loadInList(mggFileKeep)
Esempio n. 16
0
    print("#           Welcome in %s (Version %s)            #" %
          (__file__, version))
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the information passed as arguments
    tabFileParam = args.tabFileParam
    outFastaParam = args.outFastaParam
    IDParam = args.IDParam
    compress = args.compress

    basename = tabFileParam.split("/")[-1].split(".")[0]
    print(basename)

    if outFastaParam == None:
        outFastaParam = relativeToAbsolutePath(basename + ".fasta")

    # resume value to user
    print(" - Intput Info:")
    print("\t - TAB files is : %s" % tabFileParam)
    if IDParam != None:
        print("\t - Change Individual ID with custom ID provied table : %s" %
              IDParam)
        dicoCustomID = loadInDictCol(IDParam, 0, 1)

    print(" - Output Info:")
    if compress:
        print("\t - Output fasta will be gzip")
    print("\t - Output fasta is:  %s\n\n" % outFastaParam)

    if ".gz" in tabFileParam:
Esempio n. 17
0
        help='File with Strain to keep (one per row), default keep all strains'
    )

    # Check parameters
    args = parser.parse_args()

    #Welcome message
    print("#################################################################")
    print("#        Welcome in grepMotifFromAlignment (Version " + version +
          ")          #")
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    # Retrieve the values passed as arguments
    pathDirectory = args.pathDirectory
    outputfilename = relativeToAbsolutePath(args.paramoutfile)

    print("\t - Input pathDirectory is: %s" % pathDirectory)
    print("\t - Output file name is: %s" % outputfilename)

    if args.listKeepFile not in ["ALL"]:
        listKeepSouche = loadInList(existant_file(args.listKeepFile))
        print("\t - You want to keep strains:\n%s" % "\n".join(listKeepSouche))
        basename = paramlistKeep.split(".")[0]
    else:
        listKeepSouche = []
        print("\t - You want to keep all strains \n")
        basename = "All"

    dicoOutputTxt = {}
    dicoSeqSNP = {}
Esempio n. 18
0
                       metavar="<filename>",
                       type=existant_file,
                       required=False,
                       dest='colorParam',
                       help='File with colors (default 15 color max)')
    # Check parameters
    args = parser.parse_args()
    print(args)
    #Welcome message
    print("#################################################################")
    print("#           Welcome in %s (Version %s)            #" %
          (__file__, version))
    print("#################################################################")
    print('Start time: ', start_time, '\n')

    de = relativeToAbsolutePath(args.deFile)
    rapmsu = relativeToAbsolutePath(args.rapmsuFile)
    irgsp = relativeToAbsolutePath(args.irgspFile)
    msu = relativeToAbsolutePath(args.msuFile)
    outputName = relativeToAbsolutePath(args.outputFile)

    # Iterate over the DE file
    fileDE = open(de, "r")
    with open(outputName, 'w', newline='') as csvfile:
        outwriter = csv.writer(csvfile,
                               delimiter='\t',
                               quoting=csv.QUOTE_MINIMAL)

        for line in fileDE:
            #DBG::print ("---------------------------------------------------------------------------")
            cleanedline = cleaningLine(line, ",")