Esempio n. 1
0
            args.input
        )  #here user specified the predict_results folder, or it is a custom folder

    #get files that you need
    #if several .gbk files are present, the last one listed by os.listdir wins
    #NOTE(review): assumes genbank was initialised (e.g. to '') before this point; otherwise the check below raises NameError when no .gbk file is found - confirm
    for file in os.listdir(inputdir):
        if file.endswith('.gbk'):
            genbank = os.path.join(inputdir, file)
    #now create the files from genbank input file for consistency in gene naming, etc
    if not genbank:
        lib.log.error(
            "Properly formatted 'funannotate predict' files do no exist in this directory"
        )
        sys.exit(1)
    else:
        if 'predict_results' in inputdir or 'update_results' in inputdir:  #if user gave predict_results folder, then set output to up one directory
            outputdir = lib.get_parent_dir(inputdir)
        else:
            if not args.out:
                outputdir = inputdir  #output the results in the input directory
            else:
                outputdir = args.out
                if not os.path.isdir(outputdir):
                    os.makedirs(outputdir)
        #create output directories
        #each folder is checked on its own: creating both whenever only annotate_misc was missing
        #raised OSError from os.makedirs if annotate_results was already there
        misc_dir = os.path.join(outputdir, 'annotate_misc')
        results_dir = os.path.join(outputdir, 'annotate_results')
        if os.path.isdir(misc_dir):
            lib.log.error(
                "Output directory %s already exists, will use any existing data.  If this is not what you want, exit, and provide a unique name for output folder"
                % (outputdir))
        else:
            os.makedirs(misc_dir)
        if not os.path.isdir(results_dir):
            os.makedirs(results_dir)
lib.CheckDependencies(programs)

#take care of some preliminary checks
SBT = args.sbt

#need to do some checks here of the input
#should be a folder, with funannotate files, thus store results there, no need to create output folder
if not os.path.isdir(args.input):
    #args.input is a path string, so it has to be formatted with %s; the former %i raised TypeError instead of logging the message
    lib.log.error("%s directory does not exist" % args.input)
    os._exit(1)
if os.path.isdir(os.path.join(args.input, 'predict_results')): #funannotate results should be here
    inputdir = os.path.join(args.input, 'predict_results')
    outputdir = args.input
else:
    inputdir = os.path.join(args.input) #here user specified the predict_results folder, or it is all together wrong, find out in next few lines
    outputdir = lib.get_parent_dir(args.input)
#get files that you need
#Scaffolds and GFF are only bound when a matching file exists in inputdir; if several match, the last one listed wins
for file in os.listdir(inputdir):
    if file.endswith('.scaffolds.fa'):
        Scaffolds = os.path.join(inputdir, file)
    if file.endswith('.gff3'):
        GFF = os.path.join(inputdir, file)

#now get the AGP file - this will be in annotate_results folder
#AGP and GBK stay unbound when the folder is missing or holds no matching file
if os.path.isdir(os.path.join(outputdir, 'annotate_results')):
    for file in os.listdir(os.path.join(outputdir, 'annotate_results')):
        if file.endswith('.agp'):
            AGP = os.path.join(outputdir, 'annotate_results', file)
        if file.endswith('.gbk'):
            GBK = os.path.join(outputdir, 'annotate_results', file)
 #work out the GenBank file for input number i: it is either found inside a funannotate folder
 #or the input itself is a GenBank file
 if os.path.isdir(args.input[i]):  #split arguments into genomes and run a bunch of stats/comparisons
     #look for annotate_results folder
     GBK = ''
     fun_dir = args.input[i]
     if os.path.isdir(os.path.join(args.input[i], 'annotate_results')):
         #whole folder is passed, now get the genbank file
         for file in os.listdir(
                 os.path.join(args.input[i], 'annotate_results')):
             if file.endswith('.gbk'):
                 GBK = os.path.join(args.input[i], 'annotate_results', file)
     else:
         #this means was not passed the whole folder
         #set fun_dir up a directory to find other results if needed
         fun_dir = lib.get_parent_dir(args.input[i])
         for file in os.listdir(args.input[i]):
             if file.endswith('.gbk'):
                 GBK = os.path.join(args.input[i], file)
 elif args.input[i].endswith(('.gbk', '.gbff')):
     #input is a GenBank file given directly
     GBK = args.input[i]
 else:
     lib.log.error(
         "Error, %s is not a funannotate folder and does not seem to be a GenBank file."
         % args.input[i])
     os._exit(1)
 #a missing GenBank file is only logged here; the (empty) value is still appended below
 if not GBK:  #check this
     lib.log.error(
         "Error, was not able to find appropriate GenBank file in the annotate_results folder"
     )
 gbkfilenames.append(GBK)
 #now run genome routines
#refuse to continue without at least one input
num_input = len(args.input)
if not num_input:
    lib.log.error("Error, you did not specify an input, -i")
    os._exit(1)
lib.log.info("Now parsing %i genomes" % num_input)
for i in range(num_input):
    #parse the input, I want user to give output folder for funannotate, put they might give a results folder, so do the best you can to check
    if not os.path.isdir(args.input[i]):
        lib.log.error("Error, one of the inputs is not a folder")
        os._exit(1)
    #split arguments into genomes and run a bunch of stats/comparisons
    #look for annotate_results folder
    GBK = ''
    fun_dir = args.input[i]
    if os.path.isdir(os.path.join(args.input[i], 'annotate_results')):
        #whole folder is passed, now get the genbank file
        for file in os.listdir(os.path.join(args.input[i], 'annotate_results')):
            if file.endswith('.gbk'):
                GBK = os.path.join(args.input[i], 'annotate_results', file)
    else:
        #this means was not passed the whole folder
        #set fun_dir up a directory to find other results if needed
        fun_dir = lib.get_parent_dir(args.input[i])
        for file in os.listdir(args.input[i]):
            if file.endswith('.gbk'):
                GBK = os.path.join(args.input[i], file)
    #NOTE(review): a missing GenBank file is only logged; the empty GBK value is still passed to the routines below - confirm that is intended
    if not GBK: #check this
        lib.log.error("Error, was not able to find appropriate GenBank file in the annotate_results folder")
    gbkfilenames.append(GBK)
    #now run genome routines
    stats.append(lib.genomeStats(GBK))
    merops.append(lib.getStatsfromNote(GBK, 'MEROPS'))
    ipr.append(lib.getStatsfromDbxref(GBK, 'InterPro'))
    pfam.append(lib.getStatsfromDbxref(GBK, 'PFAM'))
Esempio n. 5
0
lib.CheckDependencies(programs)

#take care of some preliminary checks
SBT = args.sbt

#need to do some checks here of the input
#should be a folder, with funannotate files, thus store results there, no need to create output folder
if not os.path.isdir(args.input):
    #args.input is a path string, so it has to be formatted with %s; the former %i raised TypeError instead of logging the message
    lib.log.error("%s directory does not exist" % args.input)
    os._exit(1)
if os.path.isdir(os.path.join(args.input, 'predict_results')): #funannotate results should be here
    inputdir = os.path.join(args.input, 'predict_results')
    outputdir = args.input
else:
    inputdir = os.path.join(args.input) #here user specified the predict_results folder, or it is all together wrong, find out in next few lines
    outputdir = lib.get_parent_dir(args.input)
#get files that you need
#Scaffolds and GFF are only bound when a matching file exists in inputdir; if several match, the last one listed wins
for file in os.listdir(inputdir):
    if file.endswith('.scaffolds.fa'):
        Scaffolds = os.path.join(inputdir, file)
    if file.endswith('.gff3'):
        GFF = os.path.join(inputdir, file)

#now get the AGP file - this will be in annotate_results folder
#AGP and GBK stay unbound when the folder is missing or holds no matching file
if os.path.isdir(os.path.join(outputdir, 'annotate_results')):
    for file in os.listdir(os.path.join(outputdir, 'annotate_results')):
        if file.endswith('.agp'):
            AGP = os.path.join(outputdir, 'annotate_results', file)
        if file.endswith('.gbk'):
            GBK = os.path.join(outputdir, 'annotate_results', file)
    else:
        inputdir = os.path.join(args.input) #here user specified the predict_results folder, or it is all together wrong, find out in next few lines
    #get files that you need
    #if several files match, the last one listed by os.listdir wins
    #NOTE(review): assumes genbank and GFF were initialised (e.g. to '') before this point; otherwise the check below raises NameError when a file is missing - confirm
    for file in os.listdir(inputdir):
        if file.endswith('.gbk'):
            genbank = os.path.join(inputdir, file)
        if file.endswith('.gff3'):
            GFF = os.path.join(inputdir, file)
    
    #now create the files from genbank input file for consistency in gene naming, etc
    if not genbank or not GFF:
        lib.log.error("Properly formatted 'funannotate predict' files do no exist in this directory")
        os._exit(1)
    else:
        if inputdir == outputdir: #I'm assuming means that user supplied the predict folder (probably bad to assume...)
            outputdir = lib.get_parent_dir(inputdir)
        #create output directories
        #each folder is checked on its own: creating both whenever only annotate_misc was missing
        #raised OSError from os.makedirs if annotate_results was already there
        misc_dir = os.path.join(outputdir, 'annotate_misc')
        results_dir = os.path.join(outputdir, 'annotate_results')
        if os.path.isdir(misc_dir):
            lib.log.error("Output directory %s already exists, will use any existing data.  If this is not what you want, exit, and provide a unique name for output folder" % (outputdir))
        else:
            os.makedirs(misc_dir)
        if not os.path.isdir(results_dir):
            os.makedirs(results_dir)
        lib.log.info("Parsing input files")
        #the three FASTA outputs are written into annotate_misc by lib.gb2output
        Scaffolds = os.path.join(outputdir, 'annotate_misc', 'genome.scaffolds.fasta')
        Proteins = os.path.join(outputdir, 'annotate_misc','genome.proteins.fasta')
        Transcripts = os.path.join(outputdir, 'annotate_misc', 'genome.transcripts.fasta')
        lib.gb2output(genbank, Proteins, Transcripts, Scaffolds)

#get absolute path for all input so there are no problems later
#each name is reassigned explicitly: rebinding a loop variable (the previous form) left all four paths unchanged
Scaffolds = os.path.abspath(Scaffolds)
Proteins = os.path.abspath(Proteins)
Transcripts = os.path.abspath(Transcripts)
GFF = os.path.abspath(GFF)