args.input ) #here user specified the predict_results folder, or it is a custom folder #get files that you need for file in os.listdir(inputdir): if file.endswith('.gbk'): genbank = os.path.join(inputdir, file) #now create the files from genbank input file for consistency in gene naming, etc if not genbank: lib.log.error( "Properly formatted 'funannotate predict' files do no exist in this directory" ) sys.exit(1) else: if 'predict_results' in inputdir or 'update_results' in inputdir: #if user gave predict_results folder, then set output to up one directory outputdir = lib.get_parent_dir(inputdir) else: if not args.out: outputdir = inputdir #output the results in the input directory else: outputdir = args.out if not os.path.isdir(outputdir): os.makedirs(outputdir) #create output directories if not os.path.isdir(os.path.join(outputdir, 'annotate_misc')): os.makedirs(os.path.join(outputdir, 'annotate_misc')) os.makedirs(os.path.join(outputdir, 'annotate_results')) else: lib.log.error( "Output directory %s already exists, will use any existing data. If this is not what you want, exit, and provide a unique name for output folder" % (outputdir))
# Verify that the required external programs are available before any work.
lib.CheckDependencies(programs)

# take care of some preliminary checks
SBT = args.sbt

# The input should be a folder holding funannotate files; results are stored
# alongside it, so no separate output folder needs to be created here.
if not os.path.isdir(args.input):
    # args.input is a path string, so it must be formatted with %s; the
    # previous %i raised TypeError instead of reporting the missing folder.
    lib.log.error("%s directory does not exist" % args.input)
    os._exit(1)

if os.path.isdir(os.path.join(args.input, 'predict_results')):
    # funannotate results should be here
    inputdir = os.path.join(args.input, 'predict_results')
    outputdir = args.input
else:
    # here user specified the predict_results folder, or it is all together
    # wrong, find out in next few lines
    inputdir = os.path.join(args.input)
    outputdir = lib.get_parent_dir(args.input)

# get files that you need
# NOTE(review): Scaffolds / GFF (and AGP / GBK below) are only bound when a
# matching file is found; unless they are initialised before this point, later
# use raises NameError when a file is missing - confirm against the caller.
for file in os.listdir(inputdir):
    if file.endswith('.scaffolds.fa'):
        Scaffolds = os.path.join(inputdir, file)
    if file.endswith('.gff3'):
        GFF = os.path.join(inputdir, file)

# now get the AGP file - this will be in annotate_results folder
if os.path.isdir(os.path.join(outputdir, 'annotate_results')):
    for file in os.listdir(os.path.join(outputdir, 'annotate_results')):
        if file.endswith('.agp'):
            AGP = os.path.join(outputdir, 'annotate_results', file)
        if file.endswith('.gbk'):
            GBK = os.path.join(outputdir, 'annotate_results', file)
# Resolve the GenBank file for input number i. The input may be a funannotate
# output folder, an annotate_results-style folder, or a GenBank file itself.
input_path = args.input[i]
if os.path.isdir(input_path):
    # split arguments into genomes and run a bunch of stats/comparisons
    GBK = ''
    fun_dir = input_path
    annot_dir = os.path.join(input_path, 'annotate_results')
    if os.path.isdir(annot_dir):
        # whole folder is passed, now get the genbank file
        for file in os.listdir(annot_dir):
            if file.endswith('.gbk'):
                GBK = os.path.join(annot_dir, file)
    else:
        # this means was not passed the whole folder, so set fun_dir up a
        # directory to find other results if needed
        fun_dir = lib.get_parent_dir(input_path)
        for file in os.listdir(input_path):
            if file.endswith('.gbk'):
                GBK = os.path.join(input_path, file)
    if not GBK:  # check this
        lib.log.error(
            "Error, was not able to find appropriate GenBank file in the annotate_results folder"
        )
elif input_path.endswith(('.gbk', '.gbff')):
    # a GenBank file was passed directly
    GBK = input_path
else:
    lib.log.error(
        "Error, %s is not a funannotate folder and does not seem to be a GenBank file."
        % input_path)
    os._exit(1)
gbkfilenames.append(GBK)
# now run genome routines
# Count the inputs and refuse to continue when none were given.
num_input = len(args.input)
if not num_input:
    lib.log.error("Error, you did not specify an input, -i")
    os._exit(1)
lib.log.info("Now parsing %i genomes" % num_input)

for i in range(num_input):
    # The user is expected to give the funannotate output folder, but they
    # might give a results folder instead, so do the best you can to check.
    input_path = args.input[i]
    if not os.path.isdir(input_path):
        lib.log.error("Error, one of the inputs is not a folder")
        os._exit(1)

    # split arguments into genomes and run a bunch of stats/comparisons
    # look for annotate_results folder
    GBK = ''
    fun_dir = input_path
    annot_dir = os.path.join(input_path, 'annotate_results')
    if os.path.isdir(annot_dir):
        # whole folder is passed, now get the genbank file
        for file in os.listdir(annot_dir):
            if file.endswith('.gbk'):
                GBK = os.path.join(annot_dir, file)
    else:
        # this means was not passed the whole folder, so set fun_dir up a
        # directory to find other results if needed
        fun_dir = lib.get_parent_dir(input_path)
        for file in os.listdir(input_path):
            if file.endswith('.gbk'):
                GBK = os.path.join(input_path, file)

    if not GBK:  # check this
        lib.log.error("Error, was not able to find appropriate GenBank file in the annotate_results folder")
    gbkfilenames.append(GBK)

    # now run genome routines
    stats.append(lib.genomeStats(GBK))
    merops.append(lib.getStatsfromNote(GBK, 'MEROPS'))
    ipr.append(lib.getStatsfromDbxref(GBK, 'InterPro'))
    pfam.append(lib.getStatsfromDbxref(GBK, 'PFAM'))
else: inputdir = os.path.join(args.input) #here user specified the predict_results folder, or it is all together wrong, find out in next few lines #get files that you need for file in os.listdir(inputdir): if file.endswith('.gbk'): genbank = os.path.join(inputdir, file) if file.endswith('.gff3'): GFF = os.path.join(inputdir, file) #now create the files from genbank input file for consistency in gene naming, etc if not genbank or not GFF: lib.log.error("Properly formatted 'funannotate predict' files do no exist in this directory") os._exit(1) else: if inputdir == outputdir: #I'm assuming means that user supplied the predict folder (probably bad to assume...) outputdir = lib.get_parent_dir(inputdir) #create output directories if not os.path.isdir(os.path.join(outputdir, 'annotate_misc')): os.makedirs(os.path.join(outputdir, 'annotate_misc')) os.makedirs(os.path.join(outputdir, 'annotate_results')) else: lib.log.error("Output directory %s already exists, will use any existing data. If this is not what you want, exit, and provide a unique name for output folder" % (outputdir)) lib.log.info("Parsing input files") Scaffolds = os.path.join(outputdir, 'annotate_misc', 'genome.scaffolds.fasta') Proteins = os.path.join(outputdir, 'annotate_misc','genome.proteins.fasta') Transcripts = os.path.join(outputdir, 'annotate_misc', 'genome.transcripts.fasta') lib.gb2output(genbank, Proteins, Transcripts, Scaffolds) #get absolute path for all input so there are no problems later for i in Scaffolds, Proteins, Transcripts, GFF: i = os.path.abspath(i)