def main():
    """Compare two EMBL files given on the command line and optionally merge them.

    Command-line options:
        -a       first EMBL file
        -b       second EMBL file to compare
        --merge  pass both records to ``transfer`` and write the result to
                 ``merged.embl`` in the current directory

    Prints the option help and exits when ``util.printHelp(options)`` is true.
    Exits through ``parser.error`` when only one of -a / -b is supplied.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-a", metavar="EMBL-a", help="First EMBL file", action="store", type="string", dest="first_embl")
    parser.add_option("-b", metavar="EMBL-b", help="Second EMBL file to compare", action="store", dest="second_embl")
    parser.add_option("--merge", help="To transfer /product of identical annotations into a merged file", action="store_true", dest="merge")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both inputs are read unconditionally below; fail with a usage message
    # instead of an obscure error from open(None).
    if not options.first_embl or not options.second_embl:
        parser.error("both -a and -b EMBL files are required")

    # Use context managers so the input handles are closed deterministically.
    with open(options.first_embl) as first_handle:
        first_record = SeqIO.read(first_handle, "embl")
    with open(options.second_embl) as second_handle:
        second_record = SeqIO.read(second_handle, "embl")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Analysis of EMBL features A from %s" % options.first_embl)
    print("Analysis of EMBL features B from %s" % options.second_embl)
    # NOTE(review): only the first record is passed to stat() -- confirm this
    # is intended given that both files are announced above.
    stat(first_record)

    if options.merge:
        merged_record = transfer(first_record, second_record)
        # Write out the merged EMBL file; the handle is closed (and flushed)
        # as soon as the record has been written.
        with open("merged.embl", "w") as merged_handle:
            SeqIO.write([merged_record], merged_handle, "embl")
def main():
    """Run ``doCompare`` for every organism named in the list file.

    Command-line options:
        -l / --list FILE  file with one organism common name per line; lines
                          whose first character is '!' are skipped

    For each name the three files ``GenDB/<name>.gendb.embl``,
    ``RAST/<name>.rast.embl`` and ``IMG/<name>.img.embl`` must all exist,
    otherwise the organism is reported and skipped.

    Prints the option help and exits when ``util.printHelp(options)`` is true.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--list", metavar="FILE", help="FILE containing the list of all organism common names to compare", action="store", type="string", dest="list")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    if not options.list:
        return

    # Read organism common names, one per line
    list_file = options.list
    util.checkFile(list_file)
    # Context manager guarantees the list file is closed even if doCompare raises.
    with open(list_file, "r") as list_handle:
        for line in list_handle:
            # '!' in the first column marks a line to ignore
            if line.startswith('!'):
                continue
            common_name = line.strip()
            # A blank line would otherwise yield an empty organism name and a
            # misleading "No three results for " message.
            if not common_name:
                continue

            gendb_file = "GenDB/%s.gendb.embl" % common_name
            rast_file = "RAST/%s.rast.embl" % common_name
            img_file = "IMG/%s.img.embl" % common_name
            if not (os.path.exists(gendb_file) and os.path.exists(rast_file) and os.path.exists(img_file)):
                print("No three results for %s" % common_name)
                continue

            print("Processing %s" % common_name)
            doCompare(common_name, gendb_file, rast_file, img_file)
def main():
    """Run the selected submitter script against the organism list file.

    Command-line options:
        -l / --list FILE   organism list file, passed on to the submitter
        -r / --run SCRIPT  one of ``SUBMIT_SCRIPTS``; "genome_project" and
                           "annotated_genome" are dispatched here
        --submit           forwarded to the submitter's ``doRun`` as its
                           second argument

    Prints the option help and exits when ``util.printHelp(options)`` is true.
    Any exception raised while running the submitter is logged (with its
    traceback) through ``log.error`` and not re-raised.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-l",
        "--list",
        metavar="FILE",
        help="FILE containing the list of all organism common names and its associated parameters depending on submitter type",
        action="store",
        type="string",
        dest="list",
    )
    parser.add_option(
        "-r",
        "--run",
        metavar="SCRIPT",
        help="name of the script to run from %s against each genome of the list" % SUBMIT_SCRIPTS,
        action="store",
        choices=SUBMIT_SCRIPTS,
        dest="run",
    )
    parser.add_option("--submit", help="To submit data, not only for checking", action="store_true", dest="submit")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Print command line (each argument is followed by a single space)
    cmdline = "$ python " + "".join(arg + " " for arg in sys.argv)
    log.info(cmdline)

    script = options.run
    try:
        if options.list:
            util.checkFile(options.list)
            # Submitter modules are imported lazily so only the selected one is loaded.
            if script == "genome_project":
                import submitters.genome_project as genome_project
                genome_project.doRun(options.list, options.submit)
            elif script == "annotated_genome":
                import submitters.annotated_genome as annotated_genome
                annotated_genome.doRun(options.list, options.submit)
        else:
            log.info("Organism list file not provided! Please provide one using -l FILE or --list=FILE")
    except Exception as e:
        import traceback
        # format_exc() renders the traceback of the exception being handled;
        # extract_stack() would only describe the current call stack and
        # say nothing about where the failure actually happened.
        log.error(traceback.format_exc())
        log.error(e)
def mainUsage():
    """Parse the annotator command line and return the parsed options.

    Defines -l/--list (organism list file) and -r/--run (one of
    ``ANNOTATOR_SCRIPTS``). Prints the option help and exits when
    ``util.printHelp(options)`` is true; otherwise logs the invoking
    command line through ``log.info`` and returns the options object.
    """
    parser = OptionParser(usage="usage: %prog [Options]")
    parser.add_option(
        "-l", "--list",
        dest="list",
        metavar="FILE",
        action="store",
        type="string",
        help="FILE containing the list of all organism common names and its associated FASTA sequence file",
    )
    parser.add_option(
        "-r", "--run",
        dest="run",
        metavar="SCRIPT",
        action="store",
        choices=ANNOTATOR_SCRIPTS,
        help="name of the script to run from %s against each genome of the list" % ANNOTATOR_SCRIPTS,
    )
    parsed = parser.parse_args()
    options = parsed[0]

    # Nothing usable on the command line: show the help text and stop.
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Echo the command line; every argument is followed by one space.
    log.info("$ python " + "".join(arg + " " for arg in sys.argv))
    return options
# Comando de la aplicacion. APP_CMD = "udpClient.py" # Tiempo que se espera por la respuesta del servidor. WAIT_FOR_RESPONSE = 2 # Se obtienen los parametros del Servidor UDP, ip, puerto y # tamano de buffer en bytes. # Los parametros son seleccionados por defecto o bien pasados en # la linea de comandos. connectionIp, connectionPort, bufferSize, helpFlag = util.parseParameters( sys.argv, DEBUG) # En caso de solicitarse informacion de ayuda, se imprime y se termina el programa. if (helpFlag): util.printHelp(APP_NAME, APP_CMD) # Se sale del programa sys.exit() # Se indican datos del programa. if (DEBUG): util.printAppInfo(APP_NAME, connectionIp, connectionPort, bufferSize) while (1): try: # Se crea el socket sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) except socket.error as err_msg: # En caso de no poder crear la conexion, se indica y se termina el programa.
def main():
    """Command-line entry point: prepare genome data, search orthologs, clean up.

    The steps below run in this order, each driven by the parsed options:

    * check every program named in ``soft_lists`` with ``util.checkSoft``
    * prepare the new genome from --dna plus --tab, or from --embl
    * --genedb: dump the reference genomes with ``chadoDump``
    * --fasta: fasta and reciprocal fasta runs, then ``transferFeatures`` on
      the hits classified as orthologs
    * --hamap: ``runHamapScan``
    * --clean / --deepclean: remove result directories (and, for
      --deepclean, the genome directories as well)

    The directory/file names (``mygenome_dir``, ``refgenomes_dir``,
    ``fasta_dir``, ...) and ``IS_LSF`` are not defined in this function; they
    are read from the enclosing scope.

    Prints the option help and exits when ``util.printHelp(options)`` is true.
    """
    # Fasta file extension:
    # .ffn for the untranslated nucleotide sequences for each CDS; .faa for protein coding sequences (CDS)
    # .fa for the fasta alignment results
    # .fna for whole genomic DNA sequences; .frn for nucleotide sequences of RNA related features
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--dna", metavar="FILE", help="input dna FILE in fasta format", action="store", type="string", dest="dna")
    parser.add_option("-t", "--tab", metavar="FILE", help="input tab FILE in embl format", action="store", type="string", dest="tab")
    parser.add_option("-e", "--embl", metavar="FILE", help="input embl FILE with CDS features in embl format", action="store", type="string", dest="embl")
    parser.add_option("--genedb", help="extract reference genome protein sequences from geneDB", action="store_true", dest="db")
    parser.add_option("--fasta", help="run fasta against each extracted in-house genomes", action="store_true", dest="fasta")
    parser.add_option("--hamap", help="run pfscan against HAMAP profiles", action="store_true", dest="hamap")
    parser.add_option("--clean", help="delete all results without deleting reference genomes", action="store_true", dest="clean")
    parser.add_option("--deepclean", help="delete all reference genomes and results", action="store_true", dest="deepclean")
    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Print command line (logged at debug level; each argument is followed by a space)
    cmdline = "$ python "
    for argv in sys.argv:
        cmdline += argv + " "
    logger.debug(cmdline)

    # >>> ---------------------------------------------------------------------
    # >>> DATA PREPARATION
    # >>> ---------------------------------------------------------------------

    # List of needed software
    for softname in soft_lists:
        util.checkSoft(softname)

    # Prepare new genome data
    if options.dna and options.tab and not options.embl:
        # Input given as a fasta sequence plus an EMBL feature table:
        # convert, merge the features, then split and translate the CDSs.
        util.checkFile(options.dna)
        mygenome_emblfile = fasta2embl(options.dna)
        mygenome_emblfile_withcds = concatFeatures(mygenome_emblfile, options.tab)
        splitSeq(mygenome_dir, mygenome_emblfile_withcds, "CDS")
        translateSeq(mygenome_dir)
    elif not options.dna and not options.tab and options.embl:
        # Input given as a single EMBL file that already carries CDS features.
        mygenome_emblfile_withcds = options.embl
        splitSeq(mygenome_dir, mygenome_emblfile_withcds, "CDS")
        #splitSeqWithBiopython(mygenome_emblfile_withcds, "CDS") # does not work with testdata_01
        translateSeq(mygenome_dir)
    elif not options.deepclean:
        # No (or an inconsistent) input combination and not deep-cleaning:
        # only check mygenome_dir (presumably left by a previous run --
        # confirm against util.checkDir).
        util.checkDir(mygenome_dir)

    # Extract in house genomes from chado db
    if options.db:
        chadoDump(refgenomes_dir)
    elif not options.deepclean:
        util.checkDir(refgenomes_dir)

    # bsub output directory (only created when IS_LSF is set and no cleaning was requested)
    if IS_LSF and not (options.clean or options.deepclean):
        util.createDir(bsub_dir)

    # >>> ---------------------------------------------------------------------
    # >>> ORTHOLOG SEARCH
    # >>> ---------------------------------------------------------------------

    # Run fasta & reciprocal fasta
    if options.fasta:
        runFasta(mygenome_dir, refgenomes_dir, fasta_dir)
        fasta_hits = topFastaHits(fasta_dir, refgenomes_extractedseq_dir)
        concatSeq(mygenome_fastafile_allcds, mygenome_dir)
        runReciprocalFasta(refgenomes_extractedseq_dir, mygenome_fastafile_allcds, reciprocalfasta_dir)
        reciprocalfasta_hits = topReciprocalFastaHits(reciprocalfasta_dir)
        printMSPCrunch(fasta_hits, reciprocalfasta_hits)
        # getHits returns a mapping read here under the keys 'ortholog' and 'similarity'
        hits = getHits(fasta_hits, reciprocalfasta_hits)
        logger.info("ORTHOLOGS")
        logger.info(hits['ortholog'])
        logger.info("SIMILARITY")
        logger.info(hits['similarity'])
        # Only the ortholog hits are used for feature transfer
        transferFeatures(hits['ortholog'])

    # Run hamap scan
    if options.hamap:
        runHamapScan(mygenome_dir, hamap_dir)

    # >>> ---------------------------------------------------------------------
    # >>> CLEANING OUTPUT DATA
    # >>> ---------------------------------------------------------------------

    # Clean results before a re-run
    if options.clean:
        # fasta results
        util.rmDir(fasta_dir)
        util.rmDir(reciprocalfasta_dir)
        util.rmDir(refgenomes_extractedseq_dir)
        util.rmFile(mygenome_fastafile_allcds)
        # hamap results
        util.rmDir(hamap_dir)
        # bsub outputs
        if IS_LSF:
            util.rmDir(bsub_dir)

    # Deep clean - remove all
    # NOTE(review): unlike --clean, this branch does not remove bsub_dir when
    # IS_LSF is set -- confirm whether that is intended.
    if options.deepclean:
        util.rmDir(refgenomes_dir)
        util.rmDir(mygenome_dir)
        util.rmDir(fasta_dir)
        util.rmDir(reciprocalfasta_dir)
        util.rmDir(refgenomes_extractedseq_dir)
        util.rmFile(mygenome_fastafile_allcds)
        util.rmDir(hamap_dir)