Ejemplo n.º 1
0
def main():
    """Compare two EMBL files and optionally merge their annotations.

    Parses the command line, reads the files given with -a and -b as
    single-record EMBL files, prints which files are being analysed and runs
    ``stat`` on the first record.  With --merge, the record returned by
    ``transfer(first_record, second_record)`` is written to ``merged.embl``
    in the current directory.

    Exits through ``sys.exit()`` after printing the help when
    ``util.printHelp(options)`` is true, and through ``parser.error`` when
    one of the two input files is missing from the command line.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-a", metavar="EMBL-a", help="First EMBL file", action="store", type="string", dest="first_embl")
    parser.add_option("-b", metavar="EMBL-b", help="Second EMBL file to compare", action="store", dest="second_embl")
    parser.add_option("--merge", help="To transfer /product of identical annotations into a merged file", action="store_true", dest="merge")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Both inputs are mandatory: fail with a usage error instead of letting
    # open(None) raise an unrelated TypeError further down.
    if not options.first_embl or not options.second_embl:
        parser.error("both -a and -b EMBL files are required")

    # Context managers make sure the input handles are closed as soon as the
    # records have been parsed.
    with open(options.first_embl) as first_handle:
        first_record = SeqIO.read(first_handle, "embl")
    with open(options.second_embl) as second_handle:
        second_record = SeqIO.read(second_handle, "embl")

    # Single-argument print(...) behaves the same as the print statement.
    print("Analysis of EMBL features A from %s" % options.first_embl)
    print("Analysis of EMBL features B from %s" % options.second_embl)

    stat(first_record)

    if options.merge:
        merged_record = transfer(first_record, second_record)
        # Write out the merged EMBL file; closing the handle flushes it to disk.
        with open("merged.embl", "w") as merged_handle:
            SeqIO.write([merged_record], merged_handle, "embl")
Ejemplo n.º 2
0
def main():
    """Run ``doCompare`` for every organism named in a list file.

    The file given with -l/--list holds one organism common name per line;
    lines starting with ``!`` are comments.  For each name the GenDB, RAST
    and IMG EMBL files are looked up under ``GenDB/``, ``RAST/`` and ``IMG/``
    relative to the current directory.  A name is skipped, with a message,
    unless all three files exist.

    Exits through ``sys.exit()`` after printing the help when
    ``util.printHelp(options)`` is true.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", "--list", metavar="FILE", help="FILE containing the list of all organism common names to compare", action="store", type="string", dest="list")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    if options.list:
        # Read organism common names, one per line
        list_file = options.list
        util.checkFile(list_file)
        # The context manager closes the list file even if doCompare raises.
        with open(list_file, "r") as handle:
            for line in handle:
                # ! common_name  -> commented-out entry
                if line.startswith('!'):
                    continue
                common_name = line.strip()
                # Blank lines would otherwise be looked up as an organism
                # with an empty name.
                if not common_name:
                    continue

                gendb_file = "GenDB/%s.gendb.embl" % common_name
                rast_file = "RAST/%s.rast.embl" % common_name
                img_file = "IMG/%s.img.embl" % common_name
                # All three annotation results are needed for a comparison.
                if not all(os.path.exists(path) for path in (gendb_file, rast_file, img_file)):
                    print("No three results for %s" % common_name)
                    continue

                print("Processing %s" % common_name)
                doCompare(common_name, gendb_file, rast_file, img_file)
Ejemplo n.º 3
0
def main():
    """Run one of the submitter scripts against an organism list file.

    Options:
      -l/--list FILE   organism list file, validated with ``util.checkFile``
      -r/--run SCRIPT  one of ``SUBMIT_SCRIPTS``
      --submit         passed through to the submitter's ``doRun``

    The command line is logged, then ``doRun(options.list, options.submit)``
    of the selected submitter module is called.  Any exception raised while
    doing so is logged with its traceback rather than propagated.

    Exits through ``sys.exit()`` after printing the help when
    ``util.printHelp(options)`` is true.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-l",
        "--list",
        metavar="FILE",
        help="FILE containing the list of all organism common names and its associated parameters depending on submitter type",
        action="store",
        type="string",
        dest="list",
    )
    parser.add_option(
        "-r",
        "--run",
        metavar="SCRIPT",
        help="name of the script to run from %s against each genome of the list" % SUBMIT_SCRIPTS,
        action="store",
        choices=SUBMIT_SCRIPTS,
        dest="run",
    )
    parser.add_option("--submit", help="To submit data, not only for checking", action="store_true", dest="submit")

    (options, args) = parser.parse_args()

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Print command line (every argument is followed by a single space)
    cmdline = "$ python " + "".join(argv + " " for argv in sys.argv)
    log.info(cmdline)

    script = options.run
    try:
        if options.list:
            util.checkFile(options.list)
            # Submitter modules are imported lazily so that only the selected
            # one is loaded.
            if script == "genome_project":
                import submitters.genome_project as genome_project

                genome_project.doRun(options.list, options.submit)
            elif script == "annotated_genome":
                import submitters.annotated_genome as annotated_genome

                annotated_genome.doRun(options.list, options.submit)
            elif script is None:
                # Without -r nothing would run; say so instead of exiting silently.
                log.info("Script name not provided! Please provide one using -r SCRIPT or --run=SCRIPT")
        else:
            log.info("Organism list file not provided! Please provide one using -l FILE or --list=FILE")

    except Exception as e:
        import traceback

        # format_exc() reports where the exception was raised;
        # extract_stack() would only describe this handler's own call stack.
        log.error(traceback.format_exc())
        log.error(e)
Ejemplo n.º 4
0
def mainUsage():
    """Parse the command line, log it and return the parsed options.

    Two options are declared: -l/--list (organism list FILE) and -r/--run
    (a SCRIPT restricted to ``ANNOTATOR_SCRIPTS``).  When
    ``util.printHelp(options)`` is true the help is printed and the program
    exits through ``sys.exit()``.

    Returns the options object produced by ``OptionParser.parse_args``.
    """
    parser = OptionParser(usage="usage: %prog [Options]")
    parser.add_option(
        "-l",
        "--list",
        metavar="FILE",
        help="FILE containing the list of all organism common names and its associated FASTA sequence file",
        action="store",
        type="string",
        dest="list",
    )
    parser.add_option(
        "-r",
        "--run",
        metavar="SCRIPT",
        help="name of the script to run from %s against each genome of the list" % ANNOTATOR_SCRIPTS,
        action="store",
        choices=ANNOTATOR_SCRIPTS,
        dest="run",
    )

    # Positional arguments are not used.
    parsed_options = parser.parse_args()[0]

    # Print help if no argument given
    if util.printHelp(parsed_options):
        parser.print_help()
        sys.exit()

    # Log the command line; each argument is followed by one space.
    log.info("$ python " + "".join(token + " " for token in sys.argv))

    return parsed_options
Ejemplo n.º 5
0
# Application command (passed to util.printHelp below).
APP_CMD = "udpClient.py"

# Time to wait for the server's response.
# NOTE(review): unit is not shown in this part of the file -- presumably seconds; confirm.
WAIT_FOR_RESPONSE = 2

# Get the UDP server parameters: IP, port and
# buffer size in bytes.
# The parameters are either the defaults or the ones passed on
# the command line.
connectionIp, connectionPort, bufferSize, helpFlag = util.parseParameters(
    sys.argv, DEBUG)

# If help information was requested, print it and terminate the program.
if (helpFlag):
    util.printHelp(APP_NAME, APP_CMD)
    # Exit the program
    sys.exit()

# Print the program's connection data (only when DEBUG is set).
if (DEBUG):
    util.printAppInfo(APP_NAME, connectionIp, connectionPort, bufferSize)

while (1):

    try:
        # Se crea el socket
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

    except socket.error as err_msg:
        # En caso de no poder crear la conexion, se indica y se termina el programa.
Ejemplo n.º 6
0
def main():
    """Command-line driver: data preparation, ortholog search, then cleaning.

    Every stage is gated by command-line options:

    * data preparation -- checks the required software, builds the new
      genome data from either (--dna and --tab) or --embl, optionally dumps
      the reference genomes (--genedb) and creates the bsub output directory
      when ``IS_LSF`` is set;
    * ortholog search -- fasta / reciprocal fasta (--fasta) and the HAMAP
      scan (--hamap);
    * cleaning -- --clean removes the results, --deepclean also removes the
      reference genomes and the new genome data.

    When ``util.printHelp(options)`` is true the help is printed and the
    program exits through ``sys.exit()``.
    """
    # Fasta file extension:
    # .ffn for the untranslated nucleotide sequences for each CDS; .faa for protein coding sequences (CDS)
    # .fa for the fasta alignment results
    # .fna for whole genomic DNA sequences; .frn for nucleotide sequences of RNA related features
    parser = OptionParser(usage="usage: %prog [Options]")

    # (short flag, long flag, help text, dest) for options taking a FILE
    file_options = (
        ("-d", "--dna", "input dna FILE in fasta format", "dna"),
        ("-t", "--tab", "input tab FILE in embl format", "tab"),
        ("-e", "--embl", "input embl FILE with CDS features in embl format", "embl"),
    )
    for short_flag, long_flag, help_text, dest_name in file_options:
        parser.add_option(short_flag, long_flag, metavar="FILE", help=help_text, action="store", type="string", dest=dest_name)

    # (long flag, help text, dest) for boolean switches
    switch_options = (
        ("--genedb", "extract reference genome protein sequences from geneDB", "db"),
        ("--fasta", "run fasta against each extracted in-house genomes", "fasta"),
        ("--hamap", "run pfscan against HAMAP profiles", "hamap"),
        ("--clean", "delete all results without deleting reference genomes", "clean"),
        ("--deepclean", "delete all reference genomes and results", "deepclean"),
    )
    for long_flag, help_text, dest_name in switch_options:
        parser.add_option(long_flag, help=help_text, action="store_true", dest=dest_name)

    options = parser.parse_args()[0]

    # Print help if no argument given
    if util.printHelp(options):
        parser.print_help()
        sys.exit()

    # Log the command line; each argument is followed by one space.
    logger.debug("$ python " + "".join(token + " " for token in sys.argv))

    # >>> ---------------------------------------------------------------------
    # >>> DATA PREPARATION
    # >>> ---------------------------------------------------------------------
    # List of needed software
    for required_soft in soft_lists:
        util.checkSoft(required_soft)

    # Prepare new genome data: the input is either a fasta + tab pair or a
    # ready-made EMBL file with CDS features (the two are mutually exclusive).
    from_fasta_and_tab = options.dna and options.tab and not options.embl
    from_embl_only = not options.dna and not options.tab and options.embl
    if from_fasta_and_tab or from_embl_only:
        if from_fasta_and_tab:
            util.checkFile(options.dna)
            plain_embl = fasta2embl(options.dna)
            embl_with_cds = concatFeatures(plain_embl, options.tab)
        else:
            embl_with_cds = options.embl
        # NOTE: splitSeqWithBiopython(embl_with_cds, "CDS") does not work with testdata_01
        splitSeq(mygenome_dir, embl_with_cds, "CDS")
        translateSeq(mygenome_dir)
    elif not options.deepclean:
        util.checkDir(mygenome_dir)

    # Extract in house genomes from chado db
    if options.db:
        chadoDump(refgenomes_dir)
    elif not options.deepclean:
        util.checkDir(refgenomes_dir)

    # bsub output directory
    cleaning_requested = options.clean or options.deepclean
    if IS_LSF and not cleaning_requested:
        util.createDir(bsub_dir)

    # >>> ---------------------------------------------------------------------
    # >>> ORTHOLOG SEARCH
    # >>> ---------------------------------------------------------------------
    # Run fasta & reciprocal fasta
    if options.fasta:
        runFasta(mygenome_dir, refgenomes_dir, fasta_dir)
        forward_hits = topFastaHits(fasta_dir, refgenomes_extractedseq_dir)
        concatSeq(mygenome_fastafile_allcds, mygenome_dir)
        runReciprocalFasta(refgenomes_extractedseq_dir, mygenome_fastafile_allcds, reciprocalfasta_dir)
        backward_hits = topReciprocalFastaHits(reciprocalfasta_dir)
        printMSPCrunch(forward_hits, backward_hits)
        classified = getHits(forward_hits, backward_hits)
        logger.info("ORTHOLOGS")
        logger.info(classified['ortholog'])
        logger.info("SIMILARITY")
        logger.info(classified['similarity'])
        transferFeatures(classified['ortholog'])

    # Run hamap scan
    if options.hamap:
        runHamapScan(mygenome_dir, hamap_dir)

    # >>> ---------------------------------------------------------------------
    # >>> CLEANING OUTPUT DATA
    # >>> ---------------------------------------------------------------------
    # Clean results before a re-run
    if options.clean:
        # fasta results
        for fasta_result_dir in (fasta_dir, reciprocalfasta_dir, refgenomes_extractedseq_dir):
            util.rmDir(fasta_result_dir)
        util.rmFile(mygenome_fastafile_allcds)
        # hamap results
        util.rmDir(hamap_dir)
        # bsub outputs
        if IS_LSF:
            util.rmDir(bsub_dir)

    # Deep clean - remove all
    if options.deepclean:
        for doomed_dir in (refgenomes_dir, mygenome_dir, fasta_dir, reciprocalfasta_dir, refgenomes_extractedseq_dir):
            util.rmDir(doomed_dir)
        util.rmFile(mygenome_fastafile_allcds)
        util.rmDir(hamap_dir)