Ejemplo n.º 1
0
def setup_hmm_search(args):
    host = 'localhost'
    idmap = None
    if args.usemem:
        scantype = 'mem'
    else:
        scantype = 'disk'

    connecting_to_server = False
    # If searching against a predefined database name
    if args.db in EGGNOG_DATABASES:
        dbpath, port = get_db_info(args.db)
        print dbpath
        db_present = [
            pexists(dbpath + "." + ext)
            for ext in 'h3f h3i h3m h3p idmap'.split()
        ]

        if False in db_present:
            print db_present
            print colorify(
                'Database %s not present. Use download_eggnog_database.py to fetch it'
                % (args.db), 'red')
            raise ValueError('Database not found')

        if not args.no_refine:
            if not pexists(pjoin(get_data_path(), 'OG_fasta')):
                print colorify(
                    'Database data/OG_fasta/ not present. Use download_eggnog_database.py to fetch it',
                    'red')
                raise ValueError('Database not found')

        if scantype == 'mem':
            idmap_file = dbpath + '.idmap'
            end_port = 53200

    # If searching against a custom hmm database
    elif os.path.isfile(args.db + '.h3f'):
        dbpath = args.db
        if scantype == 'mem':
            idmap_file = args.db + ".idmap"
            if not pexists(idmap_file):
                if generate_idmap(args.db):
                    idmap_file = args.db + ".idmap"
                    print >> sys.stderr, "idmap succesfully created!"
                else:
                    raise ValueError("idmap could not be created!")
            port = 53000
            end_port = 53200
        else:
            idmap_file = None
            port = None

    # If searching against a emapper hmm server
    elif ":" in args.db:
        dbname, host, port = map(str.strip, args.db.split(":"))
        scantype = 'mem'
        port = int(port)
        if dbname in EGGNOG_DATABASES:
            dbfile, port = get_db_info(dbname)
            args.db = dbname
        else:
            dbfile = dbname

        idmap_file = dbfile + '.idmap'
        if not pexists(idmap_file):
            raise ValueError("idmap file not found: %s" % idmap_file)

        dbpath = host
        if not server_functional(host, port, args.dbtype):
            print colorify(
                "eggnog-mapper server not found at %s:%s" % (host, port),
                'red')
            exit(1)
        connecting_to_server = True
    else:
        raise ValueError('Invalid database name/server')

    # If memory based searches requested, start server
    if scantype == "mem" and not connecting_to_server:
        master_db, worker_db = None, None
        for try_port in range(port, end_port, 2):
            print colorify(
                "Loading server at localhost, port %s-%s" %
                (try_port, try_port + 1), 'lblue')
            dbpath, master_db, worker_db = load_server(dbpath, try_port,
                                                       try_port + 1, args.cpu)
            port = try_port
            ready = False
            for _ in xrange(TIMEOUT_LOAD_SERVER):
                print "Waiting for server to become ready...", host, try_port
                time.sleep(1)
                if not master_db.is_alive() or not worker_db.is_alive():
                    master_db.terminate()
                    master_db.join()
                    worker_db.terminate()
                    worker_db.join()
                    break
                elif server_functional(host, port, args.dbtype):
                    ready = True
                    break
            if ready:
                dbpath = host
                break
    elif scantype == "mem":
        print colorify("DB Server already running or not needed!", 'yellow')
        dbpath = host

    # Preload seqid map to translate hits from hmmpgmd
    if scantype == "mem":
        print colorify("Reading idmap %s" % idmap_file, color='lblue')
        idmap = {}
        for _lnum, _line in enumerate(open(idmap_file)):
            if not _line.strip():
                continue
            try:
                _seqid, _seqname = map(str, _line.strip().split(' '))
            except ValueError:
                if _lnum == 0:
                    # idmap generated by esl_reformat has info line at beginning
                    continue
                else:
                    raise
            _seqid = int(_seqid)
            idmap[_seqid] = [_seqname]
        print len(idmap), "names loaded"

    # If server mode, just listen for connections and exit when interrupted
    if args.servermode:
        while True:
            print colorify(
                "Server ready listening at %s:%s and using %d CPU cores" %
                (host, port, args.cpu), 'green')
            print colorify(
                "Use `emapper.py -d %s:%s:%s (...)` to search against this server"
                % (args.db, host, port), 'lblue')
            time.sleep(10)
        raise emapperException()
    else:
        return host, port, dbpath, scantype, idmap
Ejemplo n.º 2
0
def setup_hmm_search(args):
    host = 'localhost'
    idmap = None
    if args.usemem:
        scantype = 'mem'
    else:
       scantype = 'disk'

    connecting_to_server = False
    # If searching against a predefined database name
    if args.db in EGGNOG_DATABASES:
        dbpath, port = get_db_info(args.db)
        db_present = [pexists(dbpath + "." + ext)
                      for ext in 'h3f h3i h3m h3p idmap'.split()]

        if False in db_present:
            print db_present
            print colorify('Database %s not present. Use download_eggnog_database.py to fetch it' % (args.db), 'red')
            raise ValueError('Database not found')

        if not args.no_refine:
            if not pexists(pjoin(DATA_PATH, 'OG_fasta')):
                print colorify('Database data/OG_fasta/ not present. Use download_eggnog_database.py to fetch it', 'red')
                raise ValueError('Database not found')

        if scantype == 'mem':
            idmap_file = dbpath + '.idmap'
            end_port = 53200

    # If searching against a custom hmm database
    elif os.path.isfile(args.db + '.h3f'):
        dbpath = args.db
        if scantype == 'mem':
            idmap_file = args.db + ".idmap"
            if not pexists(idmap_file):
                if generate_idmap(args.db):
                    idmap_file = args.db + ".idmap"
                    print >>sys.stderr, "idmap succesfully created!"
                else:
                    raise ValueError("idmap could not be created!")
            port = 53000
            end_port = 53200
        else:
            idmap_file = None
            port = None

    # If searching against a emapper hmm server
    elif ":" in args.db:
        dbname, host, port = map(str.strip, args.db.split(":"))
        scantype = 'mem'
        port = int(port)
        if dbname in EGGNOG_DATABASES:
            dbfile, port = get_db_info(dbname)
            args.db = dbname
        else:
            dbfile = dbname

        idmap_file = dbfile + '.idmap'
        if not pexists(idmap_file):
            raise ValueError("idmap file not found: %s" % idmap_file)

        dbpath = host
        if not server_functional(host, port, args.dbtype):
            print colorify("eggnog-mapper server not found at %s:%s" % (host, port), 'red')
            exit(1)
        connecting_to_server = True
    else:
        raise ValueError('Invalid database name/server')


    # If memory based searches requested, start server
    if scantype == "mem" and not connecting_to_server:
        master_db, worker_db = None, None
        for try_port in range(port, end_port, 2):
            print colorify("Loading server at localhost, port %s-%s" %
                           (try_port, try_port + 1), 'lblue')
            dbpath, master_db, worker_db = load_server(
                dbpath, try_port, try_port + 1, args.cpu)
            port = try_port
            ready = False
            for _ in xrange(TIMEOUT_LOAD_SERVER):
                print "Waiting for server to become ready...", host, try_port
                time.sleep(1)
                if not master_db.is_alive() or not worker_db.is_alive():
                    master_db.terminate()
                    master_db.join()
                    worker_db.terminate()
                    worker_db.join()
                    break
                elif server_functional(host, port, args.dbtype):
                    ready = True
                    break
            if ready:
                dbpath = host
                break
    elif scantype == "mem":
        print colorify("DB Server already running or not needed!", 'yellow')
        dbpath = host

    # Preload seqid map to translate hits from hmmpgmd
    if scantype == "mem":
        print colorify("Reading idmap %s" % idmap_file, color='lblue')
        idmap = {}
        for _lnum, _line in enumerate(open(idmap_file)):
            if not _line.strip():
                continue
            try:
                _seqid, _seqname = map(str, _line.strip().split(' '))
            except ValueError:
                if _lnum == 0:
                    # idmap generated by esl_reformat has info line at beginning
                    continue  
                else:
                    raise
            _seqid = int(_seqid)
            idmap[_seqid] = [_seqname]
        print len(idmap), "names loaded"

    # If server mode, just listen for connections and exit when interrupted
    if args.servermode:
        while True:
            print colorify("Server ready listening at %s:%s and using %d CPU cores" % (host, port, args.cpu), 'green')
            print colorify("Use `emapper.py -d %s:%s:%s (...)` to search against this server" % (args.db, host, port), 'lblue')
            time.sleep(10)
        raise emapperException()
    else:
        return host, port, dbpath, scantype, idmap