Esempio n. 1
0
def main():
    """Run one evaluator-backed actor against the game context forever.

    Prints version information, loads the environment from ``os.environ``
    with one ``Evaluator`` ("eval_<actor>") and one ``ModelInterface``
    ("mi_<actor>") per entry in ``actors``, loads a single model, registers
    each evaluator as the callback for its actor key and then calls
    ``GC.run()`` in an endless loop.

    The loop has no exit condition, so it only ends when an exception
    (for example ``KeyboardInterrupt``) propagates; ``GC.stop()`` is run
    from a ``finally`` clause so that it is still reached in that case.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor"]
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default argument so that every factory
         # keeps the name it was created for.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=1,
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()
    args = env["game"].options
    model = env["model_loaders"][0].load_model(GC.params)

    for actor_name in actors:
        e = env["eval_" + actor_name]
        mi = env["mi_" + actor_name]

        # The model is always stored under the "actor" key of the interface.
        mi.add_model("actor", model, cuda=(args.gpu >= 0), gpu_id=args.gpu)

        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=mi)

        # e is bound as a default argument so the callback keeps its own
        # evaluator even if more actors are added to the list.
        GC.reg_callback(actor_name, lambda batch, e=e: e.actor(batch))

    GC.start()
    try:
        for actor_name in actors:
            env["eval_" + actor_name].episode_start(0)

        while True:
            GC.run()
    finally:
        # Reached only when the loop above is interrupted by an exception.
        GC.stop()
Esempio n. 2
0
def test_mock_load_env():
    """Assemble an environment dict by hand and check option parsing.

    Loads the game and model modules named in ``os.environ``, instantiates
    the method, sampler, model interface and model loader(s), then passes a
    simulated command line through ``ArgsProvider.Load`` and asserts that
    the parsed value comes back and that the env still holds the game.
    """
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)
    envs = os.environ
    load_module = model_loader.load_module
    defaults = {}
    overrides = {}
    num_models = None
    kwargs = {}

    # Both objects are constructed here but are not placed into ``env``.
    trainer = Trainer()
    runner = SingleProcessRun()

    game = load_module(envs["game"]).Loader()
    model_file = load_module(envs["model_file"])

    # A model entry has either two items (the default Sampler is used) or
    # three items (the third one is the sampler class).
    model_entry = model_file.Models[envs["model"]]
    if len(model_entry) == 2:
        model_class, method_class = model_entry
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = model_entry

    defaults.update(getattr(model_file, "Defaults", {}))
    overrides.update(getattr(model_file, "Overrides", {}))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # You might want multiple models loaded.
    if num_models is None:
        model_loaders = [ModelLoader(model_class)]
    else:
        model_loaders = [
            ModelLoader(model_class, model_idx=idx)
            for idx in range(num_models)
        ]

    env = {
        "game": game,
        "method": method,
        "sampler": sampler,
        "model_loaders": model_loaders,
        "mi": mi,
    }
    env.update(kwargs)

    parser = argparse.ArgumentParser()
    # Simulate the command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    cmd_line = ['--' + cmd_key, cmd_v]
    all_args = ArgsProvider.Load(parser,
                                 env,
                                 cmd_line=cmd_line,
                                 global_defaults=defaults,
                                 global_overrides=overrides)
    assert all_args[cmd_key] == cmd_v
    assert 'game' in env
Esempio n. 3
0
    def main_loop(self):
        """Set up the model and callbacks, then run the game context until exit.

        Loads the environment with a fixed set of option overrides, stores
        the loaded model under the "model" and "actor" keys of a fresh
        ``ModelInterface`` (both put into eval mode), registers the "actor",
        "human_actor" and "train" callbacks through
        ``reg_callback_if_exists`` and calls ``GC.Run()`` repeatedly until
        ``self.exit`` is truthy.
        """
        evaluator = Evaluator(stats=False)
        # Set game to online model.
        forced_options = dict(num_games=1,
                              batchsize=1,
                              num_games_per_thread=1,
                              greedy=True,
                              T=1,
                              additional_labels="aug_code,move_idx")
        env, args = load_env(os.environ,
                             evaluator=evaluator,
                             overrides=forced_options)

        GC = env["game"].initialize()
        model = env["model_loaders"][0].load_model(GC.params)

        mi = ModelInterface()
        mi.add_model("model", model)
        use_cuda = args.gpu is not None
        mi.add_model("actor", model, copy=True, cuda=use_cuda, gpu_id=args.gpu)
        for key in ("model", "actor"):
            mi[key].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        evaluator.setup(sampler=env["sampler"], mi=mi)

        def human_actor(batch):
            # Human moves are obtained through the interactive prompt.
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def actor(batch):
            return self.actor(batch)

        def train(batch):
            # The prompt's result is intentionally not returned here.
            self.prompt("DF Train> ", batch)

        # Registration order matches the order of this table.
        callbacks = (
            ("actor", actor),
            ("human_actor", human_actor),
            ("train", train),
        )
        for key, callback in callbacks:
            GC.reg_callback_if_exists(key, callback)

        GC.Start()

        evaluator.episode_start(0)

        # Always run one step before looking at the exit flag.
        keep_running = True
        while keep_running:
            GC.Run()
            keep_running = not self.exit
        GC.Stop()
Esempio n. 4
0
    def main_loop(self):
        """Prepare evaluator, model interface and callbacks, then loop.

        The environment is loaded with option overrides fixed in this
        method. The loaded model is added to a new ``ModelInterface`` as
        "model" and, as a copy, as "actor"; both are switched to eval mode.
        Callbacks for "actor", "human_actor" and "train" are registered
        through ``reg_callback_if_exists`` and ``GC.Run()`` is called until
        ``self.exit`` is truthy.
        """
        evaluator = Evaluator(stats=False)
        # Set game to online model.
        option_overrides = {
            "num_games": 1,
            "batchsize": 1,
            "num_games_per_thread": 1,
            "greedy": True,
            "T": 1,
            "additional_labels": "aug_code,move_idx",
        }
        env, args = load_env(os.environ,
                             evaluator=evaluator,
                             overrides=option_overrides)

        GC = env["game"].initialize()
        model = env["model_loaders"][0].load_model(GC.params)

        mi = ModelInterface()
        mi.add_model("model", model)
        mi.add_model("actor",
                     model,
                     copy=True,
                     cuda=(args.gpu is not None),
                     gpu_id=args.gpu)
        mi["model"].eval()
        mi["actor"].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        evaluator.setup(sampler=env["sampler"], mi=mi)

        def actor(batch):
            return self.actor(batch)

        def human_actor(batch):
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def train(batch):
            # Nothing is returned to the game context for "train".
            self.prompt("DF Train> ", batch)

        for key, callback in (("actor", actor),
                              ("human_actor", human_actor),
                              ("train", train)):
            GC.reg_callback_if_exists(key, callback)

        GC.Start()

        evaluator.episode_start(0)

        # One GC.Run() always happens before the exit flag is checked.
        while True:
            GC.Run()
            if self.exit:
                break
        GC.Stop()
Esempio n. 5
0
def main():
    """Drive two evaluator-backed actors until ``loop_end`` is set.

    Loads the environment with two models and, per actor, an ``Evaluator``
    ("eval_<actor>") and a ``ModelInterface`` ("mi_<actor>"). Each actor key
    is registered with a callback that forwards the batch to its evaluator
    and feeds it to a ``Stats`` object. ``game_start`` reloads models by the
    version numbers found in the batch; ``game_end`` prints win-rate
    statistics and sets the module-level ``loop_end`` flag once
    ``args.suicide_after_n_games`` games have been played.
    """
    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default argument so that every factory
         # keeps the name it was created for.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=2,
                   overrides=dict(actor_only=True),
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    def actor(batch, e, stat):
        """Forward the batch to evaluator ``e`` and record it in ``stat``."""
        reply = e.actor(batch)
        stat.feed(batch)
        return reply

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]

        print("register " + actor_name + " for e = " + str(e))
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        # e and stat are bound as defaults so each callback keeps its own.
        GC.reg_callback(actor_name,
                        lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print("Root: \"%s\"" % root)
    args = env["game"].options
    global loop_end
    loop_end = False

    def game_start(batch):
        """Reload every model whose version in the batch is non-negative."""
        print("In game start")

        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver >= 0:
                # Retry every 10 seconds until the reload succeeds.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except Exception:
                        # Only Exception is retried: KeyboardInterrupt and
                        # SystemExit must still be able to end the process.
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        """Print win-rate statistics and request shutdown when due."""
        global loop_end
        wr = batch.GC.getGameStats().getWinRateStats()
        win_rate = 100.0 * wr.black_wins / wr.total_games \
            if wr.total_games > 0 else 0.0
        print("%s B/W: %d/%d. Black winrate: %.2f (%d)" %
              (str(datetime.now()), wr.black_wins, wr.white_wins, win_rate,
               wr.total_games))
        if args.suicide_after_n_games > 0 and \
           wr.total_games >= args.suicide_after_n_games:
            print("#suicide_after_n_games: %d, total_games: %d" %
                  (args.suicide_after_n_games, wr.total_games))
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    if args.eval_model_pair:
        if "," in args.eval_model_pair:
            # Explicit "black,white" version pair.
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])

            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.setRequest(int(black), int(white),
                         env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
Esempio n. 6
0
import argparse
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface

# Script entry point: wires an LSTMTrainer and a SingleProcessRun runner to
# the game context and starts the runner.
if __name__ == '__main__':
    trainer = LSTMTrainer()
    runner = SingleProcessRun()
    # load_env is unpacked into the environment mapping and the options.
    env, all_args = load_env(os.environ, trainer=trainer, runner=runner)

    GC = env["game"].initialize()

    # The first model loader provides the model; it is stored under "model"
    # (with optimizer parameters) and, as a copy, under "actor".
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model, optim_params={ "lr" : 0.001})
    # cuda is enabled whenever a gpu option value is present (not None).
    mi.add_model("actor", model, copy=True, cuda=all_args.gpu is not None, gpu_id=all_args.gpu)

    trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])

    # The trainer handles both the "train" and the "actor" callback keys.
    GC.reg_callback("train", trainer.train)
    GC.reg_callback("actor", trainer.actor)
    runner.setup(GC, episode_summary=trainer.episode_summary,
                episode_start=trainer.episode_start)

    runner.run()

Esempio n. 7
0
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface

# Script entry point: wires an LSTMTrainer and a SingleProcessRun runner to
# the game context.
# NOTE(review): no runner.run() call is visible in this excerpt; it may have
# been cut off after runner.setup(...) - confirm against the full script.
if __name__ == '__main__':
    trainer = LSTMTrainer()
    runner = SingleProcessRun()
    # load_env is unpacked into the environment mapping and the options.
    env, all_args = load_env(os.environ, trainer=trainer, runner=runner)

    GC = env["game"].initialize()

    # The first model loader provides the model; it is stored under "model"
    # (with optimizer parameters) and, as a copy, under "actor".
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model, optim_params={"lr": 0.001})
    # cuda is enabled whenever a gpu option value is present (not None).
    mi.add_model("actor",
                 model,
                 copy=True,
                 cuda=all_args.gpu is not None,
                 gpu_id=all_args.gpu)

    trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])

    # The trainer handles both the "train" and the "actor" callback keys.
    GC.reg_callback("train", trainer.train)
    GC.reg_callback("actor", trainer.actor)
    runner.setup(GC,
                 episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)
Esempio n. 8
0
def main():
    """Drive two evaluator-backed actors until ``loop_end`` is set.

    Prints version information, loads the environment with two models and,
    per actor, an ``Evaluator`` ("eval_<actor>") and a ``ModelInterface``
    ("mi_<actor>"). Each actor key is registered with a callback that
    forwards the batch to its evaluator and feeds it to a ``Stats`` object.
    ``game_start`` reloads models by the version numbers found in the
    batch; ``game_end`` prints win-rate statistics and ends the main loop
    once ``args.suicide_after_n_games`` games have been played.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Set game to online model.
    actors = ["actor_black", "actor_white"]
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default argument so that every factory
         # keeps the name it was created for.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    env = load_env(os.environ,
                   num_models=2,
                   overrides={'actor_only': True},
                   additional_to_load=additional_to_load)

    GC = env["game"].initialize()

    stats = [Stats(), Stats()]

    def actor(batch, e, stat):
        """Forward the batch to evaluator ``e`` and record it in ``stat``."""
        reply = e.actor(batch)
        stat.feed(batch)
        return reply

    for actor_name, stat in zip(actors, stats):
        e = env["eval_" + actor_name]

        print(f'register {actor_name} for e = {e!s}')
        e.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        # e and stat are bound as defaults so each callback keeps its own.
        GC.reg_callback(actor_name,
                        lambda batch, e=e, stat=stat: actor(batch, e, stat))

    root = os.environ.get("root", "./")
    print(f'Root: "{root}"')
    args = env["game"].options
    loop_end = False

    def game_start(batch):
        """Reload every model whose version in the batch is non-negative."""
        print("In game start")

        vers = [int(batch["black_ver"][0]), int(batch["white_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver >= 0:
                # Retry every 10 seconds until the reload succeeds.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except Exception:
                        # Only Exception is retried: KeyboardInterrupt and
                        # SystemExit must still be able to end the process.
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        """Print win-rate statistics and end the main loop when due."""
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins /
                    wr.total_games if wr.total_games > 0 else 0.0)
        # The first fragment ends with a space so that the two adjacent
        # f-strings do not run together ("...2.Black winrate").
        print(f'{datetime.now()!s} B/W: {wr.black_wins}/{wr.white_wins}. '
              f'Black winrate: {win_rate:.2f} ({wr.total_games})')

        if args.suicide_after_n_games > 0 and \
                wr.total_games >= args.suicide_after_n_games:
            print(f'#suicide_after_n_games: {args.suicide_after_n_games}, '
                  f'total_games: {wr.total_games}')
            loop_end = True

    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()
    if args.eval_model_pair:
        if "," in args.eval_model_pair:
            # Explicit "black,white" version pair.
            black, white = args.eval_model_pair.split(",")
        else:
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])

            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white),
                                     env['game'].options.resign_thres, 1)

    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()
Esempio n. 9
0
import os

from rlpytorch import LSTMTrainer, Sampler, EvalIters, load_env, ModelLoader, ArgsProvider, ModelInterface

# Script entry point: sets up an LSTMTrainer in actor-only mode and registers
# an "actor" callback that also feeds each batch into the EvalIters stats.
if __name__ == '__main__':
    trainer = LSTMTrainer()
    eval_iters = EvalIters()
    # load_env is unpacked into the environment mapping and the options.
    env, all_args = load_env(os.environ,
                             overrides=dict(actor_only=True),
                             trainer=trainer,
                             eval_iters=eval_iters)

    GC = env["game"].initialize()

    # The first model loader provides the model; it is stored under "model"
    # and, as a copy, under "actor" in a locally created ModelInterface.
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model)
    # cuda is enabled whenever a gpu option value is present (not None).
    mi.add_model("actor",
                 model,
                 copy=True,
                 cuda=all_args.gpu is not None,
                 gpu_id=all_args.gpu)

    # NOTE(review): the trainer receives env["mi"], not the local `mi` that
    # was just populated above; the local `mi` is not used again in the
    # visible lines - confirm which interface is intended here.
    trainer.setup(sampler=env["sampler"], mi=env["mi"])

    def actor(batch):
        """Return the trainer's reply for the batch after recording stats."""
        reply = trainer.actor(batch)
        eval_iters.stats.feed_batch(batch)
        return reply

    GC.reg_callback("actor", actor)
Esempio n. 10
0
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Console for DarkForest
import sys
import os
from rlpytorch import load_env, Evaluator, ModelInterface, ArgsProvider, EvalIters

# Script entry point: loads a model into an Evaluator and counts batch sizes
# seen by the actor callback.
if __name__ == '__main__':
    evaluator = Evaluator(stats=False)
    # Set game to online model.
    env, args = load_env(os.environ, evaluator=evaluator, overrides=dict(mode="selfplay", T=1))

    GC = env["game"].initialize()
    # The first model loader provides the model; it is stored under "model"
    # and, as a copy, under "actor", and both are switched to eval mode.
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    mi.add_model("model", model)
    mi.add_model("actor", model, copy=True, cuda=args.gpu is not None, gpu_id=args.gpu)
    mi["model"].eval()
    mi["actor"].eval()

    # Only the model interface is passed to setup here (no sampler).
    evaluator.setup(mi=mi)

    # Running totals updated by the actor callback below.
    total_batchsize = 0
    total_sel_batchsize = 0

    def actor(batch):
        # Accumulates batch.batchsize and batch.max_batchsize into the two
        # module-level counters.
        # NOTE(review): no return statement is visible; the function may
        # continue beyond this excerpt - confirm against the full script.
        global total_batchsize, total_sel_batchsize
        reply = evaluator.actor(batch)
        total_sel_batchsize += batch.batchsize
        total_batchsize += batch.max_batchsize
Esempio n. 11
0
import re

from rlpytorch import load_env, SingleProcessRun, Trainer, ModelInterface

# Matches names of the form "save-<digits>.bin" and captures the digits.
# The dot is escaped so that it matches only a literal "." before "bin".
matcher = re.compile(r"save-(\d+)\.bin")

# Script entry point: loads an environment with two trainers, two model
# interfaces and one runner, all created through additional_to_load.
# NOTE(review): `os` is used below but is not imported in the visible part of
# this snippet - confirm the import exists in the full file.
if __name__ == '__main__':
    # Each entry maps an env key to (option spec, factory). The factory is
    # either a lambda taking the option map or the class itself.
    additional_to_load = {
        'trainer0': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'trainer1': (
            Trainer.get_option_spec(),
            lambda option_map: Trainer(option_map)),
        'mi0': (
            ModelInterface.get_option_spec(), ModelInterface),
        'mi1': (
            ModelInterface.get_option_spec(), ModelInterface),
        'runner': (
            SingleProcessRun.get_option_spec(),
            lambda option_map: SingleProcessRun(option_map)),
    }

    # Two models are requested; backprop0/backprop1 are overridden to False
    # and mode to "offline_train".
    env = load_env(os.environ, num_models=2,
                   additional_to_load=additional_to_load,
                   overrides=dict(backprop0=False,
                                  backprop1=False, mode="offline_train"))

    # Pull the objects built from additional_to_load back out of the env.
    trainer0 = env['trainer0']
    trainer1 = env['trainer1']
    runner = env['runner']
Esempio n. 12
0
def main():
    """Drive the white and black evaluator-backed actors until told to stop.

    Prints version information, loads the environment with two models and,
    per actor, an ``Evaluator`` ("eval_<actor>") and a ``ModelInterface``
    ("mi_<actor>"). Each actor key is registered with a callback that
    forwards the batch to its evaluator and feeds it to a ``Stats`` object.
    ``game_start`` reloads models by the version numbers found in the
    batch; ``game_end`` logs win-rate statistics and ends the main loop
    once ``args.suicide_after_n_games`` games have been played.
    """
    print('Python version:', sys.version)
    print('PyTorch version:', torch.__version__)
    print('CUDA version', torch.version.cuda)
    print('Conda env:', os.environ.get("CONDA_DEFAULT_ENV", ""))

    # Register player names
    actors = ["actor_white", "actor_black"]

    # Evaluator is a pure Python class which runs the neural network in eval
    # mode, returns the results and updates some stat info.
    # This creates 'eval_actor_white' and 'eval_actor_black'.
    additional_to_load = {
        ("eval_" + actor_name):
        (Evaluator.get_option_spec(name="eval_" + actor_name),
         # actor_name is bound as a default argument so that every factory
         # keeps the name it was created for.
         lambda object_map, actor_name=actor_name: Evaluator(
             object_map,
             name="eval_" + actor_name,
             actor_name=actor_name,
             stats=None))
        for actor_name in actors
    }

    # ModelInterface is a Python class that stores network models. Its
    # `models` member is a key-value store used to call a CNN model by name.
    # This creates 'mi_actor_white' and 'mi_actor_black'.
    additional_to_load.update({
        ("mi_" + name): (ModelInterface.get_option_spec(), ModelInterface)
        for name in actors
    })

    # load_env provides:
    #   game - loaded from the game file elfgames.american_checkers.game
    #   method - loaded from the "method" passed via params:
    #       file model_american_checkers.py returns an array [model, method]
    #       model_file=elfgames.american_checkers.model_american_checkers
    #       model=df_pred
    #   model_loaders - ModelLoader instances prepared to load the "model"
    #       passed via params:
    #       file model_american_checkers.py returns an array [model, method]
    #       model_file=elfgames.american_checkers.model_american_checkers
    #       model=df_pred
    #   sampler - used to sample an action from the policy.
    #   mi - ModelInterface, a Python class that stores network models. Its
    #       `models` member is a key-value store to call a CNN model by name.
    #   eval_* - run the neural network in eval mode, return the results and
    #       update some stat info.
    env = load_env(os.environ,
                   num_models=2,
                   overrides={'actor_only': True},
                   additional_to_load=additional_to_load)

    # Initializes the keys ('game_end', 'game_start', 'actor_white',
    # 'actor_black') used for communication between the Python and C++ code,
    # defined in Game.py and GameFeature.h. Also initializes the GameContext
    # from the C++ library (wrapped by GC on the Python side) and sets the
    # mode parsed from the options, e.g. play/selfplay/train/offline_train.
    GC = env["game"].initialize()

    # Register the methods in the GameContext on the Python side.
    # Their names were registered earlier, when the game was initialized
    # (on both the Python and the C++ side). What is registered now are the
    # methods that get called when a batch is passed from C++ to Python for
    # evaluation.
    # Example:
    #   "human_actor" is registered as a key and the method of the same
    #   purpose is registered on the Python side. When AIClientT calls
    #   act (which takes two parameters: state and key), act connects to
    #   Python and transmits the state by key ("human_actor", "actor_black")
    #   to these methods (the actor() function defined below).

    # Some statistics about batch usage; more game stats can be added here.
    stats = [Stats(), Stats()]

    def actor(batch, evaluator, stat):
        """Forward the batch to ``evaluator`` and record it in ``stat``."""
        reply = evaluator.actor(batch)
        stat.feed(batch)
        return reply

    for actor_name, stat in zip(actors, stats):
        evaluator = env["eval_" + actor_name]
        evaluator.setup(sampler=env["sampler"], mi=env["mi_" + actor_name])

        # A lambda with bound defaults gives each key its own evaluator/stat.
        GC.reg_callback(actor_name,
                        lambda batch, evaluator=evaluator, stat=stat: actor(
                            batch, evaluator, stat))

    # Get the directory containing the models.
    root = os.environ.get("root", "./")
    args = env["game"].options
    # Stops client after N games, defined in --suicide_after_n_games param.
    loop_end = False

    def game_start(batch):
        """Reload every model whose version in the batch is non-negative.

        Registered under the 'game_start' key, which is called from the C++
        side; it updates the models to the versions named in the batch.
        """
        info = "game_start() load/reload models\n"
        logger.info(info)

        # Same order as `actors`: white first, then black.
        vers = [int(batch["white_ver"][0]), int(batch["black_ver"][0])]

        # Use the version number to load models.
        for model_loader, ver, actor_name in zip(env["model_loaders"], vers,
                                                 actors):
            if ver >= 0:
                # Retry every 10 seconds until the reload succeeds.
                while True:
                    try:
                        reload(env["mi_" + actor_name], model_loader,
                               GC.params, args, root, ver, actor_name)
                        break
                    except Exception:
                        # Only Exception is retried: KeyboardInterrupt and
                        # SystemExit must still be able to end the process.
                        import traceback
                        traceback.print_exc()
                        time.sleep(10)

    def game_end(batch):
        """Log game statistics and end the main loop after N games.

        Registered under the 'game_end' key, which is called from the C++
        side. The black win rate is computed over decided games only
        (black wins plus white wins).
        """
        nonlocal loop_end
        wr = batch.GC.getClient().getGameStats().getWinRateStats()
        win_rate = (100.0 * wr.black_wins / (wr.black_wins + wr.white_wins) if
                    (wr.black_wins + wr.white_wins) > 0 else 0.0)

        info = f'game_end()\tB/W: {wr.black_wins}/{wr.white_wins}, '
        info += f'Draw: {wr.both_lost}, '
        info += f'Black winrate: {win_rate:.2f}, '
        info += f'Total Games: {wr.total_games}'

        logger.info(info)
        if args.suicide_after_n_games > 0 and \
            wr.total_games >= args.suicide_after_n_games:
            info = f'game_end()\tTotal Games: {wr.total_games}, '
            info += f'#suicide_after_n_games: {args.suicide_after_n_games}'
            logger.info(info)
            loop_end = True

    # Registering the methods described above in Python's GameContext.
    GC.reg_callback_if_exists("game_start", game_start)
    GC.reg_callback_if_exists("game_end", game_end)

    GC.start()

    # When the --eval_model_pair parameter is given, two models are loaded
    # from file and their versions are passed to the C++ side for evaluation.
    if args.eval_model_pair:
        if "," in args.eval_model_pair:
            # Explicit "black,white" version pair.
            black, white = args.eval_model_pair.split(",")
        else:
            # NOTE(review): game_start pairs model_loaders[0] with
            # actors[0] ("actor_white"), while the black version is read
            # from model_loaders[0] here - confirm the intended mapping.
            black = extract_ver(env["model_loaders"][0])
            white = extract_ver(env["model_loaders"][1])
            # Force them to reload in the future.
            for model_loader, actor_name in zip(env["model_loaders"], actors):
                reload_model(model_loader, GC.params, env["mi_" + actor_name],
                             actor_name, args)

        # We just use one thread to do selfplay.
        GC.GC.getClient().setRequest(int(black), int(white), 1)

    # Called before each episode, resets actor_count(num of total nn call)
    for actor_name in actors:
        env["eval_" + actor_name].episode_start(0)

    while not loop_end:
        GC.run()

    GC.stop()