Ejemplo n.º 1
0
def test_mock_load_env():
    """Assemble an env dict by hand and feed it to ``ArgsProvider.Load``.

    The modules to load are named by the ``game``, ``model_file`` and
    ``model`` entries of ``os.environ``.  A one-option fake command line
    (``--save_replay_prefix``) is passed to ``ArgsProvider.Load`` and the
    test asserts that the returned arguments carry that value and that the
    env dict has a ``game`` entry.
    """
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)

    environ = os.environ
    import_module = model_loader.load_module
    model_count = None  # None -> a single ModelLoader without model_idx
    extra_env = {}

    # Instantiated but not referenced again in this test.
    _trainer = Trainer()
    _runner = SingleProcessRun()

    game = import_module(environ["game"]).Loader()
    model_file = import_module(environ["model_file"])

    # A Models entry is either (model, method) or (model, method, sampler);
    # the two-element form falls back to the stock Sampler.
    model_spec = model_file.Models[environ["model"]]
    if len(model_spec) != 2:
        model_class, method_class, sampler_class = model_spec
    else:
        model_class, method_class = model_spec
        sampler_class = Sampler

    # Optional module-level dicts; absent attributes yield empty dicts.
    global_defaults = dict(getattr(model_file, "Defaults", {}))
    global_overrides = dict(getattr(model_file, "Overrides", {}))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # Several models can be loaded when a count is given.
    if model_count is not None:
        model_loaders = [
            ModelLoader(model_class, model_idx=idx)
            for idx in range(model_count)
        ]
    else:
        model_loaders = [ModelLoader(model_class)]

    env = {
        "game": game,
        "method": method,
        "sampler": sampler,
        "model_loaders": model_loaders,
        "mi": mi,
    }
    env.update(extra_env)

    # Simulate the command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    fake_argv = [f'--{cmd_key}', cmd_v]

    parser = argparse.ArgumentParser()
    all_args = ArgsProvider.Load(parser,
                                 env,
                                 cmd_line=fake_argv,
                                 global_defaults=global_defaults,
                                 global_overrides=global_overrides)

    assert all_args[cmd_key] == cmd_v
    assert 'game' in env
Ejemplo n.º 2
0
    def main_loop(self):
        """Load the model, hook up the console callbacks and run the game.

        Builds the environment via ``load_env`` with fixed overrides, loads
        the first model loader's model, registers it as both ``"model"`` and
        ``"actor"`` (the latter as a copy), switches both to eval mode, and
        registers the ``actor`` / ``human_actor`` / ``train`` callbacks on
        the game context.  The game context is then run repeatedly until
        ``self.exit`` is truthy after a ``Run()`` call, and finally stopped.
        """
        def human_actor(batch):
            print("In human_actor")
            return self.prompt("DF> ", batch)

        def actor(batch):
            return self.actor(batch)

        def train(batch):
            # The prompt result is intentionally not returned here.
            self.prompt("DF Train> ", batch)

        evaluator = Evaluator(stats=False)

        # Overrides passed to load_env for this console session.
        forced_options = {
            "num_games": 1,
            "batchsize": 1,
            "num_games_per_thread": 1,
            "greedy": True,
            "T": 1,
            "additional_labels": "aug_code,move_idx",
        }
        env, args = load_env(os.environ,
                             evaluator=evaluator,
                             overrides=forced_options)

        game_context = env["game"].initialize()
        loaded_model = env["model_loaders"][0].load_model(game_context.params)

        mi = ModelInterface()
        mi.add_model("model", loaded_model)
        # The actor is a copy of the model, placed on GPU when one is given.
        use_cuda = args.gpu is not None
        mi.add_model("actor",
                     loaded_model,
                     copy=True,
                     cuda=use_cuda,
                     gpu_id=args.gpu)
        for model_key in ("model", "actor"):
            mi[model_key].eval()

        self.evaluator = evaluator
        self.last_move_idx = None

        evaluator.setup(sampler=env["sampler"], mi=mi)

        callbacks = (
            ("actor", actor),
            ("human_actor", human_actor),
            ("train", train),
        )
        for callback_key, callback in callbacks:
            game_context.reg_callback_if_exists(callback_key, callback)

        game_context.Start()
        evaluator.episode_start(0)

        # Run at least once; re-check the exit flag after every Run().
        keep_running = True
        while keep_running:
            game_context.Run()
            keep_running = not self.exit
        game_context.Stop()
Ejemplo n.º 3
0
from datetime import datetime

import sys
import os

from rlpytorch import LSTMTrainer, Sampler, SingleProcessRun, load_env, ModelLoader, ArgsProvider, ModelInterface

# Script entry point: build the environment, load the model and wire the
# LSTM trainer's callbacks into the game context and runner.
if __name__ == '__main__':
    trainer = LSTMTrainer()
    runner = SingleProcessRun()
    # load_env receives the process environment plus the trainer and runner;
    # it returns the env mapping and an args object (read below as .gpu).
    env, all_args = load_env(os.environ, trainer=trainer, runner=runner)

    # Game context used for model parameters and callback registration.
    GC = env["game"].initialize()

    # Load the model from the first model loader using the game's params.
    model = env["model_loaders"][0].load_model(GC.params)
    mi = ModelInterface()
    # "model" is registered with optimizer parameters (lr = 0.001).
    mi.add_model("model", model, optim_params={"lr": 0.001})
    # "actor" is registered as a copy of the same model; the cuda flag is
    # set only when a GPU id is present in the arguments.
    mi.add_model("actor",
                 model,
                 copy=True,
                 cuda=all_args.gpu is not None,
                 gpu_id=all_args.gpu)

    trainer.setup(sampler=env["sampler"], mi=mi, rl_method=env["method"])

    # Route the game context's "train" and "actor" callbacks to the trainer.
    GC.reg_callback("train", trainer.train)
    GC.reg_callback("actor", trainer.actor)
    # NOTE(review): the runner is configured here but nothing in the visible
    # lines starts it -- confirm whether a run call follows this block.
    runner.setup(GC,
                 episode_summary=trainer.episode_summary,
                 episode_start=trainer.episode_start)