Example #1
0
def test_mock_load_env():
    """Assemble a mock environment and check ArgsProvider.Load on a fake command line.

    The game and model modules are resolved from the ``game``, ``model_file``
    and ``model`` environment variables. A single ``--save_replay_prefix``
    option stands in for real command-line arguments; the test asserts that
    its value shows up in the loaded arguments and that the environment
    dictionary still contains the ``game`` entry.
    """
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)

    envs = os.environ
    load_module = model_loader.load_module
    defaults, overrides = {}, {}
    num_models = None
    kwargs = {}

    # Constructed but not referenced again in this test.
    trainer = Trainer()
    runner = SingleProcessRun()

    game = load_module(envs["game"]).Loader()
    model_file = load_module(envs["model_file"])

    # A two-item entry falls back to the stock Sampler; otherwise the entry
    # is unpacked as (model, method, sampler).
    model_spec = model_file.Models[envs["model"]]
    if len(model_spec) == 2:
        model_class, method_class = model_spec
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = model_spec

    # Module-level Defaults / Overrides are optional.
    defaults.update(getattr(model_file, "Defaults", {}))
    overrides.update(getattr(model_file, "Overrides", {}))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # One loader by default; one per index when several models are requested.
    if num_models is not None:
        model_loaders = [
            ModelLoader(model_class, model_idx=idx)
            for idx in range(num_models)
        ]
    else:
        model_loaders = [ModelLoader(model_class)]

    env = {
        "game": game,
        "method": method,
        "sampler": sampler,
        "model_loaders": model_loaders,
        "mi": mi,
    }
    env.update(kwargs)

    parser = argparse.ArgumentParser()
    # Simulate the command line.
    option_name = 'save_replay_prefix'
    option_value = '~/log/elf/'
    all_args = ArgsProvider.Load(
        parser,
        env,
        cmd_line=[f'--{option_name}', option_value],
        global_defaults=defaults,
        global_overrides=overrides,
    )
    assert all_args[option_name] == option_value
    assert 'game' in env
Example #2
0
                                   "terminal"
                               ])),
                    reply=None)


# Module-level settings; neither is referenced in the visible part of this script.
# NOTE(review): nIter is presumably the number of iterations to run - confirm.
nIter = 5000
# NOTE(review): presumably accumulates time spent waiting - confirm.
elapsed_wait_only = 0

import pickle
import argparse

# Script entry point: load arguments for the game loader, register a
# constant-action "actor" callback, and start the game context.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    loader = Loader()
    # The loader is passed to ArgsProvider.Load together with the parser.
    args = ArgsProvider.Load(parser, [loader])

    # Callback registered under "actor": replies with action 0 repeated
    # batch["s"].size(1) times. The string below holds a disabled debugging
    # snippet, not a description of the function.
    def actor(batch):
        '''
        import pdb
        pdb.set_trace()
        pickle.dump(utils_elf.to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2)
        '''
        return dict(a=[0] * batch["s"].size(1))

    GC = loader.initialize()
    GC.reg_callback("actor", actor)

    # Timestamp taken immediately before the game context is started.
    before = datetime.now()
    GC.Start()
Example #3
0
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)


# Fixed argument string; the entry point below splits it on single spaces and
# passes it to ArgsProvider.Load as the command line.
cmd_line = "--num_games 16 --batchsize 4 --hist_len 1 --frame_skip 4 --actor_only"

# NOTE(review): presumably the iteration count and a wait-time accumulator;
# neither is referenced in the visible part of this script - confirm.
nIter = 5000
elapsed_wait_only = 0

# Script entry point: load arguments from the module-level cmd_line string,
# initialize the game context, register a constant-action "actor" callback,
# and start the context.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    loader = Loader()
    # The module-level cmd_line string is used instead of the process arguments.
    args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line.split(" "))

    GC = loader.initialize()

    # Callback registered under "actor": replies with action 0 repeated
    # sel[0]["s"].size(0) times. sel_gpu is accepted but not used.
    def actor(sel, sel_gpu):
        # pickle.dump(to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2)
        return dict(a=[0] * sel[0]["s"].size(0))

    GC.reg_callback("actor", actor)

    # Empty counter; NOTE(review): presumably filled with observed rewards
    # further down the script - confirm.
    reward_dist = Counter()

    # Timestamp taken immediately before the game context is started.
    before = datetime.now()
    GC.Start()

    import tqdm
Example #4
0
File: game.py Project: xdcesc/ELF
# NOTE(review): presumably the iteration count and a wait-time accumulator;
# neither is referenced in the visible part of this script - confirm.
nIter = 5000
elapsed_wait_only = 0

import pickle
import random

# Script entry point: choose the command line, load arguments, initialize the
# game context and reset the callback counters.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Prefer real process arguments; otherwise fall back to splitting a
    # module-level cmd_line string, which must already be defined (its
    # definition is not part of this excerpt).
    if len(sys.argv) > 1:
        cmd_line = sys.argv[1:]
    else:
        cmd_line = cmd_line.split(" ")

    loader = Loader()
    args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line)

    GC = loader.initialize()

    # Module-level counters; actor_count is incremented by the actor callback
    # defined below via a `global` declaration.
    actor_count = 0
    train_count = 0

    def actor(batch):
        """Draw one random action for every entry of the incoming batch.

        Increments the module-level ``actor_count`` call counter, then picks
        an integer in ``[0, GC.params["num_action"] - 1]`` for each of the
        ``batch["s"].size(1)`` entries.

        Args:
            batch: mapping whose ``"s"`` item exposes ``size(dim)``.
        """
        global actor_count, GC
        actor_count += 1
        # The size is read from the `batch` argument; no name `sel` exists in
        # this scope, so referring to it raised NameError on the first call.
        batchsize = batch["s"].size(1)
        actions = [
            random.randint(0, GC.params["num_action"] - 1)
            for _ in range(batchsize)
        ]
        reply = dict(a=actions)
Example #5
0
# Script entry point: resolve the model and game modules from environment
# variables, load arguments for all components, initialize the game context
# on the selected GPU, and register the loaded model under two names.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    # Module paths / model key come from the "model_file" and "model"
    # environment variables.
    model_file = load_module(os.environ["model_file"])
    # NOTE(review): this unpacking requires the Models entry to have exactly
    # two items; method_class is not used in the visible part of the script.
    model_class, method_class = model_file.Models[os.environ["model"]]
    model_loader = ModelLoader(model_class)

    game = load_module(os.environ["game"]).Loader()
    # Force actor-only mode and game_multi=2 before arguments are loaded.
    game.args.set_override(actor_only=True, game_multi=2)
    sampler = Sampler()
    evaluator = Evaluator(stats=False)

    eval_iters = EvalIters()

    # Every component above is passed to ArgsProvider.Load along with the parser.
    args = ArgsProvider.Load(parser, [ game, sampler, evaluator, model_loader, eval_iters ])

    GC = game.initialize()
    GC.setup_gpu(args.gpu)

    model = model_loader.load_model(GC.params)
    mi = ModelInterface()
    # The same model object is registered twice: as "model" with optimizer
    # parameters, and as "actor" with copy=True / cuda=True on args.gpu.
    mi.add_model("model", model, optim_params={ "lr" : 0.001})
    mi.add_model("actor", model, copy=True, cuda=True, gpu_id=args.gpu)

    def actor(batch):
        reply = evaluator.actor(batch)
        '''
        s = batch["s"][0][0]
        seq = batch["seq"][0][0]
        for i in range(s.size(0)):
Example #6
0
        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            T = args.T,
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

# NOTE(review): presumably accumulates time spent waiting; not referenced in
# the visible part of this script - confirm.
elapsed_wait_only = 0

# Script entry point: load arguments (with a --num_iter option and an
# additional_labels override), initialize the game context, and allocate the
# tensors and counters used for statistics.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_iter", type=int, default=5000)

    loader = Loader()
    # additional_labels is overridden globally with "move_idx,game_record_idx".
    args = ArgsProvider.Load(parser, [loader], global_overrides=dict(additional_labels="move_idx,game_record_idx"))

    GC = loader.initialize()

    import torch
    nbin = 10
    # NOTE(review): GC is subscripted here (GC["params"]) but accessed through
    # the attribute GC.params a few lines below - confirm both forms are
    # supported by the object returned from loader.initialize().
    board_size = GC["params"]["board_size"]
    # NOTE(review): torch.FloatTensor(sizes...) does not zero its contents -
    # confirm these tensors are initialized before being read.
    stats = torch.FloatTensor(nbin, board_size, board_size)
    # NOTE(review): the literal 10 matches nbin above; presumably meant to be
    # nbin - confirm.
    counts = torch.FloatTensor(10)

    game_records_visited = Counter()

    # Values read from the game parameters under the "our_stone_plane" and
    # "opponent_stone_plane" keys.
    our_idx = GC.params["our_stone_plane"]
    opp_idx = GC.params["opponent_stone_plane"]

    def train(batch):
Example #7
0
                    if recorded_a != actual_a:
                        self._debug(
                            "%s Action was different. recorded %d, actual %d" %
                            (prompt, recorded_a, actual_a))

            # Overlapped by 1.
            self.id2seqs_train[id] = last_seq - 1


# Script entry point: load the game module named by the "game" environment
# variable, wire a StatsCollector's actor/train callbacks into the game
# context, and run it with a SingleProcessRun runner.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()

    game = load_module(os.environ["game"]).Loader()
    collector = StatsCollector()
    runner = SingleProcessRun()

    # Only the game and the runner are passed to ArgsProvider.Load; the
    # collector is not in this list.
    args_providers = [game, runner]

    all_args = ArgsProvider.Load(parser, args_providers)

    GC = game.initialize()
    # GC.setup_gpu(0)
    # The collector receives the game parameters before callbacks are registered.
    collector.set_params(GC.params)

    GC.reg_callback("actor", collector.actor)
    GC.reg_callback("train", collector.train)
    GC.reg_sig_int()

    runner.setup(GC)
    runner.run()