Ejemplo n.º 1
0
    def __init__(self):
        """Build the ``ArgsProvider`` stored as ``self.args``.

        Each entry of the option list is a ``(name, spec)`` pair in which
        ``spec`` is either a bare default value or a dict of settings.
        ``self._on_get_args`` is handed over as the ``on_get_args`` hook.
        """
        option_specs = [
            ("num_games", 1024),
            ("batchsize", 128),  # original note: 64
            ("game_multi", {"type": int, "default": None}),
            ("T", 6),  # original note: 20
            ("eval", {"action": "store_true"}),
            ("wait_per_group", {"action": "store_true"}),
            ("num_collectors", 0),
            ("verbose_comm", {"action": "store_true"}),
            ("verbose_collector", {"action": "store_true"}),
            # Options carrying the "mcts_" prefix.
            ("mcts_threads", 0),
            ("mcts_rollout_per_thread", 1),
            ("mcts_verbose", {"action": "store_true"}),
            ("mcts_save_tree_filename", ""),
            ("mcts_verbose_time", {"action": "store_true"}),
            ("mcts_use_prior", {"action": "store_true"}),
            ("mcts_pseudo_games", 0),
            ("mcts_pick_method", "most_visited"),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            on_get_args=self._on_get_args,
        )
Ejemplo n.º 2
0
    def __init__(self):
        """Create the context arguments and the ``ArgsProvider`` in ``self.args``.

        The provider receives this object's own ``(name, spec)`` pairs, asks
        for ``"batchsize"`` and ``"T"`` through ``more_args``, and lists
        ``self.context_args.args`` as its only child provider.
        """
        self.context_args = ContextArgs()

        ai_choices = [
            "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN",
        ]
        opponent_choices = [
            "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN",
        ]
        option_specs = [
            ("handicap_level", 0),
            ("latest_start", 1000),
            ("latest_start_decay", 0.7),
            ("fs_ai", 50),
            ("fs_opponent", 50),
            ("ai_type",
             {"type": str, "choices": ai_choices, "default": "AI_NN"}),
            ("opponent_type",
             {"type": str, "choices": opponent_choices,
              "default": "AI_SIMPLE"}),
            ("max_tick",
             {"type": int, "default": 30000, "help": "Maximal tick"}),
            ("mcts_threads", 64),
            ("seed", 0),
            ("simple_ratio", -1),
            ("ratio_change", 0),
            ("actor_only", {"action": "store_true"}),
        ]
        self.args = ArgsProvider(
            define_args=option_specs,
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args],
        )
Ejemplo n.º 3
0
    def __init__(self, module):
        """Keep ``module`` and build the ``ArgsProvider`` stored as ``self.args``.

        Args:
            module: stored unchanged on the instance as ``self.module``.
        """
        self.context_args = ContextArgs()
        self.module = module

        players_help = ";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50"
        labels_help = "Add additional labels in the batch. E.g., id,seq,last_terminal"

        option_specs = [
            ("handicap_level", 0),
            ("players", {"type": str, "help": players_help}),
            ("max_tick",
             {"type": int, "default": 30000, "help": "Maximal tick"}),
            ("shuffle_player", {"action": "store_true"}),
            ("mcts_threads", 64),
            ("seed", 0),
            ("actor_only", {"action": "store_true"}),
            ("additional_labels",
             {"type": str, "default": None, "help": labels_help}),
            # TODO: move this option to the model.
            ("model_no_spatial", {"action": "store_true"}),
            ("save_replay_prefix", {"type": str, "default": None}),
            ("output_file", {"type": str, "default": None}),
            ("cmd_dumper_prefix", {"type": str, "default": None}),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["batchsize", "T", "gpu"],
            child_providers=[self.context_args.args],
        )
Ejemplo n.º 4
0
    def __init__(self, module):
        """Keep ``module`` and build the ``ArgsProvider`` stored as ``self.args``.

        The option list is the shared set defined here followed by whatever
        ``self._define_args()`` returns.

        Args:
            module: stored unchanged on the instance as ``self.module``.
        """
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()
        self.module = module

        players_help = ";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50"

        shared_specs = [
            ("handicap_level", 0),
            ("players", {"type": str, "help": players_help}),
            ("max_tick",
             {"type": int, "default": 30000, "help": "Maximal tick"}),
            ("shuffle_player", {"action": "store_true"}),
            ("num_frames_in_state", 1),
            ("max_unit_cmd", 1),
            ("seed", 0),
            ("actor_only", {"action": "store_true"}),
            # TODO: move this option to the model.
            ("model_no_spatial", {"action": "store_true"}),
            ("save_replay_prefix", {"type": str, "default": None}),
            ("output_file", {"type": str, "default": None}),
            ("cmd_dumper_prefix", {"type": str, "default": None}),
            ("gpu", {"type": int, "help": "gpu to use", "default": None}),
        ]
        # The shared entries come first, then the ones from _define_args().
        combined_specs = shared_specs + self._define_args()

        self.args = ArgsProvider(
            call_from=self,
            define_args=combined_specs,
            more_args=["num_games", "batchsize", "T"],
            child_providers=[self.context_args.args, self.more_labels.args],
        )
Ejemplo n.º 5
0
    def __init__(self):
        """Create the helper objects, the ``ArgsProvider`` and the loss modules.

        The three helpers' ``args`` attributes are passed as child providers.
        Every loss module is moved to CUDA as soon as it is created.
        """
        self.pg = PolicyGradient()
        self.discounted_reward = DiscountedReward()
        self.value_matcher = ValueMatcher()

        flag_names = (
            "fixed_policy",
            "h_match_policy",
            "h_match_action",
            "h_smooth",
            "contrastive_V",
        )
        # One fresh settings dict per entry, so no dict is shared.
        option_specs = [
            (name, {"action": "store_true"}) for name in flag_names
        ]
        sub_providers = [
            self.pg.args,
            self.discounted_reward.args,
            self.value_matcher.args,
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["num_games", "batchsize", "min_prob"],
            child_providers=sub_providers,
        )

        smooth_l1 = nn.SmoothL1Loss
        self.prediction_loss = smooth_l1().cuda()
        self.policy_match_loss = smooth_l1().cuda()
        self.policy_max_action_loss = nn.NLLLoss().cuda()
        self.rank_loss = nn.MarginRankingLoss().cuda()
Ejemplo n.º 6
0
def test_mock_load_env():
    """Assemble an environment dict and load arguments from a simulated command line.

    The game and model modules are named by the ``game``, ``model_file`` and
    ``model`` environment variables.  The test passes when the value given
    for ``--save_replay_prefix`` comes back from ``ArgsProvider.Load`` and
    the environment dict still has its ``'game'`` entry.
    """
    init_for_test()
    from rlpytorch import (Trainer, SingleProcessRun, ArgsProvider,
                           ModelLoader, model_loader, Sampler, ModelInterface)
    environ = os.environ
    import_module = model_loader.load_module
    defaults = {}
    overrides = {}
    num_models = None
    extra_env = {}

    trainer = Trainer()
    runner = SingleProcessRun()

    game = import_module(environ["game"]).Loader()
    model_file = import_module(environ["model_file"])

    # A two-item entry has no sampler class of its own; use the stock Sampler.
    model_entry = model_file.Models[environ["model"]]
    if len(model_entry) == 2:
        model_class, method_class = model_entry
        sampler_class = Sampler
    else:
        model_class, method_class, sampler_class = model_entry

    defaults.update(getattr(model_file, "Defaults", {}))
    overrides.update(getattr(model_file, "Overrides", {}))

    method = method_class()
    sampler = sampler_class()
    mi = ModelInterface()

    # Several models may be loaded; with num_models unset, exactly one is.
    if num_models is None:
        model_loaders = [ModelLoader(model_class)]
    else:
        model_loaders = [
            ModelLoader(model_class, model_idx=idx)
            for idx in range(num_models)
        ]

    env = {
        "game": game,
        "method": method,
        "sampler": sampler,
        "model_loaders": model_loaders,
        "mi": mi,
    }
    env.update(extra_env)

    parser = argparse.ArgumentParser()
    # Simulate the command line.
    cmd_key = 'save_replay_prefix'
    cmd_v = '~/log/elf/'
    cmd_line = [f'--{cmd_key}', cmd_v]
    all_args = ArgsProvider.Load(
        parser,
        env,
        cmd_line=cmd_line,
        global_defaults=defaults,
        global_overrides=overrides,
    )
    assert all_args[cmd_key] == cmd_v
    assert 'game' in env
Ejemplo n.º 7
0
    def __init__(self, args=None):
        """Build an ``ArgsProvider`` with no own options, plus two loss modules.

        Args:
            args: forwarded to ``ArgsProvider`` as ``fixed_args``.
        """
        provider = ArgsProvider(
            call_from=self,
            define_args=[],
            fixed_args=args,
        )
        self.args = provider

        # Both loss modules are moved to CUDA right after construction.
        nll = nn.NLLLoss()
        self.policy_loss = nll.cuda()
        mse = nn.MSELoss()
        self.value_loss = mse.cuda()
Ejemplo n.º 8
0
    def __init__(self):
        """Build the ``ArgsProvider`` stored as ``self.args``, plus two loss modules."""
        option_specs = [
            ("multipred_no_backprop", {"action": "store_true"}),
        ]
        self.args = ArgsProvider(call_from=self, define_args=option_specs)

        # Both loss modules are moved to CUDA right after construction.
        nll = nn.NLLLoss()
        self.policy_loss = nll.cuda()
        mse = nn.MSELoss()
        self.value_loss = mse.cuda()
Ejemplo n.º 9
0
 def __init__(self):
     """Build the ``ArgsProvider`` stored as ``self.args``.

     ``self._on_get_args`` is handed over as the ``on_get_args`` hook.
     """
     option_specs = [
         ("num_games", 1024),
         ("batchsize", 128),
         ("game_multi", {"type": int, "default": None}),
         ("T", 6),
         ("eval", {"action": "store_true"}),
         ("wait_per_group", {"action": "store_true"}),
         ("verbose_comm", {"action": "store_true"}),
         ("verbose_collector", {"action": "store_true"}),
     ]
     self.args = ArgsProvider(
         call_from=self,
         define_args=option_specs,
         on_get_args=self._on_get_args,
     )
Ejemplo n.º 10
0
Archivo: game.py Proyecto: snowfeet/ELF
    def __init__(self):
        """Create the context arguments and the ``ArgsProvider`` in ``self.args``."""
        self.context_args = ContextArgs()

        option_specs = [
            ("actor_only", {"action": "store_true"}),
            ("list_file", "./train.lst"),
            ("verbose", {"action": "store_true"}),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args],
        )
Ejemplo n.º 11
0
    def __init__(self):
        """Build the ``ArgsProvider`` stored as ``self.args`` and one loss module.

        The provider is given an empty list of child providers.
        """
        option_specs = [
            (name, {"action": "store_true"})
            for name in ("fixed_policy", "h_smooth")
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["num_games", "batchsize", "min_prob"],
            child_providers=[],
        )

        # The loss module is moved to CUDA right after construction.
        smooth_l1 = nn.SmoothL1Loss()
        self.prediction_loss = smooth_l1.cuda()
Ejemplo n.º 12
0
 def __init__(self):
     """Build the ``ArgsProvider`` stored as ``self.args``.

     Its only entry is the string-typed ``additional_labels`` option.
     """
     labels_help = "Add additional labels in the batch. E.g., id,seq,last_terminal"
     option_specs = [
         ("additional_labels",
          {"type": str, "default": None, "help": labels_help}),
     ]
     self.args = ArgsProvider(call_from=self, define_args=option_specs)
Ejemplo n.º 13
0
    def __init__(self):
        """Create the context arguments and the ``ArgsProvider`` in ``self.args``.

        The default for ``rom_dir`` is the directory of this source file.
        """
        self.context_args = ContextArgs()

        this_dir = os.path.dirname(__file__)
        option_specs = [
            ("frame_skip", 4),
            ("hist_len", 4),
            ("rom_file", "pong.bin"),
            ("actor_only", {"action": "store_true"}),
            ("reward_clip", 1),
            ("rom_dir", this_dir),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["batchsize", "T", "env_eval_only"],
            child_providers=[self.context_args.args],
        )
Ejemplo n.º 14
0
    def __init__(self):
        """Create the context arguments and the ``ArgsProvider`` in ``self.args``.

        The provider receives this object's own ``(name, spec)`` pairs, asks
        for ``"batchsize"`` and ``"T"`` through ``more_args``, and lists
        ``self.context_args.args`` as its only child provider.
        """
        self.context_args = ContextArgs()

        ai_choices = [
            "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN",
        ]
        opponent_choices = [
            "AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN",
        ]
        labels_help = "Add additional labels in the batch. E.g., id,seq,last_terminal"

        option_specs = [
            ("handicap_level", 0),
            ("latest_start", 1000),
            ("latest_start_decay", 0.7),
            ("fs_ai", 50),
            ("fs_opponent", 50),
            ("ai_type",
             {"type": str, "choices": ai_choices, "default": "AI_NN"}),
            ("opponent_type",
             {"type": str, "choices": opponent_choices,
              "default": "AI_SIMPLE"}),
            ("max_tick",
             {"type": int, "default": 30000, "help": "Maximal tick"}),
            ("mcts_threads", 64),
            ("seed", 0),
            ("without_fow", {"action": "store_true"}),
            ("simple_ratio", -1),
            ("ratio_change", 0),
            ("actor_only", {"action": "store_true"}),
            ("additional_labels",
             {"type": str, "default": None, "help": labels_help}),
            # TODO: move this option to the model.
            ("model_no_spatial", {"action": "store_true"}),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args],
        )
Ejemplo n.º 15
0
Archivo: game.py Proyecto: xdcesc/ELF
    def __init__(self):
        """Create the helper objects and the ``ArgsProvider`` in ``self.args``.

        ``self.context_args.args`` and ``self.more_labels.args`` are passed
        as child providers.
        """
        self.context_args = ContextArgs()
        self.more_labels = MoreLabels()

        option_specs = [
            ("actor_only", {"action": "store_true"}),
            ("list_file", "./train.lst"),
            ("verbose", {"action": "store_true"}),
            ("data_aug", {
                "type": int,
                "default": -1,
                "help": "specify data augumentation, 0-7, -1 mean random",
            }),
            ("ratio_pre_moves", {
                "type": float,
                "default": 0,
                "help": "how many moves to perform in each thread, before we use the data train the model",
            }),
            ("start_ratio_pre_moves", {
                "type": float,
                "default": 0.5,
                "help": "how many moves to perform in each thread, before we use the first sgf file to train the model",
            }),
            ("num_games_per_thread", {
                "type": int,
                "default": 5,
                "help": "number of concurrent games per threads, used to increase diversity of games",
            }),
            ("move_cutoff", {
                "type": int,
                "default": -1,
                "help": "Cutoff ply in replay",
            }),
            ("online", {
                "action": "store_true",
                "help": "Set game to online mode",
            }),
            ("gpu", {"type": int, "default": None}),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["batchsize", "T"],
            child_providers=[self.context_args.args, self.more_labels.args],
        )
Ejemplo n.º 16
0
    def __init__(self):
        """Create the context arguments and the ``ArgsProvider`` in ``self.args``.

        The default for ``rom_dir`` is the directory of this source file.
        """
        self.context_args = ContextArgs()

        this_dir = os.path.dirname(__file__)
        labels_help = "Add additional labels in the batch. E.g., id,seq,last_terminal"
        option_specs = [
            ("frame_skip", 4),
            ("hist_len", 4),
            ("rom_file", "pong.bin"),
            ("actor_only", {"action": "store_true"}),
            ("reward_clip", 1),
            ("rom_dir", this_dir),
            ("additional_labels",
             {"type": str, "default": None, "help": labels_help}),
        ]
        self.args = ArgsProvider(
            call_from=self,
            define_args=option_specs,
            more_args=["batchsize", "T", "env_eval_only"],
            child_providers=[self.context_args.args],
        )
Ejemplo n.º 17
0
                                   "terminal"
                               ])),
                    reply=None)


nIter = 5000
elapsed_wait_only = 0

import pickle
import argparse

if __name__ == '__main__':
    # Script entry point: create the loader, register an "actor" callback on
    # the object returned by loader.initialize(), then start it.
    parser = argparse.ArgumentParser()

    loader = Loader()
    # NOTE(review): presumably registers the loader's options on `parser` and
    # parses them; the result is not used in the visible part of this script.
    args = ArgsProvider.Load(parser, [loader])

    def actor(batch):
        '''Reply with action 0 for every entry along dimension 1 of ``batch["s"]``.

        Disabled debugging snippet kept from the original:

            import pdb
            pdb.set_trace()
            pickle.dump(utils_elf.to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2)
        '''
        return dict(a=[0] * batch["s"].size(1))

    GC = loader.initialize()
    GC.reg_callback("actor", actor)

    # Timestamp taken immediately before the context is started.
    before = datetime.now()
    GC.Start()
Ejemplo n.º 18
0
        params["hist_len"] = args.hist_len
        params["T"] = args.T

        return GCWrapper(GC, co, desc, use_numpy=False, params=params)


cmd_line = "--num_games 16 --batchsize 4 --hist_len 1 --frame_skip 4 --actor_only"

nIter = 5000
elapsed_wait_only = 0

if __name__ == '__main__':
    # Script entry point: load arguments from the module-level `cmd_line`
    # string, register an "actor" callback and start the game context.
    parser = argparse.ArgumentParser()

    loader = Loader()
    # The canned command line is split on single spaces into an argument list.
    args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line.split(" "))

    GC = loader.initialize()

    def actor(sel, sel_gpu):
        """Reply with action 0 for every entry along dimension 0 of ``sel[0]["s"]``.

        ``sel_gpu`` is accepted but not used.
        """
        # Disabled debugging aid:
        # pickle.dump(to_numpy(sel), open("tmp%d.bin" % k, "wb"), protocol=2)
        return dict(a=[0] * sel[0]["s"].size(0))

    GC.reg_callback("actor", actor)

    # Empty counter; nothing in the visible part of the script fills it.
    reward_dist = Counter()

    # Timestamp taken immediately before the context is started.
    before = datetime.now()
    GC.Start()

    import tqdm
Ejemplo n.º 19
0
Archivo: game.py Proyecto: xdcesc/ELF
nIter = 5000
elapsed_wait_only = 0

import pickle
import random

if __name__ == '__main__':
    # Script entry point: load arguments, initialize the game context and
    # define the "actor" callback that answers with random actions.
    parser = argparse.ArgumentParser()
    # Real command-line arguments win; otherwise fall back to the
    # module-level `cmd_line` string, split on single spaces.
    if len(sys.argv) > 1:
        cmd_line = sys.argv[1:]
    else:
        cmd_line = cmd_line.split(" ")

    loader = Loader()
    args = ArgsProvider.Load(parser, [loader], cmd_line=cmd_line)

    GC = loader.initialize()

    actor_count = 0
    train_count = 0

    def actor(batch):
        """Count the call and draw one random action per batch entry.

        Args:
            batch: mapping whose ``"s"`` entry exposes ``size(1)``, used
                here as the number of actions to draw.
        """
        global actor_count, GC
        actor_count += 1
        # Fix: the original read the undefined name `sel` here, which
        # raised NameError on the first callback; the parameter is `batch`.
        batchsize = batch["s"].size(1)
        # randint is inclusive at both ends, hence the "- 1".
        actions = [
            random.randint(0, GC.params["num_action"] - 1)
            for _ in range(batchsize)
        ]
        reply = dict(a=actions)
Ejemplo n.º 20
0
        params.update(dict(
            num_group = 1 if args.actor_only else 2,
            T = args.T,
        ))

        return GCWrapper(GC, co, desc, gpu=args.gpu, use_numpy=False, params=params)

elapsed_wait_only = 0

if __name__ == '__main__':
    # Script entry point: load arguments (forcing `additional_labels` through
    # global_overrides), initialize the game context and allocate the
    # statistics buffers.
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_iter", type=int, default=5000)

    loader = Loader()
    args = ArgsProvider.Load(parser, [loader], global_overrides=dict(additional_labels="move_idx,game_record_idx"))

    GC = loader.initialize()

    import torch
    nbin = 10
    # Fix: read the parameters through the `params` attribute, as the two
    # lookups below do; the original subscripted the context object itself
    # (GC["params"]), which was inconsistent with them.
    board_size = GC.params["board_size"]
    # NOTE(review): torch.FloatTensor(*sizes) allocates uninitialized memory;
    # confirm these buffers are zeroed before being used as accumulators.
    stats = torch.FloatTensor(nbin, board_size, board_size)
    counts = torch.FloatTensor(10)

    game_records_visited = Counter()

    our_idx = GC.params["our_stone_plane"]
    opp_idx = GC.params["opponent_stone_plane"]

    def train(batch):
Ejemplo n.º 21
0
                    if recorded_a != actual_a:
                        self._debug(
                            "%s Action was different. recorded %d, actual %d" %
                            (prompt, recorded_a, actual_a))

            # Overlapped by 1.
            self.id2seqs_train[id] = last_seq - 1


if __name__ == '__main__':
    # Script entry point: load the game module named by the "game"
    # environment variable, attach the stats collector's callbacks and run.
    parser = argparse.ArgumentParser()

    game = load_module(os.environ["game"]).Loader()
    collector = StatsCollector()
    runner = SingleProcessRun()

    # Only the game loader and the runner are passed to ArgsProvider.Load.
    args_providers = [game, runner]

    all_args = ArgsProvider.Load(parser, args_providers)

    GC = game.initialize()
    # GC.setup_gpu(0)
    # The collector receives the game context's parameters.
    collector.set_params(GC.params)

    # The collector handles both the "actor" and the "train" callbacks.
    GC.reg_callback("actor", collector.actor)
    GC.reg_callback("train", collector.train)
    GC.reg_sig_int()

    runner.setup(GC)
    runner.run()
Ejemplo n.º 22
0
if __name__ == '__main__':
    # Script entry point: load the model and game modules named by the
    # "model_file", "model" and "game" environment variables, then set up the
    # game context and the model interface.
    parser = argparse.ArgumentParser()

    model_file = load_module(os.environ["model_file"])
    # Fix: a `Models` entry may hold more than two elements (e.g. a third
    # sampler class), which made plain two-name unpacking raise ValueError.
    # Only the first two are used here, so take a slice.
    model_class, method_class = model_file.Models[os.environ["model"]][:2]
    model_loader = ModelLoader(model_class)

    game = load_module(os.environ["game"]).Loader()
    # These overrides are set on the game's provider before arguments load.
    game.args.set_override(actor_only=True, game_multi=2)
    sampler = Sampler()
    evaluator = Evaluator(stats=False)

    eval_iters = EvalIters()

    args = ArgsProvider.Load(parser, [ game, sampler, evaluator, model_loader, eval_iters ])

    GC = game.initialize()
    GC.setup_gpu(args.gpu)

    model = model_loader.load_model(GC.params)
    mi = ModelInterface()
    # The same model is registered twice: as "model" with optimizer
    # parameters, and as "actor" as a copy placed on the selected GPU.
    mi.add_model("model", model, optim_params={ "lr" : 0.001})
    mi.add_model("actor", model, copy=True, cuda=True, gpu_id=args.gpu)

    def actor(batch):
        reply = evaluator.actor(batch)
        '''
        s = batch["s"][0][0]
        seq = batch["seq"][0][0]
        for i in range(s.size(0)):