def __init__(self):
    """Create the helper objects, declare options and build the loss modules.

    Three helpers (PolicyGradient, DiscountedReward, ValueMatcher) are
    instantiated first because their ``args`` attributes are handed to
    ArgsProvider as child providers.
    """
    self.pg = PolicyGradient()
    self.discounted_reward = DiscountedReward()
    self.value_matcher = ValueMatcher()

    # Options declared by this object; every one is a store_true switch.
    own_options = [
        ("fixed_policy", {"action": "store_true"}),
        ("h_match_policy", {"action": "store_true"}),
        ("h_match_action", {"action": "store_true"}),
        ("h_smooth", {"action": "store_true"}),
        ("contrastive_V", {"action": "store_true"}),
    ]
    # Names passed as more_args (declared elsewhere, not in this method).
    borrowed_options = ["num_games", "batchsize", "min_prob"]
    nested_providers = [
        self.pg.args,
        self.discounted_reward.args,
        self.value_matcher.args,
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=own_options,
        more_args=borrowed_options,
        child_providers=nested_providers,
    )

    # Loss modules are moved to the GPU right away via .cuda().
    self.prediction_loss = nn.SmoothL1Loss().cuda()
    self.policy_match_loss = nn.SmoothL1Loss().cuda()
    self.policy_max_action_loss = nn.NLLLoss().cuda()
    self.rank_loss = nn.MarginRankingLoss().cuda()
def __init__(self):
    """Declare the game-context and MCTS options through ArgsProvider.

    ``self._on_get_args`` is passed as the ``on_get_args`` hook.
    """
    general_options = [
        ("num_games", 1024),
        ("batchsize", 128),  # 64
        ("game_multi", {"type": int, "default": None}),
        ("T", 6),  # 20
        ("eval", {"action": "store_true"}),
        ("wait_per_group", {"action": "store_true"}),
        ("num_collectors", 0),
        ("verbose_comm", {"action": "store_true"}),
        ("verbose_collector", {"action": "store_true"}),
    ]
    # Options whose names carry the "mcts_" prefix.
    mcts_options = [
        ("mcts_threads", 0),
        ("mcts_rollout_per_thread", 1),
        ("mcts_verbose", {"action": "store_true"}),
        ("mcts_save_tree_filename", ""),
        ("mcts_verbose_time", {"action": "store_true"}),
        ("mcts_use_prior", {"action": "store_true"}),
        ("mcts_pseudo_games", 0),
        ("mcts_pick_method", "most_visited"),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=general_options + mcts_options,
        on_get_args=self._on_get_args,
    )
def __init__(self, module):
    """Store ``module`` and declare the shared options plus subclass extras.

    Args:
        module: Kept on the instance as ``self.module``; not otherwise used
            in this method.

    The final option list is the common list below followed by whatever
    ``self._define_args()`` returns.
    """
    self.context_args = ContextArgs()
    self.more_labels = MoreLabels()
    self.module = module

    common_options = [
        ("handicap_level", 0),
        ("players", {"type": str, "help": ";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50"}),
        ("max_tick", {"type": int, "default": 30000, "help": "Maximal tick"}),
        ("shuffle_player", {"action": "store_true"}),
        ("num_frames_in_state", 1),
        ("max_unit_cmd", 1),
        ("seed", 0),
        ("actor_only", {"action": "store_true"}),
        # TODO, put it to model
        ("model_no_spatial", {"action": "store_true"}),
        ("save_replay_prefix", {"type": str, "default": None}),
        ("output_file", {"type": str, "default": None}),
        ("cmd_dumper_prefix", {"type": str, "default": None}),
        ("gpu", {"type": int, "help": "gpu to use", "default": None}),
    ]
    # Extra options are requested before the child providers are read,
    # matching the original evaluation order.
    all_options = common_options + self._define_args()
    nested_providers = [self.context_args.args, self.more_labels.args]

    self.args = ArgsProvider(
        call_from=self,
        define_args=all_options,
        more_args=["num_games", "batchsize", "T"],
        child_providers=nested_providers,
    )
def __init__(self):
    """Declare the AI / opponent game options with ContextArgs as a child."""
    self.context_args = ContextArgs()

    ai_choices = ["AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN"]
    opponent_choices = ["AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN"]

    option_table = [
        ("handicap_level", 0),
        ("latest_start", 1000),
        ("latest_start_decay", 0.7),
        ("fs_ai", 50),
        ("fs_opponent", 50),
        ("ai_type", {"type": str, "choices": ai_choices, "default": "AI_NN"}),
        ("opponent_type", {"type": str, "choices": opponent_choices, "default": "AI_SIMPLE"}),
        ("max_tick", {"type": int, "default": 30000, "help": "Maximal tick"}),
        ("mcts_threads", 64),
        ("seed", 0),
        ("simple_ratio", -1),
        ("ratio_change", 0),
        ("actor_only", {"action": "store_true"}),
    ]
    # NOTE(review): no call_from argument is passed to ArgsProvider here;
    # confirm that this is intentional.
    self.args = ArgsProvider(
        define_args=option_table,
        more_args=["batchsize", "T"],
        child_providers=[self.context_args.args],
    )
def __init__(self, module):
    """Store ``module`` and declare the game options through ArgsProvider.

    Args:
        module: Kept on the instance as ``self.module``; not otherwise used
            in this method.
    """
    self.context_args = ContextArgs()
    self.module = module

    option_table = [
        ("handicap_level", 0),
        ("players", {"type": str, "help": ";-separated player infos. For example: type=AI_NN,fs=50,args=backup/AI_SIMPLE|decay/0.99|start/1000,fow=True;type=AI_SIMPLE,fs=50"}),
        ("max_tick", {"type": int, "default": 30000, "help": "Maximal tick"}),
        ("shuffle_player", {"action": "store_true"}),
        ("mcts_threads", 64),
        ("seed", 0),
        ("actor_only", {"action": "store_true"}),
        ("additional_labels", {"type": str, "default": None, "help": "Add additional labels in the batch. E.g., id,seq,last_terminal"}),
        # TODO, put it to model
        ("model_no_spatial", {"action": "store_true"}),
        ("save_replay_prefix", {"type": str, "default": None}),
        ("output_file", {"type": str, "default": None}),
        ("cmd_dumper_prefix", {"type": str, "default": None}),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        more_args=["batchsize", "T", "gpu"],
        child_providers=[self.context_args.args],
    )
def __init__(self, args=None):
    """Build an ArgsProvider with no own options and create two losses.

    Args:
        args: Forwarded unchanged to ArgsProvider as ``fixed_args``.
    """
    provider_kwargs = {
        "call_from": self,
        "define_args": [],
        "fixed_args": args,
    }
    self.args = ArgsProvider(**provider_kwargs)

    # Both loss modules are moved to the GPU via .cuda().
    self.policy_loss = nn.NLLLoss().cuda()
    self.value_loss = nn.MSELoss().cuda()
def __init__(self):
    """Declare the ``multipred_no_backprop`` switch and create two losses."""
    switch_list = [
        ("multipred_no_backprop", {"action": "store_true"}),
    ]
    self.args = ArgsProvider(call_from=self, define_args=switch_list)

    # Both loss modules are moved to the GPU via .cuda().
    self.policy_loss = nn.NLLLoss().cuda()
    self.value_loss = nn.MSELoss().cuda()
def __init__(self):
    """Declare the game-context options through ArgsProvider.

    ``self._on_get_args`` is passed as the ``on_get_args`` hook.
    """
    option_table = [
        ("num_games", 1024),
        ("batchsize", 128),
        ("game_multi", {"type": int, "default": None}),
        ("T", 6),
        ("eval", {"action": "store_true"}),
        ("wait_per_group", {"action": "store_true"}),
        ("verbose_comm", {"action": "store_true"}),
        ("verbose_collector", {"action": "store_true"}),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        on_get_args=self._on_get_args,
    )
def __init__(self):
    """Declare the loader options with ContextArgs as a child provider."""
    self.context_args = ContextArgs()

    option_table = [
        ("actor_only", {"action": "store_true"}),
        ("list_file", "./train.lst"),
        ("verbose", {"action": "store_true"}),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        more_args=["batchsize", "T"],
        child_providers=[self.context_args.args],
    )
def __init__(self):
    """Declare the single ``additional_labels`` string option."""
    labels_spec = {
        "type": str,
        "default": None,
        "help": "Add additional labels in the batch. E.g., id,seq,last_terminal",
    }
    self.args = ArgsProvider(
        call_from=self,
        define_args=[("additional_labels", labels_spec)],
    )
def __init__(self):
    """Declare two switches and create the prediction loss module."""
    switch_list = [
        ("fixed_policy", {"action": "store_true"}),
        ("h_smooth", {"action": "store_true"}),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=switch_list,
        more_args=["num_games", "batchsize", "min_prob"],
        child_providers=[],  # explicitly empty, as in the original call
    )

    # The loss module is moved to the GPU via .cuda().
    self.prediction_loss = nn.SmoothL1Loss().cuda()
def __init__(self):
    """Declare the ROM / frame options with ContextArgs as a child provider.

    ``rom_dir`` defaults to the directory of the file containing this code.
    """
    self.context_args = ContextArgs()

    this_dir = os.path.dirname(__file__)
    option_table = [
        ("frame_skip", 4),
        ("hist_len", 4),
        ("rom_file", "pong.bin"),
        ("actor_only", {"action": "store_true"}),
        ("reward_clip", 1),
        ("rom_dir", this_dir),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        more_args=["batchsize", "T", "env_eval_only"],
        child_providers=[self.context_args.args],
    )
def __init__(self):
    """Declare the AI / opponent game options with ContextArgs as a child."""
    self.context_args = ContextArgs()

    ai_choices = ["AI_SIMPLE", "AI_HIT_AND_RUN", "AI_NN", "AI_FLAG_NN", "AI_TD_NN"]
    opponent_choices = ["AI_SIMPLE", "AI_HIT_AND_RUN", "AI_FLAG_SIMPLE", "AI_TD_BUILT_IN"]

    option_table = [
        ("handicap_level", 0),
        ("latest_start", 1000),
        ("latest_start_decay", 0.7),
        ("fs_ai", 50),
        ("fs_opponent", 50),
        ("ai_type", {"type": str, "choices": ai_choices, "default": "AI_NN"}),
        ("opponent_type", {"type": str, "choices": opponent_choices, "default": "AI_SIMPLE"}),
        ("max_tick", {"type": int, "default": 30000, "help": "Maximal tick"}),
        ("mcts_threads", 64),
        ("seed", 0),
        ("without_fow", {"action": "store_true"}),
        ("simple_ratio", -1),
        ("ratio_change", 0),
        ("actor_only", {"action": "store_true"}),
        ("additional_labels", {"type": str, "default": None, "help": "Add additional labels in the batch. E.g., id,seq,last_terminal"}),
        # TODO, put it to model
        ("model_no_spatial", {"action": "store_true"}),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        more_args=["batchsize", "T"],
        child_providers=[self.context_args.args],
    )
def __init__(self):
    """Declare the replay-loader options.

    ContextArgs and MoreLabels are instantiated first; their ``args``
    attributes are passed to ArgsProvider as child providers.
    """
    self.context_args = ContextArgs()
    self.more_labels = MoreLabels()

    option_table = [
        ("actor_only", {"action": "store_true"}),
        ("list_file", "./train.lst"),
        ("verbose", {"action": "store_true"}),
        ("data_aug", {"type": int, "default": -1, "help": "specify data augumentation, 0-7, -1 mean random"}),
        ("ratio_pre_moves", {"type": float, "default": 0, "help": "how many moves to perform in each thread, before we use the data train the model"}),
        ("start_ratio_pre_moves", {"type": float, "default": 0.5, "help": "how many moves to perform in each thread, before we use the first sgf file to train the model"}),
        ("num_games_per_thread", {"type": int, "default": 5, "help": "number of concurrent games per threads, used to increase diversity of games"}),
        ("move_cutoff", {"type": int, "default": -1, "help": "Cutoff ply in replay"}),
        ("online", {"action": "store_true", "help": "Set game to online mode"}),
        ("gpu", {"type": int, "default": None}),
    ]
    nested_providers = [self.context_args.args, self.more_labels.args]

    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        more_args=["batchsize", "T"],
        child_providers=nested_providers,
    )
def __init__(self):
    """Declare the ROM / frame options plus ``additional_labels``.

    ``rom_dir`` defaults to the directory of the file containing this code.
    ContextArgs is attached as the only child provider.
    """
    self.context_args = ContextArgs()

    this_dir = os.path.dirname(__file__)
    option_table = [
        ("frame_skip", 4),
        ("hist_len", 4),
        ("rom_file", "pong.bin"),
        ("actor_only", {"action": "store_true"}),
        ("reward_clip", 1),
        ("rom_dir", this_dir),
        ("additional_labels", {"type": str, "default": None, "help": "Add additional labels in the batch. E.g., id,seq,last_terminal"}),
    ]
    self.args = ArgsProvider(
        call_from=self,
        define_args=option_table,
        more_args=["batchsize", "T", "env_eval_only"],
        child_providers=[self.context_args.args],
    )