def __init__(self, opt):
    self.game_actions = DotDic({
        'NOTHING': 0,
        'TELL': 1
    })

    self.game_states = DotDic({
        'OUTSIDE': 0,
        'INSIDE': 1,
    })

    self.opt = opt

    # Set game defaults
    opt_game_default = DotDic({
        'game_action_space': 2,
        'game_reward_shift': 0,
        'game_comm_bits': 1,
        'game_comm_sigma': 2
    })
    for k in opt_game_default:
        if k not in self.opt:
            self.opt[k] = opt_game_default[k]

    # Episode horizon: 4n - 6 steps for n agents
    self.opt.nsteps = 4 * self.opt.game_nagents - 6

    self.reward_all_live = 1
    self.reward_all_die = -1

    self.reset()
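# Usage sketch for the constructor above. The class and module names are
# placeholders (the snippet does not name them); only 'game_nagents' and 'bs'
# are passed, so the remaining keys fall back to opt_game_default.
from switch_game import SwitchGame  # hypothetical import path
from utils.dotdic import DotDic

game = SwitchGame(DotDic({'game_nagents': 3, 'bs': 32}))
print(game.opt.nsteps)  # 4 * 3 - 6 = 6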
def create_episode(self):
    opt = self.opt
    episode = DotDic({})
    episode.steps = torch.zeros(opt.bs).int()
    episode.ended = torch.zeros(opt.bs).int()
    episode.r = torch.zeros(opt.bs, opt.game_nagents).float()
    episode.step_records = []

    return episode
def __init__(self, opt):
    self.game_actions = DotDic({'NOTHING': 1, 'TELL': 2})
    self.game_states = DotDic({
        'OUTSIDE': 0,
        'INSIDE': 1,
    })
    self.opt = opt

    self.reward_all_live = 1
    self.reward_all_die = -1

    self.step_count = 0
    self.reward = torch.zeros(self.opt["bs"], self.opt["game_nagents"])
    self.has_been = torch.zeros(
        self.opt["bs"], self.opt["nsteps"], self.opt["game_nagents"])
    self.terminal = torch.zeros(self.opt["bs"], dtype=torch.long)
    # 1-indexed agents
    self.active_agent = torch.zeros(
        self.opt["bs"], self.opt["nsteps"], dtype=torch.long)

    self.reset()
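# Illustration of the tensor layout above (a sketch, not a method from the
# snippet): in the switch riddle, when the agent in the room picks TELL at
# step t, the payoff depends on whether every agent has been inside at least
# once by then. `b` is a batch index.
def tell_reward_sketch(self, b, t):
    # has_been is (bs, nsteps, nagents); collapse steps 0..t per agent.
    everyone_has_been = bool(self.has_been[b, :t + 1].sum(0).gt(0).all())
    return self.reward_all_live if everyone_has_been else self.reward_all_die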
def main(unused_arg):
    opt = DotDic(json.loads(open(FLAGS.config_path, 'r').read()))

    result_path = None
    if FLAGS.results_path:
        # Name results after the config file if one was given, otherwise
        # fall back to a timestamped name.
        if FLAGS.config_path:
            result_path = os.path.join(
                FLAGS.results_path, Path(FLAGS.config_path).stem)
        else:
            result_path = os.path.join(
                FLAGS.results_path,
                'result-' + datetime.datetime.now().isoformat())

    for i in range(FLAGS.ntrials):
        trial_result_path = None
        if result_path:
            trial_result_path = result_path + '_' + str(i + FLAGS.start_index) + '.csv'
        trial_opt = copy.deepcopy(opt)
        run_trial(trial_opt, result_path=trial_result_path,
                  verbose=FLAGS.verbose)
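# Invocation sketch for the absl main() above. The script name main.py is an
# assumption; the flags are inferred from the FLAGS usage in the snippet
# (config_path, results_path, ntrials, start_index, verbose):
#
#   python main.py --config_path config/grid_3_dial.json \
#       --results_path results --ntrials 5 --verbose
#
# Trial i is then written to results/grid_3_dial_<i + start_index>.csv.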
def __init__(self, opt, size):
    self.opt = opt
    self.game_actions = DotDic({
        'NOTHING': 0,
        'UP': 1,
        'DOWN': 2,
        'LEFT': 3,
        'RIGHT': 4
    })
    if self.opt.game_action_space != len(self.game_actions):
        raise ValueError(
            "Config action space doesn't match game's ({} != {}).".format(
                self.opt.game_action_space, len(self.game_actions)))

    self.H = size[0]
    self.W = size[1]
    self.goal_reward = 10

    self.reset()
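# The guard above fails fast on config drift. With hypothetical values:
#   GridGame(DotDic({'game_action_space': 4}), (4, 4))
# raises ValueError: Config action space doesn't match game's (4 != 5).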
def __init__(self, opt):
    self.opt = opt
    if opt.bs != 1:
        raise NotImplementedError()

    # Set game defaults
    opt_game_default = DotDic({
        'render': True,
        'feature_screen_size': 48,
        'feature_minimap_size': 48,
        'rgb_screen_size': None,
        'rgb_minimap_size': None,
        'action_space': "RAW",
        'use_feature_units': True,
        'use_raw_units': True,
        'disable_fog': True,
        'max_agent_step': 0,
        'game_steps_per_episode': None,
        'max_episodes': 0,
        'step_mul': 4,
        'agent': 'pysc2.agents.random_agent.RandomAgent',
        'agent_name': None,
        'agent_race': 'random',
        'agent2': 'Bot',
        'agent2_name': None,
        'agent2_race': 'random',
        'difficulty': 'very_easy',
        'bot_build': 'random',
        'save_replay': False,
        'map': '1',
        'battle_net_map': False
    })
    for k in opt_game_default:
        if k not in self.opt:
            self.opt[k] = opt_game_default[k]

    self.env = make_env(self.opt)
    self.reset()
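# Construction sketch (the class name SC2Game is a placeholder; the snippet
# does not show it). Only bs is required up front; every other key falls back
# to opt_game_default, and bs != 1 raises NotImplementedError.
env = SC2Game(DotDic({'bs': 1}))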
def get_stats(self, steps):
    stats = DotDic({})
    stats.god_reward = self.god_strategy_reward(steps)

    return stats
def create_step_record(self):
    opt = self.opt
    record = DotDic({})
    record.s_t = None
    record.r_t = torch.zeros(opt.bs, opt.game_nagents)
    record.terminal = torch.zeros(opt.bs)

    record.agent_inputs = []

    # Track actions at time t per agent
    record.a_t = torch.zeros(opt.bs, opt.game_nagents, dtype=torch.long)
    if not opt.model_dial:
        record.a_comm_t = torch.zeros(opt.bs, opt.game_nagents, dtype=torch.long)

    # Track messages sent at time t per agent
    if opt.comm_enabled:
        # Messages are stored as floats for both DIAL and RIAL
        record.comm = torch.zeros(
            opt.bs, opt.game_nagents, opt.game_comm_bits, dtype=torch.float)
        if opt.model_dial and opt.model_target:
            record.comm_target = record.comm.clone()

    # Track hidden state per time t per agent
    record.hidden = torch.zeros(
        opt.game_nagents, opt.model_rnn_layers, opt.bs, opt.model_rnn_size)
    record.hidden_target = torch.zeros(
        opt.game_nagents, opt.model_rnn_layers, opt.bs, opt.model_rnn_size)

    # Track Q(a_t) and Q(a_max_t) per agent
    record.q_a_t = torch.zeros(opt.bs, opt.game_nagents)
    record.q_a_max_t = torch.zeros(opt.bs, opt.game_nagents)

    # Track Q(m_t) and Q(m_max_t) per agent
    if not opt.model_dial:
        record.q_comm_t = torch.zeros(opt.bs, opt.game_nagents)
        record.q_comm_max_t = torch.zeros(opt.bs, opt.game_nagents)

    return record
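# Sketch of how create_episode() above and create_step_record() compose in a
# rollout loop (the real loop is not part of this snippet): one record is
# appended per step, and per-batch `ended` flags freeze the step counters of
# finished games.
episode = self.create_episode()
for t in range(self.opt.nsteps):
    record = self.create_step_record()
    # ... fill record.a_t, record.comm, record.r_t for step t ...
    episode.step_records.append(record)
    episode.steps += (1 - episode.ended)  # advance only live episodes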
    '--ntrials', type=int, default=1, help='number of trials to run')
parser.add_argument('-s', '--start_index', type=int, default=0,
                    help='starting index for trial output')
parser.add_argument('-v', '--verbose', action='store_true',
                    help='prints training epoch rewards if set')
args = parser.parse_args()

opt = DotDic(json.loads(open(args.config_path, 'r').read()))

result_path = None
if args.results_path:
    # Name results after the config file if one was given, otherwise
    # fall back to a timestamped name.
    if args.config_path:
        result_path = os.path.join(args.results_path, Path(args.config_path).stem)
    else:
        result_path = os.path.join(
            args.results_path,
            'result-' + datetime.datetime.now().isoformat())

for i in range(args.ntrials):
    trial_result_path = None
    if result_path:
        trial_result_path = result_path + '_' + str(i + args.start_index) + '.csv'
    trial_opt = copy.deepcopy(opt)
    run_trial(trial_opt, result_path=trial_result_path, verbose=args.verbose)
from envs.grid_game_flat import GridGame
from utils.dotdic import DotDic
import json
import torch

opt = DotDic(json.loads(open('config/grid_3_dial.json', 'r').read()))
opt.bs = 3
opt.game_nagents = 4
opt.game_action_space_total = 6

g = GridGame(opt, (4, 4))
g.show(vid=False)

# Every agent takes action 4 (RIGHT)
u = torch.zeros((opt.bs, opt.game_nagents)) + 4
g.get_reward(u)
g.show(vid=False)

# print(g.get_action_range(None, None))