def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    self.cur_my = []
    self.cur_ball = []

    self.state_dim = 2  # relative ball position
    self.history_size = 2  # frame history size
    self.action_dim = 2

    self.arglist = Argument()
    self.state_shape = (self.state_dim * self.history_size, )  # state dimension
    self.act_space = [Discrete(self.action_dim * 2 + 1)]
    self.trainers = MADDPGAgentTrainer('agent_moving', self.mlp_model,
                                       self.state_shape, self.act_space, 0,
                                       self.arglist, local_q_func=False)

    # for tensorboard
    self.summary_placeholders, self.update_ops, self.summary_op = \
        self.setup_summary()
    self.summary_writer = \
        tf.summary.FileWriter('summary/moving_test', U.get_session().graph)

    U.initialize()

    # Load previous results, if necessary
    if self.arglist.load_dir == "":
        self.arglist.load_dir = self.arglist.save_dir
    if self.arglist.restore:
        print('Loading previous state... %s' % self.arglist.load_dir)
        U.load_state(self.arglist.load_dir)

    self.saver = tf.train.Saver(max_to_keep=1100)

    self.state = np.zeros([self.state_dim * self.history_size])  # histories
    self.train_step = 216000
    self.wheels = np.zeros(self.number_of_robots * 2)
    self.action = np.zeros(self.action_dim * 2 + 1)  # not np.zeros(2)

    self.stats_steps = 6000  # for tensorboard
    self.rwd_sum = 0

    self.done = False
    self.control_idx = 0
    return
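# --------------------------------------------------------------------------
# Hedged sketch, not part of the original file: the `self.mlp_model` passed
# to MADDPGAgentTrainer above is assumed to be the standard two-hidden-layer
# MLP used by the OpenAI MADDPG reference code. A minimal version, assuming
# TensorFlow 1.x with tf.contrib.layers available, could look like this.
import tensorflow as tf
import tensorflow.contrib.layers as layers

def mlp_model(input, num_outputs, scope, reuse=False, num_units=64,
              rnn_cell=None):
    # Fully connected network (shared template for policy and critic),
    # built inside the given variable scope.
    with tf.variable_scope(scope, reuse=reuse):
        out = input
        out = layers.fully_connected(out, num_outputs=num_units,
                                     activation_fn=tf.nn.relu)
        out = layers.fully_connected(out, num_outputs=num_units,
                                     activation_fn=tf.nn.relu)
        out = layers.fully_connected(out, num_outputs=num_outputs,
                                     activation_fn=None)
        return out
# --------------------------------------------------------------------------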
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]

    self.state_dim = 2  # relative to the ball position
    self.history_size = 2  # frame history size
    self.action_dim = 2

    self.arglist = Argument()
    self.obs_shape_n = [(self.state_dim * self.history_size,)
                        for _ in range(1)]  # state dimension
    self.action_space = [spaces.Discrete(self.action_dim * 2 + 1)
                         for _ in range(1)]
    self.trainers = self.get_trainers(1, self.obs_shape_n,
                                      self.action_space, self.arglist)

    # for tensorboard
    self.summary_placeholders, self.update_ops, self.summary_op = \
        self.setup_summary()
    self.summary_writer = \
        tf.summary.FileWriter('summary/aiwc_maddpg', U.get_session().graph)

    U.initialize()

    # Load previous results, if necessary
    if self.arglist.load_dir == "":
        self.arglist.load_dir = self.arglist.save_dir
    if self.arglist.display or self.arglist.restore or self.arglist.benchmark:
        print('Loading previous state...')
        U.load_state(self.arglist.load_dir)

    self.final_ep_rewards = []  # sum of rewards for training curve
    self.final_ep_ag_rewards = []  # agent rewards for training curve
    self.agent_info = [[[]]]  # placeholder for benchmarking info
    self.saver = tf.train.Saver()

    self.obs_n = [np.zeros([self.state_dim * self.history_size])
                  for _ in range(1)]  # histories
    self.train_step = 0
    self.wheels = np.zeros(self.number_of_robots * 2)
    self.action_n = [np.zeros(self.action_dim * 2 + 1) for _ in range(1)]

    self.save_every_steps = 12000  # save the model every 10 minutes
    self.stats_steps = 6000  # for tensorboard
    self.reward_sum = 0
    self.score_sum = 0
    self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]
    self.inner_step = 0

    self.done = False
    self.control_idx = 0
    return
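# --------------------------------------------------------------------------
# Hedged sketch, not part of the original file: `self.setup_summary()` above
# is assumed to follow the common TensorFlow 1.x pattern of assignable scalar
# summary variables. The tracked quantities (reward and score over a stats
# window) are assumptions based on the reward_sum / score_sum counters
# initialised above.
def setup_summary(self):
    reward_sum = tf.Variable(0.)
    tf.summary.scalar('Reward Sum / Stats Window', reward_sum)
    score_sum = tf.Variable(0.)
    tf.summary.scalar('Score Sum / Stats Window', score_sum)

    summary_vars = [reward_sum, score_sum]
    summary_placeholders = [tf.placeholder(tf.float32)
                            for _ in range(len(summary_vars))]
    update_ops = [summary_vars[i].assign(summary_placeholders[i])
                  for i in range(len(summary_vars))]
    summary_op = tf.summary.merge_all()
    return summary_placeholders, update_ops, summary_op
# Usage (also an assumption): every self.stats_steps frames, run update_ops
# with the accumulated values in feed_dict, evaluate summary_op, and pass the
# result to self.summary_writer.add_summary(summary, step).
# --------------------------------------------------------------------------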
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]

    self.state_dim = 2  # relative ball position
    self.history_size = 2  # frame history size
    self.action_dim = 2

    self.arglist = Argument()
    self.obs_shape_n = [(self.state_dim * self.history_size, )
                        for _ in range(1)]  # state dimension
    self.action_space = [Discrete(self.action_dim * 2 + 1) for _ in range(1)]
    self.trainers = self.get_trainers(1, self.obs_shape_n,
                                      self.action_space, self.arglist)

    U.initialize()

    # Load previous results, if necessary
    if self.arglist.load_dir == "":
        self.arglist.load_dir = self.arglist.save_dir
    if self.arglist.display or self.arglist.restore or self.arglist.benchmark:
        print('Loading previous state...')
        U.load_state(self.arglist.load_dir)

    self.obs_n = [
        np.zeros([self.state_dim * self.history_size])
        for _ in range(self.number_of_robots)
    ]  # histories
    self.wheels = np.zeros(self.number_of_robots * 2)
    self.action_n = [
        np.zeros(self.action_dim * 2 + 1)
        for _ in range(self.number_of_robots)
    ]  # not np.zeros(2)

    self.distances = [[i for i in range(5)],
                      [i for i in range(5)]]  # distances to the ball
    self.idxs = [[i for i in range(5)], [i for i in range(5)]]
    self.shoot_plan = [0 for _ in range(self.number_of_robots)]
    self.deadlock_cnt = 0
    self.avoid_deadlock_cnt = 0
    self.global_step = 0
    return
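# --------------------------------------------------------------------------
# Hedged sketch, not part of the original file: `self.get_trainers(...)` as
# called above is assumed to mirror the trainer-construction loop of the
# OpenAI MADDPG reference code, building one MADDPGAgentTrainer per
# controlled agent. The 'agent_%d' name prefix is an assumption.
def get_trainers(self, num_agents, obs_shape_n, action_space, arglist):
    trainers = []
    for i in range(num_agents):
        trainers.append(
            MADDPGAgentTrainer('agent_%d' % i, self.mlp_model, obs_shape_n,
                               action_space, i, arglist, local_q_func=False))
    return trainers
# --------------------------------------------------------------------------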
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]

    self.state_dim = 5  # ball, goal, theta
    self.history_size = 2  # frame history size
    self.action_dim = 2

    self.arglist = Argument()
    self.obs_shape_n = [(self.state_dim * self.history_size, )
                        for _ in range(1)]  # state dimension
    self.action_space = [Discrete(self.action_dim * 2 + 1) for _ in range(1)]
    self.trainers = self.get_trainers(1, self.obs_shape_n,
                                      self.action_space, self.arglist)

    U.initialize()

    # Load previous results, if necessary
    if self.arglist.load_dir == "":
        self.arglist.load_dir = self.arglist.save_dir
    if self.arglist.display or self.arglist.restore or self.arglist.benchmark:
        print('Loading previous state...')
        U.load_state(self.arglist.load_dir)

    self.episode_rewards = [0.0]  # sum of rewards for all agents
    self.agent_rewards = [[0.0] for _ in range(self.number_of_robots)]  # individual agent rewards
    self.final_ep_rewards = []  # sum of rewards for training curve
    self.final_ep_ag_rewards = []  # agent rewards for training curve
    self.agent_info = [[[]]]  # placeholder for benchmarking info

    self.obs_n = [
        np.zeros([self.state_dim * self.history_size])
        for _ in range(self.number_of_robots)
    ]  # histories
    self.wheels = np.zeros(self.number_of_robots * 2)
    self.action_n = [
        np.zeros(self.action_dim * 2 + 1)
        for _ in range(self.number_of_robots)
    ]  # not np.zeros(2)
    return
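# --------------------------------------------------------------------------
# Hedged sketch, not part of the original file: only the Argument() fields
# that init_variables() actually reads are listed here; the default values
# are assumptions, and the real class presumably also carries the MADDPG
# hyper-parameters (learning rate, batch size, replay-buffer size, ...).
class Argument(object):
    def __init__(self):
        self.save_dir = './save_model/'  # where checkpoints are written
        self.load_dir = ''               # '' falls back to save_dir
        self.restore = False             # resume from a saved checkpoint
        self.display = False             # evaluation / visualisation mode
        self.benchmark = False           # benchmarking mode
# --------------------------------------------------------------------------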
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    self.cur_my_posture = []
    self.cur_op_posture = []
    self.cur_ball = []
    self.pre_ball = [0, 0]

    self.arglist = Argument()

    # Create agent trainers
    self.obs_shape_n = [3 for i in range(1)]
    self.num_adversaries = 0
    self.num_good = 1
    self.state_dim = 3  # 3 * my robots, relative to the ball position
    self.history_size = 4  # frame history size
    self.action_dim = 2
    self.trainers = get_trainers(self.num_adversaries, self.obs_shape_n,
                                 self.action_dim, self.arglist)

    self.agent = DDPGAgent(self.state_dim * self.history_size,
                           self.action_dim, self.max_linear_velocity)

    self.global_step = 0  # iteration step
    self.save_every_steps = 12000  # save the model every 10 minutes
    self.stats_steps = 6000  # for tensorboard
    self.reward_sum = 0
    self.score_sum = 0
    self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]
    self.inner_step = 0

    self.wheels = np.zeros(self.number_of_robots * 2)
    self.history = np.zeros([self.state_dim, self.history_size])
    self.action = np.zeros(self.action_dim)
    return
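# --------------------------------------------------------------------------
# Hedged sketch, not part of the original file: one plausible way the
# [state_dim, history_size] buffer above is refreshed each frame before being
# flattened into the DDPG agent's (state_dim * history_size) input. The
# helper name, the "newest frame in column 0" convention, and the flattening
# order are assumptions; numpy is assumed imported as np, as elsewhere here.
def update_history(history, state):
    history = np.roll(history, 1, axis=1)  # shift older frames one column back
    history[:, 0] = state                  # write the newest observation
    return history, history.flatten()      # flattened copy for the network
# --------------------------------------------------------------------------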
def init_variables(self, info):
    # Here you have the information of the game (virtual init() in random_walk.cpp)
    # List: game_time, goal, number_of_robots, penalty_area, codewords,
    #       robot_height, robot_radius, max_linear_velocity, field, team_info,
    #       {rating, name}, axle_length, resolution, ball_radius
    # self.game_time = info['game_time']
    self.field = info['field']
    self.robot_size = 2 * info['robot_radius']
    self.goal = info['goal']
    self.max_linear_velocity = info['max_linear_velocity']
    self.number_of_robots = info['number_of_robots']
    self.end_of_frame = False

    ##################################################################
    # team info, 5 robots, (x, y, th, active, touch)
    self.cur_my = [[] for _ in range(self.number_of_robots)]

    self.cur_ball = []  # ball (x, y) position
    self.prev_ball = [0., 0.]  # previous ball (x, y) position

    # distance to the ball
    self.dist_ball = np.zeros(self.number_of_robots)
    # index for which robot is close to the ball
    self.idxs = [i for i in range(self.number_of_robots)]

    self.dlck_cnt = 0  # deadlock count
    # how many times the avoid-deadlock function was called
    self.avoid_dlck_cnt = 0

    self.wheels = np.zeros(self.number_of_robots * 2)
    ##################################################################
    self.state_dim = 2  # relative ball position
    self.history_size = 2  # frame history size
    self.action_dim = 2

    # Histories of five robots.
    self.state = [np.zeros([self.state_dim * self.history_size])
                  for _ in range(self.number_of_robots)]

    self.arglist = Argument()
    # state dimension
    self.state_shape = (self.state_dim * self.history_size, )
    self.act_space = [Discrete(self.action_dim * 2 + 1)]
    self.trainers = MADDPGAgentTrainer('agent_moving', self.mlp_model,
                                       self.state_shape, self.act_space, 0,
                                       self.arglist, local_q_func=False)
    ##################################################################
    self.load_step_list = np.loadtxt('./test_step_list.txt')
    self.step_idx = 0  # index into self.load_step_list

    # Load previous results.
    if self.arglist.restore:
        self.printConsole('Loading previous state... %d' %
                          self.load_step_list[self.step_idx])
        U.load_state('./save_model/aiwc_maddpg-%d' %
                     self.load_step_list[self.step_idx])
    ##################################################################
    # for tensorboard
    self.summary_placeholders, self.update_ops, self.summary_op = \
        self.setup_summary()
    self.summary_writer = \
        tf.summary.FileWriter('summary/moving_test', U.get_session().graph)
    ##################################################################
    self.test_step = 0
    self.stats_steps = 12000  # for tensorboard, about 10 minutes

    self.scr_my = 0.  # my team score
    self.scr_op = 0.  # opponent team score
    self.scr_sum = 0  # score sum

    self.reset = False
    ##################################################################
    self.cur_time = time.time()  # for measuring elapsed time
    return
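# --------------------------------------------------------------------------
# Hedged sketch, not part of the original file: one plausible way the
# evaluation fields above could be used -- after each test window, log the
# accumulated scores and restore the next checkpoint listed in
# test_step_list.txt. The method name and the reset logic are assumptions
# based only on the variables initialised above.
def load_next_checkpoint(self):
    self.step_idx += 1
    if self.step_idx < len(self.load_step_list):
        U.load_state('./save_model/aiwc_maddpg-%d' %
                     self.load_step_list[self.step_idx])
        self.scr_my = 0.   # reset per-checkpoint statistics
        self.scr_op = 0.
        self.test_step = 0
        self.cur_time = time.time()
# --------------------------------------------------------------------------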