Example #1
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my = []
            self.cur_ball = []

            self.state_dim = 2  # relative ball
            self.history_size = 2  # frame history size
            self.action_dim = 2  # 2

            self.arglist = Argument()
            self.state_shape = (self.state_dim * self.history_size,
                                )  # state dimension
            self.act_space = [Discrete(self.action_dim * 2 + 1)]
            self.trainers = MADDPGAgentTrainer('agent_moving',
                                               self.mlp_model,
                                               self.state_shape,
                                               self.act_space,
                                               0,
                                               self.arglist,
                                               local_q_func=False)

            # for tensorboard
            self.summary_placeholders, self.update_ops, self.summary_op = \
                                                            self.setup_summary()
            self.summary_writer = \
                tf.summary.FileWriter('summary/moving_test', U.get_session().graph)

            U.initialize()

            # Load previous results, if necessary
            if self.arglist.load_dir == "":
                self.arglist.load_dir = self.arglist.save_dir
            if self.arglist.restore:
                print('Loading previous state... %s' % self.arglist.load_dir)
                U.load_state(self.arglist.load_dir)

            self.saver = tf.train.Saver(max_to_keep=1100)

            self.state = np.zeros([self.state_dim * self.history_size
                                   ])  # histories
            self.train_step = 216000
            self.wheels = np.zeros(self.number_of_robots * 2)
            self.action = np.zeros(self.action_dim * 2 + 1)  # not np.zeros(2)

            self.stats_steps = 6000  # for tensorboard
            self.rwd_sum = 0

            self.done = False
            self.control_idx = 0
            return
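Example #1 builds its TensorBoard plumbing from a `self.setup_summary()` helper and a `tf.summary.FileWriter`, but the helper itself is not shown. Below is a minimal sketch of what it could look like, assuming a single scalar statistic (the reward sum tracked in `self.rwd_sum`) logged through a placeholder-fed variable; the variable and tag names are illustrative only.

        import tensorflow as tf

        def setup_summary(self):
            # Hypothetical TensorBoard setup matching the call site above:
            # one scalar statistic, updated by feeding a placeholder.
            reward_sum = tf.Variable(0.)  # running reward over the stats window
            tf.summary.scalar('Moving/Reward_Sum', reward_sum)

            summary_vars = [reward_sum]
            summary_placeholders = [tf.placeholder(tf.float32) for _ in summary_vars]
            update_ops = [summary_vars[i].assign(summary_placeholders[i])
                          for i in range(len(summary_vars))]
            summary_op = tf.summary.merge_all()
            return summary_placeholders, update_ops, summary_op

At logging time the caller would feed the accumulated values into `update_ops` and write the result of `summary_op` to `self.summary_writer`.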
Example #2
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2*info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]

            self.state_dim = 2 # relative to the ball position
            self.history_size = 2 # frame history size
            self.action_dim = 2 # 2                    
            
            self.arglist = Argument()
            self.obs_shape_n = [(self.state_dim * self.history_size,) for _ in range(1)] # state dimension
            self.action_space = [spaces.Discrete(self.action_dim * 2 + 1) for _ in range(1)]
            self.trainers = self.get_trainers(1, self.obs_shape_n, self.action_space, self.arglist)

            # for tensorboard
            self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary()
            self.summary_writer = tf.summary.FileWriter('summary/aiwc_maddpg', U.get_session().graph)

            U.initialize()
            
            # Load previous results, if necessary
            if self.arglist.load_dir == "":
                self.arglist.load_dir = self.arglist.save_dir
            if self.arglist.display or self.arglist.restore or self.arglist.benchmark:
                print('Loading previous state...')
                U.load_state(self.arglist.load_dir)

            self.final_ep_rewards = []  # sum of rewards for training curve
            self.final_ep_ag_rewards = []  # agent rewards for training curve
            self.agent_info = [[[]]]  # placeholder for benchmarking info
            self.saver = tf.train.Saver()
            self.obs_n = [np.zeros([self.state_dim * self.history_size]) for _ in range(1)] # histories
            self.train_step = 0
            self.wheels = np.zeros(self.number_of_robots*2)
            self.action_n = [np.zeros(self.action_dim * 2 + 1) for _ in range(1)]
                   
            self.save_every_steps = 12000 # save the model every 10 minutes
            self.stats_steps = 6000 # for tensorboard
            self.reward_sum = 0
            self.score_sum = 0 
            self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]   
            self.inner_step = 0

            self.done = False
            self.control_idx = 0
            return
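Examples #2, #3, and #4 delegate trainer construction to a `self.get_trainers(...)` helper that is not shown. The sketch below is one plausible implementation consistent with those call sites, assuming it simply creates one `MADDPGAgentTrainer` per controlled agent; the agent-name pattern and the `local_q_func=False` flag are assumptions.

        def get_trainers(self, num_agents, obs_shape_n, action_space, arglist):
            # Hypothetical helper: one MADDPG trainer per controlled agent,
            # mirroring the (num_agents, obs_shape_n, action_space, arglist) call sites.
            trainers = []
            for i in range(num_agents):
                trainers.append(
                    MADDPGAgentTrainer('agent_%d' % i, self.mlp_model, obs_shape_n,
                                       action_space, i, arglist, local_q_func=False))
            return trainers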
Example #3
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]

            self.state_dim = 2  # relative ball
            self.history_size = 2  # frame history size
            self.action_dim = 2  # 2

            self.arglist = Argument()
            self.obs_shape_n = [(self.state_dim * self.history_size, )
                                for _ in range(1)]  # state dimension
            self.action_space = [
                Discrete(self.action_dim * 2 + 1) for _ in range(1)
            ]
            self.trainers = self.get_trainers(1, self.obs_shape_n,
                                              self.action_space, self.arglist)

            U.initialize()

            # Load previous results, if necessary
            if self.arglist.load_dir == "":
                self.arglist.load_dir = self.arglist.save_dir
            if self.arglist.display or self.arglist.restore or self.arglist.benchmark:
                print('Loading previous state...')
                U.load_state(self.arglist.load_dir)

            self.obs_n = [
                np.zeros([self.state_dim * self.history_size])
                for _ in range(self.number_of_robots)
            ]  # histories
            self.wheels = np.zeros(self.number_of_robots * 2)
            self.action_n = [
                np.zeros(self.action_dim * 2 + 1)
                for _ in range(self.number_of_robots)
            ]  # not np.zeros(2)

            self.distances = [[i for i in range(5)],
                              [i for i in range(5)]]  # distances to the ball
            self.idxs = [[i for i in range(5)], [i for i in range(5)]]
            self.shoot_plan = [0 for _ in range(self.number_of_robots)]
            self.deadlock_cnt = 0
            self.avoid_deadlock_cnt = 0
            self.global_step = 0
            return
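All of the examples allocate each robot's observation as a flat vector of length `state_dim * history_size` and label it a history buffer. The per-frame update is not part of `init_variables`; a plausible sketch is shown below, assuming the buffer stores frames oldest-first and the newest relative-ball observation is shifted in at the end (`push_history` is a hypothetical helper name).

        import numpy as np

        def push_history(obs, new_state, state_dim):
            # Hypothetical history update: drop the oldest frame, append the newest.
            # obs has shape (state_dim * history_size,); new_state has shape (state_dim,).
            obs = np.roll(obs, -state_dim)
            obs[-state_dim:] = new_state
            return obs

In a per-frame callback this would be applied to each controlled robot, e.g. `self.obs_n[i] = push_history(self.obs_n[i], rel_ball, self.state_dim)`.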
Example #4
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]

            self.state_dim = 5  # ball, goal, theta
            self.history_size = 2  # frame history size
            self.action_dim = 2  # 2

            self.arglist = Argument()
            self.obs_shape_n = [(self.state_dim * self.history_size, )
                                for _ in range(1)]  # state dimension
            self.action_space = [
                Discrete(self.action_dim * 2 + 1) for _ in range(1)
            ]
            self.trainers = self.get_trainers(1, self.obs_shape_n,
                                              self.action_space, self.arglist)

            U.initialize()

            # Load previous results, if necessary
            if self.arglist.load_dir == "":
                self.arglist.load_dir = self.arglist.save_dir
            if self.arglist.display or self.arglist.restore or self.arglist.benchmark:
                print('Loading previous state...')
                U.load_state(self.arglist.load_dir)

            self.episode_rewards = [0.0]  # sum of rewards for all agents
            self.agent_rewards = [[0.0] for _ in range(self.number_of_robots)
                                  ]  # individual agent reward
            self.final_ep_rewards = []  # sum of rewards for training curve
            self.final_ep_ag_rewards = []  # agent rewards for training curve
            self.agent_info = [[[]]]  # placeholder for benchmarking info
            self.obs_n = [
                np.zeros([self.state_dim * self.history_size])
                for _ in range(self.number_of_robots)
            ]  # histories
            self.wheels = np.zeros(self.number_of_robots * 2)
            self.action_n = [
                np.zeros(self.action_dim * 2 + 1)
                for _ in range(self.number_of_robots)
            ]  # not np.zeros(2)
            return
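Every example reads its configuration from an `Argument` instance, whose definition lives elsewhere. The sketch below lists only the fields these snippets actually touch (`save_dir`, `load_dir`, `restore`, `display`, `benchmark`); the defaults are placeholders, not the original values.

        class Argument:
            # Hypothetical container for the configuration fields read above.
            def __init__(self):
                self.save_dir = './save_model/'   # where checkpoints are written
                self.load_dir = ''                # falls back to save_dir when empty
                self.restore = False              # resume from a saved checkpoint
                self.display = False              # evaluation / rendering mode
                self.benchmark = False            # collect benchmarking info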
Example #5
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2*info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False
            self.cur_my_posture = []
            self.cur_op_posture = []
            self.cur_ball = []
            self.pre_ball = [0, 0]

            self.arglist = Argument()

            # Create agent trainers
            self.obs_shape_n = [3 for i in range(1)]
            self.num_adversaries = 0
            self.num_good = 1
            self.state_dim = 3 # 3*my robots, relative to the ball position
            self.history_size = 4 # frame history size
            self.action_dim = 2 # 2
            self.trainers = get_trainers(self.num_adversaries, self.obs_shape_n, self.action_dim, self.arglist)

            self.agent = DDPGAgent(self.state_dim * self.history_size, self.action_dim, self.max_linear_velocity)       
            self.global_step = 0 # iteration step            
            self.save_every_steps = 12000 # save the model every 10 minutes
 
            self.stats_steps = 6000 # for tensorboard
            self.reward_sum = 0
            self.score_sum = 0 
            self.active_flag = [[False for _ in range(5)], [False for _ in range(5)]]   
            self.inner_step = 0
            self.wheels = np.zeros(self.number_of_robots*2)
            self.history = np.zeros([self.state_dim, self.history_size])
            self.action = np.zeros(self.action_dim)
            return
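Unlike the other examples, Example #5 keeps its history as a 2-D array of shape `(state_dim, history_size)` and drives a single `DDPGAgent` with a flattened copy of it. A sketch of how that buffer might be advanced and consumed each frame is shown below; the `step_history` helper and the agent's `act()` method are assumptions, not names from the original code.

        import numpy as np

        def step_history(history, new_state):
            # Hypothetical per-frame update for the (state_dim, history_size) buffer:
            # shift columns left so the oldest frame drops out, then write the newest.
            history = np.roll(history, -1, axis=1)
            history[:, -1] = new_state
            return history

        # Usage sketch inside the per-frame callback:
        #   self.history = step_history(self.history, rel_ball_state)
        #   self.action = self.agent.act(self.history.flatten())  # 'act' is assumed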
Example #6
        def init_variables(self, info):
            # Here you have the information of the game (virtual init() in random_walk.cpp)
            # List: game_time, goal, number_of_robots, penalty_area, codewords,
            #       robot_height, robot_radius, max_linear_velocity, field, team_info,
            #       {rating, name}, axle_length, resolution, ball_radius
            # self.game_time = info['game_time']
            self.field = info['field']
            self.robot_size = 2 * info['robot_radius']
            self.goal = info['goal']
            self.max_linear_velocity = info['max_linear_velocity']
            self.number_of_robots = info['number_of_robots']
            self.end_of_frame = False

            ##################################################################
            # team info, 5 robots, (x,y,th,active,touch)
            self.cur_my = [[] for _ in range(self.number_of_robots)]

            self.cur_ball = []  # ball (x,y) position
            self.prev_ball = [0., 0.]  # previous ball (x,y) position

            # distance to the ball
            self.dist_ball = np.zeros(self.number_of_robots)
            # index for which robot is close to the ball
            self.idxs = [i for i in range(self.number_of_robots)]

            self.dlck_cnt = 0  # deadlock count
            # how many times avoid deadlock function was called
            self.avoid_dlck_cnt = 0

            self.wheels = np.zeros(self.number_of_robots * 2)
            ##################################################################
            self.state_dim = 2  # relative ball
            self.history_size = 2  # frame history size
            self.action_dim = 2  # 2

            # Histories of five robots.
            self.state = [np.zeros([self.state_dim * self.history_size]) \
                                    for _ in range(self.number_of_robots)]

            self.arglist = Argument()

            # state dimension
            self.state_shape = (self.state_dim * self.history_size, )
            self.act_space = [Discrete(self.action_dim * 2 + 1)]
            self.trainers = MADDPGAgentTrainer('agent_moving',
                                               self.mlp_model,
                                               self.state_shape,
                                               self.act_space,
                                               0,
                                               self.arglist,
                                               local_q_func=False)
            ##################################################################
            self.load_step_list = np.loadtxt('./test_step_list.txt')
            self.step_idx = 0  # For self.load_step_list

            # Load previous results.
            if self.arglist.restore:
                self.printConsole('Loading previous state... %d' % \
                                                self.load_step_list[self.step_idx])
                U.load_state('./save_model/aiwc_maddpg-%d' % \
                                                self.load_step_list[self.step_idx])
            ##################################################################
            # for tensorboard
            self.summary_placeholders, self.update_ops, self.summary_op = \
                                                            self.setup_summary()
            self.summary_writer = \
                tf.summary.FileWriter('summary/moving_test', U.get_session().graph)
            ##################################################################
            self.test_step = 0
            self.stats_steps = 12000  # For tensorboard, about 10 minutes

            self.scr_my = 0.  # my team score
            self.scr_op = 0.  # op team score
            self.scr_sum = 0  # score sum

            self.reset = False
            ##################################################################
            self.cur_time = time.time()  # For check time to take
            return
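Example #1 and the final example both pass `self.mlp_model` to `MADDPGAgentTrainer` as the network builder, but the method itself is not shown. A minimal sketch follows, assuming the two-hidden-layer fully connected network used in the reference MADDPG implementation; the layer width of 64 units is an assumption.

        import tensorflow as tf
        import tensorflow.contrib.layers as layers

        def mlp_model(self, input, num_outputs, scope, reuse=False, num_units=64, rnn_cell=None):
            # Hypothetical network builder: two ReLU hidden layers and a linear output,
            # matching the callable that MADDPGAgentTrainer expects for its model argument.
            with tf.variable_scope(scope, reuse=reuse):
                out = input
                out = layers.fully_connected(out, num_outputs=num_units, activation_fn=tf.nn.relu)
                out = layers.fully_connected(out, num_outputs=num_units, activation_fn=tf.nn.relu)
                out = layers.fully_connected(out, num_outputs=num_outputs, activation_fn=None)
                return out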