def rotate_sticky_actions(sticky_actions_state, config):
    """Mirror the sticky-action bits so they read as if played from the other side.

    This lets a policy believe it is playing from left to right although it
    is actually playing from right to left.

    Args:
      sticky_actions_state: Array of bits, one per sticky action, in the order
        returned by `football_action_set.get_sticky_actions(config)`.
      config: Config used by the environment.

    Returns:
      List of bits where position `k` holds the bit of the action obtained by
      passing the k-th sticky action through `flip_single_action`.
    """
    actions = football_action_set.get_sticky_actions(config)
    assert len(actions) == len(sticky_actions_state), len(actions)

    # Index the incoming bits by action so the bit belonging to each flipped
    # action can be looked up directly.
    bit_by_action = {
        action: sticky_actions_state[position]
        for position, action in enumerate(actions)
    }
    return [
        bit_by_action[flip_single_action(action, config)]
        for action in actions
    ]
def __init__(self, config):
    """Store the config and attach a game engine to this instance.

    An engine is popped from the module-level `_unused_engines` when that
    collection is non-empty; otherwise a new `libgame.GameEnv` is created.

    Args:
      config: Environment config; must provide 'physics_steps_per_frame'.
    """
    global _unused_engines
    self._config = config
    self._env = _unused_engines.pop() if _unused_engines else libgame.GameEnv()
    # NOTE(review): the flattened source does not show whether this assignment
    # belonged to the "new engine" branch only. It is applied to reused and
    # new engines alike here - confirm against the original layout.
    self._env.game_config.physics_steps_per_frame = (
        config['physics_steps_per_frame'])
    self._sticky_actions = football_action_set.get_sticky_actions(config)
    self._use_rendering_engine = False
def __init__(self, config):
    """Store the config, attach a game engine and perform an initial reset.

    An engine is popped from the module-level `_unused_engines` when that
    collection is non-empty; otherwise one is obtained from
    `self._get_new_env()`.

    Args:
      config: Environment config, also used to look up the sticky actions.
    """
    global _unused_engines
    self._config = config
    self._sticky_actions = football_action_set.get_sticky_actions(config)
    self._use_rendering_engine = False
    self._env = (
        _unused_engines.pop() if _unused_engines else self._get_new_env())
    # An initial reset is required so that calling render() before the first
    # reset() works (get/setState makes sure the env. config is the same).
    self.reset(inc=0)
def __init__(self, config):
    """Store the config, attach a game engine and perform an initial reset.

    An engine is popped from the module-level `_unused_engines` when that
    collection is non-empty; otherwise a new `libgame.GameEnv` is created.

    Args:
      config: Environment config; must provide 'physics_steps_per_frame'.
    """
    global _unused_engines
    self._config = config
    self._sticky_actions = football_action_set.get_sticky_actions(config)
    self._use_rendering_engine = False
    self._env = _unused_engines.pop() if _unused_engines else libgame.GameEnv()
    # NOTE(review): the flattened source does not show whether this assignment
    # belonged to the "new engine" branch only. It is applied to reused and
    # new engines alike here - confirm against the original layout.
    self._env.game_config.physics_steps_per_frame = (
        config['physics_steps_per_frame'])
    # An initial reset is required so that calling render() before the first
    # reset() works (get/setState makes sure the env. config is the same).
    self.reset(inc=0)
def add_step(self, o):
    """Append one step to the video (when enabled) and to the dump file.

    When `self._video_writer` is set, the frame obtained from `get_frame(o)`
    is resized, optionally annotated with custom stats, game stats and a
    per-player table of sticky actions, and written to the video. The trace
    of `o` is then pickled to `self._dump_file` with its raw 'frame' entry
    temporarily removed.

    Args:
      o: Observation for the current step. It is indexed by key and must
        expose `_trace`, `_time` and `_debugs`.
    """
    # Write video if requested.
    if self._video_writer:
        # Reverse the last axis of the frame (presumably RGB -> BGR for
        # OpenCV - confirm).
        frame = get_frame(o)[..., ::-1]
        frame = cv2.resize(
            frame, self._frame_dim, interpolation=cv2.INTER_AREA)
        writer = TextWriter(frame, self._frame_dim[0] - 300)
        custom_stats = self._config['custom_display_stats']
        if custom_stats:
            for line in custom_stats:
                writer.write(line)
        if self._config['display_game_stats']:
            writer.write('SCORE: %d - %d' % (o['score'][0], o['score'][1]))
            writer.write('BALL OWNED TEAM: %d' % (o['ball_owned_team']))
            writer.write('BALL OWNED PLAYER: %d' % (o['ball_owned_player']))
            writer.write('REWARD %.4f' % (o['reward']))
            writer.write('CUM. REWARD: %.4f' % (o['cumulative_reward']))
            # Second text column, starting at x offset 0.
            writer = TextWriter(frame, 0)
            writer.write('FRAME: %d' % self._step_cnt)
            writer.write('TIME: %f' % (o._time - self._last_frame_time))
            sticky_actions = football_action_set.get_sticky_actions(
                self._config)
            debug_trace = o._trace['debug']

            players_info = {}
            for team in ['left', 'right']:
                team_sticky = o['%s_agent_sticky_actions' % team]
                for player in range(len(team_sticky)):
                    player_sticky = team_sticky[player]
                    assert len(sticky_actions) == len(player_sticky)
                    player_idx = o['%s_agent_controlled_player' % team][player]
                    is_goalkeeper = (
                        o['%s_team_roles' % team][player_idx] ==
                        e_PlayerRole_GK)
                    info = {
                        'color': (0, 255, 0) if team == 'left'
                                 else (0, 255, 255),
                        'id': 'G' if is_goalkeeper else str(player_idx),
                    }
                    players_info[(team, player_idx)] = info

                    # Directional actions collapse into one DIRECTION entry
                    # (the last active one wins); the others are listed by
                    # name with their current bit.
                    active_direction = None
                    for i, action in enumerate(sticky_actions):
                        if action._directional:
                            if player_sticky[i]:
                                active_direction = action
                        else:
                            info[action._name] = player_sticky[i]
                    # Info about direction
                    info['DIRECTION'] = (
                        'O' if active_direction is None
                        else active_direction._name)
                    if 'action' in debug_trace:
                        # Info about action
                        info['ACTION'] = o['action'][player]._name
            write_players_state(writer, players_info)

            if 'baseline' in debug_trace:
                writer.write('BASELINE: %.5f' % debug_trace['baseline'])
            if 'logits' in debug_trace:
                probs = softmax(debug_trace['logits'])
                action_set = football_action_set.get_action_set(self._config)
                for action, prob in zip(action_set, probs):
                    writer.write(
                        '%s: %.5f' % (action.name, prob), scale_factor=0.5)
            for d in o._debugs:
                writer.write(d)
        self._video_writer.write(frame)

    # Write the dump. The raw frame is taken out of the trace while pickling
    # and put back afterwards, even if pickling fails, so the caller's
    # observation is never left without its frame.
    temp_frame = None
    if 'frame' in o._trace['observation']:
        temp_frame = o._trace['observation']['frame']
        del o._trace['observation']['frame']
    try:
        # Add config to the first frame for our replay tools to use.
        if self._step_cnt == 0:
            o['debug']['config'] = self._config.get_dictionary()
        six.moves.cPickle.dump(o._trace, self._dump_file)
    finally:
        if temp_frame is not None:
            o._trace['observation']['frame'] = temp_frame
    self._last_frame_time = o._time
    self._step_cnt += 1
def write_dump(name, trace, skip_visuals=False, config=None):
    """Write a trace to `<name>.dump` and, unless skipped, to `<name>.avi`.

    The video is rendered into a temporary file and copied to its final
    location afterwards. Files are only written to `name` when the
    module-level `WRITE_FILES` flag is set. Raw frames are replaced by
    `REMOVED_FRAME` while the trace is pickled and restored afterwards.

    Args:
      name: Output path prefix; '.avi' and '.dump' are appended to it.
      trace: Sequence of observations. Each is indexed by key and must expose
        `_trace`, `_time`, `_debugs` and `_additional_frames`.
      skip_visuals: When True, no video is produced.
      config: Mapping providing 'physics_steps_per_frame' and
        'display_game_stats'; both are required when a video is rendered.
        Defaults to an empty dict.

    Returns:
      True.
    """
    # A `{}` default would be one dict object shared by every call.
    if config is None:
        config = {}

    if not skip_visuals:
        fd, temp_path = tempfile.mkstemp(suffix='.avi')
        video = None
        try:
            if HIGH_RES:
                frame_dim = (1280, 720)
                fcc = cv2.VideoWriter_fourcc('p', 'n', 'g', ' ')
            else:
                fcc = cv2.VideoWriter_fourcc(*'XVID')
                frame_dim = (800, 600)
            video = cv2.VideoWriter(
                temp_path, fcc,
                constants.PHYSICS_STEPS_PER_SECOND /
                config['physics_steps_per_frame'],
                frame_dim)
            # Timestamps are displayed relative to the first step.
            start_time = trace[0]._time if len(trace) > 0 else None
            for frame_cnt, o in enumerate(trace, start=1):
                # Reverse the last axis of the frame (presumably RGB -> BGR
                # for OpenCV - confirm).
                frame = get_frame(o)[..., ::-1]
                frame = cv2.resize(
                    frame, frame_dim, interpolation=cv2.INTER_AREA)
                if config['display_game_stats']:
                    writer = TextWriter(frame, 950 if HIGH_RES else 500)
                    writer.write(
                        'SCORE: %d - %d' % (o['score'][0], o['score'][1]))
                    writer.write(
                        'BALL OWNED TEAM: %d' % (o['ball_owned_team']))
                    writer.write(
                        'BALL OWNED PLAYER: %d' % (o['ball_owned_player']))
                    writer.write('REWARD %.4f' % (o['reward']))
                    writer.write(
                        'CUM. REWARD: %.4f' % (o['cumulative_reward']))
                    # Second text column, starting at x offset 0.
                    writer = TextWriter(frame, 0)
                    writer.write('FRAME: %d' % frame_cnt)
                    writer.write('TIME: %f' % (o._time - start_time))
                    sticky_actions = football_action_set.get_sticky_actions(
                        config)

                    # Only the first agent is displayed: the left one, or
                    # the right one when there is no left agent.
                    sticky_actions_field = 'left_agent_sticky_actions'
                    if len(o[sticky_actions_field]) == 0:
                        sticky_actions_field = 'right_agent_sticky_actions'
                    agent_sticky = o[sticky_actions_field][0]
                    assert len(sticky_actions) == len(agent_sticky)

                    active_direction = None
                    for i, action in enumerate(sticky_actions):
                        if action._directional:
                            if agent_sticky[i]:
                                active_direction = action
                        else:
                            writer.write(
                                '%s: %d' % (action._name, agent_sticky[i]))
                    writer.write('DIRECTION: %s' % (
                        'NONE' if active_direction is None
                        else active_direction._name))

                    debug_trace = o._trace['debug']
                    if 'action' in debug_trace:
                        writer.write('ACTION: %s' % (o['action'][0]._name))
                    if 'baseline' in debug_trace:
                        writer.write(
                            'BASELINE: %.5f' % debug_trace['baseline'])
                    if 'logits' in debug_trace:
                        probs = softmax(debug_trace['logits'])
                        action_set = football_action_set.get_action_set(
                            config)
                        for action, prob in zip(action_set, probs):
                            writer.write(
                                '%s: %.5f' % (action.name, prob),
                                scale_factor=0.5)
                    for d in o._debugs:
                        writer.write(d)
                video.write(frame)
                for extra_frame in o._additional_frames:
                    extra_frame = cv2.resize(
                        extra_frame[..., ::-1], frame_dim,
                        interpolation=cv2.INTER_AREA)
                    video.write(extra_frame)
        finally:
            # Release the writer and the descriptor even when rendering
            # fails, so neither leaks.
            if video is not None:
                video.release()
            os.close(fd)
        try:
            # For some reason sometimes the file is missing, so the code
            # fails.
            if WRITE_FILES:
                shutil.copy2(temp_path, name + '.avi')
            os.remove(temp_path)
        except Exception:
            # Best effort: log and carry on, but let KeyboardInterrupt and
            # SystemExit propagate.
            logging.info(traceback.format_exc())

    # Pickle the trace without the raw frames, then put the frames back.
    stashed_frames = []
    try:
        to_pickle = []
        for o in trace:
            observation = o._trace['observation']
            if 'frame' in observation:
                stashed_frames.append((observation, observation['frame']))
                observation['frame'] = REMOVED_FRAME
            to_pickle.append(o._trace)
        if WRITE_FILES:
            with open(name + '.dump', 'wb') as f:
                six.moves.cPickle.dump(to_pickle, f)
    finally:
        # Restore in reverse so that an observation appearing more than once
        # in the trace ends up with its real frame, not the placeholder.
        for observation, original_frame in reversed(stashed_frames):
            observation['frame'] = original_frame

    logging.info('Dump written to %s.dump', name)
    if not skip_visuals:
        logging.info('Video written to %s.avi', name)
    return True
def _retrieve_observation(self):
    """Build the observation dict from the engine's current state.

    The result is stored in `self._observation`, the raw engine info in
    `self._info`, and `self._done` is set once the engine reports `done`.

    Returns:
      `info.is_in_play` as reported by the engine.
    """
    info = self._env.get_info()
    if info.done:
        self._done = True

    result = {}
    if self._config['render']:
        pixels = np.frombuffer(self._env.get_frame(), dtype=np.uint8)
        pixels = np.reshape(pixels, [1280, 720, 3])
        # Split the interleaved channels, lay them out as three 720x1280
        # planes, move the channel axis last and flip along axis 0.
        planes = np.concatenate([pixels[:, :, c] for c in range(3)])
        planes = np.reshape(planes, [3, 720, 1280])
        result['frame'] = np.flip(np.transpose(planes, [1, 2, 0]), 0)

    result['ball'] = np.array(
        [info.ball_position[axis] for axis in range(3)])
    # Ball's movement direction represented as [x, y, z] distance per step.
    result['ball_direction'] = np.array(
        [info.ball_direction[axis] for axis in range(3)])
    # Ball's rotation represented as [x, y, z] rotation angle per step.
    result['ball_rotation'] = np.array(
        [info.ball_rotation[axis] for axis in range(3)])

    self.convert_players_observation(info.left_team, 'left_team', result)
    self.convert_players_observation(info.right_team, 'right_team', result)

    result['left_agent_sticky_actions'] = []
    result['left_agent_controlled_player'] = []
    result['right_agent_sticky_actions'] = []
    result['right_agent_controlled_player'] = []
    for side in ('left', 'right'):
        sticky_out = result[side + '_agent_sticky_actions']
        controlled_out = result[side + '_agent_controlled_player']
        for agent in range(getattr(self._scenario_cfg, side + '_agents')):
            engine_controllers = getattr(info, side + '_controllers')
            if agent < len(engine_controllers):
                controlled_out.append(
                    engine_controllers[agent].controlled_player)
                controller = getattr(self, '_%s_controllers' % side)[agent]
                sticky_out.append(np.array(
                    controller.active_sticky_actions(), dtype=np.uint8))
            else:
                # The engine reports no controller for this agent: emit -1
                # and an all-zero sticky-action vector.
                controlled_out.append(-1)
                sticky_out.append(np.zeros(
                    len(football_action_set.get_sticky_actions(self._config)),
                    dtype=np.uint8))

    result['game_mode'] = int(info.game_mode)
    result['score'] = [info.left_goals, info.right_goals]
    result['ball_owned_team'] = info.ball_owned_team
    result['ball_owned_player'] = info.ball_owned_player
    result['steps_left'] = self._config['game_duration'] - self._step

    self._observation = result
    self._info = info
    return info.is_in_play
def add_step(self, o):
    """Append one step to the video (when enabled) and to the dump file.

    When `self._video_writer` is set, the frame obtained from `get_frame(o)`
    is resized, optionally annotated with custom stats, game stats and the
    sticky actions of the first agent, and written to the video. The trace of
    `o` is then pickled to `self._dump_file` with its raw 'frame' entry
    temporarily removed.

    Args:
      o: Observation for the current step. It is indexed by key and must
        expose `_trace`, `_time` and `_debugs`.
    """
    # Write video if requested.
    if self._video_writer:
        # Reverse the last axis of the frame (presumably RGB -> BGR for
        # OpenCV - confirm).
        frame = get_frame(o)[..., ::-1]
        frame = cv2.resize(
            frame, self._frame_dim, interpolation=cv2.INTER_AREA)
        writer = TextWriter(frame, self._frame_dim[0] - 300)
        custom_stats = self._config['custom_display_stats']
        if custom_stats:
            for line in custom_stats:
                writer.write(line)
        if self._config['display_game_stats']:
            writer.write('SCORE: %d - %d' % (o['score'][0], o['score'][1]))
            writer.write('BALL OWNED TEAM: %d' % (o['ball_owned_team']))
            writer.write('BALL OWNED PLAYER: %d' % (o['ball_owned_player']))
            writer.write('REWARD %.4f' % (o['reward']))
            writer.write('CUM. REWARD: %.4f' % (o['cumulative_reward']))
            # Second text column, starting at x offset 0.
            writer = TextWriter(frame, 0)
            writer.write('FRAME: %d' % self._step_cnt)
            writer.write('TIME: %f' % (o._time - self._last_frame_time))
            sticky_actions = football_action_set.get_sticky_actions(
                self._config)

            # Only the first agent is displayed: the left one, or the right
            # one when there is no left agent.
            sticky_actions_field = 'left_agent_sticky_actions'
            if len(o[sticky_actions_field]) == 0:
                sticky_actions_field = 'right_agent_sticky_actions'
            agent_sticky = o[sticky_actions_field][0]
            assert len(sticky_actions) == len(agent_sticky)

            # Directional actions collapse into one DIRECTION line (the last
            # active one wins); the others are printed by name.
            active_direction = None
            for i, action in enumerate(sticky_actions):
                if action._directional:
                    if agent_sticky[i]:
                        active_direction = action
                else:
                    writer.write('%s: %d' % (action._name, agent_sticky[i]))
            writer.write('DIRECTION: %s' % (
                'NONE' if active_direction is None
                else active_direction._name))

            debug_trace = o._trace['debug']
            if 'action' in debug_trace:
                writer.write('ACTION: %s' % (o['action'][0]._name))
            if 'baseline' in debug_trace:
                writer.write('BASELINE: %.5f' % debug_trace['baseline'])
            if 'logits' in debug_trace:
                probs = softmax(debug_trace['logits'])
                action_set = football_action_set.get_action_set(self._config)
                for action, prob in zip(action_set, probs):
                    writer.write(
                        '%s: %.5f' % (action.name, prob), scale_factor=0.5)
            for d in o._debugs:
                writer.write(d)
        self._video_writer.write(frame)

    # Write the dump. The raw frame is taken out of the trace while pickling
    # and put back afterwards, even if pickling fails, so the caller's
    # observation is never left without its frame.
    temp_frame = None
    if 'frame' in o._trace['observation']:
        temp_frame = o._trace['observation']['frame']
        del o._trace['observation']['frame']
    try:
        # Add config to the first frame for our replay tools to use.
        if self._step_cnt == 0:
            o['debug']['config'] = self._config.get_dictionary()
        six.moves.cPickle.dump(o._trace, self._dump_file)
    finally:
        if temp_frame is not None:
            o._trace['observation']['frame'] = temp_frame
    self._last_frame_time = o._time
    self._step_cnt += 1
def _retrieve_observation(self):
    """Build the observation dict from the engine's current state.

    In addition to ball, player, agent and score data, the result carries a
    'relative_position' dict built from `info.object_position_frame`. The
    result is stored in `self._observation`, the raw engine info in
    `self._info`, and `self._done` is set once the engine reports `done`.

    Returns:
      `info.is_in_play` as reported by the engine.
    """
    info = self._env.get_info()
    if info.done:
        self._done = True

    result = {}
    if self._config['render']:
        pixels = np.frombuffer(self._env.get_frame(), dtype=np.uint8)
        pixels = np.reshape(pixels, [1280, 720, 3])
        # Split the interleaved channels, lay them out as three 720x1280
        # planes, move the channel axis last and flip along axis 0.
        planes = np.concatenate([pixels[:, :, c] for c in range(3)])
        planes = np.reshape(planes, [3, 720, 1280])
        result['frame'] = np.flip(np.transpose(planes, [1, 2, 0]), 0)

    result['ball'] = np.array(
        [info.ball_position[axis] for axis in range(3)])
    # Ball's movement direction represented as [x, y, z] distance per step.
    result['ball_direction'] = np.array(
        [info.ball_direction[axis] for axis in range(3)])
    # Ball's rotation represented as [x, y, z] rotation angle per step.
    result['ball_rotation'] = np.array(
        [info.ball_rotation[axis] for axis in range(3)])

    self.convert_players_observation(info.left_team, 'left_team', result)
    self.convert_players_observation(info.right_team, 'right_team', result)

    result['left_agent_sticky_actions'] = []
    result['left_agent_controlled_player'] = []
    result['right_agent_sticky_actions'] = []
    result['right_agent_controlled_player'] = []
    for side in ('left', 'right'):
        sticky_out = result[side + '_agent_sticky_actions']
        controlled_out = result[side + '_agent_controlled_player']
        for agent in range(getattr(self._scenario_cfg, side + '_agents')):
            engine_controllers = getattr(info, side + '_controllers')
            if agent < len(engine_controllers):
                controlled_out.append(
                    engine_controllers[agent].controlled_player)
                controller = getattr(self, '_%s_controllers' % side)[agent]
                sticky_out.append(np.array(
                    controller.active_sticky_actions(), dtype=np.uint8))
            else:
                # The engine reports no controller for this agent: emit -1
                # and an all-zero sticky-action vector.
                controlled_out.append(-1)
                sticky_out.append(np.zeros(
                    len(football_action_set.get_sticky_actions(self._config)),
                    dtype=np.uint8))

    result['game_mode'] = int(info.game_mode)
    result['score'] = [info.left_goals, info.right_goals]
    result['ball_owned_team'] = info.ball_owned_team
    result['ball_owned_player'] = info.ball_owned_player
    result['steps_left'] = self._config['game_duration'] - self._step

    # Integer (x, y) of every entry in `object_position_frame`: entry 0 is
    # stored as the right goal, entry 1 as the left goal, and the remaining
    # ones as human_0, human_1, ...
    relative_position = {}
    result['relative_position'] = relative_position
    fixed_labels = {0: 'right_goal', 1: 'left_goal'}
    for i in range(len(info.object_position_frame)):
        pos = np.array([
            info.object_position_frame[i][0],
            info.object_position_frame[i][1],
        ]).astype(int)
        label = fixed_labels.get(i)
        if label is None:
            label = f'human_{i-2}'
        relative_position[label] = pos

    self._observation = result
    self._info = info
    return info.is_in_play