def start(self):
    _features = features.features_from_game_info(self.controller.game_info())
    i = 0
    while i < self.info.game_duration_loops:
        i += self.skip
        self.controller.step(self.step_mul)
        obs = self.controller.observe()
        try:
            agent_obs = _features.transform_obs(obs)
        except Exception:
            # Skip frames whose observation cannot be transformed rather than
            # silently reusing a stale (or still undefined) agent_obs.
            continue
        if obs.player_result:  # Episode over.
            self._state = StepType.LAST
            discount = 0
        else:
            discount = self.discount
        self._episode_steps += self.step_mul
        step = TimeStep(step_type=self._state, reward=0, discount=discount,
                        observation=agent_obs)
        self.agent.step(step, obs.actions)
        if obs.player_result:
            break
        self._state = StepType.MID
    self.save_data()
def start(self):
    _features = features.Features(self.controller.game_info())
    while True:
        self.controller.step(self.step_mul)
        obs = self.controller.observe()
        agent_obs = _features.transform_obs(obs.observation)
        if obs.player_result:  # Episode over.
            self._state = StepType.LAST
            discount = 0
        else:
            discount = self.discount
        self._episode_steps += self.step_mul
        step = TimeStep(step_type=self._state, reward=0, discount=discount,
                        observation=agent_obs)
        self.agent.step(step, obs.actions)
        if obs.player_result:
            break
        self._state = StepType.MID
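# A minimal sketch of how a loop like the one above is typically driven.
# The replay path and resolutions here are illustrative assumptions; the
# interface options mirror the RequestStartReplay setup used further below.
from pysc2 import run_configs
from pysc2.lib import point
from s2clientprotocol import sc2api_pb2 as sc_pb

run_config = run_configs.get()
with run_config.start() as controller:
    replay_data = run_config.replay_data("path/to/replay.SC2Replay")
    interface = sc_pb.InterfaceOptions(
        raw=False, score=True,
        feature_layer=sc_pb.SpatialCameraSetup(width=24))
    point.Point(84, 84).assign_to(interface.feature_layer.resolution)
    point.Point(64, 64).assign_to(interface.feature_layer.minimap_resolution)
    controller.start_replay(sc_pb.RequestStartReplay(
        replay_data=replay_data, options=interface, observed_player_id=1))
    # ... construct the runner around this controller and call start() ...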
def test_one_input(self):
    d = 48
    # These shapes match what the environment actually returns.
    dummy_obs = {
        "screen": np.zeros((16, d, d), dtype="int32"),
        "minimap": np.zeros((7, d, d), dtype="int32"),
        "available_actions": np.arange(10)
    }
    dummy_ts = TimeStep(StepType.MID, 0.0, 0.0, dummy_obs)
    p = ObsProcesser()
    assert p.process_one_input(dummy_ts)["screen_numeric"].shape == (
        ObsProcesser.N_SCREEN_CHANNELS, d, d)
    assert p.process_one_input(dummy_ts)["minimap_numeric"].shape == (
        ObsProcesser.N_MINIMAP_CHANNELS, d, d)

    n_screen_scalar_features = len(
        [k for k in SCREEN_FEATURES if k.type == FeatureType.SCALAR])
    # Scalar features + 3 binary flags + 1 visibility flag.
    total_screen_dim = n_screen_scalar_features + 3 + 1
    assert total_screen_dim == ObsProcesser.N_SCREEN_CHANNELS

    n_minimap_scalar_features = len(
        [k for k in MINIMAP_FEATURES if k.type == FeatureType.SCALAR])
    total_minimap_dim = n_minimap_scalar_features + 3 + 1
    assert total_minimap_dim == ObsProcesser.N_MINIMAP_CHANNELS
def start(self, replay_file_path):
    self.openReplay(replay_file_path)
    _features = features.Features(self.controller.game_info())
    while True:
        self.controller.step(self.step_mul)
        obs = self.controller.observe()
        agent_obs = _features.transform_obs(obs.observation)
        if obs.player_result:  # Episode over.
            self._state = StepType.LAST
            discount = 0
        else:
            discount = self.discount
        self._episode_steps += self.step_mul
        step = TimeStep(step_type=self._state, reward=0, discount=discount,
                        observation=agent_obs)
        self.agent.step(step, obs.actions, self._state == StepType.LAST)
        if obs.player_result:
            break
        self._state = StepType.MID
    self.closeReplay()
def start(self):
    _features = features.Features(self.controller.game_info())
    step_mul = 10  # fixed stride through the replay
    i = 0
    while i < self.info.game_duration_loops:
        i += step_mul
        self.controller.step(step_mul)
        obs = self.controller.observe()
        agent_obs = _features.transform_obs(obs.observation)
        if obs.player_result:  # Episode over.
            self._state = StepType.LAST
            discount = 0
        else:
            discount = self.discount
        self._episode_steps += step_mul
        step = TimeStep(step_type=self._state, reward=0, discount=discount,
                        observation=agent_obs)
        self.agent.step(step, obs.actions, self.info, _features)
        if obs.player_result:
            break
        self._state = StepType.MID

    print("Saving data")
    with open("data/" + self.replay_file_name + ".p", "wb") as f:
        pickle.dump({"info": self.info, "state": self.agent.states}, f)
    print("Data successfully saved")
    self.agent.states = []
    print("Data flushed")
    print("Done")
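# A minimal sketch of reading one of these dumps back. The file name is
# illustrative; the keys are exactly what start() wrote above.
import pickle

with open("data/some_replay.p", "rb") as f:
    blob = pickle.load(f)
replay_info = blob["info"]   # the replay's metadata object
states = blob["state"]       # per-step states collected by the agent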
def convert_step(timestep: TimeStep) -> Tuple[Any, float, bool, Dict]:
    obs = timestep.observation["feature_screen"][SCREEN_FEATURES.player_relative.index]
    # Get a standard ndarray view instead of pysc2's NamedNumpyArray subclass.
    obs = obs.view(type=np.ndarray)
    # Reshape from (84, 84) to (84, 84, 1). '...' inserts as many full slices (:)
    # as needed to extend the slice to all dimensions.
    # Ref: https://stackoverflow.com/questions/118370/how-do-you-use-the-ellipsis-slicing-syntax-in-python
    obs = obs[..., np.newaxis]
    done = timestep.last()
    info = {}
    return obs, timestep.reward, done, info
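# A usage sketch, assuming a pysc2 single-agent environment. Map name, race
# and resolutions are illustrative assumptions, with feature_screen=84
# matching the (84, 84, 1) shape documented above.
from pysc2.env import sc2_env

with sc2_env.SC2Env(
        map_name="MoveToBeacon",
        players=[sc2_env.Agent(sc2_env.Race.terran)],
        agent_interface_format=sc2_env.parse_agent_interface_format(
            feature_screen=84, feature_minimap=64)) as env:
    timestep = env.reset()[0]  # pysc2 returns one TimeStep per agent
    obs, reward, done, info = convert_step(timestep)
    assert obs.shape == (84, 84, 1)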
def train():
    env_args = dict(
        map_name=FLAGS.map_name,
        step_mul=FLAGS.step_mul,
        game_steps_per_episode=0,
        screen_size_px=(FLAGS.resolution,) * 2,
        minimap_size_px=(FLAGS.resolution,) * 2,
        visualize=FLAGS.visualize
    )
    max_frames = FLAGS.frames * 1e6
    total_frames = 0

    vis = visdom.Visdom()
    vis.close(env=FLAGS.expirement_name, win=None)

    envs = SC2ProcVec([partial(SC2TorchEnv, env_args) for _ in range(FLAGS.n_envs)])
    print(f"Starting {FLAGS.n_envs} workers")
    try:
        agent = A2CAgent(screen_width=FLAGS.resolution,
                         screen_height=FLAGS.resolution,
                         expirement_name=FLAGS.expirement_name,
                         learning_rate=FLAGS.learning_rate,
                         num_processes=FLAGS.n_envs,
                         value_coef=FLAGS.value_weight,
                         entropy_coef=FLAGS.entropy_weight,
                         continue_training=FLAGS.continue_training,
                         horizon=FLAGS.horizon)
        num_processes = FLAGS.n_envs
        horizon = FLAGS.horizon
        timesteps = envs.reset()
        agent.reset()
        while total_frames * num_processes <= max_frames:
            total_frames += 1
            step = total_frames % horizon
            agent.finish_step()
            # Each worker row holds the 4 TimeStep fields
            # (step_type, reward, discount, observation).
            actions = [agent.step(step, p, TimeStep(*t))
                       for p, t in enumerate(timesteps.reshape(num_processes, 4))]
            if step == 0:
                agent.rollout()
                agent.reset()
            timesteps = envs.step(actions)
    except KeyboardInterrupt:
        pass
    finally:
        envs.close()
        print(f"Training done after {total_frames} steps")
def step(self, ep, step, pb_obs, agent_obs, agent_actions):
    """Puts the given observation in the queue.

    :param int ep: the episode in which this observation was made.
    :param int step: the episode time-step in which this observation was made.
    :param ResponseObservation pb_obs: the observation in protobuf form.
    :param TimeStep agent_obs: the observation in pysc2 features form.
    :param list[FunctionCall] agent_actions: list of actions executed by the agent
        between the previous observation and the current observation.
    :return:
    """
    if self._ignore_replay:
        return

    # Check for a new episode.
    if step == 0:
        self._total_eps += 1
        # Force the step type of the first observation.
        agent_obs = TimeStep(StepType.FIRST, agent_obs.reward,
                             agent_obs.discount, agent_obs.observation)

    # Put the sample in the queue and wait for the ack.
    self._samples_queue.put(
        (agent_obs, agent_actions, step == 0, self._total_steps))
    self._samples_queue.join()
    self._total_steps += 1
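# The producer above blocks on join() after every put(), so the consumer on
# the other side must acknowledge each sample with task_done(). A minimal
# consumer sketch; samples_queue stands for the same queue.Queue or
# multiprocessing.JoinableQueue instance as self._samples_queue.
def consume(samples_queue):
    while True:
        agent_obs, agent_actions, new_episode, total_steps = samples_queue.get()
        try:
            pass  # ... persist or process the sample here ...
        finally:
            samples_queue.task_done()  # releases the producer's join()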
def start(self):
    _features = features.features_from_game_info(self.controller.game_info())
    _features.init_camera(
        features.Dimensions(self.screen_size_px, self.minimap_size_px),
        point.Point(*self.map_size), self.camera_width)
    while True:
        self.controller.step(self.step_mul)
        obs = self.controller.observe()
        # Only frames that contain at least one action are of interest.
        if len(obs.actions) == 0:
            continue
        try:
            agent_obs = _features.transform_obs(obs)
        except Exception:
            # Skip frames whose observation cannot be transformed rather than
            # silently reusing a stale (or still undefined) agent_obs.
            continue
        if obs.player_result:  # Episode over.
            self._state = StepType.LAST
            discount = 0
        else:
            discount = self.discount
        step = TimeStep(step_type=self._state, reward=0, discount=discount,
                        observation=agent_obs)
        # Keep only the actions whose function id appears in the agent's
        # action dictionary.
        acts = []
        for action in obs.actions:
            reversed_action = _features.reverse_action(action)
            for num in self.agent.action_dict.keys():
                if format(reversed_action.function) == num:
                    acts.append(reversed_action)
                    break
        self.agent.step(step, self.info, acts)
        if obs.player_result:
            break
        self._state = StepType.MID
def get_random_trajectory(self):
    function_dict = {}
    for _FUNCTION in actions._FUNCTIONS:
        function_dict[_FUNCTION.ability_id] = _FUNCTION.name

    race_list = ['Terran', 'Zerg', 'Protoss']

    run_config = run_configs.get()
    sc2_proc = run_config.start()
    controller = sc2_proc.controller

    root_path = self.source
    file_list = glob.glob(root_path + '*.*')

    for i in range(0, 500):
        replay_file_path = random.choice(file_list)
        try:
            replay_data = run_config.replay_data(replay_file_path)
            ping = controller.ping()
            info = controller.replay_info(replay_data)
            print("ping: " + str(ping))
            print("replay_info: " + str(info))

            player0_race = info.player_info[0].player_info.race_actual
            player0_mmr = info.player_info[0].player_mmr
            player0_apm = info.player_info[0].player_apm
            player0_result = info.player_info[0].player_result.result
            print("player0_race: " + str(player0_race))
            print("player0_mmr: " + str(player0_mmr))
            print("player0_apm: " + str(player0_apm))
            print("player0_result: " + str(player0_result))

            home_race = race_list.index(self.home_race_name) + 1
            if home_race == player0_race:
                print("player0_race pass")
            else:
                print("player0_race fail")
                continue

            if player0_mmr >= self.replay_filter:
                print("player0_mmr pass")
            else:
                print("player0_mmr fail")
                continue

            # Note: index 1, not 0, for the second player.
            player1_race = info.player_info[1].player_info.race_actual
            player1_mmr = info.player_info[1].player_mmr
            player1_apm = info.player_info[1].player_apm
            player1_result = info.player_info[1].player_result.result
            print("player1_race: " + str(player1_race))
            print("player1_mmr: " + str(player1_mmr))
            print("player1_apm: " + str(player1_apm))
            print("player1_result: " + str(player1_result))

            away_race = race_list.index(self.away_race_name) + 1
            if away_race == player1_race:
                print("player1_race pass")
            else:
                print("player1_race fail")
                continue

            if player1_mmr >= self.replay_filter:
                print("player1_mmr pass")
            else:
                print("player1_mmr fail")
                continue

            screen_size_px = point.Point(128, 128)
            minimap_size_px = point.Point(64, 64)
            player_id = 1
            discount = 1.
            step_mul = 8

            interface = sc_pb.InterfaceOptions(
                raw=False, score=True,
                feature_layer=sc_pb.SpatialCameraSetup(width=24))
            screen_size_px.assign_to(interface.feature_layer.resolution)
            minimap_size_px.assign_to(interface.feature_layer.minimap_resolution)

            map_data = None
            if info.local_map_path:
                map_data = run_config.map_data(info.local_map_path)

            _episode_length = info.game_duration_loops
            _episode_steps = 0

            controller.start_replay(
                sc_pb.RequestStartReplay(replay_data=replay_data,
                                         map_data=map_data,
                                         options=interface,
                                         observed_player_id=player_id))
            _state = StepType.FIRST

            if (info.HasField("error") or
                    info.base_build != ping.base_build or  # different game version
                    info.game_duration_loops < 1000 or
                    len(info.player_info) != 2):
                # Probably corrupt, or just not interesting.
                print("error")
                continue

            feature_screen_size = 128
            feature_minimap_size = 64
            rgb_screen_size = None
            rgb_minimap_size = None
            action_space = None
            use_feature_units = True
            agent_interface_format = sc2_env.parse_agent_interface_format(
                feature_screen=feature_screen_size,
                feature_minimap=feature_minimap_size,
                rgb_screen=rgb_screen_size,
                rgb_minimap=rgb_minimap_size,
                action_space=action_space,
                use_feature_units=use_feature_units)

            _features = features.features_from_game_info(controller.game_info())

            build_info = []
            build_name = []
            replay_step = 0

            while True:
                replay_step += 1
                print("replay_step: " + str(replay_step))
                controller.step(step_mul)
                obs = controller.observe()
                self.home_trajectory.append(obs)

                if len(obs.actions) != 0:
                    action = obs.actions[0]
                    action_spatial = action.action_feature_layer
                    unit_command = action_spatial.unit_command
                    ability_id = unit_command.ability_id
                    function_name = function_dict[ability_id]
                    if function_name != 'build_queue':
                        function_name_parse = function_name.split('_')
                        function_name_first = function_name_parse[0]
                        if (function_name_first == 'Build' or
                                function_name_first == 'Train'):
                            unit_name = function_name_parse[1]
                            unit_info = int(units_new.get_unit_type(
                                self.home_race_name, unit_name))
                            build_name.append(unit_name)
                            build_info.append(unit_info)

                if obs.player_result:  # Episode over.
                    _state = StepType.LAST
                    discount = 0

                _episode_steps += step_mul
                agent_obs = _features.transform_obs(obs)
                step = TimeStep(step_type=_state, reward=0, discount=discount,
                                observation=agent_obs)

                score_cumulative = agent_obs['score_cumulative']
                score_cumulative_dict = {}
                score_cumulative_dict['score'] = score_cumulative.score
                score_cumulative_dict['idle_production_time'] = score_cumulative.idle_production_time
                score_cumulative_dict['idle_worker_time'] = score_cumulative.idle_worker_time
                score_cumulative_dict['total_value_units'] = score_cumulative.total_value_units
                score_cumulative_dict['total_value_structures'] = score_cumulative.total_value_structures
                score_cumulative_dict['killed_value_units'] = score_cumulative.killed_value_units
                score_cumulative_dict['killed_value_structures'] = score_cumulative.killed_value_structures
                score_cumulative_dict['collected_minerals'] = score_cumulative.collected_minerals
                score_cumulative_dict['collected_vespene'] = score_cumulative.collected_vespene
                score_cumulative_dict['collection_rate_minerals'] = score_cumulative.collection_rate_minerals
                score_cumulative_dict['collection_rate_vespene'] = score_cumulative.collection_rate_vespene
                score_cumulative_dict['spent_minerals'] = score_cumulative.spent_minerals
                score_cumulative_dict['spent_vespene'] = score_cumulative.spent_vespene

                if obs.player_result:
                    break
                _state = StepType.MID

            self.home_BO = build_info
            self.away_BU = score_cumulative_dict
            break
        except Exception:
            # Skip replays that fail to parse or play back.
            continue
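# The field-by-field construction of score_cumulative_dict above could be
# collapsed into a comprehension. A sketch, assuming score_cumulative is
# pysc2's NamedNumpyArray with attribute access; the field names are copied
# from the assignments above.
SCORE_FIELDS = [
    'score', 'idle_production_time', 'idle_worker_time',
    'total_value_units', 'total_value_structures',
    'killed_value_units', 'killed_value_structures',
    'collected_minerals', 'collected_vespene',
    'collection_rate_minerals', 'collection_rate_vespene',
    'spent_minerals', 'spent_vespene',
]

def summarize_score(score_cumulative):
    # Equivalent to the field-by-field assignments above.
    return {name: getattr(score_cumulative, name) for name in SCORE_FIELDS}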
def start(self): print("Hello we are in Start") step_mul = 1 trainingDataPath = 'C:\\Users\\Charlie\\training_data\\4101\\' _features = features.features_from_game_info( self.controller.game_info(), use_camera_position=True) #print("world_tl_to_world_camera_rel: {}\n\nworld_to_feature_screen_px: {}\n\nworld_to_world_tl: {}".format(_features._world_tl_to_world_camera_rel, # _features._world_to_feature_screen_px, # _features._world_to_world_tl)) # _features.init_camera(features.Dimensions(self.screen_size_px, self.minimap_size_px), # point.Point(*const.WorldSize()), # self.camera_width) packageCounter = 0 fileName = trainingDataPath + self.replay_file_name + "/" + str( packageCounter) + '.csv' npFileName = trainingDataPath + self.replay_file_name + "/" + str( packageCounter) + '.npy' npFileNameComp = trainingDataPath + self.replay_file_name + "/" + str( packageCounter) dirname = os.path.dirname(fileName) if not os.path.exists(dirname): os.makedirs(dirname) # keyboard = Controller() # time.sleep(1) # keyboard.press(str(self.player_id)) # time.sleep(0.5) # keyboard.release(str(self.player_id)) while True: #Takes one step through the replay self.controller.step(step_mul) #Converts visual data into abstract data obs = self.controller.observe() if obs.player_result: # Episide over. self._state = StepType.LAST print("Episode Over") break discount = 0 else: discount = self.discount if (len(obs.actions) == 0): continue agent_obs = _features.transform_obs(obs) step = TimeStep(step_type=self._state, reward=0, discount=discount, observation=agent_obs) for action in obs.actions: for num in self.agent.action_dict.keys(): # If action is worth recording if (int(_features.reverse_action(action).function) == num): # Check if the action is on a Micro Unit if (const.IsMicroUnit(agent_obs.single_select) or const.IsMicroUnit(agent_obs.multi_select)): # Record action #print(_features._world_tl_to_world_camera_rel.offset) #self.agent.states.append(self.agent.step(step, self.info, _features.reverse_action(action))) state = self.agent.step( step, self.info, _features.reverse_action(action)) if state != 0: npFileNameComp = trainingDataPath + self.replay_file_name + "/" + str( packageCounter) np.savez_compressed( npFileNameComp, action=translate_outputs_to_NN( state["action"][0]), feature_layers=np.moveaxis( (np.array(state["feature_layers"])), 0, 2)) packageCounter += 1 break #print("%s: %s" % (len(agent_obs.multi_select), units.Zerg(agent_obs.multi_select[0][0]))) #print(action) #print(units.Zerg(agent_obs.single_select[0][0])) #self.agent.step(step, self.info, acts) #print(_features.reverse_action(obs.actions[0])) #print ("+") #print(offset) #screenpoint = (84, 84) #screenpoint = point.Point(*screenpoint) if obs.player_result: os.remove(replay_file_path) print("Game Ended, File Removed") break self._state = StepType.MID