Example #1
    def start(self):
        _features = features.features_from_game_info(
            self.controller.game_info())

        i = 0
        while i < self.info.game_duration_loops:
            i += self.skip  # presumably equal to self.step_mul below
            self.controller.step(self.step_mul)
            obs = self.controller.observe()
            try:
                agent_obs = _features.transform_obs(obs)
            except Exception:
                # Skip frames whose observation cannot be transformed;
                # a bare `pass` would leave agent_obs undefined below.
                continue

            if obs.player_result:
                self._state = StepType.LAST
                discount = 0
            else:
                discount = self.discount

            self._episode_steps += self.step_mul

            step = TimeStep(step_type=self._state,
                            reward=0,
                            discount=discount,
                            observation=agent_obs)

            self.agent.step(step, obs.actions)

            if obs.player_result:
                break

            self._state = StepType.MID

        self.save_data()
Example #2
    def start(self):
        _features = features.Features(self.controller.game_info())

        while True:
            self.controller.step(self.step_mul)
            obs = self.controller.observe()
            agent_obs = _features.transform_obs(obs.observation)

            if obs.player_result:  # Episode over.
                self._state = StepType.LAST
                discount = 0
            else:
                discount = self.discount

            self._episode_steps += self.step_mul

            step = TimeStep(step_type=self._state,
                            reward=0,
                            discount=discount,
                            observation=agent_obs)

            self.agent.step(step, obs.actions)

            if obs.player_result:
                break

            self._state = StepType.MID
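For reference, the TimeStep these examples construct is a plain namedtuple from pysc2 (pysc2.env.environment); a minimal sketch that mirrors its shape:

import collections
import enum

class StepType(enum.IntEnum):
    FIRST = 0  # first step of an episode
    MID = 1    # any step that is neither first nor last
    LAST = 2   # final step of an episode

class TimeStep(collections.namedtuple(
        'TimeStep', ['step_type', 'reward', 'discount', 'observation'])):
    def first(self):
        return self.step_type is StepType.FIRST

    def mid(self):
        return self.step_type is StepType.MID

    def last(self):
        return self.step_type is StepType.LAST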
Example #3
    def test_one_input(self):
        d = 48

        # These shapes match what the environment actually returns
        dummy_obs = {
            "screen": np.zeros((16, d, d), dtype="int32"),
            "minimap": np.zeros((7, d, d), dtype="int32"),
            "available_actions": np.arange(10)
        }

        dummy_ts = TimeStep(StepType.MID, 0.0, 0.0, dummy_obs)

        p = ObsProcesser()

        assert p.process_one_input(dummy_ts)["screen_numeric"].shape == (
            ObsProcesser.N_SCREEN_CHANNELS, d, d)
        assert p.process_one_input(dummy_ts)["minimap_numeric"].shape == (
            ObsProcesser.N_MINIMAP_CHANNELS, d, d)

        n_screen_scalar_features = len(
            [k for k in SCREEN_FEATURES if k.type == FeatureType.SCALAR])

        total_screen_dim = n_screen_scalar_features + 3 + 1  # binary flags + visibility_flag
        assert total_screen_dim == ObsProcesser.N_SCREEN_CHANNELS

        n_minimap_scalar_features = len(
            [k for k in MINIMAP_FEATURES if k.type == FeatureType.SCALAR])
        total_minimap_dim = n_minimap_scalar_features + 3 + 1  # binary flags + visibility_flag
        assert total_minimap_dim == ObsProcesser.N_MINIMAP_CHANNELS
Example #4
    def start(self, replay_file_path):
        self.openReplay(replay_file_path)

        _features = features.Features(self.controller.game_info())

        while True:
            self.controller.step(self.step_mul)
            obs = self.controller.observe()
            agent_obs = _features.transform_obs(obs.observation)

            if obs.player_result:
                self._state = StepType.LAST
                discount = 0
            else:
                discount = self.discount

            self._episode_steps += self.step_mul

            step = TimeStep(step_type=self._state,
                            reward=0,
                            discount=discount,
                            observation=agent_obs)

            self.agent.step(step, obs.actions, self._state == StepType.LAST)

            if obs.player_result:
                break

            self._state = StepType.MID

        self.closeReplay()
Example #5
    def start(self):
        _features = features.Features(self.controller.game_info())

        # Step through the replay at a fixed interval.
        step_mul = 10
        skips = step_mul
        i = 0
        while i < self.info.game_duration_loops:
            i += skips
            self.controller.step(skips)
            obs = self.controller.observe()
            agent_obs = _features.transform_obs(obs.observation)

            if obs.player_result:  # Episode over.
                self._state = StepType.LAST
                discount = 0
            else:
                discount = self.discount

            self._episode_steps += skips

            step = TimeStep(step_type=self._state,
                            reward=0,
                            discount=discount,
                            observation=agent_obs)

            self.agent.step(step, obs.actions, self.info, _features)

            if obs.player_result:
                break

            self._state = StepType.MID

        print("Saving data")
        #print(self.agent.states)
        pickle.dump({
            "info": self.info,
            "state": self.agent.states
        }, open("data/" + self.replay_file_name + ".p", "wb"))
        print("Data successfully saved")
        self.agent.states = []
        print("Data flushed")

        print("Done")
Example #6
    def convert_step(timestep: TimeStep) -> Tuple[Any, float, bool, Dict]:
        obs = timestep.observation["feature_screen"][SCREEN_FEATURES.player_relative.index]
        obs = obs.view(type=np.ndarray)  # Get a standard ndarray view instead of pysc2's subclass (NamedNumpyArray)

        # Reshape from (84, 84) to (84, 84, 1). '...' is for slicing higher-dimensional data structures and means
        # insert as many full slices (:) to extend the multi-dimensional slice to all dimensions.
        # Ref: https://stackoverflow.com/questions/118370/how-do-you-use-the-ellipsis-slicing-syntax-in-python
        obs = obs[..., np.newaxis]

        done = timestep.last()
        info = {}
        return obs, timestep.reward, done, info
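convert_step maps a pysc2 TimeStep onto the classic gym 4-tuple. A hedged sketch of how it could sit inside a gym-style wrapper; SC2GymAdapter and its _env attribute are hypothetical names, and a pysc2 sc2_env.SC2Env instance is assumed underneath:

import gym

class SC2GymAdapter(gym.Env):
    """Hypothetical adapter exposing a single-agent pysc2 env via gym."""

    def __init__(self, sc2_env):
        self._env = sc2_env  # assumed: a pysc2 sc2_env.SC2Env instance

    def reset(self):
        timestep = self._env.reset()[0]  # pysc2 returns one TimeStep per agent
        obs, _, _, _ = convert_step(timestep)
        return obs

    def step(self, action):
        timestep = self._env.step([action])[0]
        return convert_step(timestep)    # (obs, reward, done, info)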
Example #7
def train():
    env_args = dict(
        map_name=FLAGS.map_name,
        step_mul=FLAGS.step_mul,
        game_steps_per_episode=0,
        screen_size_px=(FLAGS.resolution,) * 2,
        minimap_size_px=(FLAGS.resolution,) * 2,
        visualize=FLAGS.visualize
    )
    max_frames = FLAGS.frames * 1e6
    total_frames = 0
    vis = visdom.Visdom()
    vis.close(env=FLAGS.expirement_name, win=None)

    envs = SC2ProcVec([partial(SC2TorchEnv, env_args) for _ in range(FLAGS.n_envs)])
    print(f"Starting {FLAGS.n_envs} workers")

    try:
        agent = A2CAgent(screen_width=FLAGS.resolution,
                         screen_height=FLAGS.resolution,
                         expirement_name=FLAGS.expirement_name,
                         learning_rate=FLAGS.learning_rate,
                         num_processes=FLAGS.n_envs,
                         value_coef=FLAGS.value_weight,
                         entropy_coef=FLAGS.entropy_weight,
                         continue_training=FLAGS.continue_training,
                         horizon=FLAGS.horizon)

        num_processes = FLAGS.n_envs
        horizon = FLAGS.horizon

        timesteps = envs.reset()
        agent.reset()

        while total_frames * num_processes <= max_frames:
            total_frames += 1
            step = total_frames % horizon
            agent.finish_step()
            actions = [agent.step(step, p, TimeStep(*t))
                       for p, t in enumerate(timesteps.reshape(num_processes, 4))]

            if step == 0:
                agent.rollout()
                agent.reset()

            timesteps = envs.step(actions)

    except KeyboardInterrupt:
        pass
    finally:
        envs.close()

    print(f"Training done after {total_frames} steps")
Example #8
    def step(self, ep, step, pb_obs, agent_obs, agent_actions):
        """
        Puts the given observations in the queue.
        :param int ep: the episode that this observation was made.
        :param int step: the episode time-step in which this observation was made.
        :param ResponseObservation pb_obs: the observation in protobuf form.
        :param TimeStep agent_obs: the observation in pysc2 features form.
        :param list[FunctionCall] agent_actions: list of actions executed by the agent between the previous observation
        and the current observation.
        :return:
        """
        if self._ignore_replay:
            return

        # a new episode starts at step 0
        if step == 0:
            self._total_eps += 1
            # force the step type in the observation
            agent_obs = TimeStep(StepType.FIRST, agent_obs.reward, agent_obs.discount, agent_obs.observation)

        # put sample in queue and wait for ack
        self._samples_queue.put((agent_obs, agent_actions, step == 0, self._total_steps))
        self._samples_queue.join()
        self._total_steps += 1
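The put()/join() pairing above blocks the producer until every sample is acknowledged, which implies a consumer calling task_done(); a minimal sketch of such a consumer, assuming _samples_queue is a queue.Queue or multiprocessing.JoinableQueue (the handling logic is hypothetical):

def consume(samples_queue):
    while True:
        agent_obs, agent_actions, new_episode, step_idx = samples_queue.get()
        try:
            pass  # e.g. serialize the sample to disk or a replay buffer
        finally:
            samples_queue.task_done()  # releases the producer's join()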
Example #9
    def start(self):
        _features = features.features_from_game_info(
            self.controller.game_info())

        _features.init_camera(
            features.Dimensions(self.screen_size_px, self.minimap_size_px),
            point.Point(*self.map_size), self.camera_width)
        while True:
            self.controller.step(self.step_mul)
            obs = self.controller.observe()
            try:
                agent_obs = _features.transform_obs(obs)
            except Exception:
                # Skip frames whose observation cannot be transformed.
                continue

            if len(obs.actions) == 0:
                continue

            if obs.player_result:  # Episode over.
                self._state = StepType.LAST
                discount = 0
            else:
                discount = self.discount

            step = TimeStep(step_type=self._state,
                            reward=0,
                            discount=discount,
                            observation=agent_obs)
            acts = []
            for action in obs.actions:
                func_call = _features.reverse_action(action)
                if format(func_call.function) in self.agent.action_dict:
                    acts.append(func_call)

            self.agent.step(step, self.info, acts)
            if obs.player_result:
                break

            self._state = StepType.MID
Example #10
    def get_random_trajectory(self):
        function_dict = {}
        for _FUNCTION in actions._FUNCTIONS:
            function_dict[_FUNCTION.ability_id] = _FUNCTION.name

        race_list = ['Terran', 'Zerg', 'Protoss']
        run_config = run_configs.get()
        sc2_proc = run_config.start()
        controller = sc2_proc.controller

        #print ("source: {}".format(source))
        #root_path = '/media/kimbring2/Steam/StarCraftII/Replays/4.8.2.71663-20190123_035823-1'
        root_path = self.source
        file_list = glob.glob(root_path + '*.*')
        #print ("file_list: {}".format(file_list))

        for i in range(0, 500):
            replay_file_path = random.choice(file_list)
            try:
                replay_data = run_config.replay_data(replay_file_path)
                ping = controller.ping()
                info = controller.replay_info(replay_data)
                print("ping: " + str(ping))
                print("replay_info: " + str(info))

                player0_race = info.player_info[0].player_info.race_actual
                player0_mmr = info.player_info[0].player_mmr
                player0_apm = info.player_info[0].player_apm
                player0_result = info.player_info[0].player_result.result
                print("player0_race: " + str(player0_race))
                print("player0_mmr: " + str(player0_mmr))
                print("player0_apm: " + str(player0_apm))
                print("player0_result: " + str(player0_result))

                home_race = race_list.index(self.home_race_name) + 1
                if (home_race == player0_race):
                    print("player0_race pass")
                else:
                    print("player0_race fail")
                    continue

                if (player0_mmr >= self.replay_filter):
                    print("player0_mmr pass ")
                else:
                    print("player0_mmr fail")
                    continue

                player1_race = info.player_info[1].player_info.race_actual
                player1_mmr = info.player_info[1].player_mmr
                player1_apm = info.player_info[1].player_apm
                player1_result = info.player_info[1].player_result.result
                print("player1_race: " + str(player1_race))
                print("player1_mmr: " + str(player1_mmr))
                print("player1_apm: " + str(player1_apm))
                print("player1_result: " + str(player1_result))

                away_race = race_list.index(self.away_race_name) + 1
                if (away_race == player1_race):
                    print("player1_race pass ")
                else:
                    print("player1_race fail ")
                    continue

                if (player1_mmr >= self.replay_filter):
                    print("player1_mmr pass ")
                else:
                    print("player1_mmr fail")
                    continue

                screen_size_px = (128, 128)
                minimap_size_px = (64, 64)
                player_id = 1
                discount = 1.
                step_mul = 8

                screen_size_px = point.Point(*screen_size_px)
                minimap_size_px = point.Point(*minimap_size_px)
                interface = sc_pb.InterfaceOptions(
                    raw=False,
                    score=True,
                    feature_layer=sc_pb.SpatialCameraSetup(width=24))
                screen_size_px.assign_to(interface.feature_layer.resolution)
                minimap_size_px.assign_to(
                    interface.feature_layer.minimap_resolution)

                map_data = None
                if info.local_map_path:
                    map_data = run_config.map_data(info.local_map_path)

                _episode_length = info.game_duration_loops
                _episode_steps = 0

                controller.start_replay(
                    sc_pb.RequestStartReplay(replay_data=replay_data,
                                             map_data=map_data,
                                             options=interface,
                                             observed_player_id=player_id))

                _state = StepType.FIRST

                if (info.HasField("error")
                        or info.base_build != ping.base_build  # different game version
                        or info.game_duration_loops < 1000
                        or len(info.player_info) != 2):
                    # Probably corrupt, or just not interesting.
                    print("error")
                    continue

                feature_screen_size = 128
                feature_minimap_size = 64
                rgb_screen_size = None
                rgb_minimap_size = None
                action_space = None
                use_feature_units = True
                agent_interface_format = sc2_env.parse_agent_interface_format(
                    feature_screen=feature_screen_size,
                    feature_minimap=feature_minimap_size,
                    rgb_screen=rgb_screen_size,
                    rgb_minimap=rgb_minimap_size,
                    action_space=action_space,
                    use_feature_units=use_feature_units)

                _features = features.features_from_game_info(
                    controller.game_info())

                build_info = []
                build_name = []
                replay_step = 0

                print("True loop")
                while True:
                    replay_step += 1
                    print("replay_step: " + str(replay_step))

                    controller.step(step_mul)
                    obs = controller.observe()
                    self.home_trajectory.append(obs)

                    if (len(obs.actions) != 0):
                        action = (obs.actions)[0]
                        action_spatial = action.action_feature_layer
                        unit_command = action_spatial.unit_command
                        ability_id = unit_command.ability_id
                        function_name = function_dict[ability_id]
                        if (function_name != 'build_queue'):
                            function_name_parse = function_name.split('_')

                            function_name_first = function_name_parse[0]
                            #print("function_name_first: " + str(function_name_first))
                            if (function_name_first == 'Build'
                                    or function_name_first == 'Train'):
                                unit_name = function_name_parse[1]
                                unit_info = int(
                                    units_new.get_unit_type(
                                        self.home_race_name, unit_name))
                                #print("unit_name: " + str(unit_name))
                                #print("unit_info: " + str(unit_info))

                                #print("function_name_parse[1]: " + str(function_name_parse[1]))
                                build_name.append(unit_name)
                                build_info.append(unit_info)

                    if obs.player_result:  # Episode over.
                        _state = StepType.LAST
                        discount = 0
                    else:
                        # discount keeps its current value
                        _episode_steps += step_mul

                    agent_obs = _features.transform_obs(obs)
                    step = TimeStep(step_type=_state,
                                    reward=0,
                                    discount=discount,
                                    observation=agent_obs)

                    score_cumulative = agent_obs['score_cumulative']
                    score_fields = [
                        'score', 'idle_production_time', 'idle_worker_time',
                        'total_value_units', 'total_value_structures',
                        'killed_value_units', 'killed_value_structures',
                        'collected_minerals', 'collected_vespene',
                        'collection_rate_minerals', 'collection_rate_vespene',
                        'spent_minerals', 'spent_vespene',
                    ]
                    # Equivalent to assigning each attribute by hand.
                    score_cumulative_dict = {
                        field: getattr(score_cumulative, field)
                        for field in score_fields
                    }

                    if obs.player_result:
                        break

                    _state = StepType.MID

                self.home_BO = build_info
                self.away_BU = score_cumulative_dict
                break
            except Exception:
                # Skip replays that fail to load or parse.
                continue
Example #12
    def start(self):
        print("Hello we are in Start")
        step_mul = 1
        trainingDataPath = 'C:\\Users\\Charlie\\training_data\\4101\\'
        _features = features.features_from_game_info(
            self.controller.game_info(), use_camera_position=True)
        #print("world_tl_to_world_camera_rel: {}\n\nworld_to_feature_screen_px: {}\n\nworld_to_world_tl: {}".format(_features._world_tl_to_world_camera_rel,
        #                                                                              _features._world_to_feature_screen_px,
        #                                                                              _features._world_to_world_tl))
        # _features.init_camera(features.Dimensions(self.screen_size_px, self.minimap_size_px),
        #                       point.Point(*const.WorldSize()),
        #                       self.camera_width)
        packageCounter = 0
        fileName = trainingDataPath + self.replay_file_name + "/" + str(
            packageCounter) + '.csv'
        npFileNameComp = trainingDataPath + self.replay_file_name + "/" + str(
            packageCounter)
        dirname = os.path.dirname(fileName)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        while True:
            # Take one step through the replay.
            self.controller.step(step_mul)
            obs = self.controller.observe()

            if obs.player_result:  # Episode over.
                self._state = StepType.LAST
                print("Episode Over")
                break

            discount = self.discount

            if len(obs.actions) == 0:
                continue

            agent_obs = _features.transform_obs(obs)
            step = TimeStep(step_type=self._state,
                            reward=0,
                            discount=discount,
                            observation=agent_obs)

            for action in obs.actions:
                for num in self.agent.action_dict.keys():
                    # If action is worth recording
                    if (int(_features.reverse_action(action).function) == num):
                        # Check if the action is on a Micro Unit
                        if (const.IsMicroUnit(agent_obs.single_select)
                                or const.IsMicroUnit(agent_obs.multi_select)):
                            # Record action
                            #print(_features._world_tl_to_world_camera_rel.offset)
                            #self.agent.states.append(self.agent.step(step, self.info, _features.reverse_action(action)))
                            state = self.agent.step(
                                step, self.info,
                                _features.reverse_action(action))
                            if state != 0:
                                npFileNameComp = trainingDataPath + self.replay_file_name + "/" + str(
                                    packageCounter)
                                np.savez_compressed(
                                    npFileNameComp,
                                    action=translate_outputs_to_NN(
                                        state["action"][0]),
                                    feature_layers=np.moveaxis(
                                        (np.array(state["feature_layers"])), 0,
                                        2))
                                packageCounter += 1
                        break

            # Episode end (obs.player_result) is handled at the top of the
            # loop, so execution only reaches this point mid-episode.

            self._state = StepType.MID
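np.savez_compressed appends an .npz extension to the path it is given, so each sample written above can be loaded back with np.load; a minimal sketch (the file name is illustrative):

import numpy as np

sample = np.load("0.npz")
action = sample["action"]                  # the translated action encoding
feature_layers = sample["feature_layers"]  # (H, W, C) after the moveaxis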