def main():
    """Reset CollectMineralShards forever, issuing one unit-targeted action per reset.

    Builds the 64x64 dueling Q-network, loads ``mineral_shards.pkl`` and then
    loops without end: reset the environment, read the raw observation, pick
    the first two units whose ``owner`` is 1, print their tags and send a
    single ``_SELECT_UNIT`` function call that references both tags.

    Raises:
        IndexError: if fewer than two units with ``owner == 1`` are present
            in the raw observation after a reset.
    """
    FLAGS(sys.argv)
    with sc2_env.SC2Env(map_name="CollectMineralShards",
                        step_mul=step_mul,
                        visualize=True,
                        game_steps_per_episode=steps * step_mul) as env:

        model = deepq.models.cnn_to_mlp(convs=[(32, 8, 4), (64, 4, 2),
                                               (64, 3, 1)],
                                        hiddens=[256],
                                        dueling=True)

        def make_obs_ph(name):
            return U.BatchInput((64, 64), name=name)

        act_params = {
            'make_obs_ph': make_obs_ph,
            'q_func': model,
            'num_actions': 4,
        }

        # The loaded policy is never queried in this function; the call is
        # kept so the checkpoint is still loaded (and still fails loudly if
        # the file is missing).
        deepq_mineral_shards.load("mineral_shards.pkl",
                                  act_params=act_params)

        while True:
            env.reset()

            # MODIFIED CODE BELOW
            # ------------------------------------------------------------------

            # Raw observation of the current game state.
            # NOTE(review): observation_raw() is provided by the env object in
            # use here — confirm it exists in the pysc2 build being run.
            raw_obs = env.observation_raw()

            # Units that belong to the player (owner id 1).
            self_units = [unit for unit in raw_obs.units if unit.owner == 1]
            if len(self_units) < 2:
                raise IndexError(
                    "expected at least two units with owner == 1, found %d"
                    % len(self_units))

            own_id = self_units[0].tag
            target_id = self_units[1].tag  # Unit id
            print(own_id, target_id)

            # NOTE(review): the original author describes this call as "a raw
            # action to kill other marine"; the function id and the argument
            # layout come from module-level constants not visible here —
            # confirm they match the intended action.
            env.step(actions=[
                sc2_actions.FunctionCall(_SELECT_UNIT,
                                         [_NOT_QUEUED, [own_id], [target_id]])
            ])
# Example no. 2
# 0
def main():
    """Play CollectMineralShards forever with the saved 4-action Q-network.

    Each episode selects the whole army, then repeatedly:
      1. reads the player-relative screen layer,
      2. locates the mean position of the friendly pixels,
      3. shifts the layer so that position sits at (32, 32),
      4. asks the loaded policy for an action (0=UP, 1=DOWN, 2=LEFT, 3=RIGHT),
      5. moves 16 cells in that direction, clamped to the 0..63 range.
    The accumulated reward is printed when the episode's last step arrives.
    """
    FLAGS(sys.argv)
    with sc2_env.SC2Env(map_name="CollectMineralShards",
                        step_mul=step_mul,
                        visualize=True,
                        game_steps_per_episode=steps * step_mul) as env:

        q_network = deepq.models.cnn_to_mlp(
            convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            hiddens=[256],
            dueling=True)

        def make_obs_ph(name):
            # Placeholder for one 64x64 screen layer per batch entry.
            return U.BatchInput((64, 64), name=name)

        act = deepq_mineral_shards.load(
            "mineral_shards.pkl",
            act_params={
                'make_obs_ph': make_obs_ph,
                'q_func': q_network,
                'num_actions': 4,
            })

        while True:
            env.reset()
            episode_rew = 0

            # Select every unit so that move commands apply to the army.
            timestep = env.step(actions=[
                sc2_actions.FunctionCall(_SELECT_ARMY, [_SELECT_ALL])
            ])[0]

            finished = False
            while not finished:
                relative = timestep.observation["screen"][_PLAYER_RELATIVE]

                # nonzero() yields (row indices, column indices), i.e. (y, x).
                rows, cols = (relative == _PLAYER_FRIENDLY).nonzero()
                px = int(cols.mean())
                py = int(rows.mean())

                # Re-centre the layer on the army, horizontally then vertically.
                centred = relative
                x_offset = px - 32
                if x_offset > 0:
                    centred = shift(LEFT, x_offset, centred)
                elif x_offset < 0:
                    centred = shift(RIGHT, -x_offset, centred)

                y_offset = py - 32
                if y_offset > 0:
                    centred = shift(UP, y_offset, centred)
                elif y_offset < 0:
                    centred = shift(DOWN, -y_offset, centred)

                action = act(centred[None])[0]

                # Default target is the current position; only the axis named
                # by the action moves, by 16 cells, clamped to the screen.
                target_x, target_y = px, py
                if action == 0:  # UP
                    target_y = max(py - 16, 0)
                elif action == 1:  # DOWN
                    target_y = min(py + 16, 63)
                elif action == 2:  # LEFT
                    target_x = max(px - 16, 0)
                elif action == 3:  # RIGHT
                    target_x = min(px + 16, 63)

                move = sc2_actions.FunctionCall(
                    _MOVE_SCREEN, [_NOT_QUEUED, [target_x, target_y]])
                timestep = env.step(actions=[move])[0]

                episode_rew += timestep.reward
                finished = timestep.step_type == environment.StepType.LAST

            print("Episode reward", episode_rew)
def main():
    """Play CollectMineralShards forever with the saved ``deepq_x`` Q-network.

    The environment is created with a 16x16 feature screen. Each episode
    selects the whole army, then repeatedly reads the player-relative
    ``feature_screen`` layer, re-centres it on the mean friendly position,
    queries the loaded policy and issues a clamped ``_MOVE_SCREEN`` command.
    The accumulated reward is printed at the end of each episode.

    All screen geometry (centre, move step, maximum coordinate) is derived
    from ``screen_size``. The constants 32 / 16 / 63 used previously belong to
    a 64x64 screen; on the 16x16 screen requested here they produce move
    targets outside the screen, which pysc2 rejects as out-of-range arguments.
    """
    FLAGS(sys.argv)

    # Single source of truth for the feature-screen resolution.
    screen_size = 16
    screen_center = screen_size // 2
    # NOTE(review): a quarter of the screen mirrors the 16-of-64 step used for
    # the 64x64 setup — confirm this matches how the checkpoint was trained.
    move_step = screen_size // 4
    max_coord = screen_size - 1

    agent_interface_format = sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(screen=screen_size, minimap=16))
    with sc2_env.SC2Env(map_name="CollectMineralShards",
                        players=[sc2_env.Agent(sc2_env.Race.terran)],
                        step_mul=step_mul,
                        visualize=True,
                        agent_interface_format=agent_interface_format) as env:

        # For a 64x64 screen the author's alternative configuration was
        # convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)] with BatchInput((1, 64, 64)).
        model = cnn_to_mlp(convs=[(16, 8, 4), (32, 4, 2)],
                           hiddens=[256],
                           dueling=True)

        def make_obs_ph(name):
            return BatchInput((1, screen_size, screen_size), name=name)

        # Using deepq_x here instead of deepq for agent x
        # NOTE(review): the network exposes 16 actions but only actions 0-3
        # are decoded below; any other value leaves the army where it is —
        # confirm the intended action encoding for this checkpoint.
        act_params = {
            'make_obs_ph': make_obs_ph,
            'q_func': model,
            'num_actions': 16,
            'scope': "deepq_x"
        }

        # This needs to be the saved model for deepq_x
        # You can change the scope to deepq_y for agent y
        act = deepq_mineral_shards.load("mineral_shards.pkl",
                                        act_params=act_params)

        while True:
            env.reset()
            episode_rew = 0
            done = False

            # Select every unit so that move commands apply to the army.
            step_result = env.step(actions=[
                sc2_actions.FunctionCall(_SELECT_ARMY, [_SELECT_ALL])
            ])

            while not done:
                player_relative = step_result[0].observation["feature_screen"][
                    _PLAYER_RELATIVE]

                # nonzero() yields (row indices, column indices), i.e. (y, x).
                player_y, player_x = (
                    player_relative == _PLAYER_FRIENDLY).nonzero()
                x = int(player_x.mean())
                y = int(player_y.mean())

                # Re-centre the layer on the army, horizontally then vertically.
                obs = player_relative
                if x > screen_center:
                    obs = shift(LEFT, x - screen_center, obs)
                elif x < screen_center:
                    obs = shift(RIGHT, screen_center - x, obs)

                if y > screen_center:
                    obs = shift(UP, y - screen_center, obs)
                elif y < screen_center:
                    obs = shift(DOWN, screen_center - y, obs)

                # Add the batch and channel axes expected by make_obs_ph.
                action = act(np.expand_dims(obs[None], axis=0))[0]

                # Default target is the current position; only the axis named
                # by the action moves, and the result is kept on screen.
                coord = [x, y]
                if action == 0:  # UP
                    coord = [x, max(y - move_step, 0)]
                elif action == 1:  # DOWN
                    coord = [x, min(y + move_step, max_coord)]
                elif action == 2:  # LEFT
                    coord = [max(x - move_step, 0), y]
                elif action == 3:  # RIGHT
                    coord = [min(x + move_step, max_coord), y]

                new_action = [
                    sc2_actions.FunctionCall(_MOVE_SCREEN,
                                             [_NOT_QUEUED, coord])
                ]

                step_result = env.step(actions=new_action)

                rew = step_result[0].reward
                done = step_result[0].step_type == environment.StepType.LAST

                episode_rew += rew
            print("Episode reward", episode_rew)