Ejemplo n.º 1
0
    def get_disallowed_actions(self, obs):
        """Return the ids of the actions that must not be chosen this step.

        An action is disallowed when:

        * a refinery is already detected on the geyser the action targets,
        * ``self.supply_depots`` has reached ``len(depots)``,
        * the food used has reached the food cap (both SCV-building actions), or
        * ``get_command_center_amount`` reports two or more command centers.

        Args:
            obs: Current observation; only handed to the ``p`` helpers to read
                the food cap and the food used.

        Returns:
            list: Action ids as produced by ``get_action_id``, in the order the
            checks above are performed.
        """
        disallowed_actions = []

        # NOTE(review): assumes get_refineries returns two parallel coordinate
        # sequences (all y values, all x values), one entry per refinery
        # pixel -- confirm against its definition.
        refinery_y, refinery_x = get_refineries(self.unit_type)

        def refinery_on(geyser):
            # Both coordinates have to match on the SAME pixel. Testing x and
            # y independently flags a free geyser as occupied as soon as one
            # refinery shares its column and another one shares its row.
            return any(
                x == geyser[0] and y == geyser[1]
                for x, y in zip(refinery_x, refinery_y)
            )

        # Order matches the geysers sequence: index i belongs to action i.
        refinery_actions = (
            ACTION_BUILD_REFINERY_TOP_LEFT,
            ACTION_BUILD_REFINERY_TOP_RIGHT,
            ACTION_BUILD_REFINERY_BOTTOM_LEFT,
            ACTION_BUILD_REFINERY_BOTTOM_RIGHT,
        )
        for index, action in enumerate(refinery_actions):
            if refinery_on(geysers[index]):
                disallowed_actions.append(get_action_id(action))

        if self.supply_depots >= len(depots):
            disallowed_actions.append(get_action_id(ACTION_BUILD_SUPPLY_DEPOT))

        # ">=" rather than "==": if the used supply ever exceeds the cap there
        # is still no room for another SCV.
        if p.get_food_used(obs) >= p.get_food_cap(obs):
            disallowed_actions.append(get_action_id(ACTION_BUILD_SCV_START_CC))
            disallowed_actions.append(get_action_id(ACTION_BUILD_SCV_NEW_CC))

        # ">=" so that a count above two keeps the action blocked as well.
        if get_command_center_amount(self.unit_type) >= 2:
            disallowed_actions.append(get_action_id(ACTION_BUILD_CC))

        return disallowed_actions
Ejemplo n.º 2
0
    def handle_last_action(self, obs):
        """Close the episode: log statistics, learn the terminal reward, reset.

        Steps, in order:

        1. Append one row of left-aligned, 15-character-wide columns to the
           scores file (attempt counters, building counts, resources, idle
           workers, command centers, food used/cap and the score).
        2. Feed the terminal transition to the Q-learner with a reward of
           ``score - 4000``.
        3. Reset all per-episode counters and bookkeeping on ``self``.
        4. Persist the Q-table as a gzip pickle at ``DATA_FILE + '.gz'``.

        Args:
            obs: Final observation of the episode.

        Returns:
            A no-op ``actions.FunctionCall``.
        """
        score = cs.get_score(obs)
        food = str(p.get_food_used(obs)) + "/" + str(p.get_food_cap(obs))

        # Column order is the layout of the scores file; do not reorder.
        columns = [
            self.build_supply_depot_attempts,
            self.build_supply_depot_attempts_failed,
            self.supply_depots,
            self.build_refinery_attempts,
            self.build_refinery_attempts_failed,
            self.refineries,
            self.build_cc_attempts,
            self.build_cc_attempt_failed,
            p.get_minerals(obs),
            p.get_vespene(obs),
            p.get_idle_worker_count(obs),
            get_command_center_amount(self.unit_type),
            food,
            score,
        ]
        row = ''.join('{:<15}'.format(str(value)) for value in columns)
        with open('/home/kenn/Development/sc2-bot/CustomAgents/scores.txt', 'a+') as f:
            f.write(row + '\n')

        # If we score less than 4000 we are doing so poorly, we want to learn
        # that it was very bad. Symbolizes the ultimate loss.
        # Skipped when no action was ever chosen this episode: there is no
        # (state, action) pair to credit, mirroring the guard used in step().
        if self.previous_action is not None:
            self.qlearn.learn(str(self.previous_state), self.previous_action, int(score) - 4000, 'terminal')

        # Reset per-episode bookkeeping.
        self.previous_state = None
        self.previous_action = None
        self.move_number = 0

        self.supply_depots = 0
        self.refineries = 0
        self.builder_iterator = 0

        self.build_supply_depot_attempts = 0
        self.build_supply_depot_attempts_failed = 0
        self.build_refinery_attempts = 0
        self.build_refinery_attempts_failed = 0
        self.build_cc_attempts = 0
        self.build_cc_attempt_failed = 0

        self.initializing = 0

        self.qlearn.q_table.to_pickle(DATA_FILE + '.gz', 'gzip')

        return actions.FunctionCall(_NOOP, [])
Ejemplo n.º 3
0
    def step(self, obs):
        """Process one observation and return the action to issue.

        The last and first observations of an episode, and the steps taken
        while ``self.initializing < 2``, are delegated to their handlers.
        Afterwards every chosen action spans three consecutive steps tracked
        by ``self.move_number``:

        * move 0 builds the six-element state, runs the Q-learning update for
          the previous action (reward 0), picks a new action and applies it;
        * moves 1 and 2 apply the stored action again.

        Whenever ``apply_action`` reports ``None`` as the resulting action, a
        no-op is issued and the stored action becomes ``ACTION_DO_NOTHING``.

        Args:
            obs: Current environment observation.

        Returns:
            The ``actions.FunctionCall`` to execute this step.
        """
        super(CollectMineralsAndGas, self).step(obs)

        self.unit_type = obs.observation['screen'][_UNIT_TYPE]

        # Episode boundaries and warm-up are handled elsewhere.
        if obs.last():
            return self.handle_last_action(obs)
        if obs.first():
            return self.handle_first_action(obs)
        if self.initializing < 2:
            return self.handle_initial_action(obs)

        phase = self.move_number

        if phase in (1, 2):
            # Follow-up moves: keep executing the action chosen at move 0.
            follow_up, outcome = self.apply_action(obs, get_smart_action(self.previous_action))
            if outcome is None:
                follow_up = actions.FunctionCall(_NOOP, [])
                self.previous_action = get_action_id(ACTION_DO_NOTHING)

            if phase == 2:
                self.move_number = 0
            else:
                self.move_number += 1
            return follow_up

        if phase != 0:
            return actions.FunctionCall(_NOOP, [])

        # Move 0: observe, learn from the previous action, choose the next one.
        self.update_buildings_built()

        food_used = p.get_food_used(obs)
        food_cap = p.get_food_cap(obs)
        left_scvs = get_approx_scvs_in_rectangle_count(self.unit_type, (0, 0), (41, 83))
        right_scvs = get_approx_scvs_in_rectangle_count(self.unit_type, (42, 0), (83, 83))

        # Every feature is capped to keep the state space small.
        state = np.zeros(6)
        # Free supply, at most 7.
        state[0] = min(food_cap - food_used, 7)
        # Whole hundreds of minerals, at most 4 (enough for a command center).
        state[1] = min(int(p.get_minerals(obs) / 100.0), 4)
        # Idle workers, at most 3.
        state[2] = min(p.get_idle_worker_count(obs), 3)
        # SCV surplus of the left half over the right half, kept in [-7, 8].
        imbalance = left_scvs - right_scvs
        if imbalance < 0:
            state[3] = max(-7, imbalance)
        else:
            state[3] = min(8, imbalance)
        # Command center and supply depot counts.
        state[4] = get_command_center_amount(self.unit_type)
        state[5] = get_supply_depot_amount(self.unit_type)

        state_key = str(state)
        if self.previous_action is not None:
            # Intermediate transitions carry no reward.
            self.qlearn.learn(str(self.previous_state), self.previous_action, 0, state_key)

        forbidden = self.get_disallowed_actions(obs)
        chosen = self.qlearn.choose_action(state_key, forbidden)

        command, outcome = self.apply_action(obs, get_smart_action(int(chosen)))
        if outcome is None:
            # The action could not be carried out: do nothing and remember that.
            command = actions.FunctionCall(_NOOP, [])
            recorded = get_action_id(ACTION_DO_NOTHING)
        else:
            recorded = chosen

        self.previous_state = state
        self.previous_action = recorded

        self.move_number += 1
        return command