Code example #1
    def getMaxQAndActionsIn(self, QValue, size, ava_actions):
        max_Q = None
        actions = []
        for y in range(self.y_space, size - self.y_space):
            for x in range(self.x_space, size - self.x_space):
                for a in ava_actions:
                    if max_Q is None or QValue[y][x][a] > max_Q:
                        max_Q = QValue[y][x][a]
                        actions = [[[x, y], a]]
                    elif QValue[y][x][a] == max_Q:
                        actions.append([[x, y], a])

        if (self.model.time_step + 1) % 50 == 0 and size == 84:
            for y in range(0, size, 4):
                for x in range(0, size, 4):
                    #hit_points_selected
                    print(int(self.status.input_scr[y][x][2]), end=" ")
                print()

            for y in range(0, size, 4):
                for x in range(0, size, 4):
                    print(int(QValue[y][x][0] * 10) / 10, end=" ")
                print()

        return max_Q, actions
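The method above scans the valid region of the Q table and keeps every position/action pair that ties for the maximum Q value. The same tie-collection idea, shown on a small stand-alone NumPy array (shapes and values here are made up for illustration):

import numpy as np

# Illustrative only: a 4x4 grid with 2 actions; two entries share the maximum.
QValue = np.zeros((4, 4, 2))
QValue[1, 2, 0] = QValue[3, 1, 1] = 0.7
max_q = QValue.max()
ties = [[[x, y], a] for y, x, a in zip(*np.where(QValue == max_q))]
print(max_q, ties)  # 0.7 [[[2, 1], 0], [[1, 3], 1]]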
Code example #2
    def update_master_policy(self, rbs, disc, lr, cter):
        samples = random.sample(rbs, batch_size)
        minimaps = []
        screens = []
        infos = []
        next_minimaps = []
        next_screens = []
        next_infos = []
        actions = []
        rewards = []
        for i, [obs, _, action, _, next_obs] in enumerate(samples):
            minimap = np.array(obs.observation['minimap'], dtype=np.float32)
            minimap = np.expand_dims(U.preprocess_minimap(minimap), axis=0)
            screen = np.array(obs.observation['screen'], dtype=np.float32)
            screen = np.expand_dims(U.preprocess_screen(screen), axis=0)
            info = np.zeros([1, self.isize], dtype=np.float32)
            info[0, obs.observation['available_actions']] = 1

            next_minimap = np.array(next_obs.observation['minimap'], dtype=np.float32)
            next_minimap = np.expand_dims(U.preprocess_minimap(next_minimap), axis=0)
            next_screen = np.array(next_obs.observation['screen'], dtype=np.float32)
            next_screen = np.expand_dims(U.preprocess_screen(next_screen), axis=0)
            next_info = np.zeros([1, self.isize], dtype=np.float32)
            next_info[0, next_obs.observation['available_actions']] = 1
            reward = next_obs.reward

            minimaps.append(minimap)
            screens.append(screen)
            infos.append(info)
            next_minimaps.append(next_minimap)
            next_screens.append(next_screen)
            next_infos.append(next_info)
            cur_action = np.zeros(num_subpolicies)
            cur_action[action] = 1
            actions.append(cur_action)
            rewards.append(reward)

        minimaps = np.concatenate(minimaps, axis=0)
        screens = np.concatenate(screens, axis=0)
        infos = np.concatenate(infos, axis=0)
        next_minimaps = np.concatenate(next_minimaps, axis=0)
        next_screens = np.concatenate(next_screens, axis=0)
        next_infos = np.concatenate(next_infos, axis=0)
        y_batch = []
        Qvalue_batch = self.sess_master.run(
            self.subpolicy_Q,
            feed_dict={self.minimap: next_minimaps,
                       self.screen: next_screens,
                       self.info: next_infos})
        for i in range(0, batch_size):
            terminal = samples[i][3]
            if terminal:
                y_batch.append(rewards[i])
            else:
                y_batch.append(rewards[i] + disc * np.max(Qvalue_batch[i]))

        self.sess_master.run(
            self.master_train_op,
            feed_dict={self.minimap: minimaps,
                       self.screen: screens,
                       self.info: infos,
                       self.y_input: y_batch,
                       self.action_input: actions,
                       self.learning_rate: lr})
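For a single non-terminal sample, the target built above is the standard one-step Q-learning target r + disc * max_a' Q(s', a'); terminal samples keep only the reward. A small numeric illustration (values are arbitrary):

import numpy as np

disc = 0.99
Qvalue_next = np.array([0.2, 1.5, -0.3, 0.7])  # hypothetical Q(s', .) over the sub-policies
reward, terminal = 1.0, False
y = reward if terminal else reward + disc * np.max(Qvalue_next)
print(y)  # approximately 2.485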
Code example #3
    def run(self):
        observations = []
        self.rewards = []
        actions = []
        actions_spatial = []
        actions_spatial_mask = []
        available_actions = []
        batch_dones = []
        self.values = []
        probs_spatial = []
        probs = []

        for _ in range(self.num_steps):
            observations.append(self.observation)

            action_ids, spatial_indexes, value, prob, prob_spatial = self.model.predict(
                np.asarray([self.observation]).swapaxes(0, 1),
                [self.available_actions_mask])
            self.values.append(value)
            probs.append(prob)
            probs_spatial.append(prob_spatial)
            batch_dones.append(self.terminal)
            action, spatial_mask = self.make_action(action_ids[0],
                                                    spatial_indexes[0])
            actions.append(action_ids[0])
            actions_spatial.append(spatial_indexes[0])
            actions_spatial_mask.append(spatial_mask)
            available_actions.append(self.available_actions_mask)

            self.observation, reward, self.terminal, self.available_actions_mask = self.env.step(
                action)
            self.stats_recorder.after_step(reward=reward,
                                           terminal=self.terminal)
            self.rewards.append(reward)

        advantage_estimations = np.zeros_like(self.rewards)
        last_value = self.model.predict_value(self.observation)[0]

        for t in reversed(range(self.num_steps)):
            if t == self.num_steps - 1:
                self.advantage_estimation = self.estimate_advantage(
                    t, self.terminal, last_value)
            else:
                self.advantage_estimation = self.estimate_advantage(
                    t, batch_dones[t + 1], self.values[t + 1])
            advantage_estimations[t] = self.advantage_estimation

        observations = np.asarray(observations).swapaxes(0, 1)

        return (observations,
                actions,
                available_actions,
                actions_spatial,
                actions_spatial_mask,
                advantage_estimations,
                self.values,
                probs,
                probs_spatial)
Code example #4
    def multistep(self, obs):
        super(MineralShardsMultiAgent, self).step(obs)

        # Uncomment this to see the agent taking actions step-by-step.
        # time.sleep(0.8)

        # Get list of marines
        marines = [
            unit for unit in obs.observation.feature_units
            if unit.alliance == _PLAYER_SELF
        ]

        # Bail if no marines
        if not marines:
            return [FUNCTIONS.no_op()]

        # Get list of mineral locations
        minerals = [[unit.x, unit.y] for unit in obs.observation.feature_units
                    if unit.alliance == _PLAYER_NEUTRAL]

        # Bail if no minerals
        if not minerals:
            return [FUNCTIONS.no_op()]

        # Loop through marines
        actions = []
        for marine in marines:
            marine_xy = [marine.x, marine.y]

            # Remove the previous target of the other marine from consideration
            other_targets = [
                target_xy for (tag, target_xy) in self._marine_targets.items()
                if tag != marine.tag
            ]
            other_target_xy = other_targets[0] if other_targets else (-1, -1)
            minerals_noprevious = [
                x for x in minerals if x != other_target_xy
            ] if len(minerals) > 1 else minerals

            # Find the closest mineral.
            distances = numpy.linalg.norm(numpy.array(minerals_noprevious) -
                                          numpy.array(marine_xy),
                                          axis=1)
            closest_mineral_xy = minerals_noprevious[numpy.argmin(distances)]

            # Update the target of this marine
            self._marine_targets[marine.tag.item()] = closest_mineral_xy

            # Make the action
            action = FUNCTIONS.move_unit(
                marine.tag.item(), "now", closest_mineral_xy
            )  # .item() to convert numpy.int64 to native python type (int)
            actions.append(action)

        return actions if actions else [FUNCTIONS.no_op()]
Code example #5
    def move_drone_random_round_hatchery(self, drone_ids, pos):
        length = len(drone_ids)
        actions = []
        for drone in drone_ids:
            action = sc_pb.Action()
            action.action_raw.unit_command.ability_id = self._move_ability
            x = pos[0] + random.randint(self._range_low, self._range_high)
            y = pos[1] + random.randint(self._range_low, self._range_high)
            action.action_raw.unit_command.target_world_space_pos.x = x
            action.action_raw.unit_command.target_world_space_pos.y = y
            action.action_raw.unit_command.unit_tags.append(drone)
            actions.append(action)
        return actions
Code example #6
    def run(self):
        observations = []
        rewards = []
        actions = []
        actions_spatial = []
        actions_spatial_mask = []
        available_actions = []
        terminals = []
        values = []

        for _ in range(self.batch_size):
            observations.append(self.observation)

            action_ids, spatial_indexes, value = self.model.predict(
                np.asarray([self.observation]).swapaxes(0, 1),
                [self.available_actions_masks])

            values.append(value)

            action, spatial_mask = self.make_action(action_ids[0],
                                                    spatial_indexes[0])
            actions.append(action_ids[0])
            actions_spatial.append(spatial_indexes[0])
            actions_spatial_mask.append(spatial_mask)
            available_actions.append(self.available_actions_masks)

            self.observation, reward, terminal, self.available_actions_masks = self.env.step(
                action)
            self.stats_recorder.after_step(reward=reward, terminal=terminal)
            rewards.append(reward)
            terminals.append(terminal)

        if terminals[-1] == 0:
            next_value = self.model.predict_value(self.observation)[0]
            discounted_rewards = self.discount(rewards + [next_value],
                                               terminals + [False],
                                               self.discount_rate)[:-1]
        else:
            discounted_rewards = self.discount(rewards, terminals,
                                               self.discount_rate)

        observations = np.asarray(observations).swapaxes(0, 1)
        self.model.train(observations, actions, available_actions,
                         actions_spatial, actions_spatial_mask,
                         discounted_rewards, values)
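Code example #6 bootstraps the final step by appending the predicted next_value and an extra False terminal before discounting, then drops the last element. The discount helper itself is not part of this listing; a minimal sketch of the usual reverse-accumulation form it is assumed to take (name and signature follow the call sites above):

import numpy as np

def discount(rewards, terminals, discount_rate):
    # Accumulate discounted returns from the back, resetting at terminal steps.
    returns = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + discount_rate * running * (1.0 - float(terminals[t]))
        returns[t] = running
    return returns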
Code example #7
def prepare_training_inputs(sampled_exps, device='cpu'):
    states = []
    actions = []
    rewards = []
    next_states = []
    dones = []
    for sampled_exp in sampled_exps:
        states.append(sampled_exp[0])
        actions.append(sampled_exp[1])
        rewards.append(sampled_exp[2])
        next_states.append(sampled_exp[3])
        dones.append(sampled_exp[4])

    states = torch.cat(states, dim=0).float().to(device)
    actions = torch.cat(actions, dim=0).to(device)
    rewards = torch.cat(rewards, dim=0).float().to(device)
    next_states = torch.cat(next_states, dim=0).float().to(device)
    dones = torch.cat(dones, dim=0).float().to(device)
    return states, actions, rewards, next_states, dones
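prepare_training_inputs expects each sampled experience to be a 5-tuple of tensors that already carry a leading batch dimension, which torch.cat then stacks along dim 0. A minimal usage sketch with made-up shapes, calling the function defined above:

import torch

# Hypothetical transitions: (state, action, reward, next_state, done), each with batch dim 1.
sampled_exps = [
    (torch.randn(1, 4),
     torch.tensor([[1]]),
     torch.tensor([[0.5]]),
     torch.randn(1, 4),
     torch.tensor([[0.0]]))
    for _ in range(3)
]

states, actions, rewards, next_states, dones = prepare_training_inputs(sampled_exps)
print(states.shape, actions.shape, dones.shape)  # torch.Size([3, 4]) torch.Size([3, 1]) torch.Size([3, 1])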
Code example #8
File: my_agent.py Project: rsun0/sc2rl
    def gradient_step(self, batch_size):
        states = []
        actions = []
        rewards = []
        next_states = []
        length = len(self.replay_memory)
        for i in range(batch_size):
            index = random.randint(0, length - 1)
            while len(self.replay_memory[index]) == 0:
                index = random.randint(0, length - 1)
            length2 = len(self.replay_memory[index])
            index2 = random.randint(0, length2 - 1)
            states.append(self.replay_memory[index][index2][0])
            actions.append(self.replay_memory[index][index2][1])
            rewards.append(self.replay_memory[index][index2][2])
            next_states.append(self.replay_memory[index][index2][3])
        states = np.array(states)
        actions = self.action_mask(actions)
        rewards = np.array(rewards)
        next_states = np.array(next_states)

        s = states.shape
        states = states.reshape((s[0], 1, s[1], s[2], s[3]))
        next_states = next_states.reshape((s[0], 1, s[1], s[2], s[3]))

        states = Variable(torch.from_numpy(states))
        states = states.type(torch.DoubleTensor)
        next_states = Variable(torch.from_numpy(next_states))
        next_states = next_states.type(torch.DoubleTensor)

        targets = self.compute_targets(rewards, next_states)
        targets = Variable(torch.from_numpy(targets))
        targets = targets.type(torch.DoubleTensor)

        output = self.policy(states)
        output = torch.masked_select(output, actions)
        self.optimizer.zero_grad()
        error = self.policy.loss(output, targets)
        error.backward()
        self.optimizer.step()
        print("The error is " + str(error.data.numpy()[0]))
        return error.data.numpy()[0]
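gradient_step relies on self.action_mask to turn the sampled action indices into a boolean mask aligned with the network output (that mask shape is an assumption here), so that torch.masked_select keeps only the Q-value of the action actually taken. The selection step behaves like this:

import torch

output = torch.tensor([[0.1, 0.9, 0.3],
                       [0.5, 0.2, 0.4]])
mask = torch.tensor([[False, True, False],
                     [True, False, False]])
print(torch.masked_select(output, mask))  # tensor([0.9000, 0.5000])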
Code example #9
File: runner.py Project: saschaschramm/MoveToBeacon
    def run(self):
        observations = []
        rewards = []
        actions = []
        actions_spatial = []
        actions_spatial_mask = []
        available_action_masks = []
        terminals = []

        for _ in range(self.batch_size):
            observations.append(self.observation)
            action_ids, spatial_indexes = self.model.predict(
                np.asarray([self.observation]).swapaxes(0, 1),
                [self.available_actions_mask])

            action, spatial_mask = self.make_action(action_ids[0],
                                                    spatial_indexes[0])
            actions.append(action_ids[0])
            actions_spatial.append(spatial_indexes[0])
            actions_spatial_mask.append(spatial_mask)
            available_action_masks.append(self.available_actions_mask)
            self.observation, reward, terminal, self.available_actions_mask = self.env.step(
                action)
            self.stats_recorder.after_step(reward=reward, terminal=terminal)
            rewards.append(reward)
            terminals.append(terminal)

        rewards = self.discount(rewards, terminals, self.discount_rate)
        observations = np.asarray(observations).swapaxes(0, 1)

        self.model.train(observations=observations,
                         actions=actions,
                         available_actions_masks=available_action_masks,
                         actions_spatial=actions_spatial,
                         actions_spatial_masks=actions_spatial_mask,
                         rewards=rewards)
Code example #10
File: common.py Project: moon0823/pysc2-examples
def group_init_queue(player_relative):

  actions = []

  player_x, player_y = (player_relative == _PLAYER_FRIENDLY).nonzero()
  # try:
  #
  #   player_y, player_x = (player_relative == _PLAYER_FRIENDLY).nonzero()
  #   actions.append({"base_action":_SELECT_ARMY, "sub7":_SELECT_ALL})
  #
  # except Exception as e:
  #   print(e)
  # for i in range(len(player_x)):
  #   if i % 4 != 0:
  #     continue
  #
  #   xy = [player_x[i], player_y[i]]
  #   actions.append({"base_action":_SELECT_POINT, "sub6":0, "x0":xy[0], "y0":xy[1]})

  group_id = 0
  group_list = []
  unit_xy_list = []
  for i in range(len(player_x)):

    if group_id > 9:
      break

    xy = [player_x[i], player_y[i]]
    unit_xy_list.append(xy)
    # 2/select_point (6/select_point_act [4]; 0/screen [84, 84])
    # 4/select_control_group (4/control_group_act [5]; 5/control_group_id [10])
    if (len(unit_xy_list) >= 1):
      for idx, xy in enumerate(unit_xy_list):
        if (idx == 0):
          actions.append({
            "base_action": _SELECT_POINT,
            "sub6": 0,
            "x0": xy[0],
            "y0": xy[1]
          })
        else:
          actions.append({
            "base_action": _SELECT_POINT,
            "sub6": 1,
            "x0": xy[0],
            "y0": xy[1]
          })

      actions.append({
        "base_action": _SELECT_CONTROL_GROUP,
        "sub4": _CONTROL_GROUP_SET,
        "sub5": group_id
      })
      unit_xy_list = []

      group_list.append(group_id)
      group_id += 1

  if (len(unit_xy_list) >= 1):
    for idx, xy in enumerate(unit_xy_list):
      if (idx == 0):
        actions.append({
          "base_action": _SELECT_POINT,
          "sub6": 0,
          "x0": xy[0],
          "y0": xy[1]
        })
      else:
        actions.append({
          "base_action": _SELECT_POINT,
          "sub6": 1,
          "x0": xy[0],
          "y0": xy[1]
        })

    actions.append({
      "base_action": _SELECT_CONTROL_GROUP,
      "sub4": _CONTROL_GROUP_SET,
      "sub5": group_id
    })

    group_list.append(group_id)
    group_id += 1

  return actions
Code example #11
File: common.py Project: moon0823/pysc2-examples
def solve_tsp(
    player_relative, 
    selected, 
    group_list, 
    group_id, 
    dest_per_marine,
    xy_per_marine):
  
  my_dest = None
  other_dest = None
  closest, min_dist = None, None
  actions = []
  neutral_y, neutral_x = (player_relative == 1).nonzero()
  player_y, player_x = (selected == 1).nonzero()

  #for group_id in group_list:
  if "0" in dest_per_marine and "1" in dest_per_marine:
    if group_id == 0:
      my_dest = dest_per_marine["0"]
      other_dest = dest_per_marine["1"]
    else:
      my_dest = dest_per_marine["1"]
      other_dest = dest_per_marine["0"]

  if len(player_x) > 0:
    if group_id == 0:
      xy_per_marine["1"] = [int(player_x.mean()), int(player_y.mean())]
    else:
      xy_per_marine["0"] = [int(player_x.mean()), int(player_y.mean())]

    player = xy_per_marine[str(group_id)]
    points = [player]

    for p in zip(neutral_x, neutral_y):

      if other_dest:
        dist = np.linalg.norm(np.array(other_dest) - np.array(p))
        if dist < 10:
          # print("continue since partner will take care of it ", p)
          continue

      pp = [p[0], p[1]]
      if pp not in points:
        points.append(pp)

      dist = np.linalg.norm(np.array(player) - np.array(p))
      if not min_dist or dist < min_dist:
        closest, min_dist = p, dist

    solve_tsp = False
    if my_dest:
      dist = np.linalg.norm(np.array(player) - np.array(my_dest))
      if dist < 0.5:
        solve_tsp = True

    if my_dest is None:
      solve_tsp = True

    if len(points) < 2:
      solve_tsp = False

    if solve_tsp:
      # function for printing best found solution when it is found
      # time.clock() was removed in Python 3.8; perf_counter() is the closest substitute
      from time import perf_counter
      init = perf_counter()

      def report_sol(obj, s=""):
        print("cpu:%g\tobj:%g\ttour:%s" %
              (perf_counter(), obj, s))

      n, D = mk_matrix(points, distL2)
      niter = 50
      tour, z = multistart_localsearch(niter, n, D)

      left, right = None, None
      for idx in tour:
        if tour[idx] == 0:
          if idx == len(tour) - 1:
            right = points[tour[0]]
            left = points[tour[idx - 1]]
          elif idx == 0:
            right = points[tour[idx + 1]]
            left = points[tour[len(tour) - 1]]
          else:
            right = points[tour[idx + 1]]
            left = points[tour[idx - 1]]

      left_d = np.linalg.norm(np.array(player) - np.array(left))
      right_d = np.linalg.norm(np.array(player) - np.array(right))
      if right_d > left_d:
        closest = left
      else:
        closest = right

    #print("optimal next :" , closest)
    dest_per_marine[str(group_id)] = closest
    #print("dest_per_marine", self.dest_per_marine)
    #dest_per_marine {'0': [56, 26], '1': [52, 6]}

    if closest:
      if group_id == 0:
        actions.append({
            "base_action": group_id,
            "x0": closest[0],
            "y0": closest[1]
        })
      else:
        actions.append({
            "base_action": group_id,
            "x1": closest[0],
            "y1": closest[1]
        })

    elif my_dest:
      if group_id == 0:
        actions.append({
            "base_action": group_id,
            "x0": my_dest[0],
            "y0": my_dest[1]
        })
      else:
        actions.append({
            "base_action": group_id,
            "x1": my_dest[0],
            "y1": my_dest[1]
        })
    else:
      if group_id == 0:
        actions.append({
            "base_action": 2,
            "x0": 0,
            "y0": 0
        })
      else:
        actions.append({
            "base_action": 2,
            "x1": 0,
            "y1": 0
        })

  # elif(len(group_list)>0):
  #
  #   group_id = random.randint(0,len(group_list)-1)
  #   actions.append({"base_action":group_id})

  if group_id == 0:
    group_id = 1
  else:
    group_id = 0

  if "0" not in xy_per_marine:
    xy_per_marine["0"] = [0, 0]
  if "1" not in xy_per_marine:
    xy_per_marine["1"] = [0, 0]

  return actions, group_id, dest_per_marine, xy_per_marine
Code example #12
def solve_tsp(player_relative, selected, group_list, group_id, dest_per_marine):
  my_dest = None
  other_dest = None
  closest, min_dist = None, None
  actions = []
  neutral_y, neutral_x = (player_relative == 1).nonzero()
  player_y, player_x = (selected == 1).nonzero()

  #for group_id in group_list:
  if("0" in dest_per_marine and "1" in dest_per_marine):
    if(group_id == 0):
      my_dest = dest_per_marine["0"]
      other_dest = dest_per_marine["1"]
    else:
      my_dest = dest_per_marine["1"]
      other_dest = dest_per_marine["0"]

  r = random.randint(0,1)

  if(len(player_x)>0) and r == 0 :

    player = [int(player_x.mean()), int(player_y.mean())]
    points = [player]

    for p in zip(neutral_x, neutral_y):

      if(other_dest):
        dist = np.linalg.norm(np.array(other_dest) - np.array(p))
        if(dist<10):
          # print("continue since partner will take care of it ", p)
          continue

      pp = [p[0]//2*2, p[1]//2*2]
      if(pp not in points):
        points.append(pp)

      dist = np.linalg.norm(np.array(player) - np.array(p))
      if not min_dist or dist < min_dist:
        closest, min_dist = p, dist


    solve_tsp = False
    if(my_dest):
      dist = np.linalg.norm(np.array(player) - np.array(my_dest))
      if(dist < 2):
        solve_tsp = True

    if(my_dest is None):
      solve_tsp = True

    if(len(points)< 2):
      solve_tsp = False

    if(solve_tsp):
      # function for printing best found solution when it is found
      # time.clock() was removed in Python 3.8; perf_counter() is the closest substitute
      from time import perf_counter
      init = perf_counter()
      def report_sol(obj, s=""):
        print("cpu:%g\tobj:%g\ttour:%s" %
              (perf_counter(), obj, s))

      #print("points: %s" % points)
      n, D = mk_matrix(points, distL2)
      # multi-start local search
      #print("random start local search:", n)
      niter = 50
      tour,z = multistart_localsearch(niter, n, D)

      #print("best found solution (%d iterations): z = %g" % (niter, z))
      #print(tour)

      left, right = None, None
      for idx in tour:
        if(tour[idx] == 0):
          if(idx == len(tour) - 1):
            #print("optimal next : ", tour[0])
            right = points[tour[0]]
            left = points[tour[idx-1]]
          elif(idx==0):
            #print("optimal next : ", tour[idx+1])
            right = points[tour[idx+1]]
            left = points[tour[len(tour)-1]]
          else:
            #print("optimal next : ", tour[idx+1])
            right = points[tour[idx+1]]
            left = points[tour[idx-1]]

      left_d = np.linalg.norm(np.array(player) - np.array(left))
      right_d = np.linalg.norm(np.array(player) - np.array(right))
      if(right_d > left_d):
        closest = left
      else:
        closest = right

    #print("optimal next :" , closest)
    dest_per_marine[str(group_id)] = closest
    #print("dest_per_marine", self.dest_per_marine)
    #dest_per_marine {'0': [56, 26], '1': [52, 6]}

    if(closest):
      actions.append({"base_action":2,
                      "x0": closest[0], "y0": closest[1]})
  elif(len(group_list)>0):

    group_id = random.randint(0,len(group_list)-1)
    actions.append({"base_action":group_id})
  return actions, group_id, dest_per_marine
Code example #13
def main(argv):
    import a3c.common.a3c
    scenarios.load_scenarios()
    run_config = run_configs.get()

    interface = sc_pb.InterfaceOptions()
    interface.raw = False
    interface.score = True
    interface.feature_layer.width = 24
    interface.feature_layer.resolution.x = FLAGS.screen_resolution
    interface.feature_layer.resolution.y = FLAGS.screen_resolution
    interface.feature_layer.minimap_resolution.x = 64
    interface.feature_layer.minimap_resolution.y = 64

    queue = FakeQueue()
    #
    shared.gamma_n = FLAGS.gamma ** FLAGS.n_step_return
    env = helpers.get_env_wrapper(False)
    s_space = env.observation_space.shape

    none_state = np.zeros(s_space)
    none_state = none_state.reshape(s_space)
    replay_agent = Agent(env.action_space.n, t_queue=queue, none_state=none_state)

    for fname in glob.glob(os.path.join(FLAGS.dir, '*.SC2Replay')):
        replay_data = run_config.replay_data(fname)
        start_replay = sc_pb.RequestStartReplay(
            replay_data=replay_data,
            options=interface,
            disable_fog=True,
            observed_player_id=1)
        game_version = get_game_version(replay_data)
        with run_config.start(game_version=game_version,
                              full_screen=False) as controller:


            controller.start_replay(start_replay)
            feat = features.Features(controller.game_info())

            obs = controller.observe()
            s = get_obs(env._input_layers, obs)
            results = 0
            last_reward = 0
            while True:
                actions = []
                for a in obs.actions:
                    try:
                        temp = feat.reverse_action(a)
                        x = 0
                        y = 0
                        if temp[0] not in [0, 7]:
                            x = temp.arguments[1][0]
                            y = temp.arguments[1][1]
                        actions.append([env._actions.index(temp[0]), x, y])
                    except ValueError:
                        pass

                if len(actions) < 1:
                    try:
                        controller.step(FLAGS.step_mul)
                    except ProtocolError:
                        break

                    obs = controller.observe()
                    s = get_obs(env._input_layers, obs)
                    continue

                r = obs.observation.score.score

                controller.step(FLAGS.step_mul)
                obs = controller.observe()

                s_ = get_obs(env._input_layers, obs)

                if r == 0 and last_reward != 0:
                    s_ = None
                    print('Episode end')

                results += 1

                if not FLAGS.raw_rewards:
                    replay_agent.train(s, actions[0][0], actions[0][1], actions[0][2], r, s_)
                else:
                    queue.put([s, actions[0][0], actions[0][1], actions[0][2], r, s_])

                if obs.player_result:
                    break

                if r == 0 and last_reward != 0:
                    last_reward = 0
                else:
                    s = s_
                    last_reward = r

    with gzip.open('./replay_info/info.gz', 'wb+') as outfile:
        print('pushed: {}'.format(results))
        #json.dump(queue.get(), outfile)
        pickle.dump(queue.get(), outfile)