Example #1
def learn(env,
          q_func,
          num_actions=3,
          lr=5e-4,
          max_timesteps=100000,
          buffer_size=50000,
          exploration_fraction=0.1,
          exploration_final_eps=0.02,
          train_freq=1,
          batch_size=32,
          print_freq=1,
          checkpoint_freq=10000,
          learning_starts=1000,
          gamma=1.0,
          target_network_update_freq=500,
          prioritized_replay=False,
          prioritized_replay_alpha=0.6,
          prioritized_replay_beta0=0.4,
          prioritized_replay_beta_iters=None,
          prioritized_replay_eps=1e-6,
          num_cpu=16,
          param_noise=False,
          param_noise_threshold=0.05,
          callback=None,
          demo_replay=[]
          ):
  """Train a deepq model.

  Parameters
  ----------
  env: pysc2.env.SC2Env
      environment to train on
  q_func: (tf.Variable, int, str, bool) -> tf.Variable
      the model that takes the following inputs:
          observation_in: object
              the output of observation placeholder
          num_actions: int
              number of actions
          scope: str
              name of the variable scope in which to build the network
          reuse: bool
              should be passed to the outer variable scope
      and returns a tensor of shape (batch_size, num_actions) with the Q-value of every action.
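  num_actions: int
      number of discrete actions available to the agent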
  lr: float
      learning rate for adam optimizer
  max_timesteps: int
      number of env steps to optimize for
  buffer_size: int
      size of the replay buffer
  exploration_fraction: float
      fraction of entire training period over which the exploration rate is annealed
  exploration_final_eps: float
      final value of random action probability
  train_freq: int
      update the model every `train_freq` steps.
  batch_size: int
      size of a batch sampled from the replay buffer for training
  print_freq: int
      how often to print out training progress
      set to None to disable printing
  checkpoint_freq: int
      how often to save the model. This is so that the best version can be
      restored at the end of training. Set this to None if you do not wish to
      restore the best version at the end of training.
  learning_starts: int
      how many steps to collect transitions for before learning starts
  gamma: float
      discount factor
  target_network_update_freq: int
      update the target network every `target_network_update_freq` steps.
  prioritized_replay: bool
      if True, a prioritized replay buffer will be used
  prioritized_replay_alpha: float
      alpha parameter for prioritized replay buffer
  prioritized_replay_beta0: float
      initial value of beta for prioritized replay buffer
  prioritized_replay_beta_iters: int
      number of iterations over which beta will be annealed from its initial
      value to 1.0. If set to None, defaults to max_timesteps.
  prioritized_replay_eps: float
      epsilon to add to the TD errors when updating priorities.
  num_cpu: int
      number of cpus to use for training
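  param_noise: bool
      whether to use parameter-space noise for exploration instead of eps-greedy
  param_noise_threshold: float
      if non-negative, fixed threshold used when adapting the parameter noise;
      if negative, the threshold is derived from the current exploration schedule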
  callback: (locals, globals) -> bool
      function called at every step with the state of the algorithm.
      If the callback returns True, training stops.

  Returns
  -------
  act: ActWrapper
      Wrapper over act function. Adds ability to save it and load it.
      See header of baselines/deepq/categorical.py for details on the act function.
  """
  # Create all the functions necessary to train the model

  sess = U.make_session(num_cpu=num_cpu)
  sess.__enter__()

  def make_obs_ph(name):
    return U.BatchInput((64, 64), name=name)

  act, train, update_target, debug = deepq.build_train(
    make_obs_ph=make_obs_ph,
    q_func=q_func,
    num_actions=num_actions,
    optimizer=tf.train.AdamOptimizer(learning_rate=lr),
    gamma=gamma,
    grad_norm_clipping=10
  )
  act_params = {
    'make_obs_ph': make_obs_ph,
    'q_func': q_func,
    'num_actions': num_actions,
  }

  # Create the replay buffer
  if prioritized_replay:
    replay_buffer = PrioritizedReplayBuffer(buffer_size, alpha=prioritized_replay_alpha)
    if prioritized_replay_beta_iters is None:
      prioritized_replay_beta_iters = max_timesteps
    beta_schedule = LinearSchedule(prioritized_replay_beta_iters,
                                   initial_p=prioritized_replay_beta0,
                                   final_p=1.0)
  else:
    replay_buffer = ReplayBuffer(buffer_size)
    beta_schedule = None
  # Create the schedule for exploration starting from 1.
  exploration = LinearSchedule(schedule_timesteps=int(exploration_fraction * max_timesteps),
                               initial_p=1.0,
                               final_p=exploration_final_eps)

  # Initialize the parameters and copy them to the target network.
  U.initialize()
  update_target()

  episode_rewards = [0.0]
  saved_mean_reward = None

  obs = env.reset()
  # Select all marines first

  player_relative = obs[0].observation["screen"][_PLAYER_RELATIVE]

  screen = player_relative

  obs = common.init(env, obs)

  group_id = 0
  reset = True
  with tempfile.TemporaryDirectory() as td:
    model_saved = False
    model_file = os.path.join(td, "model")

    for t in range(max_timesteps):
      if callback is not None:
        if callback(locals(), globals()):
          break
      # Take action and update exploration to the newest value
      kwargs = {}
      if not param_noise:
        update_eps = exploration.value(t)
        update_param_noise_threshold = 0.
      else:
        update_eps = 0.
        if param_noise_threshold >= 0.:
          update_param_noise_threshold = param_noise_threshold
        else:
          # Compute the threshold such that the KL divergence between perturbed and non-perturbed
          # policy is comparable to eps-greedy exploration with eps = exploration.value(t).
          # See Appendix C.1 in Parameter Space Noise for Exploration, Plappert et al., 2017
          # for detailed explanation.
          update_param_noise_threshold = -np.log(1. - exploration.value(t) + exploration.value(t) / float(num_actions))
        kwargs['reset'] = reset
        kwargs['update_param_noise_threshold'] = update_param_noise_threshold
        kwargs['update_param_noise_scale'] = True

      # custom process for DefeatZerglingsAndBanelings

      obs, screen, player = common.select_marine(env, obs)

      action = act(np.array(screen)[None], update_eps=update_eps, **kwargs)[0]
      reset = False
      rew = 0

      new_action = None

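      # Map the discrete model action onto a concrete pysc2 FunctionCall
      # for the currently selected marine.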
      obs, new_action = common.marine_action(env, obs, player, action)
      army_count = env._obs.observation.player_common.army_count

      try:
        if army_count > 0 and _ATTACK_SCREEN in obs[0].observation["available_actions"]:
          obs = env.step(actions=new_action)
        else:
          new_action = [sc2_actions.FunctionCall(_NO_OP, [])]
          obs = env.step(actions=new_action)
      except Exception:
        # Ignore transient environment errors and keep training.
        pass

      player_relative = obs[0].observation["screen"][_PLAYER_RELATIVE]
      new_screen = player_relative

      rew += obs[0].reward

      done = obs[0].step_type == environment.StepType.LAST

      selected = obs[0].observation["screen"][_SELECTED]
      player_y, player_x = (selected == _PLAYER_FRIENDLY).nonzero()

      if len(player_y) > 0:
        player = [int(player_x.mean()), int(player_y.mean())]

      if len(player) == 2:
        # Re-center the observation on the selected marine; the screen is
        # 64x64, so the center pixel is (32, 32).
        if player[0] > 32:
          new_screen = common.shift(LEFT, player[0] - 32, new_screen)
        elif player[0] < 32:
          new_screen = common.shift(RIGHT, 32 - player[0], new_screen)

        if player[1] > 32:
          new_screen = common.shift(UP, player[1] - 32, new_screen)
        elif player[1] < 32:
          new_screen = common.shift(DOWN, 32 - player[1], new_screen)

      # Store transition in the replay buffer.
      replay_buffer.add(screen, action, rew, new_screen, float(done))
      screen = new_screen

      episode_rewards[-1] += rew
      reward = episode_rewards[-1]

      if done:
        print("Episode Reward : %s" % episode_rewards[-1])
        obs = env.reset()
        player_relative = obs[0].observation["screen"][_PLAYER_RELATIVE]

        screen = player_relative

        group_list = common.init(env, obs)

        # Select all marines first
        #env.step(actions=[sc2_actions.FunctionCall(_SELECT_UNIT, [_SELECT_ALL])])
        episode_rewards.append(0.0)

        reset = True

      if t > learning_starts and t % train_freq == 0:
        # Minimize the error in Bellman's equation on a batch sampled from replay buffer.
        if prioritized_replay:
          experience = replay_buffer.sample(batch_size, beta=beta_schedule.value(t))
          (obses_t, actions, rewards, obses_tp1, dones, weights, batch_idxes) = experience
        else:
          obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(batch_size)
          weights, batch_idxes = np.ones_like(rewards), None
        td_errors = train(obses_t, actions, rewards, obses_tp1, dones, weights)
        if prioritized_replay:
          new_priorities = np.abs(td_errors) + prioritized_replay_eps
          replay_buffer.update_priorities(batch_idxes, new_priorities)

      if t > learning_starts and t % target_network_update_freq == 0:
        # Update target network periodically.
        update_target()

      mean_100ep_reward = round(np.mean(episode_rewards[-101:-1]), 1)
      num_episodes = len(episode_rewards)
      if done and print_freq is not None and len(episode_rewards) % print_freq == 0:
        logger.record_tabular("steps", t)
        logger.record_tabular("episodes", num_episodes)
        logger.record_tabular("reward", reward)
        logger.record_tabular("mean 100 episode reward", mean_100ep_reward)
        logger.record_tabular("% time spent exploring", int(100 * exploration.value(t)))
        logger.dump_tabular()

      if (checkpoint_freq is not None and t > learning_starts and
              num_episodes > 100 and t % checkpoint_freq == 0):
        if saved_mean_reward is None or mean_100ep_reward > saved_mean_reward:
          if print_freq is not None:
            logger.log("Saving model due to mean reward increase: {} -> {}".format(
              saved_mean_reward, mean_100ep_reward))
          U.save_state(model_file)
          model_saved = True
          saved_mean_reward = mean_100ep_reward
    if model_saved:
      if print_freq is not None:
        logger.log("Restored model with mean reward: {}".format(saved_mean_reward))
      U.load_state(model_file)

  return ActWrapper(act)
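
A minimal sketch of how this learn function might be invoked. The map name, step multiplier, and the cnn_to_mlp network shape are assumptions that depend on the surrounding project; act.save relies on the save ability the ActWrapper docstring describes.

from pysc2.env import sc2_env
from baselines import deepq

def main():
  # Hypothetical driver; all literal values below are assumptions.
  with sc2_env.SC2Env(map_name="DefeatZerglingsAndBanelings", step_mul=8) as env:
    model = deepq.models.cnn_to_mlp(
      convs=[(32, 8, 4), (64, 4, 2)],  # (num_filters, kernel_size, stride)
      hiddens=[256],
    )
    act = learn(env, q_func=model, num_actions=3, max_timesteps=100000)
    act.save("defeat_zerglings.pkl")  # persist the trained act function

if __name__ == "__main__":
  main()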
Example #2
    def step(self, obs):
        super(CollectMineralShards, self).step(obs)

        if (len(self.group_list) == 0
                or common.check_group_list(self.env, [obs])):
            print("init group list")
            obs = common.init(self.env, [obs])[0]
            self.group_list = common.update_group_list([obs])

        #print("group_list ", self.group_list)
        player_relative = obs.observation["screen"][_PLAYER_RELATIVE]
        neutral_y, neutral_x = (player_relative == _PLAYER_NEUTRAL).nonzero()
        player_y, player_x = (player_relative == _PLAYER_FRIENDLY).nonzero()
        if not neutral_y.any() or not player_y.any():
            return actions.FunctionCall(_NO_OP, [])

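        # With probability 1/2, move the selected marine; otherwise fall through
        # and reselect a random control group.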
        r = random.randint(0, 1)

        if _MOVE_SCREEN in obs.observation["available_actions"] and r == 0:

            selected = obs.observation["screen"][_SELECTED]
            player_y, player_x = (selected == _PLAYER_FRIENDLY).nonzero()
            if len(player_x) == 0:
                # Nothing is selected; .mean() on an empty array would fail.
                return actions.FunctionCall(_NO_OP, [])

            player = [int(player_x.mean()), int(player_y.mean())]
            points = [player]
            closest, min_dist = None, None
            other_dest = None
            my_dest = None
            if ("0" in self.dest_per_marine and "1" in self.dest_per_marine):
                if (self.group_id == 0):
                    my_dest = self.dest_per_marine["0"]
                    other_dest = self.dest_per_marine["1"]
                elif (self.group_id == 1):
                    other_dest = self.dest_per_marine["0"]
                    my_dest = self.dest_per_marine["1"]

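            # Build the candidate shard list, skipping shards the partner
            # marine is already heading for.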
            for p in zip(neutral_x, neutral_y):

                if (other_dest):
                    dist = numpy.linalg.norm(
                        numpy.array(other_dest) - numpy.array(p))
                    if (dist < 5):
                        #print("continue since partner will take care of it ", p)
                        continue

                # Snap to a 2x2 grid so near-duplicate shard positions collapse.
                pp = [p[0] // 2 * 2, p[1] // 2 * 2]
                if (pp not in points):
                    points.append(pp)

                dist = numpy.linalg.norm(numpy.array(player) - numpy.array(p))
                if not min_dist or dist < min_dist:
                    closest, min_dist = p, dist

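            # Re-plan the tour only when the current destination is reached
            # or missing, and there are at least two points to order.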
            solve_tsp = False
            if (my_dest):
                dist = numpy.linalg.norm(
                    numpy.array(player) - numpy.array(my_dest))
                if (dist < 2):
                    solve_tsp = True

            if (my_dest is None):
                solve_tsp = True

            if (len(points) < 2):
                solve_tsp = False

            if (solve_tsp):
                # function for printing best found solution when it is found
                from time import perf_counter  # time.clock was removed in Python 3.8
                init = perf_counter()

                def report_sol(obj, s=""):
                    print("cpu:%g\tobj:%g\ttour:%s" %
                          (perf_counter(), obj, s))

                #print("points: %s" % points)
                n, D = mk_matrix(points, distL2)
                # multi-start local search
                #print("random start local search:", n)
                niter = 50
                tour, z = multistart_localsearch(niter, n, D)

                #print("best found solution (%d iterations): z = %g" % (niter, z))
                #print(tour)

                left, right = None, None
                for idx in tour:
                    if (tour[idx] == 0):
                        if (idx == len(tour) - 1):
                            #print("optimal next : ", tour[0])
                            right = points[tour[0]]
                            left = points[tour[idx - 1]]
                        elif (idx == 0):
                            #print("optimal next : ", tour[idx+1])
                            right = points[tour[idx + 1]]
                            left = points[tour[len(tour) - 1]]
                        else:
                            #print("optimal next : ", tour[idx+1])
                            right = points[tour[idx + 1]]
                            left = points[tour[idx - 1]]

                left_d = numpy.linalg.norm(
                    numpy.array(player) - numpy.array(left))
                right_d = numpy.linalg.norm(
                    numpy.array(player) - numpy.array(right))
                if (right_d > left_d):
                    closest = left
                else:
                    closest = right

            #print("optimal next :" , closest)
            self.dest_per_marine[str(self.group_id)] = closest
            #print("dest_per_marine", self.dest_per_marine)
            #dest_per_marine {'0': [56, 26], '1': [52, 6]}

            if (closest is None):
                return actions.FunctionCall(_NO_OP, [])

            return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, closest])
        elif (len(self.group_list) > 0):
            player_p = []
            for p in zip(player_x, player_y):
                if (p not in player_p):
                    player_p.append(p)

            self.group_id = random.randint(0, len(self.group_list) - 1)
            return actions.FunctionCall(
                _SELECT_CONTROL_GROUP,
                [[_CONTROL_GROUP_RECALL], [int(self.group_id)]])
        else:
            return actions.FunctionCall(_NO_OP, [])
def worker(remote, map_name, i):

    with sc2_env.SC2Env(map_name=map_name,
                        step_mul=1,
                        screen_size_px=(32, 32),
                        minimap_size_px=(32, 32)) as env:
        available_actions = None
        result = None
        group_list = []
        while True:
            cmd, data = remote.recv()
            if cmd == 'step':
                # if(common.check_group_list(env, result)):
                #   result, xy_per_marine = common.init(env,result)

                reward = 0

                if (len(group_list) == 0
                        or common.check_group_list(env, result)):
                    print("init group list")
                    result, xy_per_marine = common.init(env, result)
                    group_list = common.update_group_list(result)

                action1 = data[0][0]
                action2 = data[0][1]
                func = actions.FUNCTIONS[action1[0]]
                #print("agent(",i," ) action : ", action1, " func : ", func)
                func = actions.FUNCTIONS[action2[0]]
                #print("agent(",i," ) action : ", action2, " func : ", func, "xy :", action2[1][1])
                x, y = action2[1][1]
                move = True
                if (x == 0 and y == 0):
                    move = False
                result = env.step(actions=[action1])
                reward += result[0].reward
                done = result[0].step_type == environment.StepType.LAST
                # 331 == Move_screen in pysc2's action set; also guard against
                # a 'step' arriving before available_actions is populated.
                if (available_actions is not None and 331 in available_actions
                        and move and not done):
                    try:
                        result = env.step(actions=[action2])
                        reward += result[0].reward
                        done = result[0].step_type == environment.StepType.LAST
                    except Exception as e:
                        print("e :", e)

                ob = (result[0].observation["screen"]
                      [_PLAYER_RELATIVE:_PLAYER_RELATIVE + 1] == 3).astype(
                          int)  #  (1, 32, 32)
                selected = result[0].observation["screen"][
                    _SELECTED:_SELECTED + 1]  #  (1, 32, 32)
                # extra = np.zeros((1, 32, 32))
                control_groups = result[0].observation["control_groups"]
                army_count = env._obs[0].observation.player_common.army_count
                # extra[0,0,0] = army_count
                # for id, group in enumerate(control_groups):
                #   control_group_id = id
                #   unit_id = group[0]
                #   count = group[1]
                #   #print("control_group_id :", control_group_id, " unit_id :", unit_id, " count :", count)
                #   extra[0,1, control_group_id] = unit_id
                #   extra[0,2, control_group_id] = count
                #ob = np.append(ob, selected, axis=0) #  (2, 32, 32)
                #ob = np.append(ob, extra, axis=0) # (3, 32, 32)

                available_actions = result[0].observation["available_actions"]
                info = result[0].observation["available_actions"]
                if done:
                    result = env.reset()

                    if (len(group_list) == 0
                            or common.check_group_list(env, result)):
                        print("init group list")
                        result, xy_per_marine = common.init(env, result)
                        group_list = common.update_group_list(result)

                    # ob = result[0].observation["screen"]
                    # reward = result[0].reward
                    # done = result[0].step_type == environment.StepType.LAST
                    info = result[0].observation["available_actions"]
                remote.send((ob, reward, done, info, army_count,
                             control_groups, selected, xy_per_marine))
            elif cmd == 'reset':
                result = env.reset()
                reward = 0

                if (len(group_list) == 0
                        or common.check_group_list(env, result)):
                    print("init group list")
                    result, xy_per_marine = common.init(env, result)
                    group_list = common.update_group_list(result)

                reward += result[0].reward
                ob = (result[0].observation["screen"]
                      [_PLAYER_RELATIVE:_PLAYER_RELATIVE + 1] == 3).astype(int)
                selected = result[0].observation["screen"][
                    _SELECTED:_SELECTED + 1]  #  (1, 32, 32)
                # extra = np.zeros((1, 32, 32))
                control_groups = result[0].observation["control_groups"]
                army_count = env._obs[0].observation.player_common.army_count
                # extra[0,0,0] = army_count
                # for id, group in enumerate(control_groups):
                #   control_group_id = id
                #   unit_id = group[0]
                #   count = group[1]
                #   #print("control_group_id :", control_group_id, " unit_id :", unit_id, " count :", count)
                #   extra[0,1, control_group_id] = unit_id
                #   extra[0,2, control_group_id] = count
                # ob = np.append(ob, selected, axis=0) #  (2, 32, 32)
                # ob = np.append(ob, extra, axis=0) # (3, 32, 32)

                done = result[0].step_type == environment.StepType.LAST
                info = result[0].observation["available_actions"]
                available_actions = result[0].observation["available_actions"]
                remote.send((ob, reward, done, info, army_count,
                             control_groups, selected, xy_per_marine))
            elif cmd == 'close':
                remote.close()
                break
            elif cmd == 'get_spaces':
                remote.send((env.action_spec().functions[data], ""))
            elif cmd == "action_spec":
                remote.send((env.action_spec().functions[data]))
            else:
                raise NotImplementedError
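
A minimal sketch of how a parent process might drive this worker over a multiprocessing pipe. The map name is an assumption; the 8-tuple layout matches the remote.send calls above.

import multiprocessing as mp

if __name__ == "__main__":
    parent, child = mp.Pipe()
    proc = mp.Process(target=worker,
                      args=(child, "CollectMineralShards", 0))  # map name is an assumption
    proc.start()

    parent.send(("reset", None))
    (ob, reward, done, info, army_count,
     control_groups, selected, xy_per_marine) = parent.recv()

    parent.send(("close", None))
    proc.join()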
Example #4
def worker(remote, map_name, i):

  with sc2_env.SC2Env(
      map_name=map_name,
      step_mul=1,
      screen_size_px=(32,32),
      minimap_size_px=(32,32)
  ) as env:

    while True:
      cmd, data = remote.recv()
      if cmd == 'step':
        func = actions.FUNCTIONS[data[0][0]]
        print("agent(",i," ) action : ", data, " func : ", func)
        result = env.step(actions=data)
        ob = (result[0].observation["screen"][_PLAYER_RELATIVE:_PLAYER_RELATIVE+1] == 3).astype(int) #  (1, 32, 32)
        selected = result[0].observation["screen"][_SELECTED:_SELECTED+1] #  (1, 32, 32)
        # extra = np.zeros((1, 32, 32))
        control_groups = result[0].observation["control_groups"]
        army_count = env._obs[0].observation.player_common.army_count
        # extra[0,0,0] = army_count
        # for id, group in enumerate(control_groups):
        #   control_group_id = id
        #   unit_id = group[0]
        #   count = group[1]
        #   #print("control_group_id :", control_group_id, " unit_id :", unit_id, " count :", count)
        #   extra[0,1, control_group_id] = unit_id
        #   extra[0,2, control_group_id] = count
        #ob = np.append(ob, selected, axis=0) #  (2, 32, 32)
        #ob = np.append(ob, extra, axis=0) # (3, 32, 32)
        reward = result[0].reward
        done = result[0].step_type == environment.StepType.LAST
        info = result[0].observation["available_actions"]
        if done:
          result = env.reset()
          common.init(env, result)
          # ob = result[0].observation["screen"]
          # reward = result[0].reward
          # done = result[0].step_type == environment.StepType.LAST
          info = result[0].observation["available_actions"]
        remote.send((ob, reward, done, info, army_count, control_groups, selected))
      elif cmd == 'reset':
        result = env.reset()
        common.init(env, result)
        ob = (result[0].observation["screen"][_PLAYER_RELATIVE:_PLAYER_RELATIVE+1] == 3).astype(int)
        selected = result[0].observation["screen"][_SELECTED:_SELECTED+1] #  (1, 32, 32)
        # extra = np.zeros((1, 32, 32))
        control_groups = result[0].observation["control_groups"]
        army_count = env._obs[0].observation.player_common.army_count
        # extra[0,0,0] = army_count
        # for id, group in enumerate(control_groups):
        #   control_group_id = id
        #   unit_id = group[0]
        #   count = group[1]
        #   #print("control_group_id :", control_group_id, " unit_id :", unit_id, " count :", count)
        #   extra[0,1, control_group_id] = unit_id
        #   extra[0,2, control_group_id] = count
        # ob = np.append(ob, selected, axis=0) #  (2, 32, 32)
        # ob = np.append(ob, extra, axis=0) # (3, 32, 32)
        reward = result[0].reward
        done = result[0].step_type == environment.StepType.LAST
        info = result[0].observation["available_actions"]
        remote.send((ob, reward, done, info, army_count, control_groups, selected))
      elif cmd == 'close':
        remote.close()
        break
      elif cmd == 'get_spaces':
        remote.send((env.action_spec().functions[data], ""))
      elif cmd == "action_spec":
        remote.send((env.action_spec().functions[data]))
      else:
        raise NotImplementedError
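
A minimal sketch of issuing a single no-op step through this variant of the worker; note that it returns a 7-tuple without xy_per_marine. Function id 0 is no_op in pysc2's action set, and the map name is again an assumption.

import multiprocessing as mp
from pysc2.lib import actions as sc_actions

if __name__ == "__main__":
  parent, child = mp.Pipe()
  proc = mp.Process(target=worker, args=(child, "CollectMineralShards", 0))
  proc.start()

  parent.send(("reset", None))
  parent.recv()  # discard the initial observation tuple

  parent.send(("step", [sc_actions.FunctionCall(0, [])]))  # 0 == no_op
  ob, reward, done, info, army_count, control_groups, selected = parent.recv()

  parent.send(("close", None))
  proc.join()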
Example #5
  def step(self, obs):
    super(CollectMineralShards, self).step(obs)

    if (len(self.group_list) == 0
        or common.check_group_list(self.env, [obs])):
      print("init group list")
      obs, xy_per_marine = common.init(self.env, [obs])
      obs = obs[0]
      self.group_list = common.update_group_list([obs])

    #print("group_list ", self.group_list)
    player_relative = obs.observation["screen"][_PLAYER_RELATIVE]
    neutral_y, neutral_x = (player_relative == _PLAYER_NEUTRAL).nonzero()
    player_y, player_x = (player_relative == _PLAYER_FRIENDLY).nonzero()
    if not neutral_y.any() or not player_y.any():
      return actions.FunctionCall(_NO_OP, [])

    r = random.randint(0, 1)

    if _MOVE_SCREEN in obs.observation["available_actions"] and r == 0:

      selected = obs.observation["screen"][_SELECTED]
      player_y, player_x = (selected == _PLAYER_FRIENDLY).nonzero()
      if len(player_x) == 0:
        return actions.FunctionCall(_NO_OP, [])

      player = [int(player_x.mean()), int(player_y.mean())]
      points = [player]
      closest, min_dist = None, None
      other_dest = None
      my_dest = None
      if ("0" in self.dest_per_marine and "1" in self.dest_per_marine):
        if (self.group_id == 0):
          my_dest = self.dest_per_marine["0"]
          other_dest = self.dest_per_marine["1"]
        elif (self.group_id == 1):
          other_dest = self.dest_per_marine["0"]
          my_dest = self.dest_per_marine["1"]

      for p in zip(neutral_x, neutral_y):

        if (other_dest):
          dist = numpy.linalg.norm(
            numpy.array(other_dest) - numpy.array(p))
          if (dist < 5):
            #print("continue since partner will take care of it ", p)
            continue

        pp = [p[0], p[1]]
        if (pp not in points):
          points.append(pp)

        dist = numpy.linalg.norm(numpy.array(player) - numpy.array(p))
        if not min_dist or dist < min_dist:
          closest, min_dist = p, dist

      solve_tsp = False
      if (my_dest):
        dist = numpy.linalg.norm(
          numpy.array(player) - numpy.array(my_dest))
        if (dist < 2):
          solve_tsp = True

      if (my_dest is None):
        solve_tsp = True

      if (len(points) < 2):
        solve_tsp = False

      if (solve_tsp):
        # function for printing best found solution when it is found
        from time import perf_counter  # time.clock was removed in Python 3.8
        init = perf_counter()

        def report_sol(obj, s=""):
          print("cpu:%g\tobj:%g\ttour:%s" %
                (perf_counter(), obj, s))

        #print("points: %s" % points)
        n, D = mk_matrix(points, distL2)
        # multi-start local search
        #print("random start local search:", n)
        niter = 50
        tour, z = multistart_localsearch(niter, n, D)

        #print("best found solution (%d iterations): z = %g" % (niter, z))
        #print(tour)

        left, right = None, None
        for idx in tour:
          if (tour[idx] == 0):
            if (idx == len(tour) - 1):
              #print("optimal next : ", tour[0])
              right = points[tour[0]]
              left = points[tour[idx - 1]]
            elif (idx == 0):
              #print("optimal next : ", tour[idx+1])
              right = points[tour[idx + 1]]
              left = points[tour[len(tour) - 1]]
            else:
              #print("optimal next : ", tour[idx+1])
              right = points[tour[idx + 1]]
              left = points[tour[idx - 1]]

        left_d = numpy.linalg.norm(
          numpy.array(player) - numpy.array(left))
        right_d = numpy.linalg.norm(
          numpy.array(player) - numpy.array(right))
        if (right_d > left_d):
          closest = left
        else:
          closest = right

      #print("optimal next :" , closest)
      self.dest_per_marine[str(self.group_id)] = closest
      #print("dest_per_marine", self.dest_per_marine)
      #dest_per_marine {'0': [56, 26], '1': [52, 6]}

      if (closest is None):
        return actions.FunctionCall(_NO_OP, [])

      return actions.FunctionCall(_MOVE_SCREEN, [_NOT_QUEUED, closest])
    elif (len(self.group_list) > 0):
      player_p = []
      for p in zip(player_x, player_y):
        if (p not in player_p):
          player_p.append(p)

      self.group_id = random.randint(0, len(self.group_list) - 1)
      return actions.FunctionCall(
        _SELECT_CONTROL_GROUP,
        [[_CONTROL_GROUP_RECALL], [int(self.group_id)]])
    else:
      return actions.FunctionCall(_NO_OP, [])
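
A minimal sketch of how an agent exposing this step method is typically driven. The run_loop usage follows standard pysc2 conventions, but the no-argument constructor and the agent.env assignment are assumptions; the bookkeeping attributes (group_list, dest_per_marine, group_id) are assumed to be initialized elsewhere in the project.

from pysc2.env import run_loop, sc2_env

with sc2_env.SC2Env(map_name="CollectMineralShards", step_mul=8) as env:
  agent = CollectMineralShards()  # assumed to subclass pysc2's BaseAgent
  agent.env = env                 # step() reads self.env via common.init (assumption)
  run_loop.run_loop([agent], env, max_frames=10000)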