Example #1
    def __init__(self, simulation_mode=False):
        """Set up the Blobwar environment, the players and, unless running in
        simulation mode, the GUI board."""

        env = get_environment("blobwar")()
        env.seed(10)
        self.images = {1: "blob_blue", 2: "blob_orange"}
        self.images_selected = {
            1: "blob_blue_selected",
            2: "blob_orange_selected"
        }
        self.images_candidates = {
            1: "blob_blue_candidate",
            2: "blob_orange_candidate"
        }
        self.env = env
        self.human_player = Human()
        # Prefer a trained PPO agent; fall back to the greedy player when no
        # model exists for this board size.
        try:
            self.ai_player_1 = PPO(self.env, size=self.env.core.board.shape[0])
        except Exception:
            print("PPO not yet trained for board size:", self.env.core.board.shape[0])
            self.ai_player_1 = GreedyPlayer()

        self.ai_player_2 = GreedyPlayer()
        self.player1 = self.player2 = None

        if simulation_mode:
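            # Headless simulation: the same AI agent plays both sides and no GUI board is created.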
            self.state = GameState.WAITING_FIRST_CLICK
            self.player1 = self.ai_player_1
            self.player2 = self.player1
            return

        self.game = Board(self.env.core.board.shape[0],
                          self.env.core.board.shape[1])

        self.game.cell_size = 70

        self.game.cell_spacing = 1
        self.game.fill(None)

        self.game.margin_color = self.game.grid_color = "wheat4"
        self.game.cell_color = "white"
        self.game.title = "Blobwar"
        self.game.create_output(font_size=11)
        self.state = GameState.GAME_OVER

        self.move_start_case = None
        self.move_candidates_case = []
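
A minimal sketch of how this constructor might be driven. The enclosing class name is not visible in the snippet, so BlobwarGame below is only a placeholder:

# Hypothetical driver for the constructor above; BlobwarGame is a placeholder name.
gui_game = BlobwarGame(simulation_mode=False)   # full setup: environment, players and GUI board
headless = BlobwarGame(simulation_mode=True)    # AI vs AI only; returns before any Board is created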
Example #2
def main(args):

    logger.configure(config.LOGDIR)

    if args.debug:
        logger.set_level(config.DEBUG)
    else:
        logger.set_level(config.INFO)

    # make environment
    env = get_environment(args.env_name)(verbose=args.verbose,
                                         manual=args.manual)
    env.seed(args.seed)

    total_rewards = {}

    first_time = True

    if args.recommend:
        ppo_model = load_model(env, 'best_model.zip')
        ppo_agent = Agent('best_model', ppo_model)
    else:
        ppo_agent = None

    agents = []

    # load the agents
    if len(args.agents) != env.n_players:
        raise Exception(
            f'{len(args.agents)} players specified but this is a {env.n_players} player game!'
        )

    for i, agent in enumerate(args.agents):
        if agent == 'human':
            agent_obj = Agent('human')
        elif agent == 'rules':
            agent_obj = Agent('rules')
        elif agent == 'json':
            # Start the ZeroMQ REP server used to talk to the external JSON agent
            context = zmq.Context()
            socket = context.socket(zmq.REP)
            socket.bind("tcp://*:5555")
            logger.debug("zmq REP server listening on port 5555")
            agent_obj = Agent('json')
        elif agent == 'base':
            base_model = load_model(env, 'base.zip')
            agent_obj = Agent('base', base_model)
        else:
            ppo_model = load_model(env, f'{agent}.zip')
            agent_obj = Agent(agent, ppo_model)
        agents.append(agent_obj)
        total_rewards[agent_obj.id] = 0

    # play games
    logger.info(f'\nPlaying {args.games} games...')
    for game in range(args.games):
        players = agents[:]

        if args.randomise_players:
            random.shuffle(players)

        obs = env.reset()
        done = False

        for i, p in enumerate(players):
            logger.debug(f'Player {i+1} = {p.name}')

        while not done:

            current_player = players[env.current_player_num]
            env.render()
            logger.debug(f'\nCurrent player name: {current_player.name}')

            if args.recommend and current_player.name in [
                    'human', 'rules', 'json'
            ]:
                # show recommendation from last loaded model
                logger.debug(f'\nRecommendation by {ppo_agent.name}:')
                action = ppo_agent.choose_action(env,
                                                 choose_best_action=True,
                                                 mask_invalid_actions=True)

            if current_player.name == 'human':
                action = input('\nPlease choose an action: ')
                try:
                    # integer action
                    action = int(action)
                except ValueError:
                    # MultiDiscrete action entered as a list TODO
                    action = eval(action)

            elif current_player.name == 'json':
                if not first_time:
                    game_state = {
                        "legal_action": [
                            i for i, o in enumerate(env.legal_actions) if o != 0
                        ],
                        "tableCard": env.tableCard.id
                    }
                    socket.send_json(game_state)

                action = socket.recv_json()
                first_time = False
                logger.debug(f'\nReceived {action}')

                try:
                    # integer action
                    action = int(action)
                except (ValueError, TypeError):
                    # MultiDiscrete action received as a list TODO
                    action = eval(action)
            elif current_player.name == 'rules':
                logger.debug(f'\n{current_player.name} model choices')
                action = current_player.choose_action(
                    env, choose_best_action=False, mask_invalid_actions=True)
            else:
                logger.debug(f'\n{current_player.name} model choices')
                action = current_player.choose_action(
                    env,
                    choose_best_action=args.best,
                    mask_invalid_actions=True)

            obs, reward, done, _ = env.step(action)

            for r, player in zip(reward, players):
                total_rewards[player.id] += r
                player.points += r

            if args.cont:
                input('Press Enter to continue')

        env.render()

        logger.info(f"Played {game + 1} games: {total_rewards}")

        if args.write_results:
            write_results(players, game, args.games, env.turns_taken)

        for p in players:
            p.points = 0

    env.close()
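
The 'json' agent in this example turns the play loop into a ZeroMQ REP server on port 5555: the server waits for the client's first action before sending anything, and on every later 'json' turn it replies with the current legal actions and table card before waiting for the next action. Below is a minimal sketch of a matching REQ client; the address, the random policy and the integer-action assumption are mine, while the message keys come from the code above.

import random

import zmq

context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect("tcp://localhost:5555")  # assumed address; the server binds tcp://*:5555

# The server recv()s the first action before it sends any state,
# so the client has to open with a blind guess.
socket.send_json(0)

while True:
    # {"legal_action": [...], "tableCard": <id>}, as built in the play loop above
    game_state = socket.recv_json()
    action = random.choice(game_state["legal_action"])
    socket.send_json(action)
    # No clean shutdown is defined: the server simply stops replying once the game ends.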
Example #3
def main(args):

    rank = MPI.COMM_WORLD.Get_rank()

    model_dir = os.path.join(config.MODELDIR, args.env_name)

    if rank == 0:
        os.makedirs(model_dir, exist_ok=True)
        if args.reset:
            reset_files(model_dir)
        logger.configure(config.LOGDIR)
    else:
        logger.configure(format_strs=[])

    if args.debug:
        logger.set_level(config.DEBUG)
    else:
        time.sleep(5)
        logger.set_level(config.INFO)

    workerseed = args.seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)

    logger.info('\nSetting up the selfplay training environment opponents...')
    base_env = get_environment(args.env_name)
    env = selfplay_wrapper(base_env)(opponent_type=args.opponent_type,
                                     verbose=args.verbose)
    env.seed(workerseed)

    CustomPolicy = get_network_arch(args.env_name)

    params = {
        'gamma': args.gamma,
        'timesteps_per_actorbatch': args.timesteps_per_actorbatch,
        'clip_param': args.clip_param,
        'entcoeff': args.entcoeff,
        'optim_epochs': args.optim_epochs,
        'optim_stepsize': args.optim_stepsize,
        'optim_batchsize': args.optim_batchsize,
        'lam': args.lam,
        'adam_epsilon': args.adam_epsilon,
        'schedule': 'linear',
        'verbose': 1,
        'tensorboard_log': config.LOGDIR
    }

    # Allow time for the base model to be saved out when the environment is created.
    time.sleep(5)

    if args.reset or not os.path.exists(
            os.path.join(model_dir, 'best_model.zip')):
        logger.info('\nLoading the base PPO agent to train...')
        model = PPO1.load(os.path.join(model_dir, 'base.zip'), env, **params)
    else:
        logger.info(
            '\nLoading the best_model.zip PPO agent to continue training...')
        model = PPO1.load(os.path.join(model_dir, 'best_model.zip'), env,
                          **params)

    # Callbacks
    logger.info(
        '\nSetting up the selfplay evaluation environment opponents...')
    callback_args = {
        'eval_env': selfplay_wrapper(base_env)(opponent_type=args.opponent_type,
                                               verbose=args.verbose),
        'best_model_save_path': config.TMPMODELDIR,
        'log_path': config.LOGDIR,
        'eval_freq': args.eval_freq,
        'n_eval_episodes': args.n_eval_episodes,
        'deterministic': False,
        'render': True,
        'verbose': 0
    }

    if args.rules:
        logger.info(
            '\nSetting up the evaluation environment against the rules-based agent...'
        )
        # Evaluate against a 'rules' agent as well
        eval_actual_callback = EvalCallback(
            eval_env=selfplay_wrapper(base_env)(opponent_type='rules',
                                                verbose=args.verbose),
            eval_freq=1,
            n_eval_episodes=args.n_eval_episodes,
            deterministic=args.best,
            render=True,
            verbose=0)
        callback_args['callback_on_new_best'] = eval_actual_callback

    # Evaluate the agent against previous versions
    eval_callback = SelfPlayCallback(args.opponent_type, args.threshold,
                                     args.env_name, **callback_args)

    logger.info('\nSetup complete - commencing learning...\n')

    model.learn(total_timesteps=int(1e9),
                callback=[eval_callback],
                reset_num_timesteps=False,
                tb_log_name="tb")

    env.close()
    del env
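
The training entry point only reads plain attributes from args, so it can be exercised without a CLI. Here is a minimal namespace covering every attribute used above; the values are illustrative guesses, only the attribute names are taken from the code.

from types import SimpleNamespace

# Illustrative values only; main() reads exactly these attribute names.
args = SimpleNamespace(
    env_name='blobwar', seed=17, reset=False, debug=False, verbose=False,
    opponent_type='best', rules=False, best=True, threshold=0.2,
    gamma=0.99, timesteps_per_actorbatch=1024, clip_param=0.2, entcoeff=0.01,
    optim_epochs=4, optim_stepsize=3e-4, optim_batchsize=64, lam=0.95,
    adam_epsilon=1e-5, eval_freq=10000, n_eval_episodes=100)

main(args)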
Example #4
def main(args):

  logger.configure(config.LOGDIR)

  if args.debug:
    logger.set_level(config.DEBUG)
  else:
    logger.set_level(config.INFO)

  # make environment
  env = get_environment(args.env_name)(verbose = args.verbose, manual = args.manual)
  env.seed(args.seed)

  total_rewards = {}

  if args.recommend:
    ppo_model = load_model(env, 'best_model.zip')
    ppo_agent = Agent('best_model', ppo_model)
  else:
    ppo_agent = None

  agents = []

  # load the agents
  if len(args.agents) != env.n_players:
    raise Exception(f'{len(args.agents)} players specified but this is a {env.n_players} player game!')

  for i, agent in enumerate(args.agents):
    if agent == 'human':
      agent_obj = Agent('human')
    elif agent == 'greedy':
      agent_obj = Agent('greedy')
    elif agent == 'rules':
      agent_obj = Agent('rules')
    elif agent == 'base':
      base_model = load_model(env, 'base.zip')
      agent_obj = Agent('base', base_model)
    else:
      ppo_model = load_model(env, f'{agent}.zip')
      agent_obj = Agent(agent, ppo_model)
    agents.append(agent_obj)
    total_rewards[agent_obj.id] = 0

  if args.env_name == "blobwar":
    human_blobwar = Human()

  # play games
  logger.info(f'\nPlaying {args.games} games...')
  for game in range(args.games):
    players = agents[:]

    if args.randomise_players:
      random.shuffle(players)

    obs = env.reset()
    done = False
    
    for i, p in enumerate(players):
      logger.debug(f'Player {i+1} = {p.name}')

    while not done:

      current_player = players[env.current_player_num]
      env.render()
      logger.debug(f'Current player name: {current_player.name}')

      if args.recommend and current_player.name in ['human', 'rules']:
        # show recommendation from last loaded model
        logger.debug(f'\nRecommendation by {ppo_agent.name}:')
        action = ppo_agent.choose_action(env, choose_best_action = True, mask_invalid_actions = True)

      if current_player.name == 'human':
        if args.env_name == "blobwar":
          # The Blobwar human move comes from the Human helper and is then
          # encoded into the environment's action space.
          move = human_blobwar.compute_next_move(env.core)
          action = env.encode_action(move)
        else:
          action = input('\nPlease choose an action: ')

        try:
          # integer action
          action = int(action)
        except (ValueError, TypeError):
          # MultiDiscrete action entered as a list TODO
          action = eval(action)
      elif current_player.name == 'rules':
        logger.debug(f'\n{current_player.name} model choices')
        action = current_player.choose_action(env, choose_best_action = False, mask_invalid_actions = True)
      else:
        logger.debug(f'\n{current_player.name} model choices')
        action = current_player.choose_action(env, choose_best_action = args.best, mask_invalid_actions = True)

      obs, reward, done, _ = env.step(action)

      for r, player in zip(reward, players):
        total_rewards[player.id] += r
        player.points += r

      if args.cont:
        input('Press Enter to continue')
    
    env.render()

    logger.info(f"Played {game + 1} games: {total_rewards}")

    if args.write_results:
      write_results(players, game, args.games, env.turns_taken)

    for p in players:
      p.points = 0

  env.close()
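
Both play scripts (Example #2 and Example #4) read the same attributes from args. Here is a minimal argparse sketch that supplies them; the flag spellings and defaults are assumptions, only the attribute names come from the code.

import argparse

# Flag names and defaults are illustrative; main() only relies on the attribute names.
parser = argparse.ArgumentParser()
parser.add_argument('--env_name', default='blobwar')
parser.add_argument('--agents', nargs='+', default=['human', 'best_model'])
parser.add_argument('--games', type=int, default=1)
parser.add_argument('--seed', type=int, default=17)
parser.add_argument('--best', action='store_true')
parser.add_argument('--recommend', action='store_true')
parser.add_argument('--randomise_players', action='store_true')
parser.add_argument('--cont', action='store_true')
parser.add_argument('--write_results', action='store_true')
parser.add_argument('--manual', action='store_true')
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--debug', action='store_true')

main(parser.parse_args())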