Example #1
def main(_):
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    config = get_config(FLAGS) or FLAGS

    if config.env_type == 'simple':
      env = SimpleGymEnvironment(config)
    else:
      env = GymEnvironment(config)

    if FLAGS.cpu:
      config.cnn_format = 'NHWC'

    agent = Agent(config, env, sess)

    if FLAGS.save_weight:
      agent.save_weight_to_pkl()
    if FLAGS.load_weight:
      agent.load_weight_from_pkl(cpu_mode=FLAGS.cpu)

    if FLAGS.is_train:
      agent.train()
    else:
      agent.play()
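Every example in this listing passes FLAGS.gpu_fraction through calc_gpu_fraction, which is defined elsewhere in each project's main.py and not shown here. A minimal sketch of such a helper, assuming the flag is a simple 'numerator/denominator' string (individual projects may parse it differently):

def calc_gpu_fraction(fraction_string):
    # Parse a flag such as "1/3" into the fraction of GPU memory
    # this process is allowed to reserve.
    num, den = fraction_string.split('/')
    fraction = float(num) / float(den)
    print(" [*] GPU memory fraction: %.4f" % fraction)
    return fraction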
Example #2
def main(_):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        ACPconfig = ACPConfig(env)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            config.cnn_format = 'NHWC'

        # Because of tight coupling in the code, these steps must run in this exact order!
        acpAgent = acp.acp(sess, ACPconfig)
        agentDQN = Agent(config, env, acpAgent, sess)
        acpAgent.setdir(agentDQN.model_dir)

        sess.run(tf.initializers.global_variables())
        # Load both models if exist any checkpoint
        acpAgent.load()
        agentDQN.load()
        if FLAGS.is_train:
            agentDQN.train()
        else:
            raise Exception('agentDQN.play() is not implemented')
            # agentDQN.play()  # unreachable until play() is implemented
Example #3
File: main.py Project: sangjin-park/ERL
def main(_):
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True
    #sess = tf.Session(config=config)

    with tf.Session(config=config) as sess:
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            config.cnn_format = 'NHWC'

        agent = Agent(config, env, sess)
        if FLAGS.is_train:
            agent.train()
        else:
            agent.play()
Example #4
def main(_):
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    config = get_config(FLAGS) or FLAGS

    if config.env_type == 'simple':
      env = SimpleGymEnvironment(config)
    else:
      env = GymEnvironment(config)

    if not tf.test.is_gpu_available() and FLAGS.use_gpu:
      raise Exception("use_gpu flag is true when no GPUs are available")

    if not FLAGS.use_gpu:
      config.cnn_format = 'NHWC'

    agent = Agent(config, env, sess)

    if FLAGS.mode == "train":
      agent.train()
    elif FLAGS.mode == "test":
      agent.play()
    elif FLAGS.mode == "ale":
      agent.play2()
Example #5
File: main.py Project: savingtools/AI
def main(_):
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        config = get_config(FLAGS) or FLAGS  # Load configuration options via get_config in config.py

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            # Raise an error if use_gpu is set but no GPU can be detected.
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            config.cnn_format = 'NHWC'  # Input format: [batch, in_height, in_width, in_channels]
        # The alternative NCHW input format: [batch, in_channels, in_height, in_width]

        agent = Agent(config, env, sess)  # Create the DQN agent

        if FLAGS.is_train:
            agent.train()
        else:
            agent.play()  # Do not train (demo only)
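The comments above describe the two common layouts for convolution inputs. As a small illustration (not part of the original project), converting an NCHW batch to NHWC is just an axis transpose, for example with NumPy:

import numpy as np

# Hypothetical batch: 32 frames, 4 channels, 84x84 pixels in NCHW layout.
batch_nchw = np.zeros((32, 4, 84, 84), dtype=np.float32)

# Reorder to NHWC: [batch, in_height, in_width, in_channels].
batch_nhwc = np.transpose(batch_nchw, (0, 2, 3, 1))
print(batch_nhwc.shape)  # (32, 84, 84, 4)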
Example #6
def main(_):
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        K.set_session(sess)
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            config.cnn_format = 'NHWC'

        # Create a single instance of Agent to be multi-threaded
        agent = Agent(config, env, sess, threading.Lock())

        if FLAGS.is_train:
            init_threads(agent, config)
        else:
            agent.play(env)
Example #7
def main(_):
    with tf.Session() as sess:
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        config.cnn_format = 'NHWC'

        agent = MyAgent(config, env, sess)

        if FLAGS.is_train:
            agent.train()
        else:
            agent.play()
Example #8
def main(_):
  if FLAGS.gpu_fraction == "1/1":
    FLAGS.gpu_fraction = "0.999/1.0"
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

  # Set the GPU memory usage ratio, or TensorFlow may report an error.
  # Alternative: let GPU memory grow as needed:
  #   config = tf.ConfigProto()
  #   config.gpu_options.allow_growth = True
  #   with tf.Session(config=config) as sess:
  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

    config = get_config(FLAGS) or FLAGS

    if config.env_type == 'simple':
      env = SimpleGymEnvironment(config)
    else:
      env = GymEnvironment(config)

    if FLAGS.poison:
      config.poison_line = input("input the number of poison line:")

    if not tf.test.is_gpu_available() and FLAGS.use_gpu:
      raise Exception("use_gpu flag is true when no GPUs are available")

    if not FLAGS.use_gpu:
      config.cnn_format = 'NHWC'

    agent = Agent(config, env, sess)

    if FLAGS.is_train:
      if FLAGS.poison:
        agent.train_poison()
      else:
        agent.train()
    else:
      if FLAGS.poison:
        agent.play_poison()
      else:
        agent.play()
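Example #8 rewrites a gpu_fraction of "1/1" to "0.999/1.0", presumably so the session never requests quite 100% of GPU memory. A minimal alternative sketch (not from the original project) is to clamp the computed fraction directly with the TF 1.x API:

import tensorflow as tf

# Hypothetical clamp: keep the requested fraction just below 1.0 so
# TensorFlow can still allocate its own bookkeeping memory.
requested_fraction = 1.0  # e.g. what a "1/1" flag would parse to
gpu_options = tf.GPUOptions(
    per_process_gpu_memory_fraction=min(requested_fraction, 0.999))
session_config = tf.ConfigProto(gpu_options=gpu_options)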
Example #9
File: main.py Project: guoyijie/ERL
def main(_):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=123)
    parser.add_argument('--test', action="store_true")
    parser.add_argument("--use-gpu", action="store_true")
    parser.add_argument("--mode", help="Bonus mode", default="pixelcnn")
    args = parser.parse_args()

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        config = get_config(args)

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and args.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if args.mode == "pixelcnn":
            from dqn.agent import Agent
            agent = Agent(config, env, sess)
        elif args.mode == "autoencoder":
            from dqn.agent_model import Agent
            agent = Agent(config, env, sess)
        elif args.mode == "top-pixelcnn":
            from dqn.agent_top import Agent
            agent = Agent(config, env, sess)
        else:
            raise ValueError("No such mode")

        print("CNN format", config.cnn_format)
        if not args.test:
            print("training ...")
            agent.train()
        else:
            print("testing ...")
            agent.play()
Example #10
def main(_):
  # Set the fraction of GPU memory that each process is allowed to use
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess: 
    config = get_config(FLAGS) or FLAGS

    if config.env_type == 'simple':
      env = SimpleGymEnvironment(config)
    else:
      env = GymEnvironment(config)

    if not FLAGS.use_gpu:
      config.cnn_format = 'NHWC'

    agent = Agent(config, env, sess)

    if FLAGS.is_train:
      agent.train()
    else:
      agent.play()
Example #11
def main(_):
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(FLAGS.gpu_fraction))

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True, log_device_placement=True)) as sess:
  # with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    config = get_config(FLAGS) or FLAGS

    if config.env_type == 'simple':
      env = SimpleGymEnvironment(config)
    else:
      env = GymEnvironment(config)

    if not FLAGS.use_gpu:
      config.cnn_format = 'NHWC'
    
    with tf.device('/gpu:2'):
        agent = Agent(config, env, sess)

    if FLAGS.is_train:
      agent.train()
    else:
      agent.play()
Example #12
def main(_):
    gpu_options = tf.GPUOptions(allow_growth=True, visible_device_list='0')

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            config.cnn_format = 'NHWC'

        agent = Agent(config, env, sess)

        if FLAGS.is_train:
            agent.train()
        else:
            agent.play()
Example #13
def main(_):

  with tf.Session() as sess:
    config = get_config(FLAGS) or FLAGS

    if config.env_type == 'simple':
      env = SimpleGymEnvironment(config)
    else:
      env = GymEnvironment(config)

    if not tf.test.is_gpu_available() and FLAGS.use_gpu:
      raise Exception("use_gpu flag is true when no GPUs are available")

    if not FLAGS.use_gpu:
      config.cnn_format = 'NHWC'

    roms = 'roms/Pong2PlayerVS.bin'
    ale = ALEInterface(roms.encode('utf-8'))
    width = ale.ale_getScreenWidth()
    height = ale.ale_getScreenHeight()
    game_screen = GameScreen()
    ale.ale_resetGame()
    (display_width, display_height) = (width * 2, height * 2)

    pygame.init()
    screen_ale = pygame.display.set_mode((display_width, display_height))
    pygame.display.set_caption("Arcade Learning Environment Random Agent Display")
    pygame.display.flip()

    game_surface = pygame.Surface((width, height), depth=8)
    clock = pygame.time.Clock()

    # Clear screen
    screen_ale.fill((0, 0, 0))

    agent = Agent(config, env, sess, 'A')
    agent2 = Agent2(config, env, sess, 'B')

    if FLAGS.is_train:
      start_epoch = agent.epoch_op.eval()
      start_step = agent.step_op.eval()
      start_time = time.time()

      # Loop for epochs
      for agent.epoch in range(start_epoch, agent.max_epoch):
        agent2.epoch = agent.epoch

        # Initialize information of gameplay
        num_game, agent.update_count, agent2.update_count, ep_rewardA, ep_rewardB = 0, 0, 0, 0., 0.
        total_rewardA, total_rewardB, agent.total_loss, agent2.total_loss, agent.total_q, agent2.total_q = 0., 0., 0., 0., 0., 0.
        max_avg_ep_rewardA, max_avg_ep_rewardB = 0, 0
        ep_rewardsA, ep_rewardsB, actionsA, actionsB = [], [], [], []

        # Get first frame of gameplay
        numpy_surface = np.frombuffer(game_surface.get_buffer(), dtype=np.uint8)
        rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
        del numpy_surface        
        game_screen.paint(rgb)
        pooled_screen = game_screen.grab()
        scaled_pooled_screen = scale_image(pooled_screen)

        # Add first frame of gameplay into both agents' replay history
        for _ in range(agent.history_length):
          agent.history.add(scaled_pooled_screen)
          agent2.history.add(scaled_pooled_screen)

        # Loop for training iterations
        for agent.step in tqdm(range(start_step, agent.max_step), ncols=70, initial=start_step):
          agent2.step = agent.step

          # End of burn in period, start to learn from frames
          if agent.step == agent.learn_start:
            num_game, agent.update_count, agent2.update_count, ep_rewardA, ep_rewardB = 0, 0, 0, 0., 0.
            total_rewardA, total_rewardB, agent.total_loss, agent2.total_loss, agent.total_q, agent2.total_q = 0., 0., 0., 0., 0., 0.
            max_avg_ep_rewardA, max_avg_ep_rewardB = 0, 0
            ep_rewardsA, ep_rewardsB, actionsA, actionsB = [], [], [], []
          
          # 1. predict
          action1 = agent.predict(agent.history.get())
          action2 = agent2.predict(agent2.history.get())

          # 2. act
          ale.ale_act2(action1, action2)
          terminal = ale.ale_isGameOver()
          # At the end of the epoch, force a terminal state so game statistics can be collected without corrupting the training data
          if agent.step == agent.max_step - 1:
            terminal = True
          rewardA = ale.ale_getRewardA()
          rewardB = ale.ale_getRewardB()
          
          # Fill buffer of game screen with current frame
          numpy_surface = np.frombuffer(game_surface.get_buffer(), dtype=np.uint8)
          rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
          del numpy_surface        
          game_screen.paint(rgb)
          pooled_screen = game_screen.grab()
          scaled_pooled_screen = scale_image(pooled_screen)
          agent.observe(scaled_pooled_screen, rewardA, action1, terminal)
          agent2.observe(scaled_pooled_screen, rewardB, action2, terminal)

          # Print frame onto display screen
          screen_ale.blit(pygame.transform.scale2x(game_surface), (0, 0))

          # Update the display screen
          pygame.display.flip()

          # Check if current episode ended
          if terminal:
            ale.ale_resetGame()
            terminal = ale.ale_isGameOver()
            rewardA = ale.ale_getRewardA()
            rewardB = ale.ale_getRewardB()
            numpy_surface = np.frombuffer(game_surface.get_buffer(), dtype=np.uint8)

            rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
            del numpy_surface        
            game_screen.paint(rgb)
            pooled_screen = game_screen.grab()
            scaled_pooled_screen = scale_image(pooled_screen)

            # End of an episode
            num_game += 1
            ep_rewardsA.append(ep_rewardA)
            ep_rewardsB.append(ep_rewardB)
            ep_rewardA = 0.
            ep_rewardB = 0.
          else:
            ep_rewardA += rewardA
            ep_rewardB += rewardB

          actionsA.append(action1)
          actionsB.append(action2)
          total_rewardA += rewardA
          total_rewardB += rewardB

          # Do a test to get statistics so far
          if agent.step >= agent.learn_start:
            if agent.step % agent.test_step == agent.test_step - 1:
              avg_rewardA = total_rewardA / agent.test_step
              avg_rewardB = total_rewardB / agent2.test_step
              avg_lossA = agent.total_loss / agent.update_count
              avg_lossB = agent2.total_loss / agent2.update_count
              avg_qA = agent.total_q / agent.update_count
              avg_qB = agent2.total_q / agent2.update_count

              try:
                max_ep_rewardA = np.max(ep_rewardsA)
                min_ep_rewardA = np.min(ep_rewardsA)
                avg_ep_rewardA = np.mean(ep_rewardsA)
                max_ep_rewardB = np.max(ep_rewardsB)
                min_ep_rewardB = np.min(ep_rewardsB)
                avg_ep_rewardB = np.mean(ep_rewardsB)
              except:
                max_ep_rewardA, min_ep_rewardA, avg_ep_rewardA, max_ep_rewardB, min_ep_rewardB, avg_ep_rewardB = 0, 0, 0, 0, 0, 0

              print('\nFor Agent A at Epoch %d: avg_r: %.4f, avg_l: %.6f, avg_q: %3.6f, avg_ep_r: %.4f, max_ep_r: %.4f, min_ep_r: %.4f, # game: %d' \
                  % (agent.epoch, avg_rewardA, avg_lossA, avg_qA, avg_ep_rewardA, max_ep_rewardA, min_ep_rewardA, num_game))
              print('\nFor Agent B at Epoch %d: avg_r: %.4f, avg_l: %.6f, avg_q: %3.6f, avg_ep_r: %.4f, max_ep_r: %.4f, min_ep_r: %.4f, # game: %d' \
                  % (agent2.epoch, avg_rewardB, avg_lossB, avg_qB, avg_ep_rewardB, max_ep_rewardB, min_ep_rewardB, num_game))

              if max_avg_ep_rewardA * 0.9 <= avg_ep_rewardA:
                agent.step_assign_op.eval({agent.step_input: agent.step + 1})
                agent.save_model(agent.step + 1)

                max_avg_ep_rewardA = max(max_avg_ep_rewardA, avg_ep_rewardA)

              if max_avg_ep_rewardB * 0.9 <= avg_ep_rewardB:
                agent2.step_assign_op.eval({agent2.step_input: agent2.step + 1})
                agent2.save_model(agent2.step + 1)

                max_avg_ep_rewardB = max(max_avg_ep_rewardB, avg_ep_rewardB)

              if agent.step > 180:
                agent.inject_summary({
                    'average.reward': avg_rewardA,
                    'average.loss': avg_lossA,
                    'average.q': avg_qA,
                    'episode.max reward': max_ep_rewardA,
                    'episode.min reward': min_ep_rewardA,
                    'episode.avg reward': avg_ep_rewardA,
                    'episode.num of game': num_game,
                    'episode.rewards': ep_rewardsA,
                    'episode.actions': actionsA,
                    'training.learning_rate': agent.learning_rate_op.eval({agent.learning_rate_step: agent.step}),
                  }, agent.step)

              if agent2.step > 180:
                agent2.inject_summary({
                    'average.reward': avg_rewardB,
                    'average.loss': avg_lossB,
                    'average.q': avg_qB,
                    'episode.max reward': max_ep_rewardB,
                    'episode.min reward': min_ep_rewardB,
                    'episode.avg reward': avg_ep_rewardB,
                    'episode.num of game': num_game,
                    'episode.rewards': ep_rewardsB,
                    'episode.actions': actionsB,
                    'training.learning_rate': agent2.learning_rate_op.eval({agent2.learning_rate_step: agent2.step}),
                  }, agent2.step)

              # Reset statistics
              num_game = 0
              total_rewardA, total_rewardB = 0., 0.
              agent.total_loss, agent2.total_loss = 0., 0.
              agent.total_q, agent2.total_q = 0., 0.
              agent.update_count, agent2.update_count = 0, 0
              ep_rewardA, ep_rewardB = 0., 0.
              ep_rewardsA, ep_rewardsB = [], []
              actionsA, actionsB = [], []

        # Play 10 games at the end of epoch to get game statistics
        total_points, paddle_bounce, wall_bounce, serving_time = [], [], [], []
        for _ in range(10):
          cur_total_points, cur_paddle_bounce, cur_wall_bounce, cur_serving_time = 0, 0, 0, 0

          # Restart game
          ale.ale_resetGame()

          # Get first frame of gameplay
          numpy_surface = np.frombuffer(game_surface.get_buffer(), dtype=np.uint8)
          rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
          del numpy_surface        
          game_screen.paint(rgb)
          pooled_screen = game_screen.grab()
          scaled_pooled_screen = scale_image(pooled_screen)

          # Create history for testing purposes
          test_history = History(config)

          # Fill first 4 images with initial screen
          for _ in range(agent.history_length):
            test_history.add(scaled_pooled_screen)

          while not ale.ale_isGameOver():
            # 1. predict
            action1 = agent.predict(agent.history.get())
            action2 = agent2.predict(agent2.history.get())

            # 2. act
            ale.ale_act2(action1, action2)
            terminal = ale.ale_isGameOver()
            rewardA = ale.ale_getRewardA()
            rewardB = ale.ale_getRewardB()

            # Record game statistics of current episode
            cur_total_points = ale.ale_getPoints()
            cur_paddle_bounce = ale.ale_getSideBouncing()
            if ale.ale_getWallBouncing():
              cur_wall_bounce += 1
            if ale.ale_getServing():
              cur_serving_time += 1

            # Fill buffer of game screen with current frame
            numpy_surface = np.frombuffer(game_surface.get_buffer(), dtype=np.uint8)
            rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
            del numpy_surface        
            game_screen.paint(rgb)
            pooled_screen = game_screen.grab()
            scaled_pooled_screen = scale_image(pooled_screen)
            agent.observe(scaled_pooled_screen, rewardA, action1, terminal)
            agent2.observe(scaled_pooled_screen, rewardB, action2, terminal)

            # Print frame onto display screen
            screen_ale.blit(pygame.transform.scale2x(game_surface), (0, 0))

            # Update the display screen
            pygame.display.flip()

          # Append current episode's statistics into list
          total_points.append(cur_total_points)
          paddle_bounce.append(cur_paddle_bounce / cur_total_points)
          if cur_paddle_bounce == 0:
            wall_bounce.append(cur_wall_bounce / (cur_paddle_bounce + 1))
          else:
            wall_bounce.append(cur_wall_bounce / cur_paddle_bounce)
          serving_time.append(cur_serving_time / cur_total_points)

        # Save results of test after current epoch
        cur_paddle_op = agent.paddle_op.eval()
        cur_paddle_op[agent.epoch] = sum(paddle_bounce) / len(paddle_bounce)
        agent.paddle_assign_op.eval({agent.paddle_input: cur_paddle_op})

        cur_wall_op = agent.wall_op.eval()
        cur_wall_op[agent.epoch] = sum(wall_bounce) / len(wall_bounce)
        agent.wall_assign_op.eval({agent.wall_input: cur_wall_op})

        cur_serving_op = agent.serving_op.eval()
        cur_serving_op[agent.epoch] = sum(serving_time) / len(serving_time)
        agent.serving_assign_op.eval({agent.serving_input: cur_serving_op})

        agent.save_model(agent.step + 1)
    else:
      agent.play()
      agent2.play()
Example #14
def main(_):
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--seed', help='RNG seed', type=int, default=123)
    parser.add_argument("--use-gpu", action="store_true")
    parser.add_argument("--mode", help="Bonus mode", default="autoencoder")
    parser.add_argument("--model-dir",
                        help="the path of the model",
                        default="ae_model/model.p")
    parser.add_argument("--img-dir",
                        help="the path to save image",
                        default="imgs/")
    parser.add_argument("--n", help="the number of episodes", default=10)

    args = parser.parse_args()

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        config = get_config(args)

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and args.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        # Build the density model
        density_model = AutoEncoder("ae", sess, config)
        loadFromFlat(density_model.get_variables(), args.model_dir)

        na = config.n_action
        last_screen, reward, action, terminal = env.new_random_game()
        last_screen42x42 = imresize(last_screen, (42, 42), order=1)
        pi = RandomPolicy(na)

        if not os.path.exists(args.img_dir):
            os.mkdir(args.img_dir)

        # At first, use random action taker.
        for i in tqdm(range(args.n)):
            ep_steps = 0
            prefix = args.img_dir + "ep%i/" % i
            if not os.path.exists(prefix):
                os.mkdir(prefix)
            prefix = prefix + 'img'
            while True:
                action = pi.action(last_screen)
                screen, reward, terminal = env.act(action)
                screen42x42 = imresize(screen, (42, 42), order=1)

                oh_action = np.zeros(na)
                oh_action[action] = 1

                density_model.memory.add_sample(last_screen42x42, action,
                                                terminal)
                ep_steps += 1

                if ep_steps >= 4:
                    pscreen42x42 = density_model.predict().reshape(42, 42)
                    img = concat2imgs(screen42x42, pscreen42x42)
                    saveimg(img, ep_steps, prefix)

                # Update
                last_screen42x42 = screen42x42
                last_screen = screen

                if terminal:
                    last_screen, reward, action, terminal = env.new_random_game()
                    last_screen42x42 = imresize(last_screen, (42, 42), order=1)
                    break
Example #15
def main(_):

    with tf.Session() as sess:
        config = get_config(FLAGS) or FLAGS

        if config.env_type == 'simple':
            env = SimpleGymEnvironment(config)
        else:
            env = GymEnvironment(config)

        if not tf.test.is_gpu_available() and FLAGS.use_gpu:
            raise Exception("use_gpu flag is true when no GPUs are available")

        if not FLAGS.use_gpu:
            config.cnn_format = 'NHWC'

        roms = 'roms/Pong2PlayerVS.bin'
        ale = ALEInterface(roms.encode('utf-8'))
        width = ale.ale_getScreenWidth()
        height = ale.ale_getScreenHeight()
        game_screen = GameScreen()
        ale.ale_resetGame()
        (display_width, display_height) = (width * 2, height * 2)

        pygame.init()
        screen_ale = pygame.display.set_mode((display_width, display_height))
        pygame.display.set_caption(
            "Arcade Learning Environment Random Agent Display")
        pygame.display.flip()

        game_surface = pygame.Surface((width, height), depth=8)
        clock = pygame.time.Clock()

        # Clear screen
        screen_ale.fill((0, 0, 0))
        agent = Agent(config, env, sess)

        if FLAGS.is_train:
            start_step = agent.step_op.eval()
            start_time = time.time()

            num_game, agent.update_count, ep_reward = 0, 0, 0.
            total_reward, agent.total_loss, agent.total_q = 0., 0., 0.
            max_avg_ep_reward = 0
            ep_rewards, actions = [], []

            numpy_surface = np.frombuffer(game_surface.get_buffer(),
                                          dtype=np.uint8)
            rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
            del numpy_surface
            game_screen.paint(rgb)
            pooled_screen = game_screen.grab()
            scaled_pooled_screen = scale_image(pooled_screen)

            for _ in range(agent.history_length):
                agent.history.add(scaled_pooled_screen)

            for agent.step in tqdm(range(start_step, agent.max_step),
                                   ncols=70,
                                   initial=start_step):

                if agent.step == agent.learn_start:
                    num_game, agent.update_count, ep_reward = 0, 0, 0.
                    total_reward, agent.total_loss, agent.total_q = 0., 0., 0.
                    ep_rewards, actions = [], []

                # 1. predict
                action = agent.predict(agent.history.get())
                # 2. act
                ale.ale_act2(action, np.random.choice([20, 21, 23, 24]))
                terminal = ale.ale_isGameOver()
                reward = ale.ale_getRewardA()

                # screen, reward, terminal = agent.env.act(action, is_training=True)
                # 3. observe
                # Both agents perform random actions
                # Agent A : [NOOP, FIRE, RIGHT, LEFT]
                # Agent B : [NOOP, FIRE, RIGHT, LEFT]

                # Fill buffer of game screen with current frame
                numpy_surface = np.frombuffer(game_surface.get_buffer(),
                                              dtype=np.uint8)
                rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
                del numpy_surface
                game_screen.paint(rgb)
                pooled_screen = game_screen.grab()
                scaled_pooled_screen = scale_image(pooled_screen)
                agent.observe(scaled_pooled_screen, reward, action, terminal)

                # Print frame onto display screen
                screen_ale.blit(pygame.transform.scale2x(game_surface), (0, 0))

                #Update the display screen
                pygame.display.flip()

                if terminal:
                    ale.ale_resetGame()
                    terminal = ale.ale_isGameOver()
                    reward = ale.ale_getRewardA()
                    numpy_surface = np.frombuffer(game_surface.get_buffer(),
                                                  dtype=np.uint8)

                    rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
                    del numpy_surface
                    game_screen.paint(rgb)
                    pooled_screen = game_screen.grab()
                    scaled_pooled_screen = scale_image(pooled_screen)

                    num_game += 1
                    ep_rewards.append(ep_reward)
                    ep_reward = 0.
                else:
                    ep_reward += reward

                actions.append(action)
                total_reward += reward

                if agent.step >= agent.learn_start:
                    if agent.step % agent.test_step == agent.test_step - 1:
                        avg_reward = total_reward / agent.test_step
                        avg_loss = agent.total_loss / agent.update_count
                        avg_q = agent.total_q / agent.update_count

                        try:
                            max_ep_reward = np.max(ep_rewards)
                            min_ep_reward = np.min(ep_rewards)
                            avg_ep_reward = np.mean(ep_rewards)
                        except:
                            max_ep_reward, min_ep_reward, avg_ep_reward = 0, 0, 0

                        print('\navg_r: %.4f, avg_l: %.6f, avg_q: %3.6f, avg_ep_r: %.4f, max_ep_r: %.4f, min_ep_r: %.4f, # game: %d' \
                            % (avg_reward, avg_loss, avg_q, avg_ep_reward, max_ep_reward, min_ep_reward, num_game))

                        if max_avg_ep_reward * 0.9 <= avg_ep_reward:
                            agent.step_assign_op.eval(
                                {agent.step_input: agent.step + 1})
                            agent.save_model(agent.step + 1)

                            max_avg_ep_reward = max(max_avg_ep_reward,
                                                    avg_ep_reward)

                        if agent.step > 180:
                            agent.inject_summary({
                                'average.reward': avg_reward,
                                'average.loss': avg_loss,
                                'average.q': avg_q,
                                'episode.max reward': max_ep_reward,
                                'episode.min reward': min_ep_reward,
                                'episode.avg reward': avg_ep_reward,
                                'episode.num of game': num_game,
                                'episode.rewards': ep_rewards,
                                'episode.actions': actions,
                                'training.learning_rate': agent.learning_rate_op.eval(
                                    {agent.learning_rate_step: agent.step}),
                            }, agent.step)

                        num_game = 0
                        total_reward = 0.
                        agent.total_loss = 0.
                        agent.total_q = 0.
                        agent.update_count = 0
                        ep_reward = 0.
                        ep_rewards = []
                        actions = []
        else:
            while not ale.ale_isGameOver():

                # Fill buffer of game screen with current frame
                numpy_surface = np.frombuffer(game_surface.get_buffer(),
                                              dtype=np.uint8)
                rgb = getRgbFromPalette(ale, game_surface, numpy_surface)
                del numpy_surface
                game_screen.paint(rgb)
                pooled_screen = game_screen.grab()
                scaled_pooled_screen = scale_image(pooled_screen)

                ale.ale_act2(agent.predict(pooled_screen),
                             np.random.choice([20, 21, 23, 24]))

                print(ale.ale_getRewardA())
                # Print frame onto display screen
                screen_ale.blit(pygame.transform.scale2x(game_surface), (0, 0))

                # Update the display screen
                pygame.display.flip()

                # delay to 60fps
                clock.tick(60.)