Example 1
def run_thread(agent_cls, map_name, visualize):
    with sc2_env.SC2Env(map_name=map_name,
                        step_mul=FLAGS.step_mul,
                        game_steps_per_episode=FLAGS.game_steps_per_episode,
                        feature_screen_size=FLAGS.screen_resolution,
                        feature_minimap_size=FLAGS.minimap_resolution,
                        visualize=visualize,
                        use_feature_units=True) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agent = agent_cls()

        agent_name = FLAGS.agent_file

        # set the path to save the models and graphs
        path = 'models/' + agent_name

        # restore the model only if you have previously trained one
        #agent.dqn.load_model(path)

        # run the steps
        run_loop.run_loop([agent], env, FLAGS.max_agent_steps)

        # save the model
        agent.dqn.save_model(path, 1)

        # plot cost and reward
        agent.dqn.plot_cost(path, save=True)
        agent.plot_reward(path, save=True)
        agent.plot_player_hp(path, save=True)
        agent.plot_enemy_hp(path, save=True)

        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)
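
These run_thread functions are normally started from a small launcher in the same script. The sketch below shows one plausible way to do that, assuming absl-py flags and Python threads; TestAgent and the flag names map and render are placeholders, not taken from the example above.

import threading

from absl import app
from absl import flags

FLAGS = flags.FLAGS
flags.DEFINE_string("map", "MoveToBeacon", "Name of the SC2 minigame map.")
flags.DEFINE_bool("render", False, "Whether to show the pysc2 viewer.")


def main(unused_argv):
    # A single worker is enough here; the threaded variants further down
    # share a global COUNTER behind a LOCK.
    t = threading.Thread(target=run_thread,
                         args=(TestAgent, FLAGS.map, FLAGS.render))
    t.start()
    t.join()


if __name__ == "__main__":
    app.run(main)
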
Example 2
def run_thread(agent_cls, map_name, visualize):
    with sc2_env.SC2Env(map_name=map_name,
                        step_mul=FLAGS.step_mul,
                        game_steps_per_episode=FLAGS.game_steps_per_episode,
                        feature_screen_size=FLAGS.screen_resolution,
                        feature_minimap_size=FLAGS.minimap_resolution,
                        visualize=visualize,
                        use_feature_units=True) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agent = agent_cls()

        # set the path to save the models and graphs
        path1 = 'models/'
        path2 = 'graphs/'
        # restore the model only if you have previously trained one
        if LOAD_MODEL:
            agent.ddpg.load_model(path1)

        # run the steps
        run_loop.run_loop([agent], env, FLAGS.max_agent_steps)

        # save the model
        if SAVE_MODEL:
            agent.ddpg.save_model(path1, 1)

        # plot cost and reward
        agent.ddpg.plot_cost(path2, save=SAVE_PIC)
        agent.ddpg.plot_reward(path2, save=SAVE_PIC)
        agent.plot_hp(path2, save=SAVE_PIC)

        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)
Example 3
def evaluate_k_episodes_and_avg(agent, env, k=5, write_results=False):
    # Evaluate multiple episodes and compute scores
    agent.training = False
    avg_score, max_score, min_score = 0, float('-inf'), float('inf')
    print("Evaluating...")
    for epi in range(k):
        print("Running episode {}/{}.".format(epi, k))
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if not is_done:
                continue
            obs = recorder[0].observation
            score = obs["score_cumulative"].score
            avg_score += score
            max_score = max(max_score, score)
            min_score = min(min_score, score)
            break

    avg_score /= k
    print("Max/Min/Avg score: {} / {} / {}".format(max_score, min_score,
                                                   avg_score))
    agent.training = True

    # write results to score_log file for visualization
    if write_results:
        log_fn = SCORE_LOG + '/log.dat'
        with open(log_fn, 'a') as fout:
            fout.write('\t'.join(
                map(str, [COUNTER, max_score, min_score, avg_score])) + '\n')

    return avg_score
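
Most of the examples here iterate `for recorder, is_done in run_loop(...)`. The actual run_loop differs per repository (some take extra arguments such as a counter, max_episodes, or mlsh options), but the generator protocol they rely on can be sketched roughly as follows; treat the exact recorder layout as an assumption.

def run_loop(agents, env, max_frames=0):
    """Sketch of the generator protocol assumed above: yields
    ([previous_timestep, action, current_timestep], is_done) once per step,
    so both recorder[0].observation and recorder[-1].observation resolve
    to a pysc2 observation."""
    total_frames = 0
    try:
        while True:
            timesteps = env.reset()
            for a in agents:
                a.reset()
            while True:
                total_frames += 1
                last_timesteps = timesteps
                actions = [agent.step(t) for agent, t in zip(agents, timesteps)]
                timesteps = env.step(actions)
                is_done = (max_frames and total_frames >= max_frames) \
                    or timesteps[0].last()
                yield [last_timesteps[0], actions[0], timesteps[0]], is_done
                if is_done:
                    break
    except KeyboardInterrupt:
        pass
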
Example 4
def run_thread(agent_classes, players, map_name, visualize):
    """Run one thread worth of the environment with agents."""
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space,
                use_feature_units=FLAGS.use_feature_units),
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agents = [agent_cls() for agent_cls in agent_classes]
        run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes,
                 FLAGS.feature_screen_size, FLAGS.save_replay)
        if FLAGS.save_replay:
            env.save_replay(agent_classes[0].__name__)
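
The FLAGS passed to sc2_env.parse_agent_interface_format above mirror the flag layout of pysc2's bundled agent runner. A sketch of the corresponding definitions, with illustrative defaults:

from absl import flags
from pysc2.lib import point_flag

FLAGS = flags.FLAGS
# Screen/minimap resolutions are "point" flags, e.g. "84" or "64,48".
point_flag.DEFINE_point("feature_screen_size", "84",
                        "Resolution for screen feature layers.")
point_flag.DEFINE_point("feature_minimap_size", "64",
                        "Resolution for minimap feature layers.")
point_flag.DEFINE_point("rgb_screen_size", None,
                        "Resolution for rendered screen.")
point_flag.DEFINE_point("rgb_minimap_size", None,
                        "Resolution for rendered minimap.")
flags.DEFINE_enum("action_space", None, ["FEATURES", "RGB"],
                  "Which action space to use.")
flags.DEFINE_bool("use_feature_units", False,
                  "Whether to include feature units in observations.")
flags.DEFINE_integer("step_mul", 8, "Game steps per agent step.")
flags.DEFINE_integer("game_steps_per_episode", None,
                     "Game steps per episode (None for the map default).")
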
Example 5
def run_thread(agent_classes, players, map_name, visualize):
  """Run one thread worth of the environment with agents."""
  SNAPSHOT = FLAGS.snapshot_path+FLAGS.map+'/'+FLAGS.net
  LOG = FLAGS.log_path+FLAGS.map+'/'+FLAGS.net
  if not os.path.exists(LOG):
    os.makedirs(LOG)
  with sc2_env.SC2Env(
      map_name=map_name,
      players=players,
      agent_interface_format=sc2_env.parse_agent_interface_format(
          feature_screen=FLAGS.feature_screen_size,
          feature_minimap=FLAGS.feature_minimap_size,
          rgb_screen=FLAGS.rgb_screen_size,
          rgb_minimap=FLAGS.rgb_minimap_size,
          action_space=FLAGS.action_space,
          use_feature_units=FLAGS.use_feature_units),
      step_mul=FLAGS.step_mul,
      game_steps_per_episode=FLAGS.game_steps_per_episode,
      disable_fog=FLAGS.disable_fog,
      visualize=False) as env:
    
    env = available_actions_printer.AvailableActionsPrinter(env)
    # wouldn't work for agent vs. bot
    agents = [agent_cls(int(FLAGS.feature_minimap_size.x),
                        int(FLAGS.feature_screen_size.x), LOG)
              for agent_cls in agent_classes]
    # run_loop.run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes)
    replay_buffer = []
    for recorder, is_done in run_loop.run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes):
      if FLAGS.training:
        
        replay_buffer.append(recorder)
        if is_done:
          counter = 0
          # with LOCK:
          global COUNTER
          COUNTER += 1
          counter = COUNTER
          # Learning rate schedule
          learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
          # print(replay_buffer)
          agents[0].update(replay_buffer, FLAGS.discount, learning_rate, counter)
          if counter % FLAGS.snapshot_step == 1:
            agents[0].save_model()
          replay_buffer = []
          # if counter % FLAGS.snapshot_step == 1:
          #   agents[0].save_model(SNAPSHOT, counter)
          if counter >= FLAGS.max_steps:
            break
      elif is_done:
        obs = recorder[-1].observation
        score = obs["score_cumulative"][0]
        print('Your score is '+str(score)+'!')
    if FLAGS.save_replay:
      env.save_replay(agent_classes[0].__name__)
Example 6
def run_thread(agent, map_name, agent_id=0):
    ''' set up and run sc2_env loop '''
    try:
        while True:
            with sc2_env.SC2Env(
                    map_name=FLAGS.map_name,
                    step_mul=FLAGS.step_mul,
                    visualize=FLAGS.render,
                    players=PLAYERS,
                    agent_interface_format=features.AgentInterfaceFormat(
                        feature_dimensions=features.Dimensions(
                            screen=FLAGS.screen_res,
                            minimap=FLAGS.minimap_res),
                        use_feature_units=True),
                    game_steps_per_episode=0) as env:
                env = available_actions_printer.AvailableActionsPrinter(env)

                # Only for a single player!
                # snapshot_dir = SNAPSHOT + str(agent_id)  # if needed later
                replay_buffer = []
                for recorder, is_done in run_loop([agent], env,
                                                  MAX_AGENT_STEPS):
                    if FLAGS.training:
                        replay_buffer.append(recorder)
                        if is_done:
                            counter = 0
                            with LOCK:
                                global COUNTER
                                COUNTER += 1
                                counter = COUNTER
                            # Learning rate schedule
                            learning_rate = FLAGS.learning_rate * (
                                1 - 0.9 * counter / FLAGS.max_steps)
                            agent.update(replay_buffer, FLAGS.gamma,
                                         learning_rate, counter)
                            replay_buffer = []
                            if counter % FLAGS.snapshot_step == 1:
                                agent.save_model(SNAPSHOT, counter)
                            if counter >= FLAGS.max_steps:
                                break
                    elif is_done:
                        obs = recorder[-1].observation
                        score = obs["score_cumulative"][0]
                        print('Your score is ' + str(score) +
                              '!')  # does this ever print?
                if FLAGS.save_replay:
                    env.save_replay(agent.name)

    except KeyboardInterrupt:
        pass
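
The threaded variants above assume some module-level shared state: a global episode counter guarded by a lock, plus snapshot and step constants. A minimal sketch of that scaffolding and a launcher for several workers; start_workers, the default path, and the step cap are illustrative, not taken from any of the repositories above.

import threading

COUNTER = 0                 # episodes finished across all worker threads
LOCK = threading.Lock()     # guards COUNTER updates
SNAPSHOT = './snapshot/'    # illustrative default; usually built from FLAGS
MAX_AGENT_STEPS = 60 * 16   # illustrative per-episode step cap


def start_workers(agent_cls, map_name, num_workers=2):
    # Hypothetical helper: one agent and one SC2Env per worker thread,
    # all sharing COUNTER/LOCK for the learning-rate schedule and snapshots.
    threads = []
    for worker_id in range(num_workers):
        agent = agent_cls()
        t = threading.Thread(target=run_thread, args=(agent, map_name, worker_id))
        t.daemon = True
        t.start()
        threads.append(t)
    for t in threads:
        t.join()
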
Example 7
def run_thread(agent_cls, map_name, visualize):
    with sc2_env.SC2Env(map_name=map_name,
                        step_mul=FLAGS.step_mul,
                        game_steps_per_episode=FLAGS.game_steps_per_episode,
                        feature_screen_size=FLAGS.screen_resolution,
                        feature_minimap_size=FLAGS.minimap_resolution,
                        visualize=visualize,
                        use_feature_units=True) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        agent = agent_cls()

        agent_name = FLAGS.agent_file

        # set the path to save the models and graphs
        path = 'graphs/'

        # run the steps
        run_loop.run_loop([agent], env, FLAGS.max_agent_steps)

        agent.plot_hp(path, save=SAVE_PIC)

        if FLAGS.save_replay:
            env.save_replay(agent_cls.__name__)
Example 8
def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(
            map_name=map_name,
            players=[
                sc2_env.Agent(sc2_env.Race.terran),
                # sc2_env.Agent(sc2_env.Race.zerg)
                # sc2_env.Bot(race=sc2_env.Race.zerg, difficulty=sc2_env.Difficulty.very_easy)
            ],
            agent_interface_format=features.AgentInterfaceFormat(
                feature_dimensions=features.Dimensions(screen=64, minimap=64),
                use_feature_units=True),
            step_mul=FLAGS.step_mul,
            visualize=False) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)

        # Only for a single player!
        agent.set_action_spec(env.action_spec())
        replay_buffer = []
        #zerg_agent = ZergAgent()
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate,
                                 counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                        print('saving model {}'.format(counter))
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 9
def run_thread(agent, map_name, visualize):
    players = []
    agent_module, agent_name = FLAGS.agent.rsplit(".", 1)
    players.append(
        sc2_env.Agent(sc2_env.Race[FLAGS.agent_race], FLAGS.agent_name
                      or agent_name))

    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_minimap_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space,
                use_feature_units=FLAGS.use_feature_units),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)

        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate,
                                 counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 10
def run_thread(agent, map_name, visualize):
    with sc2_env.SC2Env(map_name=map_name,
                        agent_race=FLAGS.agent_race,
                        bot_race=FLAGS.bot_race,
                        difficulty=FLAGS.difficulty,
                        step_mul=FLAGS.step_mul,
                        screen_size_px=(FLAGS.screen_resolution,
                                        FLAGS.screen_resolution),
                        minimap_size_px=(FLAGS.minimap_resolution,
                                         FLAGS.minimap_resolution),
                        visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)

        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate,
                                 counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break
                    if counter % 5 == 0:
                        obs = recorder[-1].observation
                        score = obs["score_cumulative"][0]
                        with open('scorelog', 'a') as f:
                            f.write(str(counter) + ' ' + str(score) + '\n')

            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 11
def run_thread(agent, map_name, visualize):
  with sc2_env.SC2Env(
    map_name=map_name,
    agent_race=FLAGS.agent_race,
    bot_race=FLAGS.bot_race,
    difficulty=FLAGS.difficulty,
    step_mul=FLAGS.step_mul,
    screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
    minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
    visualize=visualize) as env:
    env = available_actions_printer.AvailableActionsPrinter(env)
    #TODO don't discard replay_buffer, sample (20) from (2000)
    pc_buffer = deque()
    if FLAGS.training:
      for recorder, is_done in random_run_loop(env, REPLAY_BUFFER_SIZE):
        pc_buffer.append(recorder)
        if is_done:
          break
        # TODO: flag pc_buffer[-1] as a terminal frame (a pysc2 TimeStep's
        # last() is derived from step_type and cannot be assigned to directly)
    # Only for a single player!
    env.reset()
    replay_buffer = []
    for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
      pc_buffer.append(recorder)
      pc_buffer.popleft()

      if FLAGS.training:
        replay_buffer.append(recorder)
        if is_done:
          counter = 0
          with LOCK:
            global COUNTER
            COUNTER += 1
            counter = COUNTER
          # Learning rate schedule
          learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
          agent.update(replay_buffer, FLAGS.discount, pc_buffer, learning_rate, counter)
          replay_buffer = []
          if counter % FLAGS.snapshot_step == 1:
            agent.save_model(SNAPSHOT, counter)
          if counter >= FLAGS.max_steps:
            break
      elif is_done:
        obs = recorder[-1].observation
        score = obs["score_cumulative"][0]
        print('Your score is '+str(score)+'!')
    if FLAGS.save_replay:
      env.save_replay(agent.name)
Example 12
def run_thread(agent, players, map_name, visualize):
    """Run one thread worth of the environment with agents."""
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=None,
                use_feature_units=FLAGS.use_feature_units,
                use_raw_units=FLAGS.use_raw_units),
            step_mul=FLAGS.step_mul,
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)

        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate,
                                 counter)
                    replay_buffer = []
                    if counter % FLAGS.output_step == 1:
                        agent.save_model(OUTPUT, counter)
                    if counter >= FLAGS.max_steps:
                        break
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    print('Your score is ' + str(score) + '!')
            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 13
def train_one_episode(agent, env):
    # Step & update weights in one episode
    # Only for a single player!
    global COUNTER
    replay_buffer = []
    for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS, COUNTER):
        replay_buffer.append(recorder)
        if not is_done:
            continue
        counter = 0
        with LOCK:
            COUNTER += 1
            counter = COUNTER
        # Learning rate schedule
        learning_rate = FLAGS.learning_rate * (1 -
                                               0.9 * counter / FLAGS.max_steps)
        agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
        break
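
The learning-rate schedule repeated in these loops is a linear decay from the base rate down to 10% of it as the episode counter approaches FLAGS.max_steps. Pulled out as a standalone helper for clarity (names are illustrative):

def scheduled_learning_rate(base_lr, counter, max_steps):
    # Linear decay used above: base_lr at counter=0, 0.1 * base_lr at counter=max_steps.
    return base_lr * (1 - 0.9 * counter / max_steps)


# For example, with base_lr=5e-4 and max_steps=10000:
#   counter=0     -> 5.0e-4
#   counter=5000  -> 2.75e-4
#   counter=10000 -> 5.0e-5
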
Example 14
def run_thread(agent, map_name, visualize):
  with sc2_env.SC2Env(
    map_name=map_name,
    agent_race=FLAGS.agent_race,
    bot_race=FLAGS.bot_race,
    difficulty=FLAGS.difficulty,
    step_mul=FLAGS.step_mul,
    agent_interface_format=sc2_env.AgentInterfaceFormat(
        feature_dimensions=sc2_env.Dimensions(
            screen=64,
            minimap=64)),
    visualize=visualize) as env:
    env = available_actions_printer.AvailableActionsPrinter(env)

    # Only for a single player!
    replay_buffer = []
    for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
      if FLAGS.training:
        replay_buffer.append(recorder)
        if is_done:
          counter = 0
          with LOCK:
            global COUNTER
            COUNTER += 1
            counter = COUNTER
          # Learning rate schedule
          learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
          agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
          replay_buffer = []
          if counter % FLAGS.snapshot_step == 1:
            agent.save_model(SNAPSHOT, counter)
          if counter >= FLAGS.max_steps:
            break
      elif is_done:
        obs = recorder[-1].observation
        score = obs["score_cumulative"][0]
        print('Your score is '+str(score)+'!')
    if FLAGS.save_replay:
      env.save_replay(agent.name)
Example 15
def run_thread(agent, map_name, visualize):
  with sc2_env.SC2Env(
    map_name=map_name,
    agent_race=FLAGS.agent_race,
    bot_race=FLAGS.bot_race,
    difficulty=FLAGS.difficulty,
    step_mul=FLAGS.step_mul,
    screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
    minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
    visualize=visualize) as env:
    env = available_actions_printer.AvailableActionsPrinter(env)

    # Only for a single player!
    replay_buffer = []
    for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
      if FLAGS.training:
        replay_buffer.append(recorder)
        if is_done:
          counter = 0
          with LOCK:
            global COUNTER
            COUNTER += 1
            counter = COUNTER
          # Learning rate schedule
          learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
          agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
          replay_buffer = []
          if counter % FLAGS.snapshot_step == 1:
            agent.save_model(SNAPSHOT, counter)
          if counter >= FLAGS.max_steps:
            break
      elif is_done:
        obs = recorder[-1].observation
        score = obs["score_cumulative"][0]
        print('Your score is '+str(score)+'!')
    if FLAGS.save_replay:
      env.save_replay(agent.name)
Example 16
def run_thread(agent, map_name, visualize, mlsh=False):
    scores = list()
    logger.info('Launching new SC2 environment...')
    with sc2_env.SC2Env(map_name=map_name,
                        agent_race=FLAGS.agent_race,
                        bot_race=FLAGS.bot_race,
                        difficulty=FLAGS.difficulty,
                        step_mul=FLAGS.step_mul,
                        screen_size_px=(FLAGS.screen_resolution,
                                        FLAGS.screen_resolution),
                        minimap_size_px=(FLAGS.minimap_resolution,
                                         FLAGS.minimap_resolution),
                        visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)

        logger.info('New SC2 environment launched successfully')
        logger.info('Minigame: %s', map_name)

        ep_counter = 0  # counts episode for this particular thread
        replay_buffer = []  # collects each step's record during an episode so the agent can learn once the episode is done

        for recorder, is_done in run_loop([agent],
                                          env,
                                          MAX_AGENT_STEPS,
                                          mlsh=mlsh,
                                          warmup=FLAGS.warmup_len,
                                          joint=FLAGS.joint_len):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    # end of an episode, agent has interacted with env and now we learn from the "replay"
                    counter = 0
                    with LOCK:
                        # counter counts episode accross all threads:
                        global COUNTER
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate,
                                 counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        logger.info('Saving model to %s', SNAPSHOT)
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break

            if is_done:
                ep_counter += 1
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                scores.append(score)
                # ep_counter is local to this thread; COUNTER above is shared across threads
                logger.info(
                    '[Episode %s] Episode score: %.2f, mean score: %.2f, max score: %.2f',
                    ep_counter, score, np.mean(scores[-300:]), np.max(scores))

        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 17
def run_thread(agent, players, map_name, visualize):
    global COUNTER
    with sc2_env.SC2Env(
            map_name=map_name,
            players=players,
            step_mul=FLAGS.step_mul,
            #screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            #minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            agent_interface_format=sc2_env.parse_agent_interface_format(
                feature_screen=FLAGS.feature_screen_size,
                feature_minimap=FLAGS.feature_minimap_size,
                rgb_screen=FLAGS.rgb_screen_size,
                rgb_minimap=FLAGS.rgb_minimap_size,
                action_space=FLAGS.action_space),
            game_steps_per_episode=FLAGS.game_steps_per_episode,
            visualize=visualize) as env:

        env = available_actions_printer.AvailableActionsPrinter(env)
        max_avg_score = 0.
        # pure evaluation
        if not FLAGS.training:
            evaluate_k_episodes_and_avg(agent, env, k=20)
            exit(0)

        # Only for a single player!
        replay_buffer = []
        for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS,
                                          COUNTER):
            if FLAGS.training:
                replay_buffer.append(recorder)
                if is_done:
                    counter = 0
                    with LOCK:
                        COUNTER += 1
                        counter = COUNTER
                    # Learning rate schedule
                    learning_rate = FLAGS.learning_rate * (
                        1 - 0.9 * counter / FLAGS.max_steps)
                    agent.update(replay_buffer, FLAGS.discount, learning_rate,
                                 counter)
                    replay_buffer = []
                    if counter % FLAGS.snapshot_step == 1:
                        agent.save_model(SNAPSHOT, counter)
                    if counter >= FLAGS.max_steps:
                        break

                    # eval with interval
                    if COUNTER % FLAGS.evaluate_every == 1 and COUNTER >= 0:
                        avg_sc = evaluate_k_episodes_and_avg(
                            agent, env, k=10, write_results=True)
                        if avg_sc > max_avg_score:
                            max_avg_score = avg_sc
                            agent.save_model(SNAPSHOT, COUNTER)
                    print(
                        "Current max average score: {}".format(max_avg_score))

            elif is_done:
                obs = recorder[-1].observation
                score = obs["score_cumulative"][0]
                print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 18
def run_thread(agent, players, map_name, visualize):
  """Run one thread worth of the environment with agents."""
  with sc2_env.SC2Env(
      map_name=map_name,
      players=players,
      agent_interface_format=sc2_env.parse_agent_interface_format(
          feature_screen=FLAGS.feature_minimap_size,
          feature_minimap=FLAGS.feature_minimap_size,
          rgb_screen=FLAGS.rgb_screen_size,
          rgb_minimap=FLAGS.rgb_minimap_size,
          action_space=FLAGS.action_space,
          use_feature_units=FLAGS.use_feature_units),
      step_mul=FLAGS.step_mul,
      game_steps_per_episode=FLAGS.game_steps_per_episode,
      disable_fog=FLAGS.disable_fog,
      visualize=visualize) as env:
    env = available_actions_printer.AvailableActionsPrinter(env)
    #agents = [agent_cls() for agent_cls in agent_classes]

    start_at = 0
    global total_score
    replay_buffer = []
    for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
      if FLAGS.training:
        replay_buffer.append(recorder)
        if is_done:
          counter = 0
          obs = recorder[-1].observation
          score = obs["score_cumulative"][0]

          with LOCK:
            global COUNTER
            COUNTER += 1
            if start_at == 0:
              start_at = COUNTER
            counter = COUNTER
            total_score += score
            mean_score = total_score / (COUNTER - start_at + 1)  # +1 avoids dividing by zero on the first episode

          # Learning rate schedule
          learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
          agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
          replay_buffer = []
          if counter % FLAGS.snapshot_step == 1:
            agent.save_model(SNAPSHOT, counter)
          if counter >= FLAGS.max_steps:
            break
            
          # i want a diagram!
          summary = tf.Summary()
          summary.value.add(tag='episode_score', simple_value=score)
          summary_writer.add_summary(summary, COUNTER)
        
          summary.value.add(tag='mean_score', simple_value=mean_score)
          summary_writer.add_summary(summary, COUNTER)
          logging.info("Your score is: %s, mean score is %s !", str(score), str(mean_score))
          #print('Your score is '+str(score)+'!')

      elif is_done:
        start_at += 1
        obs = recorder[-1].observation
        score = obs["score_cumulative"][0]
        total_score += score
        mean_score = total_score/start_at

        # i want a diagram!
        summary = tf.Summary()
        summary.value.add(tag='episode_score', simple_value=score)
        summary_writer.add_summary(summary, COUNTER)
      
        summary.value.add(tag='mean_score', simple_value=mean_score)
        summary_writer.add_summary(summary, COUNTER)
        logging.info("Your score is: %s, mean score is %s !", str(score), str(mean_score))

    if FLAGS.save_replay:
      env.save_replay(agent.name)
Example 19
print('action space: {}'.format(env.action_space))
print('act low/high: {} {}'.format(env.action_space.low,
                                   env.action_space.high))
print('obs low/high: {} {}'.format(env.observation_space.low,
                                   env.observation_space.high))

observers = _build_observers(env)
exploration_rate = decaying_value.DecayingValue(0.2, 0.1, TRAIN_EPISODES)

replay_buffer = memory.Memory(100000, env.observation_space.high.shape)
positive_demos = memory.from_demonstrations('positive_demos/',
                                            env.observation_space.high.shape)
# negative_demos = memory.from_demonstrations('negative_demos/', env.observation_space.high.shape)

agent = ddpg_agent.DDPGAgent(env.action_space, env.observation_space,
                             exploration_rate, replay_buffer, positive_demos,
                             None)

agent.pretrain_actor(2000)
agent.pretrain_critic(2000)

run_loop.run_loop(env, agent, TRAIN_EPISODES, MAX_STEPS_PER_EPISODE, observers)
wait = raw_input("Finished Training")

agent.set_learning(False)

observers.append(observer.Renderer(env, 20.))
# agent = keyboard_agent.KeyboardAgent(env)
run_loop.run_loop(env, agent, 10, None, observers)
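
Examples 19 and 21 drive a gym-style run_loop(env, agent, episodes, max_steps, observers) in which every observer is called once per step with (env, agent, episode, cur_iter, obs, action, reward). A rough sketch of such a loop; the agent methods act and observe are assumed names here, not the actual DDPG/DQN agent API.

def run_loop(env, agent, num_episodes, max_steps_per_episode, observers):
    # Gym-style loop sketch matching the observer signature used above.
    for episode in range(num_episodes):
        obs = env.reset()
        cur_iter = 0
        done = False
        while not done and (max_steps_per_episode is None
                            or cur_iter < max_steps_per_episode):
            action = agent.act(obs)                              # assumed agent method
            next_obs, reward, done, _ = env.step(action)
            agent.observe(obs, action, reward, next_obs, done)   # assumed agent method
            for observer in observers:
                observer(env, agent, episode, cur_iter, obs, action, reward)
            obs = next_obs
            cur_iter += 1
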
Example 20
def run_thread(agent, map_name, visualize, update_unitsel=True, max_steps=FLAGS.max_steps, update_main=True,
               use_unitsel=False, update_both=True):
    with sc2_env.SC2Env(
            map_name=map_name,
            agent_race=FLAGS.agent_race,
            bot_race=FLAGS.bot_race,
            difficulty=FLAGS.difficulty,
            step_mul=FLAGS.step_mul,
            screen_size_px=(FLAGS.screen_resolution, FLAGS.screen_resolution),
            minimap_size_px=(FLAGS.minimap_resolution, FLAGS.minimap_resolution),
            visualize=visualize) as env:
        env = available_actions_printer.AvailableActionsPrinter(env)
        # Only for a single player!
        replay_buffer = []
        if agent_name == 'UnitSelAgent':
            for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS, use_unitsel):
                if FLAGS.training:
                    replay_buffer.append(recorder)
                    if is_done:
                        counter = 0
                        with LOCK:
                            global COUNTER, SAVE_COUNTER, MAIN_UPDATE_COUNTER, UNIT_SEL_COUNTER
                            COUNTER += 1
                            SAVE_COUNTER += 1
                            counter = COUNTER
                            if update_unitsel:
                                UNIT_SEL_COUNTER += 1
                            if update_main:
                                MAIN_UPDATE_COUNTER += 1
                        # Learning rate schedule
                        learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                        if update_unitsel:
                            agent.update_unitsel(replay_buffer, FLAGS.discount, learning_rate, counter)
                        if update_main:
                            agent.update_main_policy(replay_buffer, FLAGS.discount, learning_rate, counter)
                        replay_buffer = []
                        if SAVE_COUNTER % FLAGS.snapshot_step == 1:
                            agent.save_model(SNAPSHOT, SAVE_COUNTER)
                        if counter >= max_steps:
                            break
                if is_done:
                    agent.reset_init_counter()
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    global SCORE_BUFFER, MAX_SCORE

                    if len(SCORE_BUFFER) == 100:
                        SCORE_BUFFER.pop(0)
                    SCORE_BUFFER.append(score)
                    if len(SCORE_BUFFER) == 100:
                        avg_score = sum(SCORE_BUFFER) / 100
                    else:
                        avg_score = 0

                    if score > MAX_SCORE:
                        MAX_SCORE = score
                    print('Agent has been trained a total of ' + str(SAVE_COUNTER) + ' times')
                    print('Unit selector has been updated ' + str(UNIT_SEL_COUNTER) + ' times')
                    print('Main policy has been updated ' + str(MAIN_UPDATE_COUNTER) + ' times')
                    print('Your score is ' + str(score) + '!')
                    print('Average score over the last 100 runs: ' + str(avg_score))
                    print('Maximum score is: ' + str(MAX_SCORE))
        else:
            for recorder, is_done in run_loop([agent], env, MAX_AGENT_STEPS):
                if FLAGS.training:
                    replay_buffer.append(recorder)
                    if is_done:
                        counter = 0
                        with LOCK:
                            global COUNTER
                            global SAVE_COUNTER
                            global BOTH_UPDATE_COUNTER
                            global MASTER_UPDATE_COUNTER
                            COUNTER += 1
                            SAVE_COUNTER += 1
                            counter = COUNTER
                            if update_both:
                                BOTH_UPDATE_COUNTER += 1
                            else:
                                MASTER_UPDATE_COUNTER += 1
                        # Learning rate schedule
                        learning_rate = FLAGS.learning_rate * (1 - 0.9 * counter / FLAGS.max_steps)
                        if update_both:
                            agent.update_master_policy(replay_buffer, FLAGS.discount, learning_rate, counter)
                            agent.update(replay_buffer, FLAGS.discount, learning_rate, counter)
                        else:
                            agent.update_master_policy(replay_buffer, FLAGS.discount, learning_rate, counter)
                        replay_buffer = []
                        if SAVE_COUNTER % FLAGS.snapshot_step == 1:
                            agent.save_model(SNAPSHOT, SAVE_COUNTER)
                        if counter >= max_steps:
                            break
                if is_done:
                    obs = recorder[-1].observation
                    score = obs["score_cumulative"][0]
                    print('Agent has been trained a total of ' + str(SAVE_COUNTER) + ' times')
                    print('Master has been updated ' + str(MASTER_UPDATE_COUNTER) + ' times')
                    print('Both policies have been updated ' + str(BOTH_UPDATE_COUNTER) + ' times')
                    print('Your score is ' + str(score) + '!')
        if FLAGS.save_replay:
            env.save_replay(agent.name)
Example 21
class RewardTracker(object):
    def __call__(self, env, agent, episode, cur_iter, obs, action, reward):
        self._episode_reward = getattr(self, '_episode_reward', 0.0) + reward
        if cur_iter == 0:  # new episode: report the accumulated reward and reset
            print("agent average reward {}: {}".format(episode, self._episode_reward))
            self._episode_reward = 0.0

def render_observer(env, agent, episode, cur_iter, obs, action, reward):
    env.render()
    time.sleep(0.025)

def _build_observers():
    observers = []
    # observers.append(lambda env, agent, episode, cur_iter, obs, action, reward: )
    observers.append(RewardTracker())
    return observers


# env = gym.make('MountainCar-v0')
env = gym.make('CartPole-v0')

exploration_rate = decaying_value.DecayingValue(1.0, 0.1, TRAIN_EPISODES)
beta = decaying_value.DecayingValue(0.4, 1.0, TRAIN_EPISODES)
memory = memory.Memory(50000, env.observation_space.high.shape, 0.6, beta)
agent = dqn_learner.DQNLearner(env.action_space, env.observation_space, exploration_rate, memory)
observers = _build_observers()

run_loop.run_loop(env, agent, TRAIN_EPISODES, MAX_STEPS_PER_EPISODE, observers)
wait = raw_input("Finished Training")

agent.set_learning(False)
run_loop.run_loop(env, agent, 2, None, [render_observer])