Code Example #1
File: dqn.py  Project: UserAB1236872/abp
def run_task(evaluation_config, network_config, reinforce_config):
    env = TowerExample()

    max_episode_steps = 10000

    state = env.reset()

    TOWER_BR, TOWER_BL, TOWER_TR, TOWER_TL = [1, 2, 3, 4]

    choose_tower = DQNAdaptive(
        name="tower",
        choices=[TOWER_BR, TOWER_BL, TOWER_TR, TOWER_TL],
        network_config=network_config,
        reinforce_config=reinforce_config)

    training_summaries_path = evaluation_config.summaries_path + "/train"
    clear_summary_path(training_summaries_path)
    train_summary_writer = tf.summary.FileWriter(training_summaries_path)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        total_reward = 0
        episode_summary = tf.Summary()

        start_time = time.time()
        tower_to_kill, _ = choose_tower.predict(state.state)
        end_time = time.time()

        action = env.new_action()

        env_start_time = time.time()
        action.attack_quadrant(tower_to_kill)

        state = env.act(action)

        counter = 0

        choose_tower.reward(state.reward)

        total_reward += state.reward

        if state.is_terminal():
            logger.info("End of episode %d!" % (episode + 1))
            logger.info("Total reward: %d" % (total_reward))

        env_end_time = time.time()

        logger.debug("Counter: %d" % counter)
        logger.debug("Neural Network Time: %.2f" % (end_time - start_time))
        logger.debug("Env Time: %.2f" % (env_end_time - env_start_time))

        choose_tower.end_episode(state.state)

        episode_summary.value.add(tag="Reward", simple_value=total_reward)
        train_summary_writer.add_summary(episode_summary, episode + 1)

    train_summary_writer.flush()

    choose_tower.disable_learning()

    test_summaries_path = evaluation_config.summaries_path + "/test"
    clear_summary_path(test_summaries_path)
    test_summary_writer = tf.summary.FileWriter(test_summaries_path)

    choose_tower.explanation = True

    explanation = Explanation("Tower Capture", (40, 40))
    chart = BarChart("Move Explanation", "Actions", "QVal By Reward Type")
    layer_names = ["HP", "Type 1", "Type 2", "Type 3", "Friend", "Enemy"]

    # Test Episodes
    for episode in range(evaluation_config.test_episodes):
        state = env.reset(visualize=evaluation_config.render, record=True)
        total_reward = 0
        episode_summary = tf.Summary()

        tower_to_kill, q_values, saliencies = choose_tower.predict(state.state)

        choices = env.actions()['actions']

        for choice, action_value in choices.items():
            key = choice
            explanation.add_layers(layer_names,
                                   saliencies[action_value - 1],
                                   key=key)
            group = BarGroup("Attack {}".format(choice), saliency_key=key)

            key = choice + "_Overall"
            explanation.add_layers(layer_names,
                                   saliencies[action_value - 1],
                                   key=key)
            bar = Bar("Attack {}".format(choice),
                      q_values[action_value - 1],
                      saliency_key=key)
            group.add_bar(bar)
            chart.add_bar_group(group)

        explanation.with_bar_chart(chart)

        action = env.new_action()

        action.attack_quadrant(tower_to_kill)
        action.skip = not evaluation_config.render

        state = env.act(action, explanation=explanation)

        while not state.is_terminal():
            time.sleep(0.5)
            action = env.new_action()
            action.skip = False
            state = env.act(action, explanation=explanation)

        total_reward += state.reward

        time.sleep(10)

        if state.is_terminal():
            logger.info("End of episode %d!" % (episode + 1))
            logger.info("Total reward: %d" % (total_reward))

        episode_summary.value.add(tag="Reward", simple_value=total_reward)
        test_summary_writer.add_summary(episode_summary, episode + 1)

    test_summary_writer.flush()
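
Note: Example 1 logs per-episode rewards with the TensorFlow 1.x summary API (a tf.Summary protobuf written through tf.summary.FileWriter). A minimal sketch of that pattern in isolation, assuming TF 1.x and a placeholder log directory:

import tensorflow as tf

# Assumes TensorFlow 1.x; "/tmp/summaries/train" is a placeholder path.
writer = tf.summary.FileWriter("/tmp/summaries/train")
for episode in range(3):
    summary = tf.Summary()
    summary.value.add(tag="Reward", simple_value=float(episode * 10))
    writer.add_summary(summary, global_step=episode + 1)
writer.flush()
writer.close()
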
Code Example #2
def run_task(evaluation_config, network_config, reinforce_config):
    env = gym.make(evaluation_config.env)
    max_episode_steps = env._max_episode_steps
    state = env.reset()

    UP, DOWN = [2, 3]
    choices = [UP, DOWN]

    agent = DQNAdaptive(name="Pong",
                        choices=choices,
                        network_config=network_config,
                        reinforce_config=reinforce_config)

    training_summaries_path = evaluation_config.summaries_path + "/train"
    train_summary_writer = SummaryWriter(training_summaries_path)

    test_summaries_path = evaluation_config.summaries_path + "/test"
    test_summary_writer = SummaryWriter(test_summaries_path)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        total_reward = 0
        for steps in range(max_episode_steps):
            action, q_values = agent.predict(np.rollaxis(state, 2))

            state, reward, done, info = env.step(action)

            agent.reward(reward)  # Reward for every step

            total_reward += reward

            if done:
                agent.end_episode(np.rollaxis(state, 2))
                train_summary_writer.add_scalar(tag="Episode Reward",
                                                scalar_value=total_reward,
                                                global_step=episode + 1)
                break

    agent.disable_learning()

    for episode in range(evaluation_config.test_episodes):
        state = env.reset()
        total_reward = 0

        for step in range(max_episode_steps):
            if evaluation_config.render:
                env.render()

            action, q_values = agent.predict(np.rollaxis(state, 2))

            state, reward, done, info = env.step(action)

            total_reward += reward

            if done:
                test_summary_writer.add_scalar(tag="Episode Reward",
                                               scalar_value=total_reward,
                                               global_step=episode + 1)
                break

    env.close()
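
Note: the agent in Example 2 is fed np.rollaxis(state, 2) rather than the raw Gym frame. rollaxis moves axis 2 to the front, converting the (height, width, channels) observation into the channels-first layout a convolutional network typically expects. A quick self-contained check:

import numpy as np

frame = np.zeros((210, 160, 3))  # Atari-style (H, W, C) observation
chw = np.rollaxis(frame, 2)      # roll axis 2 to the front
print(chw.shape)                 # (3, 210, 160)
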
Code Example #3
def run_task(evaluation_config, network_config, reinforce_config):
    env = gym.make(evaluation_config.env)
    state = env.reset(state_representation="linear")
    LEFT, RIGHT, UP, DOWN = [0, 1, 2, 3]
    choices = [LEFT, RIGHT, UP, DOWN]

    agent = DQNAdaptive(name="FruitCollecter",
                        choices=choices,
                        network_config=network_config,
                        reinforce_config=reinforce_config)

    training_summaries_path = evaluation_config.summaries_path + "/train"
    clear_summary_path(training_summaries_path)
    train_summary_writer = SummaryWriter(training_summaries_path)

    test_summaries_path = evaluation_config.summaries_path + "/test"
    clear_summary_path(test_summaries_path)
    test_summary_writer = SummaryWriter(test_summaries_path)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset(state_representation="linear")
        total_reward = 0
        done = False
        steps = 0
        while not done:
            steps += 1
            action, q_values = agent.predict(state)
            state, reward, done, info = env.step(action)

            agent.reward(reward)

            total_reward += reward

        agent.end_episode(state)
        train_summary_writer.add_scalar(tag="Train/Episode Reward",
                                        scalar_value=total_reward,
                                        global_step=episode + 1)

        train_summary_writer.add_scalar(tag="Train/Steps to collect all Fruits",
                                        scalar_value=steps + 1,
                                        global_step=episode + 1)

    agent.disable_learning()

    # Test Episodes
    for episode in range(evaluation_config.test_episodes):
        state = env.reset(state_representation="linear")
        total_reward = 0
        done = False
        steps = 0

        while not done:
            steps += 1
            action, q_values = agent.predict(state)
            if evaluation_config.render:
                env.render()
                time.sleep(0.5)

            state, reward, done, info = env.step(action)

            total_reward += reward

        agent.end_episode(state)

        test_summary_writer.add_scalar(tag="Test/Episode Reward",
                                       scalar_value=total_reward,
                                       global_step=episode + 1)
        test_summary_writer.add_scalar(tag="Test/Steps to collect all Fruits",
                                       scalar_value=steps + 1,
                                       global_step=episode + 1)

    env.close()
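
Note: Examples 2-5 log with a SummaryWriter whose add_scalar keywords (tag, scalar_value, global_step) match both tensorboardX and torch.utils.tensorboard. A minimal sketch of that logging pattern, assuming torch.utils.tensorboard and a placeholder path:

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("/tmp/summaries/train")  # placeholder path
for episode in range(3):
    writer.add_scalar(tag="Train/Episode Reward",
                      scalar_value=float(episode),
                      global_step=episode + 1)
writer.close()
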
Code Example #4
def run_task(evaluation_config, network_config, reinforce_config):
    flags.FLAGS(sys.argv[:1])  # TODO Fix this!

    env = sc2_env.SC2Env(
        map_name="CollectMineralShards",
        step_mul=8,
        visualize=False,
        save_replay_episodes=0,
        replay_dir='replay',
        game_steps_per_episode=10000,
        agent_interface_format=features.AgentInterfaceFormat(
            feature_dimensions=features.Dimensions(screen=32, minimap=32),
            use_feature_units=True),
    )

    choices = ["Up", "Down", "Left", "Right"]

    agent = DQNAdaptive(name="ShardsCollector",
                        choices=choices,
                        network_config=network_config,
                        reinforce_config=reinforce_config)

    training_summaries_path = evaluation_config.summaries_path + "/train"

    if evaluation_config.training_episodes > 0:
        clear_summary_path(training_summaries_path)

    train_summary_writer = SummaryWriter(training_summaries_path)

    test_summaries_path = evaluation_config.summaries_path + "/test"
    clear_summary_path(test_summaries_path)
    test_summary_writer = SummaryWriter(test_summaries_path)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        actions = ActionWrapper(state, grid_size=32).select(["SelectMarine1"])
        state = env.step(actions)
        total_reward = 0
        done = False
        steps = 0
        model_time = 0
        while not done:
            steps += 1
            model_start_time = time.time()
            action, q_values = agent.predict(
                state[0].observation.feature_screen)
            model_time += (time.time() - model_start_time)

            actions = ActionWrapper(state, grid_size=32).select([action])

            state = env.step(actions)

            agent.reward(state[0].reward)

            total_reward += state[0].reward

            done = state[0].step_type == environment.StepType.LAST

        agent.end_episode(state[0].observation.feature_screen)

        train_summary_writer.add_scalar(tag="Train/Episode Reward",
                                        scalar_value=total_reward,
                                        global_step=episode + 1)

        train_summary_writer.add_scalar(
            tag="Train/Steps to collect all shards",
            scalar_value=steps + 1,
            global_step=episode + 1)

    agent.disable_learning()

    # Test Episodes
    for episode in range(evaluation_config.test_episodes):
        state = env.reset()
        actions = ActionWrapper(state, grid_size=32).select(["SelectMarine1"])
        state = env.step(actions)
        total_reward = 0
        done = False
        steps = 0
        model_time = 0
        while steps < 1000 and not done:
            steps += 1
            model_start_time = time.time()
            action, q_values = agent.predict(
                state[0].observation.feature_screen)

            if evaluation_config.render:
                time.sleep(evaluation_config.sleep)

            model_time += (time.time() - model_start_time)

            actions = ActionWrapper(state, grid_size=32).select([action])

            state = env.step(actions)

            total_reward += state[0].reward

            done = state[0].step_type == environment.StepType.LAST

        test_summary_writer.add_scalar(tag="Test/Episode Reward",
                                       scalar_value=total_reward,
                                       global_step=episode + 1)
        test_summary_writer.add_scalar(tag="Test/Steps to collect all Fruits",
                                       scalar_value=steps + 1,
                                       global_step=episode + 1)

    env.close()
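
Note: Example 4 detects episode termination by comparing the first agent's TimeStep.step_type against StepType.LAST, since a PySC2 env.step() returns a list of per-agent timesteps. A minimal sketch of that loop shape, with the action-selection function act_fn left as a stand-in:

from pysc2.env import environment

def run_episode(env, act_fn):
    """Step until PySC2 signals StepType.LAST; act_fn is a stand-in policy."""
    timesteps = env.reset()
    total_reward = 0
    while timesteps[0].step_type != environment.StepType.LAST:
        timesteps = env.step(act_fn(timesteps))
        total_reward += timesteps[0].reward
    return total_reward
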
Code Example #5
def run_task(evaluation_config, network_config, reinforce_config, log=True):
    env = gym.make(evaluation_config.env)
    max_episode_steps = env._max_episode_steps
    state = env.reset()

    threshold_angle = 0.087266463
    threshold_x = 1.5
    LEFT, RIGHT = [0, 1]

    agent = DQNAdaptive(name="cartpole",
                        choices=[LEFT, RIGHT],
                        network_config=network_config,
                        reinforce_config=reinforce_config)

    if log:
        training_summaries_path = evaluation_config.summaries_path + "/train"
        clear_summary_path(training_summaries_path)
        train_summary_writer = SummaryWriter(training_summaries_path)

        test_summaries_path = evaluation_config.summaries_path + "/test"
        clear_summary_path(test_summaries_path)
        test_summary_writer = SummaryWriter(test_summaries_path)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        total_reward = 0
        for steps in range(max_episode_steps):
            action, q_values = agent.predict(state)
            state, reward, done, info = env.step(action)
            cart_position, cart_velocity, pole_angle, pole_velocity = state

            agent.reward(reward)  # Reward for every step

            # Reward for pole angle increase or decrease
            if -threshold_angle < pole_angle < threshold_angle:
                agent.reward(1)
            else:
                agent.reward(-1)

            if done and steps < max_episode_steps - 1:
                agent.reward(-40)  # Penalty for failing before the step cap

            if -threshold_x < cart_position < threshold_x:
                agent.reward(1)
            else:
                agent.reward(-1)

            total_reward += reward

            if done:
                agent.end_episode(state)
                if log:
                    train_summary_writer.add_scalar(tag="Episode Reward",
                                                    scalar_value=total_reward,
                                                    global_step=episode + 1)
                break

    # train_summary_writer.flush()

    agent.disable_learning()

    for episode in range(evaluation_config.test_episodes):
        state = env.reset()
        total_reward = 0

        for step in range(max_episode_steps):
            if evaluation_config.render:
                env.render()

            action, q_values = agent.predict(state)

            state, reward, done, info = env.step(action)

            total_reward += reward

            if done:
                if log:
                    test_summary_writer.add_scalar(tag="Episode Reward",
                                                   scalar_value=total_reward,
                                                   global_step=episode + 1)
                    print('Episode Reward:', total_reward)
                break

    env.close()
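
Note: Example 5's shaping thresholds are easier to read in degrees: threshold_angle = 0.087266463 is 5 degrees expressed in radians, so the pole earns +1 while within roughly 5 degrees of vertical, and threshold_x = 1.5 keeps the cart well inside CartPole's 2.4-unit track limit. A quick check of the angle constant:

import math

print(math.radians(5))            # 0.08726646259971647 == threshold_angle
print(math.degrees(0.087266463))  # ~5.0 degrees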