Example 1
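These snippets omit their import preamble. A plausible one, assuming TensorFlow 1.x for the summary writers and the project's own packages for the environment and explanation classes (exact module paths are not shown in the snippets):

import logging
import operator
import time

import numpy as np
import tensorflow as tf  # tf.summary.FileWriter / tf.Summary are TF 1.x APIs

# TowerExample, CityAttack, DQNAdaptive, HRAAdaptive, Explanation,
# SkyExplanation, Saliency, BarChart, BarGroup, Bar, clear_summary_path,
# ReplayFixHelper, and CityState come from the project's own modules.

logger = logging.getLogger(__name__)
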
def run_task(evaluation_config, network_config, reinforce_config):
    env = TowerExample()

    max_episode_steps = 10000

    state = env.reset()

    TOWER_BR, TOWER_BL, TOWER_TR, TOWER_TL = [1, 2, 3, 4]

    choose_tower = DQNAdaptive(
        name="tower",
        choices=[TOWER_BR, TOWER_BL, TOWER_TR, TOWER_TL],
        network_config=network_config,
        reinforce_config=reinforce_config)

    training_summaries_path = evaluation_config.summaries_path + "/train"
    clear_summary_path(training_summaries_path)
    train_summary_writer = tf.summary.FileWriter(training_summaries_path)

    #Training Episodes
    for episode in range(evaluation_config.training_episodes):
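        # Each training episode here is a single decision: query the adaptive
        # for a tower quadrant, attack it, and feed the resulting reward back.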
        state = env.reset()
        total_reward = 0
        episode_summary = tf.Summary()

        start_time = time.time()
        tower_to_kill, _ = choose_tower.predict(state.state)
        end_time = time.time()

        action = env.new_action()

        env_start_time = time.time()
        action.attack_quadrant(tower_to_kill)

        state = env.act(action)

        counter = 0

        choose_tower.reward(state.reward)

        total_reward += state.reward

        if state.is_terminal():
            logger.info("End Episode of episode %d!" % (episode + 1))
            logger.info("Total Reward %d!" % (total_reward))

        env_end_time = time.time()

        logger.debug("Counter: %d" % counter)
        logger.debug("Neural Network Time: %.2f" % (end_time - start_time))
        logger.debug("Env Time: %.2f" % (env_end_time - env_start_time))

        choose_tower.end_episode(state.state)

        episode_summary.value.add(tag="Reward", simple_value=total_reward)
        train_summary_writer.add_summary(episode_summary, episode + 1)

    train_summary_writer.flush()

    choose_tower.disable_learning()

    test_summaries_path = evaluation_config.summaries_path + "/test"
    clear_summary_path(test_summaries_path)
    test_summary_writer = tf.summary.FileWriter(test_summaries_path)

    choose_tower.explanation = True

    explanation = Explanation("Tower Capture", (40, 40))
    chart = BarChart("Move Explanation", "Actions", "QVal By Reward Type")
    layer_names = ["HP", "Type 1", "Type 2", "Type 3", "Friend", "Enemy"]

    #Test Episodes
    for episode in range(evaluation_config.test_episodes):
        state = env.reset(visualize=evaluation_config.render, record=True)
        total_reward = 0
        episode_summary = tf.Summary()

        tower_to_kill, q_values, saliencies = choose_tower.predict(state.state)

        choices = env.actions()['actions']

        for choice, action_value in choices.items():
            key = choice
            explanation.add_layers(layer_names,
                                   saliencies[action_value - 1],
                                   key=key)
            group = BarGroup("Attack {}".format(choice), saliency_key=key)

            key = choice + "_Overall"
            explanation.add_layers(layer_names,
                                   saliencies[action_value - 1],
                                   key=key)
            bar = Bar("Attack {}".format(choice),
                      q_values[action_value - 1],
                      saliency_key=key)
            group.add_bar(bar)
            chart.add_bar_group(group)

        explanation.with_bar_chart(chart)

        action = env.new_action()

        action.attack_quadrant(tower_to_kill)
        action.skip = not evaluation_config.render

        state = env.act(action, explanation=explanation)

        while not state.is_terminal():
            time.sleep(0.5)
            action = env.new_action()
            action.skip = False
            state = env.act(action, explanation=explanation)

        total_reward += state.reward

        time.sleep(10)

        if state.is_terminal():
            logger.info("End Episode of episode %d!" % (episode + 1))
            logger.info("Total Reward %d!" % (total_reward))

        episode_summary.value.add(tag="Reward", simple_value=total_reward)
        test_summary_writer.add_summary(episode_summary, episode + 1)

    test_summary_writer.flush()
Example 2
def run_task(evaluation_config, network_config, reinforce_config):
    env = TowerExample("multi_step")

    reward_types = sorted(env.reward_types())
    decomposed_rewards = {reward_type: 0 for reward_type in reward_types}

    state = env.reset()

    actions = env.actions()['actions']
    actions = sorted(actions.items(), key=operator.itemgetter(1))
    choice_descriptions = list(map(lambda x: x[0], actions))
    choices = list(map(lambda x: x[1], actions))

    # Configure network for reward type
    networks = []
    for reward_type in reward_types:
        name = reward_type
        layers = [{"type": "FC", "neurons": 50}]
        networks.append({"name": name, "layers": layers})

    network_config.networks = networks
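    # network_config.networks now describes one small sub-network per reward
    # type, e.g. (reward-type names illustrative):
    #   [{"name": "damage", "layers": [{"type": "FC", "neurons": 50}]},
    #    {"name": "kill", "layers": [{"type": "FC", "neurons": 50}]}]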

    choose_tower = HRAAdaptive(name="tower",
                               choices=choices,
                               reward_types=reward_types,
                               network_config=network_config,
                               reinforce_config=reinforce_config)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        total_reward = 0
        step = 1

        while not state.is_terminal():
            step += 1
            (tower_to_kill, q_values,
             combined_q_values) = choose_tower.predict(state.state.flatten())

            action = env.new_action()
            action.attack_quadrant(tower_to_kill)
            action.skip = True
            state = env.act(action)

            for reward_type, reward in state.typed_reward.items():
                choose_tower.reward(reward_type, reward)
                total_reward += reward

        choose_tower.end_episode(state.state.flatten())

        logger.debug("Episode %d : %d, Step: %d" %
                     (episode + 1, total_reward, step))

    choose_tower.disable_learning()

    # Test Episodes
    for episode in range(evaluation_config.test_episodes):
        layer_names = [
            "HP", "Agent Location", "Small Towers", "Big Towers", "Friend",
            "Enemy"
        ]

        saliency_explanation = Saliency(choose_tower)

        state = env.reset(visualize=evaluation_config.render, record=True)
        total_reward = 0
        step = 0

        while not state.is_terminal():
            step += 1
            explanation = SkyExplanation("Tower Capture", (40, 40))
            (tower_to_kill, q_values,
             combined_q_values) = choose_tower.predict(state.state.flatten())

            q_values = q_values.data.numpy()
            combined_q_values = combined_q_values.data.numpy()
            saliencies = saliency_explanation.generate_saliencies(
                step,
                state.state.flatten(),
                choice_descriptions,
                layer_names,
                reshape=state.state.shape)

            decomposed_q_chart = BarChart("Q Values", "Actions",
                                          "QVal By Reward Type")
            for choice_idx, choice in enumerate(choices):
                key = choice_descriptions[choice_idx]
                group = BarGroup("Attack {}".format(key), saliency_key=key)
                explanation.add_layers(layer_names, saliencies["all"], key)

                for reward_index, reward_type in enumerate(reward_types):
                    key = "{}_{}".format(choice, reward_type)
                    bar = Bar(reward_type,
                              q_values[reward_index][choice_idx],
                              saliency_key=key)
                    group.add_bar(bar)
                    explanation.add_layers(layer_names,
                                           saliencies[reward_type],
                                           key=key)

                decomposed_q_chart.add_bar_group(group)

            explanation.with_bar_chart(decomposed_q_chart)

            action = env.new_action()
            action.attack_quadrant(tower_to_kill)
            action.skip = True

            state = env.act(action, explanation=explanation)

            time.sleep(0.5)

            total_reward += state.reward

        logger.info("End Episode of episode %d with %d steps" %
                    (episode + 1, step))
        logger.info("Total Reward %d!" % (total_reward))
Example 3
def run_task(evaluation_config, network_config, reinforce_config):
    '''
    The environment is created only so we can build actions; the states
    themselves are read back from a recorded replay via ReplayFixHelper.
    '''
    env = CityAttack()
    # env = CityAttack("city_attack_static/attack_enemy")

    reward_types = sorted(env.reward_types())
    decomposed_rewards = {reward_type: 0 for reward_type in reward_types}

    state = env.reset()

    actions = env.actions()['actions']
    actions = sorted(actions.items(), key=operator.itemgetter(1))
    choice_descriptions = list(map(lambda x: x[0], actions))
    choices = list(map(lambda x: x[1], actions))

    # Configure network for reward type
    networks = []
    for reward_type in reward_types:
        name = reward_type
        layers = [{"type": "FC", "neurons": 50}]
        networks.append({"name": name, "layers": layers})

    network_config.networks = networks

    choose_tower = HRAAdaptive(name="tower",
                               choices=choices,
                               reward_types=reward_types,
                               network_config=network_config,
                               reinforce_config=reinforce_config)

    choose_tower.disable_learning()

    layer_names = ["HP", "Tank", "Size", "City/Fort", "Friend/Enemy"]

    replay_fix = ReplayFixHelper(CityState)
    step = 0
    while True:
        state = replay_fix.next()
        if state is None:
            break
        step += 1

        saliency_explanation = Saliency(choose_tower)
        explanation = SkyExplanation("Tower Capture", (40, 40))

        (tower_to_kill, q_values,
         combined_q_values) = choose_tower.predict(state.state.flatten())

        print(combined_q_values)

        q_values = q_values.data.numpy()
        combined_q_values = combined_q_values.data.numpy()
        saliencies = saliency_explanation.generate_saliencies(
            step,
            state.state.flatten(),
            choice_descriptions, [
                "HP", "Tank", "Small Bases", "Big Bases", "Big Cities",
                "Small Cities", "Friend", "Enemy"
            ],
            reshape=state.state.shape)

        decomposed_q_chart = BarChart("Q Values", "Actions",
                                      "QVal By Reward Type")
        q_vals = {}
        for choice_idx, choice in enumerate(choices):
            key = choice_descriptions[choice_idx]
            group = BarGroup("Attack {}".format(key), saliency_key=key)
            explanation.add_layers(layer_names, saliencies[choice]["all"], key)
            q_vals[key] = combined_q_values[choice_idx]

            for reward_index, reward_type in enumerate(reward_types):
                key = "{}_{}".format(choice, reward_type)
                bar = Bar(reward_type,
                          q_values[reward_index][choice_idx],
                          saliency_key=key)
                group.add_bar(bar)
                explanation.add_layers(layer_names,
                                       saliencies[choice][reward_type],
                                       key=key)

            decomposed_q_chart.add_bar_group(group)

        explanation.with_bar_chart(decomposed_q_chart)

        action = env.new_action()
        action.attack_quadrant(tower_to_kill)
        action.skip = False

        replay_fix.revise_action(action, explanation)
Example 4
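This fragment comes from inside a larger script; the environment, explanation, chart, current action, and running maximum are assumed to exist already. A plausible setup, following the naming used in Example 1 (the construction details are assumptions):

env = TowerExample()
state = env.reset()

# Map quadrant value -> action description, so actions[quad] works below.
actions = {value: name for name, value in env.actions()['actions'].items()}
layer_names = ["HP", "Type 1", "Type 2", "Type 3", "Friend", "Enemy"]

explanation = Explanation("Tower Capture", (40, 40))
chart = BarChart("Move Explanation", "Actions", "QVal By Reward Type")

act = env.new_action()
max_quad, max_value = None, float("-inf")
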
for quad in range(1, 5):
    layers = np.random.random((40, 40, 6))
    key = "BarGroup{}".format(quad)
    group = BarGroup("Attack {}".format(actions[quad]), saliency_key=key)
    explanation.add_layers(layer_names, layers, key=key)

    value = 0.0
    for r_type in env.reward_types():
        b_layers = np.random.random((40, 40, 6))
        key = "BarGroup{}Bar{}".format(quad, r_type)

        bar_val = np.random.rand()
        value += bar_val
        bar = Bar(r_type, bar_val, saliency_key=key)

        group.add_bar(bar)
        explanation.add_layers(layer_names, b_layers, key=key)

    chart.add_bar_group(group)

    if value > max_value:
        max_quad = quad
        max_value = value

explanation.with_bar_chart(chart)

act.attack_quadrant(max_quad)
s = env.act(act, explanation=explanation)

if not s.is_terminal():
    # The original snippet is truncated here; the message below is assumed.
    raise Exception(
        "Expected the episode to end after attacking quadrant %d" % max_quad)
Example 5
def run_task(evaluation_config, network_config, reinforce_config):
    env = TowerExample()

    reward_types = sorted(env.reward_types())
    decomposed_rewards = {reward_type: 0 for reward_type in reward_types}

    max_episode_steps = 10000

    state = env.reset()

    actions = env.actions()['actions']
    actions = sorted(actions.items(), key=operator.itemgetter(1))
    choice_descriptions = list(map(lambda x: x[0], actions))
    choices = list(map(lambda x: x[1], actions))

    choose_tower = HRAAdaptive(name="Tower",
                               choices=choices,
                               reward_types=reward_types,
                               network_config=network_config,
                               reinforce_config=reinforce_config)

    training_summaries_path = evaluation_config.summaries_path + "/train"
    clear_summary_path(training_summaries_path)
    train_summary_writer = tf.summary.FileWriter(training_summaries_path)

    #Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        total_reward = 0
        episode_summary = tf.Summary()
        step = 1

        while not state.is_terminal():
            step += 1
            tower_to_kill, q_values = choose_tower.predict(state.state)
            action = env.new_action()
            action.attack_quadrant(tower_to_kill)
            action.skip = True
            state = env.act(action)

            for reward_type, reward in state.typed_reward.items():
                choose_tower.reward(reward_type, reward)

        total_reward += state.reward

        choose_tower.end_episode(state.state)

        logger.info("Episode %d : %d, Step: %d" %
                    (episode + 1, total_reward, step))
        episode_summary.value.add(tag="Train/Reward",
                                  simple_value=total_reward)
        train_summary_writer.add_summary(episode_summary, episode + 1)

    train_summary_writer.flush()

    choose_tower.disable_learning()

    test_summaries_path = evaluation_config.summaries_path + "/test"
    clear_summary_path(test_summaries_path)
    test_summary_writer = tf.summary.FileWriter(test_summaries_path)

    #Test Episodes
    for episode in range(evaluation_config.test_episodes):
        contrastive = True
        explanation = SkyExplanation("Tower Capture", (40, 40))
        layer_names = [
            "HP", "Agent Location", "Small Towers", "Big Towers", "Friend",
            "Enemy"
        ]

        adaptive_explanation = Explanation(choose_tower)

        state = env.reset(visualize=evaluation_config.render, record=True)
        total_reward = 0
        episode_summary = tf.Summary()
        step = 0

        while not state.is_terminal():
            step += 1
            tower_to_kill, q_values = choose_tower.predict(state.state)
            combined_q_values = np.sum(q_values, axis=0)
            saliencies = adaptive_explanation.generate_saliencies(
                state.state, contrastive)
            charts = []

            decomposed_q_chart = BarChart("Q Values", "Actions",
                                          "QVal By Reward Type")
            for choice_idx, choice in enumerate(choices):
                key = choice_descriptions[choice_idx]
                explanation.add_layers(layer_names,
                                       saliencies[choice]["all"],
                                       key=key)
                group = BarGroup("Attack {}".format(key), saliency_key=key)

                for reward_index, reward_type in enumerate(reward_types):
                    key = "{}_{}".format(choice, reward_type)
                    bar = Bar(reward_type,
                              q_values[reward_index][choice_idx],
                              saliency_key=key)
                    explanation.add_layers(layer_names,
                                           saliencies[choice][reward_type],
                                           key=key)
                    group.add_bar(bar)

                decomposed_q_chart.add_bar_group(group)

            explanation.with_bar_chart(decomposed_q_chart)

            action = env.new_action()
            action.attack_quadrant(tower_to_kill)
            action.skip = not evaluation_config.render

            state = env.act(action, explanation=explanation)

            time.sleep(0.5)

            total_reward += state.reward

        logger.info("Episode %d : %d, Step: %d" %
                    (episode + 1, total_reward, step))

        episode_summary.value.add(tag="Test/Episode Reward",
                                  simple_value=total_reward)
        test_summary_writer.add_summary(episode_summary, episode + 1)

    test_summary_writer.flush()
Example 6
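Example 6 uses an or_zero helper that is not defined in the snippet; a minimal implementation consistent with how it is called (an assumption, not necessarily the project's actual code):

def or_zero(typed_rewards, reward_type):
    # Return the decomposed reward for this type, or 0 when the environment
    # did not emit that reward type on this step.
    return typed_rewards.get(reward_type, 0)
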
def run_task(evaluation_config, network_config, reinforce_config):
    env = CityAttack()
    # env = CityAttack("city_attack_static/attack_enemy")

    reward_types = sorted(env.reward_types())
    decomposed_rewards = {reward_type: 0 for reward_type in reward_types}

    state = env.reset()

    actions = env.actions()['actions']
    actions = sorted(actions.items(), key=operator.itemgetter(1))
    choice_descriptions = list(map(lambda x: x[0], actions))
    choices = list(map(lambda x: x[1], actions))

    # Configure network for reward type
    networks = []
    for reward_type in reward_types:
        name = reward_type
        layers = [{"type": "FC", "neurons": 50}]
        networks.append({"name": name, "layers": layers})

    network_config.networks = networks

    choose_tower = HRAAdaptive(name="tower",
                               choices=choices,
                               reward_types=reward_types,
                               network_config=network_config,
                               reinforce_config=reinforce_config)

    # Training Episodes
    for episode in range(evaluation_config.training_episodes):
        state = env.reset()
        total_reward = 0
        step = 1

        while not state.is_terminal():
            step += 1
            (tower_to_kill,
             q_values,
             combined_q_values) = choose_tower.predict(state.state.flatten())

            prev_state = state
            action = env.new_action()
            action.attack_quadrant(tower_to_kill)
            action.skip = True
            state = env.act(action)

            for reward_type in reward_types:
                reward = or_zero(state.typed_reward, reward_type)
                choose_tower.reward(reward_type, reward)
                total_reward += reward

        choose_tower.end_episode(state.state.flatten())

        # print("Episode %d : %d, Step: %d" %
        #       (episode + 1, total_reward, step))

    choose_tower.disable_learning()

    # Test Episodes
    for episode in range(evaluation_config.test_episodes):
        # layer_names = ["HP", "Tank", "Small Bases", "Big Bases",
        #                "Big Cities", "Small Cities", "Friend", "Enemy"]

        layer_names = ["HP", "Tank", "Size",
                       "City/Fort", "Friend/Enemy"]

        saliency_explanation = Saliency(choose_tower)

        state = env.reset(visualize=evaluation_config.render, record=True)
        total_reward = 0
        step = 0

        ep_q_vals = []
        # ep_fudged_q_vals = []
        while not state.is_terminal():
            step += 1
            explanation = SkyExplanation("Tower Capture", (40, 40))
            (tower_to_kill,
             q_values,
             combined_q_values) = choose_tower.predict(state.state.flatten())

            q_values = q_values.data.numpy()
            combined_q_values = combined_q_values.data.numpy()
            saliencies = saliency_explanation.generate_saliencies(
                step, state.state.flatten(),
                choice_descriptions,
                ["HP", "Tank", "Small Bases", "Big Bases",
                       "Big Cities", "Small Cities", "Friend", "Enemy"],
                reshape=state.state.shape)

            decomposed_q_chart = BarChart(
                "Q Values", "Actions", "QVal By Reward Type")
            q_vals = {}
            for choice_idx, choice in enumerate(choices):
                key = choice_descriptions[choice_idx]
                group = BarGroup("Attack {}".format(key), saliency_key=key)
                explanation.add_layers(
                    layer_names, saliencies[choice]["all"], key)
                q_vals[key] = combined_q_values[choice_idx]

                for reward_index, reward_type in enumerate(reward_types):
                    key = "{}_{}".format(choice, reward_type)
                    bar = Bar(
                        reward_type, q_values[reward_index][choice_idx], saliency_key=key)
                    group.add_bar(bar)
                    explanation.add_layers(
                        layer_names, saliencies[choice][reward_type], key=key)

                decomposed_q_chart.add_bar_group(group)

            ep_q_vals.append(q_vals)
            explanation.with_bar_chart(decomposed_q_chart)

            # fudged_q_vals = alter_q_vals(state.state, choose_tower, step, choice_descriptions, choices, layer_names)
            # ep_fudged_q_vals.append(fudged_q_vals)

            action = env.new_action()
            action.attack_quadrant(tower_to_kill)
            action.skip = True

            state = env.act(action, explanation=explanation)

            total_reward += state.reward

        print("Q vals for ep:", ep_q_vals)
        # print("Fudged Q vals for ep:", ep_fudged_q_vals)
        print("End Episode of episode %d with %d steps" %
              (episode + 1, step))
        print("Total Reward %d!" % (total_reward))