Example #1
def calculate_feature_importance(
    gym_env: OpenAIGymEnvironment,
    trainer: MDNRNNTrainer,
    use_gpu: bool,
    run_details: OpenAiRunDetails,
    test_batch: rlt.PreprocessedTrainingBatch,
):
    # The evaluation batch is passed in precollected; the run details are
    # only sanity-checked here.
    assert run_details.max_steps is not None
    assert run_details.num_test_episodes is not None
    assert run_details.seq_len is not None
    # Measures how much the world model's loss increases when each state or
    # action feature is perturbed; a larger increase means the feature
    # matters more to the model's predictions.
    feature_importance_evaluator = FeatureImportanceEvaluator(
        trainer,
        discrete_action=gym_env.action_type == EnvType.DISCRETE_ACTION,
        state_feature_num=gym_env.state_dim,
        action_feature_num=gym_env.action_dim,
        sorted_action_feature_start_indices=list(range(gym_env.action_dim)),
        sorted_state_feature_start_indices=list(range(gym_env.state_dim)),
    )
    feature_loss_vector = feature_importance_evaluator.evaluate(test_batch)[
        "feature_loss_increase"
    ]
    # The loss vector lists action features first, then state features.
    feature_importance_map = {}
    for i in range(gym_env.action_dim):
        loss = feature_loss_vector[i].item()
        print(f"action {i}, feature importance: {loss}")
        feature_importance_map[f"action{i}"] = loss
    for i in range(gym_env.state_dim):
        loss = feature_loss_vector[i + gym_env.action_dim].item()
        print(f"state {i}, feature importance: {loss}")
        feature_importance_map[f"state{i}"] = loss
    return feature_importance_map
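
The "feature_loss_increase" returned by the evaluator is a perturbation-style importance score: destroy one feature's signal, re-evaluate the model, and record how much the loss grows. A minimal, self-contained sketch of that idea on a toy regression problem (this illustrates the general technique only; the evaluator above applies it to an MDN-RNN world model's loss, and its exact perturbation scheme may differ):

import numpy as np

rng = np.random.default_rng(0)

# Toy data: y depends strongly on feature 0, weakly on feature 1,
# and not at all on feature 2.
X = rng.normal(size=(1000, 3))
y = 3.0 * X[:, 0] + 0.3 * X[:, 1] + rng.normal(scale=0.1, size=1000)

def predict(X):
    # Stand-in "trained model": the true coefficients.
    return X @ np.array([3.0, 0.3, 0.0])

def mse(X, y):
    return float(np.mean((predict(X) - y) ** 2))

base_loss = mse(X, y)
for j in range(X.shape[1]):
    X_perm = X.copy()
    X_perm[:, j] = rng.permutation(X_perm[:, j])  # destroy feature j's signal
    print(f"feature {j}, loss increase: {mse(X_perm, y) - base_loss:.4f}")

# Feature 0 shows by far the largest loss increase; feature 2's is near zero.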
Example #2

def calculate_feature_importance(
    gym_env: OpenAIGymEnvironment,
    trainer: MDNRNNTrainer,
    use_gpu: bool,
    seq_len: int = 5,
    num_test_episodes: int = 100,
    max_steps: Optional[int] = None,
    **kwargs,
):
    feature_importance_evaluator = FeatureImportanceEvaluator(
        trainer,
        discrete_action=gym_env.action_type == EnvType.DISCRETE_ACTION,
        state_feature_num=gym_env.state_dim,
        action_feature_num=gym_env.action_dim,
        sorted_action_feature_start_indices=list(range(gym_env.action_dim)),
        sorted_state_feature_start_indices=list(range(gym_env.state_dim)),
    )
    # Unlike Example #1, this variant collects its own evaluation data:
    # it rolls out num_test_episodes episodes into a replay buffer and
    # evaluates on everything it collected.
    test_replay_buffer = get_replay_buffer(
        num_test_episodes, seq_len, max_steps, gym_env
    )
    test_batch = test_replay_buffer.sample_memories(
        test_replay_buffer.memory_size, use_gpu=use_gpu, batch_first=True
    )
    feature_loss_vector = feature_importance_evaluator.evaluate(test_batch)[
        "feature_loss_increase"
    ]
    feature_importance_map = {}
    for i in range(gym_env.action_dim):
        loss = feature_loss_vector[i].item()
        print(f"action {i}, feature importance: {loss}")
        feature_importance_map[f"action{i}"] = loss
    for i in range(gym_env.state_dim):
        loss = feature_loss_vector[i + gym_env.action_dim].item()
        print(f"state {i}, feature importance: {loss}")
        feature_importance_map[f"state{i}"] = loss
    return feature_importance_map
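
Because this variant returns the scores as a dict, ranking features afterwards is straightforward. A small, self-contained sketch (the scores below are made up for illustration):

# Made-up output of calculate_feature_importance, for illustration only.
feature_importance_map = {"action0": 0.02, "state0": 0.31, "state1": 0.07}

# Rank features from most to least important (largest loss increase first).
ranked = sorted(feature_importance_map.items(), key=lambda kv: kv[1], reverse=True)
for name, loss_increase in ranked:
    print(f"{name}: {loss_increase:.2f}")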
Example #3
def calculate_feature_importance(gym_env,
                                 trainer,
                                 seq_len=5,
                                 num_test_episodes=100,
                                 max_steps=None,
                                 **kwargs):
    # Untyped variant that relies on FeatureImportanceEvaluator's defaults
    # and only prints the per-feature loss increases instead of returning them.
    feature_importance_evaluator = FeatureImportanceEvaluator(trainer)
    test_replay_buffer = get_replay_buffer(num_test_episodes, seq_len,
                                           max_steps, gym_env)
    test_batch = test_replay_buffer.sample_memories(
        test_replay_buffer.memory_size)
    feature_loss_vector = feature_importance_evaluator.evaluate(
        test_batch)["feature_loss_increase"]
    for i in range(gym_env.action_dim):
        print(f"action {i}, feature importance: {feature_loss_vector[i].item()}")
    for i in range(gym_env.state_dim):
        print(f"state {i}, feature importance: "
              f"{feature_loss_vector[i + gym_env.action_dim].item()}")
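
A detail all three variants share: evaluate() returns one combined loss vector with the action features first and the state features after them, which is why the state loop reads at offset i + gym_env.action_dim. A self-contained illustration of that layout (the values are made up):

import torch

action_dim, state_dim = 2, 3
# Two action-feature scores followed by three state-feature scores.
feature_loss_vector = torch.tensor(
    [0.02, 0.01, 0.31, 0.07, 0.00], dtype=torch.float64
)

for i in range(action_dim):
    print(f"action {i}: {feature_loss_vector[i].item()}")
for i in range(state_dim):
    # State feature i lives at offset action_dim + i in the combined vector.
    print(f"state {i}: {feature_loss_vector[i + action_dim].item()}")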