os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.activations import softmax, relu, linear, tanh

from modern_keras import reinforce
from line_world import LineWorldEnv, A, num_states
import numpy as np

if __name__ == "__main__":

    # tf.compat.v1.disable_eager_execution()
    np.set_printoptions(precision=3, suppress=True)
    env = LineWorldEnv()

    actor_model = Sequential()
    actor_model.add(Dense(64, activation=relu))
    actor_model.add(Dense(len(A), activation=softmax))

    base_line_model = Sequential()
    base_line_model.add(Dense(64, activation=tanh))
    base_line_model.add(Dense(1, activation=linear))

    reinforce(
        actor_model,
        base_line_model,
        env.deep_reset,
        env.deep_get_state,
        env.deep_is_terminal,
os.environ['TF_DISABLE_MKL'] = '1'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

from algorithms import reinforce_with_mask_without_baseline
from brains import SimpleMLPPiSWithMaskBrainReinforce
from line_world import S, A, LineWorldEnv, num_states
import numpy as np

if __name__ == "__main__":
    import tensorflow as tf

    tf.compat.v1.disable_eager_execution()

    brain = SimpleMLPPiSWithMaskBrainReinforce(len(S), len(A), lr=0.0001)
    env = LineWorldEnv()

    reinforce_with_mask_without_baseline(
        env.deep_reset,
        env.deep_get_state,
        env.deep_get_action_mask,
        env.deep_is_terminal,
        env.deep_step_with_mask,
        brain,
        episodes_count=5000,
    )

    states = env.to_categorical(np.arange(num_states))
    mask = env.deep_get_action_mask()

    for s_idx, s in enumerate(states):