## envs import ##
import gym
import time

from environments.carla_enviroments import env_v1_ObstacleAvoidance
from utilities.data_structures.Config import Config

env_title = "ObstacleAvoidance-v0"

config = Config()
config.env_title = env_title
config.seed = 1
config.environment = gym.make(env_title)
config.num_episodes_to_run = 2000
config.show_solution_score = False
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True
config.log_loss = False
config.log_base = time.strftime("%Y%m%d%H%M%S", time.localtime())
config.save_model_freq = 300  ## save the model every 300 episodes
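
## A minimal sketch of the save cadence implied by save_model_freq; the
## locally_save_policy hook is an assumption borrowed from the upstream
## Base_Agent interface, not a confirmed part of this repo.
def maybe_save(agent, episode, config):
    """Persist a checkpoint every config.save_model_freq episodes."""
    if config.save_model and episode % config.save_model_freq == 0:
        agent.locally_save_policy()  ## hypothetical save hook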

config.retrain = True
config.resume = False
config.resume_path = r'E:\reinforcement-learning-based-driving-decision-in-Carla\results\Models\ObstacleAvoidance-v0\DDQN with Prioritised Replay\20200611150242\rolling_score_68.0417.model'
config.backbone_pretrain = False
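
## A hedged sketch of how config.resume / config.resume_path might be used to
## restore a checkpoint; torch.load and load_state_dict are standard PyTorch,
## but the agent attribute name below is a hypothetical placeholder.
import torch

def maybe_resume(agent, config):
    """Load network weights from config.resume_path when resuming."""
    if config.resume:
        checkpoint = torch.load(config.resume_path)  ## assumes the .model file holds a state_dict
        agent.q_network_local.load_state_dict(checkpoint)  ## hypothetical attribute name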

config.force_explore_mode = True
config.force_explore_stare_e = 0.2  ## enter forced-exploration mode when the std of the rolling score over the last 10-episode window falls below this value
config.force_explore_rate = 0.95  ## force exploration only when the current score exceeds force_explore_rate * max(rolling score[-10:])
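
## A minimal sketch of how the two thresholds above might combine into a
## trigger, assuming the agent keeps a rolling_score list; the function and
## its arguments are hypothetical, not the repo's actual API.
import numpy as np

def should_force_explore(rolling_score, config):
    """True when scores have plateaued (low std) near the recent best."""
    if len(rolling_score) < 10:
        return False
    window = rolling_score[-10:]
    plateaued = np.std(window) < config.force_explore_stare_e  ## score variance has collapsed
    near_best = rolling_score[-1] > config.force_explore_rate * max(window)  ## still near the window's best
    return config.force_explore_mode and plateaued and near_best
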
## DIAYN on Walker2d-v2 ##
import gym

from agents.Trainer import Trainer
from hierarchical_agents.DIAYN import DIAYN
from utilities.data_structures.Config import Config

config = Config()
config.seed = 1
config.environment = gym.make("Walker2d-v2")
config.num_episodes_to_run = 400
config.file_to_save_data_results = "data_and_graphs/Walker_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Walker_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False
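
## A hedged sketch of launching training once config.hyperparameters has been
## filled in from the dicts below; Trainer(config, agents) plus
## run_games_for_agents() mirrors the upstream Deep-RL-with-PyTorch usage and
## is an assumption here, not confirmed against this repo.
def run_diayn(config):
    """Train DIAYN under the settings above (call after setting config.hyperparameters)."""
    trainer = Trainer(config, [DIAYN])
    trainer.run_games_for_agents()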

actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "batch_norm": False,
        "tau": 0.005,
        "gradient_clipping_norm": 5,
        "initialiser": "Xavier"
    },
    "Critic": {