## envs import ##
# Importing the env module registers "ObstacleAvoidance-v0" with gym
# (the import is needed for its side effects, not the name).
from environments.carla_enviroments import env_v1_ObstacleAvoidance

## Training configuration for DDQN with Prioritised Replay on the Carla
## obstacle-avoidance environment. All fields are read by the Trainer.
env_title = "ObstacleAvoidance-v0"

config = Config()
config.env_title = env_title
config.seed = 1
config.environment = gym.make(env_title)
config.num_episodes_to_run = 2000
config.show_solution_score = False
config.visualise_individual_results = True
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 1
config.use_GPU = True
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = True
config.log_loss = False
# Timestamp used to name this run's log/model output directory.
config.log_base = time.strftime("%Y%m%d%H%M%S", time.localtime())
config.save_model_freq = 300  ## save model per 300 episodes
config.retrain = True
config.resume = False
# FIX: the original path mixed "\\" with lone "\" (e.g. "\M", "\O", "\D"),
# which only works because Python leaves unknown escapes intact and emits
# SyntaxWarning on modern interpreters. Backslashes are now escaped
# consistently; the resulting path string is byte-identical.
config.resume_path = 'E:\\reinforcement-learning-based-driving-decision-in-Carla\\results\\Models\\ObstacleAvoidance-v0\\DDQN with Prioritised Replay\\20200611150242\\rolling_score_68.0417.model'
config.backbone_pretrain = False
config.force_explore_mode = True
## when the std of the rolling score over the last 10-episode window is
## smaller than this value, start explore mode
config.force_explore_stare_e = 0.2
## force explore only when the current score exceeds this fraction of
## max(rolling score[-10:]).
# NOTE(review): the original comment said 0.8 but the configured value is
# 0.95 — confirm which threshold is intended.
config.force_explore_rate = 0.95
from agents.Trainer import Trainer
from hierarchical_agents.DIAYN import DIAYN
from utilities.data_structures.Config import Config

# Training configuration for the DIAYN hierarchical agent on MuJoCo Walker2d.
config = Config()
config.seed = 1
# NOTE(review): `gym` is not imported in this visible chunk — presumably
# imported earlier in the file; verify.
config.environment = gym.make("Walker2d-v2")
config.num_episodes_to_run = 400
config.file_to_save_data_results = "data_and_graphs/Walker_Results_Data.pkl"
config.file_to_save_results_graph = "data_and_graphs/Walker_Results_Graph.png"
config.show_solution_score = False
config.visualise_individual_results = False
config.visualise_overall_agent_results = True
config.standard_deviation_results = 1.0
config.runs_per_agent = 3
config.use_GPU = False
config.overwrite_existing_results_file = False
config.randomise_random_seed = True
config.save_model = False

# Hyperparameters for the actor-critic networks used by the agent.
# NOTE: this dict literal continues beyond the end of the visible chunk;
# the "Critic" entry (and any later keys) are defined there.
actor_critic_agent_hyperparameters = {
    "Actor": {
        "learning_rate": 0.0003,
        "linear_hidden_units": [64, 64],
        "final_layer_activation": None,
        "batch_norm": False,
        "tau": 0.005,
        "gradient_clipping_norm": 5,
        "initialiser": "Xavier"
    },
    "Critic": {