random_episodes_to_run = 0 action_length_reward_bonus = 0.0 only_train_new_actions = True only_train_final_layer = True reduce_macro_action_appearance_cutoff_throughout_training = False add_1_macro_action_at_a_time = True calculate_q_values_as_increments = True abandon_ship = True clip_rewards = True use_relative_counts = True config.debug_mode = False config.hyperparameters = { "HRL": { "linear_hidden_units": linear_hidden_units, "learning_rate": learning_rate, "buffer_size": buffer_size, "batch_size": batch_size, "final_layer_activation": "None", # "columns_of_data_to_be_embedded": [0], # "embedding_dimensions": [[config.environment.observation_space.n, embedding_dimensionality]], "batch_norm": batch_norm, "gradient_clipping_norm": gradient_clipping_norm, "update_every_n_steps": update_every_n_steps, "epsilon_decay_rate_denominator": epsilon_decay_rate_denominator, "discount_rate": discount_rate,
config.environment = Wrapper(SimpleISC(mode="DISCRETE")) config.num_episodes_to_run = 50 config.file_to_save_data_results = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Data.pkl" config.file_to_save_results_graph = "results/data_and_graphs/isc/IllinoisSolarCar_Results_Graph.png" config.show_solution_score = True config.visualise_individual_results = True config.visualise_overall_agent_results = True config.standard_deviation_results = 1.0 config.runs_per_agent = 1 config.use_GPU = False config.overwrite_existing_results_file = True config.randomise_random_seed = False config.save_model = False config.seed = 0 config.debug_mode = True config.hyperparameters = { "DQN_Agents": { "learning_rate": 0.005, "batch_size": 128, "buffer_size": 100000, "epsilon": 1.0, "epsilon_decay_rate_denominator": 150, "discount_rate": 0.999, "alpha_prioritised_replay": 0.6, "beta_prioritised_replay": 0.1, "incremental_td_error": 1e-8, "update_every_n_steps": 15, "tau": 1e-2,