"batch_norm": False, "tau": 0.01, "gradient_clipping_norm": 5 }, "Critic": { "learning_rate": 0.01, "linear_hidden_units": [400, 300], "final_layer_activation": "None", "batch_norm": False, "buffer_size": 100000, "tau": 0.01, "gradient_clipping_norm": 5 }, "batch_size": 64, "discount_rate": 0.99, "mu": 0.0, # for O-H noise "theta": 0.15, # for O-H noise "sigma": 0.2, # for O-H noise "action_noise_std": 0.2, # for TD3 "action_noise_clipping_range": 0.5, # for TD3 "update_every_n_steps": 1, "learning_updates_per_learning_session": 1, "clip_rewards": False } } if __name__ == "__main__": AGENTS = [DDPG, HIRO] trainer = Trainer(config, AGENTS) trainer.run_games_for_agents()
def predictionToFloat(prediction: Variable) -> float:
    """Extract the scalar value from a single-element prediction.

    Assumes ``prediction.data`` converts to a 1-element numpy array
    (e.g. a torch autograd Variable) — TODO confirm against callers.
    """
    return prediction.data.numpy()[0]


# Earlier CSV-backed experiment, kept for reference.
# steeringData = SteeringTrainingData(pandas.read_csv('training-data/train_data/alpine-1.csv'))
# brakingData = BrakingTrainingData(pandas.read_csv('training-data/train_data/alpine-1.csv'))

# Evaluation loop: replay each recorded state through the driver and collect
# (target, actual) steering pairs for later comparison.
steeringData = ExtendedSteeringData()
observations = []
driver = MyDriver()
for i in range(len(steeringData)):
    state = dataToStateExtended(steeringData[i][0])
    # NOTE(review): removed unused `sample = Trainer.stateToSample(state)` —
    # its result was never read; restore the call if stateToSample turns out
    # to have side effects the loop depends on.
    command = driver.drive(state)
    #
    # observation = {
    #     'target': (brakingData[i][1]).numpy()[0],
    #     'actual': command.brake
    # }
    observation = {
        'target': (steeringData[i][1]).numpy()[0],  # recorded steering label
        'actual': command.steering                  # driver's predicted steering
    }
    observations.append(observation)