コード例 #1
0
            "batch_norm": False,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "Critic": {
            "learning_rate": 0.01,
            "linear_hidden_units": [400, 300],
            "final_layer_activation": "None",
            "batch_norm": False,
            "buffer_size": 100000,
            "tau": 0.01,
            "gradient_clipping_norm": 5
        },
        "batch_size": 64,
        "discount_rate": 0.99,
        "mu": 0.0,  # for O-H noise
        "theta": 0.15,  # for O-H noise
        "sigma": 0.2,  # for O-H noise
        "action_noise_std": 0.2,  # for TD3
        "action_noise_clipping_range": 0.5,  # for TD3
        "update_every_n_steps": 1,
        "learning_updates_per_learning_session": 1,
        "clip_rewards": False
    }
}

if __name__ == "__main__":
    # Script entry point: build a Trainer from the module-level config and the
    # agent classes listed in AGENTS, then call run_games_for_agents() on it.
    AGENTS = [DDPG, HIRO]
    Trainer(config, AGENTS).run_games_for_agents()
コード例 #2
0
def predictionToFloat(prediction: Variable) -> float:
    """Return element 0 of a prediction as a built-in Python float.

    Args:
        prediction: Object exposing ``.data`` whose value can be converted
            with ``.cpu().numpy()`` and indexed along its first axis.

    Returns:
        The value at index 0, converted with ``float()``.

    Raises:
        TypeError: If the item at index 0 is not a single scalar value
            (for example, a row of a multi-column prediction).
    """
    # .cpu() lets the conversion work for data that does not live in host
    # memory; .numpy() alone raises for such tensors.
    first = prediction.data.cpu().numpy()[0]
    # Indexing a numpy array yields a numpy scalar (e.g. numpy.float32), not
    # the built-in float promised by the annotation, so convert explicitly.
    return float(first)


# Compare the driver's steering command with the dataset's target value for
# every entry of the steering dataset, collecting one observation per entry.
#
# Alternative data sources that were used here previously:
#   SteeringTrainingData(pandas.read_csv('training-data/train_data/alpine-1.csv'))
#   BrakingTrainingData(pandas.read_csv('training-data/train_data/alpine-1.csv'))
# For the braking variant, the target came from the braking dataset and the
# actual value was command.brake instead of command.steering.

steeringData = ExtendedSteeringData()

observations = []
driver = MyDriver()

for i in range(len(steeringData)):
    # Look the entry up once; indexing the dataset twice per iteration repeats
    # whatever work its __getitem__ performs.
    entry = steeringData[i]

    state = dataToStateExtended(entry[0])
    # NOTE(review): `sample` is not used in the visible part of this script;
    # kept in case later code or stateToSample side effects rely on it --
    # confirm before removing.
    sample = Trainer.stateToSample(state)

    command = driver.drive(state)

    # entry[1] holds the target; take element 0 of its numpy form so it can be
    # compared with the driver's steering output.
    observation = {
        'target': (entry[1]).numpy()[0],
        'actual': command.steering
    }

    observations.append(observation)