# Training

```python
# One PPO iteration collects n_steps transitions per environment: one episode of
# max_time seconds sampled at the control rate control_dt.
n_steps = math.floor(cfg['environment']['max_time'] / cfg['environment']['control_dt'])
total_steps = n_steps * env.num_envs

avg_rewards = []
avg_dones = []
fig, ax = plt.subplots(1, 2, constrained_layout=True, sharex=True, figsize=[10.8, 4.8])

# Actor: policy MLP with a diagonal-covariance Gaussian head. Critic: value MLP.
actor = ppo_module.Actor(
    ppo_module.MLP(cfg['architecture']['policy_net'], nn.LeakyReLU, ob_dim, act_dim),
    ppo_module.MultivariateGaussianDiagonalCovariance(act_dim, 1.0),
    'cuda')
critic = ppo_module.Critic(
    ppo_module.MLP(cfg['architecture']['value_net'], nn.LeakyReLU, ob_dim, 1),
    'cuda')

ppo = PPO.PPO(
    actor=actor,
    critic=critic,
    num_envs=cfg['environment']['num_envs'],
    num_transitions_per_env=n_steps,
    num_learning_epochs=4,
    gamma=0.996,
    lam=0.95,
    num_mini_batches=4,
```
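For reference, `ppo_module.MLP(cfg['architecture']['policy_net'], nn.LeakyReLU, ob_dim, act_dim)` builds a plain fully connected network from the list of hidden-layer sizes stored in the config. The sketch below shows an equivalent construction in plain PyTorch; the hidden sizes `[128, 128]` and the input/output dimensions are placeholder assumptions, not values taken from this config.

```python
import torch
import torch.nn as nn

def build_mlp(hidden_sizes, activation, in_dim, out_dim):
    """Stack Linear + activation pairs, finishing with a plain Linear output layer."""
    layers, last = [], in_dim
    for h in hidden_sizes:
        layers += [nn.Linear(last, h), activation()]
        last = h
    layers.append(nn.Linear(last, out_dim))
    return nn.Sequential(*layers)

# Placeholder sizes standing in for cfg['architecture']['policy_net'], ob_dim and act_dim.
policy = build_mlp(hidden_sizes=[128, 128], activation=nn.LeakyReLU, in_dim=34, out_dim=8)
print(policy(torch.zeros(1, 34)).shape)  # torch.Size([1, 8])
```

The actor wraps such a network together with a diagonal Gaussian distribution, so the network output serves as the action mean while the covariance supplies exploration noise during rollouts; the `1.0` passed to `MultivariateGaussianDiagonalCovariance` is presumably the initial standard deviation.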
"Can't find trained weight, please provide a trained weight with --weight switch\n" ) else: print("Loaded weight from {}\n".format(weight_path)) start = time.time() env.reset() reward_ll_sum = 0 done_sum = 0 average_dones = 0. n_steps = math.floor(cfg['environment']['max_time'] / cfg['environment']['control_dt']) total_steps = n_steps * 1 start_step_id = 0 print("Visualizing and evaluating the policy: ", weight_path) loaded_graph = ppo_module.MLP(cfg['architecture']['policy_net'], torch.nn.LeakyReLU, ob_dim, act_dim) loaded_graph.load_state_dict( torch.load(weight_path)['actor_architecture_state_dict']) env.load_scaling(weight_dir, int(iteration_number)) env.turn_on_visualization() # max_steps = 1000000 max_steps = 1000 ## 10 secs for step in range(max_steps): time.sleep(0.01) obs = env.observe(False) action_ll = loaded_graph.architecture(torch.from_numpy(obs).cpu()) reward_ll, dones = env.step(action_ll.cpu().detach().numpy()) reward_ll_sum = reward_ll_sum + reward_ll[0]