Code example #1
File: meta_run_sac.py  Project: sgillen/misc
    policy=policy,
    value_fn=value_fn,
    q1_fn=q1_fn,
    q2_fn=q2_fn,
    act_limit=5,
)

arg_dict = {
    "env_name": env_name,
    "model": model,
    "seed": seed,  # int((time.time() % 1)*1e8),
    "total_steps": 5e5,
    "exploration_steps": 10000,
    "min_steps_per_update": 200,
    "reward_stop": 1500,
    "gamma": 1,
}

run_sg(arg_dict, sac, None, 'back to 200 ah', "/data/data2/sac/")

# p = Process(
#     target=run_sg,
#     args=(arg_dict, ppo, None, "ppo2 drake acrobot with an act hold of 20, to see if Nans go away..", "/data2/ppo2_test/"),
# )
# p.start()
# proc_list.append(p)

# for p in proc_list:
#     print("joining")
#     p.join()
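The excerpt above begins partway through constructing the SACModel, so the network setup it presupposes is not shown. Below is a minimal sketch of that setup, assuming the same MLP helper and SACModel constructor that appear in example #9; the sizes, act_limit of 5, and layer settings are placeholders, and the exact import path for SACModel is not shown in these excerpts.

from seagul.nn import MLP   # import shown in example #5
import torch.nn as nn

# Illustrative dimensions; the real values depend on the environment.
input_size = 4
output_size = 1
layer_size = 128
num_layers = 2
activation = nn.ReLU

# The policy outputs twice the action dimension (presumably mean and std), as in example #9.
policy = MLP(input_size, output_size * 2, num_layers, layer_size, activation)
value_fn = MLP(input_size, 1, num_layers, layer_size, activation)
q1_fn = MLP(input_size + output_size, 1, num_layers, layer_size, activation)
q2_fn = MLP(input_size + output_size, 1, num_layers, layer_size, activation)

# SACModel (and sac/run_sg) come from seagul; their import lines are not part of the excerpt.
model = SACModel(
    policy=policy,
    value_fn=value_fn,
    q1_fn=q1_fn,
    q2_fn=q2_fn,
    act_limit=5,
)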
Code example #2
    alg_config = {
        "env_name": env_name,
        "model": model,
        "seed": int(seed),  # int((time.time() % 1)*1e8),
        "train_steps": 1e6,
        "exploration_steps": 50000,
        "min_steps_per_update": 500,
        "reward_stop": 1000,
        "gamma": 1,
        "act_std_schedule": (.1, ),
        "sgd_batch_size": 64,
        "replay_batch_size": 2048,
        "iters_per_update": 1000,
        "env_max_steps": 1000,
        "polyak": .995
        #"iters_per_update": float('inf'),
    }

    run_sg(alg_config, td3, "sac bullet defaults", "debug",
           "/data/" + "/" + "seed" + str(seed))

    p = Process(target=run_and_test, args=[alg_config])
    p.start()
    proc_list.append(p)

for p in proc_list:
    p.join()

print(f"Total time: {(time.time() - start)}")
Code example #3
File: meta_run.py  Project: sgillen/misc
                        hold_count=1)

#model = PPOModel(policy=policy, value_fn=MLP(input_size, 1, num_layers, layer_size, activation), discrete=False)

arg_dict = {
    "env_name": env_name,
    "model": model,
    "act_var_schedule": [.1],
    "seed": seed,  # int((time.time() % 1)*1e8),
    "total_steps": 200 * 2048,
    "epoch_batch_size": 2048,
    "reward_stop": 900,
    "gamma": 1,
    "pol_epochs": 10,
    "val_epochs": 10,
}

run_sg(arg_dict, ppo, None, 'lets see if we can learn to balance',
       "/data/data2/10_sat/")

# p = Process(
#     target=run_sg,
#     args=(arg_dict, ppo, None, "ppo2 drake acrobot with an act hold of 20, to see if Nans go away..", "/data2/ppo2_test/"),
# )
# p.start()
# proc_list.append(p)

# for p in proc_list:
#     print("joining")
#     p.join()
Code example #4
File: debug_switched.py  Project: sgillen/misc
        nominal_policy=control,
        hold_count=20,
    )

    arg_dict = {
        "env_name": env_name,
        "model": model,
        "total_steps": 500 * 2048,
        "epoch_batch_size": 2048,
        "act_var_schedule": [2, 2],
        "gate_var_schedule": [0.1, 0.1],
        "gamma": 1,
        "seed": seed,
        "reward_stop": 1500,
    }

    run_name = "25_ppo2" + str(seed)

    #  import ipdb; ipdb.set_trace()
    # run_sg(arg_dict, ppo_switch, run_name, 'reasonable torque limits, and a new but cheaty warm start', "/data/switch4/")

    run_sg(
        arg_dict,
        ppo_switch,
        run_name,
        "trying to replicate earlier results that use ppo with ppo2",
        "/data/drake_ppo22/",
    )

    print("finished run ", run_name)
Code example #5
File: run_utils.py  Project: ntalele/seagul
    from seagul.nn import MLP
    from seagul.rl.ppo.ppo2 import ppo

    import torch
    import torch.nn as nn
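    # PPOModel and run_sg are assumed to be imported or defined elsewhere in
    # run_utils.py; only the imports above appear in this excerpt.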

    ## init policy, valuefn
    input_size = 4
    output_size = 1
    layer_size = 64
    num_layers = 3
    activation = nn.ReLU

    torch.set_default_dtype(torch.double)

    model = PPOModel(
        policy=MLP(input_size, output_size, num_layers, layer_size,
                   activation),
        value_fn=MLP(input_size, 1, num_layers, layer_size, activation),
        action_std=4,
    )

    arg_dict = {
        "env_name": "su_cartpole-v0",
        "model": model,
        "num_epochs": 10,
        "action_var_schedule": [10, 0]
    }

    run_sg(arg_dict, ppo)
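This excerpt uses the shortest call form, run_sg(arg_dict, ppo). The other excerpts pass three extra bookkeeping arguments after the algorithm: a run name (or None), a free-form description, and a base directory for saved data. A hypothetical call in that longer form (the name, description, and path below are placeholders):

run_sg(arg_dict, ppo, "cartpole_smoke", "short PPO smoke test on su_cartpole-v0", "/data/cartpole/")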
Code example #6
        "env_name": env_name,
        "model": model,
        "total_steps": 500 * 2048,
        "epoch_batch_size": 2048,
        "act_var_schedule": [2, 2],
        "gate_var_schedule": [0.1, 0.1],
        "gamma": 1,
        "seed": seed,
        "reward_stop": 1500,
    }

    run_name = "1000_ppo2" + str(seed)

    #  import ipdb; ipdb.set_trace()
    run_sg(arg_dict, ppo_switch, run_name,
           'trying to replicate earlier work that kinda of worked ',
           "/data/data1/switch4/")

#     p = Process(
#         target=run_sg,
#         args=(
#             arg_dict,
#             ppo_switch,
#             run_name,
#             "trying to replicate earlier results that use ppo with ppo2",
#             "/data/data2/drake_ppo2/",
#         ),
#     )
#     p.start()
#     proc_list.append(p)
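Several of these excerpts carry the same commented-out multiprocessing scaffolding. A sketch of that pattern uncommented, assuming the five-argument run_sg form shown above; the seed sweep, run names, and output path are illustrative:

from multiprocessing import Process

proc_list = []
for seed in range(4):  # illustrative seed sweep
    cfg = dict(arg_dict, seed=seed)  # shallow copy of the config with a new seed
    p = Process(
        target=run_sg,
        args=(cfg, ppo_switch, "switch_" + str(seed),
              "parallel seed sweep", "/data/switch_sweep/"),
    )
    p.start()
    proc_list.append(p)

for p in proc_list:
    p.join()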
Code example #7
File: needle_test.py  Project: sgillen/misc
    "m2": m2,
    "m1": m1,
    "l1": l1,
    "lc1": lc1,
    "lc2": lc2,
    "i1": I1,
    "i2": I2,
    "act_hold": 20,
    "gate_fn": torch.load("../switching2/warm/lqr_gate_better"),
    "controller": control
}

proc_list = []
for seed in np.random.randint(0, 2**32, 1):

    alg_config = {
        "env_name": "su_acroswitchsin-v0",
        "total_steps": 500000,
        "model": model,
        "seed": seed,
        "goal_state": np.array([0, 1, 1, 0, 0, 0]),
        "goal_lookback": 10,
        "goal_thresh": 1.5,
        "iters_per_update": float('inf'),
        "exploration_steps": 50000,
        "env_config": env_config
    }

    run_sg(alg_config, sac, "smoke_test" + str(seed), "",
           "/data_needle/" + trial_name)
Code example #8
File: debug_sg.py  Project: sgillen/misc
    elif s[3] == 0:
        if s[0] < -2 and s[2] < -3:
            reward = 5.0
            s[3] = 1
        else:
            reward = -1.0

    return reward, s


env_config = {"num_steps": 500, "reward_fn": reward_fn}

arg_dict = {
    "env_name": env_name,
    "model": model,
    "act_var_schedule": [1],
    "seed": seed,  # int((time.time() % 1)*1e8),
    "total_steps": 5e5,
    "epoch_batch_size": 2048,
    "gamma": 1,
    "pol_epochs": 10,
    "val_epochs": 10,
    "env_config": env_config
}

run_name = "debug2" + str(seed)

run_sg(arg_dict, ppo, run_name, "basic smoke test", "/data/seagul/")

print("finished run ", run_name)
Code example #9
File: launch_seagul.py  Project: ntalele/seagul
layer_size = 128
num_layers = 2
activation = nn.ReLU

proc_list = []

policy = MLP(input_size, output_size * 2, num_layers, layer_size, activation)

# Do I need to do weight sharing here?
value_fn = MLP(input_size, 1, num_layers, layer_size, activation)
q1_fn = MLP(input_size + output_size, 1, num_layers, layer_size, activation)
q2_fn = MLP(input_size + output_size, 1, num_layers, layer_size, activation)
model = SACModel(policy, value_fn, q1_fn, q2_fn, 1)

arg_dict = {
    'total_steps': 1e6,
    'model': model,
    'env_name': env_name,
    'seed': 2,
    'env_steps': 1000,
    'iters_per_update': 3000,
    'min_steps_per_update': 1000,
    'reward_stop': 3000,
    'exploration_steps': 10000,
    'replay_batch_size': 100,
    'use_gpu': False,
}

run_sg(arg_dict, sac, "/sac_walker0", "trying to get walker to work at all",
       "/sac_walker")