def algo_init(args):
    """Build the actor (tanh-Gaussian policy) and its optimizer.

    Observation/action shapes are taken directly from ``args`` when present;
    otherwise they are looked up from the environment named by
    ``args['task']`` and written back into ``args``. Raises
    ``NotImplementedError`` when neither source is available.

    Returns:
        dict: {"actor": {"net": policy, "opt": Adam optimizer}}.
    """
    logger.info('Run algo_init function')
    setup_seed(args['seed'])

    # Resolve (obs_shape, action_shape), preferring explicit config values.
    if args["obs_shape"] and args["action_shape"]:
        obs_shape, action_shape = args["obs_shape"], args["action_shape"]
        # NOTE(review): max_action is read but never used below; the read is
        # kept so a missing "max_action" key still fails the same way.
        max_action = args["max_action"]
    elif "task" in args:
        from offlinerl.utils.env import get_env_shape, get_env_action_range
        obs_shape, action_shape = get_env_shape(args['task'])
        max_action, _ = get_env_action_range(args["task"])
        # Cache the derived shapes back into args for downstream consumers.
        args["obs_shape"], args["action_shape"] = obs_shape, action_shape
    else:
        raise NotImplementedError

    backbone = Net(
        layer_num=args['actor_layers'],
        state_shape=obs_shape,
        hidden_layer_size=args['actor_features'],
    )
    policy = TanhGaussianPolicy(
        preprocess_net=backbone,
        action_shape=action_shape,
        hidden_layer_size=args['actor_features'],
        conditioned_sigma=True,
    ).to(args['device'])
    policy_optim = torch.optim.Adam(policy.parameters(), lr=args['actor_lr'])

    return {"actor": {"net": policy, "opt": policy_optim}}
def algo_init(args):
    """Build the VAE generator, the jitter (perturbation) net, and twin Q critics.

    Observation/action shapes come from ``args`` when set, otherwise from the
    environment named by ``args['task']`` (and are cached back into ``args``).
    Raises ``NotImplementedError`` when neither source is available.

    Returns:
        dict with entries "vae", "jitter", and "critic"; each maps to
        {"net": module(s), "opt": Adam optimizer}. "critic" holds the two
        Q networks as a list sharing a single optimizer.
    """
    logger.info('Run algo_init function')
    setup_seed(args['seed'])

    # Resolve (obs_shape, action_shape, max_action) from config or task.
    if args["obs_shape"] and args["action_shape"]:
        obs_shape, action_shape = args["obs_shape"], args["action_shape"]
        max_action = args["max_action"]
    elif "task" in args:
        from offlinerl.utils.env import get_env_shape, get_env_action_range
        obs_shape, action_shape = get_env_shape(args['task'])
        max_action, _ = get_env_action_range(args["task"])
        args["obs_shape"], args["action_shape"] = obs_shape, action_shape
    else:
        raise NotImplementedError

    device = args['device']

    generator = VAE(
        obs_shape, action_shape,
        args['vae_features'], args['vae_layers'],
        max_action,
    ).to(device)
    generator_optim = torch.optim.Adam(generator.parameters(), lr=args['vae_lr'])

    perturbation = Jitter(
        obs_shape, action_shape,
        args['jitter_features'], args['jitter_layers'],
        max_action, args['phi'],
    ).to(device)
    perturbation_optim = torch.optim.Adam(perturbation.parameters(), lr=args['jitter_lr'])

    # Twin Q networks; both feed a single shared optimizer.
    critics = [
        MLP(
            obs_shape + action_shape, 1,
            args['value_features'], args['value_layers'],
            hidden_activation='relu',
        ).to(device)
        for _ in range(2)
    ]
    critic_params = [p for net in critics for p in net.parameters()]
    critic_optim = torch.optim.Adam(critic_params, lr=args['critic_lr'])

    return {
        "vae": {"net": generator, "opt": generator_optim},
        "jitter": {"net": perturbation, "opt": perturbation_optim},
        "critic": {"net": critics, "opt": critic_optim},
    }
def algo_init(args):
    """Build the VAE, the (latent or perturbation) actor, and two critics.

    Shapes are read from ``args`` when present, otherwise derived from the
    environment named by ``args['task']`` and cached back into ``args``;
    raises ``NotImplementedError`` when neither source is available.
    When ``args['latent']`` is truthy the actor is an ``ActorPerturbation``
    operating on the VAE latent space; otherwise it is a plain ``Actor``
    whose output dimension equals the latent dimension.

    Returns:
        dict with entries "vae", "actor", "critic1", "critic2"; each maps
        to {"net": module, "opt": Adam optimizer}.
    """
    logger.info('Run algo_init function')
    setup_seed(args['seed'])

    # Resolve (obs_shape, action_shape, max_action) from config or task.
    if args["obs_shape"] and args["action_shape"]:
        obs_shape, action_shape = args["obs_shape"], args["action_shape"]
        max_action = args["max_action"]
    elif "task" in args:
        from offlinerl.utils.env import get_env_shape, get_env_action_range
        obs_shape, action_shape = get_env_shape(args['task'])
        max_action, _ = get_env_action_range(args["task"])
        args["obs_shape"], args["action_shape"] = obs_shape, action_shape
    else:
        raise NotImplementedError

    device = args['device']
    # Latent space is twice the action dimension.
    latent_dim = action_shape * 2

    vae = VAE(
        state_dim=obs_shape,
        action_dim=action_shape,
        latent_dim=latent_dim,
        max_action=max_action,
        hidden_size=args["vae_hidden_size"],
    ).to(device)
    vae_opt = optim.Adam(vae.parameters(), lr=args["vae_lr"])

    if args["latent"]:
        actor = ActorPerturbation(
            obs_shape, action_shape, latent_dim, max_action,
            max_latent_action=2, phi=args['phi'],
        ).to(device)
    else:
        actor_backbone = Net(
            layer_num=args["layer_num"],
            state_shape=obs_shape,
            hidden_layer_size=args["hidden_layer_size"],
        )
        actor = Actor(
            preprocess_net=actor_backbone,
            action_shape=latent_dim,
            max_action=max_action,
            hidden_layer_size=args["hidden_layer_size"],
        ).to(device)
    actor_opt = optim.Adam(actor.parameters(), lr=args["actor_lr"])

    # Two independent critics, each with its own optimizer; built in the
    # same order as before so parameter initialization is unchanged.
    critic_pairs = []
    for _ in range(2):
        critic_backbone = Net(
            layer_num=args['layer_num'],
            state_shape=obs_shape,
            action_shape=action_shape,
            concat=True,
            hidden_layer_size=args['hidden_layer_size'],
        )
        critic = Critic(
            preprocess_net=critic_backbone,
            hidden_layer_size=args['hidden_layer_size'],
        ).to(device)
        critic_pairs.append((critic, optim.Adam(critic.parameters(), lr=args['critic_lr'])))

    critic1, critic1_opt = critic_pairs[0]
    critic2, critic2_opt = critic_pairs[1]

    return {
        "vae": {"net": vae, "opt": vae_opt},
        "actor": {"net": actor, "opt": actor_opt},
        "critic1": {"net": critic1, "opt": critic1_opt},
        "critic2": {"net": critic2, "opt": critic2_opt},
    }