# Optimizers for the world model, actor, the two value critics, and the entropy temperature.
world_optimizer = optim.Adam(world_param, lr=args.world_lr)
actor_optimizer = optim.Adam(actor_model.parameters(), lr=args.actor_lr)
value_optimizer = optim.Adam(
    list(value_model1.parameters()) + list(value_model2.parameters()), lr=args.value_lr)
alpha_optimizer = optim.Adam([log_alpha], lr=3e-3)

# Restore a saved checkpoint, if one was provided.
if args.models != '' and os.path.exists(args.models):
    model_dicts = torch.load(args.models)
    transition_model.load_state_dict(model_dicts['transition_model'])
    observation_model.load_state_dict(model_dicts['observation_model'])
    reward_model.load_state_dict(model_dicts['reward_model'])
    encoder.load_state_dict(model_dicts['encoder'])
    actor_model.load_state_dict(model_dicts['actor_model'])
    value_model1.load_state_dict(model_dicts['value_model1'])
    value_model2.load_state_dict(model_dicts['value_model2'])
    world_optimizer.load_state_dict(model_dicts['world_optimizer'])

free_nats = torch.full((1,), args.free_nats, dtype=torch.float32, device=args.device)  # Allowed deviation in KL divergence


def update_belief_and_act(args, env, actor_model, transition_model, encoder, belief, posterior_state,
# Actor and value optimizers; the learning rate starts at 0 when a learning-rate schedule is enabled.
actor_optimizer = optim.Adam(
    actor_model.parameters(),
    lr=0 if args.learning_rate_schedule != 0 else args.actor_learning_rate,
    eps=args.adam_epsilon)
value_optimizer = optim.Adam(
    value_model.parameters(),
    lr=0 if args.learning_rate_schedule != 0 else args.value_learning_rate,
    eps=args.adam_epsilon)

# Restore a saved checkpoint, if one was provided.
if args.models != '' and os.path.exists(args.models):
    model_dicts = torch.load(args.models)
    transition_model.load_state_dict(model_dicts['transition_model'])
    observation_model.load_state_dict(model_dicts['observation_model'])
    reward_model.load_state_dict(model_dicts['reward_model'])
    encoder.load_state_dict(model_dicts['encoder'])
    actor_model.load_state_dict(model_dicts['actor_model'])
    value_model.load_state_dict(model_dicts['value_model'])
    model_optimizer.load_state_dict(model_dicts['model_optimizer'])

# Dreamer acts through the learned actor; otherwise use the model-predictive-control planner.
if args.algo == "dreamer":
    print("DREAMER")
    planner = actor_model
else:
    planner = MPCPlanner(env.action_size, args.planning_horizon, args.optimisation_iters,
                         args.candidates, args.top_candidates, transition_model, reward_model)

global_prior = Normal(
    torch.zeros(args.batch_size, args.state_size, device=args.device),
    torch.ones(args.batch_size, args.state_size, device=args.device))  # Global prior N(0, I)
free_nats = torch.full((1,), args.free_nats, device=args.device)  # Allowed deviation in KL divergence
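# Sketch (assumption, not from the original file): the checkpoint loaded above is expected to be
# produced by a matching torch.save call with the same dictionary keys, for example at checkpoint
# time in the training loop. `results_dir` and the episode-numbered filename are hypothetical
# names used only for illustration.
torch.save({
    'transition_model': transition_model.state_dict(),
    'observation_model': observation_model.state_dict(),
    'reward_model': reward_model.state_dict(),
    'encoder': encoder.state_dict(),
    'actor_model': actor_model.state_dict(),
    'value_model': value_model.state_dict(),
    'model_optimizer': model_optimizer.state_dict(),
}, os.path.join(results_dir, 'models_%d.pth' % episode))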
# Actor and value optimizers; Plan2Explore ("p2e") additionally gets curiosity-driven
# actor/value optimizers and an optimizer for the one-step disagreement ensemble.
actor_optimizer = optim.Adam(
    actor_model.parameters(),
    lr=0 if args.learning_rate_schedule != 0 else args.actor_learning_rate,
    eps=args.adam_epsilon)
value_optimizer = optim.Adam(
    value_model.parameters(),
    lr=0 if args.learning_rate_schedule != 0 else args.value_learning_rate,
    eps=args.adam_epsilon)
if args.algo == "p2e":
    curious_actor_optimizer = optim.Adam(
        actor_model.parameters(),
        lr=0 if args.learning_rate_schedule != 0 else args.actor_learning_rate,
        eps=args.adam_epsilon)
    curious_value_optimizer = optim.Adam(
        value_model.parameters(),
        lr=0 if args.learning_rate_schedule != 0 else args.value_learning_rate,
        eps=args.adam_epsilon)
    onestep_optimizer = optim.Adam(
        onestep_param_list,
        lr=0 if args.learning_rate_schedule != 0 else args.disagreement_learning_rate,
        eps=args.adam_epsilon)

# Restore a saved checkpoint, if one was provided.
if args.models != '' and os.path.exists(args.models):
    model_dicts = torch.load(args.models)
    transition_model.load_state_dict(model_dicts['transition_model'])
    observation_model.load_state_dict(model_dicts['observation_model'])
    reward_model.load_state_dict(model_dicts['reward_model'])
    encoder.load_state_dict(model_dicts['encoder'])
    model_optimizer.load_state_dict(model_dicts['model_optimizer'])
    if args.algo == "dreamer" or args.algo == "p2e":
        actor_model.load_state_dict(model_dicts['actor_model'])
        value_model.load_state_dict(model_dicts['value_model'])
    if args.algo == "p2e":
        curious_actor_model.load_state_dict(model_dicts['curious_actor_model'])
        curious_value_model.load_state_dict(model_dicts['curious_value_model'])
        onestep_optimizer.load_state_dict(model_dicts['onestep_optimizer'])

# Select how actions are chosen: learned actor(s) for Dreamer/Plan2Explore, MPC planner for PlaNet.
if args.algo == "dreamer":
    print("DREAMER")
    planner = actor_model
elif args.algo == "p2e":
    print("Plan2Explore")
    planner = actor_model
    curious_planner = curious_actor_model
else:
    print("PlaNet")
    planner = MPCPlanner(env.action_size, args.planning_horizon, args.optimisation_iters,
                         args.candidates, args.top_candidates, transition_model, reward_model)

global_prior = Normal(
    torch.zeros(args.batch_size, args.state_size, device=args.device),
    torch.ones(args.batch_size, args.state_size, device=args.device))  # Global prior N(0, I)
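# Sketch (assumption, not from the original file): when args.learning_rate_schedule is non-zero
# the optimizers above are constructed with lr=0, so the training loop is expected to raise the
# learning rate itself. A hypothetical linear warm-up helper could look like this:
def set_learning_rate(optimizer, base_lr, step, schedule_steps):
    # Ramp linearly from 0 to base_lr over the first `schedule_steps` updates, then hold.
    lr = base_lr * min(step / schedule_steps, 1.0) if schedule_steps > 0 else base_lr
    for group in optimizer.param_groups:
        group['lr'] = lr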