def load_model_ddpg(model_path, user_embeddings_path, item_embeddings_path, input_dim, action_dim, hidden_size, device):
    """Load a trained DDPG actor network for evaluation.

    Args:
        model_path: path to the saved ``state_dict`` (``torch.save`` output).
        user_embeddings_path: path to a ``.npy`` file of user embeddings.
        item_embeddings_path: path to a ``.npy`` file of item embeddings.
        input_dim: actor input dimension.
        action_dim: actor output (action) dimension.
        hidden_size: hidden layer width of the actor.
        device: torch device to map the checkpoint to and place the model on.

    Returns:
        The actor module on ``device``, switched to eval mode.
    """
    # np.load accepts a path directly; the open()/np.load(f) pair was redundant.
    user_embeddings = np.load(user_embeddings_path)
    item_embeddings = np.load(item_embeddings_path)
    model = Actor(input_dim, action_dim, hidden_size, user_embeddings, item_embeddings)
    model.load_state_dict(torch.load(model_path, map_location=device))
    # Bug fix: map_location only affects the loaded tensors; the model's own
    # parameters stay on CPU unless explicitly moved.
    model.to(device)
    model.eval()
    return model
def test(args=None):
    """Roll out a trained actor in QuadcopterEnv for 10 rendered episodes.

    Args:
        args: parsed argument namespace; when omitted, ``get_args()`` is
            called at call time. (The previous ``args=get_args()`` default was
            evaluated once at import and then mutated below, so repeated calls
            shared and re-mutated the same namespace.)
    """
    if args is None:
        args = get_args()
    env = QuadcopterEnv()
    model = Actor(None, env.observation_space.shape, env.action_space.shape,
                  [-1, 1], args.device).to(args.device)
    args.model_path = os.path.join(args.logdir, 'lqr')
    model.load_state_dict(
        torch.load(os.path.join(args.model_path, 'policy.pth'),
                   map_location=args.device))
    for _ in range(10):
        obs = env.reset()
        env.render()
        done = False
        while not done:
            # Batch of 1 -> take the single action; detach before numpy.
            act = model(obs.reshape((1, -1)))[0].detach().cpu().numpy()[0]
            # NOTE(review): assumes the legacy 4-tuple gym step API — confirm
            # against the env implementation.
            obs, reward, done, info = env.step(act)
            env.render()
def test(args=None):
    """Roll out a trained actor in DubinEnv for 10 rendered episodes.

    Args:
        args: parsed argument namespace; when omitted, ``get_args()`` is
            called at call time. (The previous ``args=get_args()`` default was
            evaluated once at import and then mutated below, so repeated calls
            shared and re-mutated the same namespace.)
    """
    if args is None:
        args = get_args()
    env = DubinEnv()
    # Only the 'dynamics' part of the dict observation space feeds the actor.
    model = Actor(None, env.observation_space['dynamics'].shape,
                  env.action_space.shape, [-1, 1], args.device).to(args.device)
    args.model_path = os.path.join(args.logdir, 'lqr')
    model.load_state_dict(
        torch.load(os.path.join(args.model_path, 'policy.pth'),
                   map_location=args.device))
    for _ in range(10):
        env.reset()
        obs = env._obs()
        env.render()
        done = False
        while not done:
            # Batch of 1 from the 'dynamics' vector; detach before numpy.
            normed_obs = obs['dynamics'].reshape((1, -1))
            act = model(normed_obs)[0].detach().cpu().numpy()[0]
            # NOTE(review): assumes the legacy 4-tuple gym step API — confirm
            # against the env implementation.
            obs, reward, done, info = env.step(act)
            env.render()