import math

import numpy as np
import torch
from scipy.io import savemat

# AttrDict, to_numpy, rollout, logger, and the module-level args/model/
# policy/env_spec are assumed to come from the surrounding project code.


def eval_policy(dataset, save_file_name):
    b_size = dataset.batch_size
    d_size = len(dataset)

    obs_all = []
    goals_all = []
    output_actions = []

    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval policy" % (b, iters))
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))

        if args.random_goals:
            inputs, outputs = dataset.get_batch(indices=idxs,
                                                torch_device=model.device,
                                                get_horizon_goals=False)
            # sample uniform goals; unsqueeze(1) adds a horizon axis so the
            # goals broadcast against the H+1 predicted observations.
            # len(idxs) (not b_size) keeps the final partial batch aligned.
            goals = env_spec.get_uniform(env_spec.goal_names,
                                         len(idxs),
                                         torch_device=model.device).unsqueeze(1)
        else:
            inputs, outputs, goals = dataset.get_batch(indices=idxs,
                                                       torch_device=model.device,
                                                       get_horizon_goals=True)

        # assemble the observation batch from the spec's observation names
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act = policy.get_action(model, obs, goals, batch=True)

        goals_all.append(goals.leaf_apply(lambda v: to_numpy(v)))
        obs_all.append(obs.leaf_apply(lambda v: to_numpy(v)))
        output_actions.append(act.leaf_apply(lambda v: to_numpy(v)))

    # concatenate the per-batch AttrDicts into one big dictionary
    combined_obs = AttrDict.leaf_combine_and_apply(
        obs_all, lambda vs: np.concatenate(vs, axis=0))
    combined_goals = AttrDict.leaf_combine_and_apply(
        goals_all, lambda vs: np.concatenate(vs, axis=0))
    combined_output_actions = AttrDict.leaf_combine_and_apply(
        output_actions, lambda vs: np.concatenate(vs, axis=0))

    combined_obs.combine(combined_goals)
    combined_obs.combine(combined_output_actions)

    logger.debug("Saving Action Sequences")
    savemat(save_file_name, combined_obs)
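# A minimal, self-contained sketch of the goal-broadcast convention assumed
# above: get_uniform is presumed to return (B, goal_dim) tensors, and
# unsqueeze(1) yields (B, 1, goal_dim) so a single goal broadcasts against a
# (B, H+1, goal_dim) predicted trajectory. All shapes here are hypothetical.
def _demo_goal_broadcast():
    pred_traj = torch.zeros(4, 11, 3)      # (B, H+1, goal_dim)
    goals = torch.rand(4, 3).unsqueeze(1)  # (B, 1, goal_dim)
    err = pred_traj - goals                # broadcasts over the H+1 axis
    assert err.shape == (4, 11, 3)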
def eval_model(dataset, save_file_name):
    b_size = dataset.batch_size
    d_size = len(dataset)

    pred_trajectories = []
    action_sequences = []
    true_trajectories = []
    costs = []

    iters = math.ceil(d_size / b_size)
    for b in range(iters):
        logger.debug("[%d/%d]: Eval model" % (b, iters))
        idxs = np.arange(start=b * b_size, stop=min((b + 1) * b_size, d_size))

        inputs, outputs, goals = dataset.get_batch(indices=idxs,
                                                   torch_device=model.device,
                                                   get_horizon_goals=True,
                                                   get_action_seq=True)

        # assemble the observation batch from the spec's observation names
        obs = AttrDict()
        for name in env_spec.observation_names:
            obs[name] = inputs[name]

        act_seq = AttrDict()
        act_seq['act'] = inputs['act_seq']

        model.eval()
        all_obs, all_mouts = rollout(env_spec, model, obs, act_seq,
                                     policy._advance_obs_fn)

        # unsqueeze each step to (B, 1, ...) first, then concat along the
        # new horizon dimension
        all_obs = AttrDict.leaf_combine_and_apply(
            all_obs,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))
        all_mouts = AttrDict.leaf_combine_and_apply(
            all_mouts,
            func=lambda vs: torch.cat(vs, dim=1),
            map_func=lambda arr: arr.unsqueeze(1))

        cost_dict = AttrDict(
            {'costs': policy._cost_fn(all_obs, goals, act_seq, all_mouts)})

        true_trajectories.append(goals.leaf_apply(lambda v: to_numpy(v)))
        pred_trajectories.append(all_obs.leaf_apply(lambda v: to_numpy(v)))
        action_sequences.append(act_seq.leaf_apply(lambda v: to_numpy(v)))
        costs.append(cost_dict.leaf_apply(lambda v: to_numpy(v)))

    # concatenate the per-batch AttrDicts into one big dictionary
    final_dict = AttrDict.leaf_combine_and_apply(
        true_trajectories, lambda vs: np.concatenate(vs, axis=0))
    combined_pred = AttrDict.leaf_combine_and_apply(
        pred_trajectories, lambda vs: np.concatenate(vs, axis=0))
    combined_acts = AttrDict.leaf_combine_and_apply(
        action_sequences, lambda vs: np.concatenate(vs, axis=0))
    combined_costs = AttrDict.leaf_combine_and_apply(
        costs, lambda vs: np.concatenate(vs, axis=0))

    # no overlapping keys among these dictionaries
    final_dict.combine(combined_pred)
    final_dict.combine(combined_acts)
    final_dict.combine(combined_costs)

    logger.debug("Saving Model Trajectories")
    logger.debug("Keys: " + str(final_dict.keys()))
    savemat(save_file_name, final_dict)
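# Hypothetical driver, assuming `dataset` and the module-level `model`,
# `policy`, `env_spec`, and `args` have been constructed earlier in this
# script; the .mat file names below are placeholders.
if __name__ == '__main__':
    eval_policy(dataset, 'eval_policy_actions.mat')
    eval_model(dataset, 'eval_model_trajectories.mat')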