Example #1
0
    def act_vectorized(self,
                       obs,
                       goal,
                       horizon=None,
                       greedy=False,
                       noise=0,
                       marginal_policy=None):
        """Pick a batch of flattened discrete actions for (obs, goal) pairs.

        Logits are reshaped to one categorical per action dimension
        (n_dims x granularity), tempered by ``noise`` before the softmax,
        then sampled (or argmax'd when ``greedy``) and flattened into a
        single discrete action index per row.  On the greedy path, ``noise``
        additionally acts as an epsilon-greedy exploration rate.
        ``marginal_policy`` is accepted for interface compatibility but
        unused here.  Returns a numpy array of action indices.
        """
        obs = torch.tensor(obs, dtype=torch.float32)
        goal = torch.tensor(goal, dtype=torch.float32)
        if horizon is not None:
            horizon = torch.tensor(horizon, dtype=torch.float32)

        # One categorical distribution per action dimension; higher `noise`
        # flattens the distribution (temperature-style) before sampling.
        per_dim_logits = self.forward(obs, goal, horizon=horizon).view(
            -1, self.n_dims, self.granularity)
        per_dim_probs = torch.softmax(per_dim_logits * (1 - noise), 2)

        if greedy:
            chosen = per_dim_probs.argmax(dim=-1)
        else:
            chosen = torch.distributions.categorical.Categorical(
                probs=per_dim_probs).sample()
        flat_actions = self.flattened(chosen)

        if not greedy:
            return ptu.to_numpy(flat_actions)

        # Epsilon-greedy on the greedy path: with probability `noise`,
        # replace the argmax action with a uniformly random one.
        flat_actions = ptu.to_numpy(flat_actions)
        random_actions = np.random.choice(self.action_space.n,
                                          size=len(flat_actions))
        return np.where(
            np.random.rand(len(flat_actions)) < noise,
            random_actions,
            flat_actions,
        )
Example #2
0
    def act_vectorized(self,
                       obs,
                       goal,
                       horizon=None,
                       greedy=False,
                       noise=0,
                       marginal_policy=None):
        """Pick a batch of discrete actions for (obs, goal) pairs.

        A single flat categorical per row: logits from ``self.forward`` are
        optionally corrected by subtracting the logits of ``marginal_policy``
        (evaluated with a zero goal), tempered by ``noise``, then sampled
        (or argmax'd when ``greedy``).  Returns a numpy array of actions.
        """
        obs = torch.tensor(obs, dtype=torch.float32)
        goal = torch.tensor(goal, dtype=torch.float32)
        if horizon is not None:
            horizon = torch.tensor(horizon, dtype=torch.float32)

        logits = self.forward(obs, goal, horizon=horizon)
        if marginal_policy is not None:
            # Remove the goal-independent component estimated by the
            # marginal policy (evaluated on an all-zero goal).
            zero_goal = torch.zeros_like(goal)
            logits = logits - marginal_policy.forward(obs, zero_goal, horizon)

        # Higher `noise` flattens the action distribution before sampling.
        action_probs = torch.softmax(logits * (1 - noise), 1)
        if greedy:
            actions = action_probs.argmax(dim=-1)
        else:
            actions = torch.distributions.categorical.Categorical(
                probs=action_probs).sample()
        return ptu.to_numpy(actions)
Example #3
0
    def loss_fn(self, observations, goals, actions, horizons, weights):
        """Weighted goal-conditioned negative-log-likelihood loss.

        Converts the raw batch to tensors (actions become int64 when the
        action space is discrete, float32 otherwise), queries the policy's
        per-sample NLL conditioned on goal and horizon, and returns the
        weighted mean as a scalar tensor.
        """
        action_dtype = torch.int64 if self.is_discrete_action else torch.float32

        obs_t = torch.tensor(observations, dtype=torch.float32)
        goals_t = torch.tensor(goals, dtype=torch.float32)
        actions_t = torch.tensor(actions, dtype=action_dtype)
        horizons_t = torch.tensor(horizons, dtype=torch.float32)
        weights_t = torch.tensor(weights, dtype=torch.float32)

        # Per-sample conditional NLL, reweighted (e.g. by relabeling
        # importance weights) and averaged over the batch.
        per_sample_nll = self.policy.nll(obs_t,
                                         goals_t,
                                         actions_t,
                                         horizon=horizons_t)
        return (per_sample_nll * weights_t).mean()
Example #4
0
    def __init__(self,
                 env,
                 dim_out=1,
                 state_embedding=None,
                 goal_embedding=None,
                 layers=[512, 512],
                 max_horizon=None,
                 freeze_embeddings=False,
                 add_extra_conditioning=False,
                 dropout=0):
        """Build a network over concatenated state and goal embeddings.

        Args:
            env: environment providing ``observation_space`` and
                ``goal_space`` (shapes used only to size the embeddings).
            dim_out: output width of the final network.
            state_embedding / goal_embedding: embedding modules; default to
                ``Flatten()`` when None.
            layers: hidden layer widths for the trunk network.
            max_horizon: if not None, use a horizon-conditioned CBCNetwork;
                otherwise a plain FCNetwork.
            freeze_embeddings: stored flag — presumably consulted by
                forward/backward logic elsewhere (not visible here).
            add_extra_conditioning, dropout: forwarded to CBCNetwork.
        """
        super(StateGoalNetwork, self).__init__()
        self.max_horizon = max_horizon
        if state_embedding is None:
            state_embedding = Flatten()
        if goal_embedding is None:
            goal_embedding = Flatten()

        self.state_embedding = state_embedding
        self.goal_embedding = goal_embedding
        self.freeze_embeddings = freeze_embeddings

        # Probe each embedding with a zero batch of size 1 to discover its
        # output width.  Fix: the original wrapped the probe in
        # torch.tensor(torch.zeros(...)), which copy-constructs an existing
        # tensor and raises a UserWarning; torch.zeros already is a tensor.
        state_dim_in = self.state_embedding(
            torch.zeros(env.observation_space.shape)[None]).size()[1]
        goal_dim_in = self.goal_embedding(
            torch.zeros(env.goal_space.shape)[None]).size()[1]

        dim_in = state_dim_in + goal_dim_in

        if max_horizon is not None:
            # Horizon-conditioned variant: the horizon is fed as extra input.
            self.net = CBCNetwork(dim_in,
                                  max_horizon,
                                  dim_out,
                                  layers=layers,
                                  add_conditioning=add_extra_conditioning,
                                  dropout=dropout)
        else:
            self.net = FCNetwork(dim_in, dim_out, layers=layers)
Example #5
0
    def __init__(self,
                 input_shapes,
                 dim_out,
                 input_embeddings=None,
                 layers=[512, 512],
                 freeze_embeddings=False):
        """Build an FC network over several concatenated input embeddings.

        Args:
            input_shapes: per-input tensor shapes (without batch dim).
            dim_out: output width of the final network.
            input_embeddings: one embedding module per input; defaults to a
                ``Flatten()`` for each when None.
            layers: hidden layer widths for the trunk FCNetwork.
            freeze_embeddings: stored flag — presumably consulted elsewhere
                (not visible here).
        """
        super(MultiInputNetwork, self).__init__()
        if input_embeddings is None:
            input_embeddings = [Flatten() for _ in range(len(input_shapes))]

        self.input_embeddings = input_embeddings
        self.freeze_embeddings = freeze_embeddings

        # Probe each embedding with a zero batch of size 1 to discover its
        # output width.  Fix: the original built the probe from
        # np.zeros(...), producing a float64 tensor that fails against
        # float32 module weights for any non-Flatten embedding; use
        # torch.zeros (float32), matching the probing style used elsewhere.
        dim_ins = [
            embedding(torch.zeros((1, ) + tuple(input_shape))).size(1)
            for embedding, input_shape in zip(input_embeddings, input_shapes)
        ]

        full_dim_in = sum(dim_ins)
        self.net = FCNetwork(full_dim_in, dim_out, layers=layers)
Example #6
0
 def flattened(self, tensor):
     """Collapse per-dimension choices into one flat action index.

     `tensor` is expected to be n x self.n_dims; each row is read as the
     digits of a base-`granularity` number (dimension 0 least significant).
     Returns an int32 tensor of n flat indices.
     """
     # Place values: granularity ** [0, 1, ..., n_dims - 1].
     place_values = self.granularity**torch.arange(self.n_dims)
     return (tensor * place_values).sum(1).int()