Example 1
def sampler(P_op, P_skip, num_samples):
    """Draw num_samples operation indices and skip decisions."""
    cat_op = categorical.Categorical(P_op)
    cat_sk = bernoulli.Bernoulli(P_skip)
    ops, sks = cat_op.sample([num_samples]), cat_sk.sample([num_samples])
    return CM.ChildModelBatch(ops, sks)
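The `CM.ChildModelBatch` wrapper above belongs to the surrounding project and is not part of this excerpt. A minimal self-contained sketch of the same batched sampling pattern, with made-up probabilities:

import torch
from torch.distributions import bernoulli, categorical

P_op = torch.tensor([0.5, 0.3, 0.2])   # hypothetical probabilities over 3 candidate ops
P_skip = torch.tensor(0.25)            # hypothetical skip-connection probability

ops = categorical.Categorical(P_op).sample([8])   # LongTensor of shape (8,), values in {0, 1, 2}
sks = bernoulli.Bernoulli(P_skip).sample([8])     # FloatTensor of shape (8,), values in {0., 1.}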
Example 2
    def edge_decision(self, type, alphas, selected_idxs, candidate_flags, probs_history, epoch):
        """Decide, for one cell type, whether an edge can be finalized at this epoch.

        :param type: the type of cell
        :type type: str ('normal' or 'reduce')
        :return: (decision_made, selected_idxs, candidate_flags)
        """
        mat = F.softmax(torch.stack(alphas, dim=0), dim=-1).detach()
        logging.info('alpha: {}'.format(mat))
        importance = torch.sum(mat[:, 1:], dim=-1)
        logging.info(type + " importance {}".format(importance))

        probs = mat[:, 1:] / importance[:, None]
        logging.info(type + " probs {}".format(probs))
        entropy = cate.Categorical(probs=probs).entropy() / math.log(probs.shape[1])
        logging.info(type + " entropy {}".format(entropy))

        if self.use_history:
            # SGAS Cri.2
            logging.info(type + " probs history {}".format(probs_history))
            histogram_inter = self.histogram_average(probs_history, probs)
            logging.info(type + " histogram intersection average {}".format(histogram_inter))
            probs_history.append(probs)
            if (len(probs_history) > self.history_size):
                probs_history.pop(0)

            score = self.normalize(importance) * self.normalize(1 - entropy) * self.normalize(histogram_inter)
            logging.info(type + " score {}".format(score))
        else:
            # SGAS Cri.1
            score = self.normalize(importance) * self.normalize(1 - entropy)
            logging.info(type + " score {}".format(score))

        if torch.sum(candidate_flags.int()) > 0 and epoch >= self.warmup_dec_epoch and \
                (epoch - self.warmup_dec_epoch) % self.decision_freq == 0:
            masked_score = torch.min(score, (2 * candidate_flags.float() - 1) * np.inf)
            selected_edge_idx = torch.argmax(masked_score)
            # add 1 since none op
            selected_op_idx = torch.argmax(probs[selected_edge_idx]) + 1
            selected_idxs[selected_edge_idx] = selected_op_idx

            candidate_flags[selected_edge_idx] = False
            alphas[selected_edge_idx].requires_grad = False
            if type == 'normal':
                reduction = False
            elif type == 'reduce':
                reduction = True
            else:
                raise Exception('Unknown Cell Type')
            candidate_flags, selected_idxs = self.check_edges(candidate_flags, selected_idxs, reduction=reduction)
            logging.info("#" * 30 + " Decision Epoch " + "#" * 30)
            logging.info("epoch {}, {}_selected_idxs {}, added edge {} with op idx {}".format(
                epoch, type, selected_idxs, selected_edge_idx, selected_op_idx))
            logging.info(type + "_candidate_flags {}".format(candidate_flags))
            return True, selected_idxs, candidate_flags

        else:
            logging.info("#" * 30 + " Not a Decision Epoch " + "#" * 30)
            logging.info("epoch {}, {}_selected_idxs {}".format(epoch, type, selected_idxs))
            logging.info(type + "_candidate_flags {}".format(candidate_flags))
            return False, selected_idxs, candidate_flags
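The score above multiplies edge importance by one minus a normalized entropy. A stand-alone sketch of that entropy term (dividing by log K keeps it in [0, 1]); the probabilities here are invented:

import math
import torch
from torch.distributions import categorical as cate

probs = torch.tensor([[0.70, 0.20, 0.10],    # confident edge  -> low normalized entropy
                      [0.34, 0.33, 0.33]])   # undecided edge -> close to 1.0
entropy = cate.Categorical(probs=probs).entropy() / math.log(probs.shape[1])
# entropy has shape (2,), one value per edge, each in [0, 1]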
Example 3
    def sample(self, num_samples=100000, binomial_n=None, visualize=True):
        if binomial_n is None: binomial_n = self.num_workers
        raise NotImplementedError  # left by the author; everything below is unreachable
        # `pdf_at_s` and `s_hat` are defined elsewhere in the original class.
        dist = categorical.Categorical(probs=ch.tensor(pdf_at_s))
        sample = dist.sample((num_samples, ))
        new_s_stars = sample.float() / (len(pdf_at_s) - 1)
        bin_dist = binomial.Binomial(total_count=binomial_n, probs=new_s_stars)

        samples = bin_dist.sample().numpy()
        if visualize:
            xs = np.arange(self.num_workers + 1)

            def make_freqs(ys):
                counts = np.array([(ys == x).sum() for x in xs])
                counts = counts / counts.sum()
                return counts

            plt.bar(xs,
                    make_freqs(samples),
                    label='samples',
                    color='red',
                    alpha=0.5)
            plt.bar(xs, make_freqs(s_hat), label='empirical dist', alpha=0.5)
            plt.legend()
            plt.show()
        return samples
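Because of the `raise NotImplementedError`, the body above is unreachable, and `pdf_at_s` / `s_hat` come from elsewhere in the original class. A minimal sketch of the Categorical-then-Binomial chain it describes, with a made-up `pdf_at_s`:

import torch as ch
from torch.distributions import binomial, categorical

num_workers = 10
pdf_at_s = [0.1, 0.2, 0.4, 0.2, 0.1]                   # hypothetical discretized pdf over s* in [0, 1]

dist = categorical.Categorical(probs=ch.tensor(pdf_at_s))
sample = dist.sample((1000, ))                         # grid indices
new_s_stars = sample.float() / (len(pdf_at_s) - 1)     # map indices back to [0, 1]
bin_dist = binomial.Binomial(total_count=num_workers, probs=new_s_stars)
samples = bin_dist.sample().numpy()                    # one Binomial(num_workers, s*) draw per s*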
Example 4
def optimize_model(device, model, optimizer, rewards, actions, states):
    L1, L2 = 0, 0

    # Compute g
    T = len(rewards)
    g = np.zeros(T)
    g[-1] = rewards[-1]
    for i in range(T - 2, -1, -1):
        g[i] = rewards[i] + GAMMA * g[i + 1]
    g = torch.tensor(g, dtype=torch.float, device=device)

    # Compute pi
    states = torch.tensor(states, dtype=torch.float, device=device)
    actions = torch.tensor(actions, dtype=torch.float, device=device)

    pi, v = model(states)
    v = v.squeeze(1)
    actual_log_prob = cat.Categorical(pi)
    actual_log_prob = actual_log_prob.log_prob(actions)

    # Compute L
    for t in range(T):
        L1 += -(GAMMA**t) * (g[t] - v[t].detach()) * actual_log_prob[t]

    L2 = F.smooth_l1_loss(g, v)
    loss = L1 + LOSS2_C * L2

    # Optimize model
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
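The per-timestep loop for L1 can equivalently be written with tensor operations; a vectorized sketch using the `g`, `v`, and `actual_log_prob` tensors defined above (not part of the original code):

discounts = GAMMA ** torch.arange(T, dtype=torch.float, device=device)
L1 = -(discounts * (g - v.detach()) * actual_log_prob).sum()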
Example 5
def optimize_model(device, model, optimizer, rewards, actions, states):

    # Convert to tensors
    states = torch.tensor(states, dtype=torch.float, device=device)
    actions = torch.tensor(actions, dtype=torch.float, device=device)
    rewards = torch.tensor(rewards, dtype=torch.float, device=device)

    # Compute pi and v
    pi, v = model(states)
    v = v.squeeze(1)
    actual_log_prob = cat.Categorical(pi)
    actual_log_prob = actual_log_prob.log_prob(actions)

    # Compute Losses
    T, R = len(rewards), 0
    if T > MAX_T:
        _, R = model(torch.unsqueeze(states[-1], 0))

    L1, L2 = 0, 0
    for t in range(T - 1, -1, -1):
        R = rewards[t] + GAMMA * R
        L1 -= actual_log_prob[t] * (R - v[t]).detach()
        L2 += F.smooth_l1_loss(R, v[t])
    loss = L1 + LOSS2_C * L2

    # Optimize model
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
Example 6
    def sample(self, logits):
        """
        Sample distribution
        """
        logits_tensor = torch.tensor(logits)
        logits_tensor_soft = F.softmax(logits_tensor, dim=-1)
        m = cat.Categorical(logits_tensor_soft)

        return m.sample()
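`Categorical` also accepts unnormalized logits directly, so the explicit softmax above is not required. A minimal equivalent sketch:

import torch
from torch.distributions import categorical as cat

logits = torch.tensor([2.0, 0.5, -1.0])
m = cat.Categorical(logits=logits)   # softmax is applied internally
sample = m.sample()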
Example 7
def sample_class_weights(class_weights, n_samples=1):
    """
    draw a sample from Categorical variable with
    probabilities class_weights
    """

    # draw a sample from Categorical variable with
    # probabilities class_weights

    assert not torch.any(torch.isnan(class_weights))
    cat_rv = categorical.Categorical(probs=class_weights)
    return cat_rv.sample((n_samples, )).detach().squeeze()
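A short usage sketch (the weights are invented): with a 2-D `class_weights`, `Categorical` treats the leading dimension as a batch, so `sample((n_samples,))` returns one draw per row per sample:

import torch
from torch.distributions import categorical

class_weights = torch.tensor([[0.2, 0.8],
                              [0.5, 0.5],
                              [0.9, 0.1]])   # batch of three 2-class distributions
cat_rv = categorical.Categorical(probs=class_weights)
draws = cat_rv.sample((4, ))                 # shape (4, 3): 4 samples for each of the 3 rows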
Example 8
    def _sample_trajectory_disceret(
            self, initial_states: Tensor,
            previous_action) -> Tuple[Tensor, Tensor, Tensor]:
        """Randomly samples T actions and computes the trajectory.

            :returns: (sequence of states, sequence of actions, costs)
            """
        actions = categorical.Categorical(
            torch.ones(self.num_actions) /
            self.num_actions).sample(sample_shape=(self._num_rollouts,
                                                   self._time_horizon, 1))
        if previous_action is not None:
            actions[0, :-1, 0] = previous_action[1:self._time_horizon, 0]

        # One more state than the time horizon because of the initial state.
        trajectories = torch.empty(
            (self.no_models, self._num_rollouts, self._time_horizon + 1, self._state_dimen),
            device=initial_states.device)
        trajectories[:, :, 0, :] = initial_states
        objective_costs = torch.zeros(
            (self.no_models, self._time_horizon, self._num_rollouts),
            device=initial_states.device)
        dones = torch.zeros((self.no_models, self._num_rollouts),
                            device=initial_states.device)

        for t in range(self._time_horizon):
            for d, dynamic in enumerate(self._dynamics):
                next_states, costs, done = dynamic.step(
                    trajectories[d, :, t, :], actions[:, t, 0])
                # assert_shape(next_states, (self._num_rollouts, self._state_dimen))
                # assert_shape(costs, (self._num_rollouts,))
                trajectories[d, :, t + 1, :] = next_states
                dones[d, :] = torch.maximum(done, dones[d, :])
                # `gamma` is a discount factor defined elsewhere in the original module.
                objective_costs[d, t, :] = gamma ** t * costs * (1 - dones[d, :])
        if self.mountain_car:
            for d in range(self.no_models):
                objective_costs[d, :, :] -= 0.01 * torch.max(trajectories[d, :, :, 0], 1)[0]
        objective_costs = torch.mean(objective_costs, 0)  # average over the model ensemble
        objective_costs = torch.sum(objective_costs, 0)   # sum discounted costs over time
        return trajectories, actions, objective_costs
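The sampling at the top of this method draws uniformly distributed discrete actions; a stripped-down sketch of just that step, with made-up sizes:

import torch
from torch.distributions import categorical

num_actions, num_rollouts, time_horizon = 3, 5, 10
uniform = categorical.Categorical(torch.ones(num_actions) / num_actions)
actions = uniform.sample(sample_shape=(num_rollouts, time_horizon, 1))
# actions has shape (5, 10, 1) with integer values in {0, 1, 2}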
Example 9
def select_action(state, model, device, steps_done):

    sample = random.random()
    eps_threshold = EPS_END + (EPS_START - EPS_END) \
                    * max((1 - steps_done / EPS_DECAY), 0)

    if sample > eps_threshold:
        with torch.no_grad():
            state = torch.tensor([state], dtype=torch.float, device=device)
            pi, _ = model(state)
            return cat.Categorical(pi).sample().item()
    else:
        return random.randrange(5)
Example 10
    def get_negative_sampler(self, smooth_par=0.75):
        """Build a Categorical negative sampler over nodes, with probabilities
        proportional to node degree ** smooth_par."""

        node_idx, node_degrees = np.unique(self.edge_index[0, :],
                                           return_counts=True)

        # there may be isolated nodes that are not present in edge_index
        all_degrees = np.zeros(self.n_x)
        all_degrees[node_idx] = node_degrees

        Pn = all_degrees**smooth_par
        Pn = Pn / np.sum(Pn)

        self.neg_sampler = categorical.Categorical(torch.from_numpy(Pn))
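A short usage sketch of the resulting sampler (the degrees are invented): isolated nodes get zero probability and are never drawn, while high-degree nodes are drawn more often:

import numpy as np
import torch
from torch.distributions import categorical

all_degrees = np.array([4.0, 1.0, 0.0, 2.0])   # hypothetical degrees; node 2 is isolated
Pn = all_degrees ** 0.75
Pn = Pn / np.sum(Pn)
neg_sampler = categorical.Categorical(torch.from_numpy(Pn))
negatives = neg_sampler.sample((5, ))          # 5 negative node indices; never index 2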
Example 11
    def __init__(self, in_channels, kernel_size):
        super(Shift, self).__init__()
        self.channels = in_channels
        self.kernel_size = kernel_size
        if kernel_size == 3:
            p = torch.Tensor([0.3, 0.4, 0.3])
        elif kernel_size == 5:
            p = torch.Tensor([0.1, 0.25, 0.3, 0.25, 0.1])
        elif kernel_size == 7:
            p = torch.Tensor([0.075, 0.1, 0.175, 0.3, 0.175, 0.1, 0.075])
        elif kernel_size == 9:
            p = torch.Tensor([0.05, 0.075, 0.1, 0.175, 0.2, 0.175, 0.1, 0.075, 0.05])
        else:
            raise RuntimeError('Unsupported kernel size')

        shift_t = categorical.Categorical(p).sample((in_channels, 2)) - (kernel_size // 2)
        self.register_buffer('shift_t', shift_t.int())
Example 12
 def sample_action(self, policy_parameters):
     if self.discrete:
         sy_logits_na = policy_parameters
         sy_sampled_ac = Cat.Categorical(logits=sy_logits_na)
     else:
         if policy_parameters.dim() == 1:
             sy_mean = policy_parameters[:self.ac_dim]
             sy_logstd = policy_parameters[self.ac_dim:]
         else:
             sy_mean = policy_parameters[:, :self.ac_dim]
             sy_logstd = policy_parameters[:, self.ac_dim:]
         #print(sy_mean)
         sy_sampled_ac = Norm.Normal(loc=sy_mean,
                                     scale=torch.exp(sy_logstd))
     return sy_sampled_ac.sample()
Example 13
 def get_log_prob(self, policy_parameters, sy_ac_na):
     if self.discrete:
         sy_logits_na = policy_parameters
         sy_sampled_ac = Cat.Categorical(logits=sy_logits_na)
     else:
         if policy_parameters.dim() == 1:
             sy_mean = policy_parameters[:self.ac_dim]
             sy_logstd = policy_parameters[self.ac_dim:]
         else:
             sy_mean = policy_parameters[:, :self.ac_dim]
             sy_logstd = policy_parameters[:, self.ac_dim:]
         sy_sampled_ac = Norm.Normal(loc=sy_mean.view(self.ac_dim, -1),
                                     scale=torch.exp(sy_logstd.view(self.ac_dim, -1)))
     return sy_sampled_ac.log_prob(sy_ac_na)
Example 14
 def sample_episode(self):
     self.total_rewards = 0
     self.losses = []
     done = False
     state = self.state
     while not done:
         probs = (self.model((torch.from_numpy(state).unsqueeze(0)).float().to(device)))
         m = c.Categorical(probs)
         action = m.sample()
         next_state, reward, done, _ = self.env.step(action.item())
         self.episode.append([state,action,reward])
         self.losses.append(-(m.log_prob(action)))
         self.env.render()
         state = next_state
         self.total_rewards += reward   
     self.G = []
     for i in range(len(self.episode)):
         self.G.append(0)
         for j in range(i,len(self.episode)):
             _,_,r = self.episode[j]
             self.G[-1] += (GAMMA**(j-i))*r
Example 15
    def train_loop(self):
        probs, v = (self.model(
            (torch.from_numpy(self.state).unsqueeze(0).float().to(device))))
        probs = torch.squeeze(probs)
        m = c.Categorical(probs)
        action = m.sample()
        next_state, reward, done, _ = self.env.step(action.item())
        self.env.render()
        _, v_prime = self.model(
            torch.from_numpy(next_state).unsqueeze(0).float().to(device))
        # probs2 = torch.squeeze(probs2)
        # q = self.critic(torch.from_numpy(self.state).unsqueeze(0).float().to(device))
        # probs2 = (self.actor((torch.from_numpy(next_state).unsqueeze(0)).float().to(device)))
        # m2 = c.Categorical(probs2)
        # action2 = m2.sample().item()
        if done:
            v_prime[0] = 0
        td_error = reward + GAMMA * v_prime[0] - v[0]
        loss_a = -m.log_prob(action) * td_error
        loss_obj = nn.MSELoss()
        loss_c = loss_obj(
            torch.tensor([reward], device=device).float() +
            GAMMA * v_prime[0].float(), v[0].float())
        loss = loss_a + loss_c
        # self.optimizer_actor.zero_grad()
        # self.optimizer_critic.zero_grad()
        self.optimizer.zero_grad()
        # loss_a.backward()
        # loss_c.backward()
        loss.backward()
        # self.optimizer_actor.step()
        # self.optimizer_critic.step()
        self.optimizer.step()

        # update next state
        if done:
            self.state = self.env.reset()
        else:
            self.state = next_state
        return done, reward
Example 16
 def act(self, state, deterministic=False):
     x, v = self(state)
     if self.is_continuous:
         if deterministic:
             action = x
             action_log_prob = None
             entropy = None
         else:
             c = normal.Normal(x, self.pi.log_std.exp())
             action = c.sample()
             action_log_prob = c.log_prob(action).mean()
             entropy = c.entropy()
     else:  # discrete
         if deterministic:
             action = torch.max(F.log_softmax(x, dim=1), dim=1)[1]
             action_log_prob = None
             entropy = None
         else:
             c = categorical.Categorical(logits=F.log_softmax(x, dim=1))
             action = c.sample()
             action_log_prob = c.log_prob(action)
             entropy = c.entropy()
     return action, action_log_prob, v, entropy
Example 17
def edge_decision(type, alphas, selected_idxs, candidate_flags, probs_history,
                  epoch, model, args):
    mat = F.softmax(torch.stack(alphas, dim=0), dim=-1).detach()
    print(mat)
    importance = torch.sum(mat[:, 1:], dim=-1)
    # logging.info(type + " importance {}".format(importance))

    probs = mat[:, 1:] / importance[:, None]
    # print(type + " probs", probs)
    entropy = cate.Categorical(probs=probs).entropy() / math.log(
        probs.size()[1])
    # logging.info(type + " entropy {}".format(entropy))

    if args.use_history:  # SGAS Cri.2
        # logging.info(type + " probs history {}".format(probs_history))
        histogram_inter = histogram_average(probs_history, probs)
        # logging.info(type + " histogram intersection average {}".format(histogram_inter))
        probs_history.append(probs)
        if (len(probs_history) > args.history_size):
            probs_history.pop(0)

        score = utils.normalize(importance) * utils.normalize(
            1 - entropy) * utils.normalize(histogram_inter)
        # logging.info(type + " score {}".format(score))
    else:  # SGAS Cri.1
        score = utils.normalize(importance) * utils.normalize(1 - entropy)
        # logging.info(type + " score {}".format(score))

    if torch.sum(candidate_flags.int()) > 0 and \
            epoch >= args.warmup_dec_epoch and \
            (epoch - args.warmup_dec_epoch) % args.decision_freq == 0:
        masked_score = torch.min(score,
                                 (2 * candidate_flags.float() - 1) * np.inf)
        selected_edge_idx = torch.argmax(masked_score)
        selected_op_idx = torch.argmax(
            probs[selected_edge_idx]) + 1  # add 1 since none op
        selected_idxs[selected_edge_idx] = selected_op_idx

        candidate_flags[selected_edge_idx] = False
        alphas[selected_edge_idx].requires_grad = False
        if type == 'normal':
            reduction = False
        elif type == 'reduce':
            reduction = True
        else:
            raise Exception('Unknown Cell Type')
        candidate_flags, selected_idxs = model.check_edges(
            candidate_flags, selected_idxs)
        logging.info("#" * 30 + " Decision Epoch " + "#" * 30)
        logging.info(
            "epoch {}, {}_selected_idxs {}, added edge {} with op idx {}".
            format(epoch, type, selected_idxs, selected_edge_idx,
                   selected_op_idx))
        print(type + "_candidate_flags {}".format(candidate_flags))
        score_image(type, score, epoch)
        return True, selected_idxs, candidate_flags

    else:
        logging.info("#" * 30 + " Not a Decision Epoch " + "#" * 30)
        logging.info("epoch {}, {}_selected_idxs {}".format(
            epoch, type, selected_idxs))
        print(type + "_candidate_flags {}".format(candidate_flags))
        score_image(type, score, epoch)
        return False, selected_idxs, candidate_flags
Example 18
        ]].values
        # (the class header and most of __init__ are cut off above in this excerpt)
#        self.y=y.values

    def __getitem__(self, ind):
        return torch.FloatTensor(self.con[ind,:]),\
                            self.uniq2[ind,:],\
                            self.uniq3[ind,:],\
                            self.uniq4[ind,:]

    def __len__(self):
        return self.uniq2.shape[0]

testdf = pd.read_csv(r'test.csv')
testdf.shape
test = HealthDatasetPred(testdf)
test_ldr = dataloader.DataLoader(test, batch_size=testdf.shape[0])
tst_ldr = iter(test_ldr)
con, x2, x3, x4 = next(tst_ldr)
model = torch.load('cat_embed.pkl')

pred = model(con, x2, x3, x4)
from torch.distributions import categorical
cat = categorical.Categorical(pred)  # `pred` must be non-negative; Categorical normalizes it along the last dim
res = cat.sample()

testdf.index
testdf['class'] = res
testdf['class'].to_csv('cat_embed.csv')
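One caveat, since the excerpt does not show the model head: `Categorical(pred)` treats `pred` as (unnormalized) probabilities, which must be non-negative. If the model returns raw logits, passing them via `logits=` is safer, and a deterministic prediction would simply take the argmax. A hedged sketch:

# assuming `pred` holds raw class scores (logits) of shape (n_rows, n_classes)
cat = categorical.Categorical(logits=pred)
sampled = cat.sample()        # stochastic class labels
hard = pred.argmax(dim=1)     # deterministic alternative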
Example 19
def actor_critic(device="cpu"):
    discount_factor = 0.7
    lr = 1e-3
    random_chance = 0.05
    save_path = "actor_critic/"
    train = True

    fenv = FceuxNesEmulatorEnvironment()
    policy_estimator = PolicyEstimator()
    value_estimator = ValueEstimator()

    policy_optimizer = torch.optim.Adam(policy_estimator.parameters(), lr=lr)
    value_optimizer = torch.optim.Adam(value_estimator.parameters(), lr=lr)

    if os.path.isfile(save_path + "policy_estimator"):
        policy_estimator.load_state_dict(
            torch.load(save_path + "policy_estimator"))
        print("Policy estimator loaded")
    if os.path.isfile(save_path + "value_estimator"):
        value_estimator.load_state_dict(
            torch.load(save_path + "value_estimator"))
        print("Value estimator loaded")

    if train:
        avg_reward = 0
        avg_length = 0
        for i in range(200000):
            state = fenv.reset()
            episode_reward = 0.0
            episode_length = 0
            rewards = []
            states = []
            actions = []
            for t in itertools.count():
                action_probs = policy_estimator(state)
                #print(action_probs)
                if np.random.uniform() < random_chance:
                    action = torch.FloatTensor(1).random_(0, 255).detach()[0]
                else:
                    action = cat.Categorical(action_probs).sample().detach()
                #print(action)
                true_act = toAction(action)
                next_state, reward, done, _ = fenv.step(true_act)
                rewards.append(reward)
                states.append(state)
                actions.append(action)
                episode_reward += reward
                episode_length = t

                next_value = value_estimator(next_state)
                target_value = reward + discount_factor * next_value
                predict_value = value_estimator(state)

                advance = target_value.detach() - predict_value

                value_loss = (target_value.detach() - predict_value)**2
                value_optimizer.zero_grad()
                value_loss.backward()
                value_optimizer.step()

                m = cat.Categorical(action_probs)
                #action_prob = action_probs[action]
                policy_loss = -m.log_prob(action) * advance.detach()
                # print(policy_loss)
                policy_optimizer.zero_grad()
                policy_loss.backward()
                policy_optimizer.step()

                if done:
                    break
                state = next_state

            #print("Episode reward: {}".format(episode_reward))
            #print("Episode length: {}".format(episode_length))

            avg_reward += episode_reward
            avg_length += episode_length

            # print("Average reward: {}".format(avg_reward/(i+1)))
            avg_reward = 0
            avg_length = 0

            print("Saving model...")
            torch.save(policy_estimator.state_dict(),
                       save_path + "policy_estimator")
            torch.save(value_estimator.state_dict(),
                       save_path + "value_estimator")
Example 20
            obs_history[i] = np.vstack((obs_t_minus_0[i], obs_t_minus_1[i], obs_t_minus_2[i],
                                        obs_t_minus_3[i], obs_t_minus_4[i], obs_t_minus_5[i]))
        if isinstance(obs_history, np.ndarray):
            obs_history = th.from_numpy(obs_history).float()
        length = 0
        for t in range(MAX_STEPS):
            obs_history = obs_history.type(FloatTensor)
            action_probs = maddpg.select_action(obs_history, pose).data.cpu()
            action_probs_valid = np.copy(action_probs)
            action = []
            for i, probs in enumerate(action_probs):
                rbt = world.robots[i]
                for j, frt in enumerate(rbt.get_frontiers()):
                    if len(frt) == 0:
                        action_probs_valid[i][j] = 0
                action.append(categorical.Categorical(probs=th.tensor(action_probs_valid[i])).sample())
            action = th.tensor(onehot_from_action(action))
            acts = np.argmax(action, axis=1)

            obs_, reward, done, _, next_pose = world.step(acts)
            length = length+np.sum(world.path_length)
            next_pose = th.tensor(next_pose)
            reward = th.FloatTensor(reward).type(FloatTensor)
            obs_ = np.stack(obs_)
            obs_ = th.from_numpy(obs_).float()

            obs_t_minus_5 = copy(obs_t_minus_4)
            obs_t_minus_4 = copy(obs_t_minus_3)
            obs_t_minus_3 = copy(obs_t_minus_2)
            obs_t_minus_2 = copy(obs_t_minus_1)
            obs_t_minus_1 = copy(obs_t_minus_0)
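The masking above zeroes out actions whose frontier list is empty before sampling; this works because `Categorical` renormalizes whatever non-negative weights it receives. A small isolated sketch with invented probabilities:

import torch as th
from torch.distributions import categorical

probs = th.tensor([0.4, 0.3, 0.2, 0.1])
probs_valid = probs.clone()
probs_valid[2] = 0                                            # action 2 has no frontier
action = categorical.Categorical(probs=probs_valid).sample()  # never returns 2; remaining mass is renormalized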
Example 21
def sample_next_char_id(predicted_logits):
    next_char_id = categorical.Categorical(logits=predicted_logits).sample()
    return next_char_id
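A common extension (not part of the original helper) is temperature scaling before sampling: dividing the logits by a temperature above 1 flattens the distribution, below 1 sharpens it:

def sample_next_char_id_with_temperature(predicted_logits, temperature=1.0):
    # `temperature` is a hypothetical knob added for illustration
    return categorical.Categorical(logits=predicted_logits / temperature).sample()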