Example no. 1
    def generate_samples(self, init_stroke, max_len):
        prev_state = None
        prev_strokes = []
        init_stroke = init_stroke.unsqueeze(0).unsqueeze(0)
        for i in range(max_len):

            e, pi, mu1, mu2, sig1, sig2, ro, prev_state = self.forward(
                init_stroke, prev_state)
            # squeeze: 1 x seq_len x dim -> seq_len x dim
            e = e.squeeze(0)
            samples = self.multibivariate_sampling(pi.squeeze(0),
                                                   mu1.squeeze(0),
                                                   mu2.squeeze(0),
                                                   sig1.squeeze(0),
                                                   sig2.squeeze(0),
                                                   ro.squeeze(0))
            e = Bernoulli(e)
            e = e.sample()

            #e = e.unsqueeze(-1)
            #print(samples)
            init_stroke = torch.cat((e, samples.cuda()), 1)

            prev_strokes.append(init_stroke)
            init_stroke = init_stroke.unsqueeze(0)
            #print(pre_strokes.shape)
        return torch.stack(prev_strokes, 1)
Example no. 2
    def __init__(self,
                 n_features,
                 mid_dim,
                 embed_agents,
                 policy_type='epsilon_greedy',
                 epsilon_greedy=0.1,
                 eval_epsilon_greedy=0.0):
        super(Speaker, self).__init__()

        self._embed_agents = embed_agents
        self._epsilon_greedy = epsilon_greedy
        self._eval_epsilon_greedy = eval_epsilon_greedy
        self._n_features = n_features
        self._mid_dim = mid_dim
        self._n_layers = 2
        self._policy_type = policy_type

        # Used to generate agent embedding.
        self._lstm = nn.LSTM(n_features,
                             mid_dim,
                             batch_first=True,
                             num_layers=self._n_layers)

        # Q-learning.
        self._Q = nn.Sequential(nn.Linear(n_features + mid_dim, mid_dim),
                                nn.BatchNorm1d(mid_dim), nn.ELU(),
                                nn.Linear(mid_dim, n_features))

        self._h = nn.Parameter(
            torch.empty((
                self._n_layers,
                1,
                mid_dim,
            ), requires_grad=True))
        nn.init.uniform_(self._h, -0.1, 0.1)

        self._c = nn.Parameter(
            torch.empty((
                self._n_layers,
                1,
                mid_dim,
            ), requires_grad=True))
        nn.init.uniform_(self._c, -0.1, 0.1)

        # Used to embed state.
        self._state = nn.Sequential(nn.Linear(n_features, n_features),
                                    nn.BatchNorm1d(n_features), nn.ELU(),
                                    nn.Linear(n_features, n_features),
                                    nn.BatchNorm1d(n_features), nn.ELU())

        # Attribute selection policy.
        self._selection_policy = nn.Sequential(
            nn.Linear(n_features + mid_dim, mid_dim), nn.BatchNorm1d(mid_dim),
            nn.ELU(), nn.Linear(mid_dim, n_features), nn.Softmax(dim=1))

        self._log_probs = None

        # Epsilon-greedy attribute selection policy.
        self._epsilon = Bernoulli(torch.tensor([epsilon_greedy]))
        self._eval_epsilon = Bernoulli(torch.tensor([eval_epsilon_greedy]))
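
The two Bernoulli members above act as exploration coins for the epsilon-greedy attribute-selection policy. The Speaker's actual selection step is not shown; below is only a generic sketch of how such a coin typically gates exploration (all names and shapes are illustrative).

import torch
from torch.distributions import Bernoulli

# Generic epsilon-greedy gating with a Bernoulli coin (illustrative sketch only;
# the Speaker's real selection logic is not part of this snippet).
epsilon_coin = Bernoulli(torch.tensor([0.1]))
scores = torch.randn(5)                                   # e.g. per-attribute scores
if epsilon_coin.sample().item() == 1:
    choice = torch.randint(0, scores.numel(), (1,))       # explore: uniform random
else:
    choice = scores.argmax().unsqueeze(0)                 # exploit: greedy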
Example no. 3
    def forward(self, batch_inputs):
        # Embed each feature
        merged_input = []
        # Get longest feature
        # Assume the longest feature is the one that receives word dropout below
        max_words = max(self.feature_lengths)

        for input, feature_len in zip(batch_inputs, self.feature_lengths):
            concat_sentence = input
            concat_sentence = torch.tensor(concat_sentence, dtype=torch.long)

            embeddings = self.embeddings(concat_sentence)

            if feature_len == max_words:
                # Set up success rate (rate of selecting the word) as 1 - dropout rate
                bernoulli = Bernoulli(1 - self.dropout_rate)
                rw = bernoulli.sample(torch.Size((embeddings.shape[0], embeddings.shape[1]))).numpy()
                # Use zeros at where rw is zero
                embeddings = torch.from_numpy(np.expand_dims(rw, 2)) * embeddings

            merged_input.append(embeddings)

        # Final output
        final_input = torch.cat(merged_input, dim=1)
        final_input = final_input.view(len(final_input), -1)
        out = torch.tanh(self.linear1(final_input))
        out = torch.tanh(self.linear2(out))
        out = torch.tanh(self.linear3(out))
        out = torch.tanh(self.linear4(out))

        out = F.relu(self.linear5(out))

        out = self.output_layer(out)
        return out
Example no. 4
 def __init__(self,
              args,
              model,
              loss_fn=nn.CrossEntropyLoss(reduction="sum"),
              decay_factor=1.,
              attack_ball='Linf',
              eps=0.3,
              eps_iter=0.01,
              n_iter=50,
              clip_max=1.,
              clip_min=0.):
     super(DIM, self).__init__(model,
                               loss_fn=loss_fn,
                               eps=eps,
                               nb_iter=n_iter,
                               decay_factor=decay_factor,
                               eps_iter=eps_iter,
                               clip_min=clip_min,
                               clip_max=clip_max)
     self.model = model
     self.eps = eps
     self.eps_iter = eps_iter
     self.n_iter = n_iter
     self.clip_min = clip_min
     self.clip_max = clip_max
     self.attack_ball = attack_ball
     self.momentum = args.momentum
     self.transform_prob = args.transform_prob
     self.apply_transform = Bernoulli(torch.tensor([self.transform_prob]))
     self.resize_factor = args.resize_factor
     self.args = args
Example no. 5
def entropy_loss(arch_params):
    loss = []
    for arch_param in arch_params:
        probs = Bernoulli(logits=arch_param)
        loss.append(probs.entropy().mean())
    loss = torch.mean(torch.stack(loss))
    return loss
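
A minimal usage sketch of entropy_loss, assuming the architecture parameters are groups of Bernoulli gate logits (the placeholder tensors below are not from the original code):

import torch

# Hypothetical architecture parameters: two groups of Bernoulli gate logits.
arch_params = [torch.zeros(4, requires_grad=True), torch.zeros(6, requires_grad=True)]
loss = entropy_loss(arch_params)   # maximal at logits == 0, i.e. p = 0.5
loss.backward()                    # gradients flow back into the logits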
Example no. 6
    def generate_samples(self, init_stroke, char, max_len):
        # char 1 x char_len
        prev_state = None
        prev_offset = None
        prev_w = None
        init_stroke = init_stroke.unsqueeze(0).unsqueeze(
            0).float().cuda()  # 1 x 1 x 3
        char_mask = torch.ones_like(char)
        strokes = []
        for i in range(max_len):
            e, pi, mu1, mu2, sig1, sig2, ro, prev_state, phi, prev_offset, prev_w = self.forward(
                init_stroke, char, char_mask, prev_state, prev_offset, prev_w)

            e = e.squeeze(0)
            sample_mixture = self.multibivariate_sampling(
                pi.squeeze(0), mu1.squeeze(0), mu2.squeeze(0), sig1.squeeze(0),
                sig2.squeeze(0), ro.squeeze(0))
            #print(e)
            e = Bernoulli(e)
            e = e.sample()

            init_stroke = torch.cat((e, sample_mixture.cuda()), 1)  # 1 x 3

            strokes.append(init_stroke)

            init_stroke = init_stroke.unsqueeze(0)

            if phi.max(1)[1].item() > char.shape[1] - 1:  #exit
                break
        return torch.stack(strokes, 1)
Example no. 7
 def __init__(self,
              params,
              alpha=0.001,
              B=100,
              p=5,
              sigma=1,
              delta=0.1,
              eta=0.01):
     if alpha < 0.0:
         raise ValueError("Invalid learning rate: {}".format(alpha))
     if B < 0.0:
         raise ValueError("Invalid B value: {}".format(B))
     if p < 0.0:
         raise ValueError("Invalid p value: {}".format(p))
     if sigma < 0.0:
         raise ValueError("Invalid sigma value: {}".format(sigma))
     if delta < 0.0:
         raise ValueError("Invalid delta value: {}".format(delta))
     if eta < 0.0:
         raise ValueError("Invalid eta value: {}".format(eta))
     self.delta = delta
     self.eta = eta
     defaults = dict(params=params,
                     alpha=alpha,
                     B=B,
                     p=p,
                     sigma=sigma,
                     delta=delta,
                     eta=eta)
     super(Natasha2, self).__init__(params, defaults)
     self.bern = Bernoulli(torch.tensor([0.5]))
Example no. 8
    def __init__(self, **kwargs):
        self.gamma = kwargs.get("gamma", 0.5)
        self.epsilon = kwargs.get("epsilon", 0.1)

        self.explore = Bernoulli(torch.tensor(self.epsilon))
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]
Example no. 9
    def probabalistic_greedy(self, rewards, resample_flag=False):

        # if we are not re-sampling during this call
        if not resample_flag:
            iw = torch.sum(rewards, dim=1)
            iw = iw - torch.max(iw)
            iw = torch.exp(iw)
            iw = iw.reshape(-1)
            iw = iw / torch.sum(iw)
            return iw
        # otherwise
        else:
            # set the current iw
            iw = torch.sum(rewards, dim=1)
            iw = iw - torch.max(iw)
            iw = torch.exp(iw)
            iw = iw.reshape(-1)
            iw = iw / torch.sum(iw)
            # set up scaled bernoulli
            next_iw_dist = Bernoulli(iw**self.alpha)
            # keep each weight with probability iw**alpha, otherwise zero it
            iw = next_iw_dist.sample() * iw
            # rescale everything
            iw = iw / torch.sum(iw)
            # return everything
            return iw
Example no. 10
    def __init__(self, in_dim, device, z_dim=64, noise_dim=[150, 100, 50]):
        super(SIVAE, self).__init__()

        self.noise = Bernoulli(probs=0.5)
        self.z_dim = z_dim
        self.noise_dim = noise_dim
        self.device = device

        self.hiddel_l3 = nn.Sequential(nn.Linear(in_dim + noise_dim[0], 500),
                                       nn.ReLU(), nn.Linear(500, 500),
                                       nn.ReLU(), nn.Linear(500, noise_dim[0]),
                                       nn.ReLU())

        self.hiddel_l2 = nn.Sequential(
            nn.Linear(in_dim + noise_dim[0] + noise_dim[1], 500), nn.ReLU(),
            nn.Linear(500, 500), nn.ReLU(), nn.Linear(500, noise_dim[1]),
            nn.ReLU())

        self.hiddel_l1 = nn.Sequential(
            nn.Linear(in_dim + noise_dim[1] + noise_dim[2], 500), nn.ReLU(),
            nn.Linear(500, 500), nn.ReLU(), nn.Linear(500, 500), nn.ReLU())

        self.mu = nn.Linear(500, z_dim)

        self.z_logvar = nn.Sequential(nn.Linear(in_dim, 500), nn.ReLU(),
                                      nn.Linear(500, 500), nn.ReLU(),
                                      nn.Linear(500, z_dim))

        self.decoder = nn.Sequential(nn.Linear(z_dim, 500), nn.ReLU(),
                                     nn.Linear(500, 500), nn.ReLU(),
                                     nn.Linear(500, 500), nn.ReLU(),
                                     nn.Linear(500, in_dim))
Example no. 11
    def optimizer_step(self, sample):
        sample_observation_initial_context, sample_action_T, sample_next_observation_T, sample_reward_T = sample
        image_probs, reward_probs = self.model.forward_multiple(
            sample_observation_initial_context, sample_action_T)

        # reward loss
        true_reward = numerical_reward_to_bit_array(
            sample_reward_T, self.reward_prediction_bits, self.use_cuda)
        reward_loss = self.reward_criterion(reward_probs, true_reward)

        # image loss
        reconstruction_loss = self.frame_criterion(image_probs,
                                                   sample_next_observation_T)

        loss = reconstruction_loss + self.reward_loss_coef * reward_loss

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # The minimal cross entropy between distributions p and q is the entropy of p,
        # so when the two are equal the loss equals the entropy of p.
        true_entropy = Bernoulli(probs=sample_next_observation_T).entropy()
        normalized_frame_loss = reconstruction_loss - true_entropy.mean()
        return (normalized_frame_loss, reward_loss), (image_probs,
                                                      reward_probs)
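
The normalization above subtracts the entropy of the target frame distribution, so the reported frame loss behaves like a KL divergence and reaches zero for a perfect model. A quick sanity check of that identity, assuming an element-wise binary cross-entropy criterion:

import torch
import torch.nn.functional as F
from torch.distributions import Bernoulli

p = torch.tensor([0.1, 0.3, 0.5, 0.7, 0.9])
# Cross entropy of a distribution with itself equals its entropy,
# so (cross entropy - entropy) >= 0 with equality when prediction matches target.
bce = F.binary_cross_entropy(p, p, reduction='none')
print(torch.allclose(bce, Bernoulli(probs=p).entropy()))   # True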
Example no. 12
    def forward(self, seq, seq_lens):

        if self.training:
            word_dropout = Bernoulli(self.word_dropout).sample(seq.shape)
            word_dropout = word_dropout.type(torch.LongTensor)
            seq = seq.cpu()
            seq = seq * word_dropout
            seq = seq.cuda()

        embedded_seq = self.embed(seq)
        embedded_seq = self.input_dropout(embedded_seq)

        encoder_input = nn.utils.rnn.pack_padded_sequence(embedded_seq,
                                                          seq_lens,
                                                          batch_first=True,
                                                          enforce_sorted=False)

        encoder_hidden, (h_0, c_0) = self.encoder(encoder_input)
        encoder_hidden, _ = nn.utils.rnn.pad_packed_sequence(encoder_hidden,
                                                             batch_first=True)
        encoder_hidden = self.output_dropout(encoder_hidden)

        final_hidden = encoder_hidden[torch.arange(encoder_hidden.size(0)),
                                      seq_lens - 1, :]

        # TODO Highway layers

        return final_hidden, encoder_hidden
Example no. 13
class ucbJanken():
    '''UCB algorithm with epsilon-greedy selection
    kwargs:
        gamma (float): exploration constant
        epsilon (float): probability of choosing randomly
        reset_prob (float): probability of resetting
    '''
    def __init__(self, **kwargs):
        self.gamma = kwargs.get("gamma", 0.5)
        self.epsilon = kwargs.get("epsilon", 0.1)
        self.reset_prob = kwargs.get("reset_prob", 0.2)
        self.coin = Bernoulli(torch.tensor(self.reset_prob))

        self.explore = Bernoulli(torch.tensor(self.epsilon))
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]

    def __str__(self):
        return f"ucb: gamma = {self.gamma:.3f}, epsilon = {self.epsilon:.3f}"

    def observe(self, move, reward):
        m = move.item() if isinstance(move, torch.Tensor) else move
        r = reward.item() if isinstance(reward, torch.Tensor) else reward
        flip = self.coin.sample()
        if flip.item() == 1:
            self.reset()
        self.rewards[m] += r

    def ucb(self, m):
        if self.visits[m] == 0:
            return 0
        return self.rewards[m]/self.visits[m]\
               + self.gamma*sqrt(sum(self.visits))/self.visits[m]

    def throw(self):
        if sum(self.visits) == 0:
            m = randint(0, 2)
        else:
            r = self.explore.sample()
            if r.item() == 1:
                m = randint(0, 2)
            else:
                m = max(MOVES, key=self.ucb)
        self.visits[m] += 1
        return torch.tensor(m)

    def reset(self):
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]

    @property
    def dist(self):
        if sum(self.visits) == 0:
            return 1 / 3 * torch.ones(3)
        best = max(MOVES, key=self.ucb)
        d = torch.zeros(3)
        d[best] = 1.0
        d = (1 - self.epsilon) * d + (self.epsilon / 3.0) * torch.ones(3)
        return d
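
A small driver loop for the agent above, assuming MOVES, randint and sqrt are defined at module level as the class implies; the opponent here is just uniform random:

import torch
from random import randint

agent = ucbJanken(gamma=0.7, epsilon=0.05, reset_prob=0.0)
for _ in range(100):
    move = agent.throw()
    # reward +1 on a match, -1 otherwise (toy opponent playing uniformly at random)
    reward = torch.tensor(1.0 if randint(0, 2) == move.item() else -1.0)
    agent.observe(move, reward)
print(agent.dist)   # epsilon-smoothed distribution over the three moves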
Example no. 14
 def classify(self, p):
     be = Bernoulli(torch.tensor([0.5]))
     if p < 0.5:
         return 0
     elif p > 0.5:
         return 1
     else:
         return int(be.sample().item())
Example no. 15
def bald_acq(obj_samples):
    # the output of objective is of shape num_samples x batch_shape x d_out
    mean_p = obj_samples.mean(dim=0)
    posterior_entropies = Bernoulli(mean_p).entropy().squeeze(-1)
    sample_entropies = Bernoulli(obj_samples).entropy()
    conditional_entropies = sample_entropies.mean(dim=0).squeeze(-1)

    return posterior_entropies - conditional_entropies
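
A usage sketch for bald_acq, assuming obj_samples holds Monte-Carlo samples of predicted success probabilities with shape num_samples x batch_shape x d_out:

import torch

obj_samples = torch.rand(32, 8, 1)        # 32 MC samples, 8 candidate points, 1 output dim
scores = bald_acq(obj_samples)            # shape [8]; larger = more disagreement between samples
best_candidate = scores.argmax().item()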
Example no. 16
def test1():
    from torch.distributions.bernoulli import Bernoulli

    # Creates a Bernoulli distribution parameterized by probs
    dist = Bernoulli(torch.tensor([0.1, 0.5, 0.9]))

    # Samples are binary (0 or 1). They take the value 1 with probability p
    dist.sample()  # >>> tensor([0., 0., 1.])
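
For reference, the same distribution can also be parameterized by logits and exposes log_prob and entropy; a short sketch:

import torch
from torch.distributions.bernoulli import Bernoulli

probs = torch.tensor([0.1, 0.5, 0.9])
dist = Bernoulli(logits=torch.logit(probs))   # identical distribution, logit parameterization
print(dist.probs)                             # ~ tensor([0.1000, 0.5000, 0.9000])
print(dist.log_prob(torch.ones(3)))           # log p(x = 1) per element
print(dist.entropy())                         # per-element entropy in nats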
Example no. 17
 def __init__(self, reset_prob=0.5):
     if isinstance(reset_prob, torch.Tensor):
         self.coin = Bernoulli(reset_prob)
         self.reset_prob = reset_prob.item()
     else:
         self.reset_prob = reset_prob
         self.coin = Bernoulli(torch.tensor(reset_prob))
     self.move = randint(0, 2)
Example no. 18
    def fast_jl_mat(self, m, n):
        bern = Bernoulli(probs=0.5)
        D = torch.diag(bern.sample([n]) * 2 - 1)
        H = torch.tensor(hadamard(n)).float()
        P = self.sampling_mat(m, n)
        U = P.matmul(H.matmul(D)) / np.sqrt(m)

        return U
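
A standalone sketch of the same subsampled randomized Hadamard construction, with sampling_mat replaced by a simple uniform row-selection matrix (n must be a power of two for scipy.linalg.hadamard):

import numpy as np
import torch
from scipy.linalg import hadamard
from torch.distributions import Bernoulli

m, n = 32, 256
bern = Bernoulli(probs=0.5)
D = torch.diag(bern.sample([n]) * 2 - 1)        # random +/-1 signs on the diagonal
H = torch.tensor(hadamard(n)).float()           # n x n Hadamard matrix
P = torch.eye(n)[torch.randperm(n)[:m]]         # m x n uniform row sampler
U = P.matmul(H.matmul(D)) / np.sqrt(m)

x = torch.randn(n)
print(x.norm().item(), U.matmul(x).norm().item())   # comparable in expectation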
Example no. 19
def schedule_sample(prev_logit, prev_tgt, epsilon):
    prev_out = torch.argmax(prev_logit, dim=1, keepdim=True)
    prev_choices = torch.cat([prev_out, prev_tgt], dim=1)  # [B, 2]
    batch_size = prev_choices.size(0)
    prob = Bernoulli(torch.tensor([epsilon]*batch_size).unsqueeze(1))
    # sampling
    sample = prob.sample().long().to(prev_tgt.device)
    next_inp = torch.gather(prev_choices, 1, sample)
    return next_inp
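
A usage sketch of schedule_sample for scheduled sampling; the batch size and vocabulary size below are assumed:

import torch

prev_logit = torch.randn(4, 10)               # decoder logits from the previous step
prev_tgt = torch.randint(0, 10, (4, 1))       # ground-truth tokens for the previous step
next_inp = schedule_sample(prev_logit, prev_tgt, epsilon=0.5)   # shape [4, 1]
# Each row takes the ground-truth token with probability epsilon,
# and the model's own argmax prediction otherwise.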
Example no. 20
    def compute_log_pdf_bernoulli(self, fs_samples, target_matrix):
        """

        :param fs_samples:
        :param target_matrix:
        :return:
        """
        dist = Bernoulli(torch.sigmoid(fs_samples))
        log_pdf = dist.log_prob(target_matrix)
        return log_pdf
Example no. 21
 def optimality(self, probabilities):
     # sample some bernoulli rv under the distribution over probabilities
     optimality_tensor = torch.zeros(
         (self.sample_size, self.trajectory_length, 1))
     for t in range(self.trajectory_length):
         for j in range(self.sample_size):
             optim_dist = Bernoulli(probabilities[t])
             optimality_tensor[j, t, 0] = optim_dist.sample()
     # return
     return optimality_tensor
Example no. 22
    def MoG_sample(self):

        prob = torch.ones(self.input_shape) * .5
        bern = Bernoulli(prob)
        b = bern.sample().cuda()

        eps = torch.zeros_like(b).normal_().cuda()
        z1 = self.mean1 + self.logsd * eps
        z2 = self.mean2 + self.logsd * eps
        z = b * z1 + (1. - b) * z2
        return z
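
A CPU-only sketch of the same per-element two-component Gaussian mixture; mean1, mean2 and the scale are module attributes in the original and plain numbers here:

import torch
from torch.distributions import Bernoulli

input_shape = (4, 8)
mean1, mean2, sd = -2.0, 2.0, 1.0
b = Bernoulli(torch.full(input_shape, 0.5)).sample()   # per-element component indicator
eps = torch.randn(input_shape)
z = b * (mean1 + sd * eps) + (1. - b) * (mean2 + sd * eps)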
Example no. 23
    def reward_forward(self, prob, locations, orig_window_length, full_image,
                       other_full_image):
        """
        forward with policy gradient
        :param prob: probability maps
        :param locations: locations recording where the patches are extracted
        :param orig_window_length: original patches length, used to calculate the replication count
        :param full_image: ground truth full image
        :param other_full_image: another ground truth full image
        :return:
        """
        # Bernoulli sampling
        batch_size = prob.size(0)
        bernoulli_dist = Bernoulli(prob)
        samples = bernoulli_dist.sample()
        log_probs = bernoulli_dist.log_prob(samples)

        # put back
        with torch.no_grad():
            repeat_times = int(np.ceil(batch_size / orig_window_length))

            target_full_images = other_full_image.repeat(repeat_times, 1, 1, 1)
            inpaint_full_images = full_image.repeat(repeat_times, 1, 1, 1)

            # j th full image
            j = 0
            for batch_idx in range(batch_size):
                sample = samples[batch_idx]
                y1, x1, y2, x2 = locations[batch_idx]
                # sample = torch.where(sample >= 0.5, torch.ones_like(sample), torch.zeros_like(sample))
                inpaint_full_images[j, :, y1:y2, x1:x2] = sample.detach()

                if (batch_idx + 1) % orig_window_length == 0:
                    j += 1

            # calculate the reward over the re-composed root and ground truth root
            rewards = self.forward(inpaint_full_images, target_full_images)
            # broadcast the rewards to each element of the feature maps
            broadcast_rewards = torch.zeros(batch_size, 1)
            broadcast_rewards = broadcast_rewards.to(device)
            # j th full image
            j = 0
            for batch_idx in range(batch_size):
                broadcast_rewards[batch_idx] = rewards[j]
                if (batch_idx + 1) % orig_window_length == 0:
                    j += 1

        broadcast_rewards = broadcast_rewards.view(broadcast_rewards.size(0),
                                                   1, 1, 1)
        image_size = prob.size(2)
        broadcast_rewards = broadcast_rewards.repeat(1, 1, image_size,
                                                     image_size)

        return log_probs, broadcast_rewards
Example no. 24
 def get_action(self, state):
     all_hp_probs, all_anchor_probs = self.forward(state)
     all_anchor_act, all_hp_act = [], []
     for layer_anchor_probs in all_anchor_probs:
         anchor_sampler = Bernoulli(layer_anchor_probs)
         layer_anchor_act = anchor_sampler.sample()
         all_anchor_act.append(layer_anchor_act)
     for hp_probs in all_hp_probs:
         sampler = OneHotCategorical(logits=hp_probs)
         all_hp_act.append(sampler.sample())
     return all_hp_act, all_anchor_act
Example no. 25
 def action(self, x):
     x = T.from_numpy(x).double().unsqueeze(0)
     # x = x.double().unsqueeze(0)
     message_means, message_sds, action_probs = self.forward(x)
     action_dbn = Bernoulli(action_probs)
     action = action_dbn.sample()
     message_dbn = Normal(message_means, message_sds)
     message = message_dbn.sample()
     log_prob = action_dbn.log_prob(action) + message_dbn.log_prob(
         message).sum()
     x = T.cat((message[0, :], action[0].double()))
     return x, log_prob
Example no. 26
def sequential_data_preparation(
    input_batch,
    input_keep=1,
    start_index=2,
    end_index=3,
    dropout_index=1,
    device=get_device()
):
    """
    Sequential Training Data Builder.

    Args:
        input_batch (torch.Tensor): Batch of padded sequences, output of
            nn.utils.rnn.pad_sequence(batch) of size
            `[sequence length, batch_size, 1]`.
        input_keep (float): The probability not to drop input sequence tokens
            according to a Bernoulli distribution with p = input_keep.
            Defaults to 1.
        start_index (int): The index of the sequence start token.
        end_index (int): The index of the sequence end token.
        dropout_index (int): The index of the dropout token. Defaults to 1.
        device (torch.device): Device to be used.
    Returns:
    (torch.Tensor, torch.Tensor, torch.Tensor): encoder_seq, decoder_seq,
        target_seq
        encoder_seq is a batch of padded input sequences starting with the
            start_index, of size `[sequence length +1, batch_size]`.
        decoder_seq is like encoder_seq but word dropout is applied
            (so if input_keep==1, then decoder_seq = encoder_seq).
        target_seq (torch.Tensor): Batch of padded target sequences ending
            in the end_index, of size `[sequence length +1, batch_size]`.
    """
    batch_size = input_batch.shape[1]
    input_batch = input_batch.long().to(device)
    decoder_batch = input_batch.clone()
    # apply token dropout if keep != 1
    if input_keep != 1:
        # build dropout indices consisting of dropout_index
        dropout_indices = torch.LongTensor(
            dropout_index * torch.ones(1, batch_size).numpy()
        )
        # mask for token dropout
        mask = Bernoulli(input_keep).sample((input_batch.shape[0], ))
        mask = torch.LongTensor(mask.numpy())
        dropout_loc = np.where(mask == 0)[0]

        decoder_batch[dropout_loc] = dropout_indices

    end_padding = torch.LongTensor(torch.zeros(1, batch_size).numpy())
    target_seq = torch.cat((input_batch[1:, :], end_padding), dim=0)
    target_seq = copy.deepcopy(target_seq).to(device)

    return input_batch, decoder_batch, target_seq
Example no. 27
 def __init__(self, base_classifier: torch.nn.Module, num_classes: int,
              calibrated_alpha: float, K: int):
     """
     :param base_classifier: maps from [batch x channel x height x width] to [batch x num_classes]
      :param num_classes: number of output classes
     :param calibrated_alpha: the noise level hyperparameter
     """
     self.base_classifier = base_classifier
     self.num_classes = num_classes
     self.calibrated_alpha = calibrated_alpha
     self.K = K
     self.m = Bernoulli(torch.tensor([self.calibrated_alpha]).cuda())
Example no. 28
    def forward(self, target, output):
        """

        :param output: reconstruction logits (B, C, W, H)
        :param target: initial input (B, C, W, H)
        :return: negative Bernoulli log-likelihood, summed over (C, W, H) and averaged over the batch
        """
        dist = Bernoulli(logits=output)
        rec_loss = -dist.log_prob(target)
        rec_loss = torch.mean(rec_loss.sum(dim=[1, 2, 3]))

        return rec_loss
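
A functional sketch of the same reconstruction term with assumed shapes (batch of 2, binarized 1 x 28 x 28 images):

import torch
from torch.distributions import Bernoulli

output = torch.randn(2, 1, 28, 28)            # decoder logits
target = torch.rand(2, 1, 28, 28).round()     # binarized targets
rec_loss = -Bernoulli(logits=output).log_prob(target).sum(dim=[1, 2, 3]).mean()
# Numerically equivalent to binary_cross_entropy_with_logits summed per image, averaged over the batch.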
Example no. 29
    def __init__(self, **kwargs):
        self.delta = kwargs.get("delta", 0.35)
        self.epsilon = kwargs.get("epsilon", 0.3)
        reset_prob = kwargs.get("reset_prob", 0.05)
        self.coin = Bernoulli(torch.tensor(reset_prob))
        self.means = [0., 0., 0.]
        self.arms = {0, 1, 2}
        self.not_played = [0, 1, 2]

        self.thresh = int(log(3.0 / self.delta))
        self.round = 1
        self.best = None
Example no. 30
    def __init__(self, reset_prob=0.015, **kwargs):
        #expected reset time = 1/(reset_prob)
        if reset_prob == 0:
            self.coin = None
        else:
            self.coin = Bernoulli(torch.tensor(reset_prob))
        self.dists = kwargs.get("dists")
        self.bias = kwargs.get("bias")

        if self.dists:
            self.policy = Categorical(self.dists.pop(0))
        else:
            self.policy = self.rand_dist()
Example no. 31
    def f(self, x, z, logits, hard=False):

        B = x.shape[0]

        # image likelihood given b
        # b = harden(z).detach()
        x_hat = self.generator.forward(z)
        alpha = torch.sigmoid(x_hat)
        beta = Beta(alpha*self.beta_scale, (1.-alpha)*self.beta_scale)
        x_noise = torch.clamp(x + torch.FloatTensor(x.shape).uniform_(0., 1./256.).cuda(), min=1e-5, max=1-1e-5)
        logpx = beta.log_prob(x_noise) #[120,3,112,112]  # add uniform noise here
        logpx = torch.sum(logpx.view(B, -1),1) # [PB]  * self.w_logpx

        # prior is constant I think 
        # for q(b|x), we just want to increase its entropy 
        if hard:
            dist = Bernoulli(logits=logits)
        else:
            dist = RelaxedBernoulli(torch.Tensor([1.]).cuda(), logits=logits)
            
        logqb = dist.log_prob(z.detach())
        logqb = torch.sum(logqb,1)

        return logpx, logqb, alpha
Example no. 32
def make_decisions(logits):

    dist1 = Bernoulli(logits=logits[:,0])

    # Decision 1
    b1 = dist1.sample()
    logprob1 = dist1.log_prob(b1)

    if b1 ==0:
        dist2 = Bernoulli(logits=logits[:,1])
    else:
        dist2 = Bernoulli(logits=logits[:,2])

    # Decision 2
    b2 = dist2.sample()
    logprob2 = dist2.log_prob(b2)

    return b1, logprob1, b2, logprob2   
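
A minimal REINFORCE-style usage sketch for make_decisions; the logits shape and the scalar reward below are placeholders:

import torch

logits = torch.zeros(1, 3, requires_grad=True)    # one row of logits per batch element
b1, logprob1, b2, logprob2 = make_decisions(logits)
reward = 1.0                                      # downstream reward for the sampled pair (b1, b2)
loss = -(reward * (logprob1 + logprob2)).mean()
loss.backward()                                   # score-function gradient w.r.t. the logits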
Example no. 33
#                         early_stop=5)
#     print ('Done training\n')
# # fada
# else:
#     # net_relax.load_params_v3(save_dir=home+'/Downloads/tmmpp/', step=30551, name='') #.499
#     net_relax.load_params_v3(save_dir=home+'/Documents/Grad_Estimators/new/', step=1607, name='') #.4
# print()

dist = Bernoulli(bern_param)
samps = []
grads = []
logprobgrads = []
for i in range(n):
    samp = dist.sample()

    logprob = dist.log_prob(samp.detach())
    logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param), retain_graph=True)[0]
    # print (samp.data.numpy(), logprob.data.numpy(), logprobgrad.data.numpy())
    # fsdfa

    samps.append(samp.numpy())
    grads.append( (f(samp.numpy()) - 0.) * logprobgrad.numpy())
    logprobgrads.append(logprobgrad.numpy())
Example no. 34
C=3
N = 2000




prelogits = torch.zeros([B,C])
logits = prelogits - logsumexp(prelogits)
# logits = torch.tensor(logits.clone().detach(), requires_grad=True)
logits.requires_grad_(True)



grads = []
for i in range(N):
    dist1 = Bernoulli(logits=logits[:,0])

    # Decision 1
    b1 = dist1.sample()
    logprob1 = dist1.log_prob(b1)

    if b1 ==0:
        dist2 = Bernoulli(logits=logits[:,1])
    else:
        dist2 = Bernoulli(logits=logits[:,2])

    # Decision 2
    b2 = dist2.sample()
    logprob2 = dist2.log_prob(b2)

    if b1 == 0 and b2 == 0:
    print()
    print('REINFORCE')
    print ('Value:', val)
    # print ('n:', n)
    # print ('theta:', theta)
    print()


    optim = torch.optim.Adam([bern_param], lr=.004)

    steps = []
    losses= []
    for step in range(total_steps):

        dist = Bernoulli(logits=bern_param)

        optim.zero_grad()

        bs = []
        for i in range(20):
            samps = dist.sample()
            bs.append(H(samps))
        bs = torch.FloatTensor(bs).unsqueeze(1)

        logprob = dist.log_prob(bs)
        # logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param), retain_graph=True)[0]

        loss = torch.mean(f(bs) * logprob)

        #review the pytorch_toy and the RL code to see how PG was done 
    def forward(self, grad_est_type, x=None, warmup=1., inf_net=None): #, k=1): #, marginf_type=0):

        outputs = {}
        B = x.shape[0]

        #Samples from relaxed bernoulli 
        z, logits, logqz = self.q.sample(x) 

        if isnan(logqz).any():
            print(torch.sum(isnan(logqz).float()).data.item())
            print(torch.mean(logits).data.item())
            print(torch.max(logits).data.item())
            print(torch.min(logits).data.item())
            print(torch.max(z).data.item())
            print(torch.min(z).data.item())
            fdsfad

        
        # Compute discrete ELBO
        b = harden(z).detach()
        logpx_b, logq_b, alpha1 = self.f(x, b, logits, hard=True)
        fhard = (logpx_b - logq_b).detach()
        

        if grad_est_type == 'SimpLAX':
            # Control Variate
            logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
            fsoft = logpx_z.detach() #- logq_z
            c = self.surr(x, z).view(B)

            # REINFORCE with Control Variate
            Adv = (fhard - fsoft - c).detach()
            cost1 = Adv * logqz

            # Unbiased gradient of fhard/elbo
            cost_all = cost1 + c + fsoft # + logpx_b

            # Surrogate loss
            surr_cost = torch.abs(fhard - fsoft - c)#**2



        elif grad_est_type == 'RELAX':

            #p(z|b)
            theta = logit_to_prob(logits)
            v = torch.rand(z.shape[0], z.shape[1]).cuda()
            v_prime = v * (b - 1.) * (theta - 1.) + b * (v * theta + 1. - theta)
            # z_tilde = logits.detach() + torch.log(v_prime) - torch.log1p(-v_prime)
            z_tilde = logits + torch.log(v_prime) - torch.log1p(-v_prime)
            z_tilde = torch.sigmoid(z_tilde)

            # Control Variate
            logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
            fsoft = logpx_z.detach() #- logq_z
            c_ztilde = self.surr(x, z_tilde).view(B)
            c_z = self.surr(x, z).view(B)

            # REINFORCE with Control Variate
            dist_bern = Bernoulli(logits=logits)
            logqb = dist_bern.log_prob(b.detach())
            logqb = torch.sum(logqb,1)

            Adv = (fhard - fsoft - c_ztilde).detach()
            cost1 = Adv * logqb

            # Unbiased gradient of fhard/elbo
            cost_all = cost1 + fsoft + c_z - c_ztilde#+ logpx_b

            # Surrogate loss
            surr_cost = torch.abs(fhard - fsoft - c_ztilde)#**2




        elif grad_est_type == 'SimpLAX_nosoft':
            # Control Variate
            logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
            # fsoft = logpx_z.detach() #- logq_z
            c = self.surr(x, z).view(B)

            # REINFORCE with Control Variate
            Adv = (fhard - c).detach()
            cost1 = Adv * logqz

            # Unbiased gradient of fhard/elbo
            cost_all = cost1 + c  # + logpx_b

            # Surrogate loss
            surr_cost = torch.abs(fhard - c)#**2



        elif grad_est_type == 'RELAX_nosoft':

            #p(z|b)
            theta = logit_to_prob(logits)
            v = torch.rand(z.shape[0], z.shape[1]).cuda()
            v_prime = v * (b - 1.) * (theta - 1.) + b * (v * theta + 1. - theta)
            z_tilde = logits + torch.log(v_prime) - torch.log1p(-v_prime)
            z_tilde = torch.sigmoid(z_tilde)

            # Control Variate
            logpx_z, logq_z, alpha2 = self.f(x, z, logits, hard=False)
            # fsoft = logpx_z.detach() #- logq_z
            c_ztilde = self.surr(x, z_tilde).view(B)
            c_z = self.surr(x, z).view(B)

            # REINFORCE with Control Variate
            dist_bern = Bernoulli(logits=logits)
            logqb = dist_bern.log_prob(b.detach())
            logqb = torch.sum(logqb,1)

            Adv = (fhard - c_ztilde).detach()

            # print (Adv.shape, logqb.shape)
            cost1 = Adv * logqb

            # Unbiased gradient of fhard/elbo
            # print (cost1.shape, c_z.shape, c_ztilde.shape)
            # fsdf
            cost_all = cost1 + c_z - c_ztilde#+ logpx_b

            # Surrogate loss
            surr_cost = torch.abs(fhard - c_ztilde)#**2






        # Confirm generator grad isnt in encoder grad
        # logprobgrad = torch.autograd.grad(outputs=torch.mean(fhard), inputs=(logits), retain_graph=True)[0]
        # print (logprobgrad.shape, torch.max(logprobgrad), torch.min(logprobgrad))

        # logprobgrad = torch.autograd.grad(outputs=torch.mean(fsoft), inputs=(logits), retain_graph=True)[0]
        # print (logprobgrad.shape, torch.max(logprobgrad), torch.min(logprobgrad))
        # fsdfads


        outputs['logpx'] = torch.mean(logpx_b)
        outputs['x_recon'] = alpha1
        # outputs['welbo'] = torch.mean(logpx + warmup*( logpz - logqz))
        outputs['welbo'] = torch.mean(cost_all) #torch.mean(logpx_b + warmup*(KL))
        outputs['elbo'] = torch.mean(logpx_b - logq_b - 138.63)
        # outputs['logws'] = log_ws
        outputs['z'] = z
        outputs['logpz'] = torch.zeros(1) #torch.mean(logpz)
        outputs['logqz'] = torch.mean(logq_b)
        outputs['surr_cost'] = torch.mean(surr_cost)

        outputs['fhard'] = torch.mean(fhard)
        # outputs['fsoft'] = torch.mean(fsoft)
        # outputs['c'] = torch.mean(c)
        outputs['logq_z'] = torch.mean(logq_z)
        outputs['logits'] = logits

        return outputs
reinforce_cat_grad_stds = []
for theta in thetas:
    

    print ()
    print ('theta:', theta)
    # theta = .01 #.99 #.1 #95 #.3 #.9 #.05 #.3
    bern_param = torch.tensor([theta], requires_grad=True)

    dist = Bernoulli(bern_param)
    samps = []
    grads = []
    logprobgrads = []
    for i in range(n):
        samp = dist.sample()

        logprob = dist.log_prob(samp.detach())
        logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param), retain_graph=True)[0]
        # print (samp.data.numpy(), logprob.data.numpy(), logprobgrad.data.numpy())
        # fsdfa

        samps.append(samp.numpy())
        grads.append( (f(samp.numpy()) - 0.) * logprobgrad.numpy())
        logprobgrads.append(logprobgrad.numpy())
# thetas = np.linspace(.97,.999, 12)

reinforce_grad_means = []
reinforce_grad_stds = []
pz_grad_means = []
pz_grad_stds = []
for theta in thetas:
    

#     print ()
    print ('theta:', theta)
#     # theta = .01 #.99 #.1 #95 #.3 #.9 #.05 #.3
    bern_param = torch.tensor([theta], requires_grad=True)


    dist = Bernoulli(bern_param)
    samps = []
    grads = []
    logprobgrads = []
    for i in range(n):
        samp = dist.sample()

        logprob = dist.log_prob(samp.detach())
        logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param), retain_graph=True)[0]
        # print (samp.data.numpy(), logprob.data.numpy(), logprobgrad.data.numpy())
        # fsdfa

        samps.append(samp.numpy())
        grads.append( (f(samp.numpy()) - 0.) * logprobgrad.numpy())
        logprobgrads.append(logprobgrad.numpy())