Example #1
class ucbJanken():
    '''UCB algorithm with epsilon-greedy selection
    kwargs:
        gamma (float): exploration constant
        epsilon (float): probability of choosing randomly
        reset_prob (float): probability of resetting
    '''
    def __init__(self, **kwargs):
        self.gamma = kwargs.get("gamma", 0.5)
        self.epsilon = kwargs.get("epsilon", 0.1)
        self.reset_prob = kwargs.get("reset_prob", 0.2)
        self.coin = Bernoulli(torch.tensor(self.reset_prob))

        self.explore = Bernoulli(torch.tensor(self.epsilon))
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]

    def __str__(self):
        return f"ucb: gamma = {self.gamma:.3f}, epsilon = {self.epsilon:.3f}"

    def observe(self, move, reward):
        m = move.item() if isinstance(move, torch.Tensor) else move
        r = reward.item() if isinstance(reward, torch.Tensor) else reward
        flip = self.coin.sample()
        if flip.item() == 1:
            self.reset()
        self.rewards[m] += r

    def ucb(self, m):
        if self.visits[m] == 0:
            return 0
        return self.rewards[m]/self.visits[m]\
               + self.gamma*sqrt(sum(self.visits))/self.visits[m]

    def throw(self):
        if sum(self.visits) == 0:
            m = randint(0, 2)
        else:
            r = self.explore.sample()
            if r.item() == 1:
                m = randint(0, 2)
            else:
                m = max(MOVES, key=self.ucb)
        self.visits[m] += 1
        return torch.tensor(m)

    def reset(self):
        self.visits = [0, 0, 0]
        self.rewards = [0., 0., 0.]

    @property
    def dist(self):
        if sum(self.visits) == 0:
            return 1 / 3 * torch.ones(3)
        best = max(MOVES, key=self.ucb)
        d = torch.zeros(3)
        d[best] = 1.0
        d = (1 - self.epsilon) * d + (self.epsilon / 3.0) * torch.ones(3)
        return d
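As a usage sketch (not part of the original example), the agent can be played against one of the other Janken classes on this page; MOVES = [0, 1, 2] and the +1/0/-1 rock-paper-scissors reward below are assumptions about the surrounding module:

import torch
from random import randint

MOVES = [0, 1, 2]

def janken_reward(a, b):
    # +1 if move a beats move b, 0 on a tie, -1 otherwise (one common convention)
    if a == b:
        return 0
    return 1 if (a - b) % 3 == 1 else -1

agent = ucbJanken(gamma=0.5, epsilon=0.1, reset_prob=0.0)
opponent = constJanken(reset_prob=0.05)   # defined further down on this page
for _ in range(1000):
    a, b = agent.throw(), opponent.throw()
    r = janken_reward(a.item(), b.item())
    agent.observe(a, torch.tensor(float(r)))      # the agent only sees its own move and reward
    opponent.observe(b, torch.tensor(float(-r)))
print(agent, agent.dist)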
Example #2
def test1():
    from torch.distributions.bernoulli import Bernoulli

    # Creates a Bernoulli distribution parameterized by probs
    dist = Bernoulli(torch.tensor([0.1, 0.5, 0.9]))

    # Samples are binary (0 or 1). They take the value 1 with probability p
    dist.sample()  # >>> tensor([0., 0., 1.])
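A small extension of the same API (added here for context, not part of the original snippet): Bernoulli also accepts logits instead of probs, and log_prob scores samples, which is what the REINFORCE-style examples further down rely on.

def test2():
    import torch
    from torch.distributions.bernoulli import Bernoulli

    # logits = 0 corresponds to p = 0.5
    dist = Bernoulli(logits=torch.zeros(3))
    s = dist.sample()          # e.g. tensor([1., 0., 1.])
    lp = dist.log_prob(s)      # every entry equals log(0.5) here
    return s, lp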
Example #3
    def predict(self, image_input):
        batch_size = image_input.shape[0]
        memory = self._encoder(image_input)

        # Initialize the tensors that will hold the control points of the curves, the number of curves of each image,
        # and the RL agent's probabilities
        control_points = torch.zeros((0, batch_size, 2), dtype=torch.float32, device=image_input.device)
        num_beziers = torch.zeros(batch_size, dtype=torch.long, device=image_input.device)

        # Tensor holding the indices of the samples that are still active
        active_samples = torch.arange(batch_size, dtype=torch.long, device=image_input.device)
        # Tensor holding the indices of the sequence positions that correspond to BOS tokens
        bos_idxs = torch.zeros(1, dtype=torch.long, device=image_input.device)

        # Call the RL agent to decide which samples remain active
        output_decoder = self._rl_decoder(control_points[:, active_samples], bos_idxs, memory[:, active_samples])
        output_decoder = output_decoder[-1]
        step_probabilities = 1e-8 + (1-2e-8)*torch.sigmoid(self._rl_probability(output_decoder))
        # Create a Bernoulli distribution from each probability and draw a batch of samples
        distribution = Bernoulli(step_probabilities)
        new_active_samples = distribution.sample()
        # Update the images whose reconstruction is still active using the sampling result
        active_samples = active_samples[new_active_samples.bool().view(-1)]
        while active_samples.shape[0] > 0:
            # Generate self.num_cp control points for the images whose reconstruction is still active (i.e. generate a new Bezier curve for these images)
            for n in range(self.num_cp):
                # Run the decoder to obtain a new control point
                output = self._decoder(control_points[:, active_samples], bos_idxs, memory[:, active_samples])
                last = output[-1]

                cp = torch.sigmoid(self._out_cp(last)).view(1, active_samples.shape[0], 2)
                new_cp = torch.zeros((1, batch_size, 2), device=cp.device)
                new_cp[:, active_samples] = cp
                control_points = torch.cat((control_points, new_cp), dim=0)

            # Update bos_idxs
            bos_idxs = torch.cat((bos_idxs, torch.tensor([bos_idxs[-1]+1+self.num_cp], dtype=torch.long, device=bos_idxs.device)), dim=0)
            # Update the num_beziers tensor
            num_beziers[active_samples] += 1

            # Call the RL agent to decide which samples remain active
            output_decoder = self._rl_decoder(control_points[:, active_samples], bos_idxs, memory[:, active_samples])
            output_decoder = output_decoder[-1]
            step_probabilities = 1e-8 + (1-2e-8)*torch.sigmoid(self._rl_probability(output_decoder))
            # Create a Bernoulli distribution from each probability and draw a batch of samples
            distribution = Bernoulli(step_probabilities)
            new_active_samples = distribution.sample()
            # Update the images whose reconstruction is still active using the sampling result
            active_samples = active_samples[new_active_samples.bool().view(-1)]

        # Once all control points have been predicted, map them to the domain (0, im_size-0.5)x(0, im_size-0.5)
        control_points = (self.image_size - 0.5)*control_points

        return control_points, num_beziers
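The 1e-8 + (1-2e-8)*sigmoid(...) expression above clamps the probabilities into the open interval (1e-8, 1-1e-8). A standalone illustration of why (my sketch, not the author's code): a Bernoulli whose probability saturates at exactly 0 or 1 assigns a log-probability of -inf to the opposite outcome, which would break any policy-gradient update.

import torch
from torch.distributions import Bernoulli

p_raw = torch.sigmoid(torch.tensor([-1000.0]))               # numerically 0 in float32
print(Bernoulli(p_raw).log_prob(torch.tensor([1.0])))        # tensor([-inf])
p_safe = 1e-8 + (1 - 2e-8) * p_raw
print(Bernoulli(p_safe).log_prob(torch.tensor([1.0])))       # finite, about -18.4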
Example #4
def select_action(policy, state):
    if policy.type == 'stochastic':
        p = policy(state)
        m = Bernoulli(p)
        action = m.sample()
    else:
        p = policy(state)
        action = (p > 0.5).double()
        # Exploration noise: flip each thresholded action bit with probability 0.2 (XOR with a Bernoulli mask)
        m = Bernoulli(torch.ones(p.shape) * 0.2)
        noise = m.sample().to(device)
        action = ((noise - action) != 0).double()

    action = torch.clamp(action, min=0, max=1)
    return action
Example #5
class copyJanken():
    '''Copies opponent's last move'''
    def __init__(self, epsilon=0.5):
        self.epsilon = epsilon
        self.explore = Bernoulli(torch.tensor(self.epsilon))
        self.last = None

    def __str__(self):
        return f"copy: epsilon = {self.epsilon:.3f}"

    def reset(self):
        pass

    def throw(self):
        if self.last is None:
            return torch.tensor(randint(0, 2))
        else:
            return self.last

    def observe(self, move, reward):
        # Record the opponent's move so it can be copied on the next throw;
        # with probability epsilon, store a random move instead.
        r = self.explore.sample()
        if r.item() == 1:
            self.last = torch.tensor(randint(0, 2))
        else:
            self.last = move

    @property
    def dist(self):
        if self.last is None:
            return torch.ones(3) / 3
        d = torch.zeros(3)
        d[self.last] = 1
        return d
Example #6
    def forward(self, batch_inputs):
        # Embed each feature
        merged_input = []
        # Get the longest feature; word dropout below is applied only to the feature
        # whose length equals this maximum
        max_words = max(self.feature_lengths)

        for input, feature_len in zip(batch_inputs, self.feature_lengths):
            concat_sentence = input
            concat_sentence = torch.tensor(concat_sentence, dtype=torch.long)

            embeddings = self.embeddings(concat_sentence)

            if feature_len == max_words:
                # Set up success rate (rate of selecting the word) as 1 - dropout rate
                bernoulli = Bernoulli(1 - self.dropout_rate)
                rw = bernoulli.sample(torch.Size((embeddings.shape[0], embeddings.shape[1]))).numpy()
                # Use zeros at where rw is zero
                embeddings = torch.from_numpy(np.expand_dims(rw, 2)) * embeddings

            merged_input.append(embeddings)

        # Final output
        final_input = torch.cat(merged_input, dim=1)
        final_input = final_input.view(len(final_input), -1)
        out = torch.tanh(self.linear1(final_input))
        out = torch.tanh(self.linear2(out))
        out = torch.tanh(self.linear3(out))
        out = torch.tanh(self.linear4(out))

        out = F.relu(self.linear5(out))

        out = self.output_layer(out)
        return out
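The word-dropout mask above round-trips through NumPy; a pure-torch equivalent of that masking block (a sketch under the same assumptions, not the original code) keeps everything as tensors on the embedding's device:

# keep each word with probability 1 - dropout_rate, zero its embedding otherwise
keep = Bernoulli(1 - self.dropout_rate).sample(embeddings.shape[:2]).unsqueeze(-1)
embeddings = keep.to(embeddings.device) * embeddings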
Example #7
    def generate_samples(self, init_stroke, char, max_len):
        # char 1 x char_len
        prev_state = None
        prev_offset = None
        prev_w = None
        init_stroke = init_stroke.unsqueeze(0).unsqueeze(
            0).float().cuda()  # 1 x 1 x 3
        char_mask = torch.ones_like(char)
        strokes = []
        for i in range(max_len):
            e, pi, mu1, mu2, sig1, sig2, ro, prev_state, phi, prev_offset, prev_w = self.forward(
                init_stroke, char, char_mask, prev_state, prev_offset, prev_w)

            e = e.squeeze(0)
            sample_mixture = self.multibivariate_sampling(
                pi.squeeze(0), mu1.squeeze(0), mu2.squeeze(0), sig1.squeeze(0),
                sig2.squeeze(0), ro.squeeze(0))
            #print(e)
            e = Bernoulli(e)
            e = e.sample()

            init_stroke = torch.cat((e, sample_mixture.cuda()), 1)  # 1 x 3

            strokes.append(init_stroke)

            init_stroke = init_stroke.unsqueeze(0)

            if phi.max(1)[1].item() > char.shape[1] - 1:  #exit
                break
        return torch.stack(strokes, 1)
Example #8
    def generate_samples(self, init_stroke, max_len):
        prev_state = None
        prev_strokes = []
        init_stroke = init_stroke.unsqueeze(0).unsqueeze(0)
        for i in range(max_len):

            e, pi, mu1, mu2, sig1, sig2, ro, prev_state = self.forward(
                init_stroke, prev_state)
            # squeeze: 1 x seq_len x dim -> seq_len x dim
            e = e.squeeze(0)
            samples = self.multibivariate_sampling(pi.squeeze(0),
                                                   mu1.squeeze(0),
                                                   mu2.squeeze(0),
                                                   sig1.squeeze(0),
                                                   sig2.squeeze(0),
                                                   ro.squeeze(0))
            e = Bernoulli(e)
            e = e.sample()

            #e = e.unsqueeze(-1)
            #print(samples)
            init_stroke = torch.cat((e, samples.cuda()), 1)

            prev_strokes.append(init_stroke)
            init_stroke = init_stroke.unsqueeze(0)
            #print(pre_strokes.shape)
        return torch.stack(prev_strokes, 1)
Example #9
    def probabalistic_greedy(self, rewards, resample_flag=False):

        # if we are not re-sampling during this call
        if not resample_flag:
            iw = torch.sum(rewards, dim=1)
            iw = iw - torch.max(iw)
            iw = torch.exp(iw)
            iw = iw.reshape(-1)
            iw = iw / torch.sum(iw)
            return iw
        # otherwise
        else:
            # set the current iw
            iw = torch.sum(rewards, dim=1)
            iw = iw - torch.max(iw)
            iw = torch.exp(iw)
            iw = iw.reshape(-1)
            iw = iw / torch.sum(iw)
            # set up scaled bernoulli
            next_iw_dist = Bernoulli(iw**self.alpha)
            # keep each weight with probability iw**alpha (zero it otherwise)
            iw = next_iw_dist.sample() * iw
            # rescale everything
            iw = iw / torch.sum(iw)
            # return everything
            return iw
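For reference (my paraphrase, not the author's code), the non-resampling branch is just a numerically stable softmax over the per-trajectory reward sums; the resampling branch then thins those weights with a Bernoulli keep/drop step before renormalizing.

import torch

rewards = torch.tensor([[1.0, 2.0], [0.5, 0.5], [2.0, 2.0]])
totals = rewards.sum(dim=1)
iw = torch.softmax(totals, dim=0)   # identical to the subtract-max / exp / normalize steps above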
Example #10
class BernoulliDomainParam(DomainParam):
    """ Domain parameter sampled from a Bernoulli distribution """

    def __init__(self, val_0: [int, float], val_1: [int, float], prob_1: float, **kwargs):
        """
        Constructor

        :param val_0: value of event 0
        :param val_1: value of event 1
        :param prob_1: probability of event 1, equals 1 - probability of event 0
        :param kwargs: forwarded to `DomainParam` constructor
        """
        if 'mean' not in kwargs:
            kwargs['mean'] = None
        super().__init__(**kwargs)

        self.val_0 = val_0
        self.val_1 = val_1
        self.prob_1 = prob_1
        self.distr = Bernoulli(self.prob_1)

    @staticmethod
    def get_field_names() -> Sequence[str]:
        return ['name', 'mean', 'val_0', 'val_1', 'prob_1', 'clip_lo', 'clip_up', 'roundint']

    def adapt(self, domain_distr_param: str, domain_distr_param_value: [float, int]):
        # Set the attributes
        super().adapt(domain_distr_param, domain_distr_param_value)

        # Re-create the distribution, otherwise the changes will have no effect
        self.distr = Bernoulli(self.prob_1)

    def sample(self, num_samples: int = 1) -> list:
        """
        Generate new domain parameter values.

        :param num_samples: number of samples (sets of new parameter values)
        :return: list of Tensors containing the new parameter values
        """
        assert isinstance(num_samples, int) and num_samples > 0

        if self.distr is None:
            # Return nominal values multiple times
            return list(to.ones(num_samples) * self.mean)
        else:
            # Draw num_samples samples (rsample is not implemented for Bernoulli)
            sample_tensor = self.distr.sample(sample_shape=to.Size([num_samples]))

            # Sample_tensor contains either 0 or 1
            sample_tensor = (to.ones_like(sample_tensor) - sample_tensor) * self.val_0 + sample_tensor * self.val_1

            # Clip the values
            sample_tensor = to.clamp(sample_tensor, self.clip_lo, self.clip_up)

            # Round values to integers if desired
            if self.roundint:
                sample_tensor = to.round(sample_tensor).type(to.int)

            # Convert the large tensor into a list of small tensors
            return list(sample_tensor)
Example #11
class constJanken():
    '''Plays the same move until reset'''
    def __init__(self, reset_prob=0.5):
        if isinstance(reset_prob, torch.Tensor):
            self.coin = Bernoulli(reset_prob)
            self.reset_prob = reset_prob.item()
        else:
            self.reset_prob = reset_prob
            self.coin = Bernoulli(torch.tensor(reset_prob))
        self.move = randint(0, 2)

    def __str__(self):
        return f"const: reset_prob = {self.reset_prob:.3f}"

    def throw(self):
        return torch.tensor(self.move)

    def observe(self, move, reward):
        r = self.coin.sample()
        if r.item() == 1:
            self.reset()

    def reset(self):
        self.move = randint(0, 2)

    @property
    def dist(self):
        d = torch.zeros(3)
        d[self.move] = 1.
        return d
Example #12
class serJanken():
    def __init__(self, **kwargs):
        self.delta = kwargs.get("delta", 0.35)
        self.epsilon = kwargs.get("epsilon", 0.3)
        reset_prob = kwargs.get("reset_prob", 0.05)
        self.coin = Bernoulli(torch.tensor(reset_prob))
        self.means = [0., 0., 0.]
        self.arms = {0, 1, 2}
        self.not_played = [0, 1, 2]

        self.thresh = int(log(3.0 / self.delta))
        self.round = 1
        self.best = None

    def throw(self):
        if self.best is not None:
            return self.best

        k = randint(0, len(self.not_played) - 1)
        m = self.not_played.pop(k)
        if not self.not_played:
            self.round += 1
            self.not_played = list(self.arms)
        return m

    def observe(self, move, reward):
        m = move.item() if isinstance(move, torch.Tensor) else move
        r = reward.item() if isinstance(reward, torch.Tensor) else reward

        norm_r = (r + 1) / 2
        self.means[m] = (self.round - 1)/(self.round)* self.means[m] \
                        + norm_r/self.round

        if self.best is not None:
            if max(self.means) - self.means[self.best] > self.epsilon:
                self.reset()
            return

        flip = self.coin.sample()
        if flip.item() == 1:
            self.reset()
        #elimination
        if self.round >= self.thresh:
            max_mean = max(self.arms, key=lambda i: self.means[i])
            elim = set()
            for m in self.arms:
                if max_mean - self.means[m] + self.epsilon \
                    >= sqrt(1/(2*self.round) * log( 12*self.round**2/self.delta)):
                    elim.add(m)
            self.arms -= elim
            if len(self.arms) == 1:
                self.best = self.arms.pop()

    def reset(self):
        self.round = 1
        self.arms = {0, 1, 2}
        self.not_played = [0, 1, 2]
        self.means = [0., 0., 0.]
        self.best = None
Example #13
    def fast_jl_mat(self, m, n):
        bern = Bernoulli(probs=0.5)
        D = torch.diag(bern.sample([n]) * 2 - 1)
        H = torch.tensor(hadamard(n)).float()
        P = self.sampling_mat(m, n)
        U = P.matmul(H.matmul(D)) / np.sqrt(m)

        return U
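self.sampling_mat is not shown in this example; a hypothetical implementation consistent with how it is used here (a 0/1 matrix P that picks m of the n rows uniformly at random) might look like this:

import torch

def sampling_mat(m, n):
    # hypothetical helper: selects m distinct rows out of n
    idx = torch.randperm(n)[:m]
    P = torch.zeros(m, n)
    P[torch.arange(m), idx] = 1.0
    return P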
Example #14
 def classify(self, p):
     be = Bernoulli(torch.tensor([0.5]))
     if p < 0.5:
         return 0
     elif p > 0.5:
         return 1
     else:
         # break ties at p == 0.5 with a fair coin flip; cast to int for a consistent return type
         return int(be.sample().item())
Example #15
class ModSTDP(nn.Module):
    def __init__(self, layer, ucapture, uminus, usearch, ubackoff, umin,
                 maxweight):
        super(ModSTDP, self).__init__()
        # Initialize your variables here, including any Bernoulli Random Variable distributions
        self.layer = layer
        self.ucapture = ucapture
        self.uminus = uminus
        self.usearch = usearch
        self.ubackoff = ubackoff
        self.umin = umin
        self.maxweight = maxweight
        self.bmin = Bernoulli(torch.tensor([self.umin]))
        self.bcap = Bernoulli(torch.tensor([self.ucapture]))
        self.bminus = Bernoulli(torch.tensor([self.uminus]))
        self.bsearch = Bernoulli(torch.tensor([self.usearch]))
        self.bbackoff = Bernoulli(torch.tensor([self.ubackoff]))

        self.fplus = lambda w: Bernoulli((w / self.maxweight) *
                                         (2 - w / self.maxweight))
        self.fminus = lambda w: Bernoulli((1 - w / self.maxweight) *
                                          (1 + w / self.maxweight))

    # forward function is called when you pass data (input and output spikes) into the already instantiated class
    # Args: input_spikes - 4D spike wave tensor that was input to the Excitatory neurons. Its dimensions are
    #                      (time,in_channels,height,width). Height and width are nothing but Receptive Field's height and width
    #       output_spikes - 4D spike wave tensor that is the output after Lateral Inhibition
    # This function does not need to return anything.

    def forward(self, input_spikes, output_spikes):
        time = input_spikes.shape[0]
        out_channel = output_spikes.shape[1]
        wshape = self.layer.weight.shape

        x = torch.sum(input_spikes.squeeze().reshape(time, -1),
                      dim=0).reshape(1, -1).repeat(out_channel, 1)

        y = torch.sum(output_spikes.squeeze().reshape(time, -1),
                      dim=0).reshape(-1, 1).repeat(1, x.shape[1])

        # Work on the raw weight data so the in-place STDP updates bypass autograd
        w = self.layer.weight.data.reshape(out_channel, -1)
        # Capture: both sides spiked and the input spiked at least as often as the output
        branch1_idx = ((x >= y) & (x > 0) & (y > 0))
        w[branch1_idx] += self.bcap.sample() * torch.max(
            self.fplus(w[branch1_idx]).sample(), self.bmin.sample())
        # Minus: both sides spiked but the output spiked more often
        branch2_idx = ((x < y) & (x > 0) & (y > 0))
        w[branch2_idx] -= self.bminus.sample() * torch.max(
            self.fminus(w[branch2_idx]).sample(), self.bmin.sample())
        # Search: the input spiked but the output stayed silent
        branch3_idx = ((x > 0) & (y == 0))
        w[branch3_idx] += self.bsearch.sample() * torch.max(
            self.fplus(w[branch3_idx]).sample(), self.bmin.sample())
        # Backoff: the output spiked without any input spikes
        branch4_idx = ((x == 0) & (y > 0))
        w[branch4_idx] -= self.bbackoff.sample() * torch.max(
            self.fminus(w[branch4_idx]).sample(), self.bmin.sample())

        self.layer.weight.data = torch.clamp(w.reshape(wshape), 0, self.maxweight)
Example #16
def make_decisions(logits):

    dist1 = Bernoulli(logits=logits[:, 0])

    # Decision 1
    b1 = dist1.sample()
    logprob1 = dist1.log_prob(b1)

    if b1 == 0:  # assumes a one-element batch so b1 can be used as a Python bool
        dist2 = Bernoulli(logits=logits[:, 1])
    else:
        dist2 = Bernoulli(logits=logits[:, 2])

    # Decision 2
    b2 = dist2.sample()
    logprob2 = dist2.log_prob(b2)

    return b1, logprob1, b2, logprob2
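A sketch of how the returned log-probabilities would typically feed a score-function (REINFORCE) update; the reward definition, optimizer and single-sample batch are assumptions for illustration, not part of the example:

import torch

logits = torch.zeros(1, 3, requires_grad=True)
opt = torch.optim.SGD([logits], lr=0.1)

b1, logprob1, b2, logprob2 = make_decisions(logits)
reward = 1.0 if (b1.item() == 0 and b2.item() == 0) else 0.0   # arbitrary illustrative reward
loss = -(reward * (logprob1 + logprob2)).sum()
opt.zero_grad()
loss.backward()
opt.step()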
Example #18
def schedule_sample(prev_logit, prev_tgt, epsilon):
    prev_out = torch.argmax(prev_logit, dim=1, keepdim=True)
    prev_choices = torch.cat([prev_out, prev_tgt], dim=1)  # [B, 2]
    batch_size = prev_choices.size(0)
    prob = Bernoulli(torch.tensor([epsilon]*batch_size).unsqueeze(1))
    # sampling
    sample = prob.sample().long().to(prev_tgt.device)
    next_inp = torch.gather(prev_choices, 1, sample)
    return next_inp
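A usage sketch for schedule_sample (shapes are assumptions inferred from the gather call): with probability epsilon the gold token is fed back (teacher forcing), otherwise the model's own argmax prediction is used.

import torch

prev_logit = torch.randn(4, 10)              # [B, vocab] logits from the previous step
prev_tgt = torch.randint(0, 10, (4, 1))      # [B, 1] gold tokens
next_inp = schedule_sample(prev_logit, prev_tgt, epsilon=0.5)   # [B, 1] mixed inputs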
Example #19
 def optimality(self, probabilities):
     # sample some bernoulli rv under the distribution over probabilities
     optimality_tensor = torch.zeros(
         (self.sample_size, self.trajectory_length, 1))
     for t in range(self.trajectory_length):
         for j in range(self.sample_size):
             optim_dist = Bernoulli(probabilities[t])
             optimality_tensor[j, t, 0] = optim_dist.sample()
     # return
     return optimality_tensor
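The same tensor can be drawn without the double loop; a vectorized sketch of the method body (assuming probabilities is a 1-D tensor of length trajectory_length):

probs = probabilities.reshape(1, self.trajectory_length, 1)
probs = probs.expand(self.sample_size, self.trajectory_length, 1)
optimality_tensor = Bernoulli(probs).sample()   # each entry drawn independently with probability probabilities[t]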
Example #20
class BertEncoder(nn.Module):
    def __init__(self, config, scc_n_layer=6):
        super(BertEncoder, self).__init__()
        self.prd_n_layer = config.num_hidden_layers
        self.scc_n_layer = scc_n_layer
        assert self.prd_n_layer % self.scc_n_layer == 0
        self.compress_ratio = self.prd_n_layer // self.scc_n_layer
        self.bernoulli = None
        self.output_attentions = config.output_attentions
        self.output_hidden_states = config.output_hidden_states
        self.layer = nn.ModuleList([BertLayer(config) for _ in range(self.prd_n_layer)])
        self.scc_layer = nn.ModuleList([BertLayer(config) for _ in range(self.scc_n_layer)])

    def set_replacing_rate(self, replacing_rate):
        if not 0 < replacing_rate <= 1:
            raise Exception('Replace rate must be in the range (0, 1]!')
        self.bernoulli = Bernoulli(torch.tensor([replacing_rate]))

    def forward(self, hidden_states, attention_mask=None, head_mask=None, encoder_hidden_states=None,
                encoder_attention_mask=None):
        all_hidden_states = ()
        all_attentions = ()
        if self.training:
            inference_layers = []
            for i in range(self.scc_n_layer):
                if self.bernoulli.sample() == 1:  # REPLACE
                    inference_layers.append(self.scc_layer[i])
                else:  # KEEP the original
                    for offset in range(self.compress_ratio):
                        inference_layers.append(self.layer[i * self.compress_ratio + offset])

        else:  # inference with compressed model
            inference_layers = self.scc_layer

        for i, layer_module in enumerate(inference_layers):
            if self.output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_outputs = layer_module(hidden_states, attention_mask, head_mask[i], encoder_hidden_states,
                                         encoder_attention_mask)
            hidden_states = layer_outputs[0]

            if self.output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        # Add last layer
        if self.output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        outputs = (hidden_states,)
        if self.output_hidden_states:
            outputs = outputs + (all_hidden_states,)
        if self.output_attentions:
            outputs = outputs + (all_attentions,)
        return outputs  # last-layer hidden state, (all hidden states), (all attentions)
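Usage note (mine, not from the source): the Bernoulli-driven replacement only happens while the module is in training mode; each group of compress_ratio original layers is then swapped for its successor layer with the configured probability.

# config must provide num_hidden_layers, output_attentions and output_hidden_states,
# exactly as the constructor above expects
encoder = BertEncoder(config, scc_n_layer=6)
encoder.set_replacing_rate(0.3)   # successor blocks replace their layer groups w.p. 0.3 per forward pass
encoder.train()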
Example #21
    def reward_forward(self, prob, locations, orig_window_length, full_image,
                       other_full_image):
        """
        forward with policy gradient
        :param prob: probability maps
        :param locations: locations recording where the patches are extracted
        :param orig_window_length: original patches length to calculate the replication times
        :param full_image: ground truth full image
        :param other_full_image: another ground truth full image
        :return:
        """
        # Bernoulli sampling
        batch_size = prob.size(0)
        bernoulli_dist = Bernoulli(prob)
        samples = bernoulli_dist.sample()
        log_probs = bernoulli_dist.log_prob(samples)

        # put back
        with torch.no_grad():
            repeat_times = int(np.ceil(batch_size / orig_window_length))

            target_full_images = other_full_image.repeat(repeat_times, 1, 1, 1)
            inpaint_full_images = full_image.repeat(repeat_times, 1, 1, 1)

            # j th full image
            j = 0
            for batch_idx in range(batch_size):
                sample = samples[batch_idx]
                y1, x1, y2, x2 = locations[batch_idx]
                # sample = torch.where(sample >= 0.5, torch.ones_like(sample), torch.zeros_like(sample))
                inpaint_full_images[j, :, y1:y2, x1:x2] = sample.detach()

                if (batch_idx + 1) % orig_window_length == 0:
                    j += 1

            # calculate the reward over the re-composed root and ground truth root
            rewards = self.forward(inpaint_full_images, target_full_images)
            # broadcast the rewards to each element of the feature maps
            broadcast_rewards = torch.zeros(batch_size, 1)
            broadcast_rewards = broadcast_rewards.to(device)
            # j th full image
            j = 0
            for batch_idx in range(batch_size):
                broadcast_rewards[batch_idx] = rewards[j]
                if (batch_idx + 1) % orig_window_length == 0:
                    j += 1

        broadcast_rewards = broadcast_rewards.view(broadcast_rewards.size(0),
                                                   1, 1, 1)
        image_size = prob.size(2)
        broadcast_rewards = broadcast_rewards.repeat(1, 1, image_size,
                                                     image_size)

        return log_probs, broadcast_rewards
Example #22
 def get_action(self, state):
     all_hp_probs, all_anchor_probs = self.forward(state)
     all_anchor_act, all_hp_act = [], []
     for layer_anchor_probs in all_anchor_probs:
         anchor_sampler = Bernoulli(layer_anchor_probs)
         layer_anchor_act = anchor_sampler.sample()
         all_anchor_act.append(layer_anchor_act)
     for hp_probs in all_hp_probs:
         sampler = OneHotCategorical(logits=hp_probs)
         all_hp_act.append(sampler.sample())
     return all_hp_act, all_anchor_act
Example #23
    def MoG_sample(self):

        prob = torch.ones(self.input_shape) * .5
        bern = Bernoulli(prob)
        b = bern.sample().cuda()

        eps = torch.zeros_like(b).normal_().cuda()
        z1 = self.mean1 + self.logsd * eps
        z2 = self.mean2 + self.logsd * eps
        z = b * z1 + (1. - b) * z2
        return z
Example #24
 def action(self, x):
     x = T.from_numpy(x).double().unsqueeze(0)
     # x = x.double().unsqueeze(0)
     message_means, message_sds, action_probs = self.forward(x)
     action_dbn = Bernoulli(action_probs)
     action = action_dbn.sample()
     message_dbn = Normal(message_means, message_sds)
     message = message_dbn.sample()
     log_prob = action_dbn.log_prob(action) + message_dbn.log_prob(
         message).sum()
     x = T.cat((message[0, :], action[0].double()))
     return x, log_prob
Example #25
 def optimality_batch(self, probabilities):
     # sample some bernoulli rv under the distribution over probabilities
     optimality_tensor = torch.zeros(
         (self.sample_size, self.trajectory_length, self.trajectory_length))
     for j in range(self.sample_size):
         optim_temp = torch.zeros(self.trajectory_length)
         for t in range(self.trajectory_length):
             optim_dist = Bernoulli(probabilities[t])
             optim_temp[t] = optim_dist.sample()
         # set the whole thing in as an input
         optimality_tensor[j, :, :] = optim_temp
     # return
     return optimality_tensor
Example #26
class randJanken():
    '''Random janken player chooses a random policy distribution and randomly resets it.
    kwargs:
        reset_prob (float): probability of resetting at each turn
        dists: list of distributions to use
        bias (float): scalar to determine biasing towards moves that previously won'''
    def __init__(self, reset_prob=0.015, **kwargs):
        #expected reset time = 1/(reset_prob)
        if reset_prob == 0:
            self.coin = None
        else:
            self.coin = Bernoulli(torch.tensor(reset_prob))
        self.dists = kwargs.get("dists")
        self.bias = kwargs.get("bias")

        if self.dists:
            self.policy = Categorical(self.dists.pop(0))
        else:
            self.policy = self.rand_dist()

    def __str__(self):
        if self.coin is None:
            return "uniform"
        return f"rand: reset_prob = {self.coin.probs.item():.3f}, bias = {self.bias:.3f}"

    def rand_dist(self):
        return Categorical(torch.rand(3))

    def throw(self):
        r = 0
        if self.coin is not None:
            r = self.coin.sample().item()
        if r == 1:
            if self.dists:
                self.policy = Categorical(self.dists.pop(0))
            else:
                self.reset()
        return self.policy.sample()

    def observe(self, move, reward):
        if reward == 1 and self.bias:
            v = F.one_hot(move, 3)
            self.policy.probs = self.bias * v + (1 -
                                                 self.bias) * self.policy.probs

    def reset(self):
        self.policy = self.rand_dist()

    @property
    def dist(self):
        return self.policy.probs
Example #27
    def train(self, batch_size=100, discount=0.99, tau=0.005, policy_freq=2):

        for it in range(self.iters):

            # Sample replay buffer
            x, y, u, r, d = self.replay_buffer.sample(batch_size)
            state = torch.DoubleTensor(x).to(device)
            action = torch.DoubleTensor(u).to(device)
            next_state = torch.DoubleTensor(y).to(device)
            done = torch.DoubleTensor(1 - d).to(device)
            reward = torch.DoubleTensor(r).to(device)

            # Select the action according to the target policy and flip each bit
            # with probability 0.2 (a binary analogue of TD3's clipped exploration noise)
            m = Bernoulli(torch.ones(u.shape) * 0.2)
            noise = m.sample().to(device)
            next_action = ((noise - self.actor_target(next_state)) != 0).double()


            # Compute the target Q value
            target_Q1, target_Q2 = self.critic_target(next_state, next_action)
            target_Q = torch.min(target_Q1, target_Q2)
            target_Q = reward + (done * discount * target_Q).detach()

            # Get current Q estimates
            current_Q1, current_Q2 = self.critic(state, action)

            # Compute critic loss
            critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q) 

            # Optimize the critic
            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            self.critic_optimizer.step()

            # Delayed policy updates
            if it % policy_freq == 0:

                # Compute actor loss
                actor_loss = -self.critic.Q1(state, self.actor(state)).mean()
                
                # Optimize the actor 
                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                self.actor_optimizer.step()

                # Update the frozen target models
                for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

                for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
                    target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
Example #28
    def simulate(self, theta, psi):
        bottom = theta[0].item()
        ee50 = theta[1].item()
        slope = theta[2].item()
        top = theta[3].item()
        n = len(psi)
        x = torch.zeros(n)
        for index in range(n):
            rate = bottom + ((top - bottom) /
                             (1 + (-(psi[index] - ee50) * slope).exp()))
            p = Bernoulli(rate)
            x[index] = p.sample()

        return x
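A call sketch (the simulator instance and the parameter values are illustrative assumptions): with bottom = 0 and top = 1 the four-parameter logistic rate stays inside [0, 1], which it must in order to be a valid Bernoulli probability.

import torch

theta = torch.tensor([0.0, 5.0, 1.0, 1.0])   # bottom, ee50, slope, top
psi = torch.linspace(0.0, 10.0, 8)           # design points
x = simulator.simulate(theta, psi)           # one Bernoulli draw per design point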
Example #29
    def forward(self, z, x=None):
        y = z

        y = F.relu(self.fc1(y))
        y = y.view(-1, 16, 5, 5)
        y = F.relu(self.conv1(y))
        y = F.relu(self.conv2(y))
        y = F.relu(self.conv3(y))
        y = self.conv2(y)
        y = y[:, 0, 1:, 1:]
        dist = Bernoulli(logits = y)
        if x is None: x = dist.sample()
        score = dist.log_prob(x.float())
        score = score.sum(dim=2).sum(dim=1)
        return x, score
Example #30
    def sample(self, x, parameters):
        """
        Create some logistic regression data. *** NOTE: This ignores the precisions of each of the values of w, and
        simply assuming the true (unknown) weight is w; this is different to finding the predictive distribution!! ***

        :param x: input values to predict at
        :param parameters: model parameters (not will not update)
        :param hyperparameters: model hyperparameters (will also not update)
        :return: y: tensor of parameter labels
        """
        w_nat_means = parameters["w_mu"]

        z = torch.mv(x, w_nat_means)
        p = self.act(z)

        output_dist = Bernoulli(p)
        y = output_dist.sample()
        return y
Example #31
    def get_numerical_reward(self, reward_logits):
        reward_bernoulli = Bernoulli(logits=reward_logits)
        sampled_r = reward_bernoulli.sample()

        r_out = torch.FloatTensor(sampled_r.shape[0], 1).fill_(0)
        if self.use_cuda:
            r_out = r_out.cuda()
        for i in range(sampled_r.shape[0]):
            r = 0
            if sampled_r[i, 0] == 1:
                r_out[i] = 0
            else:
                for k in range(2, sampled_r.shape[1]):
                    r += int(math.pow(2, sampled_r.shape[1] - 1 -
                                      k)) if sampled_r[i, k] == 1 else 0
                if sampled_r[i, 1] == 1:
                    r = -r
                r_out[i] = r
        return r_out
Example #32
    print()


    optim = torch.optim.Adam([bern_param], lr=.004)

    steps = []
    losses= []
    for step in range(total_steps):

        dist = Bernoulli(logits=bern_param)

        optim.zero_grad()

        bs = []
        for i in range(20):
            samps = dist.sample()
            bs.append(H(samps))
        bs = torch.FloatTensor(bs).unsqueeze(1)

        logprob = dist.log_prob(bs)
        # logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param), retain_graph=True)[0]

        loss = torch.mean(f(bs) * logprob)

        #review the pytorch_toy and the RL code to see how PG was done 

        loss.backward()  
        optim.step()

        if step%50 ==0:
            if step %500==0:
Example #33


prelogits = torch.zeros([B,C])
logits = prelogits - logsumexp(prelogits)
# logits = torch.tensor(logits.clone().detach(), requires_grad=True)
logits.requires_grad_(True)



grads = []
for i in range(N):
    dist1 = Bernoulli(logits=logits[:,0])

    # Decision 1
    b1 = dist1.sample()
    logprob1 = dist1.log_prob(b1)

    if b1 ==0:
        dist2 = Bernoulli(logits=logits[:,1])
    else:
        dist2 = Bernoulli(logits=logits[:,2])

    # Decision 2
    b2 = dist2.sample()
    logprob2 = dist2.log_prob(b2)

    if b1 == 0 and b2 == 0:
        reward = 1
    elif b1 == 0 and b2 == 1:
        reward = 2
Example #34
pz_grad_stds = []
for theta in thetas:
    

#     print ()
    print ('theta:', theta)
#     # theta = .01 #.99 #.1 #95 #.3 #.9 #.05 #.3
    bern_param = torch.tensor([theta], requires_grad=True)


    dist = Bernoulli(bern_param)
    samps = []
    grads = []
    logprobgrads = []
    for i in range(n):
        samp = dist.sample()

        logprob = dist.log_prob(samp.detach())
        logprobgrad = torch.autograd.grad(outputs=logprob, inputs=(bern_param), retain_graph=True)[0]
        # print (samp.data.numpy(), logprob.data.numpy(), logprobgrad.data.numpy())
        # fsdfa

        samps.append(samp.numpy())
        grads.append( (f(samp.numpy()) - 0.) * logprobgrad.numpy())
        logprobgrads.append(logprobgrad.numpy())


    # print (grads[:10])

    print ('Grad Estimator: REINFORCE')
    # print ('Avg samp', np.mean(samps))