Example #1
    def forward(self, input):
        if self.quant:
            p_a = torch.sigmoid(self.p_a)
            p_b = torch.sigmoid(self.p_b)
            p_w_0 = p_a
            p_w_pos = p_b * (1. - p_w_0)
            p_w_neg = (1. - p_b) * (1. - p_w_0)
            p = torch.stack([p_w_neg, p_w_0, p_w_pos], dim=-1)
            if self.training:
                w_mean = (p * self.w_candidate).sum(dim=-1)
                w_var = (p *
                         self.w_candidate.pow(2)).sum(dim=-1) - w_mean.pow(2)
                act_mean = F.linear(input, w_mean, self.bias)
                act_var = F.linear(input.pow(2), w_var, None)
                var_eps = torch.randn_like(act_mean)
                y = act_mean + var_eps * act_var.add(self.eps).sqrt()
            else:
                m = Multinomial(probs=p)
                indices = m.sample().argmax(dim=-1)
                w = self.w_candidate[indices]
                y = F.linear(input, w, self.bias)
        else:
            y = super().forward(input)

        return y
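
As a standalone illustration, here is a minimal sketch of the inference-time branch above, assuming `w_candidate` holds the ternary values `[-1, 0, 1]` and using toy tensors in place of the learned `p_a`/`p_b`:

import torch
from torch.distributions import Multinomial

# Toy stand-ins for the learned parameters (shapes are hypothetical).
w_candidate = torch.tensor([-1., 0., 1.])
p_a = torch.sigmoid(torch.randn(4, 3))   # P(w == 0)
p_b = torch.sigmoid(torch.randn(4, 3))   # splits the remainder between +1 and -1

p_w_0 = p_a
p_w_pos = p_b * (1. - p_w_0)
p_w_neg = (1. - p_b) * (1. - p_w_0)
p = torch.stack([p_w_neg, p_w_0, p_w_pos], dim=-1)  # sums to 1 along the last dim

# total_count defaults to 1, so each draw is a one-hot vector per weight;
# argmax recovers the chosen index and w_candidate maps it to {-1, 0, +1}.
indices = Multinomial(probs=p).sample().argmax(dim=-1)
w = w_candidate[indices]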
Example #2
def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs = policy(Variable(state))
    m = Multinomial(probs)
    action = m.sample()
    policy.saved_log_probs.append(m.log_prob(action))
    return action.data[0]
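
This snippet targets an old PyTorch API (`Variable`, and a `torch.distributions.Multinomial` that drew a single category index). A rough modern equivalent, assuming `policy` returns one row of normalized action probabilities, is the `Categorical` pattern:

import torch
from torch.distributions import Categorical

def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs = policy(state)                      # hypothetical policy network
    m = Categorical(probs)
    action = m.sample()
    policy.saved_log_probs.append(m.log_prob(action))
    return action.item()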
Example #3
    def decide(self, choices: List[Any]) -> int:

        inputs = list(map(lambda choice: torch.FloatTensor(choice), choices))
        enhanced_features = list(
            map(lambda vec: self._base_network.model.forward(vec), inputs))
        action_features = list(
            map(lambda vec: self._policy_gradient.model.forward(vec.detach()),
                enhanced_features))

        # Get move
        probabilities = Function.softmax(torch.cat(list(action_features)))
        distribution = Multinomial(1, probabilities)
        move = distribution.sample()
        _, index_of_move = move.max(0)

        # Expected reward
        expected_reward = self._value_function.model(
            enhanced_features[index_of_move])
        log_probability = distribution.log_prob(move)

        # Record estimate
        self.rounds.append(
            Round(value=expected_reward, log_probability=log_probability))

        # Return
        return index_of_move.item()
Example #4
def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs, state_value = model(Variable(state))
    m = Multinomial(probs)
    action = m.sample()
    model.saved_actions.append(SavedAction(m.log_prob(action), state_value))
    return action.data[0]
Example #5
def sample_with_weights(values: torch.Tensor, weights: torch.Tensor,
                        num_samples: int) -> torch.Tensor:
    # define multinomial with weights as probs
    multi = Multinomial(probs=weights)
    # sample num samples, with replacement
    samples = multi.sample(sample_shape=(num_samples, ))
    # get indices of success trials
    indices = torch.where(samples)[1]
    # return those indices from trace
    return values[indices]
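
Because `total_count` defaults to 1, every draw in this helper is a one-hot vector, and `torch.where(samples)[1]` recovers the index of the hit in each row. A minimal sketch of the same idea with toy data, including the more direct `torch.multinomial` route:

import torch
from torch.distributions import Multinomial

weights = torch.tensor([0.1, 0.7, 0.2])
values = torch.tensor([10., 20., 30.])

samples = Multinomial(probs=weights).sample(sample_shape=(5,))  # shape (5, 3), one-hot rows
indices = torch.where(samples)[1]                               # column index of each hit
resampled = values[indices]

# Equivalent sampling with replacement, without building one-hot vectors:
resampled_alt = values[torch.multinomial(weights, 5, replacement=True)]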
Example #6
def select_action(state, variance=1, temp=10):
    # this function selects stochastic actions based on the policy probabilities
    state = torch.from_numpy(state).float().unsqueeze(0)
    action_scores = actor(state)
    prob = F.softmax(action_scores / temp, dim=1)  #
    m = Multinomial(vaccine_supply, prob[0])
    action = m.sample()
    log_prob = m.log_prob(action)
    entropy = -(torch.log(prob) * prob).sum(1, keepdim=True)  # categorical entropy of the policy
    return action.numpy(), log_prob, entropy
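
Unlike the single-draw examples above, `total_count=vaccine_supply` makes each sample a count vector: an allocation of the whole supply across the action dimensions, with `log_prob` giving the multinomial log-likelihood of that allocation. A toy sketch with made-up numbers:

import torch
from torch.distributions import Multinomial

prob = torch.tensor([0.1, 0.2, 0.3, 0.4])
m = Multinomial(10, prob)        # allocate 10 units over 4 groups
action = m.sample()              # e.g. tensor([1., 2., 3., 4.]); always sums to 10
log_prob = m.log_prob(action)    # multinomial log-likelihood of this allocation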
Example #7
def sample(lp:Tensor,axis=1,numsamples=1,MAP=False):
	lastaxis = lp.ndimension() -1
	lpt = lp.transpose(lastaxis,axis)
	M = Multinomial(total_count=numsamples,logits=lpt)
	#D = Dirichlet((lp.exp())*(numsamples.float())/(lp.size(lastaxis)))
	samps = M.sample().detach()
	samps = samps.transpose(lastaxis,axis)/numsamples
	logprob = (lp-(samps.detach()).log())
	logprob[logprob!=logprob] = float('Inf')  # replace NaNs (x != x) with +Inf
	logprob = logprob.min(dim=axis,keepdim=True)[0]

	return samps, logprob
Example #8
def select_action(state, variance=1, temp=10):
    state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
    action_scores = actor(state)
    print(action_scores, file=myLog)
    prob = F.softmax(action_scores / temp, dim=1)  #
    #print('***',prob)
    m = Multinomial(vaccine_supply, prob[0])  #[0]
    action = m.sample()
    #print(action)
    log_prob = m.log_prob(action)
    entropy = -torch.sum(torch.log(prob) * prob, axis=-1)
    return action.numpy(), log_prob, entropy
Example #9
    def sample_from_population_with_weights(
        particles: Tensor, weights: Tensor, num_samples: int = 1
    ) -> Tensor:
        """Return samples from particles sampled with weights."""

        # define multinomial with weights as probs
        multi = Multinomial(probs=weights)
        # sample num samples, with replacement
        samples = multi.sample(sample_shape=(num_samples,))
        # get indices of success trials
        indices = torch.where(samples)[1]
        # return those indices from trace
        return particles[indices]
Example #10
def select_action(state, variance=1, temp=10):
    # this function selects stochastic actions based on the policy probabilities
    #state = torch.from_numpy(np.array(state)).float().unsqueeze(0)   #Reza: this might be a bit faster torch.tensor(state,dtype=torch.float32).unsqueeze(0)
    state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)

    action_scores = actor(state)
    print(action_scores, file=myLog)
    prob = F.softmax(action_scores / temp, dim=1)  #
    #print('***',prob)
    m = Multinomial(vaccine_supply, prob[0])  #[0]
    action = m.sample()
    log_prob = m.log_prob(action)
    entropy = -torch.sum(torch.log(prob) * prob, axis=-1)
    return action.numpy(), log_prob, entropy
Example #11
    def select_action(self, state, temp=1):
        # this function selects stochastic actions based on the policy probabilities
        state = torch.tensor(state, dtype=torch.float32,
                             device=self.device).unsqueeze(0)
        logits = self.actor(state)

        # TODO: check this one later
        logits_norm = (logits - torch.mean(logits)) / \
                             (torch.std(logits) + 1e-5)

        m = Multinomial(self.args.vaccine_supply,
                        logits=logits_norm.squeeze() / temp)
        action = m.sample()
        log_prob = m.log_prob(action)
        entropy = -torch.sum(m.logits * m.probs)
        return action.to('cpu').numpy(), log_prob, entropy
Example #12
def select_action(state, variance=1, temp=1):
    # this function selects stochastic actions based on the policy probabilities    
    # state = torch.from_numpy(np.array(state)).float().unsqueeze(0)   #Reza: this might be a bit faster torch.tensor(state,dtype=torch.float32).unsqueeze(0)
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    action_scores = actor(state)
    action_scores_norm = (action_scores-torch.mean(action_scores))/\
                         (torch.std(action_scores)+1e-5)
    # print(action_scores, file=myLog)
    # prob = F.softmax(action_scores_norm , dim=1)
    # print('***',prob)
    m = Multinomial(vaccine_supply, logits=action_scores_norm.squeeze()/ temp)
    # m = Multinomial(vaccine_supply, prob[0])  # [0]
    action = m.sample()
    log_prob = m.log_prob(action)
    # entropy = - torch.sum(torch.log(prob) * prob, axis=-1)
    entropy = -torch.sum(m.logits* m.probs, axis=-1)
    return action.to('cpu').numpy(), log_prob, entropy
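
Here the distribution is parameterized with `logits=` rather than `probs=`; the distribution normalizes them internally (a softmax), so dividing by `temp` acts as a softmax temperature, and `m.probs`/`m.logits` return the normalized per-category values. The entropy line is therefore the per-trial (categorical) entropy used as a bonus, not the entropy of the count distribution itself. A small check of that reading:

import torch
from torch.distributions import Multinomial, Categorical

logits = torch.randn(4)
m = Multinomial(10, logits=logits)
per_trial_entropy = -(m.logits * m.probs).sum(-1)
# In recent PyTorch versions m.logits is normalized, so these should match:
print(per_trial_entropy, Categorical(logits=logits).entropy())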
Example #13
def sample_liklihood(lp,axis=1,numsamples=1):
	lastaxis = lp.ndimension() - 1
	lporig = lp
	lpunif = torch.zeros_like(lp)
	lpunif = lp.exp() * 0 - (lp.exp() * 0).logsumexp(dim=1, keepdim=True)
	samplinglp = lpunif
	lpt = samplinglp.transpose(lastaxis, axis)
	M = Multinomial(total_count=numsamples, logits=lpt)
	samps = M.sample().detach()
	samps = samps.transpose(lastaxis, axis) / numsamples
	logprob = (lporig - (samps.detach()).log())
	logprob[logprob != logprob] = float('Inf')
	logprob = logprob.min(dim=axis, keepdim=True)[0]

	lpmodel = min_correction(lpunif - lporig, axis)

	return samps.detach(), logprob, lpmodel
Example #14
    def evaluate(self, possible_boards):
        # possible_boards -> neural network -> sigmoid -> last_layer_sigmoid
        last_layer_outputs = self.run_through_neural_network(possible_boards)
        # last_layer_sigmoid = list(map(lambda x: x.sigmoid(), last_layer_outputs))

        # Decide move and save log_prob for backward
        # We make sure not to affect the value fn with .detach()

        probs = self.pg_plugin._softmax(last_layer_outputs)
        distribution = Multinomial(1, probs)
        move = distribution.sample()
        self.saved_log_probabilities.append(distribution.log_prob(move))

        _, move = move.max(0)
        # calculate the value estimation and save for backward
        value_estimate = self.pg_plugin.value_model(last_layer_outputs[move])
        self.saved_value_estimations.append(value_estimate)
        return move
Example #15
    def sample_fn():
        fig = figure(figsize=(12, 5))
        model.eval()

        if K is None:
            n_samp = 12
            Y = None
        else:
            n_samp = 2 * K
            Y = torch.arange(2 * K, device=device) % K

        X = torch.zeros(n_samp, C, H, W, device=device).long()

        with torch.no_grad():
            for h in range(H):
                for w in range(W):
                    for c in range(C):
                        _, logits = model(X, Y)

                        m = Multinomial(logits=logits[:, :, h, w, c])
                        X_ = m.sample(torch.Size([]))
                        X[:, c, h, w] = torch.argmax(X_, dim=1)

        X = X.cpu().numpy()
        if C > 1:
            X = X.astype('float') / 255.0
            _ = imshow(
                X.reshape(2, n_samp // 2, C, H,
                          W).transpose(0, 3, 1, 4,
                                       2).reshape(2 * H, n_samp // 2 * W, C))
        else:
            _ = imshow(
                X.reshape(2, n_samp // 2, H,
                          W).transpose(0, 2, 1,
                                       3).reshape(2 * H, n_samp // 2 * W))
        colorbar()

        return fig
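
Each inner-loop draw above is a one-hot vector over the intensity levels for one (channel, row, column) position of every image in the batch; `argmax` converts it back to the sampled level. In isolation, assuming 12 images and 256 levels:

import torch
from torch.distributions import Multinomial

logits = torch.randn(12, 256)                    # hypothetical: batch of 12, 256 levels
one_hot = Multinomial(logits=logits).sample()    # shape (12, 256), one-hot rows
levels = torch.argmax(one_hot, dim=1)            # shape (12,), sampled level per image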
Example #16
def sampleunif(lp:Tensor,axis=1,numsamples=1):
	''' Samples from the random variables uniformly
	A model is given in the probability space with logit vector lp
	The probability that the sample is in the model is calculated.

	'''
	lastaxis = lp.ndimension() -1
	lporig = lp
	lpunif = torch.zeros_like(lp)
	lpunif = lpunif - (lpunif).logsumexp(dim=1,keepdim=True)
	lpt = lpunif.transpose(lastaxis,axis)
	M = Multinomial(total_count=numsamples,logits=lpt)
	samps = M.sample().detach()
	samps = samps.transpose(lastaxis,axis)/numsamples
	logprob = (lporig-(samps.detach()).log())
	logprob[logprob!=logprob] = float('Inf')
	logprob = logprob.min(dim=axis,keepdim=True)[0]
	# lpmodel = (lpunif-lporig).min(dim=axis,keepdim=True)[0]
	# TODO min
	lpmodel = softmin(lpunif-lporig,axis)
	# lpmodel= (lpunif-lporig).min(dim=1,keepdim=True)[0]# -  float(lporig.shape[1])
	# lpmodel = renyi_prob(lpunif,lporig,1)
	inmodel_lprobs = logprob + lpmodel - lpunif.mean(dim=1, keepdim=True)  # - max_correction(-lporig, axis)
	return samps, logprob, inmodel_lprobs
Example #17
    def generate_discrete_network(self, method: str = "sample"):
        """ generates discrete weights from the weights of the layer based on the weight distributions

        :param method: the method to use to generate the discrete weights. Either argmax or sample

        :returns: tuple (sampled_w, sampled_b) where sampled_w and sampled_b are tensors of the shapes
        (output_channels x input_channels x kernel rows x kernel columns) and (output_features x 1). sampled_b is None if the layer has no bias
        """

        probabilities_w = self.generate_weight_probabilities(self.W_logits)
        # logit probabilities must be in inner dimension for torch.distribution.Multinomial
        # stepped transpose bc we need to keep the order of the other dimensions
        probabilities_w = probabilities_w.transpose(0, 1).transpose(
            1, 2).transpose(2, 3).transpose(3, 4)
        if self.b_logits is not None:
            probabilities_b = self.generate_weight_probabilities(self.b_logits)
            probabilities_b = probabilities_b.transpose(0, 1).transpose(1, 2)
        else:
            # layer does not use bias
            probabilities_b = None
        discrete_values_tensor = torch.tensor(
            self.discrete_weight_values).double()
        discrete_values_tensor = discrete_values_tensor.to(
            self.W_logits.device)
        if method == "sample":
            # this is a output_channels x input_channels x kernel rows x kernel columns x discretization_levels mask
            m_w = Multinomial(probs=probabilities_w)
            sampled_w = m_w.sample()
            if not torch.all(sampled_w.sum(dim=4) == 1):
                raise ValueError("sampled mask for weights does not sum to 1")

            # need to generate the discrete weights from the masks
            sampled_w = torch.matmul(sampled_w, discrete_values_tensor)

            if probabilities_b is not None:
                # this is a output channels x 1 x discretization levels mask
                m_b = Multinomial(probs=probabilities_b)
                sampled_b = m_b.sample()

                if not torch.all(sampled_b.sum(dim=2) == 1):
                    raise ValueError("sampled mask for bias does not sum to 1")
                sampled_b = torch.matmul(sampled_b, discrete_values_tensor)
            else:
                sampled_b = None

        elif method == "argmax":
            # returns a (out_feat x in_feat) matrix where the values correspond to the index of the discretized value
            # with the largest probability
            argmax_w = torch.argmax(probabilities_w, dim=4)
            # creating placeholder for discrete weights
            sampled_w = torch.zeros_like(argmax_w).to("cpu")
            sampled_w[:] = discrete_values_tensor[argmax_w[:]]

            if probabilities_b is not None:
                argmax_b = torch.argmax(probabilities_b, dim=2)
                sampled_b = torch.zeros_like(argmax_b).to("cpu")
                sampled_b[:] = discrete_values_tensor[argmax_b[:]]
            else:
                sampled_b = None
        else:
            raise ValueError(
                f"Invalid method {method} for layer discretization")

        # sanity checks
        if sampled_w.shape != probabilities_w.shape[:-1]:
            raise ValueError(
                "sampled probability mask for weights does not match expected shape"
            )
        if sampled_b is not None:
            if sampled_b.shape != probabilities_b.shape[:-1]:
                raise ValueError(
                    "sampled probability mask for bias does not match expected shape"
                )

        return sampled_w, sampled_b
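
The core trick in the "sample" branch is that the one-hot mask over discretization levels, matrix-multiplied with the vector of discrete values, collapses the last dimension onto the chosen value. A reduced sketch with hypothetical ternary values and shapes:

import torch
from torch.distributions import Multinomial

discrete_values = torch.tensor([-1., 0., 1.]).double()
probs = torch.softmax(torch.randn(8, 4, 3, 3, 3), dim=-1).double()  # levels on the last dim
mask = Multinomial(probs=probs).sample()       # one-hot over the last dimension
weights = torch.matmul(mask, discrete_values)  # shape (8, 4, 3, 3): the discrete weights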