def forward(self, input):
    if self.quant:
        # Map the two logit parameters to probabilities over the three
        # candidate weight values {neg, 0, pos}
        p_a = torch.sigmoid(self.p_a)
        p_b = torch.sigmoid(self.p_b)
        p_w_0 = p_a
        p_w_pos = p_b * (1. - p_w_0)
        p_w_neg = (1. - p_b) * (1. - p_w_0)
        p = torch.stack([p_w_neg, p_w_0, p_w_pos], dim=-1)
        if self.training:
            # Local reparameterization: propagate the mean and variance of the
            # stochastic weights through the linear layer instead of sampling
            # the weights directly
            w_mean = (p * self.w_candidate).sum(dim=-1)
            w_var = (p * self.w_candidate.pow(2)).sum(dim=-1) - w_mean.pow(2)
            act_mean = F.linear(input, w_mean, self.bias)
            act_var = F.linear(input.pow(2), w_var, None)
            var_eps = torch.randn_like(act_mean)
            y = act_mean + var_eps * act_var.add(self.eps).sqrt()
        else:
            # At inference time, draw one concrete candidate weight per position
            # (Multinomial with the default total_count=1 yields a one-hot mask)
            m = Multinomial(probs=p)
            indices = m.sample().argmax(dim=-1)
            w = self.w_candidate[indices]
            y = F.linear(input, w, self.bias)
    else:
        y = super().forward(input)
    return y
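# A minimal, self-contained sketch of the ternary sampling used in forward()
# above; the shapes and the name w_candidate are illustrative assumptions, not
# taken from the original layer.
import torch
from torch.distributions import Multinomial

p_a = torch.sigmoid(torch.randn(3, 4))
p_b = torch.sigmoid(torch.randn(3, 4))
p_w_0 = p_a
p_w_pos = p_b * (1. - p_w_0)
p_w_neg = (1. - p_b) * (1. - p_w_0)
p = torch.stack([p_w_neg, p_w_0, p_w_pos], dim=-1)      # sums to 1 on last dim
w_candidate = torch.tensor([-1., 0., 1.])
indices = Multinomial(probs=p).sample().argmax(dim=-1)  # one draw per weight
w = w_candidate[indices]                                # concrete ternary weights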
def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs = policy(Variable(state))  # Variable is a no-op since PyTorch 0.4
    # This snippet targets early PyTorch, where the single-draw distribution
    # was named Multinomial; in current releases the equivalent call is
    # Categorical(probs) (or Multinomial(1, probs) for a one-hot sample).
    m = Multinomial(probs)
    action = m.sample()
    policy.saved_log_probs.append(m.log_prob(action))
    return action.data[0]
def decide(self, choices: List[Any]) -> int:
    inputs = [torch.FloatTensor(choice) for choice in choices]
    enhanced_features = [self._base_network.model.forward(vec) for vec in inputs]
    # detach() keeps policy gradients from flowing back into the base network
    action_features = [
        self._policy_gradient.model.forward(vec.detach())
        for vec in enhanced_features
    ]

    # Get move
    probabilities = Function.softmax(torch.cat(list(action_features)))
    distribution = Multinomial(1, probabilities)
    move = distribution.sample()
    _, index_of_move = move.max(0)

    # Expected reward
    expected_reward = self._value_function.model(enhanced_features[index_of_move])
    log_probability = distribution.log_prob(move)

    # Record estimate
    self.rounds.append(Round(value=expected_reward, log_probability=log_probability))

    return index_of_move.item()
def select_action(state):
    state = torch.from_numpy(state).float().unsqueeze(0)
    probs, state_value = model(Variable(state))
    # As above, Multinomial here is the early-PyTorch name for what is now
    # torch.distributions.Categorical.
    m = Multinomial(probs)
    action = m.sample()
    model.saved_actions.append(SavedAction(m.log_prob(action), state_value))
    return action.data[0]
def sample_with_weights(values: torch.Tensor, weights: torch.Tensor,
                        num_samples: int) -> torch.Tensor:
    # define a multinomial with the weights as probs
    multi = Multinomial(probs=weights)
    # sample num_samples one-hot samples, with replacement
    samples = multi.sample(sample_shape=(num_samples,))
    # get indices of success trials
    indices = torch.where(samples)[1]
    # return those indices from the trace
    return values[indices]
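# Hedged usage sketch for sample_with_weights above; the tensors are made up
# for illustration. Each draw picks one of the three values in proportion to
# its weight, with replacement.
values = torch.tensor([10., 20., 30.])
weights = torch.tensor([0.2, 0.3, 0.5])
resampled = sample_with_weights(values, weights, num_samples=5)
# resampled holds 5 entries drawn from values, e.g. tensor([30., 10., 30., 20., 30.])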
def select_action(state, variance=1, temp=10):
    # this function selects stochastic actions based on the policy probabilities
    state = torch.from_numpy(state).float().unsqueeze(0)
    action_scores = actor(state)
    prob = F.softmax(action_scores / temp, dim=1)
    m = Multinomial(vaccine_supply, prob[0])
    action = m.sample()
    log_prob = m.log_prob(action)
    entropy = -(log_prob * prob).sum(1, keepdim=True)
    return action.numpy(), log_prob, entropy
def sample(lp: Tensor, axis=1, numsamples=1, MAP=False):
    lastaxis = lp.ndimension() - 1
    lpt = lp.transpose(lastaxis, axis)
    M = Multinomial(total_count=numsamples, logits=lpt)
    # D = Dirichlet((lp.exp()) * (numsamples.float()) / (lp.size(lastaxis)))
    samps = M.sample().detach()
    samps = samps.transpose(lastaxis, axis) / numsamples
    logprob = lp - samps.detach().log()
    logprob[logprob != logprob] = float('Inf')  # mask the NaNs from log(0)
    logprob = logprob.min(dim=axis, keepdim=True)[0]
    # NOTE: returning the normalized counts and their log-probability is
    # inferred from the companion sample_liklihood below; the original ended
    # with placeholder Nones.
    return samps, logprob
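# Hedged illustration of the total_count pattern used above: dividing the
# sampled counts by numsamples yields an empirical distribution that
# approaches the target probabilities as numsamples grows. The values are
# illustrative.
import torch
from torch.distributions import Multinomial

logits = torch.tensor([0.1, 0.2, 0.7]).log()
counts = Multinomial(total_count=1000, logits=logits).sample()
empirical = counts / 1000.0  # close to tensor([0.1, 0.2, 0.7])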
def select_action(state, variance=1, temp=10):
    state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
    action_scores = actor(state)
    print(action_scores, file=myLog)
    prob = F.softmax(action_scores / temp, dim=1)
    # print('***', prob)
    m = Multinomial(vaccine_supply, prob[0])
    action = m.sample()
    # print(action)
    log_prob = m.log_prob(action)
    entropy = -torch.sum(torch.log(prob) * prob, axis=-1)
    return action.numpy(), log_prob, entropy
def sample_from_population_with_weights(
    particles: Tensor, weights: Tensor, num_samples: int = 1
) -> Tensor:
    """Return samples from particles sampled with weights."""
    # define a multinomial with the weights as probs
    multi = Multinomial(probs=weights)
    # sample num_samples one-hot samples, with replacement
    samples = multi.sample(sample_shape=(num_samples,))
    # get indices of success trials
    indices = torch.where(samples)[1]
    # return those particles
    return particles[indices]
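# Hedged sketch: the function above implements multinomial resampling as used
# in particle-filter / SMC-style algorithms; the population below is made up
# for illustration.
import torch

particles = torch.randn(100, 2)                   # 100 particles in 2-D
weights = torch.softmax(torch.randn(100), dim=0)  # normalized weights
resampled = sample_from_population_with_weights(particles, weights, num_samples=100)
# resampled.shape == (100, 2); high-weight particles tend to appear repeatedly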
def select_action(state, variance=1, temp=10):
    # this function selects stochastic actions based on the policy probabilities
    # state = torch.from_numpy(np.array(state)).float().unsqueeze(0)
    # Reza: this might be a bit faster: torch.tensor(state, dtype=torch.float32).unsqueeze(0)
    state = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
    action_scores = actor(state)
    print(action_scores, file=myLog)
    prob = F.softmax(action_scores / temp, dim=1)
    # print('***', prob)
    m = Multinomial(vaccine_supply, prob[0])
    action = m.sample()
    log_prob = m.log_prob(action)
    entropy = -torch.sum(torch.log(prob) * prob, axis=-1)
    return action.numpy(), log_prob, entropy
def select_action(self, state, temp=1):
    # this function selects stochastic actions based on the policy probabilities
    state = torch.tensor(state, dtype=torch.float32,
                         device=self.device).unsqueeze(0)
    logits = self.actor(state)
    # TODO: check this one later
    logits_norm = (logits - torch.mean(logits)) / \
        (torch.std(logits) + 1e-5)
    m = Multinomial(self.args.vaccine_supply,
                    logits=logits_norm.squeeze() / temp)
    action = m.sample()
    log_prob = m.log_prob(action)
    entropy = -torch.sum(m.logits * m.probs)
    return action.to('cpu').numpy(), log_prob, entropy
def select_action(state, variance=1, temp=1):
    # this function selects stochastic actions based on the policy probabilities
    state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
    action_scores = actor(state)
    # standardize the scores before temperature scaling to keep the logits in
    # a numerically comfortable range
    action_scores_norm = (action_scores - torch.mean(action_scores)) / \
        (torch.std(action_scores) + 1e-5)
    m = Multinomial(vaccine_supply, logits=action_scores_norm.squeeze() / temp)
    action = m.sample()
    log_prob = m.log_prob(action)
    entropy = -torch.sum(m.logits * m.probs, axis=-1)
    return action.to('cpu').numpy(), log_prob, entropy
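# Hedged, standalone sketch of the allocation pattern shared by the
# select_action variants above: Multinomial with total_count=N distributes a
# fixed budget of N units (e.g. vaccine doses) across the categories in a
# single sample. The numbers below are illustrative.
import torch
from torch.distributions import Multinomial

vaccine_supply = 100
logits = torch.randn(5)                        # one score per region
m = Multinomial(vaccine_supply, logits=logits)
allocation = m.sample()                        # 5 counts summing to 100
log_prob = m.log_prob(allocation)              # used for the policy-gradient update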
def sample_liklihood(lp, axis=1, numsamples=1):
    lastaxis = lp.ndimension() - 1
    lporig = lp
    # uniform logits over the sampling axis (log of 1/K), built in log-space
    lpunif = lp.exp() * 0 - (lp.exp() * 0).logsumexp(dim=1, keepdim=True)
    samplinglp = lpunif
    lpt = samplinglp.transpose(lastaxis, axis)
    M = Multinomial(total_count=numsamples, logits=lpt)
    samps = M.sample().detach()
    samps = samps.transpose(lastaxis, axis) / numsamples
    logprob = lporig - samps.detach().log()
    logprob[logprob != logprob] = float('Inf')  # mask the NaNs from log(0)
    logprob = logprob.min(dim=axis, keepdim=True)[0]
    lpmodel = min_correction(lpunif - lporig, axis)
    return samps.detach(), logprob, lpmodel
def evaluate(self, possible_boards):
    # possible_boards -> neural network -> last-layer outputs
    last_layer_outputs = self.run_through_neural_network(possible_boards)
    # last_layer_sigmoid = list(map(lambda x: x.sigmoid(), last_layer_outputs))

    # Decide on a move and save its log_prob for the backward pass
    probs = self.pg_plugin._softmax(last_layer_outputs)
    distribution = Multinomial(1, probs)
    move = distribution.sample()
    self.saved_log_probabilities.append(distribution.log_prob(move))
    _, move = move.max(0)

    # Calculate the value estimate and save it for the backward pass
    value_estimate = self.pg_plugin.value_model(last_layer_outputs[move])
    self.saved_value_estimations.append(value_estimate)

    return move
def sample_fn():
    fig = figure(figsize=(12, 5))
    model.eval()
    if K is None:
        n_samp = 12
        Y = None
    else:
        n_samp = 2 * K
        Y = torch.arange(2 * K, device=device) % K
    X = torch.zeros(n_samp, C, H, W, device=device).long()
    with torch.no_grad():
        # autoregressive sampling: fill one (pixel, channel) at a time, each
        # conditioned on everything sampled so far
        for h in range(H):
            for w in range(W):
                for c in range(C):
                    _, logits = model(X, Y)
                    m = Multinomial(logits=logits[:, :, h, w, c])
                    X_ = m.sample(torch.Size([]))
                    X[:, c, h, w] = torch.argmax(X_, dim=1)
    X = X.cpu().numpy()
    if C > 1:
        X = X.astype('float') / 255.0
        _ = imshow(
            X.reshape(2, n_samp // 2, C, H, W)
            .transpose(0, 3, 1, 4, 2)
            .reshape(2 * H, n_samp // 2 * W, C))
    else:
        _ = imshow(
            X.reshape(2, n_samp // 2, H, W)
            .transpose(0, 2, 1, 3)
            .reshape(2 * H, n_samp // 2 * W))
    colorbar()
    return fig
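# Hedged note on the sampling step above: with the default total_count=1 a
# Multinomial sample is a one-hot vector, so the argmax over the class
# dimension recovers the sampled category index (the same draw a Categorical
# distribution would return directly). The shapes below are illustrative.
import torch
from torch.distributions import Multinomial

logits = torch.randn(8, 256)                   # batch of per-pixel logits
one_hot = Multinomial(logits=logits).sample()  # (8, 256), one-hot rows
pixel_values = one_hot.argmax(dim=1)           # (8,) sampled intensities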
def sampleunif(lp: Tensor, axis=1, numsamples=1):
    '''
    Samples the random variables uniformly.
    A model is given in the probability space with logit vector lp;
    the probability that the sample is in the model is calculated.
    '''
    lastaxis = lp.ndimension() - 1
    lporig = lp
    lpunif = torch.zeros_like(lp)
    lpunif = lpunif - lpunif.logsumexp(dim=1, keepdim=True)  # log(1/K)
    lpt = lpunif.transpose(lastaxis, axis)
    M = Multinomial(total_count=numsamples, logits=lpt)
    samps = M.sample().detach()
    samps = samps.transpose(lastaxis, axis) / numsamples
    logprob = lporig - samps.detach().log()
    logprob[logprob != logprob] = float('Inf')  # mask the NaNs from log(0)
    logprob = logprob.min(dim=axis, keepdim=True)[0]
    lpmodel = softmin(lpunif - lporig, axis)
    inmodel_lprobs = logprob + lpmodel - lpunif.mean(dim=1, keepdim=True)
    # NOTE: the choice of return values is inferred from the docstring and the
    # companion sample_liklihood above; the original ended with placeholder Nones.
    return samps.detach(), inmodel_lprobs, lpmodel
def generate_discrete_network(self, method: str = "sample"):
    """
    Generates discrete weights from the weights of the layer based on the
    weight distributions.

    :param method: the method to use to generate the discrete weights.
        Either "argmax" or "sample"
    :returns: tuple (sampled_w, sampled_b) where sampled_w and sampled_b are
        tensors of the shapes (output_channels x input_channels x kernel rows
        x kernel columns) and (output_features x 1). sampled_b is None if the
        layer has no bias
    """
    probabilities_w = self.generate_weight_probabilities(self.W_logits)
    # the level probabilities must sit in the innermost dimension for
    # torch.distributions.Multinomial; the permute keeps the order of the
    # other dimensions intact (equivalent to the original stepped transposes)
    probabilities_w = probabilities_w.permute(1, 2, 3, 4, 0)
    if self.b_logits is not None:
        probabilities_b = self.generate_weight_probabilities(self.b_logits)
        probabilities_b = probabilities_b.permute(1, 2, 0)
    else:
        # layer does not use bias
        probabilities_b = None

    discrete_values_tensor = torch.tensor(self.discrete_weight_values).double()
    discrete_values_tensor = discrete_values_tensor.to(self.W_logits.device)

    if method == "sample":
        # one-hot mask of shape output_channels x input_channels x
        # kernel rows x kernel columns x discretization_levels
        m_w = Multinomial(probs=probabilities_w)
        sampled_w = m_w.sample()
        if torch.any(sampled_w.sum(dim=4) != 1):
            raise ValueError("sampled mask for weights does not sum to 1")
        # turn the one-hot masks into concrete discrete weights
        sampled_w = torch.matmul(sampled_w, discrete_values_tensor)
        if probabilities_b is not None:
            # one-hot mask of shape output_channels x 1 x discretization_levels
            m_b = Multinomial(probs=probabilities_b)
            sampled_b = m_b.sample()
            if torch.any(sampled_b.sum(dim=2) != 1):
                raise ValueError("sampled mask for bias does not sum to 1")
            sampled_b = torch.matmul(sampled_b, discrete_values_tensor)
        else:
            sampled_b = None
    elif method == "argmax":
        # argmax_w holds, per weight, the index of the discretized value with
        # the largest probability; indexing the value table with it yields the
        # discrete weights directly (and preserves dtype and device)
        argmax_w = torch.argmax(probabilities_w, dim=4)
        sampled_w = discrete_values_tensor[argmax_w]
        if probabilities_b is not None:
            argmax_b = torch.argmax(probabilities_b, dim=2)
            sampled_b = discrete_values_tensor[argmax_b]
        else:
            sampled_b = None
    else:
        raise ValueError(f"Invalid method {method} for layer discretization")

    # sanity checks
    if sampled_w.shape != probabilities_w.shape[:-1]:
        raise ValueError(
            "sampled probability mask for weights does not match expected shape")
    if sampled_b is not None:
        if sampled_b.shape != probabilities_b.shape[:-1]:
            raise ValueError(
                "sampled probability mask for bias does not match expected shape")
    return sampled_w, sampled_b
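# Hedged, standalone sketch of the discretization trick above: sample a
# one-hot mask over K discrete levels per weight, then contract it with the
# level values to obtain concrete discrete weights. Shapes and values are
# illustrative.
import torch
from torch.distributions import Multinomial

levels = torch.tensor([-1.0, 0.0, 1.0])    # K = 3 discrete values
logits = torch.randn(4, 5, 3)              # per-weight level logits
probs = torch.softmax(logits, dim=-1)
mask = Multinomial(probs=probs).sample()   # one-hot over the last dim
weights = mask @ levels                    # (4, 5) matrix of discrete weights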