def sample_true2():
    cat = Categorical(probs=torch.tensor(true_mixture_weights))
    cluster = cat.sample()
    norm = Normal(torch.tensor([cluster * 10.]).float(),
                  torch.tensor([5.0]).float())
    samp = norm.sample()
    return samp, cluster
def sample_gmm(batch_size, mixture_weights):
    cat = Categorical(probs=mixture_weights)
    cluster = cat.sample([batch_size])  # [B]
    mean = (cluster * 10.).float().cuda()
    std = torch.ones([batch_size]).cuda() * 5.
    norm = Normal(mean, std)
    samp = norm.sample()
    samp = samp.view(batch_size, 1)
    return samp
def act(self, state):
    # The agent is not deterministic: sample the action from a distribution (here a Gaussian).
    params = self.actor(state)
    m = 2 * F.tanh(params[0][0])
    s = F.softplus(params[0][1])
    norm = Normal(m, s)
    action = norm.sample()
    action = action.unsqueeze(0)
    l_p = norm.log_prob(action)
    action.clamp_(self.low, self.high)
    return action, l_p
def forward(self, x):
    if self.training:
        mu_z_x = self.mu_resblock(x)
        sigma_z_x = self.sigma_resblock(x)
        gaussian_distribution = Normal(mu_z_x, sigma_z_x)
        sampled_LVs = gaussian_distribution.sample([self.gamma]).reshape(
            -1, self.mu_resblock.dz)
        return sampled_LVs, mu_z_x
    else:
        mu_z_x = self.mu_resblock(x)
        return mu_z_x
def forward(self, obs, act=None):
    x = self.mlp(obs)
    mu = self.fc_mu(x)
    std = torch.exp(self.log_std)
    dist = Normal(mu, std)
    action = dist.sample() if act is None else act
    mu_action = mu
    log_prob = dist.log_prob(action).sum(dim=-1)
    return action, log_prob, mu_action
def get_hat(mu, sigma, arg):
    # Monte Carlo estimate of the expected softmax output under a Gaussian on the logits.
    softmax = nn.Softmax()
    normal_dist = Normal(torch.tensor([0.0]), torch.tensor([1.0]))
    y_hat = torch.zeros(mu.shape).float()
    for _ in range(arg.mc_samples):
        normal_samples = normal_dist.sample(sample_shape=mu.shape).squeeze(-1)
        y_sample = mu + sigma.expand(-1, 2) * normal_samples
        y_hat += softmax(y_sample)
    y_hat /= arg.mc_samples
    return torch.log(y_hat + torch.Tensor([1e-20]))
def learn(self, y_target, mgc, batch_size):
    # prepare batches
    self.model_t.eval()
    self.model_s.train()
    x_list, y_list, c_list = _create_batches(
        y_target, mgc, batch_size,
        UPSAMPLE_COUNT=self.UPSAMPLE_COUNT,
        mgc_order=self.params.mgc_order)
    if len(x_list) == 0:
        return 0
    # learn
    total_loss = 0
    for x, y, c in tqdm.tqdm(zip(x_list, y_list, c_list), total=len(c_list)):
        x = torch.tensor(x, dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.float32).to(device)
        c = torch.tensor(c, dtype=torch.float32).to(device)
        self.trainer.zero_grad()
        # draw the student's latent noise from a standard normal prior
        q_0 = Normal(x.new_zeros(x.size()), x.new_ones(x.size()))
        z = q_0.sample()
        c_up = self.model_t.upsample(c).detach()
        x_student, mu_s, logs_s = self.model_s(z, c_up)
        mu_logs_t = self.model_t(x_student, c)
        loss_t, loss_KL, loss_reg = self.criterion_t(
            mu_s, logs_s,
            mu_logs_t[:, 0:1, :-1], mu_logs_t[:, 1:, :-1],
            size_average=True)
        # reverse KL direction variant:
        # loss_t, loss_KL, loss_reg = self.criterion_t(mu_logs_t[:, 0:1, :-1], mu_logs_t[:, 1:, :-1], mu_s, logs_s, size_average=False)
        stft_student = stft(x_student[:, 0, 1:], scale='linear')
        stft_truth = stft(x[:, 0, 1:], scale='linear')
        loss_frame = self.criterion_frame(stft_student, stft_truth.detach())
        loss_tot = loss_t + loss_frame
        total_loss += loss_tot.item()
        loss_tot.backward()
        torch.nn.utils.clip_grad_norm_(self.model_s.parameters(), 10.)
        self.trainer.step()
        del loss_tot, loss_frame, loss_KL, loss_reg, loss_t, x, y, c, c_up, stft_student, stft_truth, q_0, z
        del x_student, mu_s, logs_s, mu_logs_t
    return total_loss / len(x_list)
class TD3(DDPG):
    """A TD3 agent from the paper Addressing Function Approximation Error in Actor-Critic Methods
    (Fujimoto et al. 2018) https://arxiv.org/abs/1802.09477"""
    agent_name = "TD3"

    def __init__(self, config):
        DDPG.__init__(self, config)
        self.critic_local_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1,
                                             key_to_use="Critic", override_seed=self.config.seed + 1)
        self.critic_target_2 = self.create_NN(input_dim=self.state_size + self.action_size, output_dim=1,
                                              key_to_use="Critic")
        self.critic_target_2.load_state_dict(copy.deepcopy(self.critic_local_2.state_dict()))
        self.critic_optimizer_2 = optim.Adam(self.critic_local_2.parameters(),
                                             lr=self.hyperparameters["Critic"]["learning_rate"])
        self.action_noise_std = self.hyperparameters["action_noise_std"]
        self.action_noise_distribution = Normal(torch.Tensor([0.0]), torch.Tensor([self.action_noise_std]))
        self.action_noise_clipping_range = self.hyperparameters["action_noise_clipping_range"]

    def compute_critic_values_for_next_states(self, next_states):
        """Computes the critic values for next states to be used in the loss for the critic"""
        with torch.no_grad():
            actions_next = self.actor_target(next_states)
            action_noise = self.action_noise_distribution.sample(sample_shape=actions_next.shape)
            action_noise = action_noise.squeeze(-1)
            clipped_action_noise = torch.clamp(action_noise, min=-self.action_noise_clipping_range,
                                               max=self.action_noise_clipping_range)
            actions_next_with_noise = actions_next + clipped_action_noise
            critic_targets_next_1 = self.critic_target(torch.cat((next_states, actions_next_with_noise), 1))
            critic_targets_next_2 = self.critic_target_2(torch.cat((next_states, actions_next_with_noise), 1))
            critic_targets_next = torch.min(torch.cat((critic_targets_next_1, critic_targets_next_2), 1),
                                            dim=1)[0].unsqueeze(-1)
        return critic_targets_next

    def critic_learn(self, states, actions, rewards, next_states, dones):
        """Runs a learning iteration for both critics"""
        critic_targets_next = self.compute_critic_values_for_next_states(next_states)
        critic_targets = self.compute_critic_values_for_current_states(rewards, critic_targets_next, dones)
        critic_expected_1 = self.critic_local(torch.cat((states, actions), 1))
        critic_expected_2 = self.critic_local_2(torch.cat((states, actions), 1))
        critic_loss_1 = functional.mse_loss(critic_expected_1, critic_targets)
        critic_loss_2 = functional.mse_loss(critic_expected_2, critic_targets)
        self.take_optimisation_step(self.critic_optimizer, self.critic_local, critic_loss_1,
                                    self.hyperparameters["Critic"]["gradient_clipping_norm"])
        self.take_optimisation_step(self.critic_optimizer_2, self.critic_local_2, critic_loss_2,
                                    self.hyperparameters["Critic"]["gradient_clipping_norm"])
        self.soft_update_of_target_network(self.critic_local, self.critic_target,
                                           self.hyperparameters["Critic"]["tau"])
        self.soft_update_of_target_network(self.critic_local_2, self.critic_target_2,
                                           self.hyperparameters["Critic"]["tau"])
def get_action(self, state):
    state = torch.FloatTensor(state).unsqueeze(0).to(self.device)
    mean, log_std = self.policy_net.forward(state)
    std = log_std.exp()
    normal = Normal(mean, std)
    z = normal.sample()
    action = torch.tanh(z)
    action = action.cpu().detach().squeeze(0).numpy()
    return action
def forward(self, x, a=None):
    mu = self.mu(x)
    policy = Normal(mu, self.log_std.exp())
    pi = policy.sample()
    logp_pi = policy.log_prob(pi).sum(dim=1)
    if a is not None:
        logp = policy.log_prob(a).sum(dim=1)
    else:
        logp = None
    return pi, logp, logp_pi
def sample_from_gaussian(y_hat):
    assert y_hat.size(1) == 2
    y_hat = y_hat.transpose(1, 2)
    mean = y_hat[:, :, :1]
    log_std = y_hat[:, :, 1:]
    dist = Normal(mean, torch.exp(log_std))
    sample = dist.sample()
    sample = torch.clamp(sample, min=-1., max=1.)
    del dist
    return sample
def forward(self, x: torch.Tensor, a: torch.Tensor) \
        -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    mu = self.mu(x, a)
    policy = Normal(mu, self.log_std.exp())
    pi = policy.sample()
    logp_pi = policy.log_prob(pi).sum(dim=1)
    if a is not None:
        logp = policy.log_prob(a).sum(dim=1)
    else:
        logp = None
    return pi, logp, logp_pi
def build_aligments(self, set_of_nearest_points, num_of_sampled=1000):
    # set_of_nearest_points dim: [neighbours, hidden]
    latent_mu, latent_logsigma = self.encode(torch.Tensor(set_of_nearest_points))
    dist = Normal(loc=latent_mu, scale=latent_logsigma.exp())
    sampled = dist.sample(torch.Size([num_of_sampled]))
    return sampled
class GPT2Model1(GPT2Model):
    def __init__(self, config):
        super(GPT2Model1, self).__init__(config)

    def forward(self, input_ids, position_ids=None, token_type_ids=None, past=None,
                std=0.01, reset=True):
        self.noise = Normal(torch.tensor([0.0], requires_grad=False),
                            torch.tensor([std], requires_grad=False))
        if past is None:
            past_length = 0
            past = [None] * len(self.h)
        else:
            past_length = past[0][0].size(-2)
        if position_ids is None:
            position_ids = torch.arange(past_length, input_ids.size(-1) + past_length,
                                        dtype=torch.long, device=input_ids.device)
            position_ids = position_ids.unsqueeze(0).expand_as(input_ids)

        input_shape = input_ids.size()
        input_ids = input_ids.view(-1, input_ids.size(-1))
        position_ids = position_ids.view(-1, position_ids.size(-1))

        inputs_embeds = self.wte(input_ids)
        position_embeds = self.wpe(position_ids)
        # position_embeds += self.noise.sample(position_embeds.size()).view_as(position_embeds).to(device)
        if token_type_ids is not None:
            token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
            token_type_embeds = self.wte(token_type_ids)
        else:
            token_type_embeds = 0
        hidden_states = inputs_embeds + position_embeds + token_type_embeds
        if reset:
            # inject Gaussian noise into the summed embeddings
            hidden_states += self.noise.sample(hidden_states.size()).view_as(hidden_states).to(device)

        presents = []
        for block, layer_past in zip(self.h, past):
            # if layer_past is not None:
            #     layer_past += self.noise.sample(layer_past.size()).view_as(layer_past).to(device)
            hidden_states, present = block(hidden_states, layer_past)
            presents.append(present)
        hidden_states = self.ln_f(hidden_states)
        output_shape = input_shape + (hidden_states.size(-1),)
        return hidden_states.view(*output_shape), presents
def step(self, state):
    state = torch.flatten(torch.from_numpy(state).float())
    mean, sigma = self(state)
    dist = Normal(mean, sigma)
    action = dist.sample()
    action = action.view(self.action_shape)
    # Note: torch.normal draws samples rather than evaluating a density, so the
    # action's log-probability has to come from dist.log_prob.
    return action.numpy(), dist.log_prob(action)
class TanhNormal(torch.distributions.Distribution):
    """
    Represent the distribution of X where
        X ~ tanh(Z)
        Z ~ N(mean, std)

    Note: this is not very numerically stable.
    """
    def __init__(self, normal_mean, normal_std, epsilon=1e-6):
        """
        :param normal_mean: Mean of the normal distribution
        :param normal_std: Std of the normal distribution
        :param epsilon: Numerical stability epsilon when computing log-prob.
        """
        self.normal_mean = normal_mean
        self.normal_std = normal_std
        self.normal = Normal(normal_mean, normal_std)
        self.epsilon = epsilon

    def sample_n(self, n, return_pre_tanh_value=False):
        z = self.normal.sample_n(n)
        if return_pre_tanh_value:
            return torch.tanh(z), z
        else:
            return torch.tanh(z)

    def log_prob(self, value, pre_tanh_value=None):
        """
        :param value: some value, x
        :param pre_tanh_value: arctanh(x)
        """
        if pre_tanh_value is None:
            # arctanh(x) = 0.5 * log((1 + x) / (1 - x))
            pre_tanh_value = torch.log((1 + value) / (1 - value)) / 2
        # Change-of-variables correction: subtract log|d tanh(z)/dz| = log(1 - tanh(z)^2).
        return self.normal.log_prob(pre_tanh_value) - torch.log(1 - value * value + self.epsilon)

    def sample(self, return_pretanh_value=False):
        z = self.normal.sample()
        if return_pretanh_value:
            return torch.tanh(z), z
        else:
            return torch.tanh(z)

    def rsample(self, return_pretanh_value=False):
        # Reparameterized sample: z = mean + std * eps with eps ~ N(0, 1),
        # so gradients flow back into normal_mean and normal_std.
        eps = Normal(torch.zeros_like(self.normal_mean),
                     torch.ones_like(self.normal_std)).sample()
        z = self.normal_mean + self.normal_std * eps
        if return_pretanh_value:
            return torch.tanh(z), z
        else:
            return torch.tanh(z)
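# A minimal usage sketch for the TanhNormal class above; the mean/std tensors
# here are illustrative placeholders, not values from any particular agent.
mean = torch.zeros(4, 2)
std = 0.5 * torch.ones(4, 2)
tanh_normal = TanhNormal(mean, std)
action, pre_tanh = tanh_normal.rsample(return_pretanh_value=True)  # squashed sample in (-1, 1)
log_prob = tanh_normal.log_prob(action, pre_tanh_value=pre_tanh)   # per-dimension log-density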
def select_action(state, env):
    '''Given a state, choose the action to take.

    Arguments:
        state -- observation matrix specifying the current model state
        env   -- integer specifying which environment to sample an action for

    Returns the action to take.'''
    state = torch.from_numpy(state).float()
    # mu1/sigma1 correspond to environment 1, mu2/sigma2 to environment 2;
    # pick the model matching the environment being trained.
    if env == 1:
        model = model1
        mu, sigma, state_value = model1(state)
        saved_actions = model1.saved_actions
    if env == 2:
        model = model2
        mu, sigma, state_value = model2(state)
        saved_actions = model2.saved_actions
    # debugging
    if False:
        print(mu)
        print(state_value)
        print(model.sigma_head_env.weight)
        print(model.affine1.weight)
    # guard against a NaN sigma
    if sigma != sigma:
        print(mu)
        print(state_value)
        print(model.sigma_head_env.weight)
        sigma = torch.tensor(float(0.1))
        print('sigma is nan')
        exit()
    # sample an action from the policy distribution (sigma holds the variance,
    # so the standard deviation is sigma.sqrt())
    prob = Normal(mu, sigma.sqrt())
    entropy = 0.5 * ((sigma * 2 * pi).log() + 1)
    action = prob.sample()
    log_prob = prob.log_prob(action)
    model.entropies.append(entropy)
    saved_actions.append(SavedAction(log_prob, state_value))
    # return the action converted to a python number
    return action.item()
def forward(self, x, a=None):
    mu = self.mu(x)
    std = self.log_std.exp()
    policy = Normal(mu, std)
    pi = policy.sample()
    # Gaussian log-likelihood of the sampled action
    logp_pi = policy.log_prob(pi).sum(dim=1)
    if a is not None:
        logp = policy.log_prob(a).sum(dim=1)
    else:
        logp = None
    # return order matches the ActorCritic return values
    return pi, logp, logp_pi, mu
def sample_from_gaussian(y_hat, log_std_min=-7.0, scale_factor=1.0):
    assert y_hat.size(2) == 2
    mean = y_hat[:, :, :1]
    log_std = torch.clamp(y_hat[:, :, 1:], min=log_std_min)
    dist = Normal(mean, torch.exp(log_std))
    sample = dist.sample()
    sample = torch.clamp(sample, min=-scale_factor, max=scale_factor)
    del dist
    return sample
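# Illustrative call of sample_from_gaussian above, assuming y_hat stacks a
# predicted mean and log-std per time step in its last dimension ([B, T, 2]);
# the shapes are made up for the example.
y_hat = torch.randn(8, 100, 2)
audio = sample_from_gaussian(y_hat)  # [8, 100, 1], clamped to [-scale_factor, scale_factor]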
def synthesize(self, spect):
    num_samples = len(spect) * 16 * 16
    c = torch.tensor(spect.T, dtype=torch.float32).to(device)
    q_0 = Normal(c.new_zeros((1, 1, num_samples)), c.new_ones((1, 1, num_samples)))
    z = q_0.sample()
    torch.cuda.synchronize()
    with torch.no_grad():
        c_up = self.model_t.upsample(c)
        y = self.model_s.generate(z, c_up).squeeze()
    torch.cuda.synchronize()
    return y.cpu().numpy()
def getAct(self, obs, params=None):
    with torch.no_grad():
        val = self.getVal(obs, params)
        mean = self._policy_forward(obs, params)
        if params is None:
            dist = Normal(mean, torch.exp(self.log_std))
        else:
            dist = Normal(mean, torch.exp(params['log_std']))
        action = dist.sample()
        log_prob = dist.log_prob(action).sum(axis=-1)
    return action.numpy(), val.numpy(), log_prob.numpy()
def act(self, state, deterministic=True):
    state = torch.tensor(state, dtype=torch.float, device="cuda")
    mean, log_std = self.actor(state)
    if deterministic:
        action = torch.tanh(mean)
    else:
        std = log_std.exp()
        normal = Normal(mean, std)
        z = normal.sample()
        action = torch.tanh(z)
    action = action.detach().cpu().numpy()
    return action
def forward(self, x):
    # forward pass through encoder
    x = F.relu(self.linear1(x))
    x = F.relu(self.linear2(x))
    encoder_mean = self.encoder_mean(x)
    encoder_std = F.softplus(self.encoder_std(x))
    # sample latent based on encoder outputs
    latent_dist = Normal(encoder_mean, encoder_std)
    latent = latent_dist.sample()
    return latent, encoder_mean, encoder_std
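# Note on the encoder above: Normal.sample() does not propagate gradients through
# the sampling step. When the latent must stay differentiable with respect to
# encoder_mean/encoder_std (the usual VAE setting), the reparameterized draw is
# the standard alternative; a minimal sketch with made-up shapes:
encoder_mean = torch.zeros(16, 8)
encoder_std = torch.ones(16, 8)
latent = Normal(encoder_mean, encoder_std).rsample()
# equivalent to: latent = encoder_mean + encoder_std * torch.randn_like(encoder_std)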
def action(self, x):
    x = T.from_numpy(x).double().unsqueeze(0)
    message_means, message_sds, action_probs = self.forward(x)
    action_dbn = Bernoulli(action_probs)
    action = action_dbn.sample()
    message_dbn = Normal(message_means, message_sds)
    message = message_dbn.sample()
    log_prob = action_dbn.log_prob(action) + message_dbn.log_prob(message).sum()
    x = T.cat((message[0, :], action[0].double()))
    return x, log_prob
def distort_tensor(self, args, input, scale=0, stop=False):
    # TODO this is horrible
    with torch.no_grad():
        if args.offset or args.offset_input:
            if args.debug:
                print('\n\ndistorting {}'.format(list(input.shape)))
            if self.generate_offsets:
                distr = Normal(loc=0, scale=scale * torch.ones_like(input))
                # a string never matches the integer dimensions of a torch.Size,
                # so this check must compare against the int 224
                if 224 in input.shape:  # TODO fragile
                    self.input_offsets = distr.sample()
                elif stop:
                    self.act2_offsets = distr.sample()
                else:
                    self.act1_offsets = distr.sample()
                offsets = distr.sample()
                if stop:
                    # last layer, fix generated offsets
                    self.generate_offsets = False
            if 224 in input.shape:  # TODO fragile
                out = input + self.input_offsets
            elif stop:
                out = input + self.act2_offsets
            else:
                out = input + self.act1_offsets
            if args.debug:
                print('\nbefore {}\noffsets {}\nafter {}\n'.format(
                    input.flatten().detach().cpu().numpy()[:6],
                    offsets.flatten().detach().cpu().numpy()[:6],
                    out.flatten().detach().cpu().numpy()[:6]))
        else:
            noise = input * torch.cuda.FloatTensor(input.size()).uniform_(-args.noise, args.noise)
            out = input + noise
        return out
def evaluate(model_t, model_s, ema=None):
    if ema is not None:
        model_s_ema = clone_as_averaged_model(model_s, ema)
    model_t.eval()
    model_s_ema.eval()
    running_loss = [0., 0., 0., 0.]
    epoch_loss = 0.
    display_step = 100
    for batch_idx, (x, y, c, _) in enumerate(test_loader):
        x, y, c = x.to(device), y.to(device), c.to(device)
        q_0 = Normal(x.new_zeros(x.size()), x.new_ones(x.size()))
        z = q_0.sample()
        c_up = model_t.upsample(c)
        x_student, mu_s, logs_s = model_s_ema(z, c_up)
        mu_logs_t = model_t(x_student, c)
        if args.KL_type == 'pq':
            loss_t, loss_KL, loss_reg = criterion_t(mu_logs_t[:, 0:1, :-1], mu_logs_t[:, 1:, :-1],
                                                    mu_s, logs_s)
        elif args.KL_type == 'qp':
            loss_t, loss_KL, loss_reg = criterion_t(mu_s, logs_s,
                                                    mu_logs_t[:, 0:1, :-1], mu_logs_t[:, 1:, :-1])
        stft_student = stft(x_student[:, 0, 1:], scale='linear')
        stft_truth = stft(x[:, 0, 1:], scale='linear')
        loss_frame = criterion_frame(stft_student, stft_truth.detach())
        loss_tot = loss_t + loss_frame
        running_loss[0] += loss_tot.item() / display_step
        running_loss[1] += loss_KL.item() / display_step
        running_loss[2] += loss_reg.item() / display_step
        running_loss[3] += loss_frame.item() / display_step
        epoch_loss += loss_tot.item()
        if (batch_idx + 1) % display_step == 0:
            print('{} [Total, KL, Reg, Frame Loss] : {}'.format(batch_idx + 1, np.array(running_loss)))
            running_loss = [0., 0., 0., 0.]
        del loss_tot, loss_frame, loss_KL, loss_reg, loss_t, x, y, c, c_up, stft_student, stft_truth, q_0, z
        del x_student, mu_s, logs_s, mu_logs_t
    epoch_loss /= len(test_loader)
    print('Evaluation Loss : {:.4f}'.format(epoch_loss))
    del model_s_ema
    return epoch_loss
class Gaussian:
    """
    A Gaussian density, shifted by a mean mu and scaled by a standard deviation sigma.
    This is used for the variational posterior distributions.
    """
    def __init__(self, mu, rho) -> None:
        """
        Initialize a Gaussian distribution with the given parameters.

        :param mu: variational posterior parameter.
        :param rho: variational posterior parameter.
        """
        self._rho = rho
        self._mu = mu
        self._normal = Normal(0, 1)
        self._normal.loc = self._normal.loc.to(Constant.device)
        self._normal.scale = self._normal.scale.to(Constant.device)

    @property
    def mu(self):
        """Property for the mu variable."""
        return self._mu

    def __sigma(self):
        """A matrix of the same size as rho: sigma = log(1 + exp(rho)) (softplus)."""
        return torch.log1p(torch.exp(self._rho))

    def __epsilon(self):
        """Epsilon is point-wise multiplied with sigma, therefore it must be of the same size as sigma."""
        return self._normal.sample(self._rho.size())

    def sample(self):
        """Sampling weights: w = mu + sigma * epsilon."""
        return self._mu + self.__sigma() * self.__epsilon()

    def log_prob(self, input_):
        """
        Calculate the log-probability of the input, assuming it has a normal
        distribution with mean mu and standard deviation sigma.

        :param input_: The input to the pdf.
        """
        two_pi = torch.empty(1)
        two_pi = two_pi.new_full(self._rho.size(), 2 * math.pi, device=Constant.device)
        p1 = torch.log(torch.sqrt(two_pi))
        p2 = torch.log(self.__sigma())
        # Gaussian density has 2 * sigma^2 in the denominator of the quadratic term.
        p3 = ((input_ - self._mu) ** 2) / (2 * self.__sigma() ** 2)
        return (-p1 - p2 - p3).sum()
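# The log_prob above is the closed-form Gaussian log-density summed over all
# weights, with sigma = log1p(exp(rho)) (softplus) keeping the scale positive:
#     log N(x | mu, sigma) = -log(sqrt(2*pi)) - log(sigma) - (x - mu)^2 / (2 * sigma^2)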
def sample_action(p):
    # torch.abs(.) + 0.001 keeps each sigma positive and non-zero
    steer_dist = Normal(p[0], torch.abs(p[1]) + 0.001)
    acc_dist = Normal(p[2], torch.abs(p[3]) + 0.001)
    brake_dist = Normal(p[4], torch.abs(p[5]) + 0.001)
    steer = steer_dist.sample()
    acc = acc_dist.sample()
    brake = brake_dist.sample()
    actions = torch.tensor([steer, acc, brake], dtype=torch.float, requires_grad=True)
    log_prob = (steer_dist.log_prob(steer) + acc_dist.log_prob(acc) + brake_dist.log_prob(brake))
    return actions, log_prob
def forward(self, x, a=None, batch=False):
    policy = Normal(self.mu(x), self.log_std.exp())
    if batch:
        pdb.set_trace()
    pi = policy.sample()
    logp_pi = policy.log_prob(pi).sum(dim=1)
    if a is not None:
        logp = policy.log_prob(a).sum(dim=1)
    else:
        logp = None
    return pi, logp, logp_pi
def choose_action(self, state):
    state = T.Tensor(state)
    self.actor.eval()
    mus, sigmas = self.actor(state)
    normal = Normal(mus, sigmas)
    self.entropy = normal.entropy()
    self.actions = normal.sample()
    self.log_prob = normal.log_prob(self.actions)
    return self.actions.numpy()
class GaussianNoise(nn.Module):
    def __init__(self, stddev):
        super().__init__()
        self.dist = Normal(torch.cuda.FloatTensor([0.]), torch.cuda.FloatTensor([stddev]))

    def forward(self, x):
        if self.training:
            noise = self.dist.sample(x.size()).squeeze(-1)
            return x + noise
        else:
            return x
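# Illustrative use of GaussianNoise above; the module builds its Normal from
# torch.cuda.FloatTensor, so both the layer and its inputs assume a CUDA device.
noise_layer = GaussianNoise(stddev=0.1)
x = torch.randn(4, 16, device="cuda")
noisy = noise_layer(x)          # training mode (default): x + N(0, 0.1) noise
clean = noise_layer.eval()(x)   # eval mode: returns x unchanged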
def sample_true(batch_size):
    cat = Categorical(probs=torch.tensor(true_mixture_weights))
    cluster = cat.sample([batch_size])  # [B]
    mean = (cluster * 10.).float()
    std = torch.ones([batch_size]) * 5.
    norm = Normal(mean, std)
    samp = norm.sample()
    samp = samp.view(batch_size, 1)
    return samp
def synthesize(model):
    global global_step
    model.eval()
    for batch_idx, (x, c) in enumerate(synth_loader):
        if batch_idx == 0:
            x, c = x.to(device), c.to(device)
            q_0 = Normal(x.new_zeros(x.size()), x.new_ones(x.size()))
            z = q_0.sample()
            start_time = time.time()
            with torch.no_grad():
                y_gen = model.module.reverse(z, c).squeeze()
            wav = y_gen.to(torch.device("cpu")).data.numpy()
            wav_name = '{}/{}/generate_{}_{}.wav'.format(args.sample_path, args.model_name,
                                                         global_step, batch_idx)
            print('{} seconds'.format(time.time() - start_time))
            librosa.output.write_wav(wav_name, wav, sr=22050)
            print('{} Saved!'.format(wav_name))
            del x, c, z, q_0, y_gen, wav