def aggregate_updates(self, global_model, agent_updates_dict, cur_round):
    # adjust LR if robust LR is selected
    lr_vector = torch.Tensor([self.server_lr] * self.n_params).to(self.args.device)
    if self.args.robustLR_threshold > 0:
        lr_vector = self.compute_robustLR(agent_updates_dict)

    aggregated_updates = 0
    if self.args.aggr == 'avg':
        aggregated_updates = self.agg_avg(agent_updates_dict)
    elif self.args.aggr == 'comed':
        aggregated_updates = self.agg_comed(agent_updates_dict)
    elif self.args.aggr == 'sign':
        aggregated_updates = self.agg_sign(agent_updates_dict)

    if self.args.noise > 0:
        aggregated_updates.add_(
            torch.normal(mean=0, std=self.args.noise * self.args.clip,
                         size=(self.n_params,)).to(self.args.device))

    cur_global_params = parameters_to_vector(global_model.parameters())
    new_global_params = (cur_global_params + lr_vector * aggregated_updates).float()
    vector_to_parameters(new_global_params, global_model.parameters())

    # some plotting stuff if desired
    # self.plot_sign_agreement(lr_vector, cur_global_params, new_global_params, cur_round)
    # self.plot_norms(agent_updates_dict, cur_round)
    return
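# A minimal, self-contained sketch of the flatten -> update -> unflatten pattern that the
# snippets in this collection rely on. The Linear model and the zero update vector are
# illustrative placeholders, not taken from any snippet above.
import torch
import torch.nn as nn
from torch.nn.utils import parameters_to_vector, vector_to_parameters

model = nn.Linear(4, 2)
flat = parameters_to_vector(model.parameters())       # 1-D copy of all parameters
update = torch.zeros_like(flat)                       # e.g. an aggregated update/gradient
vector_to_parameters(flat + 0.1 * update, model.parameters())  # write the result back in place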
def set_and_eval(step):
    vector_to_parameters(old_params - alpha * x * step,
                         actor_critic.policy.parameters())
    _, logp, _, _, d_kl = actor_critic.policy(obs, act, **policy_args)
    ratio = (logp - logp_old).exp()
    pi_loss = -(ratio * adv).mean()
    return mpi_avg(d_kl.item()), mpi_avg(pi_loss.item())
def f_barrier(params):
    vector_to_parameters(params, self.module.actor.parameters())
    new_logp = self.module.actor.log_prob(cur_obs, actions)
    surr_loss = self._compute_surr_loss(old_logp, new_logp, advantages)
    avg_kl = torch.mean(old_logp - new_logp)
    return surr_loss.item() if avg_kl < self.config["delta"] else np.inf
def func(params, params_i, params_j):
    # Snapshot detached copies of the current parameters. vector_to_parameters rebinds
    # each Parameter's data below, so keeping plain references would make the restore
    # at the end a no-op.
    old_params = [p.detach().clone() for p in model.parameters()]
    cur_params = [v.clone() for v in old_params]
    cur_params[params_i:params_j] = params
    vector_to_parameters(parameters_to_vector(cur_params), model.parameters())
    new_log_probs = model(inputs)
    old_log_probs = torch.clone(new_log_probs).detach()
    f = kl_fn(new_log_probs, old_log_probs)
    tmp_params = list(model.parameters())[params_i:params_j]
    # Restore the original parameters.
    vector_to_parameters(parameters_to_vector(old_params), model.parameters())
    return f, tmp_params
def step(self, closure=None, thr=1e-2, eps=1e-9):
    loss = None
    if closure is not None:
        loss = closure()

    world_size = self.dist.get_world_size()
    grads = [p.grad for p in self.model.parameters()]
    # pack
    packed_tensor = parameters_to_vector(grads)
    # all reduce
    self.dist.all_reduce(packed_tensor)
    # unpack
    vector_to_parameters(packed_tensor.div_(world_size), grads)

    if self.lars:
        for group in self.param_groups:
            for p in group['params']:
                setattr(p, 'data_pre', p.data.detach().clone())

    self.actual_optimizer.step(closure=None)

    if self.lars:
        for group in self.param_groups:
            for p in group['params']:
                d_norm_pre = p.data_pre.norm()
                if d_norm_pre > thr:
                    upd = p.data - p.data_pre
                    upd_norm = upd.norm()
                    rate = group['lr'] * d_norm_pre / (upd_norm + eps)
                    p.data = p.data_pre.add(upd, alpha=rate)

    return loss
def comp_diag_fisher(self, model_params, data_loader, adv=True):
    model = models.get_model(self.args.data)
    vector_to_parameters(model_params, model.parameters())
    params = {n: p for n, p in model.named_parameters() if p.requires_grad}
    precision_matrices = {}
    for n, p in deepcopy(params).items():
        p.data.zero_()
        precision_matrices[n] = p.data

    model.eval()
    for _, (inputs, labels) in enumerate(data_loader):
        model.zero_grad()
        inputs, labels = inputs.to(device=self.args.device, non_blocking=True), \
            labels.to(device=self.args.device, non_blocking=True).view(-1, 1)
        if not adv:
            labels.fill_(self.args.base_class)
        outputs = model(inputs)
        log_all_probs = F.log_softmax(outputs, dim=1)
        # the diagonal Fisher accumulates squared gradients of the target-class log-probability
        target_log_probs = log_all_probs.gather(1, labels)
        batch_target_log_probs = target_log_probs.sum()
        batch_target_log_probs.backward()

        for n, p in model.named_parameters():
            precision_matrices[n].data += (p.grad.data ** 2) / len(data_loader.dataset)

    return parameters_to_vector(precision_matrices.values()).detach()
def get_mc_predictions(self, forward_function, inputs, mc_samples=1, ret_numpy=False, *args, **kwargs):
    """Returns Monte Carlo predictions.

    Arguments:
        forward_function (callable): The forward function of the model
            that takes inputs and returns the outputs.
        inputs (FloatTensor): The inputs to the model.
        mc_samples (int): The number of Monte Carlo samples.
        ret_numpy (bool): If true, the returned list contains numpy
            arrays, otherwise it contains torch tensors.
    """
    # We only support a single parameter group.
    parameters = self.param_groups[0]['params']
    predictions = []
    Precision = self.state['Precision']
    mu = self.state['mu']
    for _ in range(mc_samples):
        # Sample a parameter vector: mu + raw_noise / sqrt(Precision)
        raw_noise = torch.normal(mean=torch.zeros_like(mu), std=1.0)
        p = torch.addcdiv(mu, raw_noise, torch.sqrt(Precision), value=1.)
        vector_to_parameters(p, parameters)

        # Call the forward computation function
        outputs = forward_function(inputs, *args, **kwargs)
        if ret_numpy:
            outputs = outputs.data.cpu().numpy()
        predictions.append(outputs)

    return predictions
def evaluate_step(self, inputs, labels, device="cpu", M=0):
    epsilons = []
    for sample in range(M):
        epsilons.append(
            torch.bernoulli(torch.sigmoid(2 * self.optim.state["lambda"])))
    params = self.optim.param_groups[0]["params"]
    if len(epsilons) == 0:
        epsilons.append(
            torch.where(
                self.optim.state["mu"] <= 0,
                torch.zeros_like(self.optim.state["mu"]),
                torch.ones_like(self.optim.state["mu"]),
            ))
    output_list = []
    for epsilon in epsilons:
        vector_to_parameters(2 * epsilon - 1, params)
        outputs = self.model(inputs.to(device))
        output_list.append(outputs)
    output_tensor = torch.stack(output_list, dim=2)
    probs = torch.mean(output_tensor, dim=2)
    loss = self.criterion(probs, labels.to(device))
    _, pred = torch.max(probs, 1)
    correct = (
        pred.eq(labels.to(device).view_as(pred)).sum().item() / labels.shape[0]
    ) * 100
    return loss, correct
def from_vec(self, x):
    r"""Set the network parameters from a single flattened vector.

    Args:
        x (Tensor): A single flattened vector of the network parameters
            with consistent size.
    """
    vector_to_parameters(vec=x, parameters=self.parameters())
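# Illustrative round-trip for a from_vec-style helper. The class that defines from_vec
# above is not shown, so FlatParamsMixin and TinyNet here are hypothetical stand-ins.
import torch
import torch.nn as nn
from torch.nn.utils import parameters_to_vector, vector_to_parameters

class FlatParamsMixin(nn.Module):
    def to_vec(self):
        return parameters_to_vector(self.parameters())

    def from_vec(self, x):
        vector_to_parameters(vec=x, parameters=self.parameters())

class TinyNet(FlatParamsMixin):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(3, 2)

net = TinyNet()
vec = net.to_vec()        # flatten all parameters into one vector
net.from_vec(vec * 0.5)   # write a modified copy back into the parameters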
def func(params):
    old_params = parameters_to_vector(model.parameters())
    if isinstance(params[0], torch.nn.Parameter):
        if mat == 'A':
            vector_to_parameters(params[0].view(-1, 1), model.A)
        elif mat == 'B':
            vector_to_parameters(params[0].view(-1, 1), model.B)
    else:
        if mat == 'A':
            vector_to_parameters(parameters_to_vector(params[0]), model.A)
        elif mat == 'B':
            vector_to_parameters(parameters_to_vector(params[0]), model.B)
    z = model(ids)
    f = mat_completion_loss(W, M, z, model.A, model.B, ids)
    if mat == 'A':
        tmp_params = [model.A]
    elif mat == 'B':
        tmp_params = [model.B]
    # Restore the original parameters.
    vector_to_parameters(old_params, model.parameters())
    return f, z, tmp_params
def SNN_error(self, loader, delta_prime, n_mtcarlo_approx, sample_freq):
    """
    Compute an upper bound on the error of the stochastic neural network by
    applying the sample convergence bound theorem.
    """
    samples_errors = 0.
    snn_error = []
    with torch.no_grad():
        t = time.time()
        iter_counter = sample_freq
        for i in range(n_mtcarlo_approx):
            vector_to_parameters(self.sample_weights().detach(), self.model.parameters())
            samples_errors += test_error(self.model, loader, self.accuracy_loss, self.device)
            if i == iter_counter:
                snn_error_intermed = solve_kl_sup(samples_errors / i,
                                                  (log(2 / delta_prime) / i))
                plog("Iter {}; SNN error {:.4g}; Took {:.4g}s".format(
                    i, snn_error_intermed, time.time() - t))
                snn_error.append(snn_error_intermed)
                iter_counter += sample_freq
                t = time.time()
    snn_final_error = solve_kl_sup(samples_errors / n_mtcarlo_approx,
                                   (log(2 / delta_prime) / n_mtcarlo_approx))
    snn_error.append(snn_final_error)
    return snn_error
def get_mc_predictions(self, forward_function, inputs, ret_numpy=False, raw_noises=None, *args, **kwargs):
    """Returns Monte Carlo predictions.

    Arguments:
        forward_function (callable): The forward function of the model
            that takes inputs and returns the outputs.
        inputs (FloatTensor): The inputs to the model.
        ret_numpy (bool): If true, the returned list contains numpy
            arrays, otherwise it contains torch tensors.
        raw_noises (list of Tensor, optional): Pre-drawn noise vectors, one
            per Monte Carlo sample. If None, the sign of the mean is used to
            make a single deterministic prediction.
    """
    # We only support a single parameter group.
    parameters = self.param_groups[0]['params']
    predictions = []

    if raw_noises is None:
        # use the mean value (sign) to make predictions
        raw_noises = []
        mean_vector = torch.where(self.state['mu'] <= 0,
                                  torch.zeros_like(self.state['mu']),
                                  torch.ones_like(self.state['mu']))
        raw_noises.append(mean_vector)
        # perform inference using the sign of the mean value when there is no sampling

    for raw_noise in raw_noises:
        # Sample a parameter vector:
        vector_to_parameters(2 * raw_noise - 1, parameters)
        # Call the forward computation function
        outputs = forward_function(inputs, *args, **kwargs)
        if ret_numpy:
            outputs = outputs.data.cpu().numpy()
        predictions.append(outputs)

    return predictions
def act_nn(obs, weights, actions):
    model = ModelNes(obs.size, len(actions))
    vector_to_parameters(torch.from_numpy(weights).float(), model.parameters())
    with torch.no_grad():
        q_estimate = model(torch.from_numpy(obs).float())
    action_i = np.argmax(q_estimate.data.numpy())
    return actions[action_i]
def get_dual_predictions(self, jac_closure, mc_samples=10, ret_jac=False):
    mu = self.state['mu']
    precision = self.state['precision']
    parameters = self.param_groups[0]['params']
    J_list = []
    fxs = []
    Jv_list = []
    for _ in range(mc_samples):
        # Sample a parameter vector: mu + raw_noise / sqrt(precision)
        raw_noise = torch.normal(mean=torch.zeros_like(mu), std=1.0)
        p = torch.addcdiv(mu, raw_noise, torch.sqrt(precision), value=1.)
        vector_to_parameters(p, parameters)
        # Get loss and predictions
        preds, J = jac_closure()
        fxs.append(preds)
        J_list.append(J)  # each J is n x p
        Jv_list.append(J @ p)
    vector_to_parameters(mu, parameters)

    fx_hat = torch.mean(torch.stack(fxs), 0).flatten()
    J_hat = torch.mean(torch.stack(J_list), 0)
    Jv_hat = torch.mean(torch.stack(Jv_list), 0)
    mu_pred = fx_hat + J_hat @ mu - Jv_hat
    std_pred = torch.sqrt(torch.diag(J_hat @ torch.diag(1. / precision) @ J_hat.t()))
    if ret_jac:
        return (fx_hat.detach().numpy(), (J_hat @ mu).detach().numpy(),
                Jv_hat.detach().numpy(), std_pred.detach().numpy())
    return mu_pred.detach().numpy(), std_pred.detach().numpy()
def SNN_error(self, loader, delta_prime, n_mtcarlo_approx):
    """
    Compute an upper bound on the error of the stochastic neural network by
    applying the sample convergence bound theorem.
    """
    samples_errors = 0.
    snn_error = []
    with torch.no_grad():
        t = time.time()
        iter_counter = 10
        for i in range(n_mtcarlo_approx):
            vector_to_parameters(self.sample_weights().detach(), self.model.parameters())
            samples_errors += test_error(loader, self.model, self.device)
            if i == iter_counter:
                print("It's {}th Monte-Carlo iteration".format(i))
                snn_error_intermed = solve_kl_sup(samples_errors / i,
                                                  (log(2 / delta_prime) / i))
                print("SNN-error is {}".format(snn_error_intermed))
                snn_error.append(snn_error_intermed)
                print("Computational time for {} is {}".format(i, time.time() - t))
                iter_counter += 10
    snn_final_error = solve_kl_sup(samples_errors / n_mtcarlo_approx,
                                   (log(2 / delta_prime) / n_mtcarlo_approx))
    snn_error.append(snn_final_error)
    return snn_error
def trpo_update(replay, policy, baseline):
    gamma = 0.99
    tau = 0.95
    max_kl = 0.01
    ls_max_steps = 15
    backtrack_factor = 0.5
    old_policy = deepcopy(policy)
    for step in range(10):
        states = replay.state()
        actions = replay.action()
        rewards = replay.reward()
        dones = replay.done()
        next_states = replay.next_state()
        returns = ch.td.discount(gamma, rewards, dones)
        baseline.fit(states, returns)
        values = baseline(states)
        next_values = baseline(next_states)

        # Compute KL
        with th.no_grad():
            old_density = old_policy.density(states)
        new_density = policy.density(states)
        kl = kl_divergence(old_density, new_density).mean()

        # Compute surrogate loss
        old_log_probs = old_density.log_prob(actions).mean(dim=1, keepdim=True)
        new_log_probs = new_density.log_prob(actions).mean(dim=1, keepdim=True)
        bootstraps = values * (1.0 - dones) + next_values * dones
        advantages = ch.pg.generalized_advantage(gamma, tau, rewards, dones,
                                                 bootstraps, th.zeros(1))
        advantages = ch.normalize(advantages).detach()
        surr_loss = trpo.policy_loss(new_log_probs, old_log_probs, advantages)

        # Compute the update
        grad = autograd.grad(surr_loss, policy.parameters(), retain_graph=True)
        Fvp = trpo.hessian_vector_product(kl, policy.parameters())
        grad = parameters_to_vector(grad).detach()
        step = trpo.conjugate_gradient(Fvp, grad)
        lagrange_mult = 0.5 * th.dot(step, Fvp(step)) / max_kl
        step = step / lagrange_mult
        step_ = [th.zeros_like(p.data) for p in policy.parameters()]
        vector_to_parameters(step, step_)
        step = step_

        # Line-search
        for ls_step in range(ls_max_steps):
            stepsize = backtrack_factor ** ls_step
            clone = deepcopy(policy)
            for c, u in zip(clone.parameters(), step):
                c.data.add_(u.data, alpha=-stepsize)
            new_density = clone.density(states)
            new_kl = kl_divergence(old_density, new_density).mean()
            new_log_probs = new_density.log_prob(actions).mean(dim=1, keepdim=True)
            new_loss = trpo.policy_loss(new_log_probs, old_log_probs, advantages)
            if new_loss < surr_loss and new_kl < max_kl:
                for p, c in zip(policy.parameters(), clone.parameters()):
                    p.data[:] = c.data[:]
                break
def update():
    net.train()
    net.vf_targ.eval()

    # data
    batch = replay_buffer.sample_batch(batch_size)
    x_ph = torch.from_numpy(batch['obs1'])
    x2_ph = torch.from_numpy(batch['obs2'])
    a_ph = torch.from_numpy(batch['acts'])
    r_ph = torch.from_numpy(batch['rews'][:, np.newaxis])
    d_ph = torch.from_numpy(batch['done'][:, np.newaxis])

    # computation graph
    mu, pi, logp_pi = net.apply_policy(x_ph)
    q1, q2 = net.apply_qf(x_ph, a_ph)
    q1_pi, q2_pi = net.apply_qf(x_ph, pi)
    v = net.apply_vf(x_ph)
    with torch.no_grad():
        v_targ = net.apply_vf_targ(x2_ph)

    # Min Double-Q:
    min_q_pi = torch.min(q1_pi, q2_pi)

    # Targets for Q and V regression
    q_backup = r_ph + gamma * (1 - d_ph) * v_targ.detach()
    v_backup = (min_q_pi - alpha * logp_pi).detach()

    # Soft actor-critic losses
    pi_loss = torch.mean(alpha * logp_pi - q1_pi)
    q1_loss = 0.5 * criterion_mse(q1, q_backup)
    q2_loss = 0.5 * criterion_mse(q2, q_backup)
    v_loss = 0.5 * criterion_mse(v, v_backup)
    value_loss = q1_loss + q2_loss + v_loss

    # Policy train
    optimizer_actor.zero_grad()
    pi_loss.backward()
    optimizer_actor.step()

    # Value train
    optimizer_critic.zero_grad()
    value_loss.backward()
    optimizer_critic.step()

    # update target network
    param = parameters_to_vector(net.vf.parameters())
    param_targ = parameters_to_vector(net.vf_targ.parameters())
    param_targ = polyak * param_targ + (1 - polyak) * param
    vector_to_parameters(param_targ, net.vf_targ.parameters())

    logger.store(LossPi=pi_loss.item(), LossQ1=q1_loss.item(),
                 LossQ2=q2_loss.item(), LossV=v_loss.item(),
                 Q1Vals=q1.detach().numpy(), Q2Vals=q2.detach().numpy(),
                 VVals=value_loss.item(), LogPi=logp_pi.detach().numpy())
def perturb_params(self, src_net, dst_net):
    params = parameters_to_vector(src_net.parameters())
    vector_to_parameters(params, dst_net.parameters())
    for m in dst_net.modules():
        if self.param_noise_filter_func(m):
            for param in m.parameters():
                param.data += torch.randn_like(param.data) * self.param_noise_scale
def from_vec(self, x):
    """Unflatten the given vector into the network parameters.

    Args:
        x (Tensor): A single flattened vector whose size is consistent with
            the number of network parameters.
    """
    vector_to_parameters(vec=x, parameters=self.parameters())
def forward(self, images, labels):
    self.noise = torch.randn(self.d_size).to(self.device) * torch.exp(self.sigma_posterior_)
    vector_to_parameters(self.flat_params + self.noise, self.model.parameters())
    outputs = self.model(images)
    # loss = self.criterion(outputs.float(), labels.long())
    loss = F.cross_entropy(outputs.float(), labels.long())
    return loss
def Fvp_fn(theta):
    # theta should be a parameter vector.
    temp_model = copy.deepcopy(model)
    vector_to_parameters(theta, temp_model.parameters())
    full_inp = [temp_model, inputs, outputs, kl_fn, regu_coef]
    H = eval_F(*full_inp)
    return H
def set_and_eval(step):
    vector_to_parameters(old_params - alpha * x * step, net.actor.parameters())
    _, logp, _, _, d_kl = net.apply_actor(x_ph, a_ph, old_logp_or_mu=inputs[-1])
    ratio = torch.exp(logp - logp_old_ph)  # pi(a|s) / pi_old(a|s)
    pi_loss = -torch.mean(ratio * adv_ph)
    return mpi_avg(d_kl.item()), mpi_avg(pi_loss.item())
def conjugate_gradient(Ax, b, num_iterations=10, tol=1e-10, eps=1e-8):
    """
    [[Source]](https://github.com/seba-1511/cherry/blob/master/cherry/algorithms/trpo.py)

    **Description**

    Computes \\(x = A^{-1}b\\) using the conjugate gradient algorithm.

    **Credit**

    Adapted from Kai Arulkumaran's implementation, with additions inspired from
    John Schulman's implementation.

    **References**

    1. Nocedal and Wright. 2006. "Numerical Optimization, 2nd edition". Springer.
    2. Shewchuk et al. 1994. "An Introduction to the Conjugate Gradient Method
       without the Agonizing Pain." CMU.

    **Arguments**

    * **Ax** (callable) - Given a vector x, computes A@x.
    * **b** (tensor or list) - The reference vector.
    * **num_iterations** (int, *optional*, default=10) - Number of conjugate
      gradient iterations.
    * **tol** (float, *optional*, default=1e-10) - Tolerance for proposed solution.
    * **eps** (float, *optional*, default=1e-8) - Numerical stability constant.

    **Returns**

    * **x** (tensor or list) - The solution to Ax = b, as a list if b is a list
      else a tensor.

    **Example**

    ~~~python
    pass
    ~~~
    """
    shape = None
    if not isinstance(b, th.Tensor):
        shape = [th.zeros_like(b_i) for b_i in b]
        b = parameters_to_vector(b)
    x = th.zeros_like(b)
    r = b
    p = r
    r_dot_old = th.dot(r, r)
    for _ in range(num_iterations):
        Ap = Ax(p)
        alpha = r_dot_old / (th.dot(p, Ap) + eps)
        x += alpha * p
        r -= alpha * Ap
        r_dot_new = th.dot(r, r)
        p = r + (r_dot_new / r_dot_old) * p
        r_dot_old = r_dot_new
        if r_dot_new.item() < tol:
            break
    if shape is not None:
        vector_to_parameters(x, shape)
        x = shape
    return x
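# Illustrative usage sketch for a conjugate-gradient solver with the signature above
# (the docstring's own example is elided); the matrix and vector are made-up values.
# Note b is cloned because the solver updates the residual r = b in place.
import torch as th

A = th.tensor([[4.0, 1.0],
               [1.0, 3.0]])
b = th.tensor([1.0, 2.0])

def Ax(v):
    # matrix-vector product with a small symmetric positive-definite matrix
    return A @ v

x = conjugate_gradient(Ax, b.clone(), num_iterations=25)
# x should be close to th.linalg.solve(A, b)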
def fitness(batch, model, params, val=False):
    vector_to_parameters(params, model.parameters())
    model.set_decode_type('greedy')
    model.eval()
    with torch.no_grad():
        length, _ = model(batch)
    return length.mean()
def eval_f(vparams):
    vparams = torch.tensor(vparams).to(torch.get_default_dtype())
    vector_to_parameters(vparams, params)
    with torch.no_grad():
        loss = criterion(model, criterion_x, **crit_kwargs)
    vector_to_parameters(vparams0, params)
    return loss.detach().numpy()
def save(model, params, history, savedir, start, epoch, check=True):
    vector_to_parameters(params, model.parameters())
    if check:
        torch.save(model, '{}/epoch{}-evo-model.pt'.format(savedir, epoch))
    else:
        hr_time = int(round((time() - start) / 3600))
        torch.save(model, '{}/{}hr-evo-model.pt'.format(savedir, hr_time))
        with open(f'{savedir}/fitness_history_{hr_time}.pickle', 'wb') as f:
            pickle.dump(history, f, protocol=pickle.HIGHEST_PROTOCOL)
def surrogate_loss(self, theta):
    """
    Returns the surrogate loss w.r.t. the given parameter vector theta
    (-> float)
    """
    old_theta = parameters_to_vector(self.policy_net.parameters())
    prob_old = self.policy_net(self.observations_tensor).gather(1, self.actions).data
    vector_to_parameters(theta, self.policy_net.parameters())
    prob_new = self.policy_net(self.observations_tensor).gather(1, self.actions).data
    vector_to_parameters(old_theta, self.policy_net.parameters())
    return -torch.mean((prob_new / (prob_old + eps)) * self.advantages)
def f_barrier(params, all_obs=all_obs, all_acts=all_acts, all_advs=all_advs):
    vector_to_parameters(params, policy.parameters())
    new_dists = policy(all_obs)
    new_logp = new_dists.log_prob(all_acts)
    surr_loss = -((new_logp - old_logp).exp() * all_advs).mean()
    avg_kl = kl(old_dists, new_dists).mean().item()
    return surr_loss.item() if avg_kl < delta else float("inf")
def eval_f(vparams):
    vparams = torch.tensor(vparams)
    vparams_ = parameters_to_vector(params)
    vector_to_parameters(vparams, params)
    with torch.no_grad():
        obj = objfun()
    vector_to_parameters(vparams_, params)
    return obj.detach().numpy()
def update(policy_update):
    net.train()
    net_targ.eval()

    # data
    batch = replay_buffer.sample_batch(batch_size)
    x_ph = torch.from_numpy(batch['obs1'])
    x2_ph = torch.from_numpy(batch['obs2'])
    a_ph = torch.from_numpy(batch['acts'])
    r_ph = torch.from_numpy(batch['rews'][:, np.newaxis])
    d_ph = torch.from_numpy(batch['done'][:, np.newaxis])

    # Q-learning update
    q1, q2 = net.apply_critic(x_ph, a_ph)

    # compute q target
    with torch.no_grad():
        pi_targ = net_targ.act_limit * net_targ.actor(x2_ph)
        epsilon = torch.randn_like(pi_targ) * target_noise
        epsilon = torch.clamp(epsilon, -noise_clip, noise_clip)
        a2 = torch.clamp(pi_targ + epsilon, -net.act_limit, net.act_limit)

    with torch.no_grad():
        q1_targ, q2_targ = net_targ.apply_critic(x2_ph, a2)
        min_q_targ = torch.min(q1_targ, q2_targ)
        backup = r_ph + gamma * (1 - d_ph) * min_q_targ.detach()

    q_loss = criterion_mse(q1, backup) + criterion_mse(q2, backup)

    # update
    optimizer_critic.zero_grad()
    q_loss.backward()
    optimizer_critic.step()
    logger.store(LossQ=q_loss.item(), Q1Vals=q1.detach().numpy(),
                 Q2Vals=q2.detach().numpy())

    if policy_update:
        # Policy update
        q_pi = net(x_ph)
        pi_loss = -torch.mean(q_pi)
        optimizer_actor.zero_grad()
        pi_loss.backward()
        optimizer_actor.step()
        logger.store(LossPi=pi_loss.item())

        # update target network
        param = parameters_to_vector(net.parameters())
        param_targ = parameters_to_vector(net_targ.parameters())
        param_targ = polyak * param_targ + (1 - polyak) * param
        vector_to_parameters(param_targ, net_targ.parameters())
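# Minimal, self-contained sketch of the polyak-averaged target-network update used in the
# snippets above, expressed with flat parameter vectors. The two Linear modules and the
# polyak value are illustrative stand-ins, not taken from any snippet.
import torch
import torch.nn as nn
from torch.nn.utils import parameters_to_vector, vector_to_parameters

net = nn.Linear(8, 2)
net_targ = nn.Linear(8, 2)
polyak = 0.995

with torch.no_grad():
    param = parameters_to_vector(net.parameters())
    param_targ = parameters_to_vector(net_targ.parameters())
    # move the target parameters a small step toward the online parameters
    vector_to_parameters(polyak * param_targ + (1 - polyak) * param,
                         net_targ.parameters())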