def training_step(self, x=None, y=None, loss=None, retain_graph=False, global_net=None):
    '''Take a single training step: one forward pass and one backward pass'''
    self.train()
    self.zero_grad()
    self.optim.zero_grad()
    if loss is None:
        # no precomputed loss: run a forward pass and score against y
        prediction = self(x)
        loss = self.loss_fn(prediction, y)
    assert not torch.isnan(loss).any(), loss
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self.conv_model)
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad:
        logger.debug(f'Clipping gradient: {self.clip_grad_val}')
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    if global_net is not None:
        # distributed training: push local grads to the global net before stepping
        net_util.push_global_grad(self, global_net)
    self.optim.step()
    if global_net is not None:
        # then sync the freshly stepped global params back down
        net_util.pull_global_param(self, global_net)
    if net_util.to_assert_trained():
        assert_trained(self.conv_model, loss)
    logger.debug(f'Net training_step loss: {loss}')
    return loss
def training_step(self, xs=None, ys=None, loss=None, retain_graph=False, lr_clock=None):
    '''
    Take a single training step: one forward pass and one backward pass.
    Both xs and ys are lists of the same length, one x and y per environment.
    '''
    # advance the lr scheduler using the clock's total_t as the epoch
    self.lr_scheduler.step(epoch=ps.get(lr_clock, 'total_t'))
    self.train()
    self.optim.zero_grad()
    if loss is None:
        predictions = self(xs)
        # sum the per-environment losses into one scalar on the net's device
        loss = torch.tensor(0.0, device=self.device)
        for prediction, y in zip(predictions, ys):
            loss = loss + self.loss_fn(prediction, y)
    assert not torch.isnan(loss).any(), loss
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self)
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad_val is not None:
        nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    self.optim.step()
    if net_util.to_assert_trained():
        assert_trained(self, loss)
    self.store_grad_norms()
    logger.debug(f'Net training_step loss: {loss}')
    return loss
def training_step(self, xs=None, ys=None, loss=None, retain_graph=False, global_net=None):
    '''
    Take a single training step: one forward pass and one backward pass.
    Both xs and ys are lists of the same length, one x and y per environment.
    '''
    self.train()
    self.zero_grad()
    self.optim.zero_grad()
    if loss is None:
        predictions = self(xs)
        # sum the per-environment losses into one scalar on the net's device
        loss = torch.tensor(0.0, device=self.device)
        for prediction, y in zip(predictions, ys):
            loss = loss + self.loss_fn(prediction, y)
    assert not torch.isnan(loss).any(), loss
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self.model_body)
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad:
        logger.debug(f'Clipping gradient: {self.clip_grad_val}')
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    if global_net is not None:
        # distributed training: push local grads to the global net before stepping
        net_util.push_global_grad(self, global_net)
    self.optim.step()
    if global_net is not None:
        # then sync the freshly stepped global params back down
        net_util.pull_global_param(self, global_net)
    if net_util.to_assert_trained():
        assert_trained(self.model_body, loss)
    logger.debug(f'Net training_step loss: {loss}')
    return loss
def training_step(self, x=None, y=None, loss=None, retain_graph=False, lr_clock=None):
    '''
    Take a single training step: one forward pass and one backward pass.
    For most RL usage the loss is a custom, often complicated, function;
    compute its value, put it in a pytorch tensor, and pass it in as loss.
    '''
    # multitail nets cannot compute a single loss from raw (x, y) pairs
    if hasattr(self, 'model_tails') and x is not None:
        raise ValueError('Loss computation from x,y not supported for multitails')
    # advance the lr scheduler using the clock's total_t as the epoch
    self.lr_scheduler.step(epoch=ps.get(lr_clock, 'total_t'))
    self.train()
    self.optim.zero_grad()
    if loss is None:
        prediction = self(x)
        loss = self.loss_fn(prediction, y)
    assert not torch.isnan(loss).any(), loss
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self)
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad_val is not None:
        nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    self.optim.step()
    if net_util.to_assert_trained():
        assert_trained(self, loss)
    self.store_grad_norms()
    logger.debug(f'Net training_step loss: {loss}')
    return loss
def training_step(self, x=None, y=None, loss=None, retain_graph=False, global_net=None):
    '''
    Take a single training step: one forward pass and one backward pass.
    For most RL usage the loss is a custom, often complicated, function;
    compute its value, put it in a pytorch tensor, and pass it in as loss.
    '''
    def resolve_model():
        # to accommodate split model in inherited classes
        return getattr(self, 'model', None) or getattr(self, 'model_body')

    self.train()
    self.zero_grad()
    self.optim.zero_grad()
    if loss is None:
        prediction = self(x)
        loss = self.loss_fn(prediction, y)
    assert not torch.isnan(loss).any(), loss
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(resolve_model())
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad:
        logger.debug(f'Clipping gradient: {self.clip_grad_val}')
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    if global_net is not None:
        # distributed training: push local grads to the global net before stepping
        net_util.push_global_grad(self, global_net)
    self.optim.step()
    if global_net is not None:
        # then sync the freshly stepped global params back down
        net_util.pull_global_param(self, global_net)
    if net_util.to_assert_trained():
        assert_trained(resolve_model(), loss)
    logger.debug(f'Net training_step loss: {loss}')
    return loss
def training_step(self, xs=None, ys=None, loss=None, retain_graph=False):
    '''
    Takes a single training step: one forward and one backwards pass.
    Both x and y are lists of the same length, one x and y per environment.

    @param xs: list of input batches, one per environment (unused when loss is given)
    @param ys: list of target batches, one per environment (unused when loss is given)
    @param loss: optional precomputed loss tensor; computed from xs, ys when None
    @param retain_graph: forwarded to backward() to allow repeated backprop
    @returns the total (summed) loss tensor
    '''
    self.train()
    self.zero_grad()
    self.optim.zero_grad()
    if loss is None:
        outs = self(xs)
        total_loss = torch.tensor(0.0)
        for out, y in zip(outs, ys):
            loss = self.loss_fn(out, y)
            # accumulate on CPU; autograd still tracks the graph across devices
            total_loss += loss.cpu()
    else:
        # bug fix: previously a precomputed loss left total_loss undefined,
        # raising NameError at the isnan assert below
        total_loss = loss
    assert not torch.isnan(total_loss).any()
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self.model_body)
    total_loss.backward(retain_graph=retain_graph)
    if self.clip_grad:
        logger.debug('Clipping gradient')
        # clip_grad_norm is deprecated in torch; use the in-place clip_grad_norm_
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    self.optim.step()
    if net_util.to_assert_trained():
        assert_trained(self.model_body)
    return total_loss
def training_step(self, x=None, y=None, loss=None, retain_graph=False):
    '''
    Takes a single training step: one forward and one backwards pass.

    @param x: input batch (unused when loss is given)
    @param y: target batch (unused when loss is given)
    @param loss: optional precomputed loss tensor; computed from x, y when None
    @param retain_graph: forwarded to backward() to allow repeated backprop
    @returns the loss tensor
    '''
    self.train()
    self.zero_grad()
    self.optim.zero_grad()
    if loss is None:
        out = self(x)
        loss = self.loss_fn(out, y)
    assert not torch.isnan(loss).any()
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self.conv_model)
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad:
        logger.debug('Clipping gradient')
        # clip_grad_norm is deprecated in torch; use the in-place clip_grad_norm_
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    self.optim.step()
    if net_util.to_assert_trained():
        assert_trained(self.conv_model)
    return loss
def training_step(self, x=None, y=None, loss=None, retain_graph=False):
    '''Take a single training step: one forward pass and one backward pass'''
    self.train()
    self.zero_grad()
    self.optim.zero_grad()
    if loss is None:
        # no precomputed loss: run a forward pass and score against y
        prediction = self(x)
        loss = self.loss_fn(prediction, y)
    assert not torch.isnan(loss).any()
    if net_util.to_assert_trained():
        assert_trained = net_util.gen_assert_trained(self.rnn_model)
    loss.backward(retain_graph=retain_graph)
    if self.clip_grad:
        logger.debug(f'Clipping gradient')
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_val)
    self.optim.step()
    if net_util.to_assert_trained():
        assert_trained(self.rnn_model)
    return loss
def run_trial_test_dist(spec_file, spec_name=False):
    '''Run a distributed trial end-to-end and verify the global net gets trained'''
    spec = spec_util.override_test_spec(spec_util.get(spec_file, spec_name))
    info_space = InfoSpace()
    info_space.tick('trial')
    spec['meta']['distributed'] = True
    spec['meta']['max_session'] = 2
    trial = Trial(spec, info_space)
    # manually run the logic to obtain global nets for testing to ensure global net gets updated
    global_nets = trial.init_global_nets()
    # only test the first network; for multiagent, take the first agent's nets
    net_dict = global_nets[0] if ps.is_list(global_nets) else global_nets
    net = next(iter(net_dict.values()))
    assert_trained = net_util.gen_assert_trained(net)
    session_datas = trial.parallelize_sessions(global_nets)
    assert_trained(net, loss=1.0)
    trial.session_data_dict = {data.index[0]: data for data in session_datas}
    trial_data = analysis.analyze_trial(trial)
    trial.close()
    assert isinstance(trial_data, pd.DataFrame)
def test_training_step():
    '''A training step on random targets should produce a nonzero loss and update params'''
    # capture pre-training params so the update can be verified afterwards
    assert_trained = net_util.gen_assert_trained(net)
    targets = torch.rand((batch_size, out_dim))
    loss = net.training_step(x=x, y=targets)
    assert loss != 0.0
    assert_trained(net, loss)