def forward(self, inputs, length, initial_inputs=None, static_inputs=None, initial_seq_inputs=None):
    """Unroll the cell for `length` steps, feeding cell outputs back as inputs.

    :param inputs: dict of tensors sliced by time; time is the second dimension.
    :param length: rollout length (number of steps).
    :param initial_inputs: not sliced; used at the start and overridden by cell output.
    :param initial_seq_inputs: may contain partial sequences; cell output is used
        after these end. Defaults to no partial sequences.
    :param static_inputs: not sliced and can't be overridden by cell output.
    :return: per-step cell outputs stacked along time (dim=1).
    """
    # NOTE! Unrolling the cell directly will result in crash as the hidden state is
    # not being reset. Use this function or CustomLSTMCell.unroll if needed.
    # Fix: `initial_seq_inputs={}` was a shared mutable default argument; use a
    # None sentinel instead so calls cannot leak state through the default dict.
    if initial_seq_inputs is None:
        initial_seq_inputs = {}

    initial_inputs, static_inputs = self.assert_begin(inputs, initial_inputs, static_inputs)

    step_inputs = initial_inputs.copy()
    step_inputs.update(static_inputs)
    lstm_outputs = []
    for t in range(length):
        step_inputs.update(map_dict(lambda x: x[:, t], inputs))  # Slicing
        # Use provided partial sequences only while they still have a frame at time t.
        step_inputs.update(map_dict(
            lambda x: x[:, t],
            filter_dict(lambda x: t < x[1].shape[1], initial_seq_inputs)))
        output = self.cell(**step_inputs)

        self.assert_post(output, inputs, initial_inputs, static_inputs)
        # TODO Test what signature does with *args
        # Feed back only the outputs that the cell's forward() accepts as parameters.
        autoregressive_output = subdict(output, output.keys() & signature(self.cell.forward).parameters)
        step_inputs.update(autoregressive_output)
        lstm_outputs.append(output)

    # Stack per-step outputs along the time dimension.
    lstm_outputs = rmap_list(lambda *x: stack(x, dim=1), lstm_outputs)

    self.cell.reset()
    return lstm_outputs
def recursive_map(tensors):
    # Recursively walk a collection of parallel structures and apply the
    # enclosing `fn` across corresponding leaf elements.
    # NOTE(review): `fn`, `TENSOR`, `map_dict` and `listdict2dictlist` are free
    # variables from the enclosing scope — this is a nested helper.
    if tensors is None:
        return tensors
    elif isinstance(tensors[0], list) or isinstance(tensors[0], tuple):
        # Parallel lists/tuples: zip them and recurse element-wise, keeping the
        # type of the first element's container.
        return type(tensors[0])(map(recursive_map, zip(*tensors)))
    elif isinstance(tensors[0], dict):
        # Parallel dicts: convert list-of-dicts to dict-of-lists, recurse per key.
        return map_dict(recursive_map, listdict2dictlist(tensors))
    elif isinstance(tensors[0], TENSOR):
        # Tensor leaves: apply fn across the parallel elements.
        return fn(*tensors)
    elif hasattr(tensors[0], 'to_dict'):
        # Dict-convertible objects: recurse on the dict form, then rebuild the
        # original object type from the mapped dict.
        old_type = type(tensors[0])
        tensors = type(tensors)(map(lambda x: x.to_dict(), tensors))
        return old_type(**map_dict(recursive_map, listdict2dictlist(tensors)))
    else:
        # Fallback for unknown leaf types: try fn directly; on failure report
        # the underlying error and raise a ValueError for the unsupported type.
        try:
            return fn(*tensors)
        except Exception as e:
            print("The following error was raised when recursively applying a function:")
            print(e)
            raise ValueError("Type {} not supported for recursive map".format(type(tensors)))
def cat(*argv):
    """Concatenate several SubgoalTreeLayer objects attribute-by-attribute.

    Scalars and None values are copied from the first layer; `subgoals` dicts
    are concatenated per key; `child_layer` is concatenated recursively.
    Raises ValueError for any other attribute type.
    """
    merged = SubgoalTreeLayer()
    for name, value in argv[0].__dict__.items():
        if value is None or np.isscalar(value):
            # Copied from the first layer's value.
            merged.__dict__[name] = value
        elif name == 'subgoals':
            subgoal_dicts = [layer.subgoals for layer in argv]
            merged.__dict__[name] = map_dict(concat, listdict2dictlist(subgoal_dicts))
        elif name == 'child_layer':
            children = [layer.child_layer for layer in argv]
            merged.__dict__[name] = SubgoalTreeLayer.cat(*children)
        else:
            raise ValueError("Cannot handle data type {} during tree concatenation!".format(type(value)))
    return merged
def num_parameters(model, level=0):
    """Return the number of parameters used in a module.

    Known bug: if some of the submodules are repeated, their parameters will be
    double counted.

    :param model: a torch.nn.Module exposing parameters() and named_children().
    :param level: if level==1, returns a dictionary of submodule names and
        corresponding parameter counts (recursing with level - 1).
    :return: int parameter count, or a dict of counts when recursing.
    """
    # Fix: named_children() returns a generator, so len() on it raised
    # TypeError for any level > 0; materialize it once into a dict instead.
    children = dict(model.named_children())
    if level == 0 or len(children) == 0:
        return sum(p.numel() for p in model.parameters())
    else:
        return map_dict(lambda x: num_parameters(x, level - 1), children)
def recursive_map(tensors):
    # Recursively traverse a nested structure, applying the enclosing `fn`
    # (with `argv`/`kwargs`) to every element that is an instance of
    # `target_class`, while rebuilding containers with their original types.
    # NOTE(review): `target_class`, `fn`, `argv`, `kwargs`, `only_target`,
    # `strict` and `map_dict` are free variables from the enclosing scope.
    if isinstance(tensors, target_class):
        # Target leaves get the function applied directly.
        return fn(tensors, *argv, **kwargs)
    elif tensors is None:
        return tensors
    elif isinstance(tensors, list) or isinstance(tensors, tuple):
        return type(tensors)(map(recursive_map, tensors))
    elif isinstance(tensors, dict):
        return type(tensors)(map_dict(recursive_map, tensors))
    elif hasattr(tensors, 'to_dict'):
        # Dict-convertible objects: recurse on the dict form, rebuild the type.
        return type(tensors)(**map_dict(recursive_map, tensors.to_dict()))
    else:
        # Misc elements - neither collections nor targets
        if only_target:
            # In only-target mode, non-target leaves pass through unchanged.
            return tensors
        try:
            # In strict mode, applying fn to a non-target leaf is an error.
            assert not strict
            return fn(tensors, *argv, **kwargs)
        except Exception as e:
            print("The following error was raised when recursively applying a function:")
            print(e)
            raise ValueError("Type {} not supported for recursive map".format(type(tensors)))
def val(self, test_control=True):
    """Run one pass over the validation set: compute losses, optionally run the
    evaluator, and log outputs and timing statistics.

    :param test_control: unused in this body — presumably kept for interface
        compatibility or consumed by an override; TODO confirm against callers.
    """
    print('Running Testing')
    if self.cmd_args.test_prediction:
        start = time.time()
        losses_meter = RecursiveAverageMeter()
        infer_time = AverageMeter()
        # self.model.eval()
        with autograd.no_grad():
            for batch_idx, sample_batched in enumerate(self.val_loader):
                inputs = AttrDict(map_dict(self.try_move_to_dev, sample_batched))
                with self.model.val_mode(pred_length=False):
                    infer_start = time.time()
                    # Forward pass in test mode, timed separately from loss computation.
                    output = self.model(inputs, 'test')
                    infer_time.update(time.time() - infer_start)
                    if self.evaluator is not None:
                        # force eval on all batches for reduced noise
                        self.evaluator.eval(inputs, output, self.model)
                # run train model to get NLL on validation data
                output_train_mdl = self.model(inputs)
                losses = self.model.loss(inputs, output_train_mdl)
                losses.total = self.model.get_total_loss(inputs, losses)
                losses_meter.update(losses)
                # Free per-batch tensors before the next iteration.
                del losses
                del output_train_mdl
                # if batch_idx == 0:
                #     break

            if not self.cmd_args.dont_save:
                if self.evaluator is not None:
                    self.evaluator.dump_results(self.global_step)
                    if self.cmd_args.metric:
                        # Metric-only run: dump evaluator results and terminate.
                        print("Finished Evaluation! Exiting...")
                        exit(0)

                # Logs the *last* batch's output alongside the averaged losses.
                self.model.log_outputs(output, inputs, losses_meter.avg, self.global_step,
                                       log_images=self.cmd_args.log_images, phase='val')
                print(('\nTest set: Average loss: {:.4f} in {:.2f}s\n'
                       .format(losses_meter.avg.total.value.item(), time.time() - start)))
                if self.cmd_args.verbose_timing:
                    print("avg Inference time: {:.3f}s/batch".format(infer_time.avg))
        del output
def sample_max_len_video(self, data_dict, start_ind, end_ind):
    """Process data tensors so they have length equal to max_seq_len, sampling
    or padding if necessary.

    During training a random temporal offset is drawn so different crops of a
    long sequence are seen; otherwise the crop always starts at frame 0.
    Returns (0, new_end_ind, data_dict) — the start index gets 0 by design.
    """
    max_len = self.spec['max_seq_len']
    extra_length = (end_ind - start_ind + 1) - max_len
    if self.phase == 'train':
        # Random offset in [start_ind, start_ind + max(0, extra_length)].
        offset = start_ind + max(0, int(np.random.rand() * (extra_length + 1)))
    else:
        offset = 0

    data_dict = map_dict(lambda t: self._maybe_pad(t, offset, max_len), data_dict)
    if 'actions' in data_dict:
        # There is one fewer action than frames: drop the trailing action.
        data_dict.actions = data_dict.actions[:-1]
    end_ind = min(end_ind - offset, max_len - 1)
    return 0, end_ind, data_dict    # start index gets 0 by design
def train_epoch(self, epoch):
    """Run one training epoch: forward/backward over every batch, with
    fine-grained timing meters and periodic output/image logging."""
    self.model.train()
    epoch_len = len(self.train_loader)
    end = time.time()
    # Timing meters: data loading, compute, logging, and total per-batch time.
    batch_time = AverageMeter()
    upto_log_time = AverageMeter()
    data_load_time = AverageMeter()
    forward_backward_time = AverageMeter()
    # How often (in batches) image logging fires within this epoch.
    self.log_images_interval = int(epoch_len / self.cmd_args.imepoch)

    print('starting epoch ', epoch)

    for self.batch_idx, sample_batched in enumerate(self.train_loader):
        data_load_time.update(time.time() - end)
        inputs = AttrDict(map_dict(self.try_move_to_dev, sample_batched))
        with self.training_context():
            self.optimizer.zero_grad()
            start_fw_bw = time.time()
            output = self.model(inputs)
            losses = self.model.loss(inputs, output)
            losses.total = self.model.get_total_loss(inputs, losses)
            losses.total.value.backward()
            # Hooks run after backward but before the optimizer step.
            self.call_hooks(inputs, output, losses, epoch)
            self.optimizer.step()
            self.model.step()
            forward_backward_time.update(time.time() - start_fw_bw)

        if self.cmd_args.train_loop_pdb:
            # Debug aid: drop into pdb inside the training loop on request.
            import pdb
            pdb.set_trace()

        upto_log_time.update(time.time() - end)
        if self.log_outputs_now and not self.cmd_args.dont_save:
            self.model.log_outputs(output, inputs, losses, self.global_step,
                                   log_images=self.log_images_now, phase='train')
        batch_time.update(time.time() - end)
        end = time.time()

        if self.log_outputs_now:
            print('GPU {}: {}'.format(
                os.environ["CUDA_VISIBLE_DEVICES"] if self.use_cuda else 'none',
                self._hp.exp_path))
            print(('itr: {} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                self.global_step, epoch, self.batch_idx, len(self.train_loader),
                100. * self.batch_idx / len(self.train_loader), losses.total.value.item())))

            print('avg time for loading: {:.2f}s, logs: {:.2f}s, compute: {:.2f}s, total: {:.2f}s'
                  .format(data_load_time.avg, batch_time.avg - upto_log_time.avg,
                          upto_log_time.avg - data_load_time.avg, batch_time.avg))
            # Rough remaining-time estimate from the running per-batch average.
            togo_train_time = batch_time.avg * (self._hp.num_epochs - epoch) * epoch_len / 3600.
            print('ETA: {:.2f}h'.format(togo_train_time))

        if self.cmd_args.verbose_timing:
            print("avg FW/BW time: {:.3f}s/batch".format(forward_backward_time.avg))

        # Release batch tensors before the next iteration.
        del output, losses
        self.global_step = self.global_step + 1