def fit_supervised(self, train_data: Datasplit):
    assert not self.args.sm_component_model
    assert not self.args.sm_constrain_transitions
    loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=False, batch_size=1)
    features, labels = [], []
    for batch in loader:
        features.append(batch['features'].squeeze(0))
        labels.append(batch['gt_single'].squeeze(0))
    self.model.fit_supervised(features, labels)

def predict(self, test_data: Datasplit):
    predictions = {}
    loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)
    for batch in loader:
        features = batch['features'].squeeze(0)
        num_timesteps = features.size(0)
        tasks = batch['task_name']
        assert len(tasks) == 1
        task = next(iter(tasks))
        videos = batch['video_name']
        assert len(videos) == 1
        video = next(iter(videos))
        predictions[video] = self.predict_single(task, num_timesteps)
    return predictions

def fit(self, train_data: Datasplit, use_labels: bool, callback_fn=None):
    assert use_labels
    IGNORE = -100
    loss = nn.CrossEntropyLoss(ignore_index=IGNORE)
    optimizer, scheduler = make_optimizer(self.args, self.model.parameters())
    loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=True,
                              batch_size=self.args.batch_size)
    for epoch in range(self.args.epochs):
        # call here since we may set eval in callback_fn
        self.model.train()
        losses = []
        assert self.args.batch_accumulation <= 1
        for batch in tqdm.tqdm(loader, ncols=80):
            # for batch in loader:
            tasks = batch['task_name']
            videos = batch['video_name']
            features = batch['features']
            gt_single = batch['gt_single']
            task_indices = batch['task_indices']
            max_len = features.size(1)
            lengths = batch['lengths']
            # mark frames beyond each video's true length as invalid (padding)
            invalid_mask = torch.arange(max_len).expand(len(lengths), max_len) >= lengths.unsqueeze(1)
            if self.args.cuda:
                features = features.cuda()
                lengths = lengths.cuda()
                task_indices = [indx.cuda() for indx in task_indices]
                gt_single = gt_single.cuda()
                invalid_mask = invalid_mask.cuda()
            # padded frames get the ignore_index so they contribute nothing to the loss
            gt_single.masked_fill_(invalid_mask, IGNORE)
            # batch_size x max_len x num_classes
            logits = self.model(features, lengths, valid_classes_per_instance=task_indices)
            this_loss = loss(logits.view(-1, logits.size(-1)), gt_single.flatten())
            losses.append(this_loss.item())
            this_loss.backward()
            optimizer.step()
            self.model.zero_grad()
        train_loss = np.mean(losses)
        if scheduler is not None:
            scheduler.step(train_loss)
        callback_fn(epoch, {'train_loss': train_loss})

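def _example_padded_cross_entropy():
    # Illustrative sketch only: a hypothetical helper, not used by the module, relying on
    # the module-level torch / torch.nn imports. It shows, with toy values, the padding
    # trick used in fit() above: frames past each sequence's true length are filled with
    # the CrossEntropyLoss ignore_index so padded positions contribute nothing to the loss.
    IGNORE = -100
    lengths = torch.tensor([3, 2])                    # true lengths of two padded videos
    batch_size, max_len, num_classes = 2, 4, 5
    labels = torch.randint(0, num_classes, (batch_size, max_len))
    logits = torch.randn(batch_size, max_len, num_classes)
    invalid_mask = torch.arange(max_len).expand(batch_size, max_len) >= lengths.unsqueeze(1)
    labels = labels.masked_fill(invalid_mask, IGNORE)  # padded frames -> IGNORE
    loss = nn.CrossEntropyLoss(ignore_index=IGNORE)
    return loss(logits.view(-1, num_classes), labels.flatten())
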
def predict(self, test_data: Datasplit):
    self.model.eval()
    predictions = {}
    loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)
    for batch in loader:
        features = batch['features']
        lengths = batch['lengths']
        task_indices = batch['task_indices']
        if self.args.cuda:
            features = features.cuda()
            lengths = lengths.cuda()
            task_indices = [indx.cuda() for indx in task_indices]
        videos = batch['video_name']
        assert all_equal(videos)
        video = next(iter(videos))
        # batch_size x length x num_classes
        with torch.no_grad():
            logits = self.model(features, lengths, valid_classes_per_instance=task_indices)
        preds = logits.max(dim=-1)[1]
        preds = preds.squeeze(0)
        assert preds.ndim == 1
        predictions[video] = preds.detach().cpu().numpy()
    return predictions

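def _example_restrict_and_decode(logits, valid_class_indices):
    # Illustrative sketch only: a hypothetical helper, not used by the module, and not
    # necessarily how self.model applies valid_classes_per_instance internally. One common
    # way to restrict framewise predictions to a task's label set is to mask out the
    # logits of all other classes before taking the argmax, as shown here.
    # logits: [T, num_classes]; valid_class_indices: 1-D LongTensor of allowed class ids.
    masked = torch.full_like(logits, float('-inf'))
    masked[:, valid_class_indices] = logits[:, valid_class_indices]
    return masked.argmax(dim=-1)  # [T] framewise predictions over the allowed classes only
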
def predict(self, test_data: Datasplit):
    predictions = {}
    loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)
    for batch in loader:
        features = batch['features'].squeeze(0)
        num_timesteps = features.size(0)
        tasks = batch['task_name']
        assert len(tasks) == 1
        task = next(iter(tasks))
        videos = batch['video_name']
        assert len(videos) == 1
        video = next(iter(videos))
        # constraints: T x K
        constraints = batch['constraints'].squeeze(0)
        assert constraints.size(0) == num_timesteps
        step_indices = self.ordered_nonbackground_indices_by_task[task]
        background_indices = self.background_indices_by_task[task]
        active_step = constraints.argmax(dim=1)
        active_step.apply_(lambda ix: step_indices[ix])
        if not test_data.remove_background:
            # just arbitrarily choose a background index; they will get canonicalized anyway
            active_step[constraints.sum(dim=1) == 0] = background_indices[0]
            predictions[video] = active_step.cpu().numpy()
        else:
            preds = active_step.cpu().numpy()
            zero_indices = (constraints.sum(dim=1) == 0).nonzero().flatten()
            baseline_preds = self.canonical.predict_single(task, num_timesteps)
            for ix in zero_indices:
                preds[ix] = baseline_preds[ix]
            predictions[video] = preds
    return predictions

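def _example_constraint_argmax():
    # Illustrative sketch only: a hypothetical helper with toy values, not used by the
    # module. It shows the mapping done in predict() above: each row of the T x K
    # narration-constraint matrix is argmax'd to pick a step slot, and that slot is mapped
    # back to the task's global (non-background) step index via step_indices.
    constraints = torch.tensor([[0., 1.],   # frame 0 -> step slot 1
                                [0., 0.],   # frame 1 -> unconstrained (all-zero row)
                                [1., 0.]])  # frame 2 -> step slot 0
    step_indices = [7, 12]                  # hypothetical global indices for this task's steps
    active_step = constraints.argmax(dim=1)
    active_step.apply_(lambda ix: step_indices[ix])
    # all-zero rows still get an arbitrary step here; predict() overwrites them with either
    # a background index or the canonical-ordering baseline prediction.
    return active_step                      # tensor([12, 7, 7])
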
def predict(self, test_data):
    self.model.eval()
    self.model.flatten_parameters()
    predictions = {}
    loader = make_data_loader(self.args, test_data, shuffle=False, batch_by_task=True,
                              batch_size=self.args.batch_size)
    # print('{} videos in prediction data'.format(len(loader.dataset)))
    # for batch in tqdm.tqdm(loader, ncols=80):
    for batch in loader:
        features = batch['features']
        task_indices = batch['task_indices']
        lengths = batch['lengths']
        # add a batch dimension
        # lengths = torch.LongTensor([features.size(0)]).unsqueeze(0)
        # features = features.unsqueeze(0)
        # task_indices = task_indices.unsqueeze(0)
        videos = batch['video_name']
        tasks = batch['task_name']
        assert len(set(tasks)) == 1
        task = next(iter(tasks))
        if 'test' in self.args.sm_constrain_with_narration:
            assert all_equal(tasks)
            constraints_expanded = self.expand_constraints(
                test_data, task, task_indices[0], 1 - batch['constraints']
            )
            constraints_expanded *= self.args.sm_constrain_narration_weight
        else:
            constraints_expanded = None
        if self.args.cuda:
            features = features.cuda()
            task_indices = [ti.cuda() for ti in task_indices]
            lengths = lengths.cuda()
            if constraints_expanded is not None:
                constraints_expanded = constraints_expanded.cuda()
        addl_allowed_ends = self.make_additional_allowed_ends(tasks, lengths)

        def predict(constraints):
            # TODO: figure out under which eval conditions use_mean_z should be False
            pred_spans, elp = self.model.viterbi(features, lengths, task_indices, add_eos=True,
                                                 use_mean_z=True,
                                                 additional_allowed_ends_per_instance=addl_allowed_ends,
                                                 constraints=constraints, return_elp=True)
            pred_labels = semimarkov_utils.spans_to_labels(pred_spans)
            # if self.args.sm_predict_single:
            #     # pred_spans: batch_size x T
            #     pred_labels_single = torch.zeros_like(pred_labels)
            #     for i in pred_labels.size(0):
            #         for lab in torch.unique(pred_labels[i, :lengths[i]]):
            #             # emission_scores: b x N x C
            #             pred_labels
            #     pass
            # if self.args.sm_constrain_transitions:
            #     all_pred_span_indices = [
            #         [ix for ix, count in this_rle_spans]
            #         for this_rle_spans in semimarkov_utils.rle_spans(pred_spans, lengths)
            #     ]
            #     for i, indices in enumerate(all_pred_span_indices):
            #         remove_cons_dups = [ix for ix, group in itertools.groupby(indices)
            #                             if not ix in test_data.corpus._background_indices]
            #         non_bg_indices = [
            #             ix for ix in test_data.corpus.indices_by_task(task)
            #             if ix not in test_data.corpus._background_indices
            #         ]
            #         if len(remove_cons_dups) != len(non_bg_indices) and lengths[i].item() != len(remove_cons_dups):
            #             print("deduped: {}, indices: {}, length {}".format(
            #                 remove_cons_dups, non_bg_indices, lengths[i].item()
            #             ))
            #             # assert lengths[i].item() < len(non_bg_indices)
            pred_labels_trim_s = self.model.trim(pred_labels, lengths, check_eos=True)
            return pred_labels_trim_s

        pred_labels_trim_s = predict(constraints_expanded)
        # assert len(pred_labels_trim_s) == 1, "batch size should be 1"
        for ix, (video, pred_labels_trim) in enumerate(zip(videos, pred_labels_trim_s)):
            preds = pred_labels_trim.numpy()
            predictions[video] = preds
            # if constraints_expanded is not None:
            #     this_cons = batch['constraints'][ix]
            #     if this_cons.sum() > 0:
            #         step_indices = test_data.get_ordered_indices_no_background()[task]
            #         for t, label in enumerate(preds):
            #             if label in step_indices:
            #                 label_ix = step_indices.index(label)
            #                 assert batch['constraints'][ix, t, label_ix] == 1
            assert self.model.n_classes not in predictions[video], \
                "predictions should not contain EOS: {}".format(predictions[video])
    return predictions

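def _example_spans_to_framewise(segments):
    # Illustrative sketch only: a hypothetical helper, not used by the module. The
    # semi-Markov decoder predicts a segmentation (a sequence of labeled spans) rather
    # than framewise labels; this toy function expands (label, length) pairs into a
    # framewise label list, which is the general idea behind
    # semimarkov_utils.spans_to_labels, although the actual span encoding used by that
    # module may differ.
    #     _example_spans_to_framewise([(3, 2), (0, 3)])  ->  [3, 3, 0, 0, 0]
    labels = []
    for label, length in segments:
        labels.extend([label] * length)
    return labels
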
def fit(self, train_data: Datasplit, use_labels: bool, callback_fn=None):
    self.model.train()
    self.model.flatten_parameters()
    if use_labels:
        assert not self.args.sm_constrain_transitions
    initialize = True
    if use_labels and self.args.sm_supervised_method in ['closed-form', 'closed-then-gradient']:
        self.fit_supervised(train_data)
        if self.args.sm_supervised_method == 'closed-then-gradient':
            initialize = False
            callback_fn(-1, {})
        else:
            return
    if self.args.sm_init_non_projection_parameters_from:
        initialize = False
        if callback_fn:
            callback_fn(-1, {})
    optimizer, scheduler = make_optimizer(self.args, self.model.parameters())
    big_loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=True, batch_size=100)
    samp = next(iter(big_loader))
    big_features = samp['features']
    big_lengths = samp['lengths']
    if self.args.cuda:
        big_features = big_features.cuda()
        big_lengths = big_lengths.cuda()
    if initialize:
        self.model.initialize_gaussian(big_features, big_lengths)
    loader = make_data_loader(self.args, train_data, batch_by_task=True, shuffle=True,
                              batch_size=self.args.batch_size)
    # print('{} videos in training data'.format(len(loader.dataset)))
    # all_features = [sample['features'] for batch in loader for sample in batch]
    # if self.args.cuda:
    #     all_features = [feats.cuda() for feats in all_features]
    C = self.n_classes
    K = self.args.sm_max_span_length
    for epoch in range(self.args.epochs):
        start_time = time.time()
        # call here since we may set eval in callback_fn
        self.model.train()
        losses = []
        multi_batch_losses = []
        nlls = []
        kls = []
        log_dets = []
        num_frames = 0
        num_videos = 0
        train_nll = 0
        train_kl = 0
        train_log_det = 0
        # for batch_ix, batch in enumerate(tqdm.tqdm(loader, ncols=80)):
        for batch_ix, batch in enumerate(loader):
            if self.args.train_limit and batch_ix >= self.args.train_limit:
                break
            # if self.args.cuda:
            #     features = features.cuda()
            #     task_indices = task_indices.cuda()
            #     gt_single = gt_single.cuda()
            tasks = batch['task_name']
            videos = batch['video_name']
            features = batch['features']
            task_indices = batch['task_indices']
            lengths = batch['lengths']
            if 'train' in self.args.sm_constrain_with_narration:
                assert all_equal(tasks)
                # narration constraints are flipped (1 - c) and scaled before being passed to the model
                constraints_expanded = self.expand_constraints(
                    train_data, tasks[0], task_indices[0], 1 - batch['constraints']
                )
                constraints_expanded *= self.args.sm_constrain_narration_weight
            else:
                constraints_expanded = None
            num_frames += lengths.sum().item()
            num_videos += len(lengths)
            # assert len(task_indices) == self.n_classes, "remove_background and multi-task fit() not implemented"
            if self.args.cuda:
                features = features.cuda()
                lengths = lengths.cuda()
                if constraints_expanded is not None:
                    constraints_expanded = constraints_expanded.cuda()
            if use_labels:
                labels = batch['gt_single']
                if self.args.cuda:
                    labels = labels.cuda()
                spans = semimarkov_utils.labels_to_spans(labels, max_k=K)
                use_mean_z = True
            else:
                spans = None
                use_mean_z = False
            addl_allowed_ends = self.make_additional_allowed_ends(tasks, lengths)
            ll, log_det = self.model.log_likelihood(features,
                                                    lengths,
                                                    valid_classes_per_instance=task_indices,
                                                    spans=spans,
                                                    add_eos=True,
                                                    use_mean_z=use_mean_z,
                                                    additional_allowed_ends_per_instance=addl_allowed_ends,
                                                    constraints=constraints_expanded)
            nll = -ll
            kl = self.model.kl.mean()
            # the KL term is only included when training without labels
            if use_labels:
                this_loss = nll - log_det
            else:
                this_loss = nll - log_det + kl
            multi_batch_losses.append(this_loss)
            nlls.append(nll.item())
            kls.append(kl.item())
            log_dets.append(log_det.item())
            train_nll += (nll.item() * len(videos))
            train_kl += (kl.item() * len(videos))
            train_log_det += (log_det.item() * len(videos))
            losses.append(this_loss.item())
            if len(multi_batch_losses) >= self.args.batch_accumulation:
                loss = sum(multi_batch_losses) / len(multi_batch_losses)
                loss.backward()
                multi_batch_losses = []
                if self.args.print_every and (batch_ix % self.args.print_every == 0):
                    param_norm = sum([p.norm() ** 2 for p in self.model.parameters()
                                      if p.requires_grad]).item() ** 0.5
                    gparam_norm = sum([p.grad.norm() ** 2 for p in self.model.parameters()
                                       if p.requires_grad and p.grad is not None]).item() ** 0.5
                    log_str = 'Epoch: %02d, Batch: %03d/%03d, |Param|: %.6f, |GParam|: %.2f, lr: %.2E, ' + \
                              'loss: %.4f, recon: %.4f, kl: %.4f, log_det: %.4f, recon_bound: %.2f, ' + \
                              'Throughput: %.2f vid / sec'
                    print(log_str % (epoch, batch_ix, len(loader), param_norm, gparam_norm,
                                     optimizer.param_groups[0]["lr"],
                                     (train_nll + train_kl + train_log_det) / num_videos,  # loss
                                     train_nll / num_frames,  # recon
                                     train_kl / num_frames,  # kl
                                     train_log_det / num_videos,  # log_det
                                     (train_nll + train_kl) / num_frames,  # recon_bound
                                     num_videos / (time.time() - start_time)))  # Throughput
                if self.args.max_grad_norm is not None:
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)
                optimizer.step()
                self.model.zero_grad()
        train_loss = np.mean(losses)
        if scheduler is not None:
            scheduler.step(train_loss)
        callback_fn(epoch, {
            'train_loss': train_loss,
            'train_nll_frame_avg': train_nll / num_frames,
            'train_kl_vid_avg': train_kl / num_videos,
            'train_recon_bound': (train_nll + train_kl) / num_frames,
        })

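def _example_gradient_accumulation(model, optimizer, batches, accumulation_steps, compute_loss):
    # Illustrative sketch only: a hypothetical helper, not used by the module. It shows the
    # accumulation pattern in fit() above: losses from several batches are averaged and
    # backpropagated together, and the optimizer only steps once per accumulation window.
    pending = []
    for batch in batches:
        pending.append(compute_loss(model, batch))
        if len(pending) >= accumulation_steps:
            (sum(pending) / len(pending)).backward()
            optimizer.step()
            model.zero_grad()
            pending = []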