Example #1
    def fit_supervised(self, train_data: Datasplit):
        # closed-form supervised fit: collect per-video features and labels,
        # then delegate parameter estimation to the underlying model
        assert not self.args.sm_component_model
        assert not self.args.sm_constrain_transitions
        loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=False, batch_size=1)
        features, labels = [], []
        for batch in loader:
            # batch_size is 1, so drop the leading batch dimension
            features.append(batch['features'].squeeze(0))
            labels.append(batch['gt_single'].squeeze(0))
        self.model.fit_supervised(features, labels)
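A note on the .squeeze(0) calls above: with batch_size=1 the loader yields tensors with a leading batch dimension of 1, and squeeze(0) recovers the per-video (T, D) shape. A self-contained sketch (toy shapes, not this repo's data):

    import torch

    features_batched = torch.randn(1, 300, 64)  # (batch=1, T=300 frames, D=64 dims)
    features = features_batched.squeeze(0)      # -> (T, D), as collected above
    assert features.shape == (300, 64)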
Example #2
    def predict(self, test_data: Datasplit):
        predictions = {}
        loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)

        for batch in loader:
            features = batch['features'].squeeze(0)
            num_timesteps = features.size(0)

            # batch_size is 1, so each of these is a singleton collection
            tasks = batch['task_name']
            assert len(tasks) == 1
            task = next(iter(tasks))
            videos = batch['video_name']
            assert len(videos) == 1
            video = next(iter(videos))

            # predictions depend only on the task and the video length
            predictions[video] = self.predict_single(task, num_timesteps)
        return predictions
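predict_single itself is not shown in this excerpt. Purely as an illustration, a uniform-segmentation canonical baseline could look like the sketch below; the helper name and step list are hypothetical, not this repo's implementation:

    import numpy as np

    def predict_single_uniform(step_indices, num_timesteps):
        # hypothetical baseline: equal-length segments, one per step, in canonical order
        bounds = np.linspace(0, num_timesteps, num=len(step_indices) + 1).astype(int)
        preds = np.empty(num_timesteps, dtype=int)
        for step, start, end in zip(step_indices, bounds[:-1], bounds[1:]):
            preds[start:end] = step
        return preds

    print(predict_single_uniform([3, 7, 2], 10))  # [3 3 3 7 7 7 2 2 2 2]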
Example #3
    def fit(self, train_data: Datasplit, use_labels: bool, callback_fn=None):
        assert use_labels
        IGNORE = -100
        loss = nn.CrossEntropyLoss(ignore_index=IGNORE)
        optimizer, scheduler = make_optimizer(self.args, self.model.parameters())
        loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=True, batch_size=self.args.batch_size)

        for epoch in range(self.args.epochs):
            # call here since we may set eval in callback_fn
            self.model.train()
            losses = []
            assert self.args.batch_accumulation <= 1  # gradient accumulation not supported by this trainer
            for batch in tqdm.tqdm(loader, ncols=80):
                tasks = batch['task_name']
                videos = batch['video_name']
                features = batch['features']
                gt_single = batch['gt_single']
                task_indices = batch['task_indices']
                max_len = features.size(1)
                lengths = batch['lengths']
                # padding mask: True at positions at or beyond each video's true length
                invalid_mask = torch.arange(max_len).expand(len(lengths), max_len) >= lengths.unsqueeze(1)
                if self.args.cuda:
                    features = features.cuda()
                    lengths = lengths.cuda()
                    task_indices = [indx.cuda() for indx in task_indices]
                    gt_single = gt_single.cuda()
                    invalid_mask = invalid_mask.cuda()
                gt_single.masked_fill_(invalid_mask, IGNORE)
                # batch_size x max_len x num_classes
                logits = self.model(features, lengths, valid_classes_per_instance=task_indices)

                this_loss = loss(logits.view(-1, logits.size(-1)), gt_single.flatten())
                losses.append(this_loss.item())
                this_loss.backward()

                optimizer.step()
                self.model.zero_grad()
            train_loss = np.mean(losses)
            if scheduler is not None:
                scheduler.step(train_loss)
            if callback_fn is not None:
                callback_fn(epoch, {'train_loss': train_loss})
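The padding trick above is worth isolating: padded frames are filled with the loss's ignore_index so they contribute nothing to the gradient. A minimal, self-contained demo with toy shapes (not the repo's data):

    import torch
    import torch.nn as nn

    IGNORE = -100
    lengths = torch.tensor([3, 2])      # two videos, 3 and 2 real frames
    max_len = 4
    invalid = torch.arange(max_len).expand(2, max_len) >= lengths.unsqueeze(1)
    labels = torch.tensor([[1, 0, 2, 0], [2, 1, 0, 0]])
    labels = labels.masked_fill(invalid, IGNORE)        # padded frames -> IGNORE
    logits = torch.randn(2, max_len, 3)                 # batch x time x classes
    loss = nn.CrossEntropyLoss(ignore_index=IGNORE)
    value = loss(logits.view(-1, 3), labels.flatten())  # padding is excluded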
Example #4
    def predict(self, test_data: Datasplit):
        self.model.eval()
        predictions = {}
        loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)
        for batch in loader:
            features = batch['features']
            lengths = batch['lengths']
            task_indices = batch['task_indices']
            if self.args.cuda:
                features = features.cuda()
                lengths = lengths.cuda()
                task_indices = [indx.cuda() for indx in task_indices]
            videos = batch['video_name']
            assert all_equal(videos)
            video = next(iter(videos))
            with torch.no_grad():
                # logits: batch_size x length x num_classes
                logits = self.model(features, lengths, valid_classes_per_instance=task_indices)
                # greedy frame-level decode: argmax over the class dimension
                preds = logits.max(dim=-1)[1]
                preds = preds.squeeze(0)
                assert preds.ndim == 1
                predictions[video] = preds.detach().cpu().numpy()
        return predictions
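The decode step reduces to an argmax over the class dimension. A toy rendering with synthetic logits:

    import torch

    with torch.no_grad():
        logits = torch.randn(1, 5, 4)  # batch=1, T=5, num_classes=4
        preds = logits.max(dim=-1)[1]  # indices of the max, i.e. argmax(dim=-1)
        preds = preds.squeeze(0)       # -> shape (5,)
    print(preds.cpu().numpy())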
Example #5
    def predict(self, test_data: Datasplit):
        predictions = {}
        loader = make_data_loader(self.args, test_data, batch_by_task=False, shuffle=False, batch_size=1)

        for batch in loader:
            features = batch['features'].squeeze(0)
            num_timesteps = features.size(0)

            tasks = batch['task_name']
            assert len(tasks) == 1
            task = next(iter(tasks))
            videos = batch['video_name']
            assert len(videos) == 1
            video = next(iter(videos))

            # constraints: T x K
            constraints = batch['constraints'].squeeze(0)
            assert constraints.size(0) == num_timesteps

            step_indices = self.ordered_nonbackground_indices_by_task[task]
            background_indices = self.background_indices_by_task[task]

            # pick the highest-scoring step column at each frame, then map column
            # indices back to the task's global step label indices
            active_step = constraints.argmax(dim=1)
            active_step.apply_(lambda ix: step_indices[ix])
            if not test_data.remove_background:
                # frames with no active constraint get a background label; just
                # arbitrarily choose a background index, they will get canonicalized anyway
                active_step[constraints.sum(dim=1) == 0] = background_indices[0]
                predictions[video] = active_step.cpu().numpy()
            else:
                # no background class: fall back to the canonical baseline's
                # predictions for frames with no active constraint
                preds = active_step.cpu().numpy()
                zero_indices = (constraints.sum(dim=1) == 0).nonzero().flatten()
                baseline_preds = self.canonical.predict_single(task, num_timesteps)
                for ix in zero_indices:
                    preds[ix] = baseline_preds[ix]
                predictions[video] = preds
        return predictions
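The argmax-and-remap pattern above is compact but easy to misread; here is a toy version with hand-built constraints (note Tensor.apply_ only works on CPU tensors):

    import torch

    constraints = torch.tensor([[0., 1.], [0., 0.], [1., 0.]])  # T=3 frames, K=2 steps
    step_indices = [5, 9]                       # column index -> global step label
    active = constraints.argmax(dim=1)          # tensor([1, 0, 0])
    active.apply_(lambda ix: step_indices[ix])  # -> tensor([9, 5, 5])
    active[constraints.sum(dim=1) == 0] = 0     # unconstrained frames -> background id 0
    print(active)                               # tensor([9, 0, 5])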
Example #6
    def predict(self, test_data: Datasplit):
        self.model.eval()
        self.model.flatten_parameters()
        predictions = {}
        loader = make_data_loader(self.args, test_data, shuffle=False, batch_by_task=True, batch_size=self.args.batch_size)
        for batch in loader:
            features = batch['features']
            task_indices = batch['task_indices']
            lengths = batch['lengths']

            videos = batch['video_name']
            tasks = batch['task_name']
            assert len(set(tasks)) == 1
            task = next(iter(tasks))

            if 'test' in self.args.sm_constrain_with_narration:
                assert all_equal(tasks)
                # invert the narration constraints (1 - c) and scale them before
                # passing them to viterbi below
                constraints_expanded = self.expand_constraints(
                    test_data, task, task_indices[0], 1 - batch['constraints']
                )
                constraints_expanded *= self.args.sm_constrain_narration_weight
            else:
                constraints_expanded = None
            else:
                constraints_expanded = None

            if self.args.cuda:
                features = features.cuda()
                task_indices = [ti.cuda() for ti in task_indices]
                lengths = lengths.cuda()
                if constraints_expanded is not None:
                    constraints_expanded = constraints_expanded.cuda()

            addl_allowed_ends = self.make_additional_allowed_ends(tasks, lengths)

            def run_viterbi(constraints):
                # renamed from an inner `predict`, which shadowed the method's name
                # TODO: figure out under which eval conditions use_mean_z should be False
                pred_spans, elp = self.model.viterbi(
                    features, lengths, task_indices, add_eos=True, use_mean_z=True,
                    additional_allowed_ends_per_instance=addl_allowed_ends,
                    constraints=constraints, return_elp=True,
                )
                pred_labels = semimarkov_utils.spans_to_labels(pred_spans)
                pred_labels_trim_s = self.model.trim(pred_labels, lengths, check_eos=True)
                return pred_labels_trim_s

            pred_labels_trim_s = run_viterbi(constraints_expanded)

            for video, pred_labels_trim in zip(videos, pred_labels_trim_s):
                preds = pred_labels_trim.numpy()
                predictions[video] = preds
                assert self.model.n_classes not in predictions[video], "predictions should not contain EOS: {}".format(
                    predictions[video])
        return predictions
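semimarkov_utils.spans_to_labels is not shown here. Purely to illustrate the span encoding it implies (a class id at each segment start, a sentinel for continuation frames), a toy converter might look like this; the -1 sentinel and shapes are assumptions, not the repo's actual encoding:

    import torch

    def spans_to_labels_toy(spans, sentinel=-1):
        # spans: (batch, T); a non-sentinel value opens a segment, the sentinel
        # means "continue the previous segment"; returns per-frame labels
        labels = spans.clone()
        for b in range(labels.size(0)):
            for t in range(1, labels.size(1)):
                if labels[b, t] == sentinel:
                    labels[b, t] = labels[b, t - 1]
        return labels

    spans = torch.tensor([[2, -1, -1, 0, -1]])
    print(spans_to_labels_toy(spans))  # tensor([[2, 2, 2, 0, 0]])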
Example #7
    def fit(self, train_data: Datasplit, use_labels: bool, callback_fn=None):
        self.model.train()
        self.model.flatten_parameters()
        if use_labels:
            assert not self.args.sm_constrain_transitions
        initialize = True
        if use_labels and self.args.sm_supervised_method in ['closed-form', 'closed-then-gradient']:
            self.fit_supervised(train_data)
            if self.args.sm_supervised_method == 'closed-then-gradient':
                initialize = False
                if callback_fn:
                    callback_fn(-1, {})
            else:
                return
        if self.args.sm_init_non_projection_parameters_from:
            initialize = False
            if callback_fn:
                callback_fn(-1, {})
        optimizer, scheduler = make_optimizer(self.args, self.model.parameters())
        # draw one large batch up front; it is used below to initialize the emission Gaussians
        big_loader = make_data_loader(self.args, train_data, batch_by_task=False, shuffle=True, batch_size=100)
        samp = next(iter(big_loader))
        big_features = samp['features']
        big_lengths = samp['lengths']
        if self.args.cuda:
            big_features = big_features.cuda()
            big_lengths = big_lengths.cuda()

        if initialize:
            self.model.initialize_gaussian(big_features, big_lengths)

        loader = make_data_loader(self.args, train_data, batch_by_task=True, shuffle=True, batch_size=self.args.batch_size)

        K = self.args.sm_max_span_length  # maximum span length for the semi-Markov model

        for epoch in range(self.args.epochs):
            start_time = time.time()
            # call here since we may set eval in callback_fn
            self.model.train()
            losses = []
            multi_batch_losses = []
            nlls = []
            kls = []
            log_dets = []
            num_frames = 0
            num_videos = 0
            train_nll = 0
            train_kl = 0
            train_log_det = 0
            for batch_ix, batch in enumerate(loader):
                if self.args.train_limit and batch_ix >= self.args.train_limit:
                    break
                tasks = batch['task_name']
                videos = batch['video_name']
                features = batch['features']
                task_indices = batch['task_indices']
                lengths = batch['lengths']

                if 'train' in self.args.sm_constrain_with_narration:
                    assert all_equal(tasks)
                    constraints_expanded = self.expand_constraints(
                        train_data, tasks[0], task_indices[0], 1 - batch['constraints']
                    )
                    constraints_expanded *= self.args.sm_constrain_narration_weight
                else:
                    constraints_expanded = None

                num_frames += lengths.sum().item()
                num_videos += len(lengths)

                # note: remove_background combined with multi-task fit() is not implemented

                if self.args.cuda:
                    features = features.cuda()
                    lengths = lengths.cuda()
                    if constraints_expanded is not None:
                        constraints_expanded = constraints_expanded.cuda()

                if use_labels:
                    labels = batch['gt_single']
                    if self.args.cuda:
                        labels = labels.cuda()
                    spans = semimarkov_utils.labels_to_spans(labels, max_k=K)
                    use_mean_z = True
                else:
                    spans = None
                    use_mean_z = False

                addl_allowed_ends = self.make_additional_allowed_ends(tasks, lengths)

                ll, log_det = self.model.log_likelihood(
                    features,
                    lengths,
                    valid_classes_per_instance=task_indices,
                    spans=spans,
                    add_eos=True,
                    use_mean_z=use_mean_z,
                    additional_allowed_ends_per_instance=addl_allowed_ends,
                    constraints=constraints_expanded,
                )
                nll = -ll
                kl = self.model.kl.mean()
                if use_labels:
                    # supervised objective omits the KL term (z is taken at its mean above)
                    this_loss = nll - log_det
                else:
                    this_loss = nll - log_det + kl
                multi_batch_losses.append(this_loss)
                nlls.append(nll.item())
                kls.append(kl.item())
                log_dets.append(log_det.item())

                train_nll += (nll.item() * len(videos))
                train_kl += (kl.item() * len(videos))
                train_log_det += (log_det.item() * len(videos))

                losses.append(this_loss.item())

                if len(multi_batch_losses) >= self.args.batch_accumulation:
                    loss = sum(multi_batch_losses) / len(multi_batch_losses)
                    loss.backward()
                    multi_batch_losses = []

                    if self.args.print_every and (batch_ix % self.args.print_every == 0):
                        param_norm = sum([p.norm()**2 for p in self.model.parameters()
                                          if p.requires_grad]).item()**0.5
                        gparam_norm = sum([p.grad.norm()**2 for p in self.model.parameters()
                                           if p.requires_grad and p.grad is not None]).item()**0.5
                        log_str = 'Epoch: %02d, Batch: %03d/%03d, |Param|: %.6f, |GParam|: %.2f, lr: %.2E, ' + \
                                  'loss: %.4f, recon: %.4f, kl: %.4f, log_det: %.4f, recon_bound: %.2f, Throughput: %.2f vid / sec'
                        print(log_str %
                              (epoch, batch_ix, len(loader), param_norm, gparam_norm,
                               optimizer.param_groups[0]["lr"],
                               (train_nll + train_kl + train_log_det) / num_videos, # loss
                               train_nll / num_frames, # recon
                               train_kl / num_frames, # kl
                               train_log_det / num_videos, # log_det
                               (train_nll + train_kl) / num_frames, # recon_bound
                               num_videos / (time.time() - start_time))) # Throughput
                    if self.args.max_grad_norm is not None:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.max_grad_norm)

                    optimizer.step()
                    self.model.zero_grad()
            train_loss = np.mean(losses)
            if scheduler is not None:
                scheduler.step(train_loss)
            if callback_fn is not None:
                callback_fn(epoch, {'train_loss': train_loss,
                                    'train_nll_frame_avg': train_nll / num_frames,
                                    'train_kl_vid_avg': train_kl / num_videos,
                                    'train_recon_bound': (train_nll + train_kl) / num_frames})
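The accumulation-and-step pattern in the inner loop is easy to get wrong; here is a minimal, self-contained rendering with a toy model standing in for the semi-Markov model (shapes and hyperparameters are illustrative only):

    import torch

    model = torch.nn.Linear(4, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    batch_accumulation = 2
    multi_batch_losses = []
    for step in range(4):
        x = torch.randn(8, 4)
        loss = model(x).pow(2).mean()  # stand-in for nll - log_det (+ kl)
        multi_batch_losses.append(loss)
        if len(multi_batch_losses) >= batch_accumulation:
            # average the accumulated losses, backprop once, clip, then step
            (sum(multi_batch_losses) / len(multi_batch_losses)).backward()
            multi_batch_losses = []
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()
            model.zero_grad()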