Esempio n. 1
0
    def forward_with_reality(self, intput, time_len):
        """Predict with the latent produced by ``self.encoder.no_sample``.

        Original note: encode z / u with ground truth I_{t+T} w/o resample.

        Args:
            intput: raw video batch, handed unchanged to ``vid_batch_to_cuda``.
            time_len: horizon value forwarded to ``self._forward_with_z``.

        Returns:
            The result of ``self._forward_with_z`` for the encoded batch.
        """
        # 1. move the batch to the device and run the image encoder
        batch = self._forward_image_encode(vid_batch_to_cuda(intput))

        # 2. the encoder returns three values; only the first (the latent)
        #    is consumed here, the remaining two are intentionally dropped.
        latent, _kl_loss, _ori_z = self.encoder.no_sample(batch)

        return self._forward_with_z(batch, latent, time_len)
Esempio n. 2
0
    def forward_inception(self, intput, time_len, seed=None):
        """Predict with a latent obtained from ``self.encoder.batch_sample``.

        Original note: sample z / u from a distribution.

        Args:
            intput: raw video batch, handed unchanged to ``vid_batch_to_cuda``.
            time_len: passed both to the sampler and to
                ``self._forward_with_z``.
            seed: optional value forwarded to ``self.encoder.batch_sample``.

        Returns:
            The result of ``self._forward_with_z`` for the sampled latent.
        """
        batch = self._forward_image_encode(vid_batch_to_cuda(intput))

        # The sampler is given dimension 1 of 'bbox' and the first entry of
        # 'image' (presumably batch size and the initial frame -- confirm).
        dim1_size = batch['bbox'].size(1)
        first_image = batch['image'][0]
        latent = self.encoder.batch_sample(
            dim1_size, time_len, seed, first_image)

        return self._forward_with_z(batch, latent, time_len)
Esempio n. 3
0
    def forward(self, in_vecs):
        """Run the full pipeline and attach the encoder's extra outputs.

        Steps:
            1. Move ``in_vecs`` to the device and extract image features
               (original note: 'feats' = 'appr' | 'bbox').
            2. Call ``self.encoder`` on the batch to obtain the latent, the
               KL term and a third value stored as ``'orig_z'``
               (original note: get z, kl_loss, using only I_0, I_dt-1).
            3. Predict with ``self._forward_with_z`` over ``self.dt``.

        Args:
            in_vecs: raw video batch, handed to ``vid_batch_to_cuda``.

        Returns:
            The predictions from ``self._forward_with_z`` with two extra
            entries: ``'kl_loss'`` and ``'orig_z'``.
        """
        batch = self._forward_image_encode(vid_batch_to_cuda(in_vecs))
        latent, kl_term, raw_latent = self.encoder(batch)

        outputs = self._forward_with_z(batch, latent, self.dt)
        outputs['kl_loss'] = kl_term
        outputs['orig_z'] = raw_latent
        return outputs
Esempio n. 4
0
    def forward_lp(self, intput, time_len, sample):
        """Roll the model forward step by step, feeding back its own output.

        Each of the ``time_len`` iterations:
          * asks ``self.encoder.one_sample`` for a latent, conditioned on the
            most recent ``'pred_recon'`` (seeded with ``image[1]``),
          * runs ``self.graph_net`` on the current frame's features,
          * decodes the current frame's features into ``'pred_recon'``,
          * merges the step's outputs into the current frame and records a
            copy of them.

        After the loop the per-step records are collated and the decoder is
        run once more on the input features; its outputs are added under
        keys prefixed with ``'real_'``.

        Args:
            intput: raw video batch, handed unchanged to ``vid_batch_to_cuda``.
            time_len: number of roll-out iterations.
            sample: forwarded as-is to ``self.encoder.one_sample``.

        Returns:
            The collated predictions, extended with the ``'real_*'`` entries.
        """
        # 1. 'feats' = 'appr' | 'bbox', with image_tower AND factorize
        batch = self._forward_image_encode(vid_batch_to_cuda(intput))
        dim1_size = batch['image'].size(1)
        background = self.get_bg_feat(batch['bg_feat'], 0)

        frame = self.filter_time_stamp(0, batch)
        frame['pred_recon'] = batch['image'][1]  # reality
        triples = self._next_trip(frame['bbox'], frame['trip'])
        self.encoder.init_hidden(batch)

        step_outputs = []
        for step in range(time_len):
            # Only the latent is used; the other two outputs are dropped.
            latent, _kl_loss, _orig_z = self.encoder.one_sample(
                frame['pred_recon'], sample)
            # original note -- img_z: dt=1, V, D -> (V, O, D)
            object_latent = latent[0].unsqueeze(1)

            # The flag is only raised on the very first step, and only when
            # self.stop_grad is set.
            stop_flag = bool(self.stop_grad and step == 0)
            step_pred = self.graph_net(frame['appr'], frame['bbox'],
                                       object_latent, triples, stop_flag)
            step_pred['appr'] = self._apply_feat_constraint(step_pred['appr'])

            # NOTE(review): the decoder is fed the frame's features *before*
            # they are overwritten with this step's prediction -- confirm
            # this is intended.
            appr_in = self._unsqz(frame['appr'], 0)
            bbox_in = self._unsqz(frame['bbox'], 0)
            decoded = self.decoder(appr_in, bbox_in,
                                   self.decoder_target_size, background)
            step_pred['pred_recon'] = decoded['recon'].squeeze(0)

            frame.update(step_pred)
            step_outputs.append(step_pred.copy())

        collated = collate_batch(step_outputs, time_len, dim1_size)

        # Decoder outputs for the input features, stored as 'real_<key>'.
        reference = self.decoder(batch['appr'], batch['bbox'],
                                 self.decoder_target_size, background)
        for key in reference:
            collated['real_' + key] = reference[key]
        return collated
Esempio n. 5
0
    def forward_with_reality(self, intput, time_len):
        """Predict with the un-sampled encoder latent on a trimmed batch.

        Original note: encode z / u with ground truth I_{t+T} w/o resample.

        After encoding, the leading entries of every batch field are dropped
        before prediction, and the horizon is shortened by
        ``self.show_length``.

        Args:
            intput: raw video batch, handed unchanged to ``vid_batch_to_cuda``.
            time_len: total horizon; ``time_len - self.show_length`` is what
                reaches ``self._forward_with_z``.

        Returns:
            The prediction dict with an added ``'src_feats'`` entry holding
            the fourth value returned by ``self.encoder.no_sample``.
        """
        batch = self._forward_image_encode(vid_batch_to_cuda(intput))

        # The encoder's KL term and third output are not needed here.
        latent, _kl_loss, _ori_z, src_feats = self.encoder.no_sample(batch)

        # NOTE(review): the literal 3 presumably mirrors self.show_length,
        # and 'index' is cut three times as far -- confirm both.
        head = 3
        index_head = 3 * 3
        for name, value in batch.items():
            if name == 'index':
                batch[name] = value[index_head:]
            elif name == 'bg_feat':
                # 'bg_feat' is a sequence; each element is trimmed in place.
                for pos, feat in enumerate(value):
                    value[pos] = feat[head:]
            else:
                batch[name] = value[head:]

        remaining = time_len - self.show_length
        predictions = self._forward_with_z(batch, latent, remaining)
        assert type(predictions) is dict
        predictions['src_feats'] = src_feats
        return predictions
Esempio n. 6
0
    def forward(self, in_vecs):
        """Run the full pipeline on a trimmed batch and attach encoder extras.

        Steps:
            1. Move ``in_vecs`` to the device and extract image features
               (original note: 'feats' = 'appr' | 'bbox').
            2. Call ``self.encoder`` for the latent, the KL term, a value
               stored as ``'orig_z'`` and a fourth value that is unused here
               (original note: get z, kl_loss, using only I_0, I_dt-1).
            3. Drop the leading entries of every batch field.
            4. Predict with ``self._forward_with_z`` over
               ``self.dt - self.show_length``.

        Args:
            in_vecs: raw video batch, handed to ``vid_batch_to_cuda``.

        Returns:
            The predictions with ``'kl_loss'`` and ``'orig_z'`` added.
        """
        batch = self._forward_image_encode(vid_batch_to_cuda(in_vecs))
        latent, kl_term, raw_latent, _src_feats = self.encoder(batch)

        # NOTE(review): the literal 3 presumably mirrors self.show_length,
        # and 'index' is cut three times as far -- confirm both.
        head = 3
        index_head = 3 * 3
        for name, value in batch.items():
            if name == 'index':
                batch[name] = value[index_head:]
            elif name == 'bg_feat':
                # 'bg_feat' is a sequence; each element is trimmed in place.
                for pos, feat in enumerate(value):
                    value[pos] = feat[head:]
            else:
                batch[name] = value[head:]

        # before graph neural network
        horizon = self.dt - self.show_length
        outputs = self._forward_with_z(batch, latent, horizon)

        outputs['kl_loss'] = kl_term
        outputs['orig_z'] = raw_latent
        return outputs