Exemple #1
0
    def forward(self, imgs, objs, boxes, masks, obj_to_img, z_rand):
        """Run the reconstruction path and the random-latent path of the model.

        Object crops are taken from ``imgs`` and encoded by
        ``self.crop_encoder``. The encoded latent ``z_rec`` and the
        caller-supplied ``z_rand`` are then each pushed through the same
        chain: ``latent_generator`` -> ``featuremap_composition`` ->
        ``transformer_decoder`` -> reshape to an 8x8 map ->
        ``layout_encoder`` -> ``decoder``.

        Args:
            imgs: Images the input object crops are taken from.
            objs: Per-object input forwarded to the crop encoder and the
                latent generator (presumably category ids -- confirm
                against the caller).
            boxes: Per-object boxes used for cropping and latent generation.
            masks: Not used by this method.
            obj_to_img: Object-to-image assignment used for cropping and for
                composing per-object latents.
            z_rand: Latent used on the random path in place of ``z_rec``.

        Returns:
            The tuple ``(crops_input, crops_input_rec, crops_rand, img_rec,
            img_rand, mu, logvar, z_rand_rec)``. ``z_rand_rec`` is the second
            output of ``crop_encoder`` applied to crops of ``img_rand`` (the
            same output position as ``mu`` on the input crops).
        """
        crops_input = crop_bbox_batch(imgs, boxes, obj_to_img, self.obj_size)
        z_rec, mu, logvar = self.crop_encoder(crops_input, objs)

        # Semantic and spatial information; the second output of the latent
        # generator is not consumed by this method.
        latent_z, _ = self.latent_generator(objs, boxes, z_rec)
        latent_z_rand, _ = self.latent_generator(objs, boxes, z_rand)

        composed_z, mask = featuremap_composition(latent_z, obj_to_img)
        composed_z_rand, mask_rand = featuremap_composition(latent_z_rand, obj_to_img)

        feat = self.transformer_decoder(composed_z, mask)
        # Use the mask produced together with composed_z_rand; the original
        # passed the reconstruction-path mask here and left mask_rand unused.
        feat_rand = self.transformer_decoder(composed_z_rand, mask_rand)

        # Move the last axis in front of the token axis and fold the tokens
        # into an 8x8 grid; this requires exactly 64 tokens per sample.
        B = feat.size(0)
        C = feat.size(2)
        feat = feat.permute(0, 2, 1).contiguous().view(B, C, 8, 8)
        feat_rand = feat_rand.permute(0, 2, 1).contiguous().view(B, C, 8, 8)

        h_rec = self.layout_encoder(feat)
        h_rand = self.layout_encoder(feat_rand)

        img_rec = self.decoder(h_rec)
        img_rand = self.decoder(h_rand)

        # Re-encode object crops of the randomly generated image.
        crops_rand = crop_bbox_batch(img_rand, boxes, obj_to_img, self.obj_size)
        _, z_rand_rec, _ = self.crop_encoder(crops_rand, objs)

        crops_input_rec = crop_bbox_batch(img_rec, boxes, obj_to_img, self.obj_size)

        return crops_input, crops_input_rec, crops_rand, img_rec, img_rand, mu, logvar, z_rand_rec
Exemple #2
0
    def forward(self, imgs, objs, boxes, masks, obj_to_img, z_rand):
        """Generate a reconstructed image and a randomly-styled image.

        Object crops of ``imgs`` are encoded by ``self.crop_encoder``; the
        encoded latent and the caller-supplied ``z_rand`` are each passed
        through ``self.layout_encoder`` and ``self.decoder``. Crops of the
        random image are then re-encoded, and crops of the reconstructed
        image are extracted.

        Args:
            imgs: Images the input object crops are taken from.
            objs: Per-object input forwarded to the crop and layout encoders.
            boxes: Per-object boxes used for cropping.
            masks: Per-object masks forwarded to the layout encoder.
            obj_to_img: Object-to-image assignment used for cropping and by
                the layout encoder.
            z_rand: Latent used on the random path in place of the encoded one.

        Returns:
            The tuple ``(crops_input, crops_input_rec, crops_rand, img_rec,
            img_rand, mu, logvar, z_rand_rec)``. ``z_rand_rec`` is the second
            output of ``crop_encoder`` applied to crops of ``img_rand``.
        """

        def crop_objects(images):
            # Same cropping call for every image batch handled below.
            return crop_bbox_batch(images, boxes, obj_to_img, self.obj_size)

        crops_input = crop_objects(imgs)
        z_rec, mu, logvar = self.crop_encoder(crops_input, objs)

        # Both layout encodings are computed first, then both decodings
        # (reconstruction before random each time).
        # Original author's note on the hidden shape: (n, clstm_dim*2, 8, 8).
        hidden_states = [
            self.layout_encoder(objs, masks, obj_to_img, latent)
            for latent in (z_rec, z_rand)
        ]
        img_rec, img_rand = [self.decoder(hidden) for hidden in hidden_states]

        # Re-encode the objects cropped from the randomly generated image.
        crops_rand = crop_objects(img_rand)
        z_rand_rec = self.crop_encoder(crops_rand, objs)[1]

        crops_input_rec = crop_objects(img_rec)

        outputs = (
            crops_input,
            crops_input_rec,
            crops_rand,
            img_rec,
            img_rand,
            mu,
            logvar,
            z_rand_rec,
        )
        return outputs