def forward(self, imgs, objs, boxes, masks, obj_to_img, z_rand):
    """Run the reconstruction and random-latent branches of the generator.

    Both branches share the same pipeline: per-object latents are produced
    by ``self.latent_generator``, composed per image with
    ``featuremap_composition``, passed through ``self.transformer_decoder``,
    reshaped into a spatial feature map, encoded by ``self.layout_encoder``
    and finally decoded into an image by ``self.decoder``. The
    reconstruction branch uses the latent encoded from the real object
    crops; the random branch uses the caller-supplied ``z_rand``.

    Args:
        imgs: Input images; object crops are taken from them with
            ``crop_bbox_batch``.
        objs: Per-object input forwarded to ``self.crop_encoder`` and
            ``self.latent_generator``.
        boxes: Per-object boxes used for cropping and forwarded to
            ``self.latent_generator``.
        masks: Not used by this method; kept so the signature stays
            compatible with existing callers.
        obj_to_img: Object-to-image assignment forwarded to
            ``crop_bbox_batch`` and ``featuremap_composition``.
        z_rand: Latent used in place of the encoded one for the random
            branch.

    Returns:
        An 8-tuple ``(crops_input, crops_input_rec, crops_rand, img_rec,
        img_rand, mu, logvar, z_rand_rec)``:

        * ``crops_input`` - object crops taken from ``imgs``.
        * ``crops_input_rec`` - the same boxes cropped from ``img_rec``.
        * ``crops_rand`` - the same boxes cropped from ``img_rand``.
        * ``img_rec`` / ``img_rand`` - decoder outputs of the two branches.
        * ``mu`` / ``logvar`` - second and third values returned by
          ``self.crop_encoder`` for the input crops.
        * ``z_rand_rec`` - second value returned by ``self.crop_encoder``
          for ``crops_rand`` (the position named ``mu`` in the first call).
    """
    crops_input = crop_bbox_batch(imgs, boxes, obj_to_img, self.obj_size)
    z_rec, mu, logvar = self.crop_encoder(crops_input, objs)

    # Semantic and spatial information. Only the first value returned by
    # the latent generator is consumed here; the second one is discarded.
    latent_z, _ = self.latent_generator(objs, boxes, z_rec)
    latent_z_rand, _ = self.latent_generator(objs, boxes, z_rand)

    composed_z, mask = featuremap_composition(latent_z, obj_to_img)
    composed_z_rand, mask_rand = featuremap_composition(
        latent_z_rand, obj_to_img
    )

    feat = self.transformer_decoder(composed_z, mask)
    # Each branch is decoded with the mask produced by its own composition
    # step, so the mask always matches the composed tensor it accompanies.
    feat_rand = self.transformer_decoder(composed_z_rand, mask_rand)

    # Swap dims 1 and 2 and fold what becomes the last axis into an 8x8
    # grid. The view only succeeds when feat.size(1) == 64.
    B = feat.size(0)
    C = feat.size(2)
    feat = feat.permute(0, 2, 1).contiguous().view(B, C, 8, 8)
    feat_rand = feat_rand.permute(0, 2, 1).contiguous().view(B, C, 8, 8)

    h_rec = self.layout_encoder(feat)
    h_rand = self.layout_encoder(feat_rand)

    img_rec = self.decoder(h_rec)
    img_rand = self.decoder(h_rand)

    # Re-encode the crops of the randomly generated image; only the middle
    # value of the encoder's three outputs is kept.
    crops_rand = crop_bbox_batch(img_rand, boxes, obj_to_img, self.obj_size)
    _, z_rand_rec, _ = self.crop_encoder(crops_rand, objs)

    crops_input_rec = crop_bbox_batch(img_rec, boxes, obj_to_img, self.obj_size)

    return (
        crops_input,
        crops_input_rec,
        crops_rand,
        img_rec,
        img_rand,
        mu,
        logvar,
        z_rand_rec,
    )
def forward(self, imgs, objs, boxes, masks, obj_to_img, z_rand):
    """Generate a reconstructed image and a randomly-sampled image.

    The reconstruction branch feeds the latent encoded from the real object
    crops into ``self.layout_encoder``; the random branch feeds ``z_rand``
    instead. Both hidden layouts are turned into images by ``self.decoder``.
    The random image is then cropped and re-encoded, and the reconstructed
    image is cropped with the same boxes.

    Args:
        imgs: Input images from which the object crops are taken.
        objs: Per-object input forwarded to ``self.crop_encoder`` and
            ``self.layout_encoder``.
        boxes: Per-object boxes handed to ``crop_bbox_batch``.
        masks: Per-object masks forwarded to ``self.layout_encoder``.
        obj_to_img: Object-to-image assignment forwarded to
            ``crop_bbox_batch`` and ``self.layout_encoder``.
        z_rand: Latent used in place of the encoded one for the random
            branch.

    Returns:
        An 8-tuple ``(crops_input, crops_input_rec, crops_rand, img_rec,
        img_rand, mu, logvar, z_rand_rec)``, where ``z_rand_rec`` is the
        second of the three values ``self.crop_encoder`` returns for the
        crops of the random image.
    """
    crop_size = self.obj_size

    real_crops = crop_bbox_batch(imgs, boxes, obj_to_img, crop_size)
    encoded = self.crop_encoder(real_crops, objs)
    z_rec, mu, logvar = encoded

    # Original author's note on the layout encoder output shape:
    # (n, clstm_dim*2, 8, 8) - TODO confirm against the layout encoder.
    hidden_rec = self.layout_encoder(objs, masks, obj_to_img, z_rec)
    hidden_rand = self.layout_encoder(objs, masks, obj_to_img, z_rand)

    recon_img = self.decoder(hidden_rec)
    sampled_img = self.decoder(hidden_rand)

    # Crop the sampled image and re-encode it; only the middle output of
    # the encoder is kept.
    sampled_crops = crop_bbox_batch(sampled_img, boxes, obj_to_img, crop_size)
    _first, z_rand_rec, _last = self.crop_encoder(sampled_crops, objs)

    recon_crops = crop_bbox_batch(recon_img, boxes, obj_to_img, crop_size)

    outputs = (
        real_crops,
        recon_crops,
        sampled_crops,
        recon_img,
        sampled_img,
        mu,
        logvar,
        z_rand_rec,
    )
    return outputs