def get_losses_outputs(self, i_hat, poses, inp=None):
    '''VGG (perceptual) loss setup for the i_hat reconstruction.

    Builds a VGG feature extractor sized to i_hat, runs both the
    reconstruction and the original image through it, and stacks each
    pair of feature maps into one output so a single vgg_loss can
    compare them.

    Args:
        i_hat: reconstructed image tensor.
        poses: list of pose output tensors (one per block).
        inp: original input image tensor. NOTE(review): the previous
            code referenced a free name `inp` never defined in this
            scope (guaranteed NameError at call time); it is now an
            explicit parameter. Defaults to None so the signature stays
            backward-compatible, and raises a clear error when missing.

    Returns:
        (losses, outputs): parallel lists of loss callables and the
        matching model output tensors (VGG pairs first, then poses).

    Raises:
        ValueError: if `inp` is not provided.
    '''
    if inp is None:
        raise ValueError(
            "get_losses_outputs requires the original input tensor "
            "`inp` to compute the perceptual loss against i_hat")
    # VGG feature extractor over the image shape (drop the batch dim)
    vgg_model = self.build_vgg_model(i_hat.shape.as_list()[1:])
    vgg_rec_outputs = vgg_model(i_hat)
    vgg_ori_outputs = vgg_model(inp)
    # stack reconstruction/original features pairwise so each model
    # output carries both tensors for the loss
    vgg_outputs = [
        Lambda(self.stack_vgg_outputs)(
            [vgg_rec_outputs[i], vgg_ori_outputs[i]])
        for i in range(len(vgg_rec_outputs))
    ]
    print("Type vgg output %s" % type(vgg_outputs))
    for i, out in enumerate(vgg_outputs):
        print("Shape layer %s: %s" % (i, out.shape))
    pose_losses = [pose_loss()] * self.n_blocks
    vgg_losses = [vgg_loss()] * len(vgg_outputs)
    losses = vgg_losses + pose_losses
    # outputs: VGG feature pairs first, then the pose outputs
    outputs = vgg_outputs
    outputs.extend(poses)
    return losses, outputs
def build(self):
    """Assemble and compile the reconstruction model: run both
    encoders on the input, concatenate their codes, decode to i_hat,
    and compile with reconstruction + per-pose losses."""
    image_in = Input(shape=self.input_shape)

    # run both encoders on the same input, timing each build
    t0 = time.time()
    z_a = self.appearance_encoder(image_in)
    t1 = time.time()
    z_p = self.pose_encoder(image_in)
    t2 = time.time()
    print("Build E_a %s, build E_p %s" % (t1 - t0, t2 - t1))
    print(type(z_a), type(z_p))
    print("Shape z_a %s" % str(z_a.shape))

    # decode the concatenated latent codes back to an image
    merged = self.concat(z_a, z_p)
    print("Shape concat %s" % str(merged.shape))
    i_hat = self.decoder(merged)

    # model outputs: the reconstruction, then every pose output
    model_outputs = [i_hat] + list(z_p)
    self.model = Model(inputs=image_in, outputs=model_outputs)
    print("Outputs shape %s" % self.model.output_shape)

    # one pose loss per pose output, preceded by the reconstruction loss
    per_pose = [pose_loss()] * len(z_p)
    losses = [reconstruction_loss()] + per_pose
    self.model.compile(loss=losses, optimizer=RMSprop(lr=self.start_lr))
    self.model.summary()
def build(self):
    '''Build the reduced-cycle model (overrides the base build).

    The first stage (up to i_hat) matches the base model; a cycle
    branch is then appended: latents are shuffled across the batch,
    decoded into mixed images, and re-encoded so cycle-consistency
    losses can compare original and re-encoded latents.

    Model outputs, in order:
        - i_hat         (None, 256, 256, 3)
        - pose          (None, 17, 4), repeated n_blocks times
        - concat_z_a    (None, 16, 16, 256)
        - concat_z_p    (None, 16, 16, 256)
        - i_hat_mixed   (None, 256, 256, 3)
    '''
    self.build_everything()

    image_in = Input(shape=self.input_shape, name='image_input')
    self.log("Input shape %s" % str(image_in.shape))

    # encode, then decode the concatenated latents
    z_a, z_p, poses = self.call_encoders(image_in)
    i_hat = self.concat_and_decode(z_a, z_p)
    # identity Lambda only names the tensor, distinguishing it from
    # the mixed reconstruction below
    i_hat = Lambda(lambda t: t, name='i_hat')(i_hat)

    # cross appearance/pose latents within the batch and decode
    shuffled = self.shuffle(z_a, z_p)
    i_hat_mixed = self.decoder_model(shuffled)
    i_hat_mixed = Lambda(lambda t: t, name='i_hat_mixed')(i_hat_mixed)

    # re-encode the mixed images to close the cycle
    cycle_z_a = self.appearance_model(i_hat_mixed)
    cycle_pose_outputs = self.pose_model(i_hat_mixed)
    cycle_poses, cycle_z_p = self.check_pose_output(cycle_pose_outputs)

    # pair each latent with its re-encoded counterpart so a single
    # output feeds each cycle loss
    concat_z_a = concatenate([z_a, cycle_z_a], name='cycle_za_concat')
    concat_z_p = concatenate([z_p, cycle_z_p], name='cycle_zp_concat')

    all_outputs = [i_hat] + poses + [concat_z_a, concat_z_p, i_hat_mixed]
    self.model = Model(inputs=image_in, outputs=all_outputs)
    print("Outputs shape %s" % self.model.output_shape)

    per_pose = [pose_loss()] * len(poses)
    losses = ([reconstruction_loss()] + per_pose
              + [cycle_loss(), cycle_loss(), noop_loss()])
    self.model.compile(loss=losses, optimizer=RMSprop(lr=self.start_lr))

    if self.verbose:
        self.log("Final model summary")
        self.model.summary()
def get_losses_outputs(self, i_hat, poses):
    """Pair each model output with its loss: a reconstruction loss for
    i_hat, followed by one pose loss per block.

    Args:
        i_hat: reconstructed image tensor.
        poses: list of pose output tensors (one per block).

    Returns:
        (losses, outputs): parallel lists suitable for model.compile.
    """
    # single pose-loss instance shared across all blocks
    p_loss = pose_loss()
    losses = [p_loss] * self.n_blocks
    losses.insert(0, reconstruction_loss())
    outputs = list(poses)
    outputs.insert(0, i_hat)
    return losses, outputs
def build_pose_only(self):
    '''Build and compile only the pose branch (no concat, no decoder).

    Used for baselines and the ablation study.
    '''
    pose_net = self.build_pose_model(self.input_shape, pose_only=True)
    self.model = pose_net
    # one pose loss per block output
    self.model.compile(
        loss=[pose_loss()] * self.n_blocks,
        optimizer=RMSprop(lr=self.start_lr))
    if self.verbose:
        self.model.summary()
def build_pose_only(self):
    '''Build and compile the pose branch alone — no concat, no
    decoder. Intended for baselines and the ablation study.
    '''
    image_in = Input(shape=self.input_shape)
    pose_out = self.pose_encoder(image_in)
    self.model = Model(inputs=image_in, outputs=pose_out)
    # one pose loss per pose output
    self.model.compile(
        loss=[pose_loss()] * len(pose_out),
        optimizer=RMSprop(lr=self.start_lr))
    self.model.summary()
def build(self):
    '''Build and compile the reconstruction model using the frozen
    appearance encoder directly plus a built pose model and decoder.

    NOTE(review): `outputs` has 2 entries ([i_hat, z_p]) while `losses`
    has 1 + self.n_blocks entries; Keras requires one loss per output,
    so this only lines up when n_blocks == 1 or z_p bundles the pose
    outputs — confirm against the caller. The commented-out lines below
    suggest this was in flux.
    '''
    # self.appearance_model = self.build_appearance_model(self.input_shape)
    self.pose_model = self.build_pose_model(self.input_shape)
    print("pose model summary")
    self.pose_model.summary()
    # decoder input is the concatenation of both codes
    self.decoder_model = self.build_decoder_model(
        (8, 8, 2048))  # i.e. 2048 for the regular model
    inp = Input(shape=self.input_shape)
    # encoders: appearance via the raw encoder fn, pose via the model
    z_a = self.appearance_encoder(inp)
    # z_a = self.appearance_model(inp)
    z_p = self.pose_model(inp)
    print(type(z_a), type(z_p))
    print("Shape z_a HELLO %s" % str(z_a.shape))
    print("Shape z_p %s" % str(z_p.shape))
    # decoder: concatenate the two codes and reconstruct the image
    concat = self.concat(z_a, z_p)
    print("Shape concat %s" % str(concat.shape))
    i_hat = self.decoder_model(concat)
    outputs = [i_hat, z_p]
    # outputs.extend(z_p)
    self.model = Model(inputs=inp, outputs=outputs)
    print("Outputs shape %s" % self.model.output_shape)
    # reconstruction loss for i_hat plus one pose loss per block
    # ploss = [pose_loss()] * len(z_p)
    ploss = [pose_loss()] * self.n_blocks
    losses = [reconstruction_loss()]
    losses.extend(ploss)
    self.model.compile(loss=losses, optimizer=RMSprop(lr=self.start_lr))
    self.model.summary()
def build(self):
    '''Build and compile the full cycle model.

    Outputs of this model is a ton of things so they can properly be
    used in losses. Outputs in order:
        - the reconstructed image i_hat (None, 256, 256, 3)
          -> reconstruction loss
        - n_block * pose output (None, n_joints, dim + 1)
          (+1 for visibility prob) -> pose loss
        - the concatenated z_a and z_a' -> cycle consistency loss
        - the concatenated z_p and z_p' -> cycle consistency loss
        - the intermediate mixed reconstructed images, for viz
          -> noop loss
    '''
    # build everything, timing each sub-model construction
    time_1 = time.time()
    self.appearance_model = self.build_appearance_model(self.input_shape)
    time_2 = time.time()
    self.pose_model = self.build_pose_model(self.input_shape)
    time_3 = time.time()
    # decoder consumes the channel-wise concat of both 1024-dim codes
    self.decoder_model = self.build_decoder_model((16, 16, 2048))  # ...
    time_4 = time.time()
    print("Build E_a %s, build E_p %s, decoder D %s" %
          (time_2 - time_1, time_3 - time_2, time_4 - time_3))
    inp = Input(shape=self.input_shape)
    print("Input shape %s" % str(inp.shape))
    # encoders
    z_a = self.appearance_model(inp)
    # NOTE(review): shape asserts are stripped under `python -O`
    assert z_a.shape.as_list() == [
        None, 16, 16, 1024
    ], 'wrong shape for z_a %s' % str(z_a.shape.as_list())
    pose_outputs = self.pose_model(inp)
    poses, z_p = self.check_pose_output(pose_outputs)
    print("Shape z_a %s, shape z_p %s" % (str(z_a.shape), str(z_p.shape)))
    # decoder: reconstruct the image from the concatenated codes
    concat = self.concat(z_a, z_p)
    assert concat.shape.as_list() == [
        None, 16, 16, 2048
    ], 'wrong concat shape %s' % str(concat.shape)
    i_hat = self.decoder_model(concat)
    # shuffle z_a and z_p from images from the batch and create new images
    concat_shuffled = self.shuffle(z_a, z_p)
    i_hat_mixed = self.decoder_model(concat_shuffled)
    # re-encode mixed images and get new z_a and z_p
    cycle_z_a = self.appearance_model(i_hat_mixed)
    cycle_pose_outputs = self.pose_model(i_hat_mixed)
    cycle_poses, cycle_z_p = self.check_pose_output(cycle_pose_outputs)
    # concat z_a and z_a', z_p and z_p' to have an output usable by the
    # cycle loss (one output tensor per cycle loss term)
    concat_z_a = concatenate([z_a, cycle_z_a])
    concat_z_p = concatenate([z_p, cycle_z_p])
    # build the whole model; output order must match the loss list below
    outputs = [i_hat] + poses + [concat_z_a] + [concat_z_p] + [i_hat_mixed]
    self.model = Model(inputs=inp, outputs=outputs)
    print("Outputs shape %s" % self.model.output_shape)
    # one pose loss per block, cycle losses for both concatenated
    # codes, and a no-op loss on the mixed image (visualization only)
    ploss = [pose_loss()] * len(poses)
    losses = [reconstruction_loss()] + ploss + [
        cycle_loss(), cycle_loss(), noop_loss()
    ]
    # loss = mean_squared_error
    self.model.compile(loss=losses, optimizer=RMSprop(lr=self.start_lr))
    self.model.summary()