def train(FLAGS):
    graph = Train_Graph(FLAGS)
    graph.build()
    summary_op = Summary.collect_inpainter_summary(graph, FLAGS)

    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                               for v in tf.trainable_variables()])
        inpainter_parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                                   for v in tf.trainable_variables(scope='Inpainter')])

    save_vars = tf.global_variables('Inpainter') + tf.global_variables('train_op') + tf.global_variables('summary_vars')
    saver = tf.train.Saver(save_vars, max_to_keep=100)
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "Inpainter_Sum"),
                             global_step=graph.global_step,
                             saver=saver,
                             checkpoint_basename='Inpainter',
                             save_model_secs=FLAGS.ckpt_secs,
                             summary_op=summary_op,
                             # summary_writer=USE_DEFAULT,
                             save_summaries_secs=FLAGS.summaries_secs)

    with sv.managed_session() as sess:
        myprint("Number of total params: {0} \n".format(sess.run(total_parameter_count)))
        start_time = time.time()
        step = sess.run(graph.global_step)
        progbar = Progbar(target=100000)  # 100k

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break
            fetches = {
                "train_op": graph.train_ops['Inpainter'],
                "loss": graph.loss['Inpainter'],
                "global_step_inc": graph.incr_global_step
            }
            results = sess.run(fetches, feed_dict={graph.is_training: True})

            if step % 1000 == 0:
                print(" Step:%3dk time:%4.4fmin InpainterLoss%4.2f "
                      % (step / 1000, (time.time() - start_time) / 60, results['loss']))
            if step % 100000 == 0:
                progbar = Progbar(target=100000)
            progbar.update(step % 100000)
            step += 1
        myprint("Training completed")
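
# --- Example driver (sketch) -------------------------------------------------
# A stand-in for the repo's parsed FLAGS object, listing only the attributes the
# inpainter pretraining loop above actually reads. The values are placeholders,
# not the project's defaults.
if __name__ == '__main__':
    from types import SimpleNamespace
    example_flags = SimpleNamespace(
        checkpoint_dir='./checkpoints/inpainter',  # Supervisor logdir / checkpoint dir
        ckpt_secs=600,                             # save a checkpoint every 10 minutes
        summaries_secs=120,                        # write summaries every 2 minutes
        max_training_hrs=24.0)                     # wall-clock training budget
    train(example_flags)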
def save_log(source, trg_dir, print_flags_dict, sha):
    file_name = source.split('/')[-1]
    new_file = os.path.join(trg_dir, file_name)
    log_name = 'log'
    while os.path.isfile(new_file):
        new_file = new_file[:-3] + '_c.sh'  # .sh
        log_name += '_c'
    os.system('cp ' + source + ' ' + new_file)
    myprint("Save " + source + " as " + new_file)

    log_file = os.path.join(trg_dir, log_name + '.txt')
    with open(log_file, 'w') as log_stream:
        log_stream.write('commit:' + sha + '\n')
        pprint.pprint(print_flags_dict, log_stream)
    with open(new_file, 'a') as sh_stream:
        sh_stream.write('\n#commit:' + sha)
    myprint('Corresponding log file ' + log_file)
    myinput("Enter to continue")
    os.system('chmod a=rx ' + log_file)
    os.system('chmod a=rx ' + new_file)
    return
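
# --- Example call (sketch) ---------------------------------------------------
# Typical usage of save_log(): archive the launch script and the parsed flags
# next to the checkpoints. The paths, the flags dict, and the way the commit
# hash is obtained below are illustrative assumptions, not the repo's wiring.
def _example_save_log():
    import subprocess
    sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
    flags_dict = {'checkpoint_dir': './checkpoints/run0', 'batch_size': 16}
    save_log(source='scripts/train_run0.sh',   # launch script to copy
             trg_dir='./checkpoints/run0',     # stored next to the checkpoints
             print_flags_dict=flags_dict,
             sha=sha)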
def train(FLAGS):
    # learner
    graph = Train_Graph(FLAGS)
    graph.build()
    summary_op, tex_latent_summary_op, bg_latent_summary_op, eval_summary_op = \
        Summary.collect_end2end_summary(graph, FLAGS)

    # train
    # define model saver
    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                               for v in tf.trainable_variables()])
    save_vars = tf.global_variables()
    # tf.global_variables('Inpainter')+tf.global_variables('Generator')+ \
    # tf.global_variables('VAE')+tf.global_variables('Fusion') \
    # +tf.global_variables('train_op')  # including global step
    if FLAGS.resume_CIS:
        CIS_vars = tf.global_variables('Inpainter') + tf.global_variables('Generator')
        CIS_saver = tf.train.Saver(CIS_vars, max_to_keep=100)
    mask_saver = tf.train.Saver(tf.global_variables('VAE//separate/maskVAE/'), max_to_keep=100)
    tex_saver = tf.train.Saver(tf.global_variables('VAE//separate/texVAE/'), max_to_keep=100)
    saver = tf.train.Saver(save_vars, max_to_keep=100)

    branch_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, 'branch' + str(m)))
                      for m in range(FLAGS.num_branch)]
    tex_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "tex_latent" + str(m)))
                          for m in range(FLAGS.tex_dim)]
    bg_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "bg_latent" + str(m)))
                         for m in range(FLAGS.bg_dim)]
    # mask_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "mask_latent"+str(m)))
    #                        for m in range(FLAGS.mask_dim)]
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "end2end_Sum"),
                             saver=None,
                             save_summaries_secs=0)  # not saved automatically, for flexibility

    with sv.managed_session() as sess:
        myprint("Number of total params: {0} \n".format(sess.run(total_parameter_count)))
        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint("Start from step {} vae_step {}".format(sess.run(graph.global_step),
                                                            sess.run(graph.vae_global_step)))
            myprint("Save checkpoint in {}".format(FLAGS.checkpoint_dir))
            if not os.path.dirname(FLAGS.fullmodel_ckpt) == FLAGS.checkpoint_dir:
                print("\033[0;30;41m" + "Warning: checkpoint dir and fullmodel ckpt do not match" + "\033[0m")
                # myprint("Please make sure that the checkpoint will be saved in the same dir as the resumed model")
        else:
            if os.path.isfile(FLAGS.mask_ckpt + '.index'):
                mask_saver.restore(sess, FLAGS.mask_ckpt)
                myprint("Load pretrained maskVAE {}".format(FLAGS.mask_ckpt))
            if os.path.isfile(FLAGS.tex_ckpt + '.index'):
                tex_saver.restore(sess, FLAGS.tex_ckpt)
                myprint("Load pretrained texVAE {}".format(FLAGS.tex_ckpt))
            if FLAGS.resume_CIS:
                assert os.path.isfile(FLAGS.CIS_ckpt + '.index')
                CIS_saver.restore(sess, FLAGS.CIS_ckpt)
                myprint("Load pretrained inpainter and generator {}".format(FLAGS.CIS_ckpt))
            else:
                myprint("Train from scratch")
        myinput('Press enter to continue')

        start_time = time.time()
        step = sess.run(graph.global_step)
        vae_step = sess.run(graph.vae_global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)  # 100k
        sum_iters = FLAGS.iters_gen_vae + FLAGS.iters_inp

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break
            fetches = {"global_step_inc": graph.incr_global_step,
                       "step": graph.global_step}
            if step % sum_iters < FLAGS.iters_inp:
                fetches['train_op'] = graph.train_ops['Inpainter']
                mask_capacity = vae_step * FLAGS.mask_capacity_inc
            else:
                fetches['train_op'] = graph.train_ops
                # 'VAE//separate/texVAE/', 'VAE//separate/texVAE_BG/', 'VAE//fusion', 'Fusion'
                mask_capacity = vae_step * FLAGS.mask_capacity_inc  # -> this iteration counts as a VAE step
                fetches['vae_global_step'], fetches['vae_global_step_inc'] = \
                    graph.vae_global_step, graph.incr_vae_global_step

            if step % FLAGS.summaries_steps == 0:
                fetches["Inpainter_Loss"], fetches["Generator_Loss"] = \
                    graph.loss['Inpainter'], graph.loss['Generator']
                fetches["VAE//texVAE"], fetches["VAE//texVAE_BG"], fetches['VAE//fusion'] = \
                    graph.loss['VAE//separate/texVAE/'], graph.loss['VAE//separate/texVAE_BG/'], graph.loss['VAE//fusion']
                fetches['tex_kl'], fetches['bg_kl'] = graph.loss['tex_kl'], graph.loss['bg_kl']
                fetches['summary'] = summary_op
            if step % FLAGS.ckpt_steps == 0:
                fetches['generated_masks'] = graph.generated_masks
                fetches['GT_masks'] = graph.GT_masks

            results = sess.run(fetches, feed_dict={graph.is_training: True,
                                                   graph.mask_capacity: mask_capacity})
            progbar.update(step % FLAGS.ckpt_steps)

            if step % FLAGS.summaries_steps == 0:
                print(" Step:%3dk time:%4.4fmin VAELoss%4.2f"
                      % (step / 1000, (time.time() - start_time) / 60,
                         results["VAE//texVAE"] + results['VAE//fusion'] + results['VAE//texVAE_BG']))
                sv.summary_writer.add_summary(results['summary'], step)
                for d in range(FLAGS.tex_dim):
                    tex_summary = sess.run(tex_latent_summary_op,
                                           feed_dict={graph.loss['tex_kl_var']: results['tex_kl'][d]})
                    tex_latent_writers[d].add_summary(tex_summary, step)
                for d in range(FLAGS.bg_dim):
                    bg_summary = sess.run(bg_latent_summary_op,
                                          feed_dict={graph.loss['bg_kl_var']: results['bg_kl'][d]})
                    bg_latent_writers[d].add_summary(bg_summary, step)
                # for d in range(FLAGS.mask_dim):
                #     mask_summary = sess.run(mask_latent_summary_op,
                #                             feed_dict={graph.loss['mask_kl_var']: results['mask_kl'][d]})
                #     mask_latent_writers[d].add_summary(mask_summary, step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

                # evaluation
                sess.run(graph.val_iterator.initializer)
                fetches = {'GT_masks': graph.GT_masks, 'generated_masks': graph.generated_masks}
                if FLAGS.dataset in ['multi_texture', 'flying_animals']:
                    # note: for multi_texture, bg_num is only a placeholder; it is the number of
                    # validation samples drawn for each type of image
                    score = [[] for _ in range(FLAGS.max_num)]
                    for bg in range(FLAGS.bg_num):
                        results_val = sess.run(fetches, feed_dict={graph.is_training: False})
                        for k in range(FLAGS.max_num):
                            score[k].append(Permute_IoU(label=results_val['GT_masks'][k],
                                                        pred=results_val['generated_masks'][k]))
                    for k in range(FLAGS.max_num):
                        eval_summary = sess.run(eval_summary_op,
                                                feed_dict={graph.loss['EvalIoU_var']: np.mean(score[k])})
                        branch_writers[k + 1].add_summary(eval_summary, step)
                else:
                    num_sample = FLAGS.skipnum
                    niter = num_sample // FLAGS.batch_size
                    assert num_sample % FLAGS.batch_size == 0
                    score = 0
                    for it in range(niter):
                        results_val = sess.run(fetches, feed_dict={graph.is_training: False})
                        for k in range(FLAGS.batch_size):
                            score += Permute_IoU(label=results_val['GT_masks'][k],
                                                 pred=results_val['generated_masks'][k])
                    score = score / num_sample
                    eval_summary = sess.run(eval_summary_op, feed_dict={graph.loss['EvalIoU_var']: score})
                    sv.summary_writer.add_summary(eval_summary, step)

            step = results['step']
            if 'vae_global_step' in results:  # only fetched on VAE/generator iterations
                vae_step = results['vae_global_step']
        myprint("Training completed")
def train(FLAGS):
    # learner
    graph = Train_Graph(FLAGS)
    graph.build()
    summary_op, latent_summary_op = Summary.collect_globalVAE_summary(graph, FLAGS)

    # train
    # define model saver
    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                               for v in tf.trainable_variables()])
    save_vars = tf.global_variables()
    saver = tf.train.Saver(save_vars, max_to_keep=100)
    latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "latent" + str(m)))
                      for m in range(FLAGS.tex_dim)]
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "globalVAE_Sum"),
                             saver=None,
                             save_summaries_secs=0)  # not saved automatically, for flexibility

    with sv.managed_session() as sess:
        myprint("Number of total params: {0} \n".format(sess.run(total_parameter_count)))
        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint("Start from step {}".format(sess.run(graph.global_step)))
            myprint("Save checkpoint in {}".format(FLAGS.checkpoint_dir))
            if not os.path.dirname(FLAGS.fullmodel_ckpt) == FLAGS.checkpoint_dir:
                print("\033[0;30;41m" + "Warning: checkpoint dir and fullmodel ckpt do not match" + "\033[0m")
                # myprint("Please make sure that the checkpoint will be saved in the same dir as the resumed model")
        else:
            myprint("Train from scratch")
        myinput('Press enter to continue')

        start_time = time.time()
        step = sess.run(graph.global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)  # 100k

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break
            fetches = {"global_step_inc": graph.incr_global_step,
                       "step": graph.global_step,
                       "train_op": graph.train_ops}
            if step % FLAGS.summaries_steps == 0:
                fetches["Loss"] = graph.loss
                fetches["kl_dim"] = graph.latent_loss_dim  # per-dimension KL
                fetches['summary'] = summary_op

            results = sess.run(fetches)
            progbar.update(step % FLAGS.ckpt_steps)

            if step % FLAGS.summaries_steps == 0:
                print(" Step:%3dk time:%4.4fmin Loss%4.2f "
                      % (step / 1000, (time.time() - start_time) / 60, results['Loss']))
                sv.summary_writer.add_summary(results['summary'], step)
                for m in range(FLAGS.tex_dim):
                    kl_summary = sess.run(latent_summary_op,
                                          feed_dict={graph.kl_var: results['kl_dim'][m]})
                    latent_writers[m].add_summary(kl_summary, step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

            step = results['step']
        myprint("Training completed")
def train(FLAGS):
    graph = Train_Graph(FLAGS)
    graph.build()
    summary_op, generator_summary_op, branch_summary_op, eval_summary_op = \
        Summary.collect_CIS_summary(graph, FLAGS)

    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                               for v in tf.trainable_variables()])
    save_vars = tf.global_variables('Inpainter') + tf.global_variables('Generator') + \
        tf.global_variables('train_op')  # including global step

    if FLAGS.resume_inpainter:
        assert os.path.isfile(FLAGS.inpainter_ckpt + '.index')
        inpainter_saver = tf.train.Saver(tf.trainable_variables('Inpainter'))  # only restore the trainable variables
    if FLAGS.resume_resnet:
        assert os.path.isfile(FLAGS.resnet_ckpt)
        resnet_reader = tf.compat.v1.train.NewCheckpointReader(FLAGS.resnet_ckpt)
        resnet_map = resnet_reader.get_variable_to_shape_map()
        resnet_dict = dict()
        for v in tf.trainable_variables('Generator//resnet_v2'):
            if 'resnet_v2_50/' + v.op.name[21:] in resnet_map.keys():
                resnet_dict['resnet_v2_50/' + v.op.name[21:]] = v
        resnet_var_name = [v.name for v in tf.trainable_variables('Generator//resnet_v2')
                           if 'resnet_v2_50/' + v.op.name[21:] in resnet_map.keys()]
        resnet_saver = tf.train.Saver(resnet_dict)

    saver = tf.train.Saver(save_vars, max_to_keep=100)
    branch_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "branch" + str(m)))
                      for m in range(FLAGS.num_branch)]  # save generator loss for each branch
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "CIS_Sum"),
                             saver=None,
                             save_summaries_secs=0)

    with sv.managed_session() as sess:
        myprint("Number of total params: {0} \n".format(sess.run(total_parameter_count)))
        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint("Start from step {}".format(sess.run(graph.global_step)))
            myprint("Save checkpoint in {}".format(FLAGS.checkpoint_dir))
            if not os.path.dirname(FLAGS.fullmodel_ckpt) == FLAGS.checkpoint_dir:
                print("\033[0;30;41m" + "Warning: checkpoint dir and fullmodel ckpt do not match" + "\033[0m")
                myprint("Please make sure that new checkpoints are saved in the same dir as the resumed model")
        else:
            if FLAGS.resume_inpainter:
                assert os.path.isfile(FLAGS.inpainter_ckpt + '.index')
                inpainter_saver.restore(sess, FLAGS.inpainter_ckpt)
                myprint("Load pretrained inpainter {}".format(FLAGS.inpainter_ckpt))
            if FLAGS.resume_resnet:
                resnet_saver.restore(sess, FLAGS.resnet_ckpt)
                myprint("Load pretrained resnet {}".format(FLAGS.resnet_ckpt))
            if not FLAGS.resume_resnet and not FLAGS.resume_inpainter:
                myprint("Train from scratch")
        myinput('Press enter to continue')

        start_time = time.time()
        step = sess.run(graph.global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)  # 100k
        sum_iters = FLAGS.iters_gen + FLAGS.iters_inp

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break
            fetches = {"global_step_inc": graph.incr_global_step,
                       "step": graph.global_step}
            if step % sum_iters < FLAGS.iters_inp:
                fetches['train_op'] = graph.train_ops['Inpainter']
            else:
                fetches['train_op'] = graph.train_ops['Generator']
            if step % FLAGS.summaries_steps == 0:
                fetches["Inpainter_Loss"], fetches["Generator_Loss"] = \
                    graph.loss['Inpainter'], graph.loss['Generator']
                fetches["Inpainter_branch_Loss"], fetches["Generator_branch_Loss"] = \
                    graph.loss['Inpainter_branch'], graph.loss['Generator_branch']
                fetches['Generator_Loss_denominator'] = graph.loss['Generator_denominator']
                fetches['summary'] = summary_op
            if step % FLAGS.ckpt_steps == 0:
                fetches['generated_masks'] = graph.generated_masks
                fetches['GT_masks'] = graph.GT_masks

            results = sess.run(fetches, feed_dict={graph.is_training: True})
            progbar.update(step % FLAGS.ckpt_steps)

            if step % FLAGS.summaries_steps == 0:
                print(" Step:%3dk time:%4.4fmin InpainterLoss%4.2f GeneratorLoss%4.2f "
                      % (step / 1000, (time.time() - start_time) / 60,
                         results['Inpainter_Loss'], results['Generator_Loss']))
                sv.summary_writer.add_summary(results['summary'], step)
                generator_summary = sess.run(generator_summary_op,
                                             feed_dict={graph.loss['Generator_var']: results['Generator_Loss']})
                sv.summary_writer.add_summary(generator_summary, step)
                for m in range(FLAGS.num_branch):
                    branch_summary = sess.run(
                        branch_summary_op,
                        feed_dict={
                            graph.loss['Inpainter_branch_var']:
                                np.mean(results['Inpainter_branch_Loss'][:, m], axis=0),
                            graph.loss['Generator_branch_var']:
                                np.mean(results['Generator_branch_Loss'][:, m], axis=0),
                            graph.loss['Generator_denominator_var']:
                                np.mean(results['Generator_Loss_denominator'][:, m], axis=0)
                        })
                    branch_writers[m].add_summary(branch_summary, step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

                # evaluation
                sess.run(graph.val_iterator.initializer)
                fetches = {'GT_masks': graph.GT_masks,
                           'generated_masks': graph.generated_masks}
                num_sample = 200
                niter = num_sample // FLAGS.batch_size
                score = 0
                for it in range(niter):
                    results_val = sess.run(fetches, feed_dict={graph.is_training: False})
                    for k in range(FLAGS.batch_size):
                        score += Permute_IoU(label=results_val['GT_masks'][k],
                                             pred=results_val['generated_masks'][k])
                score = score / num_sample
                eval_summary = sess.run(eval_summary_op, feed_dict={graph.loss['EvalIoU_var']: score})
                sv.summary_writer.add_summary(eval_summary, step)

            step = results['step']
        myprint("Training completed")
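
# --- Permute_IoU (sketch) ----------------------------------------------------
# The evaluation above relies on a Permute_IoU helper that is not shown in this
# section. Assuming it scores a permutation-invariant IoU between ground-truth
# and predicted mask stacks (each ground-truth object matched to its best
# predicted branch), a minimal NumPy version could look like this; the real
# helper's signature and mask layout may differ.
def permute_iou_sketch(label, pred, eps=1e-6):
    """label, pred: [num_objects, H, W] binary masks.
    Returns the best mean IoU over all assignments of predicted masks to
    ground-truth masks (brute force; fine for a handful of objects)."""
    import itertools
    n = label.shape[0]
    best = 0.0
    for perm in itertools.permutations(range(n)):
        ious = []
        for gt_idx, pr_idx in enumerate(perm):
            inter = np.logical_and(label[gt_idx], pred[pr_idx]).sum()
            union = np.logical_or(label[gt_idx], pred[pr_idx]).sum()
            ious.append(inter / (union + eps))
        best = max(best, float(np.mean(ious)))
    return best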
def train(FLAGS):
    # learner
    graph = Train_Graph(FLAGS)
    graph.build()
    summary_op, eval_summary_op = Summary.collect_PC_summary(graph, FLAGS)

    saver_vars = [v for v in tf.global_variables('Inpainter') + tf.global_variables('Generator') +
                  tf.global_variables('VAE') + tf.global_variables('Fusion')
                  if 'Adam' not in v.op.name]
    saver = tf.train.Saver(saver_vars, max_to_keep=100)
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "end2end_Sum"),
                             saver=None,
                             save_summaries_secs=0)

    with sv.managed_session() as sess:
        assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
        saver.restore(sess, FLAGS.fullmodel_ckpt)
        myprint("Finetune model {} for perceptual consistency".format(FLAGS.fullmodel_ckpt))
        saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'newmodel'), global_step=0)
        myinput('Press enter to continue')

        start_time = time.time()
        step = sess.run(graph.global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)  # 100k
        sum_iters = FLAGS.iters_gen + FLAGS.iters_inp

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break
            fetches = {"global_step_inc": graph.incr_global_step,
                       "step": graph.global_step}
            if step % sum_iters < FLAGS.iters_inp:
                fetches['train_op'] = graph.train_ops['Inpainter']
            else:
                fetches['train_op'] = graph.train_ops['Generator']
            if step % FLAGS.summaries_steps == 0:
                fetches['summary'] = summary_op

            results = sess.run(fetches, feed_dict={graph.is_training: True})
            progbar.update(step % FLAGS.ckpt_steps)

            if step % FLAGS.summaries_steps == 0:
                print(" Step:%3dk time:%4.4fmin"
                      % (step / 1000, (time.time() - start_time) / 60))
                sv.summary_writer.add_summary(results['summary'], step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

            if step % (100 * FLAGS.summaries_steps) == 0 and not step == 0:
                # evaluation
                sess.run(graph.val_iterator.initializer)
                fetches = {'GT_masks': graph.GT_masks,
                           'generated_masks': graph.generated_masks}
                num_sample = 9 * 9 * 9 * 9 - 1
                niter = num_sample // FLAGS.batch_size
                assert num_sample % FLAGS.batch_size == 0
                score = 0
                arg_maxIoUs = []
                for it in range(niter):
                    results_val = sess.run(fetches, feed_dict={graph.is_training: False})
                    for k in range(FLAGS.batch_size):
                        k_score, arg_maxIoU = Permute_IoU(label=results_val['GT_masks'][k],
                                                          pred=results_val['generated_masks'][k])
                        score += k_score
                        arg_maxIoUs.append(arg_maxIoU)
                score = score / num_sample
                arg_maxIoUs = np.stack(arg_maxIoUs, axis=0)  # num_sample x 3
                count = np.sum(arg_maxIoUs, axis=0)  # 3 entries: 0 square // 1 ellipse // 2 background
                switching_rate = np.min(count) / num_sample
                eval_summary = sess.run(eval_summary_op,
                                        feed_dict={graph.loss['EvalIoU_var']: score,
                                                   graph.switching_rate: switching_rate})
                sv.summary_writer.add_summary(eval_summary, step)

            step = results['step']
        myprint("Training completed")
def train(FLAGS):
    # learner
    graph = Train_Graph(FLAGS)
    graph.build()
    summary_op, tex_latent_summary_op, mask_latent_summary_op, bg_latent_summary_op = \
        Summary.collect_VAE_summary(graph, FLAGS)

    # train
    # define model saver
    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v))
                                               for v in tf.trainable_variables()])
    save_vars = tf.global_variables()
    if FLAGS.resume_CIS:
        CIS_vars = tf.global_variables('Inpainter') + tf.global_variables('Generator')
        CIS_saver = tf.train.Saver(CIS_vars, max_to_keep=100)
    saver = tf.train.Saver(save_vars, max_to_keep=100)

    tex_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "tex_latent" + str(m)))
                          for m in range(FLAGS.tex_dim)]
    bg_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "bg_latent" + str(m)))
                         for m in range(FLAGS.bg_dim)]
    mask_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "mask_latent" + str(m)))
                           for m in range(FLAGS.mask_dim)]
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "VAE_Sum"),
                             saver=None,
                             save_summaries_secs=0)  # not saved automatically, for flexibility

    with sv.managed_session() as sess:
        myprint("Number of total params: {0} \n".format(sess.run(total_parameter_count)))
        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint("Start from vae_step {}".format(sess.run(graph.vae_global_step)))
            myprint("Save checkpoint in {}".format(FLAGS.checkpoint_dir))
            if not os.path.dirname(FLAGS.fullmodel_ckpt) == FLAGS.checkpoint_dir:
                print("\033[0;30;41m" + "Warning: checkpoint dir and fullmodel ckpt do not match" + "\033[0m")
                # myprint("Please make sure that the checkpoint will be saved in the same dir as the resumed model")
        else:
            if FLAGS.resume_CIS:
                assert os.path.isfile(FLAGS.CIS_ckpt + '.index')
                CIS_saver.restore(sess, FLAGS.CIS_ckpt)
                myprint("Load pretrained inpainter and generator {}".format(FLAGS.CIS_ckpt))
            else:
                myprint("Train from scratch")
        myinput('Press enter to continue')

        start_time = time.time()
        # step = sess.run(graph.global_step)
        vae_step = sess.run(graph.vae_global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)  # 100k

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break
            fetches = {"vae_global_step_inc": graph.incr_vae_global_step,
                       "vae_step": graph.vae_global_step}
            fetches['train_op'] = graph.train_ops
            mask_capacity = vae_step * FLAGS.mask_capacity_inc  # linear capacity schedule over VAE steps
            if vae_step % FLAGS.summaries_steps == 0:
                fetches['tex_kl'], fetches['mask_kl'], fetches['bg_kl'] = \
                    graph.loss['tex_kl'], graph.loss['mask_kl'], graph.loss['bg_kl']
                fetches['Fusion'] = graph.loss['Fusion']
                fetches['summary'] = summary_op

            results = sess.run(fetches, feed_dict={graph.is_training: True,
                                                   graph.mask_capacity: mask_capacity})
            progbar.update(vae_step % FLAGS.ckpt_steps)

            if vae_step % FLAGS.summaries_steps == 0:
                print(" Step:%3dk time:%4.4fmin "
                      % (vae_step / 1000, (time.time() - start_time) / 60))
                sv.summary_writer.add_summary(results['summary'], vae_step)
                for d in range(FLAGS.tex_dim):
                    tex_summary = sess.run(tex_latent_summary_op,
                                           feed_dict={graph.loss['tex_kl_var']: results['tex_kl'][d]})
                    tex_latent_writers[d].add_summary(tex_summary, vae_step)
                for d in range(FLAGS.bg_dim):
                    bg_summary = sess.run(bg_latent_summary_op,
                                          feed_dict={graph.loss['bg_kl_var']: results['bg_kl'][d]})
                    bg_latent_writers[d].add_summary(bg_summary, vae_step)
                for d in range(FLAGS.mask_dim):
                    mask_summary = sess.run(mask_latent_summary_op,
                                            feed_dict={graph.loss['mask_kl_var']: results['mask_kl'][d]})
                    mask_latent_writers[d].add_summary(mask_summary, vae_step)

            if vae_step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=vae_step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

            vae_step = results['vae_step']
        myprint("Training completed")
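
# --- Capacity-annealed KL (sketch) -------------------------------------------
# mask_capacity above grows linearly with the VAE step, which matches the usual
# capacity-annealing trick for beta-VAEs: penalize |KL - C| for a capacity C
# that rises slowly during training. How the graph consumes the placeholder is
# not shown in this section; the formulation below is a representative sketch,
# and the gamma weight and argument names are assumptions.
def _capacity_loss_sketch(kl, mask_capacity, gamma=100.0):
    # kl: summed KL divergence of the (mask) posterior, a scalar tensor
    # mask_capacity: placeholder fed as vae_step * FLAGS.mask_capacity_inc
    return gamma * tf.abs(kl - mask_capacity)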
def eval(FLAGS):
    graph = Traverse_Graph(FLAGS)
    graph.build()
    restore_vars = tf.global_variables('VAE') + tf.global_variables('Generator') + tf.global_variables('Fusion')
    saver = tf.train.Saver(restore_vars)
    # CIS_saver = tf.train.Saver(tf.global_variables('Generator'))

    with tf.Session() as sess:
        sess.run(tf.compat.v1.global_variables_initializer())
        assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
        saver.restore(sess, FLAGS.fullmodel_ckpt)
        # CIS_saver.restore(sess, FLAGS.CIS_ckpt)
        # saver.save(sess, '/home/yutong/Learning-to-manipulate-individual-objects-in-an-image-Implementation/save_checkpoint/md/model', global_step=0)
        myprint("resume model {}".format(FLAGS.fullmodel_ckpt))

        fetches = {
            'image_batch': graph.image_batch,
            'generated_masks': graph.generated_masks,
            'traverse_results': graph.traverse_results,
            'out_bg': graph.out_bg,
            'in_bg': graph.in_bg
        }
        assert FLAGS.batch_size == 1
        input_img = convert2float(imageio.imread(FLAGS.input_img))
        input_img = np.expand_dims(input_img, axis=0)
        results = sess.run(fetches, feed_dict={graph.image_batch0: input_img})

        img = convert2int(results['image_batch'][0])
        imageio.imwrite(os.path.join(FLAGS.checkpoint_dir, 'img.png'), img)
        for i in range(FLAGS.num_branch):
            imageio.imwrite(os.path.join(FLAGS.checkpoint_dir, 'segment_{}.png'.format(i)),
                            convert2int(results['generated_masks'][0, :, :, :, i] * results['image_batch'][0]))

        outputs = np.array(results['traverse_results'])
        if FLAGS.traverse_type == 'tex':
            nch = 3
            ndim = FLAGS.tex_dim
        elif FLAGS.traverse_type == 'bg':
            nch = 3
            ndim = FLAGS.bg_dim
        else:
            nch = 1
            ndim = FLAGS.mask_dim

        if FLAGS.traverse_type == 'bg':
            traverse_branch = [FLAGS.num_branch - 1]
        else:
            traverse_branch = [i for i in range(0, FLAGS.num_branch)
                               if FLAGS.traverse_branch == 'all' or str(i) in FLAGS.traverse_branch.split(',')]
        traverse_value = list(np.linspace(FLAGS.traverse_start, FLAGS.traverse_end, 60))

        if FLAGS.dataset == 'flying_animals':
            outputs = np.reshape(outputs, [len(traverse_branch), FLAGS.top_kdim, len(traverse_value),
                                           FLAGS.img_height // 2, FLAGS.img_width // 2, -1])
        else:
            outputs = np.reshape(outputs, [len(traverse_branch), FLAGS.top_kdim, len(traverse_value),
                                           FLAGS.img_height, FLAGS.img_width, -1])
        # tbranch * tdim * step * H * W * 3

        branches = []
        for i in range(len(traverse_branch)):
            values = [[None for jj in range(FLAGS.top_kdim)] for ii in range(len(traverse_value))]
            b = traverse_branch[i]
            out = outputs[i]  # tdim * step * H * W * 3
            for d in range(FLAGS.top_kdim):
                gif_imgs = []
                for j in range(len(traverse_value)):
                    img = (out[d, j, :, :, :] * 255).astype(np.uint8)
                    gif_imgs.append(img)
                    values[j][d] = pad_img(img)
                name = 'branch{}_var{}.gif'.format(b, d)
                imageio.mimsave(os.path.join(FLAGS.checkpoint_dir, name), gif_imgs, duration=1 / 30)
            # values: len(traverse_value) * top_kdim padded images
            value_slices = [np.concatenate(values[j], axis=1) for j in range(len(traverse_value))]
            # group different dimensions along the x axis -> len(traverse_value) frames of (H, W*top_kdim, 3)
            branches.append(value_slices)

        merge_slices = [np.concatenate([branches[i][j] for i in range(len(traverse_branch))], axis=0)
                        for j in range(len(traverse_value))]
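
# --- Writing the merged traversal (sketch) -----------------------------------
# merge_slices above stacks every traversed branch into one frame per traversal
# value. Frames assembled this way can be written out as a single animation;
# the helper below is a sketch, and the file name and frame rate are assumptions
# rather than the repo's actual choice.
def _save_merged_traversal_sketch(merge_slices, checkpoint_dir):
    imageio.mimsave(os.path.join(checkpoint_dir, 'traverse_merged.gif'),
                    merge_slices, duration=1 / 30)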