Example #1
0
def train(FLAGS):
    """Train the Inpainter on its own under a ``tf.train.Supervisor``.

    The supervisor is given the saver and the summary op, together with
    ``FLAGS.ckpt_secs`` and ``FLAGS.summaries_secs`` as its save intervals.
    The loop ends once ``FLAGS.max_training_hrs`` hours of wall-clock time
    have passed or the supervisor requests a stop.

    Args:
        FLAGS: flag object; the attributes read are the ones referenced below.
    """
    model = Train_Graph(FLAGS)
    model.build()

    inpainter_summary = Summary.collect_inpainter_summary(model, FLAGS)

    def _element_count(variables):
        # Tensor holding the total number of scalar elements in ``variables``.
        return tf.reduce_sum([tf.reduce_prod(tf.shape(var)) for var in variables])

    with tf.name_scope("parameter_count"):
        total_parameter_count = _element_count(tf.trainable_variables())
        # Added to the graph but never evaluated in this function.
        inpainter_parameter_count = _element_count(
            tf.trainable_variables(scope='Inpainter'))

    # Variables written to the checkpoint: one list per scope, concatenated.
    checkpoint_vars = []
    for scope_name in ('Inpainter', 'train_op', 'summary_vars'):
        checkpoint_vars = checkpoint_vars + tf.global_variables(scope_name)
    saver = tf.train.Saver(checkpoint_vars, max_to_keep=100)

    supervisor = tf.train.Supervisor(
        logdir=os.path.join(FLAGS.checkpoint_dir, "Inpainter_Sum"),
        global_step=model.global_step,
        saver=saver,
        checkpoint_basename='Inpainter',
        save_model_secs=FLAGS.ckpt_secs,
        summary_op=inpainter_summary,
        save_summaries_secs=FLAGS.summaries_secs)

    with supervisor.managed_session() as sess:
        param_total = sess.run(total_parameter_count)
        myprint("Number of total params: {0} \n".format(param_total))

        began = time.time()
        step = sess.run(model.global_step)
        progbar = Progbar(target=100000)  # one bar per 100k steps

        while (time.time() - began) < FLAGS.max_training_hrs * 3600 \
                and not supervisor.should_stop():
            results = sess.run(
                {
                    "train_op": model.train_ops['Inpainter'],
                    "loss": model.loss['Inpainter'],
                    "global_step_inc": model.incr_global_step,
                },
                feed_dict={model.is_training: True})

            if step % 1000 == 0:
                elapsed_min = (time.time() - began) / 60
                print("   Step:%3dk time:%4.4fmin   InpainterLoss%4.2f "
                      % (step / 1000, elapsed_min, results['loss']))

            # Start a fresh bar at every 100k boundary.
            if step % 100000 == 0:
                progbar = Progbar(target=100000)
            progbar.update(step % 100000)
            # The local counter is advanced by hand rather than re-read from the graph.
            step += 1

        myprint("Training completed")
def save_log(source, trg_dir, print_flags_dict, sha):
    """Archive the launch script and a flag dump for this run in ``trg_dir``.

    Copies ``source`` into ``trg_dir`` (appending ``_c`` to the file stem for
    as long as that name is already taken), writes ``log[_c...].txt`` holding
    the commit id and the pretty-printed flags, appends the commit id to the
    copied script as a trailing comment, waits for the user to confirm, and
    finally makes both files read/execute-only for everyone.

    Args:
        source: path of the script to archive.
        trg_dir: directory receiving the copy and the log file.
        print_flags_dict: object dumped into the log with ``pprint``.
        sha: commit identifier string recorded in both files.

    Raises:
        OSError: if the copy, the writes or the permission changes fail.
    """
    # Imported locally: the file's import header is not visible from this block.
    import shutil

    new_file = os.path.join(trg_dir, os.path.basename(source))
    log_name = 'log'
    # Never overwrite an earlier archive: grow the "_c" suffix on both the
    # script copy and the log name until the script name is free.
    while os.path.isfile(new_file):
        stem, ext = os.path.splitext(new_file)
        new_file = stem + '_c' + ext  # "run.sh" -> "run_c.sh"
        log_name += '_c'

    # Copy through the standard library rather than a shell command, so that
    # paths with spaces or shell metacharacters work and failures raise.
    # copyfile() leaves the source's permission bits behind, so the copy is
    # still writable for the append below even when ``source`` is read-only.
    shutil.copyfile(source, new_file)
    myprint ("Save "+source +" as "+new_file)

    log_file = os.path.join(trg_dir, log_name+'.txt')
    with open(log_file, 'w') as log_stream:
        log_stream.write('commit:' + sha + '\n')
        pprint.pprint(print_flags_dict, log_stream)
    with open(new_file, 'a') as sh_stream:
        sh_stream.write('\n#commit:'+sha)
    myprint('Corresponding log file '+log_file)
    myinput("Enter to continue")

    # Equivalent of ``chmod a=rx``: r-x for user, group and others.
    os.chmod(log_file, 0o555)
    os.chmod(new_file, 0o555)
    return
def train(FLAGS):
    """Run end-to-end training, alternating Inpainter and joint updates.

    Builds ``Train_Graph`` and either restores a full checkpoint
    (``FLAGS.resume_fullmodel``) or loads whichever pretrained mask-VAE,
    texture-VAE and CIS checkpoints are available. It then loops until
    ``FLAGS.max_training_hrs`` hours of wall-clock time have passed or the
    supervisor requests a stop.

    Within each cycle of ``FLAGS.iters_inp + FLAGS.iters_gen_vae`` steps, the
    first ``FLAGS.iters_inp`` steps run the Inpainter train op and the rest
    run every op in ``graph.train_ops``. Summaries are written every
    ``FLAGS.summaries_steps`` steps. A checkpoint is saved and a validation
    IoU evaluation is run every ``FLAGS.ckpt_steps`` steps.

    Args:
        FLAGS: flag object; the attributes read are the ones referenced below.
    """
    # learner
    graph = Train_Graph(FLAGS)
    graph.build()

    summary_op, tex_latent_summary_op, bg_latent_summary_op, eval_summary_op = \
        Summary.collect_end2end_summary(graph, FLAGS)

    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    # The full-model checkpoint holds every global variable.
    save_vars = tf.global_variables()

    if FLAGS.resume_CIS:
        CIS_vars = tf.global_variables('Inpainter')+tf.global_variables('Generator')
        CIS_saver = tf.train.Saver(CIS_vars, max_to_keep=100)

    # Dedicated savers so the pretrained sub-VAEs can be restored on their own.
    mask_saver = tf.train.Saver(tf.global_variables('VAE//separate/maskVAE/'), max_to_keep=100)
    tex_saver = tf.train.Saver(tf.global_variables('VAE//separate/texVAE/'), max_to_keep=100)

    saver = tf.train.Saver(save_vars, max_to_keep=100)
    # One event directory per branch / latent dimension.
    branch_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, 'branch'+str(m)))
                      for m in range(FLAGS.num_branch)]
    tex_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "tex_latent"+str(m)))
                          for m in range(FLAGS.tex_dim)]
    bg_latent_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "bg_latent"+str(m)))
                         for m in range(FLAGS.bg_dim)]

    # saver=None / save_summaries_secs=0: checkpoints and summaries are
    # written explicitly inside the loop instead of by the supervisor.
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir, "end2end_Sum"),
                             saver=None, save_summaries_secs=0)

    with sv.managed_session() as sess:
        myprint ("Number of total params: {0} \n".format( \
            sess.run(total_parameter_count)))
        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt+'.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint ("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint ("Start from step {} vae_step{}".format(sess.run(graph.global_step), sess.run(graph.vae_global_step)))
            myprint ("Save checkpoint in          {}".format(FLAGS.checkpoint_dir))
            if not os.path.dirname(FLAGS.fullmodel_ckpt) == FLAGS.checkpoint_dir:
                print ("\033[0;30;41m"+"Warning: checkpoint dir and fullmodel ckpt do not match"+"\033[0m")
        else:
            # The sub-VAE checkpoints are optional: load them only if present.
            if os.path.isfile(FLAGS.mask_ckpt+'.index'):
                mask_saver.restore(sess, FLAGS.mask_ckpt)
                myprint ("Load pretrained maskVAE {}".format(FLAGS.mask_ckpt))
            if os.path.isfile(FLAGS.tex_ckpt+'.index'):
                tex_saver.restore(sess, FLAGS.tex_ckpt)
                myprint ("Load pretrained texVAE {}".format(FLAGS.tex_ckpt))
            if FLAGS.resume_CIS:
                assert os.path.isfile(FLAGS.CIS_ckpt+'.index')
                CIS_saver.restore(sess, FLAGS.CIS_ckpt)
                myprint ("Load pretrained inpainter and generator {}".format(FLAGS.CIS_ckpt))
            else:
                myprint ("Train from scratch")
        myinput('Press enter to continue')

        start_time = time.time()
        step = sess.run(graph.global_step)
        vae_step = sess.run(graph.vae_global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)

        # Length of one Inpainter + Generator/VAE alternation cycle.
        sum_iters = FLAGS.iters_gen_vae + FLAGS.iters_inp

        while (time.time()-start_time)<FLAGS.max_training_hrs*3600:
            if sv.should_stop():
                break

            fetches = {"global_step_inc": graph.incr_global_step, "step": graph.global_step}

            # The fed mask capacity is proportional to the VAE step, whichever
            # kind of iteration this is.
            mask_capacity = vae_step*FLAGS.mask_capacity_inc

            if step%sum_iters < FLAGS.iters_inp:
                fetches['train_op'] = graph.train_ops['Inpainter']
            else:
                # Runs every train op in the dict and advances the VAE step.
                fetches['train_op'] = graph.train_ops
                fetches['vae_global_step'] = graph.vae_global_step
                fetches['vae_global_step_inc'] = graph.incr_vae_global_step

            if step % FLAGS.summaries_steps == 0:
                fetches["Inpainter_Loss"], fetches["Generator_Loss"] = graph.loss['Inpainter'], graph.loss['Generator']
                fetches["VAE//texVAE"] = graph.loss['VAE//separate/texVAE/']
                fetches["VAE//texVAE_BG"] = graph.loss['VAE//separate/texVAE_BG/']
                fetches['VAE//fusion'] = graph.loss['VAE//fusion']
                fetches['tex_kl'], fetches['bg_kl'] = graph.loss['tex_kl'], graph.loss['bg_kl']
                fetches['summary'] = summary_op

            if step % FLAGS.ckpt_steps == 0:
                fetches['generated_masks'] = graph.generated_masks
                fetches['GT_masks'] = graph.GT_masks

            results = sess.run(fetches, feed_dict={graph.is_training: True, graph.mask_capacity: mask_capacity})
            progbar.update(step%FLAGS.ckpt_steps)

            if step % FLAGS.summaries_steps == 0:
                print ("   Step:%3dk time:%4.4fmin   VAELoss%4.2f" \
                    %(step/1000, (time.time()-start_time)/60, results["VAE//texVAE"]+results['VAE//fusion']+results['VAE//texVAE_BG']))
                sv.summary_writer.add_summary(results['summary'], step)

                # Per-dimension KL values go through a feedable tensor so that
                # each latent dimension lands in its own event directory.
                for d in range(FLAGS.tex_dim):
                    tex_summary = sess.run(tex_latent_summary_op, feed_dict={graph.loss['tex_kl_var']: results['tex_kl'][d]})
                    tex_latent_writers[d].add_summary(tex_summary, step)

                for d in range(FLAGS.bg_dim):
                    bg_summary = sess.run(bg_latent_summary_op, feed_dict={graph.loss['bg_kl_var']: results['bg_kl'][d]})
                    bg_latent_writers[d].add_summary(bg_summary, step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

                # evaluation
                sess.run(graph.val_iterator.initializer)
                eval_fetches = {'GT_masks': graph.GT_masks, 'generated_masks': graph.generated_masks}

                if FLAGS.dataset in ['multi_texture', 'flying_animals']:
                    # For multi_texture, bg_num is not a real background count:
                    # it is the number of samples for each type of image.
                    score = [[] for _ in range(FLAGS.max_num)]
                    for bg in range(FLAGS.bg_num):
                        results_val = sess.run(eval_fetches, feed_dict={graph.is_training: False})
                        for k in range(FLAGS.max_num):
                            score[k].append(Permute_IoU(label=results_val['GT_masks'][k],
                                                        pred=results_val['generated_masks'][k]))
                    for k in range(FLAGS.max_num):
                        eval_summary = sess.run(eval_summary_op, feed_dict={graph.loss['EvalIoU_var']: np.mean(score[k])})
                        # NOTE(review): writer k+1 is used, so this presumably
                        # relies on num_branch >= max_num + 1 - confirm.
                        branch_writers[k+1].add_summary(eval_summary, step)
                else:
                    num_sample = FLAGS.skipnum
                    niter = num_sample//FLAGS.batch_size
                    assert num_sample%FLAGS.batch_size==0
                    score = 0
                    for it in range(niter):
                        results_val = sess.run(eval_fetches, feed_dict={graph.is_training: False})
                        for k in range(FLAGS.batch_size):
                            score += Permute_IoU(label=results_val['GT_masks'][k], pred=results_val['generated_masks'][k])
                    score = score/num_sample
                    eval_summary = sess.run(eval_summary_op, feed_dict={graph.loss['EvalIoU_var']: score})
                    sv.summary_writer.add_summary(eval_summary, step)

            step = results['step']
            # 'vae_global_step' is fetched only on Generator/VAE iterations.
            # On Inpainter iterations keep the previous value instead of
            # raising KeyError.
            vae_step = results.get('vae_global_step', vae_step)

        myprint("Training completed")
def train(FLAGS):
    """Train the global VAE with manually scheduled summaries and checkpoints.

    Optionally restores ``FLAGS.fullmodel_ckpt``, then runs every op in
    ``graph.train_ops`` each step until ``FLAGS.max_training_hrs`` hours have
    passed or the supervisor requests a stop. Summaries are written every
    ``FLAGS.summaries_steps`` steps and a checkpoint every
    ``FLAGS.ckpt_steps`` steps.

    Args:
        FLAGS: flag object; the attributes read are the ones referenced below.
    """
    model = Train_Graph(FLAGS)
    model.build()

    summary_op, latent_summary_op = Summary.collect_globalVAE_summary(model, FLAGS)

    with tf.name_scope("parameter_count"):
        per_variable_sizes = [tf.reduce_prod(tf.shape(var))
                              for var in tf.trainable_variables()]
        total_parameter_count = tf.reduce_sum(per_variable_sizes)

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

    # One event directory per latent dimension.
    latent_writers = []
    for dim in range(FLAGS.tex_dim):
        latent_dir = os.path.join(FLAGS.checkpoint_dir, "latent" + str(dim))
        latent_writers.append(tf.summary.FileWriter(latent_dir))

    # The supervisor neither checkpoints nor summarises; the loop does both.
    supervisor = tf.train.Supervisor(
        logdir=os.path.join(FLAGS.checkpoint_dir, "globalVAE_Sum"),
        saver=None,
        save_summaries_secs=0)

    with supervisor.managed_session() as sess:
        param_total = sess.run(total_parameter_count)
        myprint("Number of total params: {0} \n".format(param_total))

        if not FLAGS.resume_fullmodel:
            myprint("Train from scratch")
        else:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint("Start from step {}".format(sess.run(model.global_step)))
            myprint("Save checkpoint in          {}".format(FLAGS.checkpoint_dir))
            if os.path.dirname(FLAGS.fullmodel_ckpt) != FLAGS.checkpoint_dir:
                print("\033[0;30;41m" + "Warning: checkpoint dir and fullmodel ckpt do not match" + "\033[0m")
        myinput('Press enter to continue')

        began = time.time()
        step = sess.run(model.global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)

        while (time.time() - began) < FLAGS.max_training_hrs * 3600:
            if supervisor.should_stop():
                break

            log_this_step = step % FLAGS.summaries_steps == 0

            fetches = {
                "global_step_inc": model.incr_global_step,
                "step": model.global_step,
                "train_op": model.train_ops,
            }
            if log_this_step:
                fetches["Loss"] = model.loss
                fetches["kl_dim"] = model.latent_loss_dim
                fetches['summary'] = summary_op

            results = sess.run(fetches)
            progbar.update(step % FLAGS.ckpt_steps)

            if log_this_step:
                elapsed_min = (time.time() - began) / 60
                print("   Step:%3dk time:%4.4fmin   Loss%4.2f  "
                      % (step / 1000, elapsed_min, results['Loss']))
                supervisor.summary_writer.add_summary(results['summary'], step)

                # Feed each dimension's KL value into the summary op and
                # write it to that dimension's own writer.
                for dim, writer in enumerate(latent_writers):
                    kl_summary = sess.run(
                        latent_summary_op,
                        feed_dict={model.kl_var: results['kl_dim'][dim]})
                    writer.add_summary(kl_summary, step)

            if step % FLAGS.ckpt_steps == 0:
                ckpt_prefix = os.path.join(FLAGS.checkpoint_dir, 'model')
                saver.save(sess, ckpt_prefix, global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

            # Continue from the step value fetched during this iteration.
            step = results['step']

        myprint("Training completed")
Example #5
0
def train(FLAGS):
    """Train the CIS model, alternating Inpainter and Generator updates.

    Builds ``Train_Graph`` and either restores a full checkpoint
    (``FLAGS.resume_fullmodel``) or loads a pretrained inpainter and/or
    ResNet weights. It then loops until ``FLAGS.max_training_hrs`` hours of
    wall-clock time have passed or the supervisor requests a stop.

    Within each cycle of ``FLAGS.iters_inp + FLAGS.iters_gen`` steps, the
    first ``FLAGS.iters_inp`` steps train the Inpainter and the rest train
    the Generator. Summaries are written every ``FLAGS.summaries_steps``
    steps. A checkpoint is saved and a validation IoU evaluation is run
    every ``FLAGS.ckpt_steps`` steps.

    Args:
        FLAGS: flag object; the attributes read are the ones referenced below.
    """
    graph = Train_Graph(FLAGS)
    graph.build()

    summary_op, generator_summary_op, branch_summary_op, eval_summary_op = Summary.collect_CIS_summary(
        graph, FLAGS)
    with tf.name_scope("parameter_count"):
        total_parameter_count = tf.reduce_sum(
            [tf.reduce_prod(tf.shape(v)) for v in tf.trainable_variables()])

    save_vars = tf.global_variables('Inpainter')+tf.global_variables('Generator')+ \
        tf.global_variables('train_op') #including global step
    if FLAGS.resume_inpainter:
        assert os.path.isfile(FLAGS.inpainter_ckpt + '.index')
        inpainter_saver = tf.train.Saver(tf.trainable_variables(
            'Inpainter'))  #only restore the trainable variables

    if FLAGS.resume_resnet:
        assert os.path.isfile(FLAGS.resnet_ckpt)
        resnet_reader = tf.compat.v1.train.NewCheckpointReader(
            FLAGS.resnet_ckpt)
        resnet_map = resnet_reader.get_variable_to_shape_map()
        # Map checkpoint names to graph variables, keeping only the variables
        # that exist in the checkpoint.
        resnet_dict = dict()
        for v in tf.trainable_variables('Generator//resnet_v2'):
            # Drop the first 21 characters of the op name (presumably the
            # 'Generator//resnet_v2/' prefix - confirm) and re-root the rest
            # under 'resnet_v2_50/'.
            ckpt_name = 'resnet_v2_50/' + v.op.name[21:]
            if ckpt_name in resnet_map:
                resnet_dict[ckpt_name] = v
        resnet_saver = tf.train.Saver(resnet_dict)

    saver = tf.train.Saver(save_vars, max_to_keep=100)
    branch_writers = [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, "branch"+str(m))) \
        for m in range(FLAGS.num_branch)] #save generator loss for each branch
    # saver=None / save_summaries_secs=0: checkpoints and summaries are
    # written explicitly inside the loop instead of by the supervisor.
    sv = tf.train.Supervisor(logdir=os.path.join(FLAGS.checkpoint_dir,
                                                 "CIS_Sum"),
                             saver=None,
                             save_summaries_secs=0)

    with sv.managed_session() as sess:
        myprint ("Number of total params: {0} \n".format( \
            sess.run(total_parameter_count)))
        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(
                FLAGS.fullmodel_ckpt))
            myprint("Start from step {}".format(sess.run(graph.global_step)))
            myprint("Save checkpoint in          {}".format(
                FLAGS.checkpoint_dir))
            if not os.path.dirname(
                    FLAGS.fullmodel_ckpt) == FLAGS.checkpoint_dir:
                print(
                    "\033[0;30;41m" +
                    "Warning: checkpoint dir and fullmodel ckpt do not match" +
                    "\033[0m")
                myprint(
                    "Please make sure that new checkpoint will be saved in the same dir with the resumed model"
                )
        else:
            if FLAGS.resume_inpainter:
                assert os.path.isfile(FLAGS.inpainter_ckpt + '.index')
                inpainter_saver.restore(sess, FLAGS.inpainter_ckpt)
                myprint("Load pretrained inpainter {}".format(
                    FLAGS.inpainter_ckpt))

            if FLAGS.resume_resnet:
                resnet_saver.restore(sess, FLAGS.resnet_ckpt)
                myprint("Load pretrained resnet {}".format(FLAGS.resnet_ckpt))
            if not FLAGS.resume_resnet and not FLAGS.resume_inpainter:
                myprint("Train from scratch")
        myinput('Press enter to continue')

        start_time = time.time()
        step = sess.run(graph.global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)

        # Length of one Inpainter + Generator alternation cycle.
        sum_iters = FLAGS.iters_gen + FLAGS.iters_inp

        while (time.time() - start_time) < FLAGS.max_training_hrs * 3600:
            if sv.should_stop():
                break

            fetches = {
                "global_step_inc": graph.incr_global_step,
                "step": graph.global_step
            }

            if step % sum_iters < FLAGS.iters_inp:
                fetches['train_op'] = graph.train_ops['Inpainter']
            else:
                fetches['train_op'] = graph.train_ops['Generator']

            if step % FLAGS.summaries_steps == 0:
                fetches["Inpainter_Loss"], fetches["Generator_Loss"] \
                    = graph.loss['Inpainter'], graph.loss['Generator']
                fetches["Inpainter_branch_Loss"], fetches["Generator_branch_Loss"] \
                    = graph.loss['Inpainter_branch'], graph.loss['Generator_branch']
                fetches['Generator_Loss_denominator'] = graph.loss[
                    'Generator_denominator']
                fetches['summary'] = summary_op

            if step % FLAGS.ckpt_steps == 0:
                fetches['generated_masks'] = graph.generated_masks
                fetches['GT_masks'] = graph.GT_masks

            results = sess.run(fetches, feed_dict={graph.is_training: True})
            progbar.update(step % FLAGS.ckpt_steps)

            if step % FLAGS.summaries_steps == 0:
                print ("   Step:%3dk time:%4.4fmin   InpainterLoss%4.2f  GeneratorLoss%4.2f " \
                    %(step/1000, (time.time()-start_time)/60, results['Inpainter_Loss'], results['Generator_Loss']))
                sv.summary_writer.add_summary(results['summary'], step)

                generator_summary = sess.run(generator_summary_op,
                                             feed_dict={
                                                 graph.loss['Generator_var']:
                                                 results['Generator_Loss']
                                             })
                sv.summary_writer.add_summary(generator_summary, step)

                # Column m of each fetched loss array is averaged over axis 0
                # and written to branch m's own event directory.
                for m in range(FLAGS.num_branch):
                    branch_summary = sess.run(
                        branch_summary_op,
                        feed_dict={
                            graph.loss['Inpainter_branch_var']:
                            np.mean(results['Inpainter_branch_Loss'][:, m],
                                    axis=0),
                            graph.loss['Generator_branch_var']:
                            np.mean(results['Generator_branch_Loss'][:, m],
                                    axis=0),
                            graph.loss['Generator_denominator_var']:
                            np.mean(results['Generator_Loss_denominator'][:,
                                                                          m],
                                    axis=0)
                        })
                    branch_writers[m].add_summary(branch_summary, step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess,
                           os.path.join(FLAGS.checkpoint_dir, 'model'),
                           global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

                #evaluation
                sess.run(graph.val_iterator.initializer)
                eval_fetches = {
                    'GT_masks': graph.GT_masks,
                    'generated_masks': graph.generated_masks
                }

                num_sample = 200
                niter = num_sample // FLAGS.batch_size
                score = 0
                for it in range(niter):
                    results_val = sess.run(
                        eval_fetches, feed_dict={graph.is_training: False})
                    for k in range(FLAGS.batch_size):
                        score += Permute_IoU(
                            label=results_val['GT_masks'][k],
                            pred=results_val['generated_masks'][k])
                # Average over the samples actually scored. Dividing by the
                # nominal 200 under-reports the IoU whenever batch_size does
                # not divide 200 evenly.
                num_scored = niter * FLAGS.batch_size
                score = score / num_scored if num_scored else 0.0
                eval_summary = sess.run(
                    eval_summary_op,
                    feed_dict={graph.loss['EvalIoU_var']: score})
                sv.summary_writer.add_summary(eval_summary, step)
            step = results['step']

        myprint("Training completed")
def train(FLAGS):
    """Finetune a restored full model for perceptual consistency.

    ``FLAGS.fullmodel_ckpt`` must exist. It is restored and immediately
    re-saved as ``newmodel-0`` in ``FLAGS.checkpoint_dir``. Training then
    alternates Inpainter and Generator updates until
    ``FLAGS.max_training_hrs`` hours have passed or the supervisor requests a
    stop. Summaries are written every ``FLAGS.summaries_steps`` steps and
    checkpoints every ``FLAGS.ckpt_steps`` steps. An IoU / switching-rate
    evaluation runs every ``100 * FLAGS.summaries_steps`` steps, except at
    step 0.

    Args:
        FLAGS: flag object; the attributes read are the ones referenced below.
    """
    model = Train_Graph(FLAGS)
    model.build()

    summary_op, eval_summary_op = Summary.collect_PC_summary(model, FLAGS)

    # Save/restore the four sub-networks, leaving out any variable whose op
    # name contains 'Adam'.
    candidate_vars = tf.global_variables('Inpainter') + tf.global_variables('Generator') + \
        tf.global_variables('VAE') + tf.global_variables('Fusion')
    saver_vars = [var for var in candidate_vars if 'Adam' not in var.op.name]
    saver = tf.train.Saver(saver_vars, max_to_keep=100)

    # The supervisor neither checkpoints nor summarises; the loop does both.
    supervisor = tf.train.Supervisor(
        logdir=os.path.join(FLAGS.checkpoint_dir, "end2end_Sum"),
        saver=None,
        save_summaries_secs=0)

    with supervisor.managed_session() as sess:
        assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
        saver.restore(sess, FLAGS.fullmodel_ckpt)
        myprint("Finetune model {} for perceptual consistency".format(FLAGS.fullmodel_ckpt))

        saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'newmodel'), global_step=0)
        myinput('Press enter to continue')

        began = time.time()
        step = sess.run(model.global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)
        cycle_len = FLAGS.iters_gen + FLAGS.iters_inp

        while (time.time() - began) < FLAGS.max_training_hrs * 3600:
            if supervisor.should_stop():
                break

            # The first iters_inp steps of each cycle train the Inpainter,
            # the remainder train the Generator.
            if step % cycle_len < FLAGS.iters_inp:
                active_train_op = model.train_ops['Inpainter']
            else:
                active_train_op = model.train_ops['Generator']

            fetches = {
                "global_step_inc": model.incr_global_step,
                "step": model.global_step,
                'train_op': active_train_op,
            }
            log_this_step = step % FLAGS.summaries_steps == 0
            if log_this_step:
                fetches['summary'] = summary_op

            results = sess.run(fetches, feed_dict={model.is_training: True})
            progbar.update(step % FLAGS.ckpt_steps)

            if log_this_step:
                elapsed_min = (time.time() - began) / 60
                print("   Step:%3dk time:%4.4fmin" % (step / 1000, elapsed_min))
                supervisor.summary_writer.add_summary(results['summary'], step)

            if step % FLAGS.ckpt_steps == 0:
                saver.save(sess, os.path.join(FLAGS.checkpoint_dir, 'model'), global_step=step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

            if step % (100 * FLAGS.summaries_steps) == 0 and step != 0:
                # evaluation
                sess.run(model.val_iterator.initializer)
                eval_fetches = {
                    'GT_masks': model.GT_masks,
                    'generated_masks': model.generated_masks,
                }
                num_sample = 9 * 9 * 9 * 9 - 1
                niter = num_sample // FLAGS.batch_size
                assert num_sample % FLAGS.batch_size == 0

                score = 0
                arg_maxIoUs = []
                for _ in range(niter):
                    results_val = sess.run(eval_fetches, feed_dict={model.is_training: False})
                    gt_batch = results_val['GT_masks']
                    pred_batch = results_val['generated_masks']
                    for k in range(FLAGS.batch_size):
                        k_score, arg_maxIoU = Permute_IoU(label=gt_batch[k], pred=pred_batch[k])
                        score += k_score
                        arg_maxIoUs.append(arg_maxIoU)

                score = score / num_sample
                # Stack the per-sample results and sum them over samples; the
                # original comment labels the resulting entries as
                # 0 square // 1 ellipse // 2 background.
                count = np.sum(np.stack(arg_maxIoUs, axis=0), axis=0)
                switching_rate = np.min(count) / num_sample

                eval_summary = sess.run(
                    eval_summary_op,
                    feed_dict={
                        model.loss['EvalIoU_var']: score,
                        model.switching_rate: switching_rate,
                    })
                supervisor.summary_writer.add_summary(eval_summary, step)

            # Continue from the step value fetched during this iteration.
            step = results['step']

        myprint("Training completed")
Example #7
0
def train(FLAGS):
    """Train the VAE stage with per-latent-dimension KL summaries.

    Optionally restores a full checkpoint or a pretrained CIS
    (Inpainter + Generator) checkpoint, then runs every op in
    ``graph.train_ops`` each step until ``FLAGS.max_training_hrs`` hours have
    passed or the supervisor requests a stop. The loop is driven by the VAE
    global step. Summaries are written every ``FLAGS.summaries_steps`` steps
    and a checkpoint every ``FLAGS.ckpt_steps`` steps.

    Args:
        FLAGS: flag object; the attributes read are the ones referenced below.
    """
    model = Train_Graph(FLAGS)
    model.build()

    summary_op, tex_latent_summary_op, mask_latent_summary_op, bg_latent_summary_op = \
        Summary.collect_VAE_summary(model, FLAGS)

    with tf.name_scope("parameter_count"):
        per_variable_sizes = [tf.reduce_prod(tf.shape(var))
                              for var in tf.trainable_variables()]
        total_parameter_count = tf.reduce_sum(per_variable_sizes)

    save_vars = tf.global_variables()

    if FLAGS.resume_CIS:
        CIS_vars = tf.global_variables('Inpainter') + tf.global_variables('Generator')
        CIS_saver = tf.train.Saver(CIS_vars, max_to_keep=100)

    saver = tf.train.Saver(save_vars, max_to_keep=100)

    def _dimension_writers(prefix, count):
        # One FileWriter per latent dimension, in "<checkpoint_dir>/<prefix><i>".
        return [tf.summary.FileWriter(os.path.join(FLAGS.checkpoint_dir, prefix + str(idx)))
                for idx in range(count)]

    tex_latent_writers = _dimension_writers("tex_latent", FLAGS.tex_dim)
    bg_latent_writers = _dimension_writers("bg_latent", FLAGS.bg_dim)
    mask_latent_writers = _dimension_writers("mask_latent", FLAGS.mask_dim)

    # The supervisor neither checkpoints nor summarises; the loop does both.
    supervisor = tf.train.Supervisor(
        logdir=os.path.join(FLAGS.checkpoint_dir, "VAE_Sum"),
        saver=None,
        save_summaries_secs=0)

    with supervisor.managed_session() as sess:
        param_total = sess.run(total_parameter_count)
        myprint("Number of total params: {0} \n".format(param_total))

        if FLAGS.resume_fullmodel:
            assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
            saver.restore(sess, FLAGS.fullmodel_ckpt)
            myprint("Resumed training from model {}".format(FLAGS.fullmodel_ckpt))
            myprint("Start from vae_step{}".format(sess.run(model.vae_global_step)))
            myprint("Save checkpoint in          {}".format(FLAGS.checkpoint_dir))
            if os.path.dirname(FLAGS.fullmodel_ckpt) != FLAGS.checkpoint_dir:
                print("\033[0;30;41m" + "Warning: checkpoint dir and fullmodel ckpt do not match" + "\033[0m")
        elif FLAGS.resume_CIS:
            assert os.path.isfile(FLAGS.CIS_ckpt + '.index')
            CIS_saver.restore(sess, FLAGS.CIS_ckpt)
            myprint("Load pretrained inpainter and generator {}".format(FLAGS.CIS_ckpt))
        else:
            myprint("Train from scratch")
        myinput('Press enter to continue')

        began = time.time()
        vae_step = sess.run(model.vae_global_step)
        progbar = Progbar(target=FLAGS.ckpt_steps)

        while (time.time() - began) < FLAGS.max_training_hrs * 3600:
            if supervisor.should_stop():
                break

            fetches = {
                "vae_global_step_inc": model.incr_vae_global_step,
                "vae_step": model.vae_global_step,
                'train_op': model.train_ops,
            }
            # The fed mask capacity is proportional to the VAE step.
            mask_capacity = vae_step * FLAGS.mask_capacity_inc

            log_this_step = vae_step % FLAGS.summaries_steps == 0
            if log_this_step:
                fetches['tex_kl'] = model.loss['tex_kl']
                fetches['mask_kl'] = model.loss['mask_kl']
                fetches['bg_kl'] = model.loss['bg_kl']
                fetches['Fusion'] = model.loss['Fusion']
                fetches['summary'] = summary_op

            results = sess.run(
                fetches,
                feed_dict={model.is_training: True, model.mask_capacity: mask_capacity})
            progbar.update(vae_step % FLAGS.ckpt_steps)

            if log_this_step:
                elapsed_min = (time.time() - began) / 60
                print("   Step:%3dk time:%4.4fmin " % (vae_step / 1000, elapsed_min))
                supervisor.summary_writer.add_summary(results['summary'], vae_step)

                # Feed each dimension's KL value into its summary op and
                # write it to that dimension's own writer.
                for dim, writer in enumerate(tex_latent_writers):
                    tex_summary = sess.run(
                        tex_latent_summary_op,
                        feed_dict={model.loss['tex_kl_var']: results['tex_kl'][dim]})
                    writer.add_summary(tex_summary, vae_step)

                for dim, writer in enumerate(bg_latent_writers):
                    bg_summary = sess.run(
                        bg_latent_summary_op,
                        feed_dict={model.loss['bg_kl_var']: results['bg_kl'][dim]})
                    writer.add_summary(bg_summary, vae_step)

                for dim, writer in enumerate(mask_latent_writers):
                    mask_summary = sess.run(
                        mask_latent_summary_op,
                        feed_dict={model.loss['mask_kl_var']: results['mask_kl'][dim]})
                    writer.add_summary(mask_summary, vae_step)

            if vae_step % FLAGS.ckpt_steps == 0:
                ckpt_prefix = os.path.join(FLAGS.checkpoint_dir, 'model')
                saver.save(sess, ckpt_prefix, global_step=vae_step)
                progbar = Progbar(target=FLAGS.ckpt_steps)

            # Continue from the VAE step value fetched during this iteration.
            vae_step = results['vae_step']

        myprint("Training completed")
def eval(FLAGS):
    """Restore a trained model and dump latent-traversal images for one input.

    Builds a ``Traverse_Graph``, restores the 'VAE', 'Generator' and 'Fusion'
    variables from ``FLAGS.fullmodel_ckpt``, runs the graph once on the image
    at ``FLAGS.input_img`` and writes into ``FLAGS.checkpoint_dir``:

    * ``img.png``: the fetched ``image_batch`` entry 0, via ``convert2int``.
    * ``segment_{i}.png``: that image multiplied by generated mask ``i``,
      for every ``i`` in ``range(FLAGS.num_branch)``.
    * ``branch{b}_var{d}.gif``: one GIF per traversed branch ``b`` and
      latent dimension ``d`` (``FLAGS.top_kdim`` dimensions per branch).

    It also assembles ``merge_slices``, one frame per traversal step with the
    dimensions laid side by side (axis 1) and the branches stacked (axis 0).

    NOTE(review): this name shadows the builtin ``eval`` within the module.

    Args:
        FLAGS: flag object. Fields read here: ``fullmodel_ckpt``,
            ``batch_size``, ``input_img``, ``checkpoint_dir``, ``num_branch``,
            ``traverse_type``, ``traverse_branch``, ``traverse_start``,
            ``traverse_end``, ``tex_dim``, ``bg_dim``, ``mask_dim``,
            ``top_kdim``, ``dataset``, ``img_height``, ``img_width``.

    Raises:
        AssertionError: if ``FLAGS.fullmodel_ckpt + '.index'`` is not a file,
            or if ``FLAGS.batch_size`` is not 1.
    """
    graph = Traverse_Graph(FLAGS)
    graph.build()

    # Only these three variable scopes are restored from the checkpoint.
    restore_vars = tf.global_variables('VAE') + tf.global_variables(
        'Generator') + tf.global_variables('Fusion')
    saver = tf.train.Saver(restore_vars)

    #CIS_saver = tf.train.Saver(tf.global_variables('Generator'))
    with tf.Session() as sess:
        # Initialise everything first; the restore below then overwrites the
        # variables listed in restore_vars.
        sess.run(tf.compat.v1.global_variables_initializer())
        # fullmodel_ckpt is a checkpoint prefix, so check for its '.index' file.
        assert os.path.isfile(FLAGS.fullmodel_ckpt + '.index')
        saver.restore(sess, FLAGS.fullmodel_ckpt)
        # CIS_saver.restore(sess, FLAGS.CIS_ckpt)

        #saver.save(sess, '/home/yutong/Learning-to-manipulate-individual-objects-in-an-image-Implementation/save_checkpoint/md/model', global_step=0)
        myprint("resume model {}".format(FLAGS.fullmodel_ckpt))
        # NOTE(review): 'out_bg' and 'in_bg' are fetched but not read in the
        # portion of the function visible here - confirm they are needed.
        fetches = {
            'image_batch': graph.image_batch,
            'generated_masks': graph.generated_masks,
            'traverse_results': graph.traverse_results,
            'out_bg': graph.out_bg,
            'in_bg': graph.in_bg
        }
        # A single image is fed below, so the graph must use batch size 1.
        assert FLAGS.batch_size == 1
        input_img = convert2float(imageio.imread(FLAGS.input_img))
        # Add a leading batch axis before feeding the placeholder.
        input_img = np.expand_dims(input_img, axis=0)

        results = sess.run(fetches, feed_dict={graph.image_batch0: input_img})
        img = convert2int(results['image_batch'][0])

        imageio.imwrite(os.path.join(FLAGS.checkpoint_dir, 'img.png'), img)
        # One masked image per branch; the last axis of generated_masks is
        # indexed by branch.
        for i in range(FLAGS.num_branch):
            imageio.imwrite(
                os.path.join(FLAGS.checkpoint_dir, 'segment_{}.png'.format(i)),
                convert2int(results['generated_masks'][0, :, :, :, i] *
                            results['image_batch'][0]))

        outputs = np.array(results['traverse_results'])

        # NOTE(review): nch and ndim are assigned here but not used in the
        # visible portion of the function - confirm whether they are needed.
        if FLAGS.traverse_type == 'tex':
            nch = 3
            ndim = FLAGS.tex_dim
        elif FLAGS.traverse_type == 'bg':
            nch = 3
            ndim = FLAGS.bg_dim
        else:
            nch = 1
            ndim = FLAGS.mask_dim

        # 'bg' traversal uses only the last branch; otherwise use every branch
        # ('all') or the comma-separated indices given in FLAGS.traverse_branch.
        if FLAGS.traverse_type == 'bg':
            traverse_branch = [FLAGS.num_branch - 1]
        else:
            traverse_branch = [
                i for i in range(0, FLAGS.num_branch)
                if FLAGS.traverse_branch == 'all'
                or str(i) in FLAGS.traverse_branch.split(',')
            ]
        # 60 evenly spaced latent values from traverse_start to traverse_end.
        traverse_value = list(
            np.linspace(FLAGS.traverse_start, FLAGS.traverse_end, 60))

        # For 'flying_animals' the outputs are reshaped at half the configured
        # height and width.
        # NOTE(review): presumably the graph downsamples this dataset - confirm.
        if FLAGS.dataset == 'flying_animals':
            outputs = np.reshape(outputs, [
                len(traverse_branch), FLAGS.top_kdim,
                len(traverse_value), FLAGS.img_height // 2,
                FLAGS.img_width // 2, -1
            ])
        else:
            outputs = np.reshape(outputs, [
                len(traverse_branch), FLAGS.top_kdim,
                len(traverse_value), FLAGS.img_height, FLAGS.img_width, -1
            ])
        # layout after reshape: branch * top_kdim * step * H * W * channels

        branches = []
        for i in range(len(traverse_branch)):
            # values[step][dim] will hold the padded frame for that cell.
            values = [[None for jj in range(FLAGS.top_kdim)]
                      for ii in range(len(traverse_value))]
            b = traverse_branch[i]
            out = outputs[i]  # top_kdim * step * H * W * channels
            for d in range(FLAGS.top_kdim):
                gif_imgs = []
                for j in range(len(traverse_value)):
                    # Scale by 255 and cast to uint8 for image output.
                    img = (out[d, j, :, :, :] * 255).astype(np.uint8)
                    gif_imgs.append(img)
                    values[j][d] = pad_img(img)
                # One GIF per (branch, dimension), one frame per step.
                name = 'branch{}_var{}.gif'.format(b, d)
                imageio.mimsave(os.path.join(FLAGS.checkpoint_dir, name),
                                gif_imgs,
                                duration=1 / 30)

            # values: len(traverse_value) rows, each with top_kdim images
            value_slices = [
                np.concatenate(values[j], axis=1)
                for j in range(len(traverse_value))
            ]  # place the different dimensions side by side along axis 1
            # value_slices: one concatenated image per traversal step
            branches.append(value_slices)
        # For each step, stack the per-branch rows along axis 0 into one frame.
        merge_slices = [
            np.concatenate(
                [branches[i][j] for i in range(len(traverse_branch))], axis=0)
            for j in range(len(traverse_value))
        ]