Example no. 1
def main(_):
    args = config_distill.get_args_for_config(FLAGS.config_name)
    args.logdir = FLAGS.logdir
    args.solver.num_workers = FLAGS.num_workers
    args.solver.task = FLAGS.task
    args.solver.ps_tasks = FLAGS.ps_tasks
    args.solver.master = FLAGS.master

    args.buildinger.env_class = nav_env.MeshMapper
    fu.makedirs(args.logdir)
    args.buildinger.logdir = args.logdir
    R = nav_env.get_multiplexor_class(args.buildinger, args.solver.task)

    # Disabled debugging block: profiles one building sample and writes its
    # input images to tmp/; change `if False` to `if True` to enable it.
    if False:
        pr = cProfile.Profile()
        pr.enable()
        rng = np.random.RandomState(0)
        for i in range(1):
            b, instances_perturbs = R.sample_building(rng)
            inputs = b.worker(*instances_perturbs)
            for j in range(inputs['imgs'].shape[0]):
                p = os.path.join('tmp', '{:d}.png'.format(j))
                img = inputs['imgs'][j, 0, :, :, :3].astype(np.uint8)
                fu.write_image(p, img)
            print(inputs['imgs'].shape)
            inputs = R.pre(inputs)
        pr.disable()
        pr.print_stats(2)

    if args.control.train:
        if not gfile.Exists(args.logdir):
            gfile.MakeDirs(args.logdir)

        m = utils.Foo()
        m.tf_graph = tf.Graph()

        config = tf.ConfigProto()
        config.device_count['GPU'] = 1
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.8

        with m.tf_graph.as_default():
            with tf.device(tf.train.replica_device_setter(
                    args.solver.ps_tasks)):
                m = distill.setup_to_run(m,
                                         args,
                                         is_training=True,
                                         batch_norm_is_training=True)

                train_step_kwargs = distill.setup_train_step_kwargs_mesh(
                    m,
                    R,
                    os.path.join(args.logdir, 'train_bkp'),
                    rng_seed=args.solver.task,
                    is_chief=args.solver.task == 0,
                    iters=1,
                    train_display_interval=args.summary.display_interval)

                final_loss = slim.learning.train(
                    train_op=m.train_op,
                    logdir=args.logdir,
                    master=args.solver.master,
                    is_chief=args.solver.task == 0,
                    number_of_steps=args.solver.max_steps,
                    train_step_fn=tf_utils.train_step_custom,
                    train_step_kwargs=train_step_kwargs,
                    global_step=m.global_step_op,
                    init_op=m.init_op,
                    init_fn=m.init_fn,
                    sync_optimizer=m.sync_optimizer,
                    saver=m.saver_op,
                    summary_op=None,
                    session_config=config)

    if args.control.test:
        m = utils.Foo()
        m.tf_graph = tf.Graph()
        checkpoint_dir = args.logdir
        with m.tf_graph.as_default():
            m = distill.setup_to_run(
                m,
                args,
                is_training=False,
                batch_norm_is_training=args.control.force_batchnorm_is_training_at_test)

            train_step_kwargs = distill.setup_train_step_kwargs_mesh(
                m,
                R,
                os.path.join(args.logdir, args.control.test_name),
                rng_seed=args.solver.task + 1,
                is_chief=args.solver.task == 0,
                iters=args.summary.test_iters,
                train_display_interval=None)

            sv = slim.learning.supervisor.Supervisor(
                graph=ops.get_default_graph(),
                logdir=None,
                init_op=m.init_op,
                summary_op=None,
                summary_writer=None,
                global_step=None,
                saver=m.saver_op)

            last_checkpoint = None
            while True:
                last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
                    checkpoint_dir, last_checkpoint)
                checkpoint_iter = int(
                    os.path.basename(last_checkpoint).split('-')[1])
                start = time.time()
                logging.info(
                    'Starting evaluation at %s using checkpoint %s.',
                    time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
                    last_checkpoint)

                config = tf.ConfigProto()
                config.device_count['GPU'] = 1
                config.gpu_options.allow_growth = True
                config.gpu_options.per_process_gpu_memory_fraction = 0.8

                with sv.managed_session(args.solver.master,
                                        config=config,
                                        start_standard_services=False) as sess:
                    sess.run(m.init_op)
                    sv.saver.restore(sess, last_checkpoint)
                    sv.start_queue_runners(sess)
                    vals, _ = tf_utils.train_step_custom(sess,
                                                         None,
                                                         m.global_step_op,
                                                         train_step_kwargs,
                                                         mode='val')
                    if checkpoint_iter >= args.solver.max_steps:
                        break
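
The test branch above waits for each new checkpoint written by the training job, restores it, and runs a validation pass until training reaches max_steps. Below is a minimal sketch of that polling pattern, assuming TF 1.x and using only core APIs (tf.train.latest_checkpoint, tf.train.Saver); build_eval_graph and the accuracy tensor are hypothetical placeholders standing in for distill.setup_to_run and the distillation metrics, not part of the original code.

import time
import tensorflow as tf

def poll_and_evaluate(checkpoint_dir, max_steps, poll_secs=60):
    graph = tf.Graph()
    with graph.as_default():
        # Hypothetical helper: builds the eval model and returns a metric tensor.
        accuracy = build_eval_graph()
        saver = tf.train.Saver()
        last_seen = None
        while True:
            ckpt = tf.train.latest_checkpoint(checkpoint_dir)
            if ckpt is None or ckpt == last_seen:
                # No new checkpoint yet; sleep instead of busy-waiting.
                time.sleep(poll_secs)
                continue
            last_seen = ckpt
            # Checkpoint prefixes end in -<global_step>, as parsed in the example.
            step = int(ckpt.split('-')[-1])
            with tf.Session(graph=graph) as sess:
                saver.restore(sess, ckpt)
                print('step {:d}: accuracy {:.3f}'.format(step, sess.run(accuracy)))
            if step >= max_steps:
                break  # training has finished; stop evaluating
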
Example no. 2
def main(_):
  args = config_distill.get_args_for_config(FLAGS.config_name)
  args.logdir = FLAGS.logdir
  args.solver.num_workers = FLAGS.num_workers
  args.solver.task = FLAGS.task
  args.solver.ps_tasks = FLAGS.ps_tasks
  args.solver.master = FLAGS.master
  
  args.buildinger.env_class = nav_env.MeshMapper
  fu.makedirs(args.logdir)
  args.buildinger.logdir = args.logdir
  R = nav_env.get_multiplexor_class(args.buildinger, args.solver.task)
  
  # Disabled debugging block: profiles one building sample and writes its
  # input images to tmp/; change `if False` to `if True` to enable it.
  if False:
    pr = cProfile.Profile()
    pr.enable()
    rng = np.random.RandomState(0)
    for i in range(1):
      b, instances_perturbs = R.sample_building(rng)
      inputs = b.worker(*instances_perturbs)
      for j in range(inputs['imgs'].shape[0]):
        p = os.path.join('tmp', '{:d}.png'.format(j))
        img = inputs['imgs'][j,0,:,:,:3].astype(np.uint8)
        fu.write_image(p, img)
      print(inputs['imgs'].shape)
      inputs = R.pre(inputs)
    pr.disable()
    pr.print_stats(2)

  if args.control.train:
    if not gfile.Exists(args.logdir):
      gfile.MakeDirs(args.logdir)
   
    m = utils.Foo()
    m.tf_graph = tf.Graph()
    
    config = tf.ConfigProto()
    config.device_count['GPU'] = 1
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    
    with m.tf_graph.as_default():
      with tf.device(tf.train.replica_device_setter(args.solver.ps_tasks)):
        m = distill.setup_to_run(m, args, is_training=True,
                                batch_norm_is_training=True)

        train_step_kwargs = distill.setup_train_step_kwargs_mesh(
            m, R, os.path.join(args.logdir, 'train'),
            rng_seed=args.solver.task, is_chief=args.solver.task==0, iters=1,
            train_display_interval=args.summary.display_interval)

        final_loss = slim.learning.train(
            train_op=m.train_op,
            logdir=args.logdir,
            master=args.solver.master,
            is_chief=args.solver.task == 0,
            number_of_steps=args.solver.max_steps,
            train_step_fn=tf_utils.train_step_custom,
            train_step_kwargs=train_step_kwargs,
            global_step=m.global_step_op,
            init_op=m.init_op,
            init_fn=m.init_fn,
            sync_optimizer=m.sync_optimizer,
            saver=m.saver_op,
            summary_op=None, session_config=config)
 
  if args.control.test:
    m = utils.Foo()
    m.tf_graph = tf.Graph()
    checkpoint_dir = args.logdir
    with m.tf_graph.as_default():
      m = distill.setup_to_run(
          m, args, is_training=False,
          batch_norm_is_training=args.control.force_batchnorm_is_training_at_test)
      
      train_step_kwargs = distill.setup_train_step_kwargs_mesh(
          m, R, os.path.join(args.logdir, args.control.test_name),
          rng_seed=args.solver.task+1, is_chief=args.solver.task==0,
          iters=args.summary.test_iters, train_display_interval=None)
      
      sv = slim.learning.supervisor.Supervisor(
          graph=ops.get_default_graph(), logdir=None, init_op=m.init_op,
          summary_op=None, summary_writer=None, global_step=None, saver=m.saver_op)

      last_checkpoint = None
      while True:
        last_checkpoint = slim.evaluation.wait_for_new_checkpoint(checkpoint_dir, last_checkpoint)
        checkpoint_iter = int(os.path.basename(last_checkpoint).split('-')[1])
        start = time.time()
        logging.info('Starting evaluation at %s using checkpoint %s.', 
                     time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()),
                     last_checkpoint)
        
        config = tf.ConfigProto()
        config.device_count['GPU'] = 1
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.8
        
        with sv.managed_session(args.solver.master, config=config,
                                start_standard_services=False) as sess:
          sess.run(m.init_op)
          sv.saver.restore(sess, last_checkpoint)
          sv.start_queue_runners(sess)
          vals, _ = tf_utils.train_step_custom(
              sess, None, m.global_step_op, train_step_kwargs, mode='val')
          if checkpoint_iter >= args.solver.max_steps:
            break
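
Both examples build the same tf.ConfigProto by mutating it field by field before every session. A minimal sketch, assuming TF 1.x, of the equivalent configuration expressed as a single constructor call; the helper name make_session_config is not part of the original code.

import tensorflow as tf

def make_session_config(memory_fraction=0.8):
  # One visible GPU, allocate GPU memory on demand, and cap the fraction of
  # total GPU memory this process may claim.
  return tf.ConfigProto(
      device_count={'GPU': 1},
      gpu_options=tf.GPUOptions(
          allow_growth=True,
          per_process_gpu_memory_fraction=memory_fraction))

# Usage: tf.Session(config=make_session_config()), or pass it as session_config
# to slim.learning.train as both examples do.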