Example #1
  def testLocalTrainOp(self):
    g = tf.Graph()
    with g.as_default():
      tf.set_random_seed(0)
      tf_inputs = tf.constant(self._inputs, dtype=tf.float32)
      tf_labels = tf.constant(self._labels, dtype=tf.float32)

      model_fn = BatchNormClassifier
      model_args = (tf_inputs, tf_labels)
      deploy_config = model_deploy.DeploymentConfig(num_clones=2,
                                                    clone_on_cpu=True)

      optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0)

      self.assertEqual(slim.get_variables(), [])
      model = model_deploy.deploy(deploy_config, model_fn, model_args,
                                  optimizer=optimizer)

      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      self.assertEqual(len(update_ops), 4)
      self.assertEqual(len(model.clones), 2)
      self.assertEqual(model.total_loss.op.name, 'total_loss')
      self.assertEqual(model.summary_op.op.name, 'summary_op/summary_op')
      self.assertEqual(model.train_op.op.name, 'train_op')

      with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        moving_mean = tf.contrib.framework.get_variables_by_name(
            'moving_mean')[0]
        moving_variance = tf.contrib.framework.get_variables_by_name(
            'moving_variance')[0]
        initial_loss = sess.run(model.total_loss)
        initial_mean, initial_variance = sess.run([moving_mean,
                                                   moving_variance])
        self.assertAllClose(initial_mean, [0.0, 0.0, 0.0, 0.0])
        self.assertAllClose(initial_variance, [1.0, 1.0, 1.0, 1.0])
        for _ in range(10):
          sess.run(model.train_op)
        final_loss = sess.run(model.total_loss)
        self.assertLess(final_loss, initial_loss / 5.0)

        final_mean, final_variance = sess.run([moving_mean,
                                               moving_variance])
        expected_mean = np.array([0.125, 0.25, 0.375, 0.25])
        expected_var = np.array([0.109375, 0.1875, 0.234375, 0.1875])
        expected_var = self._addBesselsCorrection(16, expected_var)
        self.assertAllClose(final_mean, expected_mean)
        self.assertAllClose(final_variance, expected_var)
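BatchNormClassifier is defined elsewhere in this test file. A plausible minimal sketch, assuming slim (the layer size, decay, and loss here are assumptions; the essential point is that slim.batch_norm registers its moving_mean/moving_variance update ops in tf.GraphKeys.UPDATE_OPS, two per clone, which is why two clones yield the four update ops asserted above):

import tensorflow as tf

slim = tf.contrib.slim

def BatchNormClassifier(inputs, labels, scope=None, reuse=None):
  # Hypothetical sketch: slim.batch_norm adds two update ops
  # (moving_mean, moving_variance) to tf.GraphKeys.UPDATE_OPS,
  # so num_clones=2 produces the four update ops asserted above.
  with tf.variable_scope(scope, 'BatchNormClassifier', [inputs], reuse=reuse):
    net = slim.batch_norm(inputs, decay=0.1)
    predictions = slim.fully_connected(net, 1, activation_fn=tf.sigmoid)
    tf.losses.log_loss(labels, predictions)
    return predictions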
Example #2
  def testNoSummariesOnGPUForEvals(self):
    with tf.Graph().as_default():
      deploy_config = model_deploy.DeploymentConfig(num_clones=2)

      # clone function creates a fully_connected layer with a regularizer loss.
      def ModelFn():
        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
        reg = tf.contrib.layers.l2_regularizer(0.001)
        tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)

      # No optimizer here, it's an eval.
      model = model_deploy.deploy(deploy_config, ModelFn)
      # The model summary op should have a few summary inputs and all of them
      # should be on the CPU.
      self.assertTrue(model.summary_op.op.inputs)
      for inp in model.summary_op.op.inputs:
        self.assertEqual('/device:CPU:0', inp.device)
Example #3
  def testNoSummariesOnGPU(self):
    with tf.Graph().as_default():
      deploy_config = model_deploy.DeploymentConfig(num_clones=2)

      # clone function creates a fully_connected layer with a regularizer loss.
      def ModelFn():
        inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
        reg = tf.keras.regularizers.l2(0.5 * (0.001))
        tf.contrib.layers.fully_connected(inputs, 30, weights_regularizer=reg)

      model = model_deploy.deploy(
          deploy_config, ModelFn,
          optimizer=tf.compat.v1.train.GradientDescentOptimizer(1.0))
      # The model summary op should have a few summary inputs and all of them
      # should be on the CPU.
      self.assertTrue(model.summary_op.op.inputs)
      for inp in model.summary_op.op.inputs:
        self.assertEqual('/device:CPU:0', inp.device)
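An aside on the coefficient above: tf.contrib.layers.l2_regularizer(scale) penalizes scale * tf.nn.l2_loss(w), i.e. scale * sum(w**2) / 2, while tf.keras.regularizers.l2(l) penalizes l * sum(w**2). Halving the Keras coefficient therefore keeps this test equivalent to the contrib version used in the eval variant. A quick check:

import numpy as np
import tensorflow as tf

w = tf.constant([1.0, 2.0, 3.0])                          # sum(w**2) == 14
contrib_reg = tf.contrib.layers.l2_regularizer(0.001)(w)  # 0.001 * 14 / 2
keras_reg = tf.keras.regularizers.l2(0.5 * 0.001)(w)      # 0.0005 * 14
with tf.Session() as sess:
  a, b = sess.run([contrib_reg, keras_reg])
  np.testing.assert_allclose(a, b)  # both are 0.007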
Example #4
    def testNoSummariesOnGPUForEvals(self):
        with tf.Graph().as_default():
            deploy_config = model_deploy.DeploymentConfig(num_clones=2)

            # clone function creates a fully_connected layer with a regularizer loss.
            def ModelFn():
                inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
                reg = tf.contrib.layers.l2_regularizer(0.001)
                tf.contrib.layers.fully_connected(inputs,
                                                  30,
                                                  weights_regularizer=reg)

            # No optimizer here, it's an eval.
            model = model_deploy.deploy(deploy_config, ModelFn)
            # The model summary op should have a few summary inputs and all of them
            # should be on the CPU.
            self.assertTrue(model.summary_op.op.inputs)
            for inp in model.summary_op.op.inputs:
                self.assertEqual('/device:CPU:0', inp.device)
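The driver below calls three helpers that are not shown here: _configure_deployment, _configure_session, and _clone_fn. A minimal sketch of what they might look like, with signatures inferred from the call sites (the network.build interface and the allow_soft_placement flag are assumptions):

def _configure_deployment(num_clones, clone_on_cpu):
    # Hypothetical: thin wrapper over DeploymentConfig, inferred from the
    # call sites in _run below.
    return model_deploy.DeploymentConfig(num_clones=num_clones,
                                         clone_on_cpu=clone_on_cpu)


def _configure_session(inter, intra):
    # Hypothetical: builds the ConfigProto used for the GPU session below.
    return tf.ConfigProto(inter_op_parallelism_threads=inter,
                          intra_op_parallelism_threads=intra,
                          allow_soft_placement=True)


def _clone_fn(images, labels, index_iter, network, is_training):
    # Hypothetical: model_deploy calls this once per clone; pulling the next
    # value from index_iter gives each clone its own shard of the split batch.
    # The returned dict matches clone.outputs['logits'] / ['predictions']
    # accessed in the Metrics section below.
    index = next(index_iter)
    logits, predictions = network.build(images[index], is_training)
    tf.losses.softmax_cross_entropy(labels[index], logits)
    return {'logits': logits, 'predictions': predictions}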
def _run(args):
    network = networks.catalogue[args.network](args)

    if args.cpu:
        deploy_config = _configure_deployment(num_clones=1, clone_on_cpu=True)
        sess = tf.Session(config=tf.ConfigProto(
            inter_op_parallelism_threads=args.num_inter_threads,
            intra_op_parallelism_threads=args.num_intra_threads))

    else:
        deploy_config = _configure_deployment(num_clones=args.num_gpus,
                                              clone_on_cpu=False)
        sess = tf.Session(config=_configure_session(
            inter=args.num_inter_threads, intra=args.num_intra_threads))

    with tf.device(deploy_config.variables_device()):
        global_step = tf.train.create_global_step()

    with tf.device(deploy_config.optimizer_device()):
        lr = tf.train.polynomial_decay(args.learning_rate,
                                       global_step=global_step,
                                       end_learning_rate=args.min_lr,
                                       decay_steps=args.decay_steps,
                                       power=1,
                                       cycle=True)
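        # Pick the optimizer from the first letter of the flag:
        # 'a...' -> Adam, 'r...' -> RMSProp, anything else -> Nesterov momentum.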
        if args.optimizer.startswith('a'):
            optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        elif args.optimizer.startswith('r'):
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=lr,
                momentum=0.9,
                epsilon=1.0,
            )
        else:
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=lr,
                momentum=0.9,
                use_nesterov=True,
            )
    '''Inputs'''
    with tf.device(deploy_config.inputs_device()), tf.name_scope('inputs'):
        pipeline = inputs.Pipeline(args, sess)
        examples, labels = pipeline.data
        images = examples['image']

        image_splits = tf.split(value=images,
                                num_or_size_splits=deploy_config.num_clones,
                                name='split_images')
        label_splits = tf.split(value=labels,
                                num_or_size_splits=deploy_config.num_clones,
                                name='split_labels')
    '''Model Creation'''
    model_dp = model_deploy.deploy(
        config=deploy_config,
        model_fn=_clone_fn,
        optimizer=optimizer,
        kwargs={
            'images': image_splits,
            'labels': label_splits,
            'index_iter': iter(range(deploy_config.num_clones)),
            'network': network,
            'is_training': False  # pipeline.is_training
        })
    '''Metrics'''
    with tf.name_scope('outputs'):
        train_metrics = metrics.Metrics(
            labels=labels,
            clone_logits=[
                clone.outputs['logits'] for clone in model_dp.clones
            ],
            clone_predictions=[
                clone.outputs['predictions'] for clone in model_dp.clones
            ],
            device=deploy_config.variables_device(),
            name='training')
        validation_metrics = metrics.Metrics(
            labels=labels,
            clone_logits=[
                clone.outputs['logits'] for clone in model_dp.clones
            ],
            clone_predictions=[
                clone.outputs['predictions'] for clone in model_dp.clones
            ],
            device=deploy_config.variables_device(),
            name='validation',
            padded_data=True)
        validation_init_op = tf.group(pipeline.validation_iterator.initializer,
                                      validation_metrics.reset_op)
        train_op = tf.group(model_dp.train_op, train_metrics.update_op)
    '''Summaries'''
    with tf.device(deploy_config.variables_device()):
        train_writer = tf.summary.FileWriter(args.model_dir, sess.graph)
        eval_dir = os.path.join(args.model_dir, 'eval')
        eval_writer = tf.summary.FileWriter(eval_dir, sess.graph)
        tf.summary.scalar('accuracy', train_metrics.accuracy)
        tf.summary.scalar('loss', model_dp.total_loss)
        tf.summary.scalar('learning_rate', lr)
        all_summaries = tf.summary.merge_all()
    '''Model Checkpoints'''
    saver = tf.train.Saver(max_to_keep=args.keep_last_n_checkpoints)
    save_path = os.path.join(args.model_dir, 'model.ckpt')
    '''Model Initialization'''
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    last_checkpoint = tf.train.latest_checkpoint(args.model_dir)
    if last_checkpoint:
        saver.restore(sess, last_checkpoint)
    starting_step = sess.run(global_step)
    '''Save pb graph that C++ can load and run.'''
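    # The final positional argument is as_text=False: the graph is written as
    # a binary GraphDef (.pb), not a text pbtxt.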
    tf.train.write_graph(sess.graph_def, './graph/', 'sqz.pb', False)

    def _eval(args):
        dtime = []
        for i in range(args.eval_steps):
            try:
                if args.timeline > 0 and i > 0 and i % args.timeline == 0:
                    options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()

                    t0 = time.time()
                    sess.run(
                        fetches=validation_metrics.update_op,
                        feed_dict=pipeline.validation_data,
                        options=options,
                        run_metadata=run_metadata,
                    )
                    duration = time.time() - t0

                    fetched_timeline = timeline.Timeline(
                        run_metadata.step_stats)
                    chrome_trace = (
                        fetched_timeline.generate_chrome_trace_format())
                    with open(
                            'timeline/timeline_cpu_clone1_{0:03}.json'.format(
                                i), 'w') as f:
                        f.write(chrome_trace)
                else:
                    t0 = time.time()
                    sess.run(
                        fetches=validation_metrics.update_op,
                        feed_dict=pipeline.validation_data,
                    )
                    duration = time.time() - t0

                if args.verbose:
                    print('step {:03}: {:.4f} sec'.format(i, duration))
                dtime.append(duration)

            except tf.errors.OutOfRangeError:
                # Validation data exhausted before completing eval_steps.
                i = i - 1
                break

        num_examples = i * args.batch_size
        if len(dtime) <= 1:
            print('Only the warm-up step was executed; run more steps.')
        else:
            dtime = dtime[1:]  # drop the warm-up step from the timings
            t_sum = np.sum(dtime)
            t_mean = np.mean(dtime)
            t_median = np.median(dtime)
            t_min = np.min(dtime)
            t_max = np.max(dtime)
            t_std = np.std(dtime)
            """
            print('''{:.0f} batches x {:.0f} bs = total {:.0f} images
                throughput[avg] = {:.1f} ips 
                throughput[med] = {:.1f} ips
                latency[median] = {:>.4} ms
                latency[averge] = {:>.4} ms''' 
                    .format( i, args.batch_size, num_examples, 
                        num_examples/t_sum,
                        args.batch_size/t_median,
                        t_median*1000/args.batch_size,
                        t_sum*1000/num_examples))
            """
            print('''SqueezeNet Inference Summary:
            {:.0f} batches x {:.0f} bs = total {:.0f} images evaluated
            batch size = {}
            throughput[med] = {:.1f} image/sec
            latency[median] = {:>.4} ms
            '''.format(i, args.batch_size, num_examples, args.batch_size,
                       args.batch_size / t_median,
                       t_median * 1000 / args.batch_size))

        # Reinitialize dataset and metrics after going through all validation
        # examples
        sess.run(validation_init_op)

    def _train(args):
        '''Main Loop'''
        for train_step in range(starting_step, args.max_train_steps):
            sess.run(train_op, feed_dict=pipeline.training_data)
            '''Summary Hook'''
            if train_step % args.summary_interval == 0:
                results = sess.run(
                    fetches={
                        'accuracy': train_metrics.accuracy,
                        'summary': all_summaries,
                    },
                    feed_dict=pipeline.training_data)
                train_writer.add_summary(results['summary'], train_step)

                print('*** Step {:<5}'.format(train_step))
                print('Train: acc= {:>.4}%'.format(results['accuracy'] * 100))
            '''Checkpoint Hooks'''
            if train_step % args.checkpoint_interval == 0:
                saver.save(sess, save_path, global_step)

            sess.run(train_metrics.reset_op)
            '''Eval Hook'''
            if train_step % args.validation_interval == 0:
                while True:
                    try:
                        sess.run(fetches=validation_metrics.update_op,
                                 feed_dict=pipeline.validation_data)
                    except tf.errors.OutOfRangeError:
                        break
                results = sess.run({'accuracy': validation_metrics.accuracy})
                print('Validation: acc= {:>.4}%'.format(results['accuracy'] *
                                                        100))

                summary = tf.Summary(value=[
                    tf.Summary.Value(tag='accuracy',
                                     simple_value=results['accuracy']),
                ])
                eval_writer.add_summary(summary, train_step)

                # Reinitialize dataset and metrics
                sess.run(validation_init_op)

    if args.inference_only:
        _eval(args)
    else:
        _train(args)
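
For completeness, a sketch of how _run might be wired up; the flag names come from the args attributes used above, and the defaults are illustrative assumptions:

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--network', default='squeezenet')  # assumed key into networks.catalogue
    parser.add_argument('--cpu', action='store_true')
    parser.add_argument('--num_gpus', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--inference_only', action='store_true')
    # ...plus the remaining flags referenced above (learning_rate, min_lr,
    # decay_steps, optimizer, model_dir, eval_steps, timeline, verbose,
    # num_inter_threads, num_intra_threads, keep_last_n_checkpoints,
    # max_train_steps, summary_interval, checkpoint_interval,
    # validation_interval).
    _run(parser.parse_args())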