Example #1
    def __build_layer_ops(self):
        """Build layer-wise fine-tuning operations.
        Returns:
        * layer_ops: list of training and initialization operations for each layer
        * lrn_rates_pgd: list of layer-wise learning rate
        * prune_perctls: list of layer-wise pruning percentiles
        """

        layer_ops = []
        lrn_rates_pgd = []  # list of layer-wise learning rates
        prune_perctls = []  # list of layer-wise pruning percentiles
        for idx, var_prnd in enumerate(self.vars_prnd['maskable']):
            # create placeholders
            lrn_rate_pgd = tf.placeholder(tf.float32,
                                          shape=[],
                                          name='lrn_rate_pgd_%d' % idx)
            prune_perctl = tf.placeholder(tf.float32,
                                          shape=[],
                                          name='prune_perctl_%d' % idx)

            # select channels for the current convolutional layer
            optimizer = tf.train.GradientDescentOptimizer(lrn_rate_pgd)
            if FLAGS.enbl_multi_gpu:
                optimizer = mgw.DistributedOptimizer(optimizer)
            grads = optimizer.compute_gradients(self.reg_losses[idx],
                                                [var_prnd])
            with tf.control_dependencies(self.update_ops_all):
                var_prnd_new = var_prnd - lrn_rate_pgd * grads[0][0]
                var_norm = tf.sqrt(
                    tf.reduce_sum(tf.square(var_prnd_new),
                                  axis=[0, 1, 3],
                                  keepdims=True))
                threshold = tf.contrib.distributions.percentile(
                    var_norm, prune_perctl)
                shrk_vec = tf.maximum(1.0 - threshold / var_norm, 0.0)
                prune_op = var_prnd.assign(var_prnd_new * shrk_vec)

            # fine-tune with selected channels only
            optimizer_base = tf.train.AdamOptimizer(FLAGS.cpg_lrn_rate_adam)
            if not FLAGS.enbl_multi_gpu:
                optimizer = optimizer_base
            else:
                optimizer = mgw.DistributedOptimizer(optimizer_base)
            grads_origin = optimizer.compute_gradients(self.reg_losses[idx],
                                                       [var_prnd])
            grads_pruned = self.__calc_grads_pruned(grads_origin)
            with tf.control_dependencies(self.update_ops_all):
                finetune_op = optimizer.apply_gradients(grads_pruned)
            init_opt_op = tf.variables_initializer(optimizer_base.variables())

            # append layer-wise operations & variables
            layer_ops += [{
                'prune': prune_op,
                'finetune': finetune_op,
                'init_opt': init_opt_op
            }]
            lrn_rates_pgd += [lrn_rate_pgd]
            prune_perctls += [prune_perctl]

        return layer_ops, lrn_rates_pgd, prune_perctls
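
The placeholders returned above are meant to be fed at run time. A minimal driving loop might look like the sketch below; the session handle, the per-layer iteration count, and the schedule values fed into the placeholders are assumptions for illustration, not part of the original example.

# hypothetical driver for the per-layer ops built above; `sess`, `nb_iters_layer`
# and the fed schedule values are assumptions
for idx, ops in enumerate(layer_ops):
    sess.run(ops['init_opt'])  # reset the Adam optimizer's slot variables for this layer
    for step in range(nb_iters_layer):
        # channel-selection step: one PGD update followed by group-wise shrinkage
        sess.run(ops['prune'], feed_dict={
            lrn_rates_pgd[idx]: 1e-2,
            prune_perctls[idx]: 50.0,  # prune channels below the 50th-percentile norm
        })
        # fine-tune the surviving channels with the Adam optimizer
        sess.run(ops['finetune'])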
Example #2
    def __init__(self, sm_writer, model_helper):
        """Constructor function.

    Args:
    * sm_writer: TensorFlow's summary writer
    * model_helper: model helper with definitions of model & dataset
    """

        # initialize attributes
        self.sm_writer = sm_writer
        self.data_scope = 'data'
        self.model_scope = 'model'

        # initialize Horovod / TF-Plus for multi-gpu training
        if FLAGS.enbl_multi_gpu:
            mgw.init()
            from mpi4py import MPI
            self.mpi_comm = MPI.COMM_WORLD
        else:
            self.mpi_comm = None

        # obtain the function interface provided by the model helper
        self.build_dataset_train = model_helper.build_dataset_train
        self.build_dataset_eval = model_helper.build_dataset_eval
        self.forward_train = model_helper.forward_train
        self.forward_eval = model_helper.forward_eval
        self.calc_loss = model_helper.calc_loss
        self.model_name = model_helper.model_name
        self.dataset_name = model_helper.dataset_name

        # checkpoint path determined by model's & dataset's names
        self.ckpt_file = 'models_%s_at_%s.tar.gz' % (self.model_name,
                                                     self.dataset_name)
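
Several of these examples call into the `mgw` module (`mgw.init()`, `mgw.size()`, `mgw.rank()`, `mgw.local_rank()`, `mgw.DistributedOptimizer`). A minimal sketch of such a wrapper, assuming Horovod as the backend (the comment above also mentions TF-Plus, which this sketch ignores):

# minimal Horovod-backed sketch of the "mgw" interface used throughout these examples
import horovod.tensorflow as hvd

def init():
    hvd.init()

def size():
    return hvd.size()

def rank():
    return hvd.rank()

def local_rank():
    return hvd.local_rank()

def DistributedOptimizer(optimizer):
    return hvd.DistributedOptimizer(optimizer)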
Example #3
def is_primary_worker(scope='global'):
    """Check whether is the primary worker of all nodes (global) or the current node (local).

  Args:
  * scope: check scope ('global' OR 'local')

  Returns:
  * flag: whether this is the primary worker
  """

    if scope == 'global':
        return True if not FLAGS.enbl_multi_gpu else mgw.rank() == 0
    elif scope == 'local':
        return True if not FLAGS.enbl_multi_gpu else mgw.local_rank() == 0
    else:
        raise ValueError('unrecognized worker scope: ' + scope)
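
A typical use of this helper is to restrict checkpointing (or logging) to a single process; a small hypothetical usage sketch, where `saver`, `sess`, and `global_step` are assumed to exist:

# hypothetical usage: only the global primary worker writes checkpoints
if is_primary_worker('global'):
    saver.save(sess, FLAGS.save_path, global_step=global_step)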
    def setup_lrn_rate(self, global_step):
        """Setup the learning rate (and number of training iterations)."""

        batch_size = FLAGS.batch_size * (1 if not FLAGS.enbl_multi_gpu else
                                         mgw.size())
        if FLAGS.mobilenet_version == 1:
            nb_epochs = 412
            idxs_epoch = [12000, 20000]
            step_rate = [200, 200, 4000]
            epoch_step = setup_lrn_rate_piecewise_constant(
                global_step, batch_size, idxs_epoch, step_rate)
            decay_rates = [0.985, 0.980, 0.505]
            decay_rate = setup_lrn_rate_piecewise_constant(
                global_step, batch_size, idxs_epoch, decay_rates)
            lrn_rate = setup_lrn_rate_exponential_decay(
                global_step, batch_size, epoch_step, decay_rate)
            nb_iters = int(30000)
        elif FLAGS.mobilenet_version == 2:
            nb_epochs = 412
            epoch_step = 500
            decay_rate = 0.9  # which is better, 0.98 OR (0.98 ** epoch_step)?
            lrn_rate = setup_lrn_rate_exponential_decay(
                global_step, batch_size, epoch_step, decay_rate)
            nb_iters = int(15000)
        else:
            raise ValueError('invalid MobileNet version: {}'.format(
                FLAGS.mobilenet_version))

        return lrn_rate, nb_iters
Example #5
    def __monitor_progress(self, summary, log_rslt, time_prev, idx_iter):
        # early break for non-primary workers
        if not self.is_primary_worker():
            return None

        # write summaries for TensorBoard visualization
        self.sm_writer.add_summary(summary, idx_iter)

        # display monitored statistics
        speed = FLAGS.batch_size * FLAGS.summ_step / (timer() - time_prev)
        if FLAGS.enbl_multi_gpu:
            speed *= mgw.size()

        if FLAGS.enbl_dst:
            lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5 = log_rslt[:6]
            tf.logging.info(
                'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
                % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5, speed))
        else:
            lrn_rate, model_loss, loss, acc_top1, acc_top5 = log_rslt[:5]
            tf.logging.info(
                'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
                % (idx_iter + 1, lrn_rate, model_loss, loss, acc_top1, acc_top5, speed))

        return timer()
Example #6
    def __monitor_progress(self, summary, log_rslt, idx_iter, time_step):
        """Monitor the training progress.

        Args:
        * summary: summary protocol buffer
        * log_rslt: logging operations' results
        * idx_iter: index of the training iteration
        * time_step: time step between two summary operations
        """

        # write summaries for TensorBoard visualization
        self.sm_writer.add_summary(summary, idx_iter)

        # compute the training speed
        speed = FLAGS.batch_size * FLAGS.summ_step / time_step
        if FLAGS.enbl_multi_gpu:
            speed *= mgw.size()

        # display monitored statistics
        log_str = ' | '.join([
            '%s = %.4e' % (name, value)
            for name, value in zip(self.log_op_names, log_rslt)
        ])
        tf.logging.info('iter #%d: %s | speed = %.2f pics / sec' %
                        (idx_iter + 1, log_str, speed))
    def __build_eval(self, model_helper):
        """Build the evaluation graph for the 'optimal' protocol.

    Args:
    * model_helper: model helper with definitions of model & dataset
    """

        with tf.Graph().as_default():
            # create a TF session for the current graph
            config = tf.ConfigProto()
            config.gpu_options.visible_device_list = str(
                mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)  # pylint: disable=no-member
            self.sess_eval = tf.Session(config=config)

            # data input pipeline
            with tf.variable_scope(self.data_scope):
                __, iterator = model_helper.build_dataset_train(
                    enbl_trn_val_split=True)
                images, labels = iterator.get_next()

            # model definition - weight sparsified network
            with tf.variable_scope(self.model_scope_prnd):
                logits = model_helper.forward_eval(images)
                vars_prnd = get_vars_by_scope(self.model_scope_prnd)
                self.loss_eval, self.metrics_eval = \
                    model_helper.calc_loss(labels, logits, vars_prnd['trainable'])
                self.saver_prnd_eval = tf.train.Saver(vars_prnd['all'])
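
get_vars_by_scope() is used throughout these examples to fetch variables as a dict with 'all', 'trainable', and (for pruning) 'maskable' entries. A rough sketch is shown below; treating 'maskable' as convolution kernels is an assumption for illustration only.

# rough sketch of get_vars_by_scope(); the 'maskable' filter is an assumption
def get_vars_by_scope(scope):
    vars_all = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=scope)
    vars_trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
    vars_maskable = [var for var in vars_trainable if 'kernel' in var.name]
    return {'all': vars_all, 'trainable': vars_trainable, 'maskable': vars_maskable}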
Example #8
  def __build_layer_ops(self):
    """Build layer-wise fine-tuning operations.

    Returns:
    * layer_train_ops: list of training operations for each layer
    * layer_init_opt_ops: list of initialization operations for each layer's optimizer
    * layer_grad_norms: list of gradient norm vectors for each layer
    """

    layer_train_ops = []
    layer_init_opt_ops = []
    grad_norms = []
    for idx, var_prnd in enumerate(self.vars_prnd['maskable']):
      optimizer_base = tf.train.AdamOptimizer(FLAGS.dcp_lrn_rate_adam)
      if not FLAGS.enbl_multi_gpu:
        optimizer = optimizer_base
      else:
        optimizer = mgw.DistributedOptimizer(optimizer_base)
      loss_all = self.reg_losses[idx] + self.dis_losses[self.idxs_layer_to_block[idx]]
      grads_origin = optimizer.compute_gradients(loss_all, [var_prnd])
      grads_pruned = self.__calc_grads_pruned(grads_origin)
      with tf.control_dependencies(self.update_ops_all):
        layer_train_ops += [optimizer.apply_gradients(grads_pruned)]
      layer_init_opt_ops += [tf.variables_initializer(optimizer_base.variables())]
      grad_norms += [tf.reduce_sum(grads_origin[0][0] ** 2, axis=[0, 1, 3])]

    return layer_train_ops, layer_init_opt_ops, grad_norms
Example #9
    def __build_block_ops(self):
        """Build block-wise fine-tuning operations.

    Returns:
    * block_train_ops: list of training operations for each block
    * block_init_opt_ops: list of initialization operations for each block's optimizer
    """

        block_train_ops = []
        block_init_opt_ops = []
        for dis_loss in self.dis_losses:
            optimizer_base = tf.train.AdamOptimizer(FLAGS.dcp_lrn_rate_adam)
            if not FLAGS.enbl_multi_gpu:
                optimizer = optimizer_base
            else:
                optimizer = mgw.DistributedOptimizer(optimizer_base)
            loss_all = dis_loss + self.dis_losses[-1]  # current stage + final loss
            grads_origin = optimizer.compute_gradients(loss_all,
                                                       self.trainable_vars_all)
            grads_pruned = self.__calc_grads_pruned(grads_origin)
            with tf.control_dependencies(self.update_ops_all):
                block_train_ops += [optimizer.apply_gradients(grads_pruned)]
            block_init_opt_ops += [
                tf.variables_initializer(optimizer_base.variables())
            ]

        return block_train_ops, block_init_opt_ops
Example #10
def setup_bnds_decay_rates(model_name, dataset_name):
    """ NOTE: The bnd_decay_rates here is mgw_size invariant """

    batch_size = (FLAGS.batch_size if not FLAGS.enbl_multi_gpu
                  else FLAGS.batch_size * mgw.size())
    nb_batches_per_epoch = int(FLAGS.nb_smpls_train / batch_size)
    mgw_size = int(mgw.size()) if FLAGS.enbl_multi_gpu else 1
    init_lr = (FLAGS.lrn_rate_init * FLAGS.batch_size * mgw_size / FLAGS.batch_size_norm
               if FLAGS.enbl_multi_gpu else FLAGS.lrn_rate_init)
    if dataset_name == 'cifar_10':
        if model_name.startswith('resnet'):
            bnds = [nb_batches_per_epoch * 15, nb_batches_per_epoch * 40]
            decay_rates = [1e-3, 1e-4, 1e-5]
        elif model_name.startswith('lenet'):
            bnds = [nb_batches_per_epoch * 5, nb_batches_per_epoch * 30]
            decay_rates = [1e-4, 1e-5, 1e-6]
        else:
            raise NotImplementedError('model: {} / dataset: {}'.format(model_name, dataset_name))
    elif dataset_name == 'ilsvrc_12':
        if model_name.startswith('resnet'):
            bnds = [nb_batches_per_epoch * 5, nb_batches_per_epoch * 20]
            decay_rates = [1e-4, 1e-5, 1e-6]
        elif model_name.startswith('mobilenet'):
            bnds = [nb_batches_per_epoch * 5, nb_batches_per_epoch * 30]
            decay_rates = [1e-4, 1e-5, 1e-6]
        else:
            raise NotImplementedError('model: {} / dataset: {}'.format(model_name, dataset_name))
    else:
        raise NotImplementedError('dataset: ' + dataset_name)
    finetune_steps = nb_batches_per_epoch * FLAGS.uql_quant_epochs
    init_lr = init_lr if FLAGS.enbl_warm_start else FLAGS.lrn_rate_init
    return init_lr, bnds, decay_rates, finetune_steps
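
The boundaries and decay rates returned above are typically plugged into a piecewise-constant schedule. A hedged usage sketch (the `global_step` tensor is assumed to exist; whether the decay rates are absolute learning rates or multipliers of init_lr is not settled by this snippet, so they are used as absolute values here):

# hypothetical usage of the returned schedule with tf.train.piecewise_constant
init_lr, bnds, decay_rates, finetune_steps = setup_bnds_decay_rates(model_name, dataset_name)
lrn_rate = tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bnds, decay_rates)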
    def __build_eval(self):
        """Build the evaluation graph."""

        with tf.Graph().as_default() as graph:
            # create a TF session for the current graph
            config = tf.ConfigProto()
            config.gpu_options.visible_device_list = str(
                mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)  # pylint: disable=no-member
            config.gpu_options.allow_growth = True  # pylint: disable=no-member
            self.sess_eval = tf.Session(config=config)

            # data input pipeline
            with tf.variable_scope(self.data_scope):
                iterator = self.build_dataset_eval()
                images, labels = iterator.get_next()

            # model definition - uniform quantized model - part 1
            with tf.variable_scope(self.model_scope_quan):
                logits = self.forward_eval(images)
                if not isinstance(logits, dict):
                    outputs = tf.nn.softmax(logits)
                else:
                    outputs = tf.nn.softmax(logits['cls_pred'])
                tf.contrib.quantize.experimental_create_eval_graph(
                    weight_bits=FLAGS.uqtf_weight_bits,
                    activation_bits=FLAGS.uqtf_activation_bits,
                    scope=self.model_scope_quan)
                for node_name in self.unquant_node_names:
                    insert_quant_op(graph, node_name, is_train=False)
                vars_quan = get_vars_by_scope(self.model_scope_quan)

            # model definition - distilled model
            if FLAGS.enbl_dst:
                logits_dst = self.helper_dst.calc_logits(
                    self.sess_eval, images)

            # model definition - uniform quantized model - part 2
            with tf.variable_scope(self.model_scope_quan):
                # loss & extra evaluation metrics
                loss, metrics = self.calc_loss(labels, logits,
                                               vars_quan['trainable'])
                if FLAGS.enbl_dst:
                    loss += self.helper_dst.calc_loss(logits, logits_dst)

                # TF operations for evaluation
                vars_quan = get_vars_by_scope(self.model_scope_quan)
                self.eval_op = [loss] + list(metrics.values())
                self.eval_op_names = ['loss'] + list(metrics.keys())
                self.outputs_eval = logits
                self.saver_quan_eval = tf.train.Saver(vars_quan['all'])

            # add input & output tensors to certain collections
            if not isinstance(images, dict):
                tf.add_to_collection('images_final', images)
            else:
                tf.add_to_collection('images_final', images['image'])
            if not isinstance(logits, dict):
                tf.add_to_collection('logits_final', logits)
            else:
                tf.add_to_collection('logits_final', logits['cls_pred'])
Example #12
    def __monitor_progress(self, idx_iter, log_rslt, time_prev):
        if not self.__is_primary_worker():
            return None

        # display monitored statistics
        speed = FLAGS.batch_size * self.tune_global_disp_steps / (timer() - time_prev)
        if FLAGS.enbl_multi_gpu:
            speed *= mgw.size()

        if self.dataset_name == 'coco2017-pose':
            if FLAGS.enbl_dst:
                lrn_rate, dst_loss, model_loss, loss, total_loss, \
                    total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:8]
                tf.logging.info(
                    'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
                    % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss,
                       total_loss_ll_paf, total_loss_ll_heat, total_loss_ll, speed))
            else:
                lrn_rate, model_loss, loss, total_loss, \
                    total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:7]
                tf.logging.info(
                    'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
                    % (idx_iter + 1, lrn_rate, model_loss, loss,
                       total_loss_ll_paf, total_loss_ll_heat, total_loss_ll, speed))
        elif FLAGS.enbl_dst:
            lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5 = log_rslt[:6]
            tf.logging.info(
                'iter #%d: lr = %e | dst_loss = %e | model_loss = %e | loss = %e | acc_top1 = %e | acc_top5 = %e | speed = %.2f pics / sec'
                % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5, speed))
        else:
            lrn_rate, model_loss, loss, acc_top1, acc_top5 = log_rslt[:5]
            tf.logging.info(
                'iter #%d: lr = %e | model_loss = %e | loss = %e | acc_top1 = %e | acc_top5 = %e | speed = %.2f pics / sec'
                % (idx_iter + 1, lrn_rate, model_loss, loss, acc_top1, acc_top5, speed))

        return timer()
    def setup_lrn_rate(self, global_step):
        """Setup the learning rate (and number of training iterations)."""

        batch_size = FLAGS.batch_size * (1 if not FLAGS.enbl_multi_gpu else
                                         mgw.size())
        if FLAGS.mobilenet_version == 1:
            nb_epochs = 100
            idxs_epoch = [30, 60, 80, 90]
            decay_rates = [1.0, 0.1, 0.01, 0.001, 0.0001]
            lrn_rate = setup_lrn_rate_piecewise_constant(
                global_step, batch_size, idxs_epoch, decay_rates)
            nb_iters = int(FLAGS.nb_smpls_train * nb_epochs *
                           FLAGS.nb_epochs_rat / batch_size)
        elif FLAGS.mobilenet_version == 2:
            nb_epochs = 412
            epoch_step = 2.5
            decay_rate = 0.98**epoch_step  # which is better, 0.98 OR (0.98 ** epoch_step)?
            lrn_rate = setup_lrn_rate_exponential_decay(
                global_step, batch_size, epoch_step, decay_rate)
            nb_iters = int(FLAGS.nb_smpls_train * nb_epochs *
                           FLAGS.nb_epochs_rat / batch_size)
        else:
            raise ValueError('invalid MobileNet version: {}'.format(
                FLAGS.mobilenet_version))

        return lrn_rate, nb_iters
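
setup_lrn_rate_exponential_decay() is called in both MobileNet branches but not shown in these examples. A plausible sketch, assuming it wraps tf.train.exponential_decay with epoch-based decay steps (the function body and its use of FLAGS.lrn_rate_init / FLAGS.nb_smpls_train are assumptions):

# plausible sketch of setup_lrn_rate_exponential_decay(); the body is an assumption
def setup_lrn_rate_exponential_decay(global_step, batch_size, epoch_step, decay_rate):
    batches_per_epoch = float(FLAGS.nb_smpls_train) / batch_size
    return tf.train.exponential_decay(
        FLAGS.lrn_rate_init,
        tf.cast(global_step, tf.int32),
        decay_steps=int(epoch_step * batches_per_epoch),
        decay_rate=decay_rate,
        staircase=True)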
Example #14
    def __build_eval(self):
        with tf.Graph().as_default():
            # create a TF session for the current graph
            config = tf.ConfigProto()
            config.gpu_options.visible_device_list = str(
                mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)
            self.sess_eval = tf.Session(config=config)

            # data input pipeline
            with tf.variable_scope(self.data_scope):
                iterator = self.build_dataset_eval()
                images, labels = iterator.get_next()
                # images.set_shape((FLAGS.batch_size, images.shape[1], images.shape[2], images.shape[3]))
                images.set_shape((FLAGS.batch_size_eval, images.shape[1],
                                  images.shape[2], images.shape[3]))
                self.images_eval = images

            # model definition - distilled model
            if FLAGS.enbl_dst:
                logits_dst = self.helper_dst.calc_logits(
                    self.sess_eval, images)

            # model definition
            with tf.variable_scope(self.model_scope, reuse=tf.AUTO_REUSE):
                # forward pass
                logits = self.forward_eval(images)

                self.__quantize_eval_graph()

                # loss & accuracy
                loss, metrics = self.calc_loss(labels, logits,
                                               self.trainable_vars)
                if self.dataset_name == 'cifar_10':
                    acc_top1, acc_top5 = metrics['accuracy'], tf.constant(0.)
                elif self.dataset_name == 'ilsvrc_12':
                    acc_top1, acc_top5 = metrics['acc_top1'], metrics[
                        'acc_top5']
                elif self.dataset_name == 'coco2017-pose':
                    total_loss = metrics['total_loss_all_layers']
                    total_loss_ll_paf = metrics['total_loss_last_layer_paf']
                    total_loss_ll_heat = metrics['total_loss_last_layer_heat']
                    total_loss_ll = metrics['total_loss_last_layer']
                else:
                    raise ValueError("Unrecognized dataset name")

                if FLAGS.enbl_dst:
                    dst_loss = self.helper_dst.calc_loss(logits, logits_dst)
                    loss += dst_loss

                # TF operations & model saver
                if self.dataset_name == 'coco2017-pose':
                    self.ops['eval'] = [
                        loss, total_loss, total_loss_ll_paf,
                        total_loss_ll_heat, total_loss_ll
                    ]
                else:
                    self.ops['eval'] = [loss, acc_top1, acc_top5]
                self.saver_eval = tf.train.Saver(self.vars)
Example #15
    def build(self, enbl_trn_val_split=False):
        '''Build iterator(s) for tf.data.Dataset() object.

    Args:
    * enbl_trn_val_split: whether to split into training & validation subsets

    Returns:
    * iterator_trn: iterator for the training subset
    * iterator_val: iterator for the validation subset
      OR
    * iterator: iterator for the chosen subset (training OR testing)

    Example:
      # build iterator(s)
      dataset = xxxxDataset(is_train=True)  # TF operations are not created
      iterator = dataset.build()            # TF operations are created
          OR
      iterator_trn, iterator_val = dataset.build(enbl_trn_val_split=True)  # for dataset-train only

      # use the iterator to obtain a mini-batch of images & labels
      images, labels = iterator.get_next()
    '''

        # obtain list of data files' names
        filenames = tf.data.Dataset.list_files(self.file_pattern, shuffle=True)
        if self.enbl_shard:
            filenames = filenames.shard(mgw.size(), mgw.rank())

        # create a tf.data.Dataset from list of files
        dataset = filenames.apply(
            tf.contrib.data.parallel_interleave(
                self.dataset_fn, cycle_length=FLAGS.cycle_length))
        dataset = dataset.map(self.parse_fn,
                              num_parallel_calls=FLAGS.nb_threads)

        # create iterators for training & validation subsets separately
        if self.is_train and enbl_trn_val_split:
            iterator_val = self.__make_iterator(
                dataset.take(FLAGS.nb_smpls_val))
            iterator_trn = self.__make_iterator(
                dataset.skip(FLAGS.nb_smpls_val))
            return iterator_trn, iterator_val

        return self.__make_iterator(dataset)
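
The __make_iterator() helper is referenced but not shown. A minimal sketch with standard shuffle/batch/prefetch behaviour appears below; the buffer sizes (FLAGS.buffer_size, FLAGS.prefetch_size) and the one-shot iterator are assumptions.

# minimal sketch of __make_iterator(); buffer sizes and iterator type are assumptions
def __make_iterator(self, dataset):
    if self.is_train:
        dataset = dataset.shuffle(buffer_size=FLAGS.buffer_size)
        dataset = dataset.repeat()
    dataset = dataset.batch(FLAGS.batch_size)
    dataset = dataset.prefetch(FLAGS.prefetch_size)
    return dataset.make_one_shot_iterator()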
Example #16
  def setup_lrn_rate(self, global_step):
    """Setup the learning rate (and number of training iterations)."""

    nb_epochs = 100
    idxs_epoch = [30, 60, 80, 90]
    decay_rates = [1.0, 0.1, 0.01, 0.001, 0.0001]
    batch_size = FLAGS.batch_size * (1 if not FLAGS.enbl_multi_gpu else mgw.size())
    lrn_rate = setup_lrn_rate_piecewise_constant(global_step, batch_size, idxs_epoch, decay_rates)
    nb_iters = int(FLAGS.nb_smpls_train * nb_epochs * FLAGS.nb_epochs_rat / batch_size)

    return lrn_rate, nb_iters
Example #17
def main(unused_argv):
    """Main entry.

    Args:
    * unused_argv: unused arguments (after FLAGS is parsed)
    """

    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.enbl_multi_gpu:
        mgw.init()

    trainer = Trainer(data_path=FLAGS.data_path, netcfg=FLAGS.net_cfg)

    trainer.build_graph(is_train=True)
    trainer.build_graph(is_train=False)

    if FLAGS.eval_only:
        trainer.eval()
    else:
        trainer.train()
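
main() is presumably launched through TensorFlow's flag parser; the usual entry point (an assumption, since the module tail is not shown) would be:

# typical entry point: parse FLAGS, then call main()
if __name__ == '__main__':
    tf.app.run()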
    def setup_lrn_rate(self, global_step):
        """Setup the learning rate (and number of training iterations)."""

        nb_epochs = 100
        idxs_epoch = [0.4, 0.8]
        decay_rates = [0.001, 0.0005, 0.0001]
        batch_size = FLAGS.batch_size * (1 if not FLAGS.enbl_multi_gpu else
                                         mgw.size())
        lrn_rate = setup_lrn_rate_piecewise_constant(global_step, batch_size,
                                                     idxs_epoch, decay_rates)
        nb_iters = int(12000)

        return lrn_rate, nb_iters
Example #19
    def __build_eval(self):
        """Build the evaluation graph."""

        with tf.Graph().as_default():
            # create a TF session for the current graph
            config = tf.ConfigProto()
            config.gpu_options.visible_device_list = str(
                mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)  # pylint: disable=no-member
            self.sess_eval = tf.Session(config=config)

            # data input pipeline
            with tf.variable_scope(self.data_scope):
                iterator = self.build_dataset_eval()
                images, labels = iterator.get_next()

            # model definition - distilled model
            if FLAGS.enbl_dst:
                logits_dst = self.helper_dst.calc_logits(
                    self.sess_eval, images)

            # model definition - channel-pruned model
            with tf.variable_scope(self.model_scope_prnd):
                # loss & extra evaluation metrics
                logits = self.forward_eval(images)
                vars_prnd = get_vars_by_scope(self.model_scope_prnd)
                loss, metrics = self.calc_loss(labels, logits,
                                               vars_prnd['trainable'])
                if FLAGS.enbl_dst:
                    loss += self.helper_dst.calc_loss(logits, logits_dst)

                # overall pruning ratios of trainable & maskable variables
                pr_trainable = calc_prune_ratio(vars_prnd['trainable'])
                pr_maskable = calc_prune_ratio(vars_prnd['maskable'])

                # TF operations for evaluation
                self.factory_op = [tf.cast(logits, tf.uint8)]
                self.time_op = [logits]
                self.out_op = [
                    tf.cast(images, tf.uint8),
                    tf.cast(logits, tf.uint8),
                    tf.cast(labels, tf.uint8)
                ]
                self.eval_op = [loss, pr_trainable, pr_maskable] + list(
                    metrics.values())
                self.eval_op_names = ['loss', 'pr_trn', 'pr_msk'] + list(
                    metrics.keys())
                self.saver_prnd_eval = tf.train.Saver(vars_prnd['all'])

            # add input & output tensors to certain collections
            tf.add_to_collection('images_final', images)
            tf.add_to_collection('logits_final', logits)
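
calc_prune_ratio() is used here and in Example #25 to report how much of a variable list has been pruned. A simple sketch, assuming the ratio is the fraction of exactly-zero entries across the given variables:

# simple sketch of calc_prune_ratio(); equating "pruned" with "exactly zero" is an assumption
def calc_prune_ratio(vars_list):
    nb_zeros = tf.add_n([tf.reduce_sum(tf.cast(tf.equal(var, 0.0), tf.float32))
                         for var in vars_list])
    nb_total = tf.add_n([tf.cast(tf.size(var), tf.float32) for var in vars_list])
    return nb_zeros / nb_total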
Example #20
File: utils.py Project: rhkdqo93/AC
def create_session():
  """Create a TensorFlow session.

  Returns:
  * sess: TensorFlow session
  """

  # create a TensorFlow session
  config = tf.ConfigProto()
  config.gpu_options.visible_device_list = str(mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)  # pylint: disable=no-member
  config.gpu_options.allow_growth = True  # pylint: disable=no-member
  sess = tf.Session(config=config)

  return sess
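
Typical usage of create_session() (the initializer call is an assumption):

# hypothetical usage
sess = create_session()
sess.run(tf.global_variables_initializer())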
Example #21
def setup_lrn_rate(global_step, model_name, dataset_name):
    """Setup the learning rate for the given dataset.

  Args:
  * global_step: training iteration counter
  * model_name: model's name; must be one of ['lenet', 'resnet_*', 'mobilenet_v1', 'mobilenet_v2']
  * dataset_name: dataset's name; must be one of ['cifar_10', 'ilsvrc_12']

  Returns:
  * lrn_rate: learning rate
  * nb_batches: number of training mini-batches
  """

    # obtain the overall batch size across all GPUs
    if not FLAGS.enbl_multi_gpu:
        batch_size = FLAGS.batch_size
    else:
        batch_size = FLAGS.batch_size * mgw.size()

    # choose a learning rate protocol according to the model & dataset combination
    global_step = tf.cast(global_step, tf.int32)
    if dataset_name == 'cifar_10':
        if model_name == 'lenet':
            lrn_rate, nb_batches = setup_lrn_rate_lenet_cifar10(
                global_step, batch_size)
        elif model_name.startswith('resnet'):
            lrn_rate, nb_batches = setup_lrn_rate_resnet_cifar10(
                global_step, batch_size)
        else:
            raise NotImplementedError('model: {} / dataset: {}'.format(
                model_name, dataset_name))
    elif dataset_name == 'ilsvrc_12':
        if model_name.startswith('resnet'):
            lrn_rate, nb_batches = setup_lrn_rate_resnet_ilsvrc12(
                global_step, batch_size)
        elif model_name.startswith('mobilenet_v1'):
            lrn_rate, nb_batches = setup_lrn_rate_mobilenet_v1_ilsvrc12(
                global_step, batch_size)
        elif model_name.startswith('mobilenet_v2'):
            lrn_rate, nb_batches = setup_lrn_rate_mobilenet_v2_ilsvrc12(
                global_step, batch_size)
        else:
            raise NotImplementedError('model: {} / dataset: {}'.format(
                model_name, dataset_name))
    else:
        raise NotImplementedError('dataset: ' + dataset_name)

    return lrn_rate, nb_batches
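
The per-model helpers dispatched to above build on setup_lrn_rate_piecewise_constant(), which is not shown. A plausible sketch is given below; converting epoch indices to step boundaries and scaling the base rate linearly with the global batch size (via FLAGS.lrn_rate_init and FLAGS.batch_size_norm) are assumptions based on Example #10.

# plausible sketch of setup_lrn_rate_piecewise_constant(); the body is an assumption
def setup_lrn_rate_piecewise_constant(global_step, batch_size, idxs_epoch, decay_rates):
    batches_per_epoch = float(FLAGS.nb_smpls_train) / batch_size
    base_lr = FLAGS.lrn_rate_init * batch_size / FLAGS.batch_size_norm
    bnds = [int(idx_epoch * batches_per_epoch) for idx_epoch in idxs_epoch]
    vals = [base_lr * decay_rate for decay_rate in decay_rates]
    return tf.train.piecewise_constant(tf.cast(global_step, tf.int32), bnds, vals)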
Example #22
    def __monitor_progress(self, summary, log_rslt, time_prev, idx_iter):
        # early break for non-primary workers
        if not self.is_primary_worker():
            return None

        # write summaries for TensorBoard visualization
        self.sm_writer.add_summary(summary, idx_iter)

        # display monitored statistics
        speed = FLAGS.batch_size * FLAGS.summ_step / (timer() - time_prev)
        if FLAGS.enbl_multi_gpu:
            speed *= mgw.size()

        # NOTE: for cifar-10, acc_top5 is 0.
        if self.dataset_name == 'coco2017-pose':
            if FLAGS.enbl_dst:
                lrn_rate, dst_loss, model_loss, loss, total_loss, \
                    total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:8]
                tf.logging.info(
                    'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
                    % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss,
                       total_loss_ll_paf, total_loss_ll_heat, total_loss_ll,
                       speed))
            else:
                lrn_rate, model_loss, loss, total_loss, \
                    total_loss_ll_paf, total_loss_ll_heat, total_loss_ll = log_rslt[:7]
                tf.logging.info(
                    'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | ll_paf = %.4f | ll_heat = %.4f | ll = %.4f | speed = %.2f pics / sec'
                    % (idx_iter + 1, lrn_rate, model_loss, loss,
                       total_loss_ll_paf, total_loss_ll_heat, total_loss_ll,
                       speed))
        else:
            if FLAGS.enbl_dst:
                lrn_rate, dst_loss, model_loss, loss, acc_top1, acc_top5 = log_rslt[:6]
                tf.logging.info(
                    'iter #%d: lr = %e | dst_loss = %.4f | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
                    % (idx_iter + 1, lrn_rate, dst_loss, model_loss, loss,
                       acc_top1, acc_top5, speed))
            else:
                lrn_rate, model_loss, loss, acc_top1, acc_top5 = log_rslt[:5]
                tf.logging.info(
                    'iter #%d: lr = %e | model_loss = %.4f | loss = %.4f | acc_top1 = %.4f | acc_top5 = %.4f | speed = %.2f pics / sec'
                    % (idx_iter + 1, lrn_rate, model_loss, loss, acc_top1,
                       acc_top5, speed))

        return timer()
Example #23
  def __monitor_progress(self, summary, log_rslt):
    # early break for non-primary workers
    if not self.__is_primary_worker():
      return
    # write summaries for TensorBoard visualization
    self.sm_writer.add_summary(summary, self.idx_iter)

    # display monitored statistics
    lrn_rate, loss, accuracy = log_rslt[0], log_rslt[1], log_rslt[2]
    speed = FLAGS.batch_size * FLAGS.summ_step / (timer() - self.time_prev)
    if FLAGS.enbl_multi_gpu:
      speed *= mgw.size()
    tf.logging.info('iter #%d: lr = %e | loss = %e | speed = %.2f pics / sec'
                    % (self.idx_iter + 1, lrn_rate, loss, speed))
    for i in range(len(self.accuracy_keys)):
      tf.logging.info('{} = {}'.format(self.accuracy_keys[i], accuracy[i]))
    self.time_prev = timer()
Example #24
  def __build_network_ops(self, loss, lrn_rate):
    """Build network training operations.
    Returns:
    * train_op: training operation of the whole network
    * init_opt_op: initialization operation of the whole network's optimizer
    """

    optimizer_base = tf.train.MomentumOptimizer(lrn_rate, FLAGS.momentum)
    if not FLAGS.enbl_multi_gpu:
      optimizer = optimizer_base
    else:
      optimizer = mgw.DistributedOptimizer(optimizer_base)
    grads_origin = optimizer.compute_gradients(loss, self.trainable_vars_all)
    grads_pruned = self.__calc_grads_pruned(grads_origin)
    with tf.control_dependencies(self.update_ops_all):
      train_op = optimizer.apply_gradients(grads_pruned, global_step=self.global_step)
    init_opt_op = tf.variables_initializer(optimizer_base.variables())

    return train_op, init_opt_op
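
Most examples above route gradients through __calc_grads_pruned() before applying them. A minimal sketch of what it may do is shown below; keeping a dict of binary masks keyed by variable name (self.masks) is an assumption for illustration.

# minimal sketch of __calc_grads_pruned(): zero out gradients of pruned weights so
# they stay pruned during fine-tuning; the self.masks lookup is an assumption
def __calc_grads_pruned(self, grads_origin):
    grads_pruned = []
    for grad, var in grads_origin:
        if var.name not in self.masks:
            grads_pruned += [(grad, var)]
        else:
            grads_pruned += [(grad * self.masks[var.name], var)]
    return grads_pruned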
Example #25
    def __build_eval(self):
        """Build the evaluation graph."""

        with tf.Graph().as_default():
            # create a TF session for the current graph
            config = tf.ConfigProto()
            if FLAGS.enbl_multi_gpu:
                config.gpu_options.visible_device_list = str(mgw.local_rank())  # pylint: disable=no-member
            else:
                config.gpu_options.visible_device_list = '0'  # pylint: disable=no-member
            self.sess_eval = tf.Session(config=config)

            # data input pipeline
            with tf.variable_scope(self.data_scope):
                iterator = self.build_dataset_eval()
                images, labels = iterator.get_next()

            # model definition - distilled model
            if FLAGS.enbl_dst:
                logits_dst = self.helper_dst.calc_logits(
                    self.sess_eval, images)

            # model definition - weight-sparsified model
            with tf.variable_scope(self.model_scope):
                # loss & extra evaluation metrics
                logits = self.forward_eval(images)
                loss, metrics = self.calc_loss(labels, logits,
                                               self.trainable_vars)
                if FLAGS.enbl_dst:
                    loss += self.helper_dst.calc_loss(logits, logits_dst)

                # overall pruning ratios of trainable & maskable variables
                pr_trainable = calc_prune_ratio(self.trainable_vars)
                pr_maskable = calc_prune_ratio(self.maskable_vars)

                # TF operations for evaluation
                self.eval_op = [loss, pr_trainable, pr_maskable] + list(
                    metrics.values())
                self.eval_op_names = ['loss', 'pr_trn', 'pr_msk'] + list(
                    metrics.keys())
                self.saver_eval = tf.train.Saver(self.vars)
Example #26
    def __build_pruned_evaluate_model(self, path=None):
        '''Build an evaluation model from the pruned model.'''
        # early break for non-primary workers
        if not self.__is_primary_worker():
            return

        if path is None:
            path = FLAGS.save_path

        if not tf.train.checkpoint_exists(path):
            return

        with tf.Graph().as_default():
            config = tf.ConfigProto()
            config.gpu_options.visible_device_list = str(  # pylint: disable=no-member
                mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)
            self.sess_eval = tf.Session(config=config)
            self.saver_eval = tf.train.import_meta_graph(path + '.meta')
            self.saver_eval.restore(self.sess_eval, path)
            eval_logits = tf.get_collection('logits')[0]
            tf.add_to_collection('logits_final', eval_logits)
            eval_images = tf.get_collection('eval_images')[0]
            tf.add_to_collection('images_final', eval_images)
            eval_labels = tf.get_collection('eval_labels')[0]
            mem_images = tf.get_collection('mem_images')[0]
            mem_labels = tf.get_collection('mem_labels')[0]

            self.sess_eval.close()

            graph_editor.reroute_ts(eval_images, mem_images)
            graph_editor.reroute_ts(eval_labels, mem_labels)

            self.sess_eval = tf.Session(config=config)
            self.saver_eval.restore(self.sess_eval, path)
            trainable_vars = self.trainable_vars
            loss, accuracy = self.calc_loss(eval_labels, eval_logits,
                                            trainable_vars)
            self.eval_op = [loss] + list(accuracy.values())
            self.sm_writer.add_graph(self.sess_eval.graph)
    def __build_network_ft_ops(self, loss):
        """Build operations for network fine-tuning.

    Args:
    * loss: loss function's value

    Returns:
    * init_op: initialization operation
    * train_op: training operation
    """

        optimizer_base = tf.train.AdamOptimizer(FLAGS.ws_lrn_rate_ft)
        if FLAGS.enbl_multi_gpu:
            optimizer = mgw.DistributedOptimizer(optimizer_base)
        else:
            optimizer = optimizer_base
        grads_origin = optimizer.compute_gradients(loss,
                                                   self.vars_prnd['trainable'])
        grads_pruned = self.__calc_grads_pruned(grads_origin)
        train_op = optimizer.apply_gradients(grads_pruned)
        init_op = tf.variables_initializer(optimizer_base.variables())

        return init_op, train_op
    def __build_layer_rg_ops(self):
        """Build operations for layerwise regression.

    Returns:
    * init_op: initialization operation
    * train_ops: list of training operations, one per layer
    """

        # obtain lists of core operations in both networks
        if self.model_name.startswith('mobilenet'):
            patterns = ['pointwise/Conv2D', 'Conv2d_1c_1x1/Conv2D']
        else:
            patterns = ['Conv2D', 'MatMul']
        core_ops_full = get_ops_by_scope_n_patterns(self.model_scope_full,
                                                    patterns)
        core_ops_prnd = get_ops_by_scope_n_patterns(self.model_scope_prnd,
                                                    patterns)

        # construct initialization & training operations
        init_ops, train_ops = [], []
        for idx, (core_op_full, core_op_prnd) in \
                enumerate(zip(core_ops_full, core_ops_prnd)):
            loss = tf.nn.l2_loss(core_op_prnd.outputs[0] -
                                 core_op_full.outputs[0])
            optimizer_base = tf.train.AdamOptimizer(FLAGS.ws_lrn_rate_rg)
            if FLAGS.enbl_multi_gpu:
                optimizer = mgw.DistributedOptimizer(optimizer_base)
            else:
                optimizer = optimizer_base
            grads_origin = optimizer.compute_gradients(
                loss, [self.vars_prnd['maskable'][idx]])
            grads_pruned = self.__calc_grads_pruned(grads_origin)
            train_ops += [optimizer.apply_gradients(grads_pruned)]
            init_ops += [tf.variables_initializer(optimizer_base.variables())]

        return tf.group(init_ops), train_ops
    def __build_minimal(self, model_helper):
        """Build the minimal graph for 'uniform' & 'heurist' protocols.

    Args:
    * model_helper: model helper with definitions of model & dataset
    """

        with tf.Graph().as_default():
            # create a TF session for the current graph
            config = tf.ConfigProto()
            config.gpu_options.visible_device_list = str(
                mgw.local_rank() if FLAGS.enbl_multi_gpu else 0)  # pylint: disable=no-member
            self.sess = tf.Session(config=config)

            # data input pipeline
            with tf.variable_scope(self.data_scope):
                iterator = model_helper.build_dataset_train()
                images, __ = iterator.get_next()

            # model definition - full-precision network
            with tf.variable_scope(self.model_scope_full):
                __ = model_helper.forward_eval(
                    images)  # DO NOT USE forward_train() HERE!!!
                self.vars_full = get_vars_by_scope(self.model_scope_full)
    def __retrain_network(self):
        """Retrain the network with layerwise regression & network fine-tuning."""

        # determine how many iterations to be executed for regression & fine-tuning
        nb_workers = mgw.size() if FLAGS.enbl_multi_gpu else 1
        nb_iters_rg = int(math.ceil(FLAGS.ws_nb_iters_rg / nb_workers))
        nb_iters_ft = int(math.ceil(FLAGS.ws_nb_iters_ft / nb_workers))

        # re-train the network with layerwise regression
        time_prev = timer()
        for rg_train_op in self.rg_train_ops:
            for __ in range(nb_iters_rg):
                self.sess_train.run(rg_train_op)
        time_rg = timer() - time_prev

        # re-train the network with global fine-tuning
        time_prev = timer()
        for __ in range(nb_iters_ft):
            self.sess_train.run(self.ft_train_op)
        time_ft = timer() - time_prev

        # display the time consumption
        tf.logging.info('time consumption: %.4f (s) - RG | %.4f (s) - FT' %
                        (time_rg, time_ft))