Example #1
  def _create_gumbel_control_variate(self, logQHard, temperature=None):
    '''Calculate gumbel control variate.
    '''
    if temperature is None:
      temperature = self.hparams.temperature

    logQ, softSamples = self._recognition_network(sampler=functools.partial(
        self._random_sample_soft, temperature=temperature))
    softELBO, _ = self._generator_network(softSamples, logQ)
    logQ = tf.add_n(logQ)

    # Generate the softELBO_v (should be the same value but different grads)
    logQ_v, softSamples_v = self._recognition_network(sampler=functools.partial(
        self._random_sample_soft_v, temperature=temperature))
    softELBO_v, _ = self._generator_network(softSamples_v, logQ_v)
    logQ_v = tf.add_n(logQ_v)

    # Compute losses
    learning_signal = tf.stop_gradient(softELBO_v)

    # Control variate
    h = (tf.stop_gradient(learning_signal) * tf.add_n(logQHard)
          - softELBO + softELBO_v)

    extra = (softELBO_v, -softELBO + softELBO_v)

    return h, extra
Example #2
    def __call__(self, flow=None):
        """Constructs the Sequential and its inner pieces.

        Args:
            flow: Input `Tensor` object. (Default value = None)

        Returns:
            Output of this `Parallel`.

        """

        # build inner pieces.
        with tf.variable_op_scope([], self.name, 'Parallel', reuse=self.reuse):
            if not self.reuse:
                self.reuse = True

            outputs = []
            for i, piece in enumerate(self.child_pieces):
                outputs.append(piece(flow))

            if self.mode == 'concat':
                return tf.concat(self.along_dim, outputs)
            elif self.mode == 'mean':
                return tf.add_n(outputs) / len(self.child_pieces)
            elif self.mode == 'sum':
                return tf.add_n(outputs)
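
To make the three combination modes concrete, here is a minimal NumPy sketch (the child outputs and the concat axis are hypothetical stand-ins):

import numpy as np

outputs = [np.ones((2, 3)), 2 * np.ones((2, 3)), 3 * np.ones((2, 3))]  # three child pieces

concat = np.concatenate(outputs, axis=1)         # 'concat' mode along along_dim: shape (2, 9)
mean = np.sum(outputs, axis=0) / len(outputs)    # 'mean' mode, like tf.add_n(outputs) / len(outputs)
summed = np.sum(outputs, axis=0)                 # 'sum' mode, like tf.add_n(outputs)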
Example #3
    def loss(self, traindata):
        """build models, calculate losses.

        Args:
            traindata: 4-D Tensor of shape `[batch, height, width, channels]`.

        Returns:
            dict of each models' losses.
        """
        generated = self.g(self.z, training=True)
        g_outputs = self.d(generated, training=True, name='g')
        t_outputs = self.d(traindata, training=True, name='t')
        # add each losses to collection
        tf.add_to_collection(
            'g_losses',
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.ones([self.batch_size], dtype=tf.int64),
                    logits=g_outputs)))
        tf.add_to_collection(
            'd_losses',
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.ones([self.batch_size], dtype=tf.int64),
                    logits=t_outputs)))
        tf.add_to_collection(
            'd_losses',
            tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=tf.zeros([self.batch_size], dtype=tf.int64),
                    logits=g_outputs)))
        return {
            self.g: tf.add_n(tf.get_collection('g_losses'), name='total_g_loss'),
            self.d: tf.add_n(tf.get_collection('d_losses'), name='total_d_loss'),
        }
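
The method above registers each partial loss in a named graph collection and only sums the collection with tf.add_n at the end. A minimal plain-Python sketch of that bookkeeping, with hypothetical loss values:

collections = {'g_losses': [], 'd_losses': []}
collections['g_losses'].append(0.7)    # generator loss: fake batch labelled as real
collections['d_losses'].append(0.4)    # discriminator loss on real data
collections['d_losses'].append(0.5)    # discriminator loss on generated data

total_g_loss = sum(collections['g_losses'])   # stands in for tf.add_n(tf.get_collection('g_losses'))
total_d_loss = sum(collections['d_losses'])   # stands in for tf.add_n(tf.get_collection('d_losses'))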
  def loss(self, predicts, labels, objects_num):
    """Add Loss to all the trainable variables

    Args:
      predicts: 4-D tensor [batch_size, cell_size, cell_size, 5 * boxes_per_cell]
      ===> (num_classes, boxes_per_cell, 4 * boxes_per_cell)
      labels  : 3-D tensor of [batch_size, max_objects, 5]
      objects_num: 1-D tensor [batch_size]
    """
    class_loss = tf.constant(0, tf.float32)
    object_loss = tf.constant(0, tf.float32)
    noobject_loss = tf.constant(0, tf.float32)
    coord_loss = tf.constant(0, tf.float32)
    loss = [0, 0, 0, 0]
    for i in range(self.batch_size):
      predict = predicts[i, :, :, :]
      label = labels[i, :, :]
      object_num = objects_num[i]
      nilboy = tf.ones([7,7,2])
      tuple_results = tf.while_loop(self.cond1, self.body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy])
      for j in range(4):
        loss[j] = loss[j] + tuple_results[2][j]
      nilboy = tuple_results[5]

    tf.add_to_collection('losses', (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size)

    tf.summary.scalar('class_loss', loss[0]/self.batch_size)
    tf.summary.scalar('object_loss', loss[1]/self.batch_size)
    tf.summary.scalar('noobject_loss', loss[2]/self.batch_size)
    tf.summary.scalar('coord_loss', loss[3]/self.batch_size)
    tf.summary.scalar('weight_loss', tf.add_n(tf.get_collection('losses')) - (loss[0] + loss[1] + loss[2] + loss[3])/self.batch_size )

    return tf.add_n(tf.get_collection('losses'), name='total_loss'), nilboy
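
The loop above accumulates the four partial losses over every example in the batch and then normalizes by the batch size. A small plain-Python sketch with hypothetical per-example (class, object, noobject, coord) losses:

batch_losses = [(0.2, 0.5, 0.1, 0.3), (0.4, 0.6, 0.2, 0.1)]   # hypothetical per-example losses
batch_size = len(batch_losses)

loss = [0.0, 0.0, 0.0, 0.0]
for example_losses in batch_losses:
    for j in range(4):
        loss[j] += example_losses[j]

class_loss, object_loss, noobject_loss, coord_loss = [l / batch_size for l in loss]
total = sum(loss) / batch_size   # the value added to the 'losses' collection above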
Example #5
  def _full_batch_training_op(self, inputs, cluster_idx_list, cluster_centers):
    """Creates an op for training for full batch case.

    Args:
      inputs: list of input Tensors.
      cluster_idx_list: A vector (or list of vectors). Each element in the
        vector corresponds to an input row in 'inp' and specifies the cluster id
        corresponding to the input.
      cluster_centers: Tensor Ref of cluster centers.

    Returns:
      An op for doing an update of full-batch k-means.
    """
    cluster_sums = []
    cluster_counts = []
    epsilon = tf.constant(1e-6, dtype=inputs[0].dtype)
    for inp, cluster_idx in zip(inputs, cluster_idx_list):
      with ops.colocate_with(inp):
        cluster_sums.append(tf.unsorted_segment_sum(inp,
                                                    cluster_idx,
                                                    self._num_clusters))
        cluster_counts.append(tf.unsorted_segment_sum(
            tf.reshape(tf.ones(tf.reshape(tf.shape(inp)[0], [-1])), [-1, 1]),
            cluster_idx,
            self._num_clusters))
    with ops.colocate_with(cluster_centers):
      new_clusters_centers = tf.add_n(cluster_sums) / (
          tf.cast(tf.add_n(cluster_counts), cluster_sums[0].dtype) + epsilon)
      if self._clusters_l2_normalized():
        new_clusters_centers = tf.nn.l2_normalize(new_clusters_centers, dim=1)
    return tf.assign(cluster_centers, new_clusters_centers)
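
The op above computes per-cluster sums and counts with tf.unsorted_segment_sum and divides the two. A minimal NumPy sketch of the same arithmetic, with hypothetical points and cluster assignments:

import numpy as np

inp = np.array([[0., 0.], [1., 0.], [4., 4.], [5., 5.], [0., 1.]])   # hypothetical inputs
cluster_idx = np.array([0, 0, 1, 1, 2])                              # hypothetical assignments
num_clusters, epsilon = 3, 1e-6

cluster_sums = np.zeros((num_clusters, 2))
cluster_counts = np.zeros((num_clusters, 1))
np.add.at(cluster_sums, cluster_idx, inp)    # like tf.unsorted_segment_sum over the points
np.add.at(cluster_counts, cluster_idx, 1.0)  # like tf.unsorted_segment_sum over ones

new_cluster_centers = cluster_sums / (cluster_counts + epsilon)      # full-batch k-means update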
Example #6
def _tower_loss(iterator, num_of_classes, ignore_label, scope, reuse_variable):
  """Calculates the total loss on a single tower running the deeplab model.

  Args:
    iterator: An iterator of type tf.data.Iterator for images and labels.
    num_of_classes: Number of classes for the dataset.
    ignore_label: Ignore label for the dataset.
    scope: Unique prefix string identifying the deeplab tower.
    reuse_variable: If the variable should be reused.

  Returns:
     The total loss for a batch of data.
  """
  with tf.variable_scope(
      tf.get_variable_scope(), reuse=True if reuse_variable else None):
    _build_deeplab(iterator, {common.OUTPUT_TYPE: num_of_classes}, ignore_label)

  losses = tf.losses.get_losses(scope=scope)
  for loss in losses:
    tf.summary.scalar('Losses/%s' % loss.op.name, loss)

  regularization_loss = tf.losses.get_regularization_loss(scope=scope)
  tf.summary.scalar('Losses/%s' % regularization_loss.op.name,
                    regularization_loss)

  total_loss = tf.add_n([tf.add_n(losses), regularization_loss])
  return total_loss
Example #7
  def _make_objectives(self):
    # TODO: Hacky, will cause clashes if multiple DPG instances.
    policy_params = self._policy_params()
    critic_params = [var for var in tf.all_variables()
                     if "critic/" in var.name]
    self.policy_params = policy_params
    self.critic_params = critic_params

    # Policy objective: maximize on-policy critic activations
    mean_critic_over_time = tf.add_n(self.critic_on) / self.seq_length
    mean_critic = tf.reduce_mean(mean_critic_over_time)
    self.policy_objective = -mean_critic

    # DEV
    tf.scalar_summary("critic(a_pred).mean", mean_critic)

    # Critic objective: minimize MSE of off-policy Q-value predictions
    q_errors = [tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(critic_off_t, q_targets_t))#tf.square(critic_off_t - q_targets_t))
                for critic_off_t, q_targets_t
                in zip(self.critic_off_pre, self.q_targets)]
    self.critic_objective = tf.add_n(q_errors) / self.seq_length
    tf.scalar_summary("critic_objective", self.critic_objective)

    mean_critic_off = tf.reduce_mean(tf.add_n(self.critic_off)) / self.seq_length
    tf.scalar_summary("critic(a_explore).mean", mean_critic_off)

    tf.scalar_summary("a_pred.mean", tf.reduce_mean(tf.add_n(self.a_pred)) / self.seq_length)
    tf.scalar_summary("a_pred.maxabs", tf.reduce_max(tf.abs(tf.pack(self.a_pred))))
  def top_sharded(self,
                  sharded_body_output,
                  sharded_targets,
                  data_parallelism,
                  weights_fn=common_layers.weights_nonzero):
    """Transform all shards of targets.

    Classes with cross-shard interaction will override this function.

    Args:
      sharded_body_output: A list of Tensors.
      sharded_targets: A list of Tensors.
      data_parallelism: an expert_utils.Parallelism object.
      weights_fn: function from targets to target weights.
    Returns:
      sharded_logits: A list of Tensors.
      training_loss: a Scalar.
    """
    sharded_logits = data_parallelism(self.top, sharded_body_output,
                                      sharded_targets)
    loss_num, loss_den = data_parallelism(
        common_layers.padded_cross_entropy,
        sharded_logits,
        sharded_targets,
        self._model_hparams.label_smoothing,
        weights_fn=weights_fn)
    loss = tf.add_n(loss_num) / tf.maximum(1.0, tf.add_n(loss_den))
    return sharded_logits, loss
  def build_model(self):
    self.x = tf.placeholder(tf.float32, [self.reader.vocab_size], name="input")
    self.x_idx = tf.placeholder(tf.int32, [None], name='x_idx')  # mask paddings

    self.build_encoder()
    self.build_generator()

    self.objective = self.kl + self.recons_loss
    
    # optimizer for alternative update
    optimizer1 = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    optimizer2 = tf.train.AdamOptimizer(learning_rate=0.1)
   
    fullvars = tf.GraphKeys.TRAINABLE_VARIABLES
    print 'fullvars:', fullvars

    enc_vars = tf.get_collection(fullvars, scope='encoder')
    print enc_vars

    dec_vars = tf.get_collection(fullvars, scope='generator')
    print dec_vars
    self.lossL2_enc = tf.add_n([tf.nn.l2_loss(v) for v in enc_vars if 'bias' not in v.name]) * 0.0001
    self.lossL2_dec = tf.add_n([tf.nn.l2_loss(v) for v in dec_vars if 'bias' not in v.name])
    print 'lossL2_enc:', self.lossL2_enc
    print 'lossL2_dec:', self.lossL2_dec
    enc_grads = tf.gradients(self.kl + self.lossL2_enc, enc_vars)
    dec_grads = tf.gradients(self.recons_loss + self.lossL2_dec, dec_vars)

    self.optim_enc = optimizer1.apply_gradients(zip(enc_grads, enc_vars))
    self.optim_dec = optimizer2.apply_gradients(zip(dec_grads, dec_vars))
Example #10
def multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        label_loss, box_loss
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    losses = []
    with tf.name_scope('rpn_losses'):
        for lvl in range(num_lvl):
            anchors = multilevel_anchors[lvl]
            label_loss, box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                multilevel_label_logits[lvl], multilevel_box_logits[lvl],
                name_scope='level{}'.format(lvl + 2))
            losses.extend([label_loss, box_loss])

        total_label_loss = tf.add_n(losses[::2], name='label_loss')
        total_box_loss = tf.add_n(losses[1::2], name='box_loss')
        add_moving_summary(total_label_loss, total_box_loss)
    return total_label_loss, total_box_loss
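
Because label and box losses are appended alternately, the final tf.add_n calls sum every other element of the list. A tiny plain-Python sketch with hypothetical per-level values:

losses = [0.30, 0.10, 0.25, 0.08, 0.20, 0.05]   # hypothetical [label_lvl2, box_lvl2, label_lvl3, ...]
total_label_loss = sum(losses[::2])             # like tf.add_n(losses[::2], name='label_loss')
total_box_loss = sum(losses[1::2])              # like tf.add_n(losses[1::2], name='box_loss')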
Example #11
    def __init__(self, nr_gpu, input, model):
        super(MultiGPUGANTrainer, self).__init__()
        assert nr_gpu > 1
        raw_devices = ['/gpu:{}'.format(k) for k in range(nr_gpu)]

        # Setup input
        input = StagingInput(input)
        cbs = input.setup(model.get_inputs_desc())
        self.register_callback(cbs)

        # Build the graph with multi-gpu replication
        def get_cost(*inputs):
            model.build_graph(*inputs)
            return [model.d_loss, model.g_loss]
        self.tower_func = TowerFuncWrapper(get_cost, model.get_inputs_desc())
        devices = [LeastLoadedDeviceSetter(d, raw_devices) for d in raw_devices]
        cost_list = DataParallelBuilder.build_on_towers(
            list(range(nr_gpu)),
            lambda: self.tower_func(*input.get_input_tensors()),
            devices)
        # Simply average the cost here. It might be faster to average the gradients
        with tf.name_scope('optimize'):
            d_loss = tf.add_n([x[0] for x in cost_list]) * (1.0 / nr_gpu)
            g_loss = tf.add_n([x[1] for x in cost_list]) * (1.0 / nr_gpu)

            opt = model.get_optimizer()
            # run one d_min after one g_min
            g_min = opt.minimize(g_loss, var_list=model.g_vars,
                                 colocate_gradients_with_ops=True, name='g_op')
            with tf.control_dependencies([g_min]):
                d_min = opt.minimize(d_loss, var_list=model.d_vars,
                                     colocate_gradients_with_ops=True, name='d_op')
        # Define the training iteration
        self.train_op = d_min
	def _hourglass(self, inputs, n, numOut, name = 'hourglass'):
		""" Hourglass Module
		Args:
			inputs	: Input Tensor
			n		: Number of downsampling step
			numOut	: Number of Output Features (channels)
			name	: Name of the block
		"""
		with tf.name_scope(name):
			# Upper Branch
			up_1 = self._residual(inputs, numOut, name = 'up_1')
			# Lower Branch
			low_ = tf.contrib.layers.max_pool2d(inputs, [2,2], [2,2], padding='VALID')
			low_1= self._residual(low_, numOut, name = 'low_1')
			
			if n > 0:
				low_2 = self._hourglass(low_1, n-1, numOut, name = 'low_2')
			else:
				low_2 = self._residual(low_1, numOut, name = 'low_2')
				
			low_3 = self._residual(low_2, numOut, name = 'low_3')
			up_2 = tf.image.resize_nearest_neighbor(low_3, tf.shape(low_3)[1:3]*2, name = 'upsampling')
			if self.modif:
				# Use of RELU
				return tf.nn.relu(tf.add_n([up_2,up_1]), name='out_hg')
			else:
				return tf.add_n([up_2,up_1], name='out_hg')
    def combined_loss_G(self,batch_size_tf):
        """
        Calculates the sum of the combined adversarial, lp and GDL losses in the given proportion. Used
        for training the generative model.

        @param gen_frames: A list of tensors of the generated frames at each scale.
        @param gt_frames: A list of tensors of the ground truth frames at each scale.
        @param d_preds: A list of tensors of the classifications made by the discriminator model at each
                        scale.
        @param lam_adv: The percentage of the adversarial loss to use in the combined loss.
        @param lam_lp: The percentage of the lp loss to use in the combined loss.
        @param lam_gdl: The percentage of the GDL loss to use in the combined loss.
        @param l_num: 1 or 2 for l1 and l2 loss, respectively.
        @param alpha: The power to which each gradient term is raised in GDL loss.

        @return: The combined adversarial, lp and GDL losses.

        """

        diceterm=loss_dice(self.G, self.CT_GT, self.num_classes,batch_size_tf)
        fcnterm=lossfcn(self.G, self.CT_GT, self.num_classes, batch_size_tf, self.classweights)
        if self.adversarial:
            bceterm=tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(self.D_logits_, tf.ones_like(self.D_)))
            loss_=self.lam_dice*diceterm + self.lam_fcn*fcnterm + self.lam_adv*bceterm
            tf.add_to_collection('losses', loss_)
            loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
            return loss, diceterm, fcnterm, bceterm

        else:
            loss_=self.lam_dice*diceterm + self.lam_fcn*fcnterm
            tf.add_to_collection('losses', loss_)
            loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
            return loss, self.lam_dice*diceterm, self.lam_fcn*fcnterm
Example #14
def solve(global_step):
    """add solver to losses"""
    # learning rate
    lr = _configure_learning_rate(82783, global_step)
    optimizer = _configure_optimizer(lr)
    tf.summary.scalar('learning_rate', lr)

    # compute and apply gradient
    losses = tf.get_collection(tf.GraphKeys.LOSSES)
    regular_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    regular_loss = tf.add_n(regular_losses)
    out_loss = tf.add_n(losses)
    total_loss = tf.add_n(losses + regular_losses)

    tf.summary.scalar('total_loss', total_loss)
    tf.summary.scalar('out_loss', out_loss)
    tf.summary.scalar('regular_loss', regular_loss)

    update_ops = []
    variables_to_train = _get_variables_to_train()
    # update_op = optimizer.minimize(total_loss)
    gradients = optimizer.compute_gradients(total_loss, var_list=variables_to_train)
    grad_updates = optimizer.apply_gradients(gradients, 
            global_step=global_step)
    update_ops.append(grad_updates)
    
    # update moving mean and variance
    if FLAGS.update_bn:
        update_bns = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        update_bn = tf.group(*update_bns)
        update_ops.append(update_bn)

    return tf.group(*update_ops)
Example #15
  def _build(self, dataset, feature_transformer):
    if self.samples_per_class is not None:
      if dataset not in self.dataset_map:
        # datasets are created outside of frames from while loops
        with tf.control_dependencies(None):
          self.dataset_map[dataset] = utils.sample_n_per_class(
              dataset, self.samples_per_class)

      dataset = self.dataset_map[dataset]

    stats = collections.defaultdict(list)
    losses = []
    # TODO(lmetz) move this to ingraph control flow?
    for _ in xrange(self.averages):
      loss, stat = self._build_once(dataset, feature_transformer)
      losses.append(loss)
      for k, v in stat.items():
        stats[k].append(v)
    stats = {k: tf.add_n(v) / float(len(v)) for k, v in stats.items()}

    summary_updates = []
    for k, v in stats.items():
      tf.summary.scalar(k, v)

    with tf.control_dependencies(summary_updates):
      return tf.add_n(losses) / float(len(losses))
 def _read(self, keys, redundant_states):
     read = _comp_mul(keys, redundant_states)
     if self._num_copies > 1:
         xs_real = tf.split(1, self._num_copies, _comp_real(read))
         xs_imag = tf.split(1, self._num_copies, _comp_imag(read))
         read = (tf.add_n(xs_real)/self._num_copies, tf.add_n(xs_imag)/self._num_copies)
     return read
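
A minimal NumPy sketch of the averaging over memory copies, assuming the read is held as separate real and imaginary parts that are split along axis 1 (all values hypothetical):

import numpy as np

num_copies = 2
read_real = np.array([[1.0, 3.0], [2.0, 4.0]])   # hypothetical real parts, one block per copy
read_imag = np.array([[0.5, 1.5], [0.0, 2.0]])   # hypothetical imaginary parts

xs_real = np.split(read_real, num_copies, axis=1)        # like tf.split(1, num_copies, ...)
xs_imag = np.split(read_imag, num_copies, axis=1)
read = (np.sum(xs_real, axis=0) / num_copies,            # like tf.add_n(xs_real) / num_copies
        np.sum(xs_imag, axis=0) / num_copies)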
Example #17
  def after_apply(self):
    self._moving_averager = tf.train.ExponentialMovingAverage(decay=self._beta, zero_debias=self._zero_debias)
    assert self._grads is not None and len(self._grads) > 0
    after_apply_ops = []

    # get per var g**2 and norm**2
    self._grad_squared = []
    self._grad_norm_squared = []
    for v, g in zip(self._tvars, self._grads):
      with ops.colocate_with(v):
        self._grad_squared.append(tf.square(g) )
    self._grad_norm_squared = [tf.reduce_sum(grad_squared) for grad_squared in self._grad_squared]

    # the following running average on squared norm of gradient is shared by grad_var and dist_to_opt
    avg_op = self._moving_averager.apply(self._grad_norm_squared)
    with tf.control_dependencies([avg_op] ):
      self._grad_norm_squared_avg = [self._moving_averager.average(val) for val in self._grad_norm_squared]
      self._grad_norm_squared = tf.add_n(self._grad_norm_squared)
      self._grad_norm_squared_avg = tf.add_n(self._grad_norm_squared_avg)
    after_apply_ops.append(avg_op)

    with tf.control_dependencies([avg_op] ):
      curv_range_ops = self.curvature_range()
      after_apply_ops += curv_range_ops
      grad_var_ops = self.grad_variance()
      after_apply_ops += grad_var_ops
      dist_to_opt_ops = self.dist_to_opt() 
      after_apply_ops += dist_to_opt_ops

    return tf.group(*after_apply_ops)
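
The statistics above reduce each variable's squared gradient to a scalar and then sum those scalars with tf.add_n to get the global squared gradient norm. A minimal NumPy sketch with hypothetical gradients:

import numpy as np

grads = [np.array([0.1, -0.2]), np.array([[0.3, 0.0], [0.1, 0.4]])]   # hypothetical per-variable grads

grad_squared = [g ** 2 for g in grads]
grad_norm_squared_per_var = [np.sum(gs) for gs in grad_squared]   # per-variable ||g||^2
grad_norm_squared = sum(grad_norm_squared_per_var)                # like tf.add_n(...): global ||g||^2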
  def __init__(self, gan=None, config=None, trainer=None, name="SelfSupervisedTrainHook"):
    super().__init__(config=config, gan=gan, trainer=trainer, name=name)
    g_loss = []
    d_loss = []
    if hasattr(self.gan.inputs, 'frames'):
        x = gan.x0#gan.inputs.x
        g = gan.g0#gan.generator.sample
    else:
        x = gan.inputs.x
        g = gan.generator.sample
    reuse = False
    for i in range(4):
        if gan.width() != gan.height() and i % 2 == 0:
            continue
        _x = tf.image.rot90(x, i+1)
        _g = tf.image.rot90(g, i+1)
        stacked = tf.concat([_x, _g], axis=0)
        shared = gan.create_discriminator(stacked, reuse=True).named_layers['shared']
        r = gan.create_component(config["r"], input=shared, reuse=reuse)
        reuse=True
        gan.discriminator.add_variables(r)
        gan.generator.add_variables(r)
        labels = tf.one_hot(i, 4)
        _dl = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=r.sample[0])
        _gl = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=r.sample[1])
        d_loss.append(_dl)
        g_loss.append(_gl)

    self.g_loss = (self.config.alpha or 1.0) * tf.add_n(g_loss)
    self.d_loss = (self.config.beta or 1.0) * tf.add_n(d_loss)

    self.gan.add_metric('ssgl', self.g_loss)
    self.gan.add_metric('ssdl', self.d_loss)
Example #19
    def _build_graph(self, inputs):
        image, label = inputs
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1   # center the pixels values at zero

        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            M = self._build_keras_model()
            logits = M(image)
        prob = tf.nn.softmax(logits, name='prob')   # a Bx10 with probabilities

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        wrong = symbolic_functions.prediction_incorrect(logits, label, name='incorrect')
        train_error = tf.reduce_mean(wrong, name='train_error')
        summary.add_moving_summary(train_error)

        wd_cost = tf.add_n(M.losses, name='regularize_loss')    # this is how Keras manage regularizers
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # this is the keras naming
        summary.add_param_summary(('conv2d.*/kernel', ['histogram', 'rms']))
Example #20
def sequence_loss_by_example(inputs, targets, weights, loss_function,
                             average_across_timesteps=True, name=None):
  """Sampled softmax loss for a sequence of inputs (per example).

  Args:
    inputs: List of 2D Tensors of shape [batch_size x hid_dim].
    targets: List of 1D batch-sized int32 Tensors of the same length as logits.
    weights: List of 1D batch-sized float-Tensors of the same length as logits.
    loss_function: Sampled softmax function (inputs, labels) -> loss
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    name: Optional name for this operation, default: 'sequence_loss_by_example'.

  Returns:
    1D batch-sized float Tensor: The log-perplexity for each sequence.

  Raises:
    ValueError: If len(inputs) is different from len(targets) or len(weights).
  """
  if len(targets) != len(inputs) or len(weights) != len(inputs):
    raise ValueError('Lengths of logits, weights, and targets must be the same '
                     '%d, %d, %d.' % (len(inputs), len(weights), len(targets)))
  with tf.op_scope(inputs + targets + weights, name,
                   'sequence_loss_by_example'):
    log_perp_list = []
    for inp, target, weight in zip(inputs, targets, weights):
      crossent = loss_function(inp, target)
      log_perp_list.append(crossent * weight)
    log_perps = tf.add_n(log_perp_list)
    if average_across_timesteps:
      total_size = tf.add_n(weights)
      total_size += 1e-12  # Just to avoid division by 0 for all-0 weights.
      log_perps /= total_size
  return log_perps
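
A minimal NumPy sketch of the per-example log-perplexity above, with hypothetical per-timestep cross-entropies and weights for a batch of two sequences (the second sequence has one padded step):

import numpy as np

crossents = [np.array([2.0, 1.0]), np.array([1.5, 0.5]), np.array([1.0, 0.0])]   # one array per timestep
weights = [np.array([1.0, 1.0]), np.array([1.0, 1.0]), np.array([1.0, 0.0])]     # 0.0 marks padding

log_perps = np.sum([c * w for c, w in zip(crossents, weights)], axis=0)   # like tf.add_n(log_perp_list)
total_size = np.sum(weights, axis=0) + 1e-12                              # total label weight per example
log_perps /= total_size                                                   # average across timesteps
# log_perps is now [1.5, 0.75]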
Example #21
def _shake_shake_block(x, output_filters, stride, is_training):
  """Builds a full shake-shake sub layer."""
  batch_size = tf.shape(x)[0]

  # Generate random numbers for scaling the branches
  rand_forward = [
      tf.random_uniform(
          [batch_size, 1, 1, 1], minval=0, maxval=1, dtype=tf.float32)
      for _ in range(2)
  ]
  rand_backward = [
      tf.random_uniform(
          [batch_size, 1, 1, 1], minval=0, maxval=1, dtype=tf.float32)
      for _ in range(2)
  ]
  # Normalize so that all sum to 1
  total_forward = tf.add_n(rand_forward)
  total_backward = tf.add_n(rand_backward)
  rand_forward = [samp / total_forward for samp in rand_forward]
  rand_backward = [samp / total_backward for samp in rand_backward]
  zipped_rand = zip(rand_forward, rand_backward)

  branches = []
  for branch, (r_forward, r_backward) in enumerate(zipped_rand):
    with tf.variable_scope('branch_{}'.format(branch)):
      b = _shake_shake_branch(x, output_filters, stride, r_forward, r_backward,
                              is_training)
      branches.append(b)
  res = _shake_shake_skip_connection(x, output_filters, stride)
  return res + tf.add_n(branches)
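
A minimal NumPy sketch of how the two random branch scales are normalized to sum to one before the branches are combined (hypothetical batch of 3):

import numpy as np

rand_forward = [np.random.uniform(0, 1, size=(3, 1, 1, 1)) for _ in range(2)]
total_forward = np.sum(rand_forward, axis=0)                   # like tf.add_n(rand_forward)
rand_forward = [samp / total_forward for samp in rand_forward]

# The per-example branch weights now sum to exactly 1.
assert np.allclose(np.sum(rand_forward, axis=0), 1.0)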
Example #22
 def loss_sharded(self, sharded_top_out, sharded_targets, data_parallelism):
   """Compute loss for all shards."""
   sharded_loss_num, sharded_loss_den = data_parallelism(
       self.loss, sharded_top_out, sharded_targets)
   loss = tf.add_n(sharded_loss_num) / tf.maximum(1.0,
                                                  tf.add_n(sharded_loss_den))
   return loss
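
The sharded loss is the summed numerator divided by the summed denominator, clamped away from zero. A tiny plain-Python sketch with hypothetical shard values:

sharded_loss_num = [12.5, 9.0]    # hypothetical per-shard loss sums
sharded_loss_den = [40.0, 32.0]   # hypothetical per-shard token weights
loss = sum(sharded_loss_num) / max(1.0, sum(sharded_loss_den))   # mirrors tf.add_n / tf.maximum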
Example #23
def loss(logits, labels, lambs):
    # apply the sigmoid function to the logits and then transpose
    logits = tf.transpose(framwork.sig_func(logits))
    # according to the labels, erase rows which are not in labels
    labels_unique = tf.constant(range(NUM_CLASSES), dtype=tf.int32)
    labels_num = NUM_CLASSES
    # logits = tf.gather(logits, indices=labels_unique)
    # lambs = tf.gather(lambs, indices=labels_unique)
    # set the value of each row to True when it occurs in labels
    template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, BATCH_SIZE])
    labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
    indict_logic = tf.equal(labels_expand, template)
    # split the tensor along rows
    logit_list = tf.split(0, labels_num, logits)
    indict_logic_list = tf.split(0, labels_num, indict_logic)
    lambda_list = tf.split(0, NUM_CLASSES, lambs)
    # loss_list = list()
    # for i in range(self.image_classes):
    #     loss_list.append(framwork.loss_func(logit_list[i], indict_logic_list[i], lambda_list[i]))
    loss_list = map(framwork.loss_func, logit_list, indict_logic_list, lambda_list)
    losses = tf.add_n(loss_list)
    tf.add_to_collection('losses', losses)
    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
    def create(self):
        gan = self.gan
        config = self.config
        ops = self.gan.ops
        split = len(gan.generator.children)+len(gan.generator.parents)+1
        #generator structure: 
        # x, gp1, ..., gpn, gc1, ..., gcm
        d_real = self.d_real
        d_fake = self.d_fake

        net = gan.discriminator.sample

        ds = self.split_batch(net, split)
        d_real = ds[0]
        d_fake = tf.add_n(ds[1:len(gan.generator.parents)+1])/(len(gan.generator.parents))
        d_loss, _ = self._create(d_real, d_fake)

        ds = self.split_batch(net, split)
        d_real = ds[0]
        d_fake = tf.add_n(ds[1+len(gan.generator.parents):])/(len(gan.generator.children))
        _, g_loss = self._create(d_real, d_fake)
        self.children_losses = self.split_batch(g_loss, len(gan.generator.children))

        d_loss = ops.squash(d_loss, config.reduce or tf.reduce_mean) #linear doesn't work with this
        g_loss = ops.squash(g_loss, config.reduce or tf.reduce_mean)

        self.sample = [d_loss, g_loss]
        self.d_loss = d_loss
        self.g_loss = g_loss

        return self.sample
Example #25
def weight_decay(penalty_type, penalty):
  """Add weight decay.

  Args:
    penalty_type: str, type of penalty to apply, either 'l1' or 'l2'.
    penalty: float, scale factor for the weight decay cost.

  Returns:
    A scalar tensor containing the weight decay cost.

  Raises:
    NotImplementedError: If an unsupported penalty type is requested.
  """
  variables = []
  # exclude bias variables
  for v in tf.trainable_variables():
    if v.get_shape().ndims == 2:
      variables.append(v)

  with tf.name_scope('weight_decay'):
    if penalty_type == 'l1':
      cost = tf.add_n([tf.reduce_sum(tf.abs(v)) for v in variables])
    elif penalty_type == 'l2':
      cost = tf.add_n([tf.nn.l2_loss(v) for v in variables])
    else:
      raise NotImplementedError('Unsupported penalty_type %s' % penalty_type)
    cost *= penalty
    #tf.scalar_summary('Weight Decay Cost', cost)
  return cost
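
A minimal NumPy sketch of the two penalty types, with hypothetical weight matrices (bias variables excluded, as above):

import numpy as np

variables = [np.array([[1.0, -2.0], [0.5, 0.0]]), np.array([[3.0], [-1.0]])]   # hypothetical weights
penalty = 1e-4

l1_cost = sum(np.sum(np.abs(v)) for v in variables) * penalty        # 'l1' branch
l2_cost = sum(np.sum(v ** 2) / 2.0 for v in variables) * penalty     # 'l2' branch; tf.nn.l2_loss is sum(v**2) / 2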
Example #26
def allreduce_grads_hierarchical(all_grads, devices, average=False):
    """
    Hierarchical allreduce for DGX-1 system.

    Args:
        all_grads (K x N): List of list of gradients. N is the number of variables.
        devices ([str]): K str for the K devices.
        average (bool): average gradients or not.

    Returns:
        (K x N): same as input, but each grad is replaced by the average over K lists.
    """
    num_gpu = len(devices)
    assert num_gpu == 8, num_gpu
    assert len(all_grads) == num_gpu, len(all_grads)
    group_size = num_gpu // 2

    agg_all_grads = []  # N x K
    for varid, grads in enumerate(zip(*all_grads)):
        # grads: K gradients
        g0_main_gpu = varid % num_gpu
        g1_main_gpu = (g0_main_gpu + group_size) % num_gpu
        g0_start = 0 if g0_main_gpu < group_size else group_size
        g1_start = 0 if g1_main_gpu < group_size else group_size
        assert g0_start != g1_start
        g0_grads = grads[g0_start: g0_start + group_size]
        g1_grads = grads[g1_start: g1_start + group_size]

        with tf.device(devices[g0_main_gpu]):
            g0_agg = tf.add_n(g0_grads, name='group0_agg')

        with tf.device(devices[g1_main_gpu]):
            g1_agg = tf.add_n(g1_grads, name='group1_agg')
            g1_total_agg = tf.add(g0_agg, g1_agg, name='group1_total_agg')

        with tf.device(devices[g0_main_gpu]):
            g0_total_agg = tf.identity(g1_total_agg, name='group0_total_agg')

        agg_grads = []  # K aggregated grads
        for k in range(num_gpu):
            if (k < group_size) == (g0_main_gpu < group_size):
                main_gpu = g0_total_agg
            else:
                main_gpu = g1_total_agg
            with tf.device(devices[k]):
                if not average:
                    device_total_agg = tf.identity(
                        main_gpu, name='device{}_total_agg'.format(k))
                else:
                    # TODO where to put average?
                    device_total_agg = tf.multiply(
                        main_gpu, 1.0 / num_gpu, name='device{}_total_agg'.format(k))
                agg_grads.append(device_total_agg)

        agg_all_grads.append(agg_grads)

    # transpose
    agg_all_grads = list(zip(*agg_all_grads))   # K x Nvar
    return agg_all_grads
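
A minimal plain-Python sketch of the two-group reduction for a single variable, assuming 8 hypothetical scalar gradients: each group sums its half, group 1 adds group 0's partial sum to get the full total, and the (optionally averaged) total is broadcast back to every GPU:

num_gpu, group_size = 8, 4
grads = [float(k + 1) for k in range(num_gpu)]   # hypothetical per-GPU gradients of one variable

g0_agg = sum(grads[:group_size])       # like tf.add_n(g0_grads, name='group0_agg')
g1_agg = sum(grads[group_size:])       # like tf.add_n(g1_grads, name='group1_agg')
g1_total_agg = g0_agg + g1_agg         # full sum, formed on group 1's main GPU

agg_grads = [g1_total_agg / num_gpu for _ in range(num_gpu)]   # averaged total on every GPU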
 def get_train_collection(self):
     ret = dict()
     ret['rpn_loss_cls'] = tf.add_n(tf.get_collection('rpn_loss_cls'))
     ret['rpn_loss_box'] = tf.add_n(tf.get_collection('rpn_loss_box'))
     ret['loss_cls'] = tf.add_n(tf.get_collection('loss_cls'))
     ret['loss_box'] = tf.add_n(tf.get_collection('loss_box'))
     ret['tot_losses'] = tf.add_n(tf.get_collection('losses'))
     return ret
Example #28
 def _integral(lower, upper):
     result = []
     for f_scale, nd_bounds, nd_integral, normalisation_1 in all_integrals:
         nd_normalisation_2 = []
         for bounds, integral in zip(nd_bounds, nd_integral):
             integral_bounds = find_common_bounds([Region(lower, upper)], bounds)
             nd_normalisation_2.append(_integrate_component(integral_bounds, integral))
         result.append(f_scale/tf.add_n(normalisation_1)*tf.add_n(nd_normalisation_2))
     return tf.add_n(result)
def sequence_loss_by_example(logits, targets, weights, num_decoder_symbols,
                             average_across_timesteps=True,
                             softmax_loss_function=None, name=None):
  """Weighted cross-entropy loss for a sequence of logits (per example).

  Args:
    logits: list of 2D Tensors of shape [batch_size x num_decoder_symbols]. nick logits are 2d tensors
    targets: list of 1D batch-sized int32-Tensors of the same length as logits.
    weights: list of 1D batch-sized float-Tensors of the same length as logits.
    num_decoder_symbols: integer, number of decoder symbols (output classes).
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    softmax_loss_function: function (inputs-batch, labels-batch) -> loss-batch
      to be used instead of the standard softmax (the default if this is None).
    name: optional name for this operation, default: "sequence_loss_by_example".

  Returns:
    1D batch-sized float Tensor: the log-perplexity for each sequence.
    notice here they take the ln(perplexity) -- which is why you get loss as you do

  Raises:
    ValueError: if len(logits) is different from len(targets) or len(weights).
  """
  if len(targets) != len(logits) or len(weights) != len(logits):
    raise ValueError("Lengths of logits, weights, and targets must be the same "
                     "%d, %d, %d." % (len(logits), len(weights), len(targets)))
  with tf.op_scope(logits + targets + weights, name,
                   "sequence_loss_by_example"):
    batch_size = tf.shape(targets[0])[0]
    log_perp_list = []
    length = batch_size * num_decoder_symbols #this represents the batch size x vocab size
    for i in xrange(len(logits)):
      if softmax_loss_function is None:
        # TODO(lukaszkaiser): There is no SparseCrossEntropy in TensorFlow, so
        # we need to first cast targets into a dense representation, and as
        # SparseToDense does not accept batched inputs, we need to do this by
        # re-indexing and re-sizing. When TensorFlow adds SparseCrossEntropy,
        # rewrite this method.
        indices = targets[i] + num_decoder_symbols * tf.range(batch_size)
        with tf.device("/cpu:0"):  # Sparse-to-dense must happen on CPU for now.
          dense = tf.sparse_to_dense(indices, tf.expand_dims(length, 0), 1.0,
                                     0.0)
        target = tf.reshape(dense, [-1, num_decoder_symbols])
        crossent = tf.nn.softmax_cross_entropy_with_logits(
            logits[i], target, name="SequenceLoss/CrossEntropy{0}".format(i))
      else:
        crossent = softmax_loss_function(logits[i], targets[i])

      log_perp_list.append(crossent * weights[i]) #this determines the cost I think?

    log_perps = tf.add_n(log_perp_list) #this adds all the elements in the tensor together
    if average_across_timesteps:
      total_size = tf.add_n(weights) #nick, this adds element wise all the of weights -- this produces just one number!
      total_size += 1e-12  # Just to avoid division by 0 for all-0 weights. This is adding it to just one number! total_size = total_size + 1e-12
      log_perps /= total_size #one number is produced here! this is equivalent to log_perps = log_perps/total_size
  return log_perps #this is the natural log of your perplexity
Example #30
    def __init__(self, params, network, loss, score, optimizer, image_summary=True):
        self.params = params
        self.network = network
        self.loss = loss
        self.score = score
        self.optimizer = optimizer

        self.root_path = os.path.dirname(os.path.realpath(__file__))
        self.results_path = os.path.join(self.root_path, 'results')
        self.experiment_path = os.path.join(self.results_path, params['experiment'])
        self.trial_path = os.path.join(self.experiment_path, params['trial'])
        self.checkpoint_path = os.path.join(self.results_path, self.params['experiment'], self.params['trial'])
        self.model_path = os.path.join(self.checkpoint_path, 'model.ckpt')

        if not os.path.exists(self.results_path):
            os.mkdir(self.results_path)

        if not os.path.exists(self.experiment_path):
            os.mkdir(self.experiment_path)

        if not os.path.exists(self.trial_path):
            os.mkdir(self.trial_path)

        for i in range(len(self.network.weights)):
            tf.add_to_collection('losses', tf.mul(tf.nn.l2_loss(self.network.weights[i]), self.params['weight_decay']))

            tf.histogram_summary('weights/layer #%d' % i, self.network.weights[i])
            tf.histogram_summary('biases/layer #%d' % i, self.network.biases[i])

        weight_loss = tf.add_n(tf.get_collection('losses'))
        tf.add_to_collection('losses', self.loss)
        total_loss = tf.add_n(tf.get_collection('losses'))

        tf.scalar_summary('loss/base', self.loss)
        tf.scalar_summary('loss/weights', weight_loss)
        tf.scalar_summary('loss/total', total_loss)

        if image_summary:
            tf.image_summary('images/reference', self.network.y_)
            tf.image_summary('images/distorted', self.network.x)
            tf.image_summary('images/cleaned', tf.minimum(self.network.output(), 1.))

        tf.scalar_summary('score/train', self.score)

        self.train_summary_step = tf.merge_all_summaries()
        self.score_placeholder = tf.placeholder(tf.float32)

        self.val_summary_step = tf.scalar_summary('score/validation', self.score_placeholder)
        self.test_summary_step = tf.scalar_summary('score/test', self.score_placeholder)

        self.summary_writer = tf.train.SummaryWriter(self.trial_path)

        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.train_step = self.optimizer.minimize(total_loss, global_step=self.global_step)

        self.saver = tf.train.Saver()
Example #31
def train_3d_nn():
    time0 = time.time()
    chunks_ids = get_ids(DATA_PATH)
    X, Y = get_data(chunks_ids, DATA_PATH)

    print("Total time to load data: " +
          str(timedelta(seconds=int(round(time.time() - time0)))))
    print('Splitting into train, validation sets')
    Y = np.argmax(Y, axis=1)

    # Crunch 4 classes to 2
    Y[Y == 2] = 1
    Y[Y == 3] = 1

    train_x, validation_x, train_y, validation_y = model_selection.train_test_split(
        X, Y, random_state=42, stratify=Y, test_size=0.20)

    klass_weights = np.asarray([69838.0 / 40513.0, 69838.0 / 29325.0])
    # Free up X and Y memory
    del X
    del Y
    print("Total time to split: " +
          str(timedelta(seconds=int(round(time.time() - time0)))))

    print('train_x: {}'.format(train_x.shape))
    print('validation_x: {}'.format(validation_x.shape))
    print('train_y: {}'.format(train_y.shape))
    print('validation_y: {}'.format(validation_y.shape))

    train_y = (np.arange(FLAGS.num_classes) == train_y[:, None]) + 0
    validation_y = (np.arange(FLAGS.num_classes) == validation_y[:, None]) + 0

    # Seed numpy random to generate identical random numbers every time (used in batching)
    np.random.seed(42)

    def get_validation_batch(validation_x_ids, validation_y, batch_number):
        num_images = len(validation_x_ids)

        count = 0
        start_index = batch_number * FLAGS.batch_size
        end_index = start_index + FLAGS.batch_size
        end_index = num_images if end_index > num_images else end_index
        real_batch_size = end_index - start_index

        validation_x = np.ndarray([
            real_batch_size, FLAGS.chunk_size, FLAGS.chunk_size,
            FLAGS.chunk_size, 1
        ],
                                  dtype=np.float32)

        for chunk_id in validation_x_ids[start_index:end_index]:
            chunk = np.load(DATA_PATH + chunk_id + '_X.npy').astype(np.float32,
                                                                    copy=False)
            validation_x[count, :, :, :, :] = img_to_rgb(chunk)
            count = count + 1

        return validation_x, validation_y[start_index:end_index]

    def feed_dict(is_train, batch_number=0):
        if is_train:
            x_batch, y_batch = get_batch(train_x, train_y)
            k = FLAGS.dropout
        else:
            x_batch, y_batch = get_validation_batch(validation_x, validation_y,
                                                    batch_number)
            k = 1.0
        crss_entrpy_weights = np.ones((y_batch.shape[0]))
        for m in range(y_batch.shape[0]):
            crss_entrpy_weights[m] = np.amax(y_batch[m] * klass_weights)
        return {
            x: x_batch,
            y_labels: y_batch,
            keep_prob: k,
            cross_entropy_weights: crss_entrpy_weights
        }

    # Graph construction
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32,
                           shape=[
                               None, FLAGS.chunk_size, FLAGS.chunk_size,
                               FLAGS.chunk_size, 1
                           ],
                           name='x')
        y = tf.placeholder(tf.float32,
                           shape=[None, FLAGS.num_classes],
                           name='y')
        y_labels = tf.placeholder(tf.float32,
                                  shape=[None, FLAGS.num_classes],
                                  name='y_labels')
        cross_entropy_weights = tf.placeholder(tf.float32,
                                               shape=[None],
                                               name='cross_entropy_weights')
        keep_prob = tf.placeholder(tf.float32)

        class_weights_base = tf.ones_like(y_labels)
        class_weights = tf.multiply(class_weights_base,
                                    [69838.0 / 40513.0, 69838.0 / 29325.0])

        # layer1
        conv1_1_out, conv1_1_weights = conv3d(inputs=x,
                                              filter_size=3,
                                              num_filters=16,
                                              num_channels=1,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv1_1')
        relu1_1_out = relu_3d(inputs=conv1_1_out, layer_name='relu1_1')

        conv1_2_out, conv1_2_weights = conv3d(inputs=relu1_1_out,
                                              filter_size=3,
                                              num_filters=16,
                                              num_channels=16,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv1_2')
        relu1_2_out = relu_3d(inputs=conv1_2_out, layer_name='relu1_2')

        pool1_out = max_pool_3d(inputs=relu1_2_out,
                                filter_size=[1, 2, 2, 2, 1],
                                strides=[1, 2, 2, 2, 1],
                                layer_name='pool1')

        # layer2
        conv2_1_out, conv2_1_weights = conv3d(inputs=pool1_out,
                                              filter_size=3,
                                              num_filters=32,
                                              num_channels=16,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv2_1')
        relu2_1_out = relu_3d(inputs=conv2_1_out, layer_name='relu2_1')

        conv2_2_out, conv2_2_weights = conv3d(inputs=relu2_1_out,
                                              filter_size=3,
                                              num_filters=32,
                                              num_channels=32,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv2_2')
        relu2_2_out = relu_3d(inputs=conv2_2_out, layer_name='relu2_2')

        pool2_out = max_pool_3d(inputs=relu2_2_out,
                                filter_size=[1, 2, 2, 2, 1],
                                strides=[1, 2, 2, 2, 1],
                                layer_name='pool2')

        # layer3
        conv3_1_out, conv3_1_weights = conv3d(inputs=pool2_out,
                                              filter_size=3,
                                              num_filters=64,
                                              num_channels=32,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv3_1')
        relu3_1_out = relu_3d(inputs=conv3_1_out, layer_name='relu3_1')

        conv3_2_out, conv3_2_weights = conv3d(inputs=relu3_1_out,
                                              filter_size=3,
                                              num_filters=64,
                                              num_channels=64,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv3_2')
        relu3_2_out = relu_3d(inputs=conv3_2_out, layer_name='relu3_2')

        conv3_3_out, conv3_3_weights = conv3d(inputs=relu3_2_out,
                                              filter_size=3,
                                              num_filters=64,
                                              num_channels=64,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv3_3')
        relu3_3_out = relu_3d(inputs=conv3_3_out, layer_name='relu3_3')

        pool3_out = max_pool_3d(inputs=relu3_3_out,
                                filter_size=[1, 2, 2, 2, 1],
                                strides=[1, 2, 2, 2, 1],
                                layer_name='pool3')

        # layer4
        conv4_1_out, conv4_1_weights = conv3d(inputs=pool3_out,
                                              filter_size=3,
                                              num_filters=128,
                                              num_channels=64,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv4_1')
        relu4_1_out = relu_3d(inputs=conv4_1_out, layer_name='relu4_1')

        conv4_2_out, conv4_2_weights = conv3d(inputs=relu4_1_out,
                                              filter_size=3,
                                              num_filters=128,
                                              num_channels=128,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv4_2')
        relu4_2_out = relu_3d(inputs=conv4_2_out, layer_name='relu4_2')

        conv4_3_out, conv4_3_weights = conv3d(inputs=relu4_2_out,
                                              filter_size=3,
                                              num_filters=128,
                                              num_channels=128,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv4_3')
        relu4_3_out = relu_3d(inputs=conv4_3_out, layer_name='relu4_3')

        pool4_out = max_pool_3d(inputs=relu4_3_out,
                                filter_size=[1, 2, 2, 2, 1],
                                strides=[1, 2, 2, 2, 1],
                                layer_name='pool4')

        # layer5
        conv5_1_out, conv5_1_weights = conv3d(inputs=pool4_out,
                                              filter_size=3,
                                              num_filters=256,
                                              num_channels=128,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv5_1')
        relu5_1_out = relu_3d(inputs=conv5_1_out, layer_name='relu5_1')

        conv5_2_out, conv5_2_weights = conv3d(inputs=relu5_1_out,
                                              filter_size=3,
                                              num_filters=256,
                                              num_channels=256,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv5_2')
        relu5_2_out = relu_3d(inputs=conv5_2_out, layer_name='relu5_2')

        conv5_3_out, conv5_3_weights = conv3d(inputs=relu5_2_out,
                                              filter_size=3,
                                              num_filters=256,
                                              num_channels=256,
                                              strides=[1, 3, 3, 3, 1],
                                              layer_name='conv5_3')
        relu5_3_out = relu_3d(inputs=conv5_3_out, layer_name='relu5_3')

        pool5_out = max_pool_3d(inputs=relu5_3_out,
                                filter_size=[1, 2, 2, 2, 1],
                                strides=[1, 2, 2, 2, 1],
                                layer_name='pool5')
        flatten5_out, flatten5_features = flatten_3d(pool5_out,
                                                     layer_name='flatten5')

        # layer6
        dense6_out = dense_3d(inputs=flatten5_out,
                              num_inputs=int(flatten5_out.shape[1]),
                              num_outputs=4096,
                              layer_name='fc6')
        relu6_out = relu_3d(inputs=dense6_out, layer_name='relu6')
        dropout6_out = dropout_3d(inputs=relu6_out,
                                  keep_prob=0.5,
                                  layer_name='drop6')

        # layer7
        dense7_out = dense_3d(inputs=dropout6_out,
                              num_inputs=int(dropout6_out.shape[1]),
                              num_outputs=4096,
                              layer_name='fc7')
        relu7_out = relu_3d(inputs=dense7_out, layer_name='relu7')
        dropout7_out = dropout_3d(inputs=relu7_out,
                                  keep_prob=0.5,
                                  layer_name='drop7')

        # layer8
        dense8_out = dense_3d(inputs=dropout7_out,
                              num_inputs=int(dropout7_out.shape[1]),
                              num_outputs=1000,
                              layer_name='fc8')

        # layer9
        dense9_out = dense_3d(inputs=dense8_out,
                              num_inputs=int(dense8_out.shape[1]),
                              num_outputs=FLAGS.num_classes,
                              layer_name='fc9')

        # Final softmax
        y = tf.nn.softmax(dense9_out)

        # Overall Metrics Calculations
        with tf.name_scope('log_loss'):
            log_loss = tf.losses.log_loss(y_labels, y, epsilon=10e-15)
            tf.summary.scalar('log_loss', log_loss)

        with tf.name_scope('softmax_cross_entropy'):
            softmax_cross_entropy = tf.losses.softmax_cross_entropy(
                y_labels, dense9_out)
            tf.summary.scalar('softmax_cross_entropy', softmax_cross_entropy)

        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y, 1),
                                          tf.argmax(y_labels, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            tf.summary.scalar('accuracy', accuracy)

        with tf.name_scope('weighted_log_loss'):
            weighted_log_loss = tf.losses.log_loss(
                y_labels, y, weights=class_weights, epsilon=10e-15) + tf.add_n(
                    tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            tf.summary.scalar('weighted_log_loss', weighted_log_loss)

        with tf.name_scope('weighted_softmax_cross_entropy'):
            weighted_softmax_cross_entropy = tf.losses.softmax_cross_entropy(
                y_labels, dense9_out, weights=cross_entropy_weights)
            tf.summary.scalar('weighted_softmax_cross_entropy',
                              weighted_softmax_cross_entropy)

        with tf.name_scope('sparse_softmax_cross_entropy'):
            y_labels_argmax_int = tf.to_int32(tf.argmax(y_labels, axis=1))
            sparse_softmax_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                labels=y_labels_argmax_int, logits=dense9_out)
            tf.summary.scalar('sparse_softmax_cross_entropy',
                              sparse_softmax_cross_entropy)

        with tf.name_scope('weighted_sparse_softmax_cross_entropy'):
            y_labels_argmax_int = tf.to_int32(tf.argmax(y_labels, axis=1))
            weighted_sparse_softmax_cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                labels=y_labels_argmax_int,
                logits=dense9_out,
                weights=cross_entropy_weights)
            tf.summary.scalar('weighted_sparse_softmax_cross_entropy',
                              weighted_sparse_softmax_cross_entropy)

        # Class Based Metrics calculations
        y_pred_class = tf.argmax(y, 1)
        y_labels_class = tf.argmax(y_labels, 1)

        confusion_matrix = tf.confusion_matrix(y_labels_class,
                                               y_pred_class,
                                               num_classes=FLAGS.num_classes)

        sum_row_0 = tf.reduce_sum(confusion_matrix[0, :])
        sum_row_1 = tf.reduce_sum(confusion_matrix[1, :])
        # sum_row_2 = tf.reduce_sum(confusion_matrix[2, :])
        # sum_row_3 = tf.reduce_sum(confusion_matrix[3, :])
        sum_col_0 = tf.reduce_sum(confusion_matrix[:, 0])
        sum_col_1 = tf.reduce_sum(confusion_matrix[:, 1])
        # sum_col_2 = tf.reduce_sum(confusion_matrix[:, 2])
        # sum_col_3 = tf.reduce_sum(confusion_matrix[:, 3])

        sum_all = tf.reduce_sum(confusion_matrix[:, :])

        with tf.name_scope('precision'):
            precision_0 = confusion_matrix[0, 0] / sum_col_0
            precision_1 = confusion_matrix[1, 1] / sum_col_1
            # precision_2 = confusion_matrix[2,2] / sum_col_2
            # precision_3 = confusion_matrix[3,3] / sum_col_3

            tf.summary.scalar('precision_0', precision_0)
            tf.summary.scalar('precision_1', precision_1)
            # tf.summary.scalar('precision_2', precision_2)
            # tf.summary.scalar('precision_3', precision_3)

        with tf.name_scope('recall'):
            recall_0 = confusion_matrix[0, 0] / sum_row_0
            recall_1 = confusion_matrix[1, 1] / sum_row_1
            # recall_2 = confusion_matrix[2,2] / sum_row_2
            # recall_3 = confusion_matrix[3,3] / sum_row_3

            tf.summary.scalar('recall_0', recall_0)
            tf.summary.scalar('recall_1', recall_1)
            # tf.summary.scalar('recall_2', recall_2)
            # tf.summary.scalar('recall_3', recall_3)

        with tf.name_scope('specificity'):
            tn_0 = sum_all - (sum_row_0 + sum_col_0 - confusion_matrix[0, 0])
            fp_0 = sum_col_0 - confusion_matrix[0, 0]
            specificity_0 = tn_0 / (tn_0 + fp_0)

            tn_1 = sum_all - (sum_row_1 + sum_col_1 - confusion_matrix[1, 1])
            fp_1 = sum_col_1 - confusion_matrix[1, 1]
            specificity_1 = tn_1 / (tn_1 + fp_1)

            # tn_2 = sum_all - (sum_row_2 + sum_col_2 - confusion_matrix[2,2])
            # fp_2 = sum_col_2 - confusion_matrix[2,2]
            # specificity_2 = tn_2 / (tn_2 + fp_2)
            #
            # tn_3 = sum_all - (sum_row_3 + sum_col_3 - confusion_matrix[3,3])
            # fp_3 = sum_col_3 - confusion_matrix[3,3]
            # specificity_3 = tn_3 / (tn_3 + fp_3)

            tf.summary.scalar('specificity_0', specificity_0)
            tf.summary.scalar('specificity_1', specificity_1)
            # tf.summary.scalar('specificity_2', specificity_2)
            # tf.summary.scalar('specificity_3', specificity_3)

        with tf.name_scope('true_positives'):
            tp_0 = confusion_matrix[0, 0]
            tp_1 = confusion_matrix[1, 1]
            # tp_2 = confusion_matrix[2,2]
            # tp_3 = confusion_matrix[3,3]

            tf.summary.scalar('true_positives_0', tp_0)
            tf.summary.scalar('true_positives_1', tp_1)
            # tf.summary.scalar('true_positives_2', tp_2)
            # tf.summary.scalar('true_positives_3', tp_3)

        with tf.name_scope('true_negatives'):
            tf.summary.scalar('true_negatives_0', tn_0)
            tf.summary.scalar('true_negatives_1', tn_1)
            # tf.summary.scalar('true_negatives_2', tn_2)
            # tf.summary.scalar('true_negatives_3', tn_3)

        with tf.name_scope('false_positives'):
            tf.summary.scalar('false_positives_0', fp_0)
            tf.summary.scalar('false_positives_1', fp_1)
            # tf.summary.scalar('false_positives_2', fp_2)
            # tf.summary.scalar('false_positives_3', fp_3)

        with tf.name_scope('false_negatives'):
            fn_0 = sum_row_0 - tp_0
            fn_1 = sum_row_1 - tp_1
            # fn_2 = sum_row_2 - tp_2
            # fn_3 = sum_row_3 - tp_3

            tf.summary.scalar('false_negatives_0', fn_0)
            tf.summary.scalar('false_negatives_1', fn_1)
            # tf.summary.scalar('false_negatives_2', fn_2)
            # tf.summary.scalar('false_negatives_3', fn_3)

        with tf.name_scope('log_loss_by_class'):
            log_loss_0 = tf.losses.log_loss(y_labels[0], y[0], epsilon=10e-15)
            log_loss_1 = tf.losses.log_loss(y_labels[1], y[1], epsilon=10e-15)
            # log_loss_2 = tf.losses.log_loss(y_labels[2], y[2], epsilon=10e-15)
            # log_loss_3 = tf.losses.log_loss(y_labels[3], y[3], epsilon=10e-15)

            # added an extra '_' to avoid a TensorBoard name collision with the main log_loss metric
            tf.summary.scalar('log_loss__0', log_loss_0)
            tf.summary.scalar('log_loss__1', log_loss_1)
            # tf.summary.scalar('log_loss__2', log_loss_2)
            # tf.summary.scalar('log_loss__3', log_loss_3)

        with tf.name_scope('softmax_cross_entropy_by_class'):
            softmax_cross_entropy_0 = tf.losses.softmax_cross_entropy(
                y_labels[0], dense9_out[0])
            softmax_cross_entropy_1 = tf.losses.softmax_cross_entropy(
                y_labels[1], dense9_out[1])
            # softmax_cross_entropy_2 = tf.losses.softmax_cross_entropy(y_labels[2], dense9_out[2])
            # softmax_cross_entropy_3 = tf.losses.softmax_cross_entropy(y_labels[3], dense9_out[3])

            tf.summary.scalar('softmax_cross_entropy_0',
                              softmax_cross_entropy_0)
            tf.summary.scalar('softmax_cross_entropy_1',
                              softmax_cross_entropy_1)
            # tf.summary.scalar('softmax_cross_entropy_2', softmax_cross_entropy_2)
            # tf.summary.scalar('softmax_cross_entropy_3', softmax_cross_entropy_3)

        with tf.name_scope('accuracy_by_class'):
            accuracy_0 = (tp_0 + tn_0) / (tp_0 + fp_0 + fn_0 + tn_0)
            accuracy_1 = (tp_1 + tn_1) / (tp_1 + fp_1 + fn_1 + tn_1)
            # accuracy_2 = (tp_2 + tn_2)/(tp_2 + fp_2 + fn_2 + tn_2)
            # accuracy_3 = (tp_3 + tn_3)/(tp_3 + fp_3 + fn_3 + tn_3)

            tf.summary.scalar('accuracy_0', accuracy_0)
            tf.summary.scalar('accuracy_1', accuracy_1)
            # tf.summary.scalar('accuracy_2', accuracy_2)
            # tf.summary.scalar('accuracy_3', accuracy_3)

        with tf.name_scope('weighted_log_loss_by_class'):
            weighted_log_loss_0 = tf.losses.log_loss(y_labels[0],
                                                     y[0],
                                                     weights=class_weights[0],
                                                     epsilon=10e-15)
            weighted_log_loss_1 = tf.losses.log_loss(y_labels[1],
                                                     y[1],
                                                     weights=class_weights[1],
                                                     epsilon=10e-15)
            # weighted_log_loss_2 = tf.losses.log_loss(y_labels[2], y[2], weights=class_weights[2], epsilon=10e-15)
            # weighted_log_loss_3 = tf.losses.log_loss(y_labels[3], y[3], weights=class_weights[3], epsilon=10e-15)

            tf.summary.scalar('weighted_log_loss_0', weighted_log_loss_0)
            tf.summary.scalar('weighted_log_loss_1', weighted_log_loss_1)
            # tf.summary.scalar('weighted_log_loss_2', weighted_log_loss_2)
            # tf.summary.scalar('weighted_log_loss_3', weighted_log_loss_3)

        with tf.name_scope('f1_score_by_class'):
            f1_score_0 = 2 * (precision_0 * recall_0) / (precision_0 +
                                                         recall_0)
            f1_score_1 = 2 * (precision_1 * recall_1) / (precision_1 +
                                                         recall_1)
            # f1_score_2 = 2 * (precision_2 * recall_2) / (precision_2 + recall_2)
            # f1_score_3 = 2 * (precision_3 * recall_3) / (precision_3 + recall_3)
            # #f1_score = (f1_score_0 * 40591.0/69920.0) + (f1_score_1 * 14624.0/69920.0) + (f1_score_2 * 10490.0/69920.0) + (f1_score_3 *4215.0/ 69920.0)
            tf.summary.scalar('f1_score_0', f1_score_0)
            tf.summary.scalar('f1_score_1', f1_score_1)
            # tf.summary.scalar('f1_score_2', f1_score_2)
            # tf.summary.scalar('f1_score_3', f1_score_3)

        with tf.name_scope('train'):
            optimizer = tf.train.AdamOptimizer(
                learning_rate=1e-4,
                name='adam_optimizer').minimize(softmax_cross_entropy)

        merged = tf.summary.merge_all()
        saver = tf.train.Saver()

    # Setting up config
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = FLAGS.allow_growth
    config.log_device_placement = FLAGS.log_device_placement
    config.allow_soft_placement = FLAGS.allow_soft_placement

    # timestamp used to identify the start of run
    start_timestamp = str(int(time.time()))
    model_id = str(uuid.uuid4())

    # Name used to save all artifacts of run
    run_name = 'runType={0:}_timestamp={1:}_batchSize={2:}_maxIterations={3:}_numTrain={4:}_numValidation={5:}_modelId={6:}'
    train_run_name = run_name.format('train', start_timestamp,
                                     FLAGS.batch_size, FLAGS.max_iterations,
                                     train_x.shape[0], validation_x.shape[0],
                                     model_id)

    test_run_name = run_name.format('test', start_timestamp, FLAGS.batch_size,
                                    FLAGS.max_iterations, train_x.shape[0],
                                    validation_x.shape[0], model_id)

    print('Run_name: {}'.format(train_run_name))

    k_count = 0
    with tf.Session(graph=graph, config=config) as sess:
        train_writer = tf.summary.FileWriter(
            TENSORBOARD_SUMMARIES + train_run_name, sess.graph)
        test_writer = tf.summary.FileWriter(
            TENSORBOARD_SUMMARIES + test_run_name, sess.graph)
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        for i in tqdm(range(FLAGS.max_iterations)):
            if (i % FLAGS.iteration_analysis
                    == 0) or (i == (FLAGS.max_iterations - 1)):
                save_model(sess, model_id, saver)
                # Validation
                num_batches = int(
                    math.ceil(float(len(validation_x)) / FLAGS.batch_size))
                for k in range(num_batches):
                    _, step_summary = sess.run([y, merged],
                                               feed_dict=feed_dict(False, k))
                    test_writer.add_summary(step_summary, k_count)
                    k_count = k_count + 1
            else:
                # Train
                _, step_summary = sess.run([optimizer, merged],
                                           feed_dict=feed_dict(True))
                train_writer.add_summary(step_summary, i)

        train_writer.close()
        test_writer.close()
        # Closing the session
        sess.close()
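All of the per-class summaries above (precision, recall, specificity, F1) are derived from the same confusion matrix. A minimal NumPy sketch of that arithmetic, using a hypothetical 2x2 matrix instead of the graph tensors:

import numpy as np

# Hypothetical confusion matrix: rows = true class, columns = predicted class.
cm = np.array([[50., 10.],
               [ 5., 35.]])

for k in range(cm.shape[0]):
    tp = cm[k, k]
    fp = cm[:, k].sum() - tp              # predicted k, actually another class
    fn = cm[k, :].sum() - tp              # actually k, predicted another class
    tn = cm.sum() - tp - fp - fn
    precision = tp / (tp + fp)            # diagonal / column sum, as above
    recall = tp / (tp + fn)               # diagonal / row sum, as above
    specificity = tn / (tn + fp)
    f1 = 2 * precision * recall / (precision + recall)
    print(k, precision, recall, specificity, f1)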
Beispiel #32
0
def main():
    """Create the model and start the training."""
    args = get_arguments()

    os.environ['CUDA_DEVICE_ORDER'] = "PCI_BUS_ID"
    os.environ['CUDA_VISIBLE_DEVICES'] = args.GPU

    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    tf.set_random_seed(args.random_seed)

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(args.data_dir, args.data_list, input_size,
                             args.random_scale, args.random_mirror,
                             args.ignore_label, IMG_MEAN, coord)
        image_batch, label_batch = reader.dequeue(args.batch_size)
        image_batch075 = tf.image.resize_images(
            image_batch, [int(h * 0.75), int(w * 0.75)])
        image_batch05 = tf.image.resize_images(
            image_batch, [int(h * 0.5), int(w * 0.5)])

    # Create network.
    with tf.variable_scope('', reuse=False):
        net = DeepLabResNetModel_34({'data': image_batch},
                                    is_training=args.is_training,
                                    num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net075 = DeepLabResNetModel_34({'data': image_batch075},
                                       is_training=args.is_training,
                                       num_classes=args.num_classes)
    with tf.variable_scope('', reuse=True):
        net05 = DeepLabResNetModel_34({'data': image_batch05},
                                      is_training=args.is_training,
                                      num_classes=args.num_classes)
    # For a small batch size, it is better to keep
    # the statistics of the BN layers (running means and variances)
    # frozen, and to not update the values provided by the pre-trained model.
    # If is_training=True, the statistics will be updated during the training.
    # Note that is_training=False still updates the BN parameters gamma (scale) and beta (offset)
    # if they are present in the var_list of the optimiser definition.

    # Predictions.
    raw_output100 = net.layers['fc1_voc12']
    raw_output075 = net075.layers['fc1_voc12']
    raw_output05 = net05.layers['fc1_voc12']
    raw_output = tf.reduce_max(tf.stack([
        raw_output100,
        tf.image.resize_images(raw_output075,
                               tf.shape(raw_output100)[1:3, ]),
        tf.image.resize_images(raw_output05,
                               tf.shape(raw_output100)[1:3, ])
    ]),
                               axis=0)
    # Which variables to load. Running means and variances are not trainable,
    # thus all_variables() should be restored.
    restore_var = [
        v for v in tf.global_variables()
        if 'fc' not in v.name or not args.not_restore_last
    ]
    all_trainable = [
        v for v in tf.trainable_variables()
        if 'beta' not in v.name and 'gamma' not in v.name
    ]
    fc_trainable = [v for v in all_trainable if 'fc' in v.name]
    conv_trainable = [v for v in all_trainable
                      if 'fc' not in v.name]  # lr * 1.0
    fc_w_trainable = [v for v in fc_trainable
                      if 'weights' in v.name]  # lr * 10.0
    fc_b_trainable = [v for v in fc_trainable
                      if 'biases' in v.name]  # lr * 20.0
    assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
    assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

    # Predictions: ignore all predictions with labels greater than or equal to num_classes
    raw_prediction = tf.reshape(raw_output, [-1, args.num_classes])
    raw_prediction100 = tf.reshape(raw_output100, [-1, args.num_classes])
    raw_prediction075 = tf.reshape(raw_output075, [-1, args.num_classes])
    raw_prediction05 = tf.reshape(raw_output05, [-1, args.num_classes])

    label_proc = prepare_label(label_batch,
                               tf.stack(raw_output.get_shape()[1:3]),
                               num_classes=args.num_classes,
                               one_hot=False)  # [batch_size, h, w]
    label_proc075 = prepare_label(label_batch,
                                  tf.stack(raw_output075.get_shape()[1:3]),
                                  num_classes=args.num_classes,
                                  one_hot=False)
    label_proc05 = prepare_label(label_batch,
                                 tf.stack(raw_output05.get_shape()[1:3]),
                                 num_classes=args.num_classes,
                                 one_hot=False)

    raw_gt = tf.reshape(label_proc, [
        -1,
    ])
    raw_gt075 = tf.reshape(label_proc075, [
        -1,
    ])
    raw_gt05 = tf.reshape(label_proc05, [
        -1,
    ])

    indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, args.num_classes - 1)),
                         1)
    indices075 = tf.squeeze(
        tf.where(tf.less_equal(raw_gt075, args.num_classes - 1)), 1)
    indices05 = tf.squeeze(
        tf.where(tf.less_equal(raw_gt05, args.num_classes - 1)), 1)

    gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
    gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
    gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

    prediction = tf.gather(raw_prediction, indices)
    prediction100 = tf.gather(raw_prediction100, indices)
    prediction075 = tf.gather(raw_prediction075, indices075)
    prediction05 = tf.gather(raw_prediction05, indices05)

    # Pixel-wise softmax loss.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction,
                                                          labels=gt)
    loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction100, labels=gt)
    loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction075, labels=gt075)
    loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=prediction05, labels=gt05)
    l2_losses = [
        args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'weights' in v.name
    ]
    reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
        loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(loss05) + tf.add_n(
            l2_losses)
    tf.summary.scalar('loss', reduced_loss)

    # Processed predictions: for visualisation.
    raw_output_up = tf.image.resize_bilinear(raw_output,
                                             tf.shape(image_batch)[1:3, ])
    raw_output_up = tf.argmax(raw_output_up, dimension=3)
    pred = tf.expand_dims(raw_output_up, dim=3)

    # Image summary.
    images_summary = tf.py_func(inv_preprocess,
                                [image_batch, args.save_num_images, IMG_MEAN],
                                tf.uint8)
    labels_summary = tf.py_func(
        decode_labels, [label_batch, args.save_num_images, args.num_classes],
        tf.uint8)
    preds_summary = tf.py_func(decode_labels,
                               [pred, args.save_num_images, args.num_classes],
                               tf.uint8)

    tf.summary.image(
        'images',
        tf.concat(axis=2,
                  values=[images_summary, labels_summary, preds_summary]),
        max_outputs=args.save_num_images)  # Concatenate row-wise.

    # Define loss and optimisation parameters.
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    learning_rate = tf.scalar_mul(
        base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    tf.summary.scalar('learning_rate', learning_rate)

    opt_conv = tf.train.AdamOptimizer(learning_rate)
    opt_fc_w = tf.train.AdamOptimizer(learning_rate)
    opt_fc_b = tf.train.AdamOptimizer(learning_rate)

    # Define a variable to accumulate gradients.
    accum_grads = [
        tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
        for v in conv_trainable + fc_w_trainable + fc_b_trainable
    ]

    # Define an operation to clear the accumulated gradients for next batch.
    zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]

    # Compute gradients.
    grads = tf.gradients(reduced_loss,
                         conv_trainable + fc_w_trainable + fc_b_trainable)

    # Accumulate and normalise the gradients.
    accum_grads_op = [
        accum_grads[i].assign_add(grad / args.grad_update_every)
        for i, grad in enumerate(grads)
    ]

    grads_conv = accum_grads[:len(conv_trainable)]
    grads_fc_w = accum_grads[len(conv_trainable):(len(conv_trainable) +
                                                  len(fc_w_trainable))]
    grads_fc_b = accum_grads[(len(conv_trainable) + len(fc_w_trainable)):]

    # Apply the gradients.
    train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
    train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
    train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))

    train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(args.snapshot_dir,
                                           graph=tf.get_default_graph())

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=10)

    # Load variables if the checkpoint is provided.
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list=restore_var)
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over training steps.
    for step in range(args.num_steps):
        start_time = time.time()
        feed_dict = {step_ph: step}
        loss_value = 0

        # Clear the accumulated gradients.
        sess.run(zero_op, feed_dict=feed_dict)

        # Accumulate gradients.
        for i in range(args.grad_update_every):
            _, l_val = sess.run([accum_grads_op, reduced_loss],
                                feed_dict=feed_dict)
            loss_value += l_val

        # Normalise the loss.
        loss_value /= args.grad_update_every

        # Apply gradients.
        if step % args.save_pred_every == 0:
            images, labels, summary, _ = sess.run(
                [image_batch, label_batch, merged, train_op],
                feed_dict=feed_dict)
            summary_writer.add_summary(summary, step)
            save(saver, sess, args.snapshot_dir, step)
        else:
            sess.run(train_op, feed_dict=feed_dict)

        duration = time.time() - start_time
        print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
            step, loss_value, duration))
    coord.request_stop()
    coord.join(threads)
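The zero_op / accum_grads_op / train_op sequence above is gradient accumulation: each of grad_update_every mini-batch gradients is divided by that count and added into a buffer, so a single apply_gradients call uses the averaged gradient. A small NumPy sketch of the same update rule, with made-up values:

import numpy as np

grad_update_every = 4
accum = np.zeros(3)                                  # plays the role of accum_grads
minibatch_grads = [np.array([0.2, -0.1, 0.4])] * grad_update_every

accum[:] = 0.0                                       # zero_op
for g in minibatch_grads:                            # one accum_grads_op run per mini-batch
    accum += g / grad_update_every
# accum now holds the mean mini-batch gradient; a single optimiser step on it
# is what train_op applies above.
print(accum)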
Beispiel #33
0
def get_branch_logits(features,
                      num_classes,
                      atrous_rates=None,
                      aspp_with_batch_norm=False,
                      kernel_size=1,
                      weight_decay=0.0001,
                      reuse=None,
                      scope_suffix=''):
  """Gets the logits from each model's branch.

  The underlying model is branched out in the last layer when atrous
  spatial pyramid pooling is employed, and all branches are sum-merged
  to form the final logits.

  Args:
    features: A float tensor of shape [batch, height, width, channels].
    num_classes: Number of classes to predict.
    atrous_rates: A list of atrous convolution rates for last layer.
    aspp_with_batch_norm: Use batch normalization layers for ASPP.
    kernel_size: Kernel size for convolution.
    weight_decay: Weight decay for the model variables.
    reuse: Reuse model variables or not.
    scope_suffix: Scope suffix for the model variables.

  Returns:
    Merged logits with shape [batch, height, width, num_classes].

  Raises:
    ValueError: Upon invalid input kernel_size value.
  """
  # When using batch normalization with ASPP, ASPP has been applied before
  # in extract_features, and thus we simply apply 1x1 convolution here.
  if aspp_with_batch_norm or atrous_rates is None:
    if kernel_size != 1:
      raise ValueError('Kernel size must be 1 when atrous_rates is None or '
                       'using aspp_with_batch_norm. Gets %d.' % kernel_size)
    atrous_rates = [1]

  with slim.arg_scope(
      [slim.conv2d],
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
      reuse=reuse):
    with tf.variable_scope(LOGITS_SCOPE_NAME, LOGITS_SCOPE_NAME, [features]):
      branch_logits = []
      for i, rate in enumerate(atrous_rates):
        scope = scope_suffix
        if i:
          scope += '_%d' % i

        branch_logits.append(
            slim.conv2d(
                features,
                num_classes,
                kernel_size=kernel_size,
                rate=rate,
                activation_fn=None,
                normalizer_fn=None,
                scope=scope))

      return tf.add_n(branch_logits)
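Sum-merging the branch logits is just an elementwise add over the per-rate outputs; a tiny NumPy sketch with hypothetical shapes (three atrous branches, 21 classes):

import numpy as np

branches = [np.random.randn(1, 4, 4, 21) for _ in range(3)]   # one output per atrous rate
merged_logits = np.sum(branches, axis=0)                      # what tf.add_n(branch_logits) computes
assert merged_logits.shape == (1, 4, 4, 21)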
Beispiel #34
0
    def __init__(self, params, word2vec, features, labels, training=False):
        len1, len2, s1, s2 = features
        embed_dim = params['embed_dim']
        hidden_size = embed_dim  #params['hidden_size']
        dropout = params['dropout']
        input_keep = 0.8
        learning_rate = 0.001
        max_norm = 10
        l2_coef = 1e-5  #0.0001
        num_heads = 8

        if not training:
            dropout = 0.0

        K.set_learning_phase(training)

        with tf.device('/cpu:0'):
            embedding = tf.get_variable("word2vec",
                                        initializer=word2vec,
                                        trainable=False)
            s1 = tf.nn.embedding_lookup(embedding, s1)
            s2 = tf.nn.embedding_lookup(embedding, s2)
        if training:
            s1 = tf.nn.dropout(s1, input_keep)
            s2 = tf.nn.dropout(s2, input_keep)

        c = highway(s1,
                    size=embed_dim,
                    scope="highway",
                    dropout=dropout,
                    reuse=None)
        q = highway(s2,
                    size=embed_dim,
                    scope="highway",
                    dropout=dropout,
                    reuse=True)

        c_mask = tf.sequence_mask(len1, dtype=tf.float32)
        q_mask = tf.sequence_mask(len2, dtype=tf.float32)

        # Encoding
        c = residual_block(c,
                           num_blocks=1,
                           num_conv_layers=4,
                           kernel_size=7,
                           mask=c_mask,
                           num_filters=hidden_size,
                           num_heads=num_heads,
                           seq_len=len1,
                           scope="Encoder",
                           bias=False,
                           dropout=dropout)
        q = residual_block(
            q,
            num_blocks=1,
            num_conv_layers=4,
            kernel_size=7,
            mask=q_mask,
            num_filters=hidden_size,
            num_heads=num_heads,
            seq_len=len2,
            scope="Encoder",
            reuse=True,  # Share the weights between passage and question
            bias=False,
            dropout=dropout)

        # att
        c_maxlen = tf.cast(tf.reduce_max(len1), tf.int32)
        q_maxlen = tf.cast(tf.reduce_max(len2), tf.int32)

        S = optimized_trilinear_for_attention([c, q],
                                              c_maxlen,
                                              q_maxlen,
                                              input_keep_prob=1.0 - dropout)
        mask_q = tf.expand_dims(q_mask, 1)
        S_ = tf.nn.softmax(mask_logits(S, mask=mask_q))
        c_att = tf.matmul(S_, q)  # same length as c

        mask_c = tf.expand_dims(c_mask, 2)
        S_T = tf.transpose(tf.nn.softmax(mask_logits(S, mask=mask_c), axis=1),
                           [0, 2, 1])
        q_att = tf.matmul(S_T, c)  # same length as q

        # c_att2 = tf.matmul(S_, q_att)  # same length as c
        # q_att2 = tf.matmul(S_T, c_att) # same length as q

        # c_comb = tf.concat([c, c_att, c*c_att, c_att2, c*c_att2], axis=-1)
        # q_comb = tf.concat([q, q_att, q*q_att, q_att2, q*q_att2], axis=-1)
        c_comb = tf.concat([c, c_att, c * c_att, tf.abs(c - c_att)], axis=-1)
        q_comb = tf.concat([q, q_att, q * q_att, tf.abs(q - q_att)], axis=-1)

        # match
        c_proj = conv(c_comb, hidden_size, name="proj")
        q_proj = conv(q_comb, hidden_size, name="proj", reuse=True)

        c_proj = tf.nn.dropout(c_proj, 1.0 - dropout)
        q_proj = tf.nn.dropout(q_proj, 1.0 - dropout)

        c_match = residual_block(c_proj,
                                 num_blocks=1,
                                 num_conv_layers=2,
                                 kernel_size=5,
                                 mask=c_mask,
                                 num_filters=hidden_size,
                                 num_heads=num_heads,
                                 seq_len=len1,
                                 scope="match",
                                 bias=False,
                                 reuse=False,
                                 dropout=dropout)
        q_match = residual_block(q_proj,
                                 num_blocks=1,
                                 num_conv_layers=2,
                                 kernel_size=5,
                                 mask=q_mask,
                                 num_filters=hidden_size,
                                 num_heads=num_heads,
                                 seq_len=len2,
                                 scope="match",
                                 bias=False,
                                 reuse=True,
                                 dropout=dropout)

        # Aggregate
        with tf.name_scope('l2_norm'):
            x = aggregate(c_match, q_match)
            logits = tf.squeeze(Dense(1)(x))

        self.prob = tf.sigmoid(logits)
        self.pred = tf.rint(self.prob)
        self.acc = tf.metrics.accuracy(labels=labels, predictions=self.pred)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.to_float(labels),
                                                    logits=logits))
        l2 = tf.add_n([
            tf.nn.l2_loss(v)
            for v in tf.trainable_variables("l2_norm") if 'bias' not in v.name
        ]) * l2_coef
        # l2 = tf.add_n([ tf.nn.l2_loss(v) for v in tf.trainable_variables()
        #                 if 'bias' not in v.name ]) * l2_coef
        self.loss += l2
        variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        l2_loss = tf.contrib.layers.apply_regularization(
            regularizer, variables)
        self.loss += l2_loss

        # decay
        var_ema = tf.train.ExponentialMovingAverage(0.9999)
        ema_op = var_ema.apply(tf.trainable_variables())
        with tf.control_dependencies([ema_op]):
            self.loss = tf.identity(self.loss)

        if training:
            self.global_step = tf.train.get_or_create_global_step()
            learning_rate = tf.minimum(
                0.0005, 0.001 / tf.log(999.) *
                tf.log(tf.cast(self.global_step, tf.float32) + 1))
            optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # Ensures that we execute the update_ops before performing the train_step
                gradients, variables = zip(
                    *optimizer.compute_gradients(self.loss))
                gradients, _ = tf.clip_by_global_norm(gradients, max_norm)
                self.train_op = optimizer.apply_gradients(
                    zip(gradients, variables), global_step=self.global_step)
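The attention block above builds a similarity matrix S between the two encoded sequences and applies softmaxes along each axis to attend in both directions. A minimal NumPy sketch of that bidirectional attention, using a plain dot-product score in place of the trilinear function and ignoring the masks:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

c = np.random.randn(1, 5, 8)                               # [batch, len_c, dim]
q = np.random.randn(1, 7, 8)                               # [batch, len_q, dim]
S = np.einsum('bid,bjd->bij', c, q)                        # similarity scores, [1, 5, 7]
c_att = softmax(S, axis=2) @ q                             # attend over q for each c position
q_att = np.transpose(softmax(S, axis=1), (0, 2, 1)) @ c    # attend over c for each q position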
Beispiel #35
0
def resnet_model_fn(features,
                    labels,
                    mode,
                    model_class,
                    resnet_size,
                    weight_decay,
                    learning_rate_fn,
                    momentum,
                    data_format,
                    resnet_version,
                    loss_scale,
                    loss_filter_fn=None,
                    dtype=resnet_model.DEFAULT_DTYPE,
                    fine_tune=False,
                    label_smoothing=0.0,
                    horovod=False):
    """Shared functionality for different resnet model_fns.

  Initializes the ResnetModel representing the model layers
  and uses that model to build the necessary EstimatorSpecs for
  the `mode` in question. For training, this means building losses,
  the optimizer, and the train op that get passed into the EstimatorSpec.
  For evaluation and prediction, the EstimatorSpec is returned without
  a train op, but with the necessary parameters for the given mode.

  Args:
    features: tensor representing input images
    labels: tensor representing class labels for all input images
    mode: current estimator mode; should be one of
      `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT`
    model_class: a class representing a TensorFlow model that has a __call__
      function. We assume here that this is a subclass of ResnetModel.
    resnet_size: A single integer for the size of the ResNet model.
    weight_decay: weight decay loss rate used to regularize learned variables.
    learning_rate_fn: function that returns the current learning rate given
      the current global_step
    momentum: momentum term used for optimization
    data_format: Input format ('channels_last', 'channels_first', or None).
      If set to None, the format is dependent on whether a GPU is available.
    resnet_version: Integer representing which version of the ResNet network to
      use. See README for details. Valid values: [1, 2]
    loss_scale: The factor to scale the loss for numerical stability. A detailed
      summary is present in the arg parser help text.
    loss_filter_fn: function that takes a string variable name and returns
      True if the var should be included in loss calculation, and False
      otherwise. If None, batch_normalization variables will be excluded
      from the loss.
    dtype: the TensorFlow dtype to use for calculations.
    fine_tune: If True, only train the dense layers (final layers).
    label_smoothing: If greater than 0 then smooth the labels.
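    horovod: If True, wrap the optimizer in Horovod's DistributedOptimizer
      for distributed training.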

  Returns:
    EstimatorSpec parameterized according to the input params and the
    current mode.
  """

    # Generate a summary node for the images
    tf.compat.v1.summary.image('images', features, max_outputs=6)
    # Checks that features/images have same data type being used for calculations.
    assert features.dtype == dtype

    model = model_class(resnet_size,
                        data_format,
                        resnet_version=resnet_version,
                        dtype=dtype)

    logits = model(features, mode == tf.estimator.ModeKeys.TRAIN)

    # This acts as a no-op if the logits are already in fp32 (provided logits are
    # not a SparseTensor). If dtype is low precision, logits must be cast to
    # fp32 for numerical stability.
    logits = tf.cast(logits, tf.float32)

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Return the predictions and the specification for serving a SavedModel
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'predict': tf.estimator.export.PredictOutput(predictions)
            })

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    if label_smoothing != 0.0:
        one_hot_labels = tf.one_hot(labels, 1001)
        cross_entropy = tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=one_hot_labels,
            label_smoothing=label_smoothing)
    else:
        cross_entropy = tf.compat.v1.losses.sparse_softmax_cross_entropy(
            logits=logits, labels=labels)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy')
    tf.compat.v1.summary.scalar('cross_entropy', cross_entropy)

    # If no loss_filter_fn is passed, assume we want the default behavior,
    # which is that batch_normalization variables are excluded from loss.
    def exclude_batch_norm(name):
        return 'batch_normalization' not in name

    loss_filter_fn = loss_filter_fn or exclude_batch_norm

    # Add weight decay to the loss.
    l2_loss = weight_decay * tf.add_n(
        # loss is computed using fp32 for numerical stability.
        [
            tf.nn.l2_loss(tf.cast(v, tf.float32))
            for v in tf.compat.v1.trainable_variables()
            if loss_filter_fn(v.name)
        ])
    tf.compat.v1.summary.scalar('l2_loss', l2_loss)
    loss = cross_entropy + l2_loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.compat.v1.train.get_or_create_global_step()

        learning_rate = learning_rate_fn(global_step)

        # Create a tensor named learning_rate for logging purposes
        tf.identity(learning_rate, name='learning_rate')
        tf.compat.v1.summary.scalar('learning_rate', learning_rate)

        if flags.FLAGS.enable_lars:
            optimizer = tf.contrib.opt.LARSOptimizer(
                learning_rate,
                momentum=momentum,
                weight_decay=weight_decay,
                skip_list=['batch_normalization', 'bias'])
        else:
            optimizer = tf.compat.v1.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=momentum)

        fp16_implementation = getattr(flags.FLAGS, 'fp16_implementation', None)
        if fp16_implementation == 'graph_rewrite':
            optimizer = (tf.compat.v1.train.experimental.
                         enable_mixed_precision_graph_rewrite(
                             optimizer, loss_scale=loss_scale))

        if horovod:
            import horovod.tensorflow as hvd
            optimizer = hvd.DistributedOptimizer(optimizer, num_groups=1)

        def _dense_grad_filter(gvs):
            """Only apply gradient updates to the final layer.

      This function is used for fine tuning.

      Args:
        gvs: list of tuples with gradients and variable info
      Returns:
        filtered gradients so that only the dense layer remains
      """
            return [(g, v) for g, v in gvs if 'dense' in v.name]

        if loss_scale != 1 and fp16_implementation != 'graph_rewrite':
            # When computing fp16 gradients, often intermediate tensor values are
            # so small, they underflow to 0. To avoid this, we multiply the loss by
            # loss_scale to make these tensor values loss_scale times bigger.
            scaled_grad_vars = optimizer.compute_gradients(loss * loss_scale)

            if fine_tune:
                scaled_grad_vars = _dense_grad_filter(scaled_grad_vars)

            # Once the gradient computation is complete we can scale the gradients
            # back to the correct scale before passing them to the optimizer.
            unscaled_grad_vars = [(grad / loss_scale, var)
                                  for grad, var in scaled_grad_vars]
            minimize_op = optimizer.apply_gradients(unscaled_grad_vars,
                                                    global_step)
        else:
            grad_vars = optimizer.compute_gradients(loss)
            if fine_tune:
                grad_vars = _dense_grad_filter(grad_vars)
            minimize_op = optimizer.apply_gradients(grad_vars, global_step)

        update_ops = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.UPDATE_OPS)
        train_op = tf.group(minimize_op, update_ops)
    else:
        train_op = None

    accuracy = tf.compat.v1.metrics.accuracy(labels, predictions['classes'])
    accuracy_top_5 = tf.compat.v1.metrics.mean(
        tf.nn.in_top_k(predictions=logits,
                       targets=labels,
                       k=5,
                       name='top_5_op'))
    metrics = {'accuracy': accuracy, 'accuracy_top_5': accuracy_top_5}

    # Create a tensor named train_accuracy for logging purposes
    tf.identity(accuracy[1], name='train_accuracy')
    tf.identity(accuracy_top_5[1], name='train_accuracy_top_5')
    tf.compat.v1.summary.scalar('train_accuracy', accuracy[1])
    tf.compat.v1.summary.scalar('train_accuracy_top_5', accuracy_top_5[1])

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=metrics)
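Loss scaling works because gradients are linear in the loss: multiplying the loss by loss_scale multiplies every gradient by loss_scale, so dividing the gradients back out before apply_gradients recovers the original values while keeping intermediate fp16 tensors away from underflow. A trivial NumPy illustration with hypothetical numbers:

import numpy as np

loss_scale = 128.0
grad_of_loss = np.array([2.0e-7, -5.0e-7])      # hypothetical small fp32 gradients

scaled = grad_of_loss * loss_scale              # what compute_gradients(loss * loss_scale) yields
recovered = scaled / loss_scale                 # the unscaled_grad_vars step above
assert np.allclose(recovered, grad_of_loss)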
Beispiel #36
0
def get_train_model(opt, device='/cpu:0'):
    num_inp = opt['num_inp']
    num_hid_enc = opt['num_hid_enc']
    num_hid = opt['num_hid']
    num_hid_dec = opt['num_hid_dec']
    wd = opt['weight_decay']
    nl = eval(opt['non_linear'])

    with tf.device(device):
        # Input (N, D)
        x = tf.placeholder('float', [None, num_inp], name='x')

        # Encoder hidden layer (N, H1)
        w_1 = weight_variable([num_inp, num_hid_enc], wd=wd, name='w_1')
        b_1 = weight_variable([num_hid_enc], wd=wd, name='b_1')
        h_enc = nl(tf.matmul(x, w_1) + b_1, name='h_enc')

        # Encoder output: distribution parameters mu, log_sigma (N, 1, H)
        w_2 = weight_variable([num_hid_enc, num_hid], wd=wd, name='w_2')
        b_2 = weight_variable([num_hid], wd=wd, name='b_2')
        mu_enc = tf.matmul(h_enc, w_2) + b_2

        w_3 = weight_variable([num_hid_enc, num_hid], wd=wd, name='w_3')
        b_3 = weight_variable([num_hid], wd=wd, name='b_3')
        log_sigma_enc = tf.add(tf.matmul(h_enc, w_3),
                               b_3,
                               name='log_sigma_enc')

        # Noise (N, M, H)
        t = tf.placeholder('float', [None, num_hid], name='t')

        # Encoder latent variable (N * M, H)
        z = tf.add(mu_enc, tf.mul(tf.exp(log_sigma_enc), t), name='z')

        # KL Divergence
        kl_qzx_pz = tf.mul(
            -0.5,
            tf.reduce_sum(1 + 2 * log_sigma_enc - mu_enc * mu_enc -
                          tf.exp(2 * log_sigma_enc)),
            name='kl_qzx_pz')

        # Decoder hidden layer
        w_4 = weight_variable([num_hid, num_hid_dec], wd=wd, name='w_4')
        b_4 = weight_variable([num_hid_dec], wd=wd, name='b_4')
        h_dec = nl(tf.matmul(z, w_4) + b_4, name='h_dec')

        # Decoder output: distribution parameters mu, log_sigma
        w_5 = weight_variable([num_hid_dec, num_inp], wd=wd, name='w_5')
        b_5 = weight_variable([num_inp], wd=wd, name='b_5')
        mu_dec = tf.sigmoid(tf.matmul(h_dec, w_5) + b_5)

        # Gaussian posterior: p(x | z)
        if opt['output_dist'] == 'Gaussian':
            w_6 = weight_variable([num_hid_dec, num_inp], wd=wd, name='w_6')
            b_6 = weight_variable([num_inp], wd=wd, name='b_6')
            log_sigma_dec = tf.add(tf.matmul(h_dec, w_6),
                                   b_6,
                                   name='log_sigma_dec')
            sigma_dec = tf.exp(log_sigma_dec + 1e-4, name='sigma_dec')
            log_pxz = tf.reduce_sum(-0.5 * tf.log(2 * np.pi) - log_sigma_dec -
                                    0.5 * (x - mu_dec) / sigma_dec *
                                    (x - mu_dec) / sigma_dec,
                                    name='log_pxz')
        elif opt['output_dist'] == 'Bernoulli':
            # Bernoulli posterior: p(x | z), (same as cross entropy)
            log_pxz = tf.reduce_sum(x * tf.log(mu_dec + 1e-7) +
                                    (1 - x) * tf.log((1 - mu_dec + 1e-7)),
                                    name='log_pxz')
        else:
            raise Exception('Unknown output distribution type: {}'.format(
                opt['output_dist']))

        # Normalize by number of examples
        num_ex = tf.shape(x, name='num_ex')

        # Variational lower bound of marginal log-likelihood
        w_kl = 1.0
        w_logp = 1.0
        log_px_lb = (-w_kl * kl_qzx_pz + w_logp * log_pxz) / \
            (w_kl + w_logp) * 2.0 / tf.to_float(num_ex[0])
        tf.add_to_collection('losses', -log_px_lb)
        total_loss = tf.add_n(tf.get_collection('losses'), name='total_loss')

        lr = 1e-4
        eps = 1e-7
        train_step = tf.train.AdamOptimizer(lr,
                                            epsilon=eps).minimize(total_loss)

    m = {
        'x': x,
        't': t,
        'w_1': w_1,
        'b_1': b_1,
        'h_enc': h_enc,
        'w_2': w_2,
        'b_2': b_2,
        'w_3': w_3,
        'b_3': b_3,
        'mu_enc': mu_enc,
        'log_sigma_enc': log_sigma_enc,
        'z': z,
        'kl_qzx_pz': kl_qzx_pz,
        'w_4': w_4,
        'b_4': b_4,
        'h_dec': h_dec,
        'w_5': w_5,
        'b_5': b_5,
        'mu_dec': mu_dec,
        'log_pxz': log_pxz,
        'log_px_lb': log_px_lb,
        'train_step': train_step
    }

    if opt['output_dist'] == 'Gaussian':
        m['w_6'] = w_6
        m['b_6'] = b_6
        m['log_sigma_dec'] = log_sigma_dec

    return m
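The encoder above draws z with the reparameterisation trick, z = mu + exp(log_sigma) * t with t ~ N(0, I), and kl_qzx_pz is the closed-form KL divergence between N(mu, sigma^2) and the standard normal prior. A short NumPy sketch of both quantities with toy values:

import numpy as np

rng = np.random.default_rng(0)
mu = np.array([0.5, -1.0])
log_sigma = np.array([-0.2, 0.1])

t = rng.standard_normal(mu.shape)               # the noise placeholder 't' above
z = mu + np.exp(log_sigma) * t                  # reparameterised latent sample

# KL( N(mu, sigma^2) || N(0, 1) ), matching -0.5 * sum(1 + 2*log_sigma - mu^2 - exp(2*log_sigma))
kl = -0.5 * np.sum(1.0 + 2.0 * log_sigma - mu**2 - np.exp(2.0 * log_sigma))
print(z, kl)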
Beispiel #37
0
    def __init__(self,
                 is_training=False,
                 hidden_units=128,
                 num_layers=1,
                 input_sequence_len=20,
                 output_sequence_len=10,
                 num_input_symbols=20,
                 num_output_symbols=20,
                 weight_amplitude=0.08,
                 batch_size=32,
                 peep=False):

        self.encoder_inputs = []
        self.decoder_inputs = []

        for i in range(input_sequence_len):
            self.encoder_inputs.append(tf.placeholder(tf.float32, shape=(None, num_input_symbols),
                                                      name="encoder_{0}".format(i)))

        for i in range(output_sequence_len + 1):
            self.decoder_inputs.append(tf.placeholder(tf.float32, shape=(None, num_output_symbols),
                                                      name="decoder_{0}".format(i)))

        def random_uniform():
            return tf.random_uniform_initializer(-weight_amplitude, weight_amplitude)

        if num_layers > 1:
            cells = [rnn_cell.LSTMCell(hidden_units, use_peepholes=peep, input_size=num_input_symbols,
                                       initializer=random_uniform())]
            cells += [rnn_cell.LSTMCell(hidden_units, use_peepholes=peep, input_size=hidden_units,
                                        initializer=random_uniform()) for _ in range(num_layers - 1)]
            self.cell = rnn_cell.MultiRNNCell(cells)
        else:
            self.cell = rnn_cell.LSTMCell(hidden_units, use_peepholes=peep,
                                          initializer=random_uniform())

        self.w_softmax = tf.get_variable('w_softmax', shape=(hidden_units, num_output_symbols),
                                         initializer=random_uniform())
        self.b_softmax = tf.get_variable('b_softmax', shape=(num_output_symbols,),
                                         initializer=random_uniform())

        # decoder_outputs is a list of tensors with output_sequence_len: [(batch_size x hidden_units)]
        decoder_outputs, _ = self._init_seq2seq(self.encoder_inputs, self.decoder_inputs, self.cell,
                                                feed_previous=not is_training)

        output_logits = [tf.matmul(decoder_output, self.w_softmax) + self.b_softmax
                         for decoder_output in decoder_outputs]
        self.output_probs = [tf.nn.softmax(logit) for logit in output_logits]

        # If this is a training model create the training operation and loss function
        if is_training:
            self.targets = self.decoder_inputs[1:]
            losses = [tf.nn.softmax_cross_entropy_with_logits(logit, target)
                      for logit, target in zip(output_logits, self.targets)]

            loss = tf.reduce_sum(tf.add_n(losses))
            self.cost = loss / output_sequence_len / batch_size
            self.learning_rate = tf.Variable(DEFAULT_LEARNING_RATE, trainable=False)

            train_vars = tf.trainable_variables()
            grads = tf.gradients(self.cost, train_vars)
            optimizer = tf.train.AdamOptimizer(self.learning_rate)

            self.train_op = optimizer.apply_gradients(zip(grads, train_vars))
Beispiel #38
0
    def __init__(self, num_features, **kwargs):
        defaults = {
            'num_epochs': 10,
            'display_step': 1,
            'batch_size': 100,
            'num_steps': 3,
            'debug': False,
            'normalize': True,
            'latent_vector_size': 100,
            'adpt_l': 2.0,
            'res_depth': 1,
            'ns_param': 0.01,
            'batch_param': 0.1,
            'dr_param': 1.,
            'df_param': 1.,
            'learning_rate': .001,
            'reg_param': 0.01
        }

        self.num_features = num_features

        vars(self).update({p: kwargs.get(p, d) for p, d in defaults.items()})

        ########################################
        # TensorFlow Variables                 #
        ########################################

        self.X = tf.placeholder(
            'float32',
            [None, num_features * self.num_steps],
            name='X'
        )
        self.Y = tf.placeholder('int64', [None], name='Y')
        self.T = tf.placeholder('float32', name='T')
        self.Z = tf.placeholder(
            'float32',
            [None, self.latent_vector_size],
            name='Z'
        )
        self.keep_prob = tf.placeholder('float32', name='keep_prob')

        # for normalization
        self.feature_min = tf.Variable(
            np.zeros(num_features * self.num_steps),
            dtype=tf.float32
        )

        self.feature_max = tf.Variable(
            np.zeros(num_features * self.num_steps),
            dtype=tf.float32
        )

        ########################################
        # GAN Model                            #
        ########################################

        self.embedding_ops = []

        def build_net(x, sizes):
            lrelu = nn.lrelu_gen(0.1)

            def block(x, in_dim, out_dim, i):
                with tf.variable_scope('block_{}'.format(i)):
                    z = x
                    for j in range(self.res_depth):
                        with tf.variable_scope('res_block_{}'.format(j)):
                            z = nn.build_residual_block(
                                z, lrelu, in_dim, self.reg_param
                            )
                            with tf.variable_scope('residual_block'):
                                self.embedding_ops.append(z)
                            z = tf.nn.dropout(z, self.keep_prob)

                    z = nn.build_fc_layer(
                        z, lrelu, in_dim, out_dim, self.reg_param
                    )

                    with tf.variable_scope('fc_block'):
                        self.embedding_ops.append(z)

                    if i < len(sizes) - 2:
                        z = tf.nn.dropout(z, self.keep_prob)

                    return z
            z = x

            for i in range(1, len(sizes)):
                z = block(z, sizes[i-1], sizes[i], i-1)

            return z

        vec_size = self.num_features * self.num_steps
        rnn_g_sizes = [vec_size, 100, vec_size]

        def generator(t, x_prev):
            x = tf.squeeze(
                tf.slice(self.X, [0, t, 0], [-1, 1, -1])
            )

            x = tf.reduce_mean(tf.add(x, x_prev), axis=1)

            x_next = tf.nn.sigmoid(build_net(x_prev, rnn_g_sizes))
            t = tf.add(t, 1)

            return t, x_next

        def discriminator(t, out, x_prev):
            x = tf.squeeze(
                tf.slice(self.X, [0, t, 0], [-1, 1, -1])
            )

            x = tf.reduce_mean(tf.add(x, x_prev), axis=1)

            x_next = tf.nn.sigmoid(build_net(x_prev, rnn_g_sizes))
            t = tf.add(t, 1)

            return t, x_next

        g_sizes = [self.latent_vector_size, 100, vec_size]
        d_sizes = [vec_size, 64, 32, 16, 8, 4, 2]

        with tf.variable_scope('generator'):
            G_sample = tf.nn.sigmoid(build_net(self.Z, g_sizes))

        with tf.variable_scope('discriminator'):
            D_logit_real = build_net(self.X, d_sizes)
            tf.get_variable_scope().reuse_variables()
            D_logit_fake = build_net(G_sample, d_sizes)

        D_fake = tf.nn.sigmoid(D_logit_fake)
        D_real = tf.nn.sigmoid(D_logit_real)

        self.scores = D_logit_real

        ########################################
        # Losses & Optimizers                  #
        ########################################

        # D Loss
        D_loss_real = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=D_logit_real, labels=tf.ones_like(self.Y)
            )
        )

        D_loss_fake = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=D_logit_fake, labels=tf.zeros_like(self.Y)
            )
        )

        # Ensure a difference between the two output nodes.
        node_similarity_loss = -tf.reduce_mean(tf.add(
            tf.square(D_real[:, 1] - D_real[:, 0]),
            tf.square(D_fake[:, 1] - D_fake[:, 0])
        ))

        # Penalise variance across the output nodes, averaged over the batch
        batch_loss = tf.reduce_mean(tf.add(
            tf.nn.moments(D_real, [1])[1],
            tf.nn.moments(D_fake, [1])[1]
        ))

        self.D_loss = tf.add_n([
            self.df_param * D_loss_fake,
            self.dr_param * D_loss_real,
            self.ns_param * node_similarity_loss,
            self.batch_param * batch_loss
        ])

        self.D_only_loss = tf.add_n([
            self.df_param * D_loss_fake,
            self.dr_param * D_loss_real
        ])

        self.D_loss += tf.add_n(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES,
            scope='discriminator'
        ))

        # G Loss
        self.G_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=D_logit_fake, labels=tf.ones_like(self.Y)
            )
        )

        self.G_loss += tf.add_n(tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES,
            scope='generator'
        ))

        # Optimizers
        self.D_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.D_loss,
            var_list=tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='discriminator'
            )
        )

        self.G_solver = tf.train.AdamOptimizer(self.learning_rate).minimize(
            self.G_loss,
            var_list=tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES,
                scope='generator'
            )
        )

        ########################################
        # Evaluation Metrics                   #
        ########################################

        # negative_labels = tf.cast(tf.fill(tf.shape(self.Y), 0), 'int64')
        # positive_labels = tf.cast(tf.fill(tf.shape(self.Y), 1), 'int64')

        # pred_labels = tf.where(
        #     tf.greater(self.scores, tf.fill(tf.shape(self.Y), 0.5)),
        #     positive_labels,
        #     negative_labels
        # )

        pred_labels = tf.argmax(self.scores, 1)

        self.confusion_matrix = tf.confusion_matrix(
            self.Y,
            pred_labels,
            num_classes=2
        )

        self.accuracy = tf.reduce_mean(
            tf.to_float(tf.equal(pred_labels, self.Y))
        )

        # Variable ops
        self.init_op = tf.global_variables_initializer()
        self.saver = tf.train.Saver()
        self.config = tf.ConfigProto()
        self.config.gpu_options.allow_growth = True
 def l1_weights(self):
   """L1 loss for the weights of the network"""
   return tf.add_n([tf.reduce_sum(tf.abs(v)) for v in tf.trainable_variables() if v in self.vars])
 def l2_weights(self):
   """L2 loss for the weights of the network"""
   return tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if v in self.vars])
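The two methods above sum an L1 or L2 norm over the selected weights; the result is normally scaled by a coefficient and added to the data loss. A self-contained NumPy sketch of the same penalties (note that tf.nn.l2_loss is 0.5 * sum of squares):

import numpy as np

weights = [np.array([[0.5, -1.0], [2.0, 0.0]]), np.array([0.3, -0.3])]
l1_penalty = sum(np.abs(w).sum() for w in weights)            # what l1_weights computes
l2_penalty = sum(0.5 * np.square(w).sum() for w in weights)   # what l2_weights computes
# total_loss = data_loss + reg_coef * l1_penalty   (or l2_penalty); reg_coef is hypothetical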
Beispiel #41
0
b_fc2 = bias_variable([fc_size2])
'''
h_fc2 = tf.nn.sigmoid(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

W_fc3 = weight_variable([fc_size2, 2])
b_fc3 = bias_variable([2])
'''
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

#Train and evaluate
saver = tf.train.Saver()
cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv))
#Weight reg 
tf.add_to_collection("losses",cross_entropy)
loss = tf.add_n(tf.get_collection("losses"))
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
if emptyTrain:
    sess.run(tf.initialize_all_variables())
else:
    saver.restore(sess, "./mnistnnsave/model.ckpt")
Iteration = 20000
data_dic
#train_x = scale_to_01(np.array(data_dic['data']))
train_x = np.array(data_dic['data'])
train_y = np.array(data_dic['label'])
#shuffle the train data
train = np.hstack((train_x,train_y))
train_list = train.tolist()
def main(args):


    network = importlib.import_module(args.model_def, 'inference')
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Store some git revision info in a text file in the log directory
    src_path,_ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)

    train_set = facenet.get_dataset(args.data_dir)
    nrof_classes = len(train_set)
    
    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.expanduser(args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)
    
    if args.lfw_dir:
        print('LFW directory: %s' % args.lfw_dir)
        # Read the file containing the pairs used for testing
        pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
        # Get the paths for the corresponding images
        lfw_paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)
    
    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)
        
        # Get a list of image paths and their labels
        image_list, label_list = facenet.get_image_paths_and_labels(train_set)

        # Read data and apply label preserving distortions
        image_batch, label_batch = facenet.read_and_augument_data(image_list, label_list, args.image_size,
            args.batch_size, args.max_nrof_epochs, args.random_crop, args.random_flip, args.random_rotate, 
            args.nrof_preprocess_threads)
        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))
        
        print('Building training graph')
        
        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        
        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability, 
            phase_train=True, weight_decay=args.weight_decay)
        logits = slim.fully_connected(prelogits, len(train_set), activation_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 
                weights_regularizer=slim.l2_regularizer(args.weight_decay),
                scope='Logits', reuse=False)

        # Add DeCov regularization loss
        if args.decov_loss_factor>0.0:
            logits_decov_loss = facenet.decov_loss(logits) * args.decov_loss_factor
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, logits_decov_loss)
            
        # Add center loss
        if args.center_loss_factor>0.0:
            prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor)

        learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
            args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True)
        tf.scalar_summary('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, label_batch, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        
        # Calculate the total losses
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer, 
            learning_rate, args.moving_average_decay, tf.all_variables(), args.log_histograms)
        
        # Evaluation
        print('Building evaluation graph')
        lfw_label_list = range(0,len(lfw_paths))
        assert (len(lfw_paths) % args.lfw_batch_size == 0), "The number of images in the LFW test set needs to be divisible by the lfw_batch_size"
        eval_image_batch, eval_label_batch = facenet.read_and_augument_data(lfw_paths, lfw_label_list, args.image_size,
            args.lfw_batch_size, None, False, False, False, args.nrof_preprocess_threads, shuffle=False)
        # Node for input images
        eval_image_batch.set_shape((None, args.image_size, args.image_size, 3))
        eval_image_batch = tf.identity(eval_image_batch, name='input')
        eval_prelogits, _ = network.inference(eval_image_batch, 1.0, 
            phase_train=False, weight_decay=0.0, reuse=True)
        eval_embeddings = tf.nn.l2_normalize(eval_prelogits, 1, 1e-10, name='embeddings')

        # Create a saver
        saver = tf.train.Saver(tf.all_variables(), max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.initialize_all_variables())
        sess.run(tf.initialize_local_variables())
        summary_writer = tf.train.SummaryWriter(log_dir, sess.graph)
        tf.train.start_queue_runners(sess=sess)

        with sess.as_default():

            if pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)

            # Training and validation loop
            print('Running training')
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, learning_rate_placeholder, global_step, 
                    total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step)

                # Evaluate on LFW

                '''
                if args.lfw_dir:
                    evaluate(sess, eval_embeddings, eval_label_batch, actual_issame, args.lfw_batch_size, args.seed,
                        args.lfw_nrof_folds, log_dir, step, summary_writer)
                '''

                
    return model_dir
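For reference, the loss assembly above follows a common TF 1.x/slim pattern: any layer built with a weights_regularizer (plus the explicit tf.add_to_collection calls for the DeCov and center losses) contributes a term to tf.GraphKeys.REGULARIZATION_LOSSES, and the final objective is their tf.add_n sum together with the cross entropy. A self-contained sketch with placeholder shapes, not the facenet API:

import tensorflow as tf
slim = tf.contrib.slim

features = tf.placeholder(tf.float32, [None, 128])
labels = tf.placeholder(tf.int64, [None])

# The l2 weights_regularizer makes slim register a term in REGULARIZATION_LOSSES.
logits = slim.fully_connected(features, 10, activation_fn=None,
                              weights_regularizer=slim.l2_regularizer(5e-4))

cross_entropy_mean = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))

# Same assembly as in the training script above.
regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')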
Beispiel #43
0
 def l2_loss(self):
     return self.l2_scale * tf.add_n([
         tf.nn.l2_loss(v)
         for v in tf.trainable_variables() if 'bias' not in v.name
     ])
y_pred = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y)

cost = tf.reduce_mean(y_pred)

norms = []

for weight in weights.values():
    if "6" in weight.name or "7" in weight.name or "8" in weight.name:
        norms.append(tf.nn.l2_loss(weight))

for weight in biases.values():
    if "6" in weight.name or "7" in weight.name or "8" in weight.name:
        norms.append(tf.nn.l2_loss(weight))

loss_L2 = tf.add_n(norms) * .05

cost = cost

optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate).minimize(cost + loss_L2)

correct_pred = tf.equal(tf.math.argmax(input=pred, axis=1),
                        tf.argmax(input=y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver(max_to_keep=epochs, save_relative_paths=True)

with tf.Session() as sess:
    sess.run(init)
Beispiel #45
0
    return var


x = tf.placeholder(dtype=tf.float32, shape=(None, 2))
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 1))

batch_size = 8

layer_dimension = [2, 10, 10, 10, 1]

n_layers = len(layer_dimension)

cur_layer = x

in_dimension = layer_dimension[0]

for i in range(1, n_layers):
    out_dimension = layer_dimension[i]

    weight = get_weight(shape=[in_dimension, out_dimension], lamdba=0.001)

    bias = tf.Variable(tf.constant(0.1, shape=[out_dimension]))

    cur_layer = tf.nn.relu(tf.matmul(cur_layer, weight) + bias)
    in_dimension = out_dimension

mess_loss = tf.reduce_mean(tf.square(y_ - cur_layer))

tf.add_to_collection('losses', mess_loss)
loss = tf.add_n(tf.get_collection('losses'))
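The get_weight helper is truncated above (only its final "return var" survives). A plausible reconstruction, assuming it creates the layer weights and registers a scaled L2 penalty in the same 'losses' collection that tf.add_n sums; only the signature matches the call site, the body is an assumption:

def get_weight(shape, lamdba):
    # Create the weight variable for one layer.
    var = tf.Variable(tf.random_normal(shape), dtype=tf.float32)
    # Register its scaled L2 norm so that tf.add_n(tf.get_collection('losses'))
    # adds it to the MSE term registered later.
    tf.add_to_collection('losses', tf.contrib.layers.l2_regularizer(lamdba)(var))
    return var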
from tensorflow.core.protobuf import saver_pb2
# These are the local imports; we import them from our own directory.
# driving_data is for reading our dataset
import driving_data
# model is our TensorFlow model. Check the model graph here: https://imgur.com/IuBJdKe
import model
# The path for our trained model. If a trained model already exists, we load it and continue training from it; you can also start from scratch.
LOGDIR = './save'
# Tensorflow Session. Read more here https://www.tensorflow.org/api_docs/python/tf/Session
sess = tf.InteractiveSession()
# L2 regularization constant used to weight the weight-decay term in the loss below.
L2NormConst = 0.001

train_vars = tf.trainable_variables()

loss = tf.reduce_mean(tf.square(tf.subtract(model.y_, model.y))) + tf.add_n([tf.nn.l2_loss(v) for v in train_vars]) * L2NormConst
accuracy = 100 - loss 
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)
sess.run(tf.initialize_all_variables())

# create a summary to monitor cost tensor
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", accuracy)
# merge all summaries into a single op
merged_summary_op =  tf.summary.merge_all()

saver = tf.train.Saver(write_version = saver_pb2.SaverDef.V2)

# op to write logs to Tensorboard
logs_path = './logs'
summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
Beispiel #47
0
                            use_relu=True,
                            weight_loss=0.04)

layer_fc3 = create_fc_layer(input=layer_fc2,
                            num_inputs=fc_layer_size2,
                            num_outputs=num_classes,
                            use_relu=False)

y_pred = tf.nn.softmax(layer_fc3, name='y_pred')

y_pred_cls = tf.argmax(y_pred, dimension=1)
session.run(tf.global_variables_initializer())
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc3, labels=y_true)
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')   # L2 regularization enters via the 'losses' collection below
tf.add_to_collection('losses', cross_entropy_mean)
cost = tf.add_n(tf.get_collection('losses'), name='total_loss')
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)   # 1e-4
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

session.run(tf.global_variables_initializer())
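create_fc_layer itself is not shown in this snippet; a hedged sketch of what such a helper usually does with its weight_loss argument, so that tf.add_n(tf.get_collection('losses')) ends up summing the cross entropy together with the weight penalties (the signature mirrors the calls above, the body is assumed):

def create_fc_layer(input, num_inputs, num_outputs, use_relu=True, weight_loss=None):
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
    biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
    layer = tf.matmul(input, weights) + biases
    if weight_loss is not None:
        # The scaled L2 penalty joins the 'losses' collection summed below.
        tf.add_to_collection('losses', weight_loss * tf.nn.l2_loss(weights))
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer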


def show_progress(epoch, feed_dict_train, feed_dict_validate, val_loss, duration=None):
    acc = session.run(accuracy, feed_dict=feed_dict_train)
    val_acc = session.run(accuracy, feed_dict=feed_dict_validate)
    if duration is not None:
        examples_per_sec = batch_size / duration
        msg = "Training Epoch {0}, Iterations: {1} --- Training Accuracy: {2:>6.1%}," \
              "  Validation Accuracy: {3:>6.1%},  Validation Loss: {4:.3f}," \
              " {5:.2f} examples/sec, {6:.2f} sec/iteration"
Beispiel #48
0
    def build_graph(self, image, label):
        is_training = get_current_tower_context().is_training

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        def binarize_weight(v):
            name = v.op.name
            # don't binarize first and last layer
            if not name.endswith('W') or 'conv0' in name or 'fc' in name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)

        def nonlin(x):
            if BITA == 32:
                return tf.nn.relu(x)
            return tf.clip_by_value(x, 0.0, 1.0)

        def activate(x):
            return fa(nonlin(x))

        image = image / 256.0

        with remap_variables(binarize_weight), \
        argscope(BatchNorm, momentum=0.9, epsilon=1e-4),\
                argscope(Conv2D, use_bias=False):
            logits = (
                LinearWrap(image).Conv2D('conv0',
                                         48,
                                         5,
                                         padding='VALID',
                                         use_bias=True).MaxPooling(
                                             'pool0', 2,
                                             padding='SAME').apply(activate)
                # 18
                .Conv2D('conv1', 64, 3, padding='SAME').apply(
                    fg, 'fg1', is_training,
                    kernel_size=18)  # core change of the model: fg replaces BatchNorm and activate
                #.BatchNorm('bn1')
                #.apply(activate)
                .Conv2D('conv2', 64, 3, padding='SAME').MaxPooling(
                    'pool1', 2, padding='SAME').apply(
                        fg, 'fg2', training=is_training,
                        kernel_size=9)  # note: max-pool first, then quantize,
                # because the original model applies max pooling after BatchNorm
                #.BatchNorm('bn2')
                #.MaxPooling('pool1', 2, padding='SAME')
                #.apply(activate)
                # 9
                .Conv2D('conv3', 128, 3, padding='VALID').apply(fg,
                                                                'fg3',
                                                                is_training,
                                                                kernel_size=7)
                #.BatchNorm('bn3')
                #.apply(activate)
                # 7
                .Conv2D('conv4', 128, 3, padding='SAME').apply(fg,
                                                               'fg4',
                                                               is_training,
                                                               kernel_size=7)
                #.BatchNorm('bn4')
                #.apply(activate)
                .Conv2D('conv5', 128, 3, padding='VALID').apply(fg,
                                                                'fg5',
                                                                is_training,
                                                                kernel_size=5)
                #.BatchNorm('bn5').apply(activate)
                # 5
                .Dropout(rate=0.5 if is_training else 0.0).Conv2D(
                    'conv6', 512, 5, padding='VALID')
                # the last layer is not quantized
                .BatchNorm('bn6').apply(nonlin).FullyConnected('fc1', 100)())

        tf.nn.softmax(logits, name='output')

        # compute the number of failed samples
        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                        tf.float32,
                        name='wrong-top1')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        total_cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, total_cost)
        return total_cost
                matrix_name = get_block_name(i, j)
                matrices[matrix_name] = tf.random_uniform([M, M], name=matrix_name)

    #intermediate_traces will store sum of trace for all sub-matrices on each machine
    intermediate_traces = {0:0,1:0,2:0,3:0,4:0}
    import datetime
    print "Before matrix creation:",datetime.datetime.now()
    for i in range(0, d):
        for j in range(0, d):
            with tf.device("/job:worker/task:%d" % ( (i+j) % 5 )):
                A = matrices[get_block_name(i, j)]
                B = matrices[get_block_name(j, i)]
                traceForSubMatrix = tf.trace(tf.matmul(A, B))
                oldSum = intermediate_traces[(i+j) % 5]
                intermediate_traces[(i+j) % 5] = oldSum + traceForSubMatrix 
            print "After matrix creation:",datetime.datetime.now()

    with tf.device("/job:worker/task:0"):
        #Calculate total trace by summing up all elements from intermediate_traces
        retval = tf.add_n(list(intermediate_traces.values()))
        print("After retval calculation:", datetime.datetime.now())

        config = tf.ConfigProto(log_device_placement=True)
        with tf.Session("grpc://vm-23-2:2222", config=config, graph=g) as sess:
            result = sess.run(retval)
            sess.close()
            print "Trace of the big matrix is = ", result
            print "SUCCESS"


Beispiel #50
0
#softmax
with tf.name_scope("softmmax"):
    W_soft = weight_variable("W_soft", [192, 10], stddev=1 / 192.0)
    b_soft = bias_variable("b_soft", [10], 0.0)
    h_soft = tf.add(tf.matmul(h_local4, W_soft), b_soft, name="h_soft")

# not yet sure what this function does
# _activation_summary(h_soft)

with tf.name_scope("loss"):
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=input_y,
                                                           logits=h_soft)
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name="cross_entropy")
    tf.add_to_collection("losses", cross_entropy_mean)
    loss = tf.add_n(tf.get_collection("losses"), name="total_loss")

# compute the accuracy
with tf.name_scope("accuracy"):
    correct_prediction = tf.equal(tf.arg_max(input_y_onehot, 1),
                                  tf.arg_max(h_soft, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))

with tf.name_scope("train"):
    opt_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
tf.train.start_queue_runners(sess=sess)

# plot charts
Beispiel #51
0
def compute_weight_decay(vars):
    return tf.add_n([tf.nn.l2_loss(v) for v in vars])
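A possible usage of this helper, scaling the summed L2 norms into a weight-decay term (the 1e-4 factor and the cross_entropy_loss name are illustrative, not from the source):

weight_decay = 1e-4 * compute_weight_decay(tf.trainable_variables())
total_loss = cross_entropy_loss + weight_decay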
Beispiel #52
0
def regularize_cost(regex, func, name='regularize_cost'):
    """
    Apply a regularizer on trainable variables matching the regex, and print
    the matched variables (only print once in multi-tower training).
    In replicated mode, it will only regularize variables within the current tower.

    Args:
        regex (str): a regex to match variable names, e.g. "conv.*/W"
        func: the regularization function, which takes a tensor and returns a scalar tensor.
            E.g., ``tf.contrib.layers.l2_regularizer``.

    Returns:
        tf.Tensor: the total regularization cost.

    Example:
        .. code-block:: python

            cost = cost + regularize_cost("fc.*/W", l2_regularizer(1e-5))
    """
    ctx = get_current_tower_context()
    if not ctx.is_training:
        # Currently cannot build the wd_cost correctly at inference,
        # because the vs_name used in inference can be '', therefore the
        # variable filter will fail
        return tf.constant(0, dtype=tf.float32, name='empty_' + name)
    params = tf.trainable_variables()

    # If vars are shared, use all of them
    # If vars are replicated, only regularize those in the current tower
    params = ctx.filter_vars_by_vs_name(params)

    G = tf.get_default_graph()

    to_regularize = []

    with tf.name_scope('regularize_cost'):
        costs = []
        for p in params:
            para_name = p.op.name
            if re.search(regex, para_name):
                with G.colocate_with(p):
                    costs.append(func(p))
                to_regularize.append(p.name)
        if not costs:
            return tf.constant(0, dtype=tf.float32, name='empty_' + name)

    # remove tower prefix from names, and print
    if len(ctx.vs_name):
        prefix = ctx.vs_name + '/'
        prefixlen = len(prefix)

        def f(name):
            if name.startswith(prefix):
                return name[prefixlen:]
            return name

        to_regularize = list(map(f, to_regularize))
    to_print = ', '.join(to_regularize)
    _log_regularizer(to_print)

    return tf.add_n(costs, name=name)
def ssd_losses_old(logits,
                   localisations,
                   gclasses,
                   glocalisations,
                   gscores,
                   match_threshold=0.5,
                   negative_ratio=3.,
                   alpha=1.,
                   label_smoothing=0.,
                   device='/cpu:0',
                   scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.device(device):
        with tf.name_scope(scope, 'ssd_losses'):
            l_cross_pos = []
            l_cross_neg = []
            l_loc = []
            for i in range(len(logits)):
                dtype = logits[i].dtype
                with tf.name_scope('block_%i' % i):
                    # Sizing weight...
                    wsize = tfe.get_shape(logits[i], rank=5)
                    wsize = wsize[1] * wsize[2] * wsize[3]

                    # Positive mask.
                    pmask = gscores[i] > match_threshold
                    fpmask = tf.cast(pmask, dtype)
                    n_positives = tf.reduce_sum(fpmask)

                    # Select some random negative entries.
                    # n_entries = np.prod(gclasses[i].get_shape().as_list())
                    # r_positive = n_positives / n_entries
                    # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                    # Negative mask.
                    no_classes = tf.cast(pmask, tf.int32)
                    predictions = slim.softmax(logits[i])
                    nmask = tf.logical_and(tf.logical_not(pmask),
                                           gscores[i] > -0.5)
                    fnmask = tf.cast(nmask, dtype)
                    nvalues = tf.where(nmask, predictions[:, :, :, :, 0],
                                       1. - fnmask)
                    nvalues_flat = tf.reshape(nvalues, [-1])
                    # Number of negative entries to select.
                    n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                    n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                    n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                    max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask),
                                                  tf.int32)
                    n_neg = tf.minimum(n_neg, max_neg_entries)

                    val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                    max_hard_pred = -val[-1]
                    # Final negative mask.
                    nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
                    fnmask = tf.cast(nmask, dtype)

                    # Add cross-entropy loss.
                    with tf.name_scope('cross_entropy_pos'):
                        fpmask = wsize * fpmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=gclasses[i])
                        loss = tf.losses.compute_weighted_loss(loss, fpmask)
                        l_cross_pos.append(loss)

                    with tf.name_scope('cross_entropy_neg'):
                        fnmask = wsize * fnmask
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits[i], labels=no_classes)
                        loss = tf.losses.compute_weighted_loss(loss, fnmask)
                        l_cross_neg.append(loss)

                    # Add localization loss: smooth L1, L2, ...
                    with tf.name_scope('localization'):
                        # Weights Tensor: positive mask + random negative.
                        weights = tf.expand_dims(alpha * fpmask, axis=-1)
                        loss = custom_layers.abs_smooth(localisations[i] -
                                                        glocalisations[i])
                        loss = tf.losses.compute_weighted_loss(loss, weights)
                        l_loc.append(loss)

            # Additional total losses...
            with tf.name_scope('total'):
                total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
                total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
                total_cross = tf.add(total_cross_pos, total_cross_neg,
                                     'cross_entropy')
                total_loc = tf.add_n(l_loc, 'localization')

                # Add to EXTRA LOSSES TF.collection
                tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
                tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
                tf.add_to_collection('EXTRA_LOSSES', total_cross)
                tf.add_to_collection('EXTRA_LOSSES', total_loc)
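The hard negative mining above boils down to keeping the n_neg background predictions the classifier is most confidently wrong about: tf.nn.top_k on the negated background probabilities returns the smallest ones first, and the k-th value becomes the selection threshold. A stripped-down sketch of just that step, with toy values instead of the SSD tensors:

import tensorflow as tf

# Toy background-class probabilities for candidate negatives (lower = harder).
nvalues_flat = tf.constant([0.9, 0.2, 0.7, 0.05, 0.6])
n_neg = 2

val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)   # hardest negatives first
max_hard_pred = -val[-1]                           # probability of the k-th hardest
hard_negative_mask = nvalues_flat < max_hard_pred  # strictly harder than the k-th

with tf.Session() as sess:
    print(sess.run([idxes, hard_negative_mask]))
    # idxes -> [3, 1]; the mask keeps only entries strictly below 0.2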
def train():

    with tf.Graph().as_default(), tf.device('/cpu:0'):

        num_gpu = len(cfgs.GPU_GROUP.strip().split(','))
        global_step = slim.get_or_create_global_step()
        lr = warmup_lr(cfgs.LR, global_step, cfgs.WARM_SETP, num_gpu)
        # lr = warmup_and_cosine_lr(cfgs.LR, global_step, cfgs.WARM_SETP, cfgs.MAX_ITERATION, num_gpu)
        tf.summary.scalar('lr', lr)

        optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)
        retinanet = build_whole_network_r3det_csl.DetectionNetwork(
            base_network_name=cfgs.NET_NAME, is_training=True)

        with tf.name_scope('get_batch'):

            if cfgs.IMAGE_PYRAMID:
                shortside_len_list = tf.constant(cfgs.IMG_SHORT_SIDE_LEN)
                shortside_len = tf.random_shuffle(shortside_len_list)[0]

            else:
                shortside_len = cfgs.IMG_SHORT_SIDE_LEN

            img_name_batch, img_batch, gtboxes_and_label_batch, num_objects_batch, img_h_batch, img_w_batch = \
                next_batch(dataset_name=cfgs.DATASET_NAME,
                           batch_size=cfgs.BATCH_SIZE * num_gpu,
                           shortside_len=shortside_len,
                           is_training=True)

        # data processing
        inputs_list = []
        for i in range(num_gpu):
            img = tf.expand_dims(img_batch[i], axis=0)
            if cfgs.NET_NAME in [
                    'resnet152_v1d', 'resnet101_v1d', 'resnet50_v1d'
            ]:
                img = img / tf.constant([cfgs.PIXEL_STD])

            gtboxes_and_label_r = tf.py_func(backward_convert,
                                             inp=[gtboxes_and_label_batch[i]],
                                             Tout=tf.float32)
            gtboxes_and_label_r = tf.reshape(gtboxes_and_label_r, [-1, 6])

            gtboxes_and_label_h = get_horizen_minAreaRectangle(
                gtboxes_and_label_batch[i])
            gtboxes_and_label_h = tf.reshape(gtboxes_and_label_h, [-1, 5])

            num_objects = num_objects_batch[i]
            num_objects = tf.cast(tf.reshape(num_objects, [
                -1,
            ]), tf.float32)

            img_h = img_h_batch[i]
            img_w = img_w_batch[i]

            inputs_list.append([
                img, gtboxes_and_label_h, gtboxes_and_label_r, num_objects,
                img_h, img_w
            ])

        tower_grads = []
        biases_regularizer = tf.no_regularizer
        weights_regularizer = tf.contrib.layers.l2_regularizer(
            cfgs.WEIGHT_DECAY)

        total_loss_dict = {
            'cls_loss': tf.constant(0., tf.float32),
            'reg_loss': tf.constant(0., tf.float32),
            'refine_cls_loss': tf.constant(0., tf.float32),
            'refine_reg_loss': tf.constant(0., tf.float32),
            'angle_cls_loss': tf.constant(0., tf.float32),
            'total_losses': tf.constant(0., tf.float32),
        }

        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpu):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('tower_%d' % i):
                        with slim.arg_scope(
                            [slim.model_variable, slim.variable],
                                device='/device:CPU:0'):
                            with slim.arg_scope(
                                [
                                    slim.conv2d, slim.conv2d_in_plane,
                                    slim.conv2d_transpose,
                                    slim.separable_conv2d, slim.fully_connected
                                ],
                                    weights_regularizer=weights_regularizer,
                                    biases_regularizer=biases_regularizer,
                                    biases_initializer=tf.constant_initializer(
                                        0.0)):

                                gtboxes_and_label_h, gtboxes_and_label_r = tf.py_func(
                                    get_gtboxes_and_label,
                                    inp=[
                                        inputs_list[i][1], inputs_list[i][2],
                                        inputs_list[i][3]
                                    ],
                                    Tout=[tf.float32, tf.float32])
                                gtboxes_and_label_h = tf.reshape(
                                    gtboxes_and_label_h, [-1, 5])
                                gtboxes_and_label_r = tf.reshape(
                                    gtboxes_and_label_r, [-1, 6])

                                if cfgs.ANGLE_RANGE == 180:
                                    gtboxes_and_label_r_ = tf.py_func(
                                        coordinate_present_convert,
                                        inp=[gtboxes_and_label_r, -1],
                                        Tout=tf.float32)
                                    gtboxes_and_label_r_ = tf.reshape(
                                        gtboxes_and_label_r_, [-1, 6])

                                    gt_smooth_label = tf.py_func(
                                        angle_smooth_label,
                                        inp=[
                                            gtboxes_and_label_r_[:, -2],
                                            cfgs.ANGLE_RANGE, cfgs.LABEL_TYPE,
                                            cfgs.RADUIUS, cfgs.OMEGA
                                        ],
                                        Tout=tf.float32)
                                else:
                                    gt_smooth_label = tf.py_func(
                                        angle_smooth_label,
                                        inp=[
                                            gtboxes_and_label_r[:, -2],
                                            cfgs.ANGLE_RANGE, cfgs.LABEL_TYPE,
                                            cfgs.RADUIUS, cfgs.OMEGA
                                        ],
                                        Tout=tf.float32)

                                gt_smooth_label = tf.reshape(
                                    gt_smooth_label,
                                    [-1, cfgs.ANGLE_RANGE // cfgs.OMEGA])

                                img = inputs_list[i][0]
                                img_shape = inputs_list[i][-2:]
                                img = tf.image.crop_to_bounding_box(
                                    image=img,
                                    offset_height=0,
                                    offset_width=0,
                                    target_height=tf.cast(
                                        img_shape[0], tf.int32),
                                    target_width=tf.cast(
                                        img_shape[1], tf.int32))

                                outputs = retinanet.build_whole_detection_network(
                                    input_img_batch=img,
                                    gtboxes_batch_h=gtboxes_and_label_h,
                                    gtboxes_batch_r=gtboxes_and_label_r,
                                    gt_smooth_label=gt_smooth_label,
                                    gpu_id=i)
                                gtboxes_in_img_h = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_h[:, :-1],
                                    labels=gtboxes_and_label_h[:, -1],
                                    method=0,
                                    is_csl=True)
                                gtboxes_in_img_r = draw_boxes_with_categories(
                                    img_batch=img,
                                    boxes=gtboxes_and_label_r[:, :-1],
                                    labels=gtboxes_and_label_r[:, -1],
                                    method=1,
                                    is_csl=True)

                                tf.summary.image(
                                    'Compare/gtboxes_h_gpu:%d' % i,
                                    gtboxes_in_img_h)
                                tf.summary.image(
                                    'Compare/gtboxes_r_gpu:%d' % i,
                                    gtboxes_in_img_r)

                                if cfgs.ADD_BOX_IN_TENSORBOARD:
                                    detections_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[0],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1,
                                        is_csl=True)
                                    tf.summary.image(
                                        'Compare/final_detection_gpu:%d' % i,
                                        detections_in_img)

                                    detections_angle_in_img = draw_boxes_with_categories_and_scores(
                                        img_batch=img,
                                        boxes=outputs[3],
                                        scores=outputs[1],
                                        labels=outputs[2],
                                        method=1,
                                        is_csl=True)
                                    tf.summary.image(
                                        'Compare/final_detection_angle_gpu:%d'
                                        % i, detections_angle_in_img)

                                loss_dict = outputs[-1]

                                total_losses = 0.0
                                for k in loss_dict.keys():
                                    total_losses += loss_dict[k]
                                    total_loss_dict[
                                        k] += loss_dict[k] / num_gpu

                                total_losses = total_losses / num_gpu
                                total_loss_dict['total_losses'] += total_losses

                                if i == num_gpu - 1:
                                    regularization_losses = tf.get_collection(
                                        tf.GraphKeys.REGULARIZATION_LOSSES)
                                    # weight_decay_loss = tf.add_n(slim.losses.get_regularization_losses())
                                    total_losses = total_losses + tf.add_n(
                                        regularization_losses)

                        tf.get_variable_scope().reuse_variables()
                        grads = optimizer.compute_gradients(total_losses)
                        if cfgs.GRADIENT_CLIPPING_BY_NORM is not None:
                            grads = slim.learning.clip_gradient_norms(
                                grads, cfgs.GRADIENT_CLIPPING_BY_NORM)
                        tower_grads.append(grads)

        for k in total_loss_dict.keys():
            tf.summary.scalar('{}/{}'.format(k.split('_')[0], k),
                              total_loss_dict[k])

        if len(tower_grads) > 1:
            grads = sum_gradients(tower_grads)
        else:
            grads = tower_grads[0]

        if cfgs.MUTILPY_BIAS_GRADIENT is not None:
            final_gvs = []
            with tf.variable_scope('Gradient_Mult'):
                for grad, var in grads:
                    scale = 1.
                    if '/biases:' in var.name:
                        scale *= cfgs.MUTILPY_BIAS_GRADIENT
                    if 'conv_new' in var.name:
                        scale *= 3.
                    if not np.allclose(scale, 1.0):
                        grad = tf.multiply(grad, scale)

                    final_gvs.append((grad, var))
            apply_gradient_op = optimizer.apply_gradients(
                final_gvs, global_step=global_step)
        else:
            apply_gradient_op = optimizer.apply_gradients(
                grads, global_step=global_step)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.9999, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        train_op = tf.group(apply_gradient_op, variables_averages_op)
        # train_op = optimizer.apply_gradients(final_gvs, global_step=global_step)
        summary_op = tf.summary.merge_all()

        restorer, restore_ckpt = retinanet.get_restorer()
        saver = tf.train.Saver(max_to_keep=5)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        tfconfig = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            sess.run(init_op)

            # sess.run(tf.initialize_all_variables())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            summary_path = os.path.join(cfgs.SUMMARY_PATH, cfgs.VERSION)
            tools.mkdir(summary_path)
            summary_writer = tf.summary.FileWriter(summary_path,
                                                   graph=sess.graph)

            if not restorer is None:
                restorer.restore(sess, restore_ckpt)
                print('restore model')

            for step in range(cfgs.MAX_ITERATION // num_gpu):
                training_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                              time.localtime(time.time()))

                if step % cfgs.SHOW_TRAIN_INFO_INTE != 0 and step % cfgs.SMRY_ITER != 0:
                    _, global_stepnp = sess.run([train_op, global_step])

                else:
                    if step % cfgs.SHOW_TRAIN_INFO_INTE == 0 and step % cfgs.SMRY_ITER != 0:
                        start = time.time()

                        _, global_stepnp, total_loss_dict_ = \
                            sess.run([train_op, global_step, total_loss_dict])

                        end = time.time()

                        print('***' * 20)
                        print("""%s: global_step:%d  current_step:%d""" %
                              (training_time,
                               (global_stepnp - 1) * num_gpu, step * num_gpu))
                        print("""per_cost_time:%.3fs""" %
                              ((end - start) / num_gpu))
                        loss_str = ''
                        for k in total_loss_dict_.keys():
                            loss_str += '%s:%.3f\n' % (k, total_loss_dict_[k])
                        print(loss_str)

                        if np.isnan(total_loss_dict_['total_losses']):
                            sys.exit(0)

                    else:
                        if step % cfgs.SMRY_ITER == 0:
                            _, global_stepnp, summary_str = sess.run(
                                [train_op, global_step, summary_op])
                            summary_writer.add_summary(
                                summary_str, (global_stepnp - 1) * num_gpu)
                            summary_writer.flush()

                if (step > 0 and step % (cfgs.SAVE_WEIGHTS_INTE // num_gpu)
                        == 0) or (step >= cfgs.MAX_ITERATION // num_gpu - 1):

                    save_dir = os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)
                    if not os.path.exists(save_dir):
                        os.mkdir(save_dir)

                    save_ckpt = os.path.join(
                        save_dir, '{}_'.format(cfgs.DATASET_NAME) + str(
                            (global_stepnp - 1) * num_gpu) + 'model.ckpt')
                    saver.save(sess, save_ckpt)
                    print('weights have been saved')

            coord.request_stop()
            coord.join(threads)
Beispiel #55
0
    def add_loss(self):
        '''Adds loss to the model. Sets "loss" field. initialize must have been called.'''
        with tf.variable_scope('loss') as scope:
            hp = self.hparams

            if hp.mask_decoder:
                # Compute loss of predictions before postnet
                before = MaskedMSE(self.mel_targets,
                                   self.decoder_output,
                                   self.targets_lengths,
                                   hparams=self.hparams)
                # Compute loss after postnet
                after = MaskedMSE(self.mel_targets,
                                  self.mel_outputs,
                                  self.targets_lengths,
                                  hparams=self.hparams)
                # Compute <stop_token> loss (for learning dynamic generation stop)
                stop_token_loss = MaskedSigmoidCrossEntropy(
                    self.stop_token_targets,
                    self.stop_token_prediction,
                    self.targets_lengths,
                    hparams=self.hparams)
            else:
                # Compute loss of predictions before postnet
                before = tf.losses.mean_squared_error(self.mel_targets,
                                                      self.decoder_output)
                # Compute loss after postnet
                after = tf.losses.mean_squared_error(self.mel_targets,
                                                     self.mel_outputs)
                # Compute <stop_token> loss (for learning dynamic generation stop)
                stop_token_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=self.stop_token_targets,
                        logits=self.stop_token_prediction))

            if hp.predict_linear:
                # Compute linear loss
                # From https://github.com/keithito/tacotron/blob/tacotron2-work-in-progress/models/tacotron.py
                # Prioritize loss for frequencies under 2000 Hz.
                l1 = tf.abs(self.linear_targets - self.linear_outputs)
                n_priority_freq = int(2000 / (hp.sample_rate * 0.5) *
                                      hp.num_mels)
                linear_loss = 0.5 * tf.reduce_mean(l1) + 0.5 * tf.reduce_mean(
                    l1[:, :, 0:n_priority_freq])
            else:
                linear_loss = 0.

            # Compute the regularization weight
            if hp.tacotron_scale_regularization:
                reg_weight_scaler = 1. / (
                    2 * hp.max_abs_value) if hp.symmetric_mels else 1. / (
                        hp.max_abs_value)
                reg_weight = hp.tacotron_reg_weight * reg_weight_scaler
            else:
                reg_weight = hp.tacotron_reg_weight

            # Get all trainable variables
            all_vars = tf.trainable_variables()
            regularization = tf.add_n([
                tf.nn.l2_loss(v)
                for v in all_vars if not ('bias' in v.name or 'Bias' in v.name)
            ]) * reg_weight

            # Compute final loss term
            self.before_loss = before
            self.after_loss = after
            self.stop_token_loss = stop_token_loss
            self.regularization_loss = regularization
            self.linear_loss = linear_loss
            self.loss = self.before_loss + self.after_loss + self.stop_token_loss + self.regularization_loss + self.linear_loss
Beispiel #56
0
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins
  and `spatial_bins_x` horizontal bins. For each intersection of a vertical
  and a horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 3-D tensor of shape `[image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
      normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
      of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
      as the `[0, 1]` interval of normalized image height is mapped to
      `[0, image_height - 1]` in image height coordinates. We do allow y1 > y2,
      in which case the sampled crop is an up-down flipped version of the
      original image. The width dimension is treated similarly.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width` need
      to be positive.
    num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
      Represents the number of position-sensitive bins in y and x directions.
      Both values should be >= 1. `crop_height` should be divisible by
      `spatial_bins_y`, and similarly for width.
      The number of image channels should be divisible by
      (spatial_bins_y * spatial_bins_x).
      Suggested value from R-FCN paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled from
        the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient than
        running the function with global_pool=False and then performing global
        average pooling.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`,
      where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
      where K = 1 when global_pool is True (Average-pooled cropped regions),
      and K = crop_size when global_pool is False.
  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []

  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')

    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')

    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    step_y = (ymax - ymin) / spatial_bins_y
    for bin_x in range(spatial_bins_x):
      step_x = (xmax - xmin) / spatial_bins_x
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x,
                        ]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    if split.shape.is_fully_defined() and box.shape.is_fully_defined():
      crop = tf.squeeze(
          matmul_crop_and_resize(
              tf.expand_dims(split, axis=0), tf.expand_dims(box, axis=0),
              bin_crop_size),
          axis=0)
    else:
      crop = tf.image.crop_and_resize(
          tf.expand_dims(split, 0), box,
          tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keep_dims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(
          crop, block_size=block_size) for crop in image_crops]

    # Pack image_crops so that the first dimension is for position-sensitive boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + num_spatial_bins,
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        squeeze_dims=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features
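A minimal usage sketch under the constraints in the docstring (channel count divisible by spatial_bins_y * spatial_bins_x, crop_size divisible by num_spatial_bins); the feature map and boxes below are made up for illustration, and the function still relies on the surrounding object-detection utilities (e.g. matmul_crop_and_resize) that are not shown here:

import tensorflow as tf

# 14x14 feature grid with 3*3*8 = 72 channels, so each of the 9 bins gets 8 channels.
image = tf.random_uniform([14, 14, 72])
boxes = tf.constant([[0.1, 0.1, 0.6, 0.6],
                     [0.3, 0.2, 0.9, 0.8]])        # [num_boxes, 4], normalized coords

features = position_sensitive_crop_regions(
    image, boxes,
    crop_size=[6, 6],                # divisible by num_spatial_bins
    num_spatial_bins=[3, 3],
    global_pool=True)                # -> shape [2, 1, 1, 8] per the docstring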
Beispiel #57
0
def resnet_model_fn(features, labels, mode, params):
    """The model_fn for ResNet to be used with TPUEstimator.

  Args:
    features: `Tensor` of batched images. If transpose_input is enabled, it
        is transposed to device layout and reshaped to 1D tensor.
    labels: `Tensor` of labels for the data samples
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

  Returns:
    A `TPUEstimatorSpec` for the model
  """
    if isinstance(features, dict):
        features = features['feature']

    # In most cases, the default data format NCHW instead of NHWC should be
    # used for a significant performance boost on GPU/TPU. NHWC should be used
    # only if the network needs to be run on CPU since the pooling operations
    # are only supported on NHWC.
    if params['data_format'] == 'channels_first':
        assert not params['transpose_input']  # channels_first only for GPU
        features = tf.transpose(features, [0, 3, 1, 2])

    if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
        image_size = tf.sqrt(tf.shape(features)[0] / (3 * tf.shape(labels)[0]))
        features = tf.reshape(features, [image_size, image_size, 3, -1])
        features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

    # Normalize the image to zero mean and unit variance.
    features -= tf.constant(MEAN_RGB, shape=[1, 1, 3], dtype=features.dtype)
    features /= tf.constant(STDDEV_RGB, shape=[1, 1, 3], dtype=features.dtype)

    # DropBlock keep_prob for the 4 block groups of ResNet architecture.
    # None means applying no DropBlock at the corresponding block group.
    dropblock_keep_probs = [None] * 4
    if params['dropblock_groups']:
        # Scheduled keep_prob for DropBlock.
        train_steps = tf.cast(params['train_steps'], tf.float32)
        current_step = tf.cast(tf.train.get_global_step(), tf.float32)
        current_ratio = current_step / train_steps
        dropblock_keep_prob = (1 - current_ratio *
                               (1 - params['dropblock_keep_prob']))

        # Computes DropBlock keep_prob for different block groups of ResNet.
        dropblock_groups = [
            int(x) for x in params['dropblock_groups'].split(',')
        ]
        for block_group in dropblock_groups:
            if block_group < 1 or block_group > 4:
                raise ValueError(
                    'dropblock_groups should be a comma separated list of integers '
                    'between 1 and 4 (dropblock_groups: {}).'.format(
                        params['dropblock_groups']))
            dropblock_keep_probs[block_group - 1] = 1 - (
                (1 - dropblock_keep_prob) / 4.0**(4 - block_group))

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        network = resnet_model.resnet_v1(
            resnet_depth=params['resnet_depth'],
            num_classes=params['num_label_classes'],
            dropblock_size=params['dropblock_size'],
            dropblock_keep_probs=dropblock_keep_probs,
            data_format=params['data_format'])
        return network(inputs=features,
                       is_training=(mode == tf.estimator.ModeKeys.TRAIN))

    if params['precision'] == 'bfloat16':
        with tf.contrib.tpu.bfloat16_scope():
            logits = build_network()
        logits = tf.cast(logits, tf.float32)
    elif params['precision'] == 'float32':
        logits = build_network()

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'classes': tf.argmax(logits, axis=1),
            'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
        }
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    # If necessary, in the model_fn, use params['batch_size'] instead the batch
    # size flags (--train_batch_size or --eval_batch_size).
    batch_size = params['batch_size']  # pylint: disable=unused-variable

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    one_hot_labels = tf.one_hot(labels, params['num_label_classes'])
    cross_entropy = tf.losses.softmax_cross_entropy(
        logits=logits,
        onehot_labels=one_hot_labels,
        label_smoothing=params['label_smoothing'])

    # Add weight decay to the loss for non-batch-normalization variables.
    loss = cross_entropy + params['weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v) for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

    host_call = None
    if mode == tf.estimator.ModeKeys.TRAIN:
        # Compute the current epoch and associated learning rate from global_step.
        global_step = tf.train.get_global_step()
        steps_per_epoch = params['num_train_images'] / params[
            'train_batch_size']
        current_epoch = (tf.cast(global_step, tf.float32) / steps_per_epoch)
        # LARS is a large batch optimizer. LARS enables higher accuracy at batch 16K
        # and larger batch sizes.
        if params['enable_lars']:
            learning_rate = 0.0
            optimizer = lars_util.init_lars_optimizer(current_epoch, params)
        else:
            learning_rate = lottery.get_lr_tensor(params)
            if learning_rate is None:
                learning_rate = learning_rate_schedule(params, current_epoch)
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=params['momentum'],
                                                   use_nesterov=True)
        if params['use_tpu']:
            # When using TPU, wrap the optimizer with CrossShardOptimizer which
            # handles synchronization details between different TPU cores. To the
            # user, this should look like regular synchronous training.
            optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)

        # Batch normalization requires UPDATE_OPS to be added as a dependency to
        # the train operation.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step)

        if not params['skip_host_call']:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor with shape `[batch]` for the global_step
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                # Host call fns are executed params['iterations_per_loop'] times after
                # one TPU loop is finished. Setting max_queue to the same value as the
                # number of iterations makes the summary writer flush the data to
                # storage only once per loop.
                with summary.create_file_writer(
                        FLAGS.model_dir,
                        max_queue=params['iterations_per_loop']).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', loss[0], step=gs)
                        summary.scalar('learning_rate', lr[0], step=gs)
                        summary.scalar('current_epoch', ce[0], step=gs)

                        return summary.all_summary_ops()

            # To log the loss, current learning rate, and epoch for Tensorboard, the
            # summary op needs to be run on the host CPU via host_call. host_call
            # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
            # dimension. These Tensors are implicitly concatenated to
            # [params['batch_size']].
            gs_t = tf.reshape(global_step, [1])
            loss_t = tf.reshape(loss, [1])
            lr_t = tf.reshape(learning_rate, [1])
            ce_t = tf.reshape(current_epoch, [1])

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

            This function is executed on the CPU and should not directly reference
            any Tensors in the rest of the `model_fn`. To pass Tensors from the model
            to the `metric_fn`, provide as part of the `eval_metrics`. See
            https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
            for more information.

            Arguments should match the list of `Tensor` objects passed as the second
            element in the tuple passed to `eval_metrics`.

            Args:
              labels: `Tensor` with shape `[batch]`.
              logits: `Tensor` with shape `[batch, num_classes]`.

            Returns:
              A dict of the metrics to return from evaluation.
            """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'top_1_accuracy': top_1_accuracy,
                'top_5_accuracy': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                           loss=loss,
                                           train_op=train_op,
                                           host_call=host_call,
                                           eval_metrics=eval_metrics)
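
The `learning_rate_schedule` helper called in the training branch above is not defined in this snippet. The sketch below is only an assumption of what such an epoch-driven schedule might look like (linear warmup followed by stepwise decay); the decay points and the 'base_learning_rate' key in params are illustrative, not taken from the original model_fn.

# Sketch (assumption): epoch-driven learning-rate schedule with linear warmup
# and stepwise decay. The decay epochs/factors and params['base_learning_rate']
# are hypothetical placeholders.
def learning_rate_schedule(params, current_epoch):
    scaled_lr = params['base_learning_rate'] * (
        params['train_batch_size'] / 256.0)
    warmup_epochs = 5.0
    # Ramp linearly from 0 to scaled_lr over the first warmup_epochs epochs.
    warmup_lr = scaled_lr * (current_epoch / warmup_epochs)
    decayed_lr = scaled_lr
    for factor, start_epoch in [(0.1, 30), (0.01, 60), (0.001, 80)]:
        decayed_lr = tf.where(current_epoch < start_epoch,
                              decayed_lr, scaled_lr * factor)
    return tf.where(current_epoch < warmup_epochs, warmup_lr, decayed_lr)
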
    def build(self):

        with self._graph.as_default(), tf.device('/cpu:0'):

            # Create an optimizer that performs gradient descent.
            opt, lr, global_step = self.get_opt()

            ## some global placeholders
            keep_prob = tf.placeholder(tf.float32, name="keep_prob")
            L2_reg = tf.placeholder(tf.float32, name="L2_reg")
            training = tf.placeholder(tf.bool, name="training_flag")

            total_loss_to_show = 0.
            images_place_holder_list = []
            labels_place_holder_list = []
            boxes_place_holder_list = []

            weights_initializer = slim.xavier_initializer()
            biases_initializer = tf.constant_initializer(0.)
            biases_regularizer = tf.no_regularizer
            weights_regularizer = tf.contrib.layers.l2_regularizer(L2_reg)

            # Calculate the gradients for each model tower.
            tower_grads = []
            with tf.variable_scope(tf.get_variable_scope()):
                for i in range(cfg.TRAIN.num_gpu):
                    with tf.device('/gpu:%d' % i):
                        with tf.name_scope('tower_%d' % (i)) as scope:
                            with slim.arg_scope(
                                [slim.model_variable, slim.variable],
                                    device='/cpu:0'):

                                images_ = tf.placeholder(tf.float32,
                                                         [None, None, None, 3],
                                                         name="images")
                                boxes_ = tf.placeholder(
                                    tf.float32,
                                    [cfg.TRAIN.batch_size, None, 4],
                                    name="input_boxes")
                                labels_ = tf.placeholder(
                                    tf.int64, [cfg.TRAIN.batch_size, None],
                                    name="input_labels")
                                ###total anchor

                                images_place_holder_list.append(images_)
                                labels_place_holder_list.append(labels_)
                                boxes_place_holder_list.append(boxes_)

                                with slim.arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                                                     slim.conv2d_transpose, slim.separable_conv2d,
                                                     slim.fully_connected],
                                                    weights_regularizer=weights_regularizer,
                                                    biases_regularizer=biases_regularizer,
                                                    weights_initializer=weights_initializer,
                                                    biases_initializer=biases_initializer):
                                    reg_loss, cla_loss, l2_loss = self.tower_loss(
                                        scope, images_, labels_, boxes_,
                                        L2_reg, training)

                                    ## With multiple GPUs (a large effective batch), add the
                                    ## L2 loss only on the last tower so it is counted once.
                                    if i == cfg.TRAIN.num_gpu - 1:
                                        total_loss = tf.add_n(
                                            [reg_loss, cla_loss, l2_loss])
                                    else:
                                        total_loss = tf.add_n(
                                            [reg_loss, cla_loss])
                                total_loss_to_show += total_loss
                                # Reuse variables for the next tower.
                                tf.get_variable_scope().reuse_variables()

                                ## When using batch norm, take the update ops only from the
                                ## final tower. Ideally we would grab the updates from all
                                ## towers, but these statistics accumulate extremely fast, so
                                ## ignoring the other towers' stats causes no significant detriment.
                                bn_update_ops = tf.get_collection(
                                    tf.GraphKeys.UPDATE_OPS, scope=scope)

                                # Retain the summaries from the final tower.
                                self.summaries = tf.get_collection(
                                    tf.GraphKeys.SUMMARIES, scope)
                                # Calculate the gradients for the batch of data on this tower.
                                grads = opt.compute_gradients(total_loss)

                                # Keep track of the gradients across all towers.
                                tower_grads.append(grads)
            # We must calculate the mean of each gradient. Note that this is the
            # synchronization point across all towers.
            grads = self.average_gradients(tower_grads)

            # Add a summary to track the learning rate.
            self.add_summary(tf.summary.scalar('learning_rate', lr))
            self.add_summary(
                tf.summary.scalar('total_loss', total_loss_to_show))
            self.add_summary(tf.summary.scalar('loc_loss', reg_loss))
            self.add_summary(tf.summary.scalar('cla_loss', cla_loss))
            self.add_summary(tf.summary.scalar('l2_loss', l2_loss))

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    self.add_summary(
                        tf.summary.histogram(var.op.name + '/gradients', grad))

            # Apply the gradients to adjust the shared variables.
            apply_gradient_op = opt.apply_gradients(grads,
                                                    global_step=global_step)

            # Add histograms for trainable variables.
            for var in tf.trainable_variables():
                self.add_summary(tf.summary.histogram(var.op.name, var))

            if self.ema_weights:
                # Track the moving averages of all trainable variables.
                variable_averages = tf.train.ExponentialMovingAverage(
                    0.9, global_step)
                variables_averages_op = variable_averages.apply(
                    tf.trainable_variables())
                # Group all updates to into a single train op.
                train_op = tf.group(apply_gradient_op, variables_averages_op,
                                    *bn_update_ops)
            else:
                train_op = tf.group(apply_gradient_op, *bn_update_ops)

            ### set inputs and outputs
            self.inputs = [
                images_place_holder_list, boxes_place_holder_list,
                labels_place_holder_list, keep_prob, L2_reg, training
            ]
            self.outputs = [
                train_op, total_loss_to_show, reg_loss, cla_loss, l2_loss, lr
            ]
            self.val_outputs = [
                total_loss_to_show, reg_loss, cla_loss, l2_loss, lr
            ]

            ##init all variables
            init = tf.global_variables_initializer()
            self.sess.run(init)
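
The `self.average_gradients(tower_grads)` call above relies on a helper that is not shown in this snippet. The method below is only a sketch of the usual multi-tower recipe (averaging each variable's gradients across all towers), not the original implementation.

    # Sketch (assumption): average per-variable gradients across towers, in the
    # spirit of the classic multi-GPU training recipe. Each entry of
    # tower_grads is the list of (gradient, variable) pairs from one tower.
    def average_gradients(self, tower_grads):
        average_grads = []
        for grad_and_vars in zip(*tower_grads):
            # grad_and_vars is ((grad_gpu0, var), (grad_gpu1, var), ...).
            grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars
                     if g is not None]
            if not grads:
                continue
            grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
            # The variable is shared across towers, so take it from tower 0.
            average_grads.append((grad, grad_and_vars[0][1]))
        return average_grads
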
Beispiel #59
0
def train():
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter
            # for you every time it trains.
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl,
                                               is_training_pl,
                                               bn_decay=bn_decay,
                                               num_class=NUM_CLASSES)
            MODEL.get_loss(pred, labels_pl, end_points)
            losses = tf.get_collection('losses')
            total_loss = tf.add_n(losses, name='total_loss')
            tf.summary.scalar('total_loss', total_loss)
            for l in losses + [total_loss]:
                tf.summary.scalar(l.op.name, l)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct,
                                             tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            print("--- Get training operator")
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(total_loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'),
                                            sess.graph)

        # Init variables
        init = tf.global_variables_initializer()
        sess.run(init)
        # saver.restore(sess, os.path.join(LOG_DIR,'model.ckpt'))
        # log_string("Model restored.")

        ops = {
            'pointclouds_pl': pointclouds_pl,
            'labels_pl': labels_pl,
            'is_training_pl': is_training_pl,
            'pred': pred,
            'loss': total_loss,
            'train_op': train_op,
            'merged': merged,
            'step': batch,
            'end_points': end_points
        }

        best_acc = -1
        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            # if epoch % 10 == 0:
            save_path = saver.save(sess, os.path.join(LOG_DIR, "model.ckpt"))
            log_string("Model saved in file: %s" % save_path)
            # hidden layers
            h, E0, E1 = layer(args.layer_type, (h, E0, E1, alpha_val), 64, training, args, activation=tf.nn.elu)

            # classification layer
            logits,_,_ = layer(args.layer_type, (h, E0, E1, alpha_val), nC, training, args,
                             multi_edge_aggregation='mean')


            Yhat = tf.one_hot(tf.argmax(logits, axis=-1), nC)
            loss_train = utils.calc_loss(Y, logits, idx_train, W=W)
            loss_val = utils.calc_loss(Y, logits, idx_val)
            loss_test = utils.calc_loss(Y, logits, idx_test)

            vars = tf.trainable_variables()
            lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in vars if
                               'bias' not in v.name and
                               'gamma' not in v.name]) * args.weight_decay
            optimizer = tf.train.AdamOptimizer(learning_rate=args.lr)
            train_op = optimizer.minimize(loss_train + lossL2)

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())

            # ************************************************************
            # training
            # ************************************************************
            # ckpt_dir = Path('./ckpt')
            # ckpt_dir.mkdir(parents=True, exist_ok=True)
            # ckpt_path = ckpt_dir/'checkpoint.ckpt'
            # print('ckpt_path=', ckpt_path)