Example #1
0
def _prng_key(i, key):
  """Build a PRNG key tensor from key value `key` and counter value `i`.

  Both inputs are cast to int32 and stacked along axis 0, with the key value
  first and the counter value second.
  """
  key_part = tf.cast(key, dtype=tf.int32)
  counter_part = tf.cast(i, dtype=tf.int32)
  return tf.stack([key_part, counter_part], axis=0)
Example #2
0
def clipoutNeg(vec, threshold=1e-6):
    """Multiply `vec` by a 0/1 float32 mask of entries above `threshold`.

    Entries strictly greater than `threshold` are multiplied by 1.0; all
    other entries are multiplied by 0.0.
    """
    keep = vec > threshold
    return tf.cast(keep, tf.float32) * vec
Example #3
0
def detection_loss(cls_outputs, box_outputs, labels, params):
  """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.

  Args:
    cls_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    labels: the dictionary returned from the dataloader that includes
      groundtruth targets. Keys read here are 'mean_num_positives' and, for
      every level, 'cls_targets_<level>' and 'box_targets_<level>'. The
      dictionary is not modified.
    params: the dictionary including training parameters specified in the
      default_hparams function in this file. Keys read here are
      'data_format', 'num_classes', 'alpha', 'gamma', 'delta',
      'iou_loss_type', 'box_loss_weight' and 'iou_loss_weight'.

  Returns:
    total_loss: a float tensor representing the total loss, i.e. the class
      loss plus the weighted box and box-iou losses from all levels.
    cls_loss: a scalar float tensor representing total class loss.
    box_loss: a tensor representing total box regression loss.
    box_iou_loss: a tensor representing total box iou loss, or the Python
      float 0.0 when params['iou_loss_type'] is falsy.
  """
  # Sum all positives in a batch for normalization and avoid zero
  # num_positives_sum, which would lead to inf loss during training
  num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
  channels_first = params['data_format'] == 'channels_first'

  cls_losses = []
  box_losses = []
  box_iou_losses = []
  for level in cls_outputs.keys():
    cls_targets = labels['cls_targets_%d' % level]
    box_targets_at_level = labels['box_targets_%d' % level]
    if channels_first:
      # Keep the transposed targets in locals instead of writing them back
      # into `labels`: mutating the caller's dict would transpose the same
      # tensors again if this function were called twice with it.
      cls_targets = tf.transpose(cls_targets, [0, 3, 1, 2])
      box_targets_at_level = tf.transpose(box_targets_at_level, [0, 3, 1, 2])

    # Onehot encoding for classification labels.
    cls_targets_at_level = tf.one_hot(cls_targets, params['num_classes'])
    if channels_first:
      bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, -1, width, height])
    else:
      bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list()
      cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                        [bs, width, height, -1])

    cls_loss = _classification_loss(
        cls_outputs[level],
        cls_targets_at_level,
        num_positives_sum,
        alpha=params['alpha'],
        gamma=params['gamma'])
    # Restore a trailing per-class axis so the mask below can broadcast
    # over classes.
    if channels_first:
      cls_loss = tf.reshape(cls_loss,
                            [bs, -1, width, height, params['num_classes']])
    else:
      cls_loss = tf.reshape(cls_loss,
                            [bs, width, height, -1, params['num_classes']])
    # Zero the loss wherever the class target equals -2.
    cls_loss *= tf.cast(
        tf.expand_dims(tf.not_equal(cls_targets, -2), -1), tf.float32)
    cls_losses.append(tf.reduce_sum(cls_loss))

    box_losses.append(
        _box_loss(
            box_outputs[level],
            box_targets_at_level,
            num_positives_sum,
            delta=params['delta']))
    if params['iou_loss_type']:
      box_iou_losses.append(
          _box_iou_loss(box_outputs[level], box_targets_at_level,
                        num_positives_sum, params['iou_loss_type']))

  # Sum per level losses to total loss.
  cls_loss = tf.add_n(cls_losses)
  box_loss = tf.add_n(box_losses)
  box_iou_loss = tf.add_n(box_iou_losses) if box_iou_losses else 0.0
  total_loss = (
      cls_loss + params['box_loss_weight'] * box_loss +
      params['iou_loss_weight'] * box_iou_loss)
  return total_loss, cls_loss, box_loss, box_iou_loss
Example #4
0
def normalize_image(image):
    """Map pixel values from the range [0, 255] onto [-1, 1].

    The input is cast to float32, shifted down by 127.5 and then divided
    by 127.5.
    """
    half_range = 127.5
    centered = tf.cast(image, tf.float32) - half_range
    return centered / half_range
    def step_fn(self, params, model):
        """Build the graph for one training step (separate implementation).

        The step dequeues a tuple of labeled images, labels and two unlabeled
        image tensors from the TPU infeed, then builds, in order:

        1. the teacher forward pass and UDA cross-entropies via
           `UDA.build_uda_cross_entropy` under the 'teacher' variable scope;
        2. a first student forward pass and a student update on the
           unlabeled-data loss;
        3. a second student forward pass on the labeled images, ordered after
           the student update, to measure how the labeled loss changed;
        4. the teacher update, whose loss includes a term weighted by that
           change;
        5. an outfeed enqueue of logging scalars.

        Args:
            params: hyper-parameter object. Attributes read here include
                train_batch_size, num_replicas, uda_data, use_bfloat16,
                image_size, num_classes, use_xla_sharding,
                num_cores_per_replica, device_assignment, label_smoothing,
                grad_bound, uda_weight, uda_steps and the mpl_* learning-rate
                settings.
            model: callable invoked as `model(images, training=True)`; its
                `name` attribute is used to build the EMA name scope.

        Returns:
            The op returned by `tf.cond`: it enqueues the logged tensors on
            the outfeed when `common_utils.should_log(params)` is true and is
            a no-op otherwise.

        Side effects:
            Sets `self.step_info` to a dict mapping every log name to
            `[tf.float32, [1]]`.
        """
        train_batch_size = params.train_batch_size
        num_replicas = params.num_replicas
        uda_data = params.uda_data
        # Per-replica batch size (integer division of the global batch).
        batch_size = train_batch_size // num_replicas

        # dtypes/shapes of the four infeed tuple elements, in the order they
        # are unpacked below: l_images, l_labels, u_images_ori, u_images_aug.
        dtypes = [
            tf.bfloat16 if params.use_bfloat16 else tf.float32, tf.float32,
            tf.bfloat16 if params.use_bfloat16 else tf.float32,
            tf.bfloat16 if params.use_bfloat16 else tf.float32
        ]
        shapes = [[batch_size, params.image_size, params.image_size, 3],
                  [batch_size, params.num_classes],
                  [
                      batch_size * params.uda_data, params.image_size,
                      params.image_size, 3
                  ],
                  [
                      batch_size * params.uda_data, params.image_size,
                      params.image_size, 3
                  ]]

        if params.use_xla_sharding and params.num_cores_per_replica > 1:
            # Partitioned infeed: the three image tensors are partitioned
            # along dimension 2 across the cores of a replica; the labels are
            # not partitioned.
            q = tpu_feed._PartitionedInfeedQueue(
                number_of_tuple_elements=4,
                host_id=0,
                input_partition_dims=[
                    [1, 1, params.num_cores_per_replica, 1],
                    [1, 1],
                    [1, 1, params.num_cores_per_replica, 1],
                    [1, 1, params.num_cores_per_replica, 1],
                ],
                device_assignment=params.device_assignment)
            q.set_tuple_types(dtypes)
            q.set_tuple_shapes(shapes)
            l_images, l_labels, u_images_ori, u_images_aug = q.generate_dequeue_op(
            )
            l_images = xla_sharding.split(l_images, 2,
                                          params.num_cores_per_replica)
            u_images_ori = xla_sharding.split(u_images_ori, 2,
                                              params.num_cores_per_replica)
            u_images_aug = xla_sharding.split(u_images_aug, 2,
                                              params.num_cores_per_replica)
        else:
            # Unpartitioned infeed: dequeue the whole tuple on core 0.
            with tf.device(tf.tpu.core(0)):
                (l_images, l_labels, u_images_ori,
                 u_images_aug) = tf.raw_ops.InfeedDequeueTuple(dtypes=dtypes,
                                                               shapes=shapes)
        global_step = tf.train.get_or_create_global_step()
        # From here on num_replicas is a float32 tensor; it is only used to
        # scale the logged values at the end of this method.
        num_replicas = tf.cast(params.num_replicas, tf.float32)

        all_images = tf.concat([l_images, u_images_ori, u_images_aug], axis=0)

        # all calls to teacher
        with tf.variable_scope('teacher', reuse=tf.AUTO_REUSE):
            logits, labels, masks, cross_entropy = UDA.build_uda_cross_entropy(
                params, model, all_images, l_labels)

        # 1st call to student
        with tf.variable_scope(MODEL_SCOPE):
            # One forward pass over augmented-unlabeled + labeled images,
            # then split the logits back into the two groups.
            u_aug_and_l_images = tf.concat([u_images_aug, l_images], axis=0)
            logits['s_on_u_aug_and_l'] = model(u_aug_and_l_images,
                                               training=True)
            logits['s_on_u'], logits['s_on_l_old'] = tf.split(
                logits['s_on_u_aug_and_l'],
                [u_images_aug.shape[0].value, l_images.shape[0].value],
                axis=0)

        # for backprop
        # Student loss on unlabeled data: the targets are the softmax of
        # logits['u_aug'] with gradients stopped.
        cross_entropy['s_on_u'] = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.stop_gradient(tf.nn.softmax(logits['u_aug'], -1)),
            logits=logits['s_on_u'],
            label_smoothing=params.label_smoothing,
            reduction=tf.losses.Reduction.NONE)
        cross_entropy['s_on_u'] = tf.reduce_sum(
            cross_entropy['s_on_u']) / float(train_batch_size * uda_data)

        # for Taylor
        # Labeled loss of the student before its update, summed across
        # replicas and divided by the global batch size.
        cross_entropy['s_on_l_old'] = tf.losses.softmax_cross_entropy(
            onehot_labels=labels['l'],
            logits=logits['s_on_l_old'],
            reduction=tf.losses.Reduction.SUM)
        cross_entropy['s_on_l_old'] = tf.tpu.cross_replica_sum(
            cross_entropy['s_on_l_old']) / float(train_batch_size)
        # `shadow` stores that value; shadow_update is a control dependency
        # of the student update below, and `shadow` is read again when
        # computing dot_product.
        shadow = tf.get_variable(name='cross_entropy_old',
                                 shape=[],
                                 trainable=False,
                                 dtype=tf.float32)
        shadow_update = tf.assign(shadow, cross_entropy['s_on_l_old'])

        # Per-network bookkeeping keyed by 's' (student) / 't' (teacher):
        # weights, gradients, gradient norms, learning rates, optimizers.
        w_s = {}
        g_s = {}
        g_n = {}
        lr = {}
        optim = {}
        w_s['s'] = [
            w for w in tf.trainable_variables()
            if w.name.lower().startswith(MODEL_SCOPE)
        ]
        g_s['s_on_u'] = tf.gradients(cross_entropy['s_on_u'], w_s['s'])
        # g_s['s_on_u'] = [tf.tpu.cross_replica_sum(g) for g in g_s['s_on_u']]

        lr['s'] = common_utils.get_learning_rate(
            params,
            initial_lr=params.mpl_student_lr,
            num_warmup_steps=params.mpl_student_lr_warmup_steps,
            num_wait_steps=params.mpl_student_lr_wait_steps)
        lr['s'], optim['s'] = common_utils.get_optimizer(params,
                                                         learning_rate=lr['s'])
        optim['s']._create_slots(w_s['s'])  # pylint: disable=protected-access
        # Only the UPDATE_OPS whose name starts with the student scope prefix.
        update_ops = [
            op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            if op.name.startswith(f'train/{MODEL_SCOPE}/')
        ]

        # The student update is ordered after the update ops and after the
        # old labeled loss has been stored in `shadow`.
        with tf.control_dependencies(update_ops + [shadow_update]):
            g_s['s_on_u'] = common_utils.add_weight_decay(
                params, w_s['s'], g_s['s_on_u'])
            g_s['s_on_u'], g_n['s_on_u'] = tf.clip_by_global_norm(
                g_s['s_on_u'], params.grad_bound)
            train_op = optim['s'].apply_gradients(zip(g_s['s_on_u'], w_s['s']))

            # The EMA op is created under a control dependency on train_op.
            with tf.control_dependencies([train_op]):
                ema_train_op = common_utils.setup_ema(
                    params, name_scope=f'{MODEL_SCOPE}/{model.name}')

        # 2nd call to student
        # Ops created here are ordered after ema_train_op, so this forward
        # pass is built to run after the student update above.
        with tf.control_dependencies([ema_train_op]):
            with tf.variable_scope(MODEL_SCOPE, reuse=tf.AUTO_REUSE):
                logits['s_on_l_new'] = model(l_images, training=True)

        cross_entropy['s_on_l_new'] = tf.losses.softmax_cross_entropy(
            onehot_labels=labels['l'],
            logits=logits['s_on_l_new'],
            reduction=tf.losses.Reduction.SUM)
        cross_entropy['s_on_l_new'] = tf.tpu.cross_replica_sum(
            cross_entropy['s_on_l_new']) / float(train_batch_size)

        # Change in the student's labeled loss: new value minus the value
        # stored in `shadow`.
        dot_product = cross_entropy['s_on_l_new'] - shadow
        # dot_product = tf.clip_by_value(
        #     dot_product,
        #     clip_value_min=-params.mpl_dot_product_bound,
        #     clip_value_max=params.mpl_dot_product_bound)
        moving_dot_product = tf.get_variable('moving_dot_product',
                                             shape=[],
                                             trainable=False,
                                             dtype=tf.float32)
        # Equivalent to m <- 0.99 * m + 0.01 * dot_product.
        moving_dot_product_update = tf.assign_sub(
            moving_dot_product, 0.01 * (moving_dot_product - dot_product))
        with tf.control_dependencies([moving_dot_product_update]):
            # Subtract the moving average and stop gradients so that
            # dot_product acts as a constant coefficient in teacher_loss.
            dot_product = dot_product - moving_dot_product
            dot_product = tf.stop_gradient(dot_product)
        # Cross-entropy of logits['u_aug'] against its own softmax (with
        # gradients stopped on the target side).
        cross_entropy['mpl'] = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.stop_gradient(
                tf.nn.softmax(logits['u_aug'], axis=-1)),
            logits=logits['u_aug'],
            reduction=tf.losses.Reduction.NONE)
        cross_entropy['mpl'] = tf.reduce_sum(cross_entropy['mpl']) / float(
            train_batch_size * uda_data)

        # teacher train op
        # uda_weight ramps linearly from 0 up to params.uda_weight over the
        # first params.uda_steps global steps, then stays constant.
        uda_weight = params.uda_weight * tf.minimum(
            1.,
            tf.cast(global_step, tf.float32) / float(params.uda_steps))
        teacher_loss = (cross_entropy['u'] * uda_weight + cross_entropy['l'] +
                        cross_entropy['mpl'] * dot_product)
        w_s['t'] = [w for w in tf.trainable_variables() if 'teacher' in w.name]
        g_s['t'] = tf.gradients(teacher_loss, w_s['t'])
        g_s['t'] = common_utils.add_weight_decay(params, w_s['t'], g_s['t'])
        g_s['t'], g_n['t'] = tf.clip_by_global_norm(g_s['t'],
                                                    params.grad_bound)
        lr['t'] = common_utils.get_learning_rate(
            params,
            initial_lr=params.mpl_teacher_lr,
            num_warmup_steps=params.mpl_teacher_lr_warmup_steps)
        lr['t'], optim['t'] = common_utils.get_optimizer(params,
                                                         learning_rate=lr['t'])

        # global_step is passed only to the teacher's apply_gradients call,
        # not to the student's.
        teacher_train_op = optim['t'].apply_gradients(zip(g_s['t'], w_s['t']),
                                                      global_step=global_step)

        # Logging tensors are created under a control dependency on the
        # teacher update.
        with tf.control_dependencies([teacher_train_op]):
            logs = collections.OrderedDict()
            logs['global_step'] = tf.cast(global_step, tf.float32)

            # NOTE(review): several values below are divided by num_replicas,
            # presumably because the consumer sums them across replicas;
            # confirm against the outfeed reader.
            logs['cross_entropy/student_on_u'] = cross_entropy['s_on_u']
            logs['cross_entropy/student_on_l'] = (cross_entropy['s_on_l_new'] /
                                                  num_replicas)
            logs['cross_entropy/teacher_on_u'] = cross_entropy['u']
            logs['cross_entropy/teacher_on_l'] = cross_entropy['l']
            logs['lr/student'] = tf.identity(lr['s']) / num_replicas
            logs['lr/teacher'] = tf.identity(lr['t']) / num_replicas
            logs['mpl/dot_product'] = dot_product / num_replicas
            logs['mpl/moving_dot_product'] = moving_dot_product / num_replicas
            logs['uda/u_ratio'] = tf.reduce_mean(masks['u']) / num_replicas
            logs['uda/l_ratio'] = tf.reduce_mean(masks['l']) / num_replicas
            logs['uda/weight'] = uda_weight / num_replicas

            # Every logged value is expanded to shape [1]; step_info records
            # the matching dtype/shape for each log name.
            tensors = [tf.expand_dims(t, axis=0) for t in logs.values()]
            self.step_info = {k: [tf.float32, [1]] for k in logs.keys()}

            def outfeed(tensors):
                # Enqueue on the last core of the replica.
                with tf.device(tf.tpu.core(params.num_cores_per_replica - 1)):
                    return tf.raw_ops.OutfeedEnqueueTuple(inputs=tensors)

            outfeed_enqueue_op = tf.cond(common_utils.should_log(params),
                                         lambda: outfeed(tensors), tf.no_op)

            return outfeed_enqueue_op
Example #6
0
    def get_prediction_module(self, bert_model, features, is_training,
                              percent_done):
        """Build the span-prediction head and its per-example loss.

        Start and end logits are computed over the sequence output of
        `bert_model`. When `self.config.joint_prediction` is set, the end
        logits are conditioned on a start position: the gold start position
        when `is_training` is true, otherwise each of the top
        `self.config.beam_size` predicted start positions. An optional
        answerability classifier is added when
        `self.config.answerable_classifier` is set.

        Args:
            bert_model: object providing `get_sequence_output()`, which must
                return a rank-3 tensor (checked via `expected_rank=3`).
            features: dict of input tensors. Keys read here are
                "input_mask", "segment_ids", and the `self.name`-prefixed
                "_start_positions", "_end_positions", "_eid" and (when the
                answerable classifier is enabled) "_is_impossible".
            is_training: Python bool selecting the training or inference
                graph for joint prediction.
            percent_done: not used by this method.

        Returns:
            A tuple `(losses, outputs)`: `losses` is the loss tensor (the
            mean of start and end losses, plus the weighted answerable loss
            when enabled) and `outputs` is a dict containing the same loss
            together with the logits, gold positions, top-k beam results and
            the example ids.
        """
        final_hidden = bert_model.get_sequence_output()

        final_hidden_shape = modeling.get_shape_list(final_hidden,
                                                     expected_rank=3)
        batch_size = final_hidden_shape[0]
        seq_length = final_hidden_shape[1]

        # Mask of candidate answer positions: input_mask * segment_ids, with
        # 1 added at position 0.
        answer_mask = tf.cast(features["input_mask"], tf.float32)
        answer_mask *= tf.cast(features["segment_ids"], tf.float32)
        answer_mask += tf.one_hot(0, seq_length)

        start_logits = tf.squeeze(tf.layers.dense(final_hidden, 1), -1)

        # Zero-filled defaults for the beam outputs; they are overwritten
        # only in the joint-prediction branch below (end_* only when not
        # training).
        start_top_log_probs = tf.zeros([batch_size, self.config.beam_size])
        start_top_index = tf.zeros([batch_size, self.config.beam_size],
                                   tf.int32)
        end_top_log_probs = tf.zeros(
            [batch_size, self.config.beam_size, self.config.beam_size])
        end_top_index = tf.zeros(
            [batch_size, self.config.beam_size, self.config.beam_size],
            tf.int32)
        if self.config.joint_prediction:
            # Adds 1000 * (mask - 1) to the logits: -1000 where the mask is
            # 0 and no change where it is 1.
            start_logits += 1000.0 * (answer_mask - 1)
            start_log_probs = tf.nn.log_softmax(start_logits)
            start_top_log_probs, start_top_index = tf.nn.top_k(
                start_log_probs, k=self.config.beam_size)

            if not is_training:
                # Inference: build one start representation per beam entry.
                # batch, beam, length, hidden
                end_features = tf.tile(tf.expand_dims(final_hidden, 1),
                                       [1, self.config.beam_size, 1, 1])
                # batch, beam, length
                start_index = tf.one_hot(start_top_index,
                                         depth=seq_length,
                                         axis=-1,
                                         dtype=tf.float32)
                # batch, beam, hidden
                start_features = tf.reduce_sum(
                    tf.expand_dims(final_hidden, 1) *
                    tf.expand_dims(start_index, -1),
                    axis=-2)
                # batch, beam, length, hidden
                start_features = tf.tile(tf.expand_dims(start_features, 2),
                                         [1, 1, seq_length, 1])
            else:
                # Training: select the hidden state at the gold start
                # position and repeat it along the sequence axis.
                start_index = tf.one_hot(features[self.name +
                                                  "_start_positions"],
                                         depth=seq_length,
                                         axis=-1,
                                         dtype=tf.float32)
                start_features = tf.reduce_sum(
                    tf.expand_dims(start_index, -1) * final_hidden, axis=1)
                start_features = tf.tile(tf.expand_dims(start_features, 1),
                                         [1, seq_length, 1])
                end_features = final_hidden

            final_repr = tf.concat([start_features, end_features], -1)
            final_repr = tf.layers.dense(final_repr,
                                         512,
                                         activation=modeling.gelu,
                                         name="qa_hidden")
            # batch, beam, length (batch, length when training)
            end_logits = tf.squeeze(tf.layers.dense(final_repr, 1),
                                    -1,
                                    name="qa_logits")
            if is_training:
                end_logits += 1000.0 * (answer_mask - 1)
            else:
                # Insert a beam axis into the mask term so it broadcasts
                # over the beam dimension.
                end_logits += tf.expand_dims(1000.0 * (answer_mask - 1), 1)

            if not is_training:
                end_log_probs = tf.nn.log_softmax(end_logits)
                end_top_log_probs, end_top_index = tf.nn.top_k(
                    end_log_probs, k=self.config.beam_size)
                # At inference the per-beam end logits are replaced by
                # zeros of shape [batch, length]; the end loss computed
                # below therefore uses these zeros.
                end_logits = tf.zeros([batch_size, seq_length])
        else:
            # Independent start/end prediction from the same hidden states.
            end_logits = tf.squeeze(tf.layers.dense(final_hidden, 1), -1)
            start_logits += 1000.0 * (answer_mask - 1)
            end_logits += 1000.0 * (answer_mask - 1)

        def compute_loss(logits, positions):
            # Negative log-probability of the gold position under a softmax
            # over the last axis.
            one_hot_positions = tf.one_hot(positions,
                                           depth=seq_length,
                                           dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits, axis=-1)
            loss = -tf.reduce_sum(one_hot_positions * log_probs, axis=-1)
            return loss

        # The gold positions are read from `features` regardless of
        # is_training.
        start_positions = features[self.name + "_start_positions"]
        end_positions = features[self.name + "_end_positions"]

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)

        losses = (start_loss + end_loss) / 2.0

        # Default answerable logit (zeros) when the classifier is disabled.
        answerable_logit = tf.zeros([batch_size])
        if self.config.answerable_classifier:
            # Start from the hidden state at sequence position 0.
            final_repr = final_hidden[:, 0]
            if self.config.answerable_uses_start_logits:
                # Append the start-probability-weighted sum of hidden states.
                start_p = tf.nn.softmax(start_logits)
                start_feature = tf.reduce_sum(tf.expand_dims(start_p, -1) *
                                              final_hidden,
                                              axis=1)
                final_repr = tf.concat([final_repr, start_feature], -1)
                final_repr = tf.layers.dense(final_repr,
                                             512,
                                             activation=modeling.gelu)
            answerable_logit = tf.squeeze(tf.layers.dense(final_repr, 1), -1)
            answerable_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.cast(features[self.name + "_is_impossible"],
                               tf.float32),
                logits=answerable_logit)
            losses += answerable_loss * self.config.answerable_weight

        return losses, dict(
            loss=losses,
            start_logits=start_logits,
            end_logits=end_logits,
            answerable_logit=answerable_logit,
            start_positions=features[self.name + "_start_positions"],
            end_positions=features[self.name + "_end_positions"],
            start_top_log_probs=start_top_log_probs,
            start_top_index=start_top_index,
            end_top_log_probs=end_top_log_probs,
            end_top_index=end_top_index,
            eid=features[self.name + "_eid"],
        )
 def add_noise(v):
   """Return `v` plus a `random_normal` sample of v's shape, cast to v's dtype."""
   noise = random_normal(tf.shape(input=v))
   return v + tf.cast(noise, dtype=v.dtype)
Example #8
0
def model_definition(vector_dimension,
                     label_count,
                     slot_vectors,
                     value_vectors,
                     use_delex_features=False,
                     use_softmax=True,
                     value_specific_decoder=False,
                     learn_belief_state_update=True):
    """
    This method defines the model and returns the required TensorFlow operations.

    slot_vectors, value_vectors should be of size [label_count + 2, 300].
    For None, we should just pass zero vectors for both. 

    Then, replicate using these vectors the old NBT and then combine each value's (including NONE) into softmax. 


    List of values learned by this model: 

    1) h_utterance_representation, which uses a CNN to learn a representation of the utterance r.  
    2) candidates_transform, which includes w_candidates and b_candidates, which transforms candidate values to vector c.
    3) w_joint_hidden_layer and b_joint_hidden_layer, which collapses the interaction of r and c to an intermediate vector. 
    4) w_joint_presoftmax and b_joint_presoftmax, which collapse the intermediate layer to a single feature. 
    5) sysreq_w_hidden_layer and sysreq_b_hidden_layer, which compute intermediate sysreq representation.
    6) TODO: sysreq_w_softmax and sysreq_b_softmax, which map this to final decision. -- currently not size independent. 
    7) TODO: confirm_w1_hidden_layer, confirm_b1_hidden_layer, confirm_w1_softmax, confirm_b1_softmax: for confirmations. -- currently does not work. 
    8) a_memory, b_memory, a_current, b_current: for the belief state updates, composed into matrix.   

    If all of these are initialised and then supplied to each of the models, we could train them together (batch of each slot), and just save
    these variables, then at test time, just load them (as session even), and then initialise all of the models with them. 

    """

    print "=========================== Model declaration ==========================="
    if use_softmax:
        label_size = label_count + 1  # 1 is for NONE, dontcare is added to the ontology.
    else:
        label_size = label_count

    # these are actual NN hyperparameters that we might want to tune at some point:
    hidden_units_1 = 100
    longest_utterance_length = 40

    summary_feature_count = 10

    print "Hidden layer size:", hidden_units_1, "Label Size:", label_size, "Use Softmax:", use_softmax, "Use Delex Features:", use_delex_features

    utterance_representations_full = tf.placeholder(
        tf.float32, [None, 40, vector_dimension
                     ])  # full feature vector, which we want to convolve over.
    utterance_representations_delex = tf.placeholder(tf.float32,
                                                     [None, label_size])
    #    utterance_representations_delex = tf.placeholder(tf.float32, [None, label_size, 40, vector_dimension])

    system_act_slots = tf.placeholder(
        tf.float32,
        shape=(None, vector_dimension))  # just slots, for requestables.

    system_act_confirm_slots = tf.placeholder(tf.float32,
                                              shape=(None, vector_dimension))
    system_act_confirm_values = tf.placeholder(tf.float32,
                                               shape=(None, vector_dimension))

    #slot_values =  tf.placeholder(tf.float32, shape=(None, vector_dimension))
    #candidate_values = tf.placeholder(tf.float32, shape=(None, vector_dimension))

    # Initial (distributional) vectors. Needed for L2 regularisation.
    W_slots = tf.constant(slot_vectors, name="W_init")
    W_values = tf.constant(value_vectors, name="W_init")

    # output label, i.e. True / False, 1-hot encoded:
    y_ = tf.placeholder(tf.float32, [None, label_size])

    y_past_state = tf.placeholder(tf.float32, [None, label_size])

    # dropout placeholder, 0.5 for training, 1.0 for validation/testing:
    keep_prob = tf.placeholder("float")

    # constants useful for evaluation variables further below:
    ones = tf.constant(1.0, dtype="float")
    zeros = tf.constant(0.0, dtype="float")

    hidden_utterance_size = vector_dimension

    filter_sizes = [1, 2, 3]
    num_filters = 300
    hidden_utterance_size = num_filters  #* len(filter_sizes)

    #candidate_sum = candidate_values + slot_values # to avoid summing these two multiple times later.

    #w_candidates = tf.Variable(tf.random_normal([vector_dimension, vector_dimension]))
    #b_candidates = tf.Variable(tf.zeros([vector_dimension]))

    #candidates = tf.nn.sigmoid(tf.matmul(candidate_sum, w_candidates) + b_candidates)
    #candidates = tf.nn.sigmoid(tf.matmul(candidate_values, w_candidates) + b_candidates)

    # filter needs to be of shape: filter_height = 1,2,3, filter_width=300, in_channel=1, out_channel=num_filters
    # filter just dot products - in images these then overlap from different regions - we don't have that.
    h_utterance_representation = define_CNN_model(
        utterance_representations_full, num_filters, vector_dimension,
        longest_utterance_length)

    #candidate_sum = W_slots + W_values # size [label_size, vector_dimension]

    w_candidates = tf.Variable(
        tf.random_normal([vector_dimension, vector_dimension]))
    b_candidates = tf.Variable(tf.zeros([vector_dimension]))

    # multiply to get: [label_size, vector_dimension]
    candidates_transform = tf.nn.sigmoid(
        tf.matmul(W_values, w_candidates) + b_candidates)

    # Next, multiply candidates [label_size, vector_dimension] each with the uttereance representations [None, vector_dimension], to get [None, label_size, vector_dimension]
    # or utterance [None, vector_dimension] X [vector_dimension, label_size] to get [None, label_size]
    #h_utterance_representation_candidate_interaction = tf.Variable(tf.zeros([None, label_size, vector_dimension]))

    list_of_value_contributions = []

    # get interaction of utterance with each value:
    for value_idx in range(0, label_count):
        list_of_value_contributions.append(
            tf.multiply(h_utterance_representation,
                        candidates_transform[value_idx, :]))

    h_utterance_representation_candidate_interaction = tf.reshape(
        tf.transpose(tf.stack(list_of_value_contributions), [1, 0, 2]),
        [-1, vector_dimension])
    # the same transform now runs across each value's vector, multiplying.
    w_joint_hidden_layer = tf.Variable(
        tf.random_normal([vector_dimension, hidden_units_1]))
    b_joint_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

    # now multiply [None, label_size, vector_dimension] by [vector_dimension, hidden_units_1], to get [None, label_size, hidden_units_1]
    hidden_layer_joint = tf.nn.sigmoid(
        tf.reshape(
            tf.matmul(h_utterance_representation_candidate_interaction,
                      w_joint_hidden_layer) + b_joint_hidden_layer,
            [-1, label_count, hidden_units_1]))
    hidden_layer_joint_with_dropout = tf.nn.dropout(hidden_layer_joint,
                                                    keep_prob)

    # next initialise parameters that go into a softmax, i.e. mapping [None, label_size, hidden_units_1] -> [None, label_size]
    w_joint_presoftmax = tf.Variable(tf.random_normal([hidden_units_1,
                                                       1]))  # collapse to 1
    b_joint_presoftmax = tf.Variable(tf.zeros([1]))  # collapse to 1

    y_presoftmax = tf.reshape(
        tf.matmul(
            tf.reshape(hidden_layer_joint_with_dropout, [-1, hidden_units_1]),
            w_joint_presoftmax) + b_joint_presoftmax, [-1, label_count])

    # for now we do not implement this

    sysreq_contributions = []  # a list of contributions for each of the values
    confirm_contributions = [
    ]  # a list of contributions for each of the values

    # =================== NETWORK FOR SYSTEM REQUESTS ==========================

    # is the current slot offered
    system_act_candidate_interaction = tf.multiply(
        W_slots[0, :],
        system_act_slots)  # only multiply with slots for the requests.
    dot_product_sysreq = tf.reduce_mean(system_act_candidate_interaction, 1)

    #full_ones = tf.ones([tf.shape(dot_product_sysreq)[0], 1])
    #dot_product = tf.cast(tf.equal(dot_product_sysreq, full_ones), "float32")

    decision = tf.multiply(tf.expand_dims(dot_product_sysreq, 1),
                           h_utterance_representation)

    sysreq_w_hidden_layer = tf.Variable(
        tf.random_normal([vector_dimension, hidden_units_1]))
    sysreq_b_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

    # allow each value to learn to map different utterances to yes. Mainly dontcare.
    for value_idx in range(0, label_count):

        sysreq_hidden_layer_1 = tf.nn.sigmoid(
            tf.matmul(decision, sysreq_w_hidden_layer) + sysreq_b_hidden_layer)
        sysreq_hidden_layer_1_with_dropout = tf.nn.dropout(
            sysreq_hidden_layer_1, keep_prob)

        sysreq_w_softmax = tf.Variable(tf.random_normal([hidden_units_1, 1]))
        sysreq_b_softmax = tf.Variable(tf.zeros([1]))

        sysreq_contribution = tf.matmul(sysreq_hidden_layer_1_with_dropout,
                                        sysreq_w_softmax) + sysreq_b_softmax

        sysreq_contributions.append(sysreq_contribution)

    sysreq = tf.concat(sysreq_contributions, 1)  #, [-1, label_size])

    # =================== NETWORK FOR CONFIRMATIONS ==========================

    # here, we do want to tie across all values, as it will get a different signal depending on whether both things match.
    confirm_w1_hidden_layer = tf.Variable(
        tf.random_normal([vector_dimension, hidden_units_1]))
    confirm_b1_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

    confirm_w1_softmax = tf.Variable(tf.random_normal([hidden_units_1, 1]))
    confirm_b1_softmax = tf.Variable(tf.zeros([1]))

    for value_idx in range(0, label_count):

        dot_product = tf.multiply(
            tf.reduce_mean(
                tf.multiply(W_slots[0, :], system_act_confirm_slots), 1),
            tf.reduce_mean(
                tf.multiply(W_values[value_idx, :], system_act_confirm_values),
                1))  # dot product: slot equality and value equality

        full_ones = tf.ones(tf.shape(dot_product))
        dot_product = tf.cast(tf.equal(dot_product, full_ones), "float32")

        decision = tf.multiply(tf.expand_dims(dot_product, 1),
                               h_utterance_representation)

        confirm_hidden_layer_1 = tf.nn.sigmoid(
            tf.matmul(decision, confirm_w1_hidden_layer) +
            confirm_b1_hidden_layer)
        confirm_hidden_layer_1_with_dropout = tf.nn.dropout(
            confirm_hidden_layer_1, keep_prob)

        confirm_contribution = tf.matmul(
            confirm_hidden_layer_1_with_dropout,
            confirm_w1_softmax) + confirm_b1_softmax
        confirm_contributions.append(confirm_contribution)

    sysconf = tf.concat(confirm_contributions, 1)

    if use_softmax:

        append_zeros_none = tf.zeros([tf.shape(y_presoftmax)[0], 1])
        y_presoftmax = tf.concat([y_presoftmax, append_zeros_none], 1)

        append_zeros = tf.zeros([tf.shape(y_presoftmax)[0], 1])
        sysreq = tf.concat([sysreq, append_zeros], 1)
        sysconf = tf.concat([sysconf, append_zeros], 1)

        y_presoftmax = y_presoftmax + sysconf + sysreq

    if use_delex_features:
        y_presoftmax = y_presoftmax + utterance_representations_delex

    # value-specific decoder:
    if value_specific_decoder and False:

        h_utterance_representation_for_full_softmax = define_CNN_model(
            utterance_representations_full, num_filters, vector_dimension,
            longest_utterance_length)

        h_utterance_dropout = tf.nn.dropout(
            h_utterance_representation_for_full_softmax, keep_prob)

        ss_w_hidden_layer = tf.Variable(
            tf.random_normal([vector_dimension, hidden_units_1]))
        ss_b_hidden_layer = tf.Variable(tf.zeros([hidden_units_1]))

        ss_hidden_layer_1 = tf.nn.relu(
            tf.matmul(h_utterance_dropout, ss_w_hidden_layer) +
            ss_b_hidden_layer)
        ss_hidden_layer_1_with_dropout = tf.nn.dropout(ss_hidden_layer_1,
                                                       keep_prob)

        ss_w_softmax = tf.Variable(
            tf.random_normal([hidden_units_1, label_size]))
        ss_b_softmax = tf.Variable(tf.zeros([label_size]))

        ss_contribution = tf.matmul(ss_hidden_layer_1_with_dropout,
                                    ss_w_softmax) + ss_b_softmax

        y_presoftmax += ss_contribution

    # as we are returning always, can't be null
    update_coefficient = tf.constant(0.49)

    if use_softmax:

        if learn_belief_state_update:

            if value_specific_decoder:  # value-specific update

                update_coefficient = tf.constant(0.8)

                ss_W_memory = tf.Variable(
                    tf.random_normal([label_size, label_size]))

                ss_W_current = tf.Variable(
                    tf.random_normal([label_size, label_size]))

                y_combine = tf.matmul(y_past_state, ss_W_memory) + tf.matmul(
                    y_presoftmax, ss_W_current)

            else:

                update_coefficient = tf.constant(0.7)

                a_memory = tf.Variable(tf.random_normal([1, 1]))
                diag_memory = a_memory * tf.diag(tf.ones(label_size))

                b_memory = tf.Variable(tf.random_normal([1, 1]))
                non_diag_memory = tf.matrix_set_diag(
                    b_memory * tf.ones([label_size, label_size]),
                    tf.zeros(label_size))

                W_memory = diag_memory + non_diag_memory

                a_current = tf.Variable(tf.random_normal([1, 1]))
                diag_current = a_current * tf.diag(tf.ones(label_size))

                b_current = tf.Variable(tf.random_normal([1, 1]))
                non_diag_current = tf.matrix_set_diag(
                    b_current * tf.ones([label_size, label_size]),
                    tf.zeros(label_size))

                W_current = diag_current + non_diag_current

                y_combine = tf.matmul(y_past_state, W_memory) + tf.matmul(
                    y_presoftmax, W_current
                )  #+ tf.matmul(sysreq, W_current_req) + tf.matmul(sysconf, W_current_conf)

            y = tf.nn.softmax(y_combine)  # + y_ss_update_contrib)

        else:
            # This code runs the baseline experiments reported in Footnote 2 in the paper.
            update_coefficient = tf.Variable(
                0.5)  #this scales the contribution of the current turn.
            y_combine = update_coefficient * y_presoftmax + (
                1 - update_coefficient) * y_past_state
            y = tf.nn.softmax(y_combine)

    else:

        y = tf.nn.sigmoid(
            y_presoftmax
        )  # for requestables, we just have turn-level binary decisions

    # ======================== LOSS IS JUST CROSS ENTROPY ==========================================

    if use_softmax:
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=y_combine, labels=y_)
    else:
        cross_entropy = tf.reduce_sum(tf.square(y - y_))

    # ============================= EVALUATION =====================================================

    if use_softmax:
        predictions = tf.cast(tf.argmax(y, 1),
                              "float32")  # will have ones where positive
        true_predictions = tf.cast(tf.argmax(y_, 1), "float32")
        correct_prediction = tf.cast(tf.equal(predictions, true_predictions),
                                     "float")

        accuracy = tf.reduce_mean(correct_prediction)
        # this will count number of positives - they are marked with 1 in true_predictions
        num_positives = tf.reduce_sum(true_predictions)
        # positives are indicated with ones.
        classified_positives = tf.reduce_sum(predictions)
        # will have ones in all places where both are predicting positives
        true_positives = tf.multiply(predictions, true_predictions)
        # if indicators for positive of both are 1, then it is positive.
        num_true_positives = tf.reduce_sum(true_positives)

        recall = num_true_positives / num_positives
        precision = num_true_positives / classified_positives
        f_score = (2 * recall * precision) / (recall + precision)

    else:
        predictions = tf.cast(tf.round(y),
                              "float32")  # will have ones where positive
        true_predictions = tf.cast(tf.round(y_), "float32")
        correct_prediction = tf.cast(tf.equal(predictions, true_predictions),
                                     "float")

        num_positives = tf.reduce_sum(true_predictions)

        classified_positives = tf.reduce_sum(predictions)
        true_positives = tf.multiply(predictions, true_predictions)
        num_true_positives = tf.reduce_sum(true_positives)
        recall = num_true_positives / num_positives
        precision = num_true_positives / classified_positives
        f_score = (2 * recall * precision) / (recall + precision)

        accuracy = tf.reduce_mean(correct_prediction)

    optimizer = tf.train.AdamOptimizer(0.001)
    train_step = optimizer.minimize(cross_entropy)

    return keep_prob, utterance_representations_full, utterance_representations_delex, \
            system_act_slots, system_act_confirm_slots, system_act_confirm_values, \
            y_, y_past_state, accuracy, f_score, precision, \
           recall, num_true_positives, num_positives, classified_positives, y, \
           predictions, true_predictions, correct_prediction, true_positives, train_step, update_coefficient
# --------------------------- end of network ---------------------------
def regularizer(a):
    """Return a small penalty proportional to the L2 norm of tensor `a`.

    `tf.nn.l2_loss(a)` is doubled and square-rooted, and the result is then
    scaled by 0.5 and by 0.0001.
    """
    doubled_l2_loss = tf.nn.l2_loss(a) * 2
    l2_norm = doubled_l2_loss**0.5
    return l2_norm * 0.5 * 0.0001


# regularizer = tf.contrib.layers.l2_regularizer(0.0001)
# Build the network output for input `x`. `inference` is defined elsewhere; the
# custom `regularizer` above is passed in place of the commented-out
# tf.contrib one.
logits = inference(x, False, regularizer)

# (Small trick) Multiply logits by 1 and assign the result to logits_eval with
# an explicit name, so that the output tensor can be fetched by its tensor name
# when the model is loaded later.
b = tf.constant(value=1, dtype=tf.float32)
logits_eval = tf.multiply(logits, b, name='logits_eval')

# Sparse softmax cross-entropy between the logits and the labels `y_`,
# minimized with Adam at a learning rate of 0.001.
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_)
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# Accuracy: fraction of rows whose argmax over axis 1 (cast to int32) equals
# the label in `y_`.
correct_prediction = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


# Define a function that yields the data in mini-batches
def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
    """Yield aligned `(inputs, targets)` chunks of exactly `batch_size` items.

    Args:
        inputs: indexable sequence of samples.
        targets: indexable sequence of the same length as `inputs`.
        batch_size: number of items per yielded chunk.
        shuffle: when true, items are drawn in a random order produced by
            `np.random.shuffle`; the containers must then accept an index
            array (e.g. numpy arrays).

    Yields:
        Tuples `(inputs[sel], targets[sel])`. A trailing remainder shorter
        than `batch_size` is not yielded.
    """
    assert len(inputs) == len(targets)
    if shuffle:
        order = np.arange(len(inputs))
        np.random.shuffle(order)
    # Last admissible start index is len(inputs) - batch_size, so only full
    # batches are produced.
    stop = len(inputs) - batch_size + 1
    for lo in range(0, stop, batch_size):
        hi = lo + batch_size
        picker = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[picker], targets[picker]
Example #10
0
 def warmup_lr(step):
     """Linearly interpolate from `lr_warmup_init` towards `adjusted_lr`.

     The interpolation factor is `step / lr_warmup_step`, both cast to
     float32; the three rate/step values come from the enclosing scope.
     """
     lr_span = adjusted_lr - lr_warmup_init
     progress = tf.cast(step, tf.float32) / tf.cast(lr_warmup_step, tf.float32)
     return lr_warmup_init + lr_span * progress
Example #11
0
def _generate_detections_tf(cls_outputs,
                            box_outputs,
                            anchor_boxes,
                            indices,
                            classes,
                            image_id,
                            image_scale,
                            min_score_thresh=MIN_SCORE_THRESH,
                            max_boxes_to_draw=MAX_DETECTIONS_PER_IMAGE,
                            soft_nms_sigma=0.0,
                            iou_threshold=0.5,
                            use_native_nms=True):
  """Builds NMS-filtered detection rows from top-k model outputs.

  Args:
    cls_outputs: class logits of the N anchors kept by the top-k selection
      over all feature levels (k being MAX_DETECTION_POINTS).
    box_outputs: [N, 4] box regression outputs for the same N anchors.
    anchor_boxes: stacked anchors of all feature levels; the rows listed in
      `indices` are gathered from it.
    indices: [N] indices produced by the top-k selection.
    classes: [N] class prediction for each selected anchor.
    image_id: id of the image; it is tiled once per kept detection.
    image_scale: float scale between the original image and the detector
      input, used to rescale boxes for evaluation against the original
      groundtruth annotations.
    min_score_thresh: score below which boxes are dropped (native NMS only).
    max_boxes_to_draw: maximum number of boxes kept (native NMS only).
    soft_nms_sigma: Soft-NMS sigma (Bodla et al,
      https://arxiv.org/abs/1704.04503); 0.0 (the default) means standard
      hard NMS. Used by native NMS only.
    iou_threshold: IOU above which two boxes are considered to overlap too
      much.
    use_native_nms: if true use `tf.image.non_max_suppression_with_scores`,
      otherwise the custom `nms_tf`.

  Returns:
    A tensor with one row per detection:
    [image_id, y, x, height, width, score, class].
  """
  logging.info('Using tf version of post-processing.')
  selected_anchors = tf.gather(anchor_boxes, indices)

  # Logits -> probabilities.
  probs = tf.math.sigmoid(cls_outputs)
  # Apply the box regression to the anchors; both operands are transposed
  # before being handed to the decoder.
  transposed_outputs = tf.transpose(box_outputs, [1, 0])
  transposed_anchors = tf.transpose(selected_anchors, [1, 0])
  decoded = decode_box_outputs_tf(transposed_outputs, transposed_anchors)

  if not use_native_nms:
    logging.info('Using customized nms.')
    # The custom NMS consumes rows of [box (4 values), score].
    boxes_with_scores = tf.concat(
        [decoded, tf.expand_dims(probs, axis=1)], axis=1)
    keep_idx = nms_tf(boxes_with_scores, iou_threshold)
    kept_rows = tf.gather(boxes_with_scores, keep_idx)
    kept_scores = kept_rows[:, 4]
    kept_boxes = kept_rows[:, :4]
  else:
    logging.info('Using native nms.')
    keep_idx, kept_scores = tf.image.non_max_suppression_with_scores(
        decoded,
        probs,
        max_boxes_to_draw,
        iou_threshold=iou_threshold,
        score_threshold=min_score_thresh,
        soft_nms_sigma=soft_nms_sigma)
    kept_boxes = tf.gather(decoded, keep_idx)

  box_height = kept_boxes[:, 2] - kept_boxes[:, 0]
  box_width = kept_boxes[:, 3] - kept_boxes[:, 1]

  id_column = tf.cast(tf.tile(image_id, [tf.size(keep_idx)]), tf.float32)
  columns = [
      id_column,
      kept_boxes[:, 0] * image_scale,
      kept_boxes[:, 1] * image_scale,
      box_height * image_scale,
      box_width * image_scale,
      kept_scores,
      # Reported class ids are the gathered predictions shifted by one.
      tf.cast(tf.gather(classes, keep_idx) + 1, tf.float32),
  ]
  return tf.stack(columns, axis=1)
Example #12
0
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition entry.

    Builds the model outputs and then, depending on `mode`, either returns
    the raw per-level outputs (PREDICT) or assembles the loss, the training
    op, the evaluation metrics and the checkpoint scaffold.

    Args:
      features: the input image tensor with shape
        [batch_size, height, width, 3]. The height and width are fixed and
        equal.
      labels: the input labels in a dictionary. The labels include class
        targets and box targets which are dense label maps. The labels are
        generated from get_input_fn function in data/dataloader.py
      mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
      params: the dictionary defines hyperparameters of model. The default
        settings are in default_hparams function in this file.
      model: the model outputs class logits and box regression outputs.
      variable_filter_fn: the filter function that takes trainable_variables
        and returns the variable list after applying the filter rule.

    Returns:
      In PREDICT mode, a `tf.estimator.EstimatorSpec` holding the input image
      and the per-level class/box outputs. Otherwise a
      `tf.estimator.tpu.TPUEstimatorSpec` to run training or evaluation.
    """

    # Convert params (dict) to Config for easier access.
    def _model_outputs():
        return model(features, config=hparams_config.Config(params))

    if params['use_bfloat16']:
        # Build the model under the bfloat16 scope, then cast every level's
        # outputs back to float32 for the code that follows.
        with tf.tpu.bfloat16_scope():
            cls_outputs, box_outputs = _model_outputs()
            levels = cls_outputs.keys()
            for level in levels:
                cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
                box_outputs[level] = tf.cast(box_outputs[level], tf.float32)
    else:
        cls_outputs, box_outputs = _model_outputs()
        levels = cls_outputs.keys()

    # First check if it is in PREDICT mode.
    # PREDICT returns early, before any loss or optimizer is created.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'image': features,
        }
        for level in levels:
            predictions['cls_outputs_%d' % level] = cls_outputs[level]
            predictions['box_outputs_%d' % level] = box_outputs[level]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Set up training loss and learning rate.
    update_learning_rate_schedule_parameters(params)
    global_step = tf.train.get_or_create_global_step()
    learning_rate = learning_rate_schedule(params, global_step)

    # cls_loss and box_loss are for logging. only total_loss is optimized.
    det_loss, cls_loss, box_loss = detection_loss(cls_outputs, box_outputs,
                                                  labels, params)
    l2loss = reg_l2_loss(params['weight_decay'])
    total_loss = det_loss + l2loss

    if mode == tf.estimator.ModeKeys.TRAIN:
        utils.scalar('lrn_rate', learning_rate)
        utils.scalar('trainloss/cls_loss', cls_loss)
        utils.scalar('trainloss/box_loss', box_loss)
        utils.scalar('trainloss/det_loss', det_loss)
        utils.scalar('trainloss/l2_loss', l2loss)
        utils.scalar('trainloss/loss', total_loss)

    # `ema` and `ema_vars` only exist when moving_average_decay is truthy;
    # every later use is guarded by the same check.
    moving_average_decay = params['moving_average_decay']
    if moving_average_decay:
        ema = tf.train.ExponentialMovingAverage(decay=moving_average_decay,
                                                num_updates=global_step)
        ema_vars = utils.get_ema_vars()

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tf.tpu.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = tf.trainable_variables()
        if variable_filter_fn:
            var_list = variable_filter_fn(var_list, params['resnet_depth'])

        if params.get('clip_gradients_norm', 0) > 0:
            # Gradient clipping needs the explicit compute/apply split so the
            # gradients can be rescaled by their global norm in between.
            logging.info('clip gradients norm by %f',
                         params['clip_gradients_norm'])
            grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
            with tf.name_scope('clip'):
                grads = [gv[0] for gv in grads_and_vars]
                tvars = [gv[1] for gv in grads_and_vars]
                clipped_grads, gnorm = tf.clip_by_global_norm(
                    grads, params['clip_gradients_norm'])
                utils.scalar('gnorm', gnorm)
                grads_and_vars = list(zip(clipped_grads, tvars))

            with tf.control_dependencies(update_ops):
                train_op = optimizer.apply_gradients(grads_and_vars,
                                                     global_step)
        else:
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(total_loss,
                                              global_step,
                                              var_list=var_list)

        if moving_average_decay:
            # The EMA update is created under a control dependency on the
            # optimizer step and replaces it as the returned train_op.
            with tf.control_dependencies([train_op]):
                train_op = ema.apply(ema_vars)

    else:
        train_op = None

    eval_metrics = None
    if mode == tf.estimator.ModeKeys.EVAL:

        def metric_fn(**kwargs):
            """Returns a dictionary that has the evaluation metrics."""
            batch_size = params['batch_size']
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                          params['val_json_file'], **kwargs)

            # Add metrics to output.
            output_metrics = {
                'cls_loss': cls_loss,
                'box_loss': box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        # The losses are tiled to shape [batch_size, 1] before being passed
        # to metric_fn.
        # NOTE(review): presumably because metric_fn inputs need a leading
        # batch dimension - confirm.
        cls_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(cls_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        box_loss_repeat = tf.reshape(
            tf.tile(tf.expand_dims(box_loss, 0), [
                params['batch_size'],
            ]), [params['batch_size'], 1])
        metric_fn_inputs = {
            'cls_loss_repeat': cls_loss_repeat,
            'box_loss_repeat': box_loss_repeat,
            'source_ids': labels['source_ids'],
            'groundtruth_data': labels['groundtruth_data'],
            'image_scales': labels['image_scales'],
        }
        add_metric_fn_inputs(params, cls_outputs, box_outputs,
                             metric_fn_inputs)
        eval_metrics = (metric_fn, metric_fn_inputs)

    # Pick the scaffold: backbone checkpoint initialization when training
    # with a backbone checkpoint, EMA-variable restore when evaluating with
    # moving averages, otherwise none.
    if params['backbone_ckpt'] and mode == tf.estimator.ModeKeys.TRAIN:

        def scaffold_fn():
            """Loads pretrained model through scaffold function."""
            logging.info('restore variables from %s', params['backbone_ckpt'])
            if params['ckpt_var_scope'] is None:
                ckpt_scope = params[
                    'backbone_name']  # Use backbone name in default.
            else:
                ckpt_scope = params['ckpt_var_scope']
            tf.train.init_from_checkpoint(
                params['backbone_ckpt'],
                utils.get_ckt_var_map(params['backbone_ckpt'],
                                      ckpt_scope + '/',
                                      params['backbone_name'] + '/'))
            return tf.train.Scaffold()
    elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:

        def scaffold_fn():
            """Load moving average variables for eval."""
            logging.info('Load EMA vars with ema_decay=%f',
                         moving_average_decay)
            restore_vars_dict = ema.variables_to_restore(ema_vars)
            saver = tf.train.Saver(restore_vars_dict)
            return tf.train.Scaffold(saver=saver)
    else:
        scaffold_fn = None

    return tf.estimator.tpu.TPUEstimatorSpec(mode=mode,
                                             loss=total_loss,
                                             train_op=train_op,
                                             eval_metrics=eval_metrics,
                                             host_call=utils.get_tpu_host_call(
                                                 global_step, params),
                                             scaffold_fn=scaffold_fn)
Example #13
0
 def decode_png_mask(image_buffer):
     """Decode an encoded single-channel image into a float32 0/1 mask.

     The image is decoded with one channel, the channel axis (axis 2) is
     squeezed away, and every value greater than 0 becomes 1.0 while all
     other values become 0.0.
     """
     decoded = tf.image.decode_image(image_buffer, channels=1)
     mask = tf.squeeze(decoded, axis=2)
     mask.set_shape([None, None])
     is_foreground = tf.greater(mask, 0)
     return tf.cast(is_foreground, dtype=tf.float32)
Example #14
0
    def decode(self, tf_example_string_tensor):
        """Decodes serialized tensorflow example and returns a tensor dictionary.

        Args:
          tf_example_string_tensor: a string tensor holding a serialized
            tensorflow example proto.

        Returns:
          A dictionary of the following tensors.
          fields.InputDataFields.image - 3D uint8 tensor of shape
            [None, None, 3] containing image.
          fields.InputDataFields.original_image_spatial_shape - 1D int32
            tensor of shape [2] containing shape of the image.
          fields.InputDataFields.source_id - string tensor containing original
            image id.
          fields.InputDataFields.key - string tensor with unique sha256 hash
            key.
          fields.InputDataFields.filename - string tensor with original
            dataset filename.
          fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of
            shape [None, 4] containing box corners.
          fields.InputDataFields.groundtruth_classes - 1D int64 tensor of
            shape [None] containing classes for the boxes.
          fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
            shape [None] indicating the weights of groundtruth boxes.
          fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
            [None] containing object mask area in pixel squared.
          fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of
            shape [None] indicating if the boxes enclose a crowd.

        Optional:
          fields.InputDataFields.groundtruth_image_confidences - 1D float
            tensor of shape [None] indicating if a class is present in the
            image (1.0) or a class is not present in the image (0.0).
          fields.InputDataFields.image_additional_channels - 3D uint8 tensor
            of shape [None, None, num_additional_channels]. 1st dim is height;
            2nd dim is width; 3rd dim is the number of additional channels.
          fields.InputDataFields.groundtruth_difficult - 1D bool tensor of
            shape [None] indicating if the boxes represent `difficult`
            instances.
          fields.InputDataFields.groundtruth_group_of - 1D bool tensor of
            shape [None] indicating if the boxes represent `group_of`
            instances.
          fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
            shape [None, num_keypoints, 2] containing keypoints, where the
            coordinates of the keypoints are ordered (y, x).
          fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
            tensor of shape [None, num_keypoints] containing keypoint
            visibilities.
          fields.InputDataFields.groundtruth_instance_masks - 3D float32
            tensor of shape [None, None, None] containing instance masks.
          fields.InputDataFields.groundtruth_image_classes - 1D int64 of shape
            [None] containing classes for the boxes.
          fields.InputDataFields.multiclass_scores - 1D float32 tensor of
            shape [None * num_classes] containing flattened multiclass scores
            for groundtruth boxes.
          fields.InputDataFields.context_features - 1D float32 tensor of shape
            [context_feature_length * num_context_features]
          fields.InputDataFields.context_feature_length - int32 tensor
            specifying the length of each feature in context_features
        """
        # Reshape the input to a scalar string before handing it to the
        # slim decoder.
        serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
        decoder = slim_example_decoder.TFExampleDecoder(
            self.keys_to_features, self.items_to_handlers)
        # Decode every item the decoder knows about and pair each decoded
        # tensor with its item key.
        keys = decoder.list_items()
        tensors = decoder.decode(serialized_example, items=keys)
        tensor_dict = dict(zip(keys, tensors))
        is_crowd = fields.InputDataFields.groundtruth_is_crowd
        tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
        tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
        # The spatial shape is the first two entries of the image's dynamic
        # shape.
        tensor_dict[
            fields.InputDataFields.original_image_spatial_shape] = tf.shape(
                tensor_dict[fields.InputDataFields.image])[:2]

        if fields.InputDataFields.image_additional_channels in tensor_dict:
            # Drop axis 3 and move the leading axis to the end.
            # NOTE(review): presumably the decoded layout is
            # [num_channels, height, width, 1] - confirm against the handler.
            channels = tensor_dict[
                fields.InputDataFields.image_additional_channels]
            channels = tf.squeeze(channels, axis=3)
            channels = tf.transpose(channels, perm=[1, 2, 0])
            tensor_dict[
                fields.InputDataFields.image_additional_channels] = channels

        def default_groundtruth_weights():
            # One weight of 1.0 per groundtruth box.
            return tf.ones([
                tf.shape(
                    tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
            ],
                           dtype=tf.float32)

        # Keep the decoded weights when they are non-empty; otherwise fall
        # back to all-ones weights.
        tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
            tf.greater(
                tf.shape(tensor_dict[
                    fields.InputDataFields.groundtruth_weights])[0], 0),
            lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
            default_groundtruth_weights)

        if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
            # Set all keypoints that are not labeled to NaN.
            gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
            gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
            # Repeat each visibility flag along a new last axis of size 2 so
            # it can mask both coordinates of a keypoint.
            visibilities_tiled = tf.tile(
                tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1), [1, 1, 2])
            tensor_dict[gt_kpt_fld] = tf.where(
                visibilities_tiled, tensor_dict[gt_kpt_fld],
                np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

        if self._expand_hierarchy_labels:
            input_fields = fields.InputDataFields
            image_classes, image_confidences = self._expand_image_label_hierarchy(
                tensor_dict[input_fields.groundtruth_image_classes],
                tensor_dict[input_fields.groundtruth_image_confidences])
            tensor_dict[input_fields.groundtruth_image_classes] = image_classes
            tensor_dict[input_fields.groundtruth_image_confidences] = (
                image_confidences)

            # Per-box fields that are expanded using the (not yet expanded)
            # groundtruth classes.
            box_fields = [
                fields.InputDataFields.groundtruth_group_of,
                fields.InputDataFields.groundtruth_is_crowd,
                fields.InputDataFields.groundtruth_difficult,
                fields.InputDataFields.groundtruth_area,
                fields.InputDataFields.groundtruth_boxes,
                fields.InputDataFields.groundtruth_weights,
            ]

            def expand_field(field_name):
                return self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[field_name])

            # NOTE(review): the lambdas below close over the loop variable
            # `field`; this is only correct if tf.cond invokes them before
            # the next iteration - confirm for the execution mode in use.
            # pylint: disable=cell-var-from-loop
            for field in box_fields:
                if field in tensor_dict:
                    tensor_dict[field] = tf.cond(
                        tf.size(tensor_dict[field]) > 0,
                        lambda: expand_field(field),
                        lambda: tensor_dict[field])
            # pylint: enable=cell-var-from-loop

            # The classes themselves are expanded last, after every other box
            # field has been expanded using the original classes.
            tensor_dict[input_fields.groundtruth_classes] = (
                self._expansion_box_field_labels(
                    tensor_dict[input_fields.groundtruth_classes],
                    tensor_dict[input_fields.groundtruth_classes], True))

        if fields.InputDataFields.groundtruth_group_of in tensor_dict:
            group_of = fields.InputDataFields.groundtruth_group_of
            tensor_dict[group_of] = tf.cast(tensor_dict[group_of],
                                            dtype=tf.bool)

        # When the DensePose point counts are present, both they and the
        # part ids are cast to int32.
        if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_num_points],
                    dtype=tf.int32)
            tensor_dict[
                fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
                    tensor_dict[
                        fields.InputDataFields.groundtruth_dp_part_ids],
                    dtype=tf.int32)

        if fields.InputDataFields.groundtruth_track_ids in tensor_dict:
            tensor_dict[
                fields.InputDataFields.groundtruth_track_ids] = tf.cast(
                    tensor_dict[fields.InputDataFields.groundtruth_track_ids],
                    dtype=tf.int32)

        return tensor_dict
Example #15
0
def test_compress(args):
    """Evaluate rate/distortion over kodak/*.png for shrinking channel masks.

    Builds the compression graph for a PNG whose path is fed through a string
    placeholder, restores the latest checkpoint from `args.checkpoint_dir`,
    and then, for `active` = 256, 240, ..., 32, multiplies the latent `y` by
    a 0/1 mask with `active` ones (rescaled by 256 / active), compresses and
    decompresses it, and measures bits-per-pixel and MSE for every file
    matching "kodak/*.png". One CSV row per (active, file) is written to
    "e2.csv" in the current directory.

    Args:
        args: object providing `num_filters` (passed to every transform) and
            `checkpoint_dir` (where the latest checkpoint is looked up).

    Returns:
        None. Results are written to "e2.csv".
    """
    fn = tf.placeholder(tf.string, [])

    # Load input image and add batch dimension.
    x = read_png(fn)
    x = tf.expand_dims(x, 0)
    x.set_shape([1, None, None, 3])
    x_shape = tf.shape(x)

    # Instantiate model.
    analysis_transform = AnalysisTransform(args.num_filters)
    synthesis_transform = SynthesisTransform(args.num_filters)
    hyper_analysis_transform = HyperAnalysisTransform(args.num_filters)
    hyper_synthesis_transform = HyperSynthesisTransform(args.num_filters)
    entropy_bottleneck = tfc.EntropyBottleneck()

    # Transform and compress the image.
    y = analysis_transform(x)
    y_shape = tf.shape(y)
    z = hyper_analysis_transform(abs(y))
    z_hat, z_likelihoods = entropy_bottleneck(z, training=False)
    sigma = hyper_synthesis_transform(z_hat)
    sigma = sigma[:, :y_shape[1], :y_shape[2], :]
    scale_table = np.exp(
        np.linspace(np.log(SCALES_MIN), np.log(SCALES_MAX), SCALES_LEVELS))
    conditional_bottleneck = DynamicGaussianConditional(
        sigma, scale_table, name="gaussian_conditional")

    # NOTE(review): these two results are not used below. The calls are kept
    # because they run before the Saver is created and may contribute to
    # graph/variable construction - confirm before removing.
    side_string = entropy_bottleneck.compress(z)
    string = conditional_bottleneck.compress(y)

    # Transform the quantized image back (if requested).
    # This first call of synthesis_transform happens before tf.train.Saver()
    # is constructed inside the session block below.
    y_hat, y_likelihoods = conditional_bottleneck(y, training=False)
    x_hat = synthesis_transform(y_hat)
    x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

    num_pixels = tf.cast(tf.reduce_prod(tf.shape(x)[:-1]), dtype=tf.float32)

    # Total number of bits divided by number of pixels.
    eval_bpp = (tf.reduce_sum(tf.log(y_likelihoods)) + tf.reduce_sum(
        tf.log(z_likelihoods))) / (-np.log(2) * num_pixels)

    # Bring both images back to 0..255 range.
    x *= 255
    x_hat = tf.clip_by_value(x_hat, 0, 1)
    x_hat = tf.round(x_hat * 255)

    mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
    psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
    msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

    with tf.Session() as sess:
        # Load the latest model checkpoint.
        latest = tf.train.latest_checkpoint(checkpoint_dir=args.checkpoint_dir)
        tf.train.Saver().restore(sess, save_path=latest)

        # 256 ones followed by 224 zeros; a 256-wide window starting at
        # offset (256 - active) therefore holds `active` leading ones.
        const = tf.constant([1] * 256 + [0] * 224, dtype=tf.float32)

        # The context manager closes (and flushes) the CSV even when graph
        # construction or sess.run raises part-way through the sweep.
        with open("e2.csv", "w") as f:
            print("active, fn, bpp, mse, np", file=f)
            for active in range(256, 31, -16):
                mask = const[256 - active:512 - active]
                rate = tf.reduce_sum(mask) / 256
                # Zero the inactive channels and rescale the rest by 1 / rate.
                y_itc = y * mask / rate

                string = conditional_bottleneck.compress(y_itc)
                y_itc_hat = conditional_bottleneck.decompress(string)

                # Transform the quantized image back (if requested).
                x_hat = synthesis_transform(y_itc_hat)
                x_hat = x_hat[:, :x_shape[1], :x_shape[2], :]

                # Rate counts only the first `active` channels of the
                # y likelihoods, plus all of the z likelihoods.
                eval_bpp = (
                    tf.reduce_sum(tf.log(y_likelihoods[:, :, :, :active])) +
                    tf.reduce_sum(tf.log(z_likelihoods))) / (-np.log(2) *
                                                             num_pixels)

                x_hat = tf.clip_by_value(x_hat, 0, 1)
                x_hat = tf.round(x_hat * 255)

                mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
                psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
                msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))

                for filename in glob.glob("kodak/*.png"):

                    v_eval_bpp, v_mse, v_num_pixels = sess.run(
                        [eval_bpp, mse, num_pixels], feed_dict={fn: filename})

                    print("%.2f, %s, %.4f, %.4f, %d" %
                          (active, filename, v_eval_bpp, v_mse, v_num_pixels),
                          file=f)
Example #16
0
def lagrangian_optimizer_kld(
    train_set, additive_slack, learning_rate, learning_rate_constraint, loops):
  """Implements surrogate-based Lagrangian optimizer (Algorithm 2).

  Specifically solves:
    min_{theta} sum_{G = 0, 1} KLD(p, pprG(theta))
      s.t. error_rate <= additive_slack,
    where p is the overall proportion of positives and pprG is the positive
    prediction rate for group G.

  We frame this as a constrained optimization problem:
    min_{theta, xi_pos0, xi_pos1, xi_neg0, xi_neg1} {
      -p log(xi_pos0) - (1-p) log(xi_neg0) - p log(xi_pos1)
        -(1-p) log(xi_neg1)}
    s.t.
      error_rate <= additive_slack,
        xi_pos0 <= ppr0(theta), xi_neg0 <= npr0(theta),
        xi_pos1 <= ppr1(theta), xi_neg1 <= npr1(theta),
  and formulate the Lagrangian:
    max_{lambda's >= 0} min_{xi's} {
      -p log(xi_pos0) - (1-p) log(xi_neg0) - p log(xi_pos1)
        -(1-p) log(xi_neg1)
       + lambda_pos0 (xi_pos0 - ppr0(theta))
       + lambda_neg0 (xi_neg0 - npr0(theta))
       + lambda_pos1 (xi_pos1 - ppr1(theta))
       + lambda_neg1 (xi_neg1 - npr1(theta))}
    s.t.
      error_rate <= additive_slack.

  We do best response for the slack variables xi:
    BR for xi_pos0 = p / lambda_pos0
    BR for xi_neg0 = (1 - p) / lambda_neg0
    BR for xi_pos1 = p / lambda_pos1
    BR for xi_neg1 = (1 - p) / lambda_neg1
  We do gradient ascent on the lambda's, where
    Gradient w.r.t. lambda_pos0
      = BR for xi_pos0 - ppr0(theta)
      = p / lambda_pos0 - ppr0(theta)
      = Gradient w.r.t. lambda_pos0 of
        (p log(lambda_pos0) - lambda_pos0 ppr0(theta))
    Gradient w.r.t. lambda_neg0
      = Gradient w.r.t. lambda_neg0 of
        ((1 - p) log(lambda_neg0) - lambda_neg0 npr0(theta))
    Gradient w.r.t. lambda_pos1
      = Gradient w.r.t. lambda_pos1 of
        (p log(lambda_pos1) - lambda_pos1 ppr1(theta))
    Gradient w.r.t. lambda_neg1
      = Gradient w.r.t. lambda_neg1 of
        ((1 - p) log(lambda_neg1) - lambda_neg1 npr1(theta)).
  We do gradient descent on thetas's, with ppr's and npr's replaced with hinge
  surrogates. We use concave lower bounds on ppr's and npr's, so that when they
  get negated in the updates, we get convex upper bounds.

  See Appendix D.1 in the paper for more details.

  Note that this function resets the default TF graph, and that the model is
  checkpointed (and evaluated) only once every 10 iterations; the returned
  models are drawn from those checkpoints. The TF session created here is
  always closed before returning, including when training raises.

  Args:
    train_set: (features, labels, groups)
    additive_slack: float, additive slack on error rate constraint
    learning_rate: float, learning rate for model parameters
    learning_rate_constraint: float, learning rate for Lagrange multipliers
    loops: int, number of iterations

  Returns:
    stochastic_model containing list of models and probabilities,
    deterministic_model.
  """
  x_train, y_train, z_train = train_set
  dimension = x_train.shape[-1]

  tf.reset_default_graph()

  # Data tensors.
  features_tensor = tf.constant(x_train.astype("float32"), name="features")
  labels_tensor = tf.constant(y_train.astype("float32"), name="labels")

  # Linear model.
  weights = tf.Variable(tf.zeros(dimension, dtype=tf.float32),
                        name="weights")
  threshold = tf.Variable(0, name="threshold", dtype=tf.float32)
  predictions_tensor = (tf.tensordot(features_tensor, weights, axes=(1, 0))
                        + threshold)

  # Group-specific predictions.
  predictions_group0 = tf.boolean_mask(predictions_tensor, mask=(z_train < 1))
  num_examples0 = np.sum(z_train < 1)
  predictions_group1 = tf.boolean_mask(predictions_tensor, mask=(z_train > 0))
  num_examples1 = np.sum(z_train > 0)

  # We use the TF Constrained Optimization (TFCO) library to set up the
  # constrained optimization problem. The library doesn't currently support best
  # responses for slack variables. So we maintain explicit Lagrange multipliers
  # for the slack variables, and let the library deal with the Lagrange
  # multipliers for the error rate constraint.

  # Since we need to perform a gradient descent update on the model parameters,
  # and an ascent update on the Lagrange multipliers on the slack variables, we
  # create a single "minimization" objective using stop gradients, where a
  # descent gradient update has the effect of minimizing over the model
  # parameters and maximizing over the Lagrange multipliers for the slack
  # variables. As noted above, the ascent update on the Lagrange multipliers for
  # the error rate constraint is done by the library internally.

  # Trainable Lagrange multipliers for the four slack variables.
  lambda_pos0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos0")
  lambda_neg0 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg0")
  lambda_pos1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_pos1")
  lambda_neg1 = tf.Variable(0.5, dtype=tf.float32, name="lambda_neg1")

  # Set up prediction rates and surrogate relaxations on them.
  p = np.mean(y_train)  # Proportion of positives.

  # Positive and negative prediction rates for group 0 and group 1.
  ppr_group0 = tf.reduce_sum(tf.cast(
      tf.greater(predictions_group0, tf.zeros(num_examples0, dtype="float32")),
      "float32")) / num_examples0
  npr_group0 = 1 - ppr_group0
  ppr_group1 = tf.reduce_sum(tf.cast(
      tf.greater(predictions_group1, tf.zeros(num_examples1, dtype="float32")),
      "float32")) / num_examples1
  npr_group1 = 1 - ppr_group1

  # Hinge concave lower bounds on the positive and negative prediction rates.
  # In the gradient updates, these get negated and become convex upper bounds.
  # For group 0:
  ppr_hinge_group0 = tf.reduce_sum(
      1 - tf.nn.relu(1 - predictions_group0)) * 1.0 / num_examples0
  npr_hinge_group0 = tf.reduce_sum(
      1 - tf.nn.relu(1 + predictions_group0)) * 1.0 / num_examples0
  # For group 1:
  ppr_hinge_group1 = tf.reduce_sum(
      1 - tf.nn.relu(1 - predictions_group1)) * 1.0 / num_examples1
  npr_hinge_group1 = tf.reduce_sum(
      1 - tf.nn.relu(1 + predictions_group1)) * 1.0 / num_examples1

  # Set up KL-divergence objective for constrained optimization.
  # We use stop gradients to ensure that a single descent gradient update on the
  # objective has the effect of minimizing over the model parameters and
  # maximizing over the Lagrange multipliers for the slack variables.

  # KL-divergence for group 0.
  kld_hinge_pos_group0 = (
      - tf.stop_gradient(lambda_pos0) * ppr_hinge_group0
      - p * tf.log(lambda_pos0) + lambda_pos0 * tf.stop_gradient(ppr_group0))
  kld_hinge_neg_group0 = (
      - tf.stop_gradient(lambda_neg0) * npr_hinge_group0
      - (1 - p) * tf.log(lambda_neg0)
      + lambda_neg0 * tf.stop_gradient(npr_group0))
  kld_hinge_group0 = kld_hinge_pos_group0 + kld_hinge_neg_group0

  # KL-divergence for group 1.
  kld_hinge_pos_group1 = (
      - tf.stop_gradient(lambda_pos1) * ppr_hinge_group1
      - p * tf.log(lambda_pos1) + lambda_pos1 * tf.stop_gradient(ppr_group1))
  kld_hinge_neg_group1 = (
      - tf.stop_gradient(lambda_neg1) * npr_hinge_group1
      - (1 - p) * tf.log(lambda_neg1)
      + lambda_neg1 * tf.stop_gradient(npr_group1))
  kld_hinge_group1 = kld_hinge_pos_group1 + kld_hinge_neg_group1

  # Wrap the objective into a rate object.
  objective = tfco.wrap_rate(kld_hinge_group0 + kld_hinge_group1)

  # Set up error rate constraint for constrained optimization.
  context = tfco.rate_context(predictions_tensor, labels_tensor)
  error = tfco.error_rate(context)
  constraints = [error <= additive_slack]

  # Create rate minimization problem object.
  problem = tfco.RateMinimizationProblem(objective, constraints)

  # Set up optimizer.
  optimizer = tfco.LagrangianOptimizerV1(
      tf.train.AdamOptimizer(learning_rate=learning_rate),
      constraint_optimizer=tf.train.AdamOptimizer(
          learning_rate=learning_rate_constraint))
  train_op = optimizer.minimize(problem)

  # We maintain a list of objectives and model weights during training.
  objectives = []
  violations = []
  models = []

  # Start TF session and initialize variables. The session is closed in the
  # `finally` clause so that its resources are released even if training fails.
  session = tf.Session()
  try:
    session.run(tf.global_variables_initializer())

    # Perform full gradient updates.
    for ii in range(loops):

      # Gradient updates.
      session.run(train_op)

      # Checkpoint once in 10 iterations.
      if ii % 10 == 0:
        # Model weights (fetched together in a single run call).
        model = session.run([weights, threshold])
        models.append(model)

        # Objective.
        klds = evaluation.expected_group_klds(
            x_train, y_train, z_train, [model], [1.0])
        objectives.append(sum(klds))

        # Violation. A separate name is used so that the TFCO `error` rate
        # expression defined above is not rebound to a plain number.
        checkpoint_error = evaluation.expected_error_rate(
            x_train, y_train, [model], [1.0])
        violations.append([checkpoint_error - additive_slack])
  finally:
    session.close()

  # Use the recorded objectives and constraints to find the best iterate.
  best_iterate = tfco.find_best_candidate_index(
      np.array(objectives), np.array(violations))
  deterministic_model = models[best_iterate]

  # Use shrinking to find a sparse distribution over iterates.
  probabilities = tfco.find_best_candidate_distribution(
      np.array(objectives), np.array(violations))
  models_pruned = [
      model for model, probability in zip(models, probabilities)
      if probability > 0.0]
  probabilities_pruned = probabilities[probabilities > 0.0]

  return (models_pruned, probabilities_pruned), deterministic_model
Example #17
0
def train():
    """Build the training graph and run the train/eval loop.

    Relies on module-level configuration and helpers: MODEL, BATCH_SIZE,
    NUM_POINT, GPU_INDEX, OPTIMIZER, MOMENTUM, LOG_DIR, MAX_EPOCH,
    get_bn_decay, get_learning_rate, log_string, train_one_epoch and
    eval_one_epoch.

    For each of MAX_EPOCH epochs it runs one training pass and one evaluation
    pass, and saves a checkpoint to LOG_DIR/model.ckpt whenever the epoch index
    is a multiple of 10. The session and both summary writers are closed when
    the loop ends or an exception is raised.

    Raises:
        ValueError: if OPTIMIZER is neither 'momentum' nor 'adam'.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter for you every time it trains.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl,
                                               is_training_pl,
                                               bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, end_points)
            tf.summary.scalar('loss', loss)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct,
                                             tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            else:
                # Fail with a clear message instead of an UnboundLocalError
                # on the `optimizer.minimize` call below.
                raise ValueError(
                    "Unsupported OPTIMIZER %r; expected 'momentum' or 'adam'."
                    % (OPTIMIZER,))
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Writers are collected as they are created so the `finally` clause
        # only closes the ones that actually exist.
        writers = []
        try:
            # Add summary writers
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(
                os.path.join(LOG_DIR, 'train'), sess.graph)
            writers.append(train_writer)
            test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'))
            writers.append(test_writer)

            # Init variables
            init = tf.global_variables_initializer()
            # To fix the bug introduced in TF 0.12.1 as in
            # http://stackoverflow.com/questions/41543774/invalidargumenterror-for-tensor-bool-tensorflow-0-12-1
            sess.run(init, {is_training_pl: True})

            ops = {
                'pointclouds_pl': pointclouds_pl,
                'labels_pl': labels_pl,
                'is_training_pl': is_training_pl,
                'pred': pred,
                'loss': loss,
                'train_op': train_op,
                'merged': merged,
                'step': batch
            }

            for epoch in range(MAX_EPOCH):
                log_string('**** EPOCH %03d ****' % (epoch))
                sys.stdout.flush()

                train_one_epoch(sess, ops, train_writer)
                eval_one_epoch(sess, ops, test_writer)

                # Save the variables to disk.
                if epoch % 10 == 0:
                    save_path = saver.save(
                        sess, os.path.join(LOG_DIR, "model.ckpt"))
                    log_string("Model saved in file: %s" % save_path)
        finally:
            # Close the writers first so pending summaries reach disk, then
            # release the session.
            for writer in writers:
                writer.close()
            sess.close()
Example #18
0
    loss = tf.reduce_mean(xentropy, name="loss")
    loss_summary = tf.summary.scalar('log_loss', loss)

# In[43]:

# Step size handed to the gradient descent optimizer below.
learning_rate = 0.01

# Training op: one gradient descent step that minimizes `loss`.
# NOTE(review): `loss` is defined in an earlier cell (the mean of `xentropy`).
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# In[44]:

# Evaluation ops: top-1 accuracy plus a scalar summary tagged 'accuracy'.
# NOTE(review): `logits` and `y` are defined in earlier cells not shown here.
with tf.name_scope("eval"):
    # Boolean per example: is the target `y` the top-1 entry of `logits`?
    correct = tf.nn.in_top_k(logits, y, 1)
    # Mean of the 0/1-cast hits, i.e. the fraction of correct predictions.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    accuracy_summary = tf.summary.scalar('accuracy', accuracy)

# In[45]:

# Op that initializes all global variables, and a Saver for checkpointing.
init = tf.global_variables_initializer()
saver = tf.train.Saver()

# Now we need to define the directory to write the TensorBoard logs to:

# In[46]:

from datetime import datetime


def log_dir(prefix=""):
 def make_global_state(self, l2_norm_clip, stddev):
   """Builds a `_GlobalState` from the two parameters, both cast to float32.

   Args:
     l2_norm_clip: value stored (as tf.float32) in the first state field.
     stddev: value stored (as tf.float32) in the second state field.

   Returns:
     The object produced by `self._GlobalState(l2_norm_clip, stddev)` after
     casting both arguments.
   """
   clip_as_float = tf.cast(l2_norm_clip, tf.float32)
   stddev_as_float = tf.cast(stddev, tf.float32)
   return self._GlobalState(clip_as_float, stddev_as_float)
Example #20
0
    def parse_train_data(self, data):
        """Turns one decoded example into a ShapeMask training image and labels.

        Side effect: overwrites `self._train_image_scale` and
        `self._train_offset` with rows 2 and 3 of the `image_info` returned by
        `input_utils.resize_and_crop_image`.

        Args:
            data: mapping providing 'groundtruth_classes', 'groundtruth_boxes',
                'groundtruth_instance_masks' and 'groundtruth_is_crowd'; it is
                also passed to `self.get_normalized_image`.

        Returns:
            A pair `(image, labels)`. `labels` is a dict with the keys
            'cls_targets', 'box_targets', 'anchor_boxes', 'num_positives',
            'image_info', 'mask_boxes', 'mask_outer_boxes', 'mask_targets',
            'fine_mask_targets', 'mask_classes' and 'mask_is_valid'.
        """
        gt_classes = data['groundtruth_classes']
        gt_boxes = data['groundtruth_boxes']
        gt_masks = data['groundtruth_instance_masks']
        crowd_flags = data['groundtruth_is_crowd']

        # Optionally drop every annotation flagged as a crowd.
        if self._skip_crowd_during_training and self._is_training:
            gt_count = tf.shape(gt_classes)[0]

            def _non_crowd_rows():
                return tf.where(tf.logical_not(crowd_flags))[:, 0]

            def _all_rows():
                return tf.cast(tf.range(gt_count), tf.int64)

            with tf.control_dependencies([gt_count, crowd_flags]):
                has_crowd_flags = tf.greater(tf.size(crowd_flags), 0)
                kept_rows = tf.cond(has_crowd_flags, _non_crowd_rows,
                                    _all_rows)
            gt_classes = tf.gather(gt_classes, kept_rows)
            gt_boxes = tf.gather(gt_boxes, kept_rows)
            gt_masks = tf.gather(gt_masks, kept_rows)

        # Without categories, every positive class id collapses to 1.
        if not self._use_category:
            is_positive = tf.greater(gt_classes, 0)
            gt_classes = tf.cast(is_positive, dtype=tf.float32)

        image = self.get_normalized_image(data)

        # Random horizontal flip of image, boxes and masks together.
        if self._aug_rand_hflip:
            flipped = input_utils.random_horizontal_flip(
                image, gt_boxes, gt_masks)
            image, gt_boxes, gt_masks = flipped

        # Boxes: normalized coordinates -> pixel coordinates.
        original_hw = tf.shape(image)[0:2]
        gt_boxes = box_utils.denormalize_boxes(gt_boxes, original_hw)

        # Resize and crop the image to the configured output size.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        self._train_image_scale = image_info[2, :]
        self._train_offset = image_info[3, :]

        # Apply the same scale/offset to the boxes.
        gt_boxes = input_utils.resize_and_crop_boxes(
            gt_boxes, self._train_image_scale, image_info[1, :],
            self._train_offset)

        # Keep only the entries whose boxes are non-empty.
        non_empty_rows = box_utils.get_non_empty_box_indices(gt_boxes)
        gt_boxes = tf.gather(gt_boxes, non_empty_rows)
        gt_classes = tf.gather(gt_classes, non_empty_rows)
        gt_masks = tf.gather(gt_masks, non_empty_rows)

        # Anchor assignment.
        input_anchor = anchor.Anchor(
            self._min_level, self._max_level, self._num_scales,
            self._aspect_ratios, self._anchor_size, self._output_size)
        anchor_labeler = anchor.AnchorLabeler(
            input_anchor, self._match_threshold, self._unmatched_threshold)
        classes_column = tf.cast(tf.expand_dims(gt_classes, axis=1),
                                 tf.float32)
        cls_targets, box_targets, num_positives = (
            anchor_labeler.label_anchors(gt_boxes, classes_column))

        # Sampling of groundtruth for the mask branch.
        num_masks = tf.shape(gt_masks)[0]
        mask_shape = tf.shape(gt_masks)[1:3]

        # One shared random permutation keeps boxes/classes/masks aligned.
        permutation = tf.random.shuffle(tf.range(num_masks))
        sampled_boxes = tf.gather(gt_boxes, permutation)
        sampled_classes = tf.gather(gt_classes, permutation)
        sampled_masks = tf.gather(gt_masks, permutation)

        # Clip or pad each sampled tensor to `self._num_sampled_masks` rows.
        padded_boxes = input_utils.clip_or_pad_to_fixed_size(
            sampled_boxes, self._num_sampled_masks)
        padded_classes = input_utils.clip_or_pad_to_fixed_size(
            sampled_classes, self._num_sampled_masks)
        padded_masks = input_utils.clip_or_pad_to_fixed_size(
            sampled_masks, self._num_sampled_masks)

        # Jitter the sampled boxes, then clip them to the output size.
        padded_boxes = box_utils.jitter_boxes(
            padded_boxes, noise_scale=self._box_jitter_scale)
        padded_boxes = box_utils.clip_boxes(padded_boxes, self._output_size)

        # Outer boxes around the sampled boxes, also clipped to output size.
        mask_outer_boxes = box_utils.compute_outer_boxes(
            padded_boxes, tf.shape(image)[0:2], scale=self._outer_box_scale)
        mask_outer_boxes = box_utils.clip_boxes(mask_outer_boxes,
                                                self._output_size)

        # Undo the training offset and scale so the outer boxes refer to the
        # original image, then normalize them by the mask height/width.
        offset_row = tf.tile(
            tf.expand_dims(self._train_offset, axis=0), [1, 2])
        outer_boxes_original = mask_outer_boxes + offset_row
        scale_row = tf.tile(
            tf.expand_dims(self._train_image_scale, axis=0), [1, 2])
        outer_boxes_original = outer_boxes_original / scale_row
        norm_outer_boxes = box_utils.normalize_boxes(outer_boxes_original,
                                                     mask_shape)

        # Give the masks a trailing channel axis and make them float32.
        padded_masks = tf.cast(tf.expand_dims(padded_masks, axis=-1),
                               tf.float32)

        def _binary_mask_crops(crop_size):
            """Crops masks to `crop_size`, thresholds at 0.5, drops channel."""
            crops = tf.image.crop_and_resize(
                padded_masks,
                norm_outer_boxes,
                box_ind=tf.range(self._num_sampled_masks),
                crop_size=crop_size,
                method='bilinear',
                extrapolation_value=0,
                name='train_mask_targets')
            binary = tf.where(tf.greater_equal(crops, 0.5),
                              tf.ones_like(crops),
                              tf.zeros_like(crops))
            return tf.squeeze(binary, axis=-1)

        mask_targets = _binary_mask_crops(
            [self._mask_crop_size, self._mask_crop_size])
        if self._up_sample_factor > 1:
            fine_mask_targets = _binary_mask_crops([
                self._mask_crop_size * self._up_sample_factor,
                self._mask_crop_size * self._up_sample_factor
            ])
        else:
            # No upsampling requested: fine targets alias the coarse ones.
            fine_mask_targets = mask_targets

        # Optional bfloat16 input image.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # 1 when the image has at least one mask, 0 otherwise.
        valid_image = tf.cast(tf.not_equal(num_masks, 0), tf.int32)
        if self._mask_train_class == 'all':
            mask_is_valid = valid_image * tf.ones_like(padded_classes,
                                                       tf.int32)
        else:
            # Valid only where the sampled class belongs to the train split.
            split_class_ids = class_utils.coco_split_class_ids(
                self._mask_train_class)
            split_column = tf.cast(tf.expand_dims(split_class_ids, 1),
                                   padded_classes.dtype)
            in_split = tf.reduce_any(
                tf.equal(tf.expand_dims(padded_classes, 0), split_column), 0)
            mask_is_valid = valid_image * tf.cast(in_split, tf.int32)

        # Labels consumed by model_fn.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': input_anchor.multilevel_boxes,
            'num_positives': num_positives,
            'image_info': image_info,
            # ShapeMask-specific entries.
            'mask_boxes': padded_boxes,
            'mask_outer_boxes': mask_outer_boxes,
            'mask_targets': mask_targets,
            'fine_mask_targets': fine_mask_targets,
            'mask_classes': padded_classes,
            'mask_is_valid': mask_is_valid,
        }
        return image, labels
Example #21
0
def _to_int32(tensor):
    """Returns `tensor` cast to `tf.int32`."""
    return tf.cast(tensor, dtype=tf.int32)
Example #22
0
    def parse_predict_data(self, data):
        """Turns one decoded example into ShapeMask prediction inputs.

        Args:
            data: mapping providing 'groundtruth_classes', 'groundtruth_boxes'
                and 'groundtruth_instance_masks'; in PREDICT_WITH_GT mode it
                must also provide 'source_id', 'height', 'width',
                'groundtruth_area' and 'groundtruth_is_crowd'. It is also
                passed to `self.get_normalized_image`.

        Returns:
            A dict with keys 'images' and 'labels'. 'labels' always holds
            'anchor_boxes' and 'image_info'; in PREDICT_WITH_GT mode it also
            holds 'cls_targets', 'box_targets', 'num_positives' and
            'groundtruths'.
        """
        gt_classes = data['groundtruth_classes']
        gt_boxes = data['groundtruth_boxes']
        gt_masks = data['groundtruth_instance_masks']

        # Without categories, every positive class id collapses to 1.
        if not self._use_category:
            is_positive = tf.greater(gt_classes, 0)
            gt_classes = tf.cast(is_positive, dtype=tf.float32)

        image = self.get_normalized_image(data)

        # Boxes: normalized coordinates -> pixel coordinates.
        image_shape = tf.shape(image)[0:2]
        gt_boxes = box_utils.denormalize_boxes(gt_boxes, image_shape)

        # Resize and crop with the scale range pinned to 1.0.
        image, image_info = input_utils.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Apply the same scale/offset to boxes and masks.
        gt_boxes = input_utils.resize_and_crop_boxes(
            gt_boxes, image_scale, image_info[1, :], offset)
        masks_with_channel = tf.expand_dims(gt_masks, axis=-1)
        # The resized masks are only referenced by the commented-out 'masks'
        # entry below; the call is kept as in the original.
        gt_masks = input_utils.resize_and_crop_masks(
            masks_with_channel, image_scale, self._output_size, offset)

        # Keep only the entries whose boxes are non-empty.
        non_empty_rows = box_utils.get_non_empty_box_indices(gt_boxes)
        gt_boxes = tf.gather(gt_boxes, non_empty_rows)
        gt_classes = tf.gather(gt_classes, non_empty_rows)

        # Anchor assignment.
        input_anchor = anchor.Anchor(
            self._min_level, self._max_level, self._num_scales,
            self._aspect_ratios, self._anchor_size, self._output_size)
        anchor_labeler = anchor.AnchorLabeler(
            input_anchor, self._match_threshold, self._unmatched_threshold)

        # Optional bfloat16 input image.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        labels = {
            'anchor_boxes': input_anchor.multilevel_boxes,
            'image_info': image_info,
        }

        if self._mode == ModeKeys.PREDICT_WITH_GT:
            # Groundtruth is taken from the raw `data` entries; boxes are
            # denormalized with the pre-resize image shape.
            groundtruths = {
                'source_id': data['source_id'],
                'height': data['height'],
                'width': data['width'],
                'num_detections': tf.shape(data['groundtruth_classes']),
                'boxes': box_utils.denormalize_boxes(
                    data['groundtruth_boxes'], image_shape),
                'classes': data['groundtruth_classes'],
                # 'masks': tf.squeeze(masks, axis=-1),
                'areas': data['groundtruth_area'],
                'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
            }
            groundtruths['source_id'] = dataloader_utils.process_source_id(
                groundtruths['source_id'])
            groundtruths = dataloader_utils.pad_groundtruths_to_fixed_size(
                groundtruths, self._max_num_instances)

            # Training-style labels from the resized boxes and classes.
            classes_column = tf.cast(tf.expand_dims(gt_classes, axis=1),
                                     tf.float32)
            cls_targets, box_targets, num_positives = (
                anchor_labeler.label_anchors(gt_boxes, classes_column))

            labels['cls_targets'] = cls_targets
            labels['box_targets'] = box_targets
            labels['num_positives'] = num_positives
            labels['groundtruths'] = groundtruths

        return {
            'images': image,
            'labels': labels,
        }
    def step_fn(self, params, model):
        """Builds one UDA training step and returns its outfeed enqueue op.

        Dequeues labeled images, labels, and original/augmented unlabeled
        images from the infeed, builds the UDA cross-entropy losses, applies
        cross-replica-summed and globally clipped gradients, updates the EMA,
        and conditionally enqueues the log tensors on the outfeed.

        Side effect: sets `self.step_info` to a dict mapping each log name to
        `[tf.float32, [1]]`.

        Args:
            params: hyper-parameter object; the attributes read here include
                train_batch_size, num_replicas, use_bfloat16, image_size,
                num_classes, uda_data, use_xla_sharding,
                num_cores_per_replica, device_assignment, weight_decay,
                uda_weight, uda_steps and grad_bound.
            model: model passed to `UDA.build_uda_cross_entropy`; its `name`
                is used to build the EMA scope.

        Returns:
            The result of `tf.cond` that either enqueues the log tensors on
            the outfeed or is a no-op, depending on
            `common_utils.should_log(params)`.
        """
        per_replica_batch = params.train_batch_size // params.num_replicas

        # Infeed tuple layout: labeled images, labels, unlabeled originals,
        # unlabeled augmentations.
        image_dtype = tf.bfloat16 if params.use_bfloat16 else tf.float32
        dtypes = [image_dtype, tf.float32, image_dtype, image_dtype]

        labeled_shape = [
            per_replica_batch, params.image_size, params.image_size, 3
        ]
        unlabeled_shape = [
            per_replica_batch * params.uda_data, params.image_size,
            params.image_size, 3
        ]
        shapes = [
            labeled_shape,
            [per_replica_batch, params.num_classes],
            unlabeled_shape,
            list(unlabeled_shape),
        ]

        if params.use_xla_sharding and params.num_cores_per_replica > 1:
            image_partition = [1, 1, params.num_cores_per_replica, 1]
            q = tpu_feed._PartitionedInfeedQueue(
                number_of_tuple_elements=4,
                host_id=0,
                input_partition_dims=[
                    image_partition,
                    [1, 1],
                    list(image_partition),
                    list(image_partition),
                ],
                device_assignment=params.device_assignment)
            q.set_tuple_types(dtypes)
            q.set_tuple_shapes(shapes)
            dequeued = q.generate_dequeue_op()
            l_images, l_labels, u_images_ori, u_images_aug = dequeued
            # Split each image tensor along dimension 2 across the cores.
            l_images = xla_sharding.split(l_images, 2,
                                          params.num_cores_per_replica)
            u_images_ori = xla_sharding.split(u_images_ori, 2,
                                              params.num_cores_per_replica)
            u_images_aug = xla_sharding.split(u_images_aug, 2,
                                              params.num_cores_per_replica)
        else:
            with tf.device(tf.tpu.core(0)):
                dequeued = tf.raw_ops.InfeedDequeueTuple(dtypes=dtypes,
                                                         shapes=shapes)
                l_images, l_labels, u_images_ori, u_images_aug = dequeued

        all_images = tf.concat([l_images, u_images_ori, u_images_aug], axis=0)
        global_step = tf.train.get_or_create_global_step()
        replica_count = tf.cast(params.num_replicas, tf.float32)

        with tf.variable_scope(MODEL_SCOPE, reuse=tf.AUTO_REUSE):
            uda_outputs = UDA.build_uda_cross_entropy(
                params, model, all_images, l_labels)
            _, _, masks, cross_entropy = uda_outputs

        l2_reg_rate = tf.cast(params.weight_decay / params.num_replicas,
                              tf.float32)
        weight_dec = common_utils.get_l2_loss()

        # The UDA weight ramps linearly with the step and is capped at
        # `params.uda_weight` once `params.uda_steps` is reached.
        ramp_progress = (tf.cast(global_step, tf.float32) /
                         float(params.uda_steps))
        uda_weight = params.uda_weight * tf.minimum(1., ramp_progress)

        total_loss = (cross_entropy['u'] * uda_weight + cross_entropy['l'] +
                      weight_dec * l2_reg_rate)

        variables = tf.trainable_variables()
        gradients = tf.gradients(total_loss, variables)
        gradients = [tf.tpu.cross_replica_sum(grad) for grad in gradients]
        gradients, grad_norm = tf.clip_by_global_norm(gradients,
                                                      params.grad_bound)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        learning_rate, optimizer = common_utils.get_optimizer(params)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(zip(gradients, variables),
                                                 global_step=global_step)

        with tf.control_dependencies([train_op]):
            ema_train_op = common_utils.setup_ema(
                params, f'{MODEL_SCOPE}/{model.name}')

        with tf.control_dependencies([ema_train_op]):
            logs = collections.OrderedDict([
                ('global_step', tf.cast(global_step, tf.float32)),
                ('loss/total', total_loss),
                ('loss/cross_entropy', cross_entropy['l']),
                ('loss/lr', tf.identity(learning_rate) / replica_count),
                ('loss/grad_norm', tf.identity(grad_norm) / replica_count),
                ('loss/weight_dec', weight_dec / replica_count),
                ('uda/cross_entropy', cross_entropy['u']),
                ('uda/u_ratio', tf.reduce_mean(masks['u']) / replica_count),
                ('uda/l_ratio', tf.reduce_mean(masks['l']) / replica_count),
                ('uda/weight', uda_weight / replica_count),
            ])

            # Every logged value gets a leading axis of size one.
            tensors = [tf.expand_dims(value, axis=0)
                       for value in logs.values()]
            self.step_info = {name: [tf.float32, [1]] for name in logs.keys()}
            outfeed_enqueue_op = tf.cond(
                common_utils.should_log(params),
                lambda: tf.raw_ops.OutfeedEnqueueTuple(inputs=tensors),
                tf.no_op)
        return outfeed_enqueue_op
Example #24
0
    def __call__(self, state):
        """Run every components layer on `state` and attach a training op.

        A scalar int32 'task_id' placeholder is added to `state` when the
        caller did not provide one. The layers in `self.componenets_layers`
        are then called in order; each receives the path weights returned by
        the previous layer, except that the first and the last layer receive
        the one-hot encoding of the task id. Layer outputs are merged into
        `state` and the per-layer entropies are accumulated.

        Trainable variables whose name contains 'router_dist' are updated by
        their own Adam optimizer. Its hyperparameters are looked up on
        `self.hparams` under a 'router_' prefix and default to the values
        used for the remaining (model) variables.

        Args:
            state: Mutable dict shared with the layers. It must contain
                'task_loss' by the time all layers have been executed.

        Returns:
            The same `state` dict, updated with the layer outputs and with
            the combined update op stored under 'train_op'.
        """
        hparams = self.hparams
        layers = self.componenets_layers
        fallback_params = {'learning_rate': 0.001, 'gradient_clip_value': 0.0}

        if 'task_id' not in state:
            state['task_id'] = tf.placeholder(tf.int32,
                                              shape=[],
                                              name='task_id')
        task_id = state['task_id']

        # First and last layer must expose the same number of components,
        # because both are addressed with the same one-hot vector.
        first_width = len(layers[0].components)
        assert first_width == len(layers[-1].components)
        task_one_hot = tf.cast(tf.one_hot(task_id, first_width),
                               dtype=tf.float32)

        # Running sum of the allocation entropy reported by each layer.
        entropy = tf.zeros([])

        final_idx = len(layers) - 1
        path_weights = task_one_hot
        for idx, layer in enumerate(layers):
            if idx < final_idx:
                fan_out = len(layers[idx + 1].components)
            else:
                # The last layer is addressed by task id and has one output.
                path_weights = task_one_hot
                fan_out = 1

            outputs, path_weights, layer_entropy = layer(
                layer_id=idx,
                state=state,
                active_paths_weights=path_weights,
                num_out_paths=fan_out)
            state.update(outputs)
            entropy = entropy + layer_entropy

        def resolve_params(source, defaults, prefix=''):
            """Read each default's key from `source`, honoring `prefix`."""
            return {
                key: getattr(source, prefix + key, fallback)
                for key, fallback in defaults.items()
            }

        task_loss = state['task_loss']
        model_params = resolve_params(hparams, fallback_params)
        with tf.variable_scope('global/task_optimizer'):
            # Split the trainable variables into model and router groups,
            # keeping their relative order within each group.
            model_vars, router_vars = [], []
            for var in tf.trainable_variables():
                if 'router_dist' in var.name:
                    router_vars.append(var)
                else:
                    model_vars.append(var)
            ordered_vars = model_vars + router_vars

            model_opt = tf.compat.v1.train.AdamOptimizer(
                model_params['learning_rate'])
            tf.logging.info('building model optimizer with params: %s',
                            model_params)

            # Router hyperparameters default to the model ones unless a
            # 'router_'-prefixed hyperparameter overrides them.
            router_params = resolve_params(hparams, model_params, 'router_')
            router_opt = tf.compat.v1.train.AdamOptimizer(
                router_params['learning_rate'])
            tf.logging.info('building router optimizer with params: %s',
                            router_params)

            # Gradients are computed and clipped jointly, then split back
            # into the two groups by position.
            raw_gradients = tf.gradients(task_loss, ordered_vars)
            clipped = list(
                clip_gradients(zip(raw_gradients, ordered_vars),
                               model_params['gradient_clip_value']))
            boundary = len(model_vars)
            model_pairs = clipped[:boundary]
            router_pairs = clipped[boundary:]

            # Only the model optimizer advances the global step.
            train_op = model_opt.apply_gradients(
                model_pairs,
                global_step=tf.train.get_or_create_global_step())
            if router_vars:
                train_op = tf.group(
                    train_op, router_opt.apply_gradients(router_pairs))

        state['train_op'] = train_op

        tf.contrib.summary.scalar('global/entropy', entropy)
        tf.contrib.summary.scalar('global/task_loss', task_loss)

        return state
    def step_fn(self, params, model):
        """Build one supervised training step and return its outfeed op.

        The step dequeues an (images, labels) pair from the infeed, runs
        `model` in training mode, adds the weight-decay term to the
        cross-entropy loss, sums gradients across replicas, clips them by
        global norm and applies them only when that norm is finite. The EMA
        update is sequenced after the optimizer step, and the logged scalars
        are sequenced after the EMA update.

        As a side effect `self.step_info` is set to a dict mapping each log
        name to `[tf.float32, [1]]`.

        Args:
            params: Hyperparameter object. Attributes read here include
                `train_batch_size`, `num_replicas`, `image_size`,
                `num_classes`, `use_bfloat16`, `use_xla_sharding`,
                `num_cores_per_replica`, `dataset_name`, `label_smoothing`,
                `weight_decay` and `grad_bound`.
            model: Callable invoked as `model(images, training=True)`; its
                `name` attribute is used to build the EMA scope.

        Returns:
            The result of a `tf.cond` that enqueues the log tensors on the
            outfeed when `common_utils.should_log(params)` holds and is a
            no-op otherwise.
        """
        per_replica_batch = params.train_batch_size // params.num_replicas
        image_dtype = tf.bfloat16 if params.use_bfloat16 else tf.float32
        dtypes = [image_dtype, tf.float32]
        shapes = [
            [per_replica_batch, params.image_size, params.image_size, 3],
            [per_replica_batch, params.num_classes],
        ]

        shard_input = (params.use_xla_sharding
                       and params.num_cores_per_replica > 1)
        if not shard_input:
            with tf.device(tf.tpu.core(0)):
                images, labels = tf.raw_ops.InfeedDequeueTuple(
                    dtypes=dtypes, shapes=shapes)
        else:
            # Images are partitioned along dimension 2 across the cores of
            # one replica; labels are not partitioned.
            infeed = tpu_feed._PartitionedInfeedQueue(
                number_of_tuple_elements=2,
                host_id=0,
                input_partition_dims=[
                    [1, 1, params.num_cores_per_replica, 1],
                    [1, 1],
                ],
                device_assignment=params.device_assignment)
            infeed.set_tuple_types(dtypes)
            infeed.set_tuple_shapes(shapes)
            images, labels = infeed.generate_dequeue_op()
            images = xla_sharding.split(images, 2,
                                        params.num_cores_per_replica)

        if labels.dtype == tf.int32:
            labels = tf.one_hot(labels,
                                depth=params.num_classes,
                                dtype=tf.float32)
        global_step = tf.train.get_or_create_global_step()

        train_batch_size = tf.cast(params.train_batch_size, tf.float32)
        num_replicas = tf.cast(params.num_replicas, tf.float32)

        with tf.variable_scope(MODEL_SCOPE):
            logits = model(images, training=True)

        # Both branches sum over the local batch and divide by the global
        # batch size.
        if 'noisy_student' in params.dataset_name.lower():
            weighted_log_probs = labels * tf.nn.log_softmax(logits, axis=-1)
            cross_entropy = (tf.reduce_sum(-weighted_log_probs) /
                             train_batch_size)
        else:
            summed_xent = tf.losses.softmax_cross_entropy(
                onehot_labels=labels,
                logits=logits,
                label_smoothing=params.label_smoothing,
                reduction=tf.losses.Reduction.SUM)
            cross_entropy = summed_xent / train_batch_size

        l2_reg_rate = tf.cast(params.weight_decay / params.num_replicas,
                              tf.float32)
        weight_dec = common_utils.get_l2_loss()
        total_loss = cross_entropy + weight_dec * l2_reg_rate

        variables = tf.trainable_variables()
        local_grads = tf.gradients(total_loss, variables)
        summed_grads = [tf.tpu.cross_replica_sum(g) for g in local_grads]
        clipped_grads, grad_norm = tf.clip_by_global_norm(
            summed_grads, params.grad_bound)

        learning_rate, optimizer = common_utils.get_optimizer(params)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        def apply_update():
            """Apply the clipped gradients and advance the global step."""
            return optimizer.apply_gradients(zip(clipped_grads, variables),
                                             global_step=global_step)

        # Skip the update entirely when the gradient norm is inf/NaN.
        train_op = tf.cond(tf.math.is_finite(grad_norm), apply_update,
                           tf.no_op)
        with tf.control_dependencies(update_ops + [train_op]):
            ema_train_op = common_utils.setup_ema(
                params, f'{MODEL_SCOPE}/{model.name}')

        with tf.control_dependencies([ema_train_op]):
            logs = collections.OrderedDict()
            logs['global_step'] = tf.cast(global_step, tf.float32)
            logs['loss/total'] = total_loss
            logs['loss/weight_decay'] = weight_dec / num_replicas
            logs['loss/cross_entropy'] = cross_entropy
            logs['loss/lr'] = tf.identity(learning_rate) / num_replicas
            logs['loss/grad_norm'] = grad_norm / num_replicas

            tensors = [tf.expand_dims(value, axis=0)
                       for value in logs.values()]
            self.step_info = {name: [tf.float32, [1]] for name in logs}

            def enqueue_logs():
                """Push the log tensors to the outfeed."""
                return tf.raw_ops.OutfeedEnqueueTuple(inputs=tensors)

            outfeed_enqueue_op = tf.cond(common_utils.should_log(params),
                                         enqueue_logs, tf.no_op)
        return outfeed_enqueue_op
    def train(self, data_dir, save_model_path):
        """Train the CNN until a held-out batch exceeds 99% accuracy.

        Data is loaded through `self.init_data` and split 80/20 with a fixed
        random state. The network built by `self.cnn_construct` is optimized
        with Adam (learning rate 0.001) on randomly drawn mini-batches.
        Every 10 steps a random held-out batch is scored and the loss is
        printed; accuracy is printed every 50 steps. A checkpoint is written
        every 500 steps and when training stops.

        `self.keep_place` is fed 0.75 while training and 1.0 while scoring
        (presumably a dropout keep probability -- confirm against
        `cnn_construct`).

        Args:
            data_dir: Forwarded to `self.init_data` to locate the dataset.
            save_model_path: Checkpoint path prefix passed to `Saver.save`.

        Note:
            There is no step limit. The loop only ends once a sampled
            held-out batch scores above 0.99 after step 500.
        """
        print('ready load train dataset')
        X, y = self.init_data(data_dir)
        print('success load' + str(len(y)) + 'datas')
        train_x, test_x, train_y, test_y = train_test_split(X,
                                                            y,
                                                            test_size=0.2,
                                                            random_state=0)

        out_put = self.cnn_construct()
        predicts = tf.nn.softmax(out_put)
        predicts = tf.argmax(predicts, axis=1)
        actual_y = tf.argmax(self.y_place, axis=1)
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(predicts, actual_y), dtype=tf.float32))
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=out_put,
                                                    labels=self.y_place))
        opt = tf.train.AdamOptimizer(learning_rate=0.001)
        train_step = opt.minimize(cost)

        # Sampling without replacement raises ValueError when more rows are
        # requested than exist, so cap each batch at the size of its split.
        train_batch = min(self.batch_size, len(train_x))
        test_batch = min(self.batch_size, len(test_x))

        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            step = 0
            saver = tf.train.Saver()
            while True:
                train_index = np.random.choice(len(train_x),
                                               train_batch,
                                               replace=False)
                _, loss = sess.run(
                    [train_step, cost],
                    feed_dict={
                        self.x_place: train_x[train_index],
                        self.y_place: train_y[train_index],
                        self.keep_place: 0.75
                    })
                step += 1

                if step % 10 != 0:
                    continue

                test_index = np.random.choice(len(test_x),
                                              test_batch,
                                              replace=False)
                acc = sess.run(accuracy,
                               feed_dict={
                                   self.x_place: test_x[test_index],
                                   self.y_place: test_y[test_index],
                                   self.keep_place: 1.0
                               })
                print(step, loss)
                if step % 50 == 0:
                    print('accuracy:' + str(acc))

                # Save once per step even when the periodic checkpoint and
                # the stop condition coincide.
                converged = acc > 0.99 and step > 500
                if converged or step % 500 == 0:
                    saver.save(sess, save_model_path, global_step=step)
                if converged:
                    break
Example #27
0
  def __call__(self,
               images,
               locations,
               state_rnn,
               use_resolution,
               prev_locations=None,
               is_training=False,
               policy="learned",
               sampling_stddev=1e-5,
               stop_gradient_between_cells=False,
               stop_gradient_after_glimpse=False):
    """Builds one step of the DRAM cell.

    The glimpse network output feeds the first RNN layer, whose output drives
    the classification network. The same output (optionally with its gradient
    stopped) feeds the second RNN layer, whose output drives the emission
    network that proposes the next glimpse locations.

    Variables are created on the first call (while `self.var_list` is empty)
    and reused on later calls; `self.collect_variables()` is invoked after
    the first build.

    Args:
      images: 4-D Tensor of shape [batch, height, width, channels].
      locations: Glimpse location, forwarded to the glimpse network.
      state_rnn: Pair of RNN states; index 0 is used for the classification
        layer and index 1 for the location layer.
      use_resolution: Forwarded to the glimpse network. Documented upstream
        as a list of booleans selecting resolutions from high to low, with
        None meaning all resolutions.
      prev_locations: Forwarded to the emission network. Documented upstream
        as: if not None, added to the proposed locations (relative
        locations).
      is_training: (Boolean) Training or inference mode. RNN dropout is only
        applied when True and `config.rnn_dropout_rate` is positive.
      policy: (String) Forwarded to the emission network; documented
        upstream as 'learned', 'random' or 'center'.
      sampling_stddev: Sampling distribution standard deviation, forwarded
        to the emission network.
      stop_gradient_between_cells: (Boolean) Whether to stop the gradient
        flowing from the location RNN layer into the classification layer's
        output.
      stop_gradient_after_glimpse: (Boolean) Whether to stop the gradient
        at the glimpse network output.

    Returns:
      logits: Output of the classification network.
      endpoints: Dict with the endpoints of the glimpse, classification and
        emission networks, plus a "cell_outputs" entry holding the new
        `locations`, `state_rnn`, `output_rnn` and `mean_locations`.
    """
    reuse = bool(self.var_list)

    use_dropout = is_training and self.config.rnn_dropout_rate > 0
    if use_dropout:
      keep_prob = 1.0 - self.config.rnn_dropout_rate
      cells = [
          tf.nn.rnn_cell.DropoutWrapper(
              cell, input_keep_prob=keep_prob, output_keep_prob=keep_prob)
          for cell in self.rnn_layers
      ]
    else:
      cells = self.rnn_layers

    endpoints = {}
    glimpse_size = tf.cast(self.glimpse_net.glimpse_shape[0], dtype=tf.float32)
    image_size = tf.cast(tf.shape(images)[1], dtype=tf.float32)
    # Scale factor handed to the emission network so that glimpses stay
    # inside the image.
    location_scale = 1. - glimpse_size / image_size

    with tf.name_scope("glimpse_network"):
      glimpse, glimpse_endpoints = self.glimpse_net(
          images, locations, is_training=is_training,
          use_resolution=use_resolution)
      endpoints["glimpse_network"] = glimpse_endpoints

    # First RNN layer: classification.
    with tf.variable_scope("dram_cell_0", reuse=reuse):
      rnn0_input = (
          tf.stop_gradient(glimpse) if stop_gradient_after_glimpse
          else glimpse)
      output_rnn0, state_rnn0 = cells[0](rnn0_input, state_rnn[0])

    with tf.name_scope("classification_network"):
      logits, classification_endpoints = self.classification_net(output_rnn0)
      endpoints["classification_network"] = classification_endpoints

    # Second RNN layer: glimpse locations.
    with tf.variable_scope("dram_cell_1", reuse=reuse):
      rnn1_input = (
          tf.stop_gradient(output_rnn0) if stop_gradient_between_cells
          else output_rnn0)
      output_rnn1, state_rnn1 = cells[1](rnn1_input, state_rnn[1])

    with tf.name_scope("emission_network"):
      locations, emission_endpoints = self.emission_net(
          output_rnn1,
          location_scale=location_scale,
          prev_locations=prev_locations,
          is_training=is_training,
          policy=policy,
          sampling_stddev=sampling_stddev)
      endpoints["emission_network"] = emission_endpoints

    endpoints["cell_outputs"] = {
        "locations": locations,
        "state_rnn": (state_rnn0, state_rnn1),
        "output_rnn": (output_rnn0, output_rnn1),
        "mean_locations": emission_endpoints["mean_locations"],
    }

    # Record the freshly created variables after the first build only.
    if not reuse:
      self.collect_variables()

    return logits, endpoints
def bilinear_sampler(img, x, y):
    """
    Bilinearly samples `img` at the normalized coordinates (x, y).

    Coordinates in [-1, 1] are mapped to [0, W] along x and [0, H] along y.
    For every sample the four surrounding integer pixel positions are
    clipped to the image, looked up with `get_pixel_value`, and blended with
    weights derived from the distance to each corner. The same weights are
    applied to every channel.

    Input
    -----
    - img: batch of images in (B, H, W, C) layout.
    - x, y: sampling coordinates, one value per output pixel. They need at
      least three dimensions, because a channel axis is inserted at index 3.

    Returns
    -------
    - the weighted sum of the four corner pixel values for each coordinate.

    NOTE(review): the weights are computed from the clipped corner
    positions, so where clipping makes a pair of corners coincide they no
    longer sum to 1 -- confirm this edge behavior is intended.
    """
    img_shape = tf.shape(img)
    height = img_shape[1]
    width = img_shape[2]

    y_limit = tf.cast(height - 1, 'int32')
    x_limit = tf.cast(width - 1, 'int32')
    lowest = tf.zeros([], dtype='int32')

    # Map normalized coordinates onto pixel coordinates.
    x_pix = 0.5 * ((tf.cast(x, 'float32') + 1.0) * tf.cast(width, 'float32'))
    y_pix = 0.5 * ((tf.cast(y, 'float32') + 1.0) * tf.cast(height, 'float32'))

    # Integer corners of the cell containing each sample point.
    x_lo = tf.cast(tf.floor(x_pix), 'int32')
    x_hi = x_lo + 1
    y_lo = tf.cast(tf.floor(y_pix), 'int32')
    y_hi = y_lo + 1

    # Keep every corner inside the image.
    x_lo = tf.clip_by_value(x_lo, lowest, x_limit)
    x_hi = tf.clip_by_value(x_hi, lowest, x_limit)
    y_lo = tf.clip_by_value(y_lo, lowest, y_limit)
    y_hi = tf.clip_by_value(y_hi, lowest, y_limit)

    # Pixel values at the four corners.
    val_lo_lo = get_pixel_value(img, x_lo, y_lo)
    val_lo_hi = get_pixel_value(img, x_lo, y_hi)
    val_hi_lo = get_pixel_value(img, x_hi, y_lo)
    val_hi_hi = get_pixel_value(img, x_hi, y_hi)

    # Distances from the sample point to each (clipped) corner coordinate.
    dist_to_x_hi = tf.cast(x_hi, 'float32') - x_pix
    dist_from_x_lo = x_pix - tf.cast(x_lo, 'float32')
    dist_to_y_hi = tf.cast(y_hi, 'float32') - y_pix
    dist_from_y_lo = y_pix - tf.cast(y_lo, 'float32')

    # Each corner is weighted by the area of the opposite sub-rectangle.
    weighted_corners = [
        (dist_to_x_hi * dist_to_y_hi, val_lo_lo),
        (dist_to_x_hi * dist_from_y_lo, val_lo_hi),
        (dist_from_x_lo * dist_to_y_hi, val_hi_lo),
        (dist_from_x_lo * dist_from_y_lo, val_hi_hi),
    ]

    # Insert a channel axis into each weight so it scales every channel.
    return tf.add_n([
        tf.expand_dims(weight, axis=3) * value
        for weight, value in weighted_corners
    ])
Example #29
0
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
  """Model definition entry.

  Builds the detection model, its losses, the optimizer/train op (TRAIN),
  the evaluation metric function (EVAL) and the checkpoint-restoring
  scaffold, and packs them into an estimator spec.

  Args:
    features: the input image tensor with shape [batch_size, height, width, 3].
      The height and width are fixed and equal.
    labels: the input labels in a dictionary. The labels include class targets
      and box targets which are dense label maps. The labels are generated from
      get_input_fn function in data/dataloader.py
    mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
    params: the dictionary defines hyperparameters of model. The default
      settings are in default_hparams function in this file.
    model: the model outputs class logits and box regression outputs.
    variable_filter_fn: the filter function that takes trainable_variables and
      returns the variable list after applying the filter rule.

  Returns:
    tpu_spec: the TPUEstimatorSpec to run training or evaluation. In PREDICT
      mode a plain tf.estimator.EstimatorSpec carrying the predictions is
      returned instead.

  Raises:
    RuntimeError: if both ckpt and backbone_ckpt are set (checked in TRAIN
      mode only).
    ValueError: if params['optimizer'] is neither 'sgd' nor 'adam' (TRAIN
      mode only).
  """
  # Training hooks stay None unless the horovod strategy sets them below.
  training_hooks = None
  if params['data_format'] == 'channels_first':
    # Move the channel axis from last to second position.
    features = tf.transpose(features, [0, 3, 1, 2])
  def _model_outputs(inputs):
    return model(inputs, config=hparams_config.Config(params))

  cls_outputs, box_outputs = utils.build_model_with_precision(
      params['precision'], _model_outputs, features)

  # Cast every level's outputs to float32 before they are used below.
  levels = cls_outputs.keys()
  for level in levels:
    cls_outputs[level] = tf.cast(cls_outputs[level], tf.float32)
    box_outputs[level] = tf.cast(box_outputs[level], tf.float32)

  # First check if it is in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'image': features,
    }
    for level in levels:
      predictions['cls_outputs_%d' % level] = cls_outputs[level]
      predictions['box_outputs_%d' % level] = box_outputs[level]
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Set up training loss and learning rate.
  update_learning_rate_schedule_parameters(params)
  global_step = tf.train.get_or_create_global_step()
  learning_rate = learning_rate_schedule(params, global_step)

  # cls_loss and box_loss are for logging. only total_loss is optimized.
  det_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
      cls_outputs, box_outputs, labels, params)
  reg_l2loss = reg_l2_loss(params['weight_decay'])
  total_loss = det_loss + reg_l2loss

  if mode == tf.estimator.ModeKeys.TRAIN:
    utils.scalar('lrn_rate', learning_rate)
    utils.scalar('trainloss/cls_loss', cls_loss)
    utils.scalar('trainloss/box_loss', box_loss)
    utils.scalar('trainloss/box_iou_loss', box_iou_loss)
    utils.scalar('trainloss/det_loss', det_loss)
    utils.scalar('trainloss/reg_l2_loss', reg_l2loss)
    utils.scalar('trainloss/loss', total_loss)

  # `ema` and `ema_vars` only exist when a moving average decay is set; every
  # later use is guarded by the same `moving_average_decay` check.
  moving_average_decay = params['moving_average_decay']
  if moving_average_decay:
    ema = tf.train.ExponentialMovingAverage(
        decay=moving_average_decay, num_updates=global_step)
    ema_vars = utils.get_ema_vars()
  if params['strategy'] == 'horovod':
    import horovod.tensorflow as hvd   # pylint: disable=g-import-not-at-top
    # Scale the learning rate by the number of horovod workers. Note that the
    # 'lrn_rate' summary above records the unscaled value.
    learning_rate = learning_rate * hvd.size()
  if mode == tf.estimator.ModeKeys.TRAIN:
    if params['optimizer'].lower() == 'sgd':
      optimizer = tf.train.MomentumOptimizer(
          learning_rate, momentum=params['momentum'])
    elif params['optimizer'].lower() == 'adam':
      optimizer = tf.train.AdamOptimizer(
          learning_rate)
    else:
      raise ValueError('optimizers should be adam or sgd')

    # Wrap the optimizer for the selected distribution strategy.
    if params['strategy'] == 'tpu':
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)
    elif params['strategy'] == 'horovod':
      optimizer = hvd.DistributedOptimizer(optimizer)
      training_hooks = [hvd.BroadcastGlobalVariablesHook(0)]

    # Batch norm requires update_ops to be added as a train_op dependency.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    var_list = tf.trainable_variables()
    if variable_filter_fn:
      var_list = variable_filter_fn(var_list)

    if params.get('clip_gradients_norm', 0) > 0:
      # Explicit compute/clip/apply path so the global norm can be clipped
      # and logged.
      logging.info('clip gradients norm by %f', params['clip_gradients_norm'])
      grads_and_vars = optimizer.compute_gradients(total_loss, var_list)
      with tf.name_scope('clip'):
        grads = [gv[0] for gv in grads_and_vars]
        tvars = [gv[1] for gv in grads_and_vars]
        clipped_grads, gnorm = tf.clip_by_global_norm(
            grads, params['clip_gradients_norm'])
        utils.scalar('gnorm', gnorm)
        grads_and_vars = list(zip(clipped_grads, tvars))

      with tf.control_dependencies(update_ops):
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)
    else:
      with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(
            total_loss, global_step, var_list=var_list)

    if moving_average_decay:
      # The EMA update runs after the optimizer step and becomes the train op.
      with tf.control_dependencies([train_op]):
        train_op = ema.apply(ema_vars)

  else:
    train_op = None

  eval_metrics = None
  if mode == tf.estimator.ModeKeys.EVAL:
    def metric_fn(**kwargs):
      """Returns a dictionary that has the evaluation metrics."""
      batch_size = params['batch_size']
      if params['strategy'] == 'tpu':
        # On TPU the metric batch size is the per-shard size times the number
        # of shards.
        batch_size = params['batch_size'] * params['num_shards']
      eval_anchors = anchors.Anchors(params['min_level'],
                                     params['max_level'],
                                     params['num_scales'],
                                     params['aspect_ratios'],
                                     params['anchor_scale'],
                                     params['image_size'])
      anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                             params['num_classes'])
      cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
      box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])

      if params.get('testdev_dir', None):
        logging.info('Eval testdev_dir %s', params['testdev_dir'])
        coco_metrics = coco_metric_fn(
            batch_size,
            anchor_labeler,
            params['val_json_file'],
            testdev_dir=params['testdev_dir'],
            disable_pyfun=params.get('disable_pyfun', None),
            **kwargs)
      else:
        logging.info('Eval val with groudtruths %s.', params['val_json_file'])
        coco_metrics = coco_metric_fn(batch_size, anchor_labeler,
                                      params['val_json_file'], **kwargs)

      # Add metrics to output.
      output_metrics = {
          'cls_loss': cls_loss,
          'box_loss': box_loss,
      }
      output_metrics.update(coco_metrics)
      return output_metrics

    # Repeat the scalar losses into [batch_size, 1] tensors before passing
    # them to metric_fn (presumably because metric inputs must be
    # batch-major -- TODO confirm).
    cls_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(cls_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    box_loss_repeat = tf.reshape(
        tf.tile(tf.expand_dims(box_loss, 0), [params['batch_size'],]),
        [params['batch_size'], 1])
    metric_fn_inputs = {
        'cls_loss_repeat': cls_loss_repeat,
        'box_loss_repeat': box_loss_repeat,
        'source_ids': labels['source_ids'],
        'groundtruth_data': labels['groundtruth_data'],
        'image_scales': labels['image_scales'],
    }
    add_metric_fn_inputs(params, cls_outputs, box_outputs, metric_fn_inputs)
    eval_metrics = (metric_fn, metric_fn_inputs)

  # A full-model checkpoint ('ckpt') takes precedence over 'backbone_ckpt'.
  checkpoint = params.get('ckpt') or params.get('backbone_ckpt')

  if checkpoint and mode == tf.estimator.ModeKeys.TRAIN:
    # Initialize the model from an EfficientDet or backbone checkpoint.
    if params.get('ckpt') and params.get('backbone_ckpt'):
      raise RuntimeError(
          '--backbone_ckpt and --checkpoint are mutually exclusive')

    if params.get('backbone_ckpt'):
      var_scope = params['backbone_name'] + '/'
      if params['ckpt_var_scope'] is None:
        # Use backbone name as default checkpoint scope.
        ckpt_scope = params['backbone_name'] + '/'
      else:
        ckpt_scope = params['ckpt_var_scope'] + '/'
    else:
      # Load every var in the given checkpoint
      var_scope = ckpt_scope = '/'

    def scaffold_fn():
      """Loads pretrained model through scaffold function."""
      logging.info('restore variables from %s', checkpoint)

      var_map = utils.get_ckpt_var_map(
          ckpt_path=checkpoint,
          ckpt_scope=ckpt_scope,
          var_scope=var_scope,
          var_exclude_expr=params.get('var_exclude_expr', None))

      tf.train.init_from_checkpoint(checkpoint, var_map)

      return tf.train.Scaffold()
  elif mode == tf.estimator.ModeKeys.EVAL and moving_average_decay:
    def scaffold_fn():
      """Load moving average variables for eval."""
      logging.info('Load EMA vars with ema_decay=%f', moving_average_decay)
      restore_vars_dict = ema.variables_to_restore(ema_vars)
      saver = tf.train.Saver(restore_vars_dict)
      return tf.train.Scaffold(saver=saver)
  else:
    scaffold_fn = None

  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=total_loss,
      train_op=train_op,
      eval_metrics=eval_metrics,
      host_call=utils.get_tpu_host_call(global_step, params),
      scaffold_fn=scaffold_fn,
      training_hooks=training_hooks)
Example #30
0
X = tf.placeholder(tf.float32, shape=[None, 8])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable(tf.random_normal([8, 1]), name='weight')  # 가중치
b = tf.Variable(tf.random_normal([1]), name='bias')  # 편향

hypothesis = tf.sigmoid(tf.matmul(X, W) + b)  # sigmoid 사용 matmul이 뭔지 ??

# cost / loss function
cost = -tf.reduce_mean(Y * tf.log(hypothesis) +
                       (1 - Y) * tf.log(1 - hypothesis))

train = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(cost)

# Accuracy computation
# True if hypothesis > 0.5 else False  casting
predicted = tf.cast(hypothesis > 0.5, dtype=tf.float32)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted, Y), dtype=tf.float32))

# Train the model
# Launch graph
with tf.Session() as sess:
    # Initialize TensorFlow variables
    sess.run(tf.global_variables_initializer())

    for step in range(10001):
        cost_val, _ = sess.run([cost, train], feed_dict={X: x_data, Y: y_data})
        if step % 200 == 0:
            print(step, cost_val)

    h, c, a = sess.run([hypothesis, predicted, accuracy],
                       feed_dict={