Example #1
  def encode_coordinates_temporal_fn(self, net):
    """Adds one-hot encoding of coordinates to different views in the networks.

    For each "pixel" of a feature map it adds one-hot encoded x, y and t
    coordinates.

    Args:
      net: a tensor of shape=[batch_size, time, height, width, num_features],
        e.g. [1, 8, 32, 60, 256]

    Returns:
      a tensor with the same height and width, but altered feature_size.
    """
    mparams = self._mparams['encode_coordinates_fn']
    if mparams.enabled:
      print("net", net)#1, 8, 14, 28, 1088
      batch_size, t, h, w, _ = net.shape.as_list()
      x, y, t1  = tf.meshgrid(tf.range(w),tf.range(h),tf.range(t))#1, 8, 14, 28, 1088
      print(t1)#14, 8, 28
      w_loc = slim.one_hot_encoding(x, num_classes=w)
      h_loc = slim.one_hot_encoding(y, num_classes=h)
      t_loc = slim.one_hot_encoding(t1, num_classes=t)
      loc = tf.concat([t_loc, h_loc, w_loc], 3)#w,h,t,w+h+t
      loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1, 1])#bXhXwXtXsum
      loc = tf.transpose(loc, [0, 3, 1, 2, 4])#1X8XHXwX3
      return tf.concat([net, loc], 4)
    else:
      return net
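Note (not from the original project): a minimal standalone sketch, assuming TensorFlow 1.x with tf.contrib.slim and made-up toy shapes, that runs the same coordinate-encoding steps on a tiny 5-D feature map so the resulting channel count (num_features + t + h + w) is easy to verify.

import tensorflow as tf
import tensorflow.contrib.slim as slim

batch_size, t, h, w, c = 1, 2, 3, 4, 5
net = tf.zeros([batch_size, t, h, w, c])

x, y, t1 = tf.meshgrid(tf.range(w), tf.range(h), tf.range(t))    # each of shape [h, w, t]
w_loc = slim.one_hot_encoding(x, num_classes=w)                  # [h, w, t, w]
h_loc = slim.one_hot_encoding(y, num_classes=h)                  # [h, w, t, h]
t_loc = slim.one_hot_encoding(t1, num_classes=t)                 # [h, w, t, t]
loc = tf.concat([t_loc, h_loc, w_loc], 3)                        # [h, w, t, t + h + w]
loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1, 1])  # [b, h, w, t, t + h + w]
loc = tf.transpose(loc, [0, 3, 1, 2, 4])                         # [b, t, h, w, t + h + w]
out = tf.concat([net, loc], 4)                                   # [b, t, h, w, c + t + h + w]

with tf.Session() as sess:
    print(sess.run(tf.shape(out)))  # expected: [1 2 3 4 14]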
Example #2
 def _encode_coordinates(self, features):
     _, h, w, _ = features.shape.as_list()
     x, y = tf.meshgrid(tf.range(w), tf.range(h))
     w_loc = slim.one_hot_encoding(x, num_classes=w)
     h_loc = slim.one_hot_encoding(y, num_classes=h)
     loc = tf.concat([h_loc, w_loc], 2)
     loc = tf.tile(tf.expand_dims(loc, 0), [self.batch_size, 1, 1, 1])
     return tf.concat([features, loc], 3)
Example #3
    def _init_model(self):
        '''
        init model for training
        :return:
        '''
        # tf.set_random_seed(20)
        # with tf.Graph().as_default():

        self.global_step = slim.get_or_create_global_step()
        self.batch_data = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.input_size, self.input_size, self.input_channel],
            name='input_images')  #image
        self.input_label = tf.placeholder(dtype=tf.int64,
                                          shape=[None],
                                          name='input_labels')  #label
        self.input_pose = tf.placeholder(dtype=tf.int64,
                                         shape=[None],
                                         name='input_poses')  #pose
        self.input_light = tf.placeholder(
            dtype=tf.int64, shape=[None],
            name='input_illumination')  #illumination
        # self.index = tf.placeholder(tf.int32, None,name='input_nums')
        # make one-hot labels
        self.labels = slim.one_hot_encoding(self.input_label, self.class_nums)
        self.pose = slim.one_hot_encoding(self.input_pose,
                                          self.pose_c)  #pose code pose label
        self.pose_reverse = tf.concat(tf.split(self.pose, 2, axis=0)[::-1],
                                      axis=0)
        self.light = slim.one_hot_encoding(self.input_light, self.light_c)
        self.light_reverse = tf.concat(tf.split(self.light, 2, axis=0)[::-1],
                                       axis=0)
        # self.noise = tf.random_uniform(shape=(self.index,1,1,self.noise_z),minval=-1,maxval=1,dtype=tf.float32,name='input_noise')
        # self.noise_reverse = tf.concat(tf.split(self.noise, 2, axis=0)[::-1], axis=0)
        # compute loss
        self._predict_drgan_multipie()

        self._loss_gan_multipie()
        self._loss_compute()

        #pre 1e-3 adv 1e-3 id_p 3e-3 pixel 1 tv 1e-4

        self.summary_train = tf.summary.merge_all()
        #select var list
        train_vars = tf.trainable_variables()
        self.varsg = [var for var in train_vars if 'generator' in var.name]
        self.varsd = [var for var in train_vars if 'discriminator' in var.name]
        self.fc_add = [
            var for var in train_vars if 'recognation_fc' in var.name
        ]

        self.vard_fr = [var for var in train_vars if 'resnet_yd' in var.name]
        # self.init_vars=self.vard_fr+self.varsd+self.varsg+self.fc_add
        self.init_vars = self.vard_fr
        # self.var_total=self.varsg+self.varsd+self.vard_fr
        # self.varsd = self.varsd+self.vard_fr+self.fc_add###finetu fr net??

        self._get_train_op(self.global_step)
Example #4
def build_model(x,
                y,
                num_classes=2,
                num_estimator=32,
                subsample=0.25,
                is_training=True,
                reuse=None):
    """
	 handle model. calculate the loss and the prediction for some input x and the corresponding labels y
	 input: x shape=[None,bands,frames,num_channels], y shape=[None]
	 output: loss shape=(1), prediction shape=[None]

        """
    #preprocess
    y = slim.one_hot_encoding(y, num_classes)

    loss = 0
    predictions = y * 0
    batch_size = x.get_shape()[0].value

    #models
    for i in range(num_estimator):
        #sample from minibatch - instead of bootstrap / TODO something better?
        idx = np.random.randint(batch_size,
                                size=(int(round(batch_size * subsample)), ))
        bx = tf.gather(x, idx)
        by = tf.gather(y, idx)

        logits = classify(bx,
                          num_estimator=num_estimator,
                          num_classes=num_classes,
                          is_training=is_training,
                          reuse=reuse,
                          scope='H%d' % i)
        loss += loss_fkt(logits, by)

        #majority vote
        if not is_training:
            logits = classify(x,
                              num_estimator=num_estimator,
                              num_classes=num_classes,
                              is_training=is_training,
                              reuse=True,
                              scope='H%d' % i)
            predictions += slim.one_hot_encoding(
                tf.argmax(slim.softmax(logits), 1), num_classes)

    predictions = tf.argmax(predictions, 1)

    return loss, predictions
Example #5
    def __init__(self, lr, s_size, a_size):

        #These lines established the feed-forward part of the network.
        # The agent takes a state and produces an action.

        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        state_in_OH = slim.one_hot_encoding(self.state_in, s_size)

        output = slim.fully_connected(
            state_in_OH,
            a_size,
            biases_initializer=None,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.ones_initializer())

        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, 0)

        #The next six lines establish the training procedure.
        # We feed the reward and chosen action into the network
        #to compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
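Note (not part of the original snippet): a minimal driving-loop sketch, assuming TensorFlow 1.x with tf.contrib.slim and a toy random-reward environment; the graph below is a stripped-down copy of the agent above, and the epsilon value and reward signal are made-up stand-ins for illustration.

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

s_size, a_size, lr = 3, 4, 1e-3
state_in = tf.placeholder(shape=[1], dtype=tf.int32)
state_oh = slim.one_hot_encoding(state_in, s_size)
output = tf.reshape(
    slim.fully_connected(state_oh, a_size, biases_initializer=None,
                         activation_fn=tf.nn.sigmoid,
                         weights_initializer=tf.ones_initializer()), [-1])
chosen_action = tf.argmax(output, 0)

reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
responsible_weight = tf.slice(output, action_holder, [1])
loss = -(tf.log(responsible_weight) * reward_holder)
update = tf.train.GradientDescentOptimizer(lr).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(10):
        state = np.random.randint(s_size)          # pick a bandit/state at random
        if np.random.rand() < 0.1:                 # epsilon-greedy exploration
            action = np.random.randint(a_size)
        else:
            action = sess.run(chosen_action, feed_dict={state_in: [state]})
        reward = np.random.choice([-1.0, 1.0])     # toy reward signal
        sess.run(update, feed_dict={state_in: [state],
                                    reward_holder: [reward],
                                    action_holder: [action]})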
Example #6
def get_batch_data():
    dataset = dataset_factory.get_dataset(train_config['dataset_name'],
                                          train_config['dataset_split_name'],
                                          train_config['dataset_dir'])

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=train_config['num_readers'],
        common_queue_capacity=20 * train_config['batch_size'],
        common_queue_min=10 * train_config['batch_size'])

    image_preprocessing_name = train_config[
        'preprocessing_name'] or train_config['model_name']
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        image_preprocessing_name, is_training=True)

    [image, label] = provider.get(['image', 'label'])
    label -= train_config['labels_offset']
    train_image_size = train_config['train_image_size']
    print('train image size is : ', train_image_size)
    image = image_preprocessing_fn(image, train_image_size, train_image_size)
    images, labels = tf.train.batch(
        [image, label],
        batch_size=train_config['batch_size'],
        num_threads=train_config['num_preprocessing_threads'],
        capacity=5 * train_config['batch_size'])
    labels = slim.one_hot_encoding(
        labels, dataset.num_classes - train_config['labels_offset'])
    batch_queue = slim.prefetch_queue.prefetch_queue(
        [images, labels], capacity=2 * train_config['num_clones'])
    image_batch, label_batch = batch_queue.dequeue()
    return image_batch, label_batch, dataset
Example #7
def getImageBatchAndOneHotLabels(dataset_dir, dataset_name, num_readers,
                                 num_preprocessing_threads, batch_size):
    '''
    :param dataset_dir: directory where the tfrecord files are stored
    :param dataset_name: name of the dataset e.g. train / validation
    :return: dataset, batched images and one-hot encoded labels
    '''
    dataset = imagenet.get_split(dataset_name, dataset_dir)
    # DataSetProvider on CPU
    with tf.device('/device:CPU:0'):
        # ------- Dataset Provider ---------
        provider_train = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=num_readers,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        [image, label] = provider_train.get(['image', 'label'])

    # Preprocessing of Dataset
    train_image_size = alexnet.alexnet_v2.default_image_size
    image = alexnet_preprocessing.preprocess_image(image, train_image_size,
                                                   train_image_size)

    # Generate Batches
    images, labels = tf.train.batch([image, label],
                                    batch_size=batch_size,
                                    num_threads=num_preprocessing_threads,
                                    capacity=5 * batch_size)
    labels = slim.one_hot_encoding(labels, dataset.num_classes)
    return dataset, images, labels
def det_net_loss(seg_masks_in,
                 reg_masks_in,
                 seg_preds,
                 reg_preds,
                 reg_loss_weight=10.0,
                 epsilon=1e-5):

    with tf.variable_scope('loss'):
        out_size = seg_preds.get_shape()[1:3]
        seg_masks_in_ds = tf.image.resize_images(
            seg_masks_in[:, :, :, tf.newaxis], out_size[0], out_size[1],
            tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        reg_masks_in_ds = tf.image.resize_images(
            reg_masks_in, out_size[0], out_size[1],
            tf.image.ResizeMethod.NEAREST_NEIGHBOR)

        # segmentation loss
        seg_masks_onehot = slim.one_hot_encoding(seg_masks_in_ds[:, :, :, 0],
                                                 2)
        seg_loss = -tf.reduce_mean(
            seg_masks_onehot * tf.log(seg_preds + epsilon))

        # regression loss
        mask = tf.to_float(seg_masks_in_ds)
        reg_loss = tf.reduce_sum(mask * (reg_preds - reg_masks_in_ds)**2)
        reg_loss = reg_loss / (tf.reduce_sum(mask) + 1.0)

    return seg_loss + reg_loss_weight * reg_loss
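Note (not from the original code): a tiny numeric sketch, assuming TensorFlow 1.x with tf.contrib.slim and made-up 2x2 values, of the segmentation term above: a {0, 1} mask is one-hot encoded into two channels and scored against per-pixel class probabilities with the same hand-written cross entropy.

import tensorflow as tf
import tensorflow.contrib.slim as slim

seg_mask = tf.constant([[0, 1],
                        [1, 0]], dtype=tf.int64)       # [h, w] ground-truth mask
seg_pred = tf.constant([[[0.9, 0.1], [0.2, 0.8]],
                        [[0.3, 0.7], [0.6, 0.4]]])     # [h, w, 2] predicted probabilities
onehot = slim.one_hot_encoding(seg_mask, 2)            # [h, w, 2]
seg_loss = -tf.reduce_mean(onehot * tf.log(seg_pred + 1e-5))

with tf.Session() as sess:
    print(sess.run(seg_loss))  # small when the high-probability channel matches the mask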
Example #9
def model_fn(inputs, mode, **kwargs):
    # In train or eval, id_or_labels represents labels. In predict, id_or_labels represents id.
    images, id_or_labels, angles = inputs
    # Reshape angles from [batch_size] to [batch_size, 1]
    angles = tf.expand_dims(angles, 1)
    # Apply your version of model
    logits = model_v1(images, angles, mode)

    if mode == mox.ModeKeys.PREDICT:
        logits = tf.nn.softmax(logits)
        # clip logits to get lower loss value.
        logits = tf.clip_by_value(logits,
                                  clip_value_min=0.05,
                                  clip_value_max=0.95)
        model_spec = mox.ModelSpec(output_info={
            'id': id_or_labels,
            'logits': logits
        })
    elif mode == mox.ModeKeys.EXPORT:
        predictions = tf.nn.softmax(logits)
        export_spec = mox.ExportSpec(inputs_dict={
            'images': images,
            'angles': angles
        },
                                     outputs_dict={'predictions': predictions},
                                     version='model')
        model_spec = mox.ModelSpec(export_spec=export_spec)
    else:
        labels_one_hot = slim.one_hot_encoding(id_or_labels, 2)
        loss = tf.losses.softmax_cross_entropy(logits=logits,
                                               onehot_labels=labels_one_hot,
                                               label_smoothing=0.0,
                                               weights=1.0)
        model_spec = mox.ModelSpec(loss=loss, log_info={'loss': loss})
    return model_spec
Example #10
    def __init__(self, lr, state_dim, action_dim):
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        # one hot encoding of state:
        state_enc = slim.one_hot_encoding(self.state_in, state_dim)
        output = slim.fully_connected(
            state_enc,
            action_dim,
            biases_initializer=None,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.ones_initializer())

        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, 0)

        # Training pipeline
        self.reward = tf.placeholder(name='reward',
                                     shape=[1],
                                     dtype=tf.float32)
        self.action = tf.placeholder(name='action', shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action, [1])

        self.loss = -tf.log(self.responsible_weight) * self.reward

        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)

        self.update = optimizer.minimize(self.loss)
Example #11
    def compute_loss(self):
        image, label = self.get_image_labels()
        with tf.device("/device:GPU:0"):
            with tf.name_scope("batching"):
                images, labels = tf.train.batch(
                    [image, label],
                    batch_size=FLAGS.batch_size,
                    num_threads=FLAGS.num_batching_threads,
                    capacity=FLAGS.batch_queue_size * FLAGS.batch_size,
                    shapes=[image.get_shape(), []])
                labels = slim.one_hot_encoding(labels, self.dataset.num_classes)

            # summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
            logits, end_points = self.network_fn(images)#, reuse=gpu_idx!=0)

            correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
            acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            tf.summary.scalar("train-accuracy", acc)

            tf.losses.softmax_cross_entropy(labels, logits)
            losses = tf.get_collection(tf.GraphKeys.LOSSES, None)# not sure None is necessary
            #Ignoring other types of losses...
            loss = tf.add_n(losses, name="loss")
            tf.summary.scalar("loss", loss)
        return loss
Example #12
    def model_fn(inputs, mode):
        images, labels = inputs

        # Get a resnet_v1_50 model: it takes images and returns logits and end_points; we ignore end_points and keep only logits
        logits, _ = mox.get_model_fn(name='resnet_v1_50',
                                     run_mode=mode,
                                     num_classes=data_meta.num_classes,
                                     weight_decay=0.00004)(images)

        # Compute the cross-entropy loss
        labels_one_hot = slim.one_hot_encoding(labels, data_meta.num_classes)
        loss = tf.losses.softmax_cross_entropy(logits=logits,
                                               onehot_labels=labels_one_hot)

        # Get the regularization losses and add them to the loss; mox.get_collection must be used here instead of tf.get_collection
        regularization_losses = mox.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        regularization_loss = tf.add_n(regularization_losses)
        loss = loss + regularization_loss

        # Compute the classification accuracy
        accuracy = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(logits, labels, 1), tf.float32))

        # Return ModelSpec, the class MoXing-TensorFlow uses to define a model
        return mox.ModelSpec(loss=loss,
                             log_info={
                                 'loss': loss,
                                 'accuracy': accuracy
                             })
    def __init__(self, lr, s_size, a_size):
        ##Setting up the Agent
        #These two lines established the feed-forward part of the network. This does the actual choosing.
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        state_in_OH = slim.one_hot_encoding(
            self.state_in, s_size
        )  #tf-slim, https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim  (simpler setup of tf models)  #one hot encoder: http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html
        output = slim.fully_connected(
            state_in_OH,
            a_size,
            biases_initializer=None,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.ones_initializer())
        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, 0)

        #The next six lines establish the training procedure. We feed the reward and chosen action into the network
        #to compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
Example #14
def build_model(x,
                y,
                num_classes=2,
                is_training=True,
                num_estimator=None,
                num_filter=None,
                reuse=None):
    """
	 handle model. calculate the loss and the prediction for some input x and the corresponding labels y
	 input: x shape=[None,bands,frames,num_channels], y shape=[None]
	 output: loss shape=(1), prediction shape=[None]

	CAUTION! controller.py uses a function whith this name and arguments.
        """
    #preprocess
    y = slim.one_hot_encoding(y, num_classes)
    print('input: ', x.get_shape())
    #model
    logits = RNN_deepcough(x,
                           num_outputs=num_classes,
                           reuse=reuse,
                           is_training=is_training)

    #results
    loss = tf.reduce_mean(softmax_cross_entropy(logits=logits,
                                                onehot_labels=y))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
Example #15
def _build_model(inputs_queue, clone_batch_size):
    """Builds a clone of train model.

  Args:
    inputs_queue: A prefetch queue for images and labels.
  Returns:
    A dictionary of logits names to logits.
  """
    samples = inputs_queue.dequeue()
    batch_size = clone_batch_size * FLAGS.num_classes
    inputs = tf.identity(samples['image'], name='image')
    labels = tf.identity(samples['label'], name='label')
    model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
    net, end_points = model.get_features(
        inputs,
        model_options=model_options,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)
    logits, _ = model.classification(net,
                                     end_points,
                                     num_classes=FLAGS.num_classes,
                                     is_training=True)
    if FLAGS.multi_label:
        with tf.name_scope('Multilabel_logits'):
            logits = slim.softmax(logits)
            half_batch_size = batch_size / 2
            for i in range(1, FLAGS.num_classes):
                class_logits = tf.identity(logits[:, i],
                                           name='class_logits_%02d' % (i))
                class_labels = tf.identity(labels[:, i],
                                           name='class_labels_%02d' % (i))
                num_positive = tf.reduce_sum(class_labels)
                num_negative = batch_size - num_positive
                weights = tf.where(
                    tf.equal(class_labels, 1.0),
                    tf.tile([half_batch_size / num_positive], [batch_size]),
                    tf.tile([half_batch_size / num_negative], [batch_size]))
                train_utils.focal_loss(class_labels,
                                       class_logits,
                                       weights=weights,
                                       scope='class_loss_%02d' % (i))
    else:
        logits = slim.softmax(logits)
        train_utils.focal_loss(labels, logits, scope='cls_loss')

    if (FLAGS.dataset == 'protein') and FLAGS.add_counts_logits:
        counts = tf.identity(samples['counts'] - 1, name='counts')
        one_hot_counts = slim.one_hot_encoding(counts, 5)
        counts_logits, _ = model.classification(net,
                                                end_points,
                                                num_classes=5,
                                                is_training=True,
                                                scope='Counts_logits')
        counts_logits = slim.softmax(counts_logits)
        train_utils.focal_loss(one_hot_counts,
                               counts_logits,
                               scope='counts_loss')
        return logits, counts_logits
    return logits
Example #16
    def __init__(self, lr, s_size, a_size):
        # lr : learning rate
        # s_size : state size
        # a_size : action size
        # The agent input the state, and then return action

        # 2-1) Implement the input/output components (neural network)
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        state_in_OH = slim.one_hot_encoding(
            self.state_in, s_size)  # params: (label, size of OH).
        # e.g., with bandits '1', '2', '3' and '2' selected: self.state_in is '2' and s_size is 3.
        # Return example: '2' --> 0 1 0, i.e., the input gets one-hot encoded.
        # One-hot encoding is used so that, rather than feeding a bare scalar like 3, there are enough weights connecting the input to the network output.

        output = slim.fully_connected(state_in_OH, a_size, # input output shape
                                      biases_initializer=None, activation_fn=tf.nn.sigmoid,\
                                      weights_initializer=tf.ones_initializer())
        self.output = tf.reshape(output, [-1])  # flatten the result into a 1-D vector
        # !!! check the types/shapes of output and self.output (tensors?) !!!

        self.chosen_action = tf.argmax(self.output, 0)  # <-- the chosen action.

        # 2-2) Implement the training procedure (neural network)
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])  # from self.output (the value for each action),
        # extract the entry at the index stored in self.action_holder
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
Example #17
File: model.py  Project: 812864539/models
  def char_predictions(self, chars_logit):
    """Returns confidence scores (softmax values) for predicted characters.

    Args:
      chars_logit: chars logits, a tensor with shape
        [batch_size x seq_length x num_char_classes]

    Returns:
      A tuple (ids, log_prob, scores), where:
        ids - predicted characters, a int32 tensor with shape
          [batch_size x seq_length];
        log_prob - a log probability of all characters, a float tensor with
          shape [batch_size, seq_length, num_char_classes];
        scores - corresponding confidence scores for characters, a float
        tensor
          with shape [batch_size x seq_length].
    """
    log_prob = utils.logits_to_log_prob(chars_logit)
    ids = tf.to_int32(tf.argmax(log_prob, axis=2), name='predicted_chars')
    mask = tf.cast(
      slim.one_hot_encoding(ids, self._params.num_char_classes), tf.bool)
    all_scores = tf.nn.softmax(chars_logit)
    selected_scores = tf.boolean_mask(all_scores, mask, name='char_scores')
    scores = tf.reshape(selected_scores, shape=(-1, self._params.seq_length))
    return ids, log_prob, scores
    def __init__(self, lr, s_size, a_size):
        # c state_in in agent class: placeholder for state as input data
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)

        # c state_in_OH in agent class: one hot encoded version of state
        state_in_OH = slim.one_hot_encoding(self.state_in, s_size)

        # You find value (output) about action based on weight
        output = slim.fully_connected(
            state_in_OH,
            a_size,
            biases_initializer=None,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.ones_initializer())

        # You reshape output into (4,)
        self.output = tf.reshape(output, [-1])
        # You choose one action by choosing highest value from self.output
        self.chosen_action = tf.argmax(self.output, 0)

        # Following 6 lines proceed step of training
        # You send reward and chosen action into network,
        # then, you find loss,
        # then, you update network based on loss
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])
        # You use cross entropy loss function
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
    def __init__(
            self, lr, s_size, a_size
    ):  #learning rate, number of bandits, number of arms per bandit
        #These lines established the feed-forward part of the network.
        #The agent takes a state and produces an action.
        self.state_in = tf.placeholder(
            shape=[1], dtype=tf.int32
        )  #Environment state input. In this case, the active bandit
        self.state_in_OH = slim.one_hot_encoding(self.state_in, s_size)
        print(self.state_in, "OH: ", self.state_in_OH)

        output = slim.fully_connected(  #Tensor("fully_connected/Sigmoid:0", shape=(1, 4), dtype=float32)
            self.state_in_OH,
            a_size,
            biases_initializer=None,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.ones_initializer())
        self.output = tf.reshape(
            output, [-1])  #Tensor("Reshape:0", shape=(4,), dtype=float32)
        print(self.output)
        self.chosen_action = tf.argmax(self.output, 0)

        #The next six lines establish the training procedure.
        #We feed the reward and chosen action into the network to
        #compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
Example #20
    def __init__(self, lr, s_size, a_size):
        """
        :param lr: learning rate
        :param s_size: number of states
        :param a_size: number of actions
        """

        " Feed-forward part : input-current state / output - action"
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        state_in_OH = slim.one_hot_encoding(self.state_in, s_size)
        # output : a single layer neural network
        #
        output = slim.fully_connected(
            state_in_OH,
            a_size,
            biases_initializer=None,
            activation_fn=tf.nn.sigmoid,
            weights_initializer=tf.ones_initializer())
        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, axis=0)

        " Training Procedure "
        # feed the reward and chosen action into network to compute the loss
        # then, use the loss to update the network
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder,
                                           [1])
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
Example #21
def _multi_class_l2_loss(pred, labels, EPS=1e-12):
    num_classes = 2000
    labels = slim.one_hot_encoding(labels, num_classes)
    losses = tf.squared_difference(pred, labels)
    loss = tf.reduce_mean(losses) * num_classes
    slim.losses.add_loss(loss)
    return loss
    def __init__(self, lr, s_size, a_size):
        #These lines established the feed-forward part of the network. The agent takes a state and produces an action.
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        state_in_OH = slim.one_hot_encoding(self.state_in, s_size)

        # Currently a single fully connected layer is defined
        # output = slim.fully_connected(state_in_OH,a_size,\
        # 	biases_initializer=None,activation_fn=tf.nn.sigmoid,weights_initializer=tf.ones_initializer())

        # output = slim.fully_connected(state_in_OH,a_size,\
        # 	biases_initializer=None,activation_fn=None,weights_initializer=tf.ones_initializer())

        # I want to add a second fully connected layer with, say, 32 units
        # Am I doing this correctly, and why does performance drop?
        output = slim.fully_connected(state_in_OH,3,\
         biases_initializer=None,activation_fn=None,weights_initializer=tf.ones_initializer())
        output = slim.fully_connected(output,a_size,\
         biases_initializer=None,activation_fn=tf.nn.sigmoid,weights_initializer=tf.ones_initializer())

        output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(output, 0)

        #The next six lines establish the training procedure. We feed the reward and chosen action into the network
        #to compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(output, self.action_holder, [1])
        #self.responsible_weight = tf.slice(output,tf.cast(tf.reshape(self.chosen_action, [1]), dtype=tf.int32),[1])
        #self.responsible_weight = output[self.chosen_action]
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
Example #23
def build_model(x,
                y,
                num_classes=2,
                is_training=True,
                reuse=None):
    """Handles the model: calculates the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.
    """
    #preprocess
    y = slim.one_hot_encoding(y, num_classes)

    #model
    with slim.arg_scope(densenet_arg_scope(is_training)):
        x = tf.expand_dims(x, -1)
        logits = densenet(x, num_classes, reuse=reuse)

    #results
    loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=y))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
    def __init__(self, learningRate, numberOfStates, numberOfActions):

        with tf.name_scope('input'):
            self.state_in = tf.placeholder(shape=[1],
                                           dtype=tf.int32,
                                           name='state_in')
            state_in_OH = slim.one_hot_encoding(self.state_in, numberOfStates)
            self.reward_holder = tf.placeholder(shape=[1],
                                                dtype=tf.float32,
                                                name='reward')

        outputVector = slim.fully_connected(state_in_OH, numberOfActions,\
            biases_initializer=None,activation_fn=tf.nn.sigmoid,\
                weights_initializer=tf.ones_initializer(), scope='layer1')

        with tf.name_scope('output'):
            self.action_holder = tf.placeholder(shape=[1],
                                                dtype=tf.int32,
                                                name='action')
            self.output = tf.reshape(outputVector, [-1])
            self.selected_output = tf.slice(self.output, self.action_holder,
                                            [1])
            self.chosen_action = tf.argmax(
                self.output, 0)  #index of the largest value in output

        with tf.name_scope('calculations'):
            self.loss = -(tf.log(self.selected_output) * self.reward_holder)
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learningRate)
            self.update = optimizer.minimize(self.loss)
Example #25
def build_model(x,
                y,
                num_classes=2,
                num_estimator=None,  # we misuse num_estimator for the number of convolutions
                num_filter=16,
                is_training=True,
                reuse=None):
    """Handles the model: calculates the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.
    """
    #preprocess
    y = slim.one_hot_encoding(y, num_classes)
    #model
    logits = classify(x, num_classes=num_classes, num_filter=num_filter, route=num_estimator, is_training=is_training, reuse=reuse)

    #results
    loss = tf.reduce_mean(softmax_cross_entropy(logits=logits, onehot_labels=y))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
Example #26
def train():

    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)
        
        dataset = flowers.get_split('train', flowers_data_dir)
        images, _, labels = load_batch(dataset)
      
        # Create the model:
        logits, _ = squeezenet.squeezenet(images, num_classes=dataset.num_classes, is_training=True)
     
        # Specify the loss function:
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
        slim.losses.softmax_cross_entropy(logits, one_hot_labels)
        total_loss = slim.losses.get_total_loss()
    
        # Create some summaries to visualize the training process:
        tf.summary.scalar('losses/Total Loss', total_loss)
      
        # Specify the optimizer and create the train op:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_op = slim.learning.create_train_op(total_loss, optimizer)
    
        # Run the training:
        final_loss = slim.learning.train(
          train_op,
          logdir=train_dir,
          number_of_steps=100, # For speed, we only run 100 steps
          save_interval_secs=600,
          save_summaries_secs=6000,
          log_every_n_steps =1,)
      
        print('Finished training. Final batch loss %f' % final_loss)
Example #27
    def char_predictions(self, chars_logit):
        """Returns confidence scores (softmax values) for predicted characters.

    Args:
      chars_logit: chars logits, a tensor with shape
        [batch_size x seq_length x num_char_classes]

    Returns:
      A tuple (ids, log_prob, scores), where:
        ids - predicted characters, a int32 tensor with shape
          [batch_size x seq_length];
        log_prob - a log probability of all characters, a float tensor with
          shape [batch_size, seq_length, num_char_classes];
        scores - corresponding confidence scores for characters, a float
        tensor
          with shape [batch_size x seq_length].
    """
        log_prob = utils.logits_to_log_prob(chars_logit)
        ids = tf.to_int32(tf.argmax(log_prob, axis=2), name="predicted_chars")
        mask = tf.cast(
            slim.one_hot_encoding(ids, self._params.num_char_classes), tf.bool)
        all_scores = tf.nn.softmax(chars_logit)
        selected_scores = tf.boolean_mask(all_scores, mask, name="char_scores")
        scores = tf.reshape(selected_scores,
                            shape=(-1, self._params.seq_length))
        return ids, log_prob, scores
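Note (not part of the original model): a tiny numeric sketch, assuming TensorFlow 1.x with tf.contrib.slim and a made-up [1, 2, 3] logits tensor, of the masking trick used above: the one-hot mask built from the argmax ids picks, for every sequence position, the softmax score of the predicted character.

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

chars_logit = tf.constant(np.array(
    [[[2.0, 0.5, 0.1],    # position 0: class 0 wins
      [0.1, 0.2, 3.0]]],  # position 1: class 2 wins
    dtype=np.float32))                                 # shape [batch=1, seq_length=2, classes=3]
ids = tf.to_int32(tf.argmax(chars_logit, axis=2))
mask = tf.cast(slim.one_hot_encoding(ids, 3), tf.bool)
scores = tf.reshape(tf.boolean_mask(tf.nn.softmax(chars_logit), mask),
                    shape=(-1, 2))                     # [batch_size, seq_length]

with tf.Session() as sess:
    print(sess.run(ids))     # [[0 2]]
    print(sess.run(scores))  # softmax scores of the two winning classes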
Example #28
def build_model(x,
                y,
                num_classes=2,
                num_estimator=10,
                is_training=True,
                reuse=None):
    """Handles the model: calculates the loss and the prediction for some input x
    and the corresponding labels y.

    input: x shape=[None, bands, frames, num_channels], y shape=[None]
    output: loss shape=(1), prediction shape=[None]

    CAUTION! controller.py uses a function with this name and arguments.

    here we do boosting without additive training
    """
    #preprocess
    y = slim.one_hot_encoding(y, num_classes)

    #model
    logits = 0
    offset = 30 // num_estimator
    for i in range(num_estimator):
        #x = tf.image.crop_to_bounding_box(x, 0, offset * i, 16, 16)
        predictions, gamma = classify(x, num_estimator=num_estimator, num_classes=num_classes, is_training=is_training, reuse=reuse, scope='c%d' % i)
        zeta = gamma * 2 / (i + 1)
        logits = (1 - zeta) * logits + zeta * predictions

    #results
    loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=y, label_smoothing=0.05))
    predictions = tf.argmax(slim.softmax(logits), 1)

    return loss, predictions
Example #29
    def __init__(self, lr, s_size, a_size, banditos):
        tf.reset_default_graph()
        self.bandits = banditos
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
        self.state_in_one_hot = slim.one_hot_encoding(self.state_in, s_size)
        output = slim.fully_connected(self.state_in_one_hot, a_size,
                                      biases_initializer=None,
                                      activation_fn=tf.nn.sigmoid,
                                      weights_initializer=tf.ones_initializer())

        self.output = tf.reshape(output, [-1])
        self.chosen_action = tf.argmax(self.output, 0)

        # The next six lines establish the training procedure.
        # We feed the reward and chosen action into the network
        # to compute the loss, and use it to update the network.
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
        self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
        self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        self.update = optimizer.minimize(self.loss)
        self.weights = None
        self.session = None
        self.optimal_weight = None
        self.weights = tf.trainable_variables()[0]
        self.init = tf.global_variables_initializer()
Example #30
def build_model(
        x,
        y,
        num_classes=2,
        num_estimator=3,  # we misuse num_estimator for the number of convolutions
        num_filter=128,
        is_training=True,
        reuse=None):
    """
	 handle model. calculate the loss and the prediction for some input x and the corresponding labels y
	 input: x shape=[None,bands,frames,num_channels], y shape=[None]
	 output: loss shape=(1), prediction shape=[None]

	CAUTION! controller.py uses a function whith this name and arguments.

	here we do boosting without additive training

        """
    #preprocess
    y = slim.one_hot_encoding(y, num_classes)

    #model
    with tf.variable_scope('model_v1'):
        predictions = classify(x,
                               num_classes=num_classes,
                               num_filter=num_filter,
                               route=num_estimator,
                               is_training=is_training,
                               reuse=reuse,
                               scope='wk')
        loss = loss_fkt(predictions, y)

    #results
    predictions = tf.argmax(slim.softmax(predictions), 1)
    return loss, predictions
Example #31
    def _init_model(self):
        '''
        init model for training
        :return:
        '''
        # tf.set_random_seed(20)
        # with tf.Graph().as_default():

        self.global_step = slim.get_or_create_global_step()
        self.batch_data = tf.placeholder(dtype=tf.float32,shape=[None,self.input_size,self.input_size,self.input_channel],name='input_images')#image
        self.batch_label = tf.placeholder(dtype= tf.int64,shape=[None],name='input_labels')#label
        # make one-hot labels
        self.labels = slim.one_hot_encoding(self.batch_label, self.class_nums)
        # compute loss
        self.softmax_real,self.logits,self.fc=nets.inference_recognition(self.batch_data,self.class_nums)
        self.loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
            labels=self.labels,logits=self.logits
        ))
        lo=tf.summary.scalar('train/pre_loss',self.loss)
        reshape_R = tf.reshape(self.softmax_real, [-1, self.class_nums])
        max_r = tf.argmax(reshape_R, 1)
        self.predict_rate = tf.equal(max_r, self.batch_label)
        self.accuracy_r = tf.reduce_mean(tf.cast(self.predict_rate, tf.float32))
        acc=tf.summary.scalar('train/pre_rate',self.accuracy_r )
        self.summary_train = tf.summary.merge([lo,acc])

        train_vars = tf.trainable_variables()
        self.fc_add = [var for var in train_vars if 'recognition_fc' in var.name]
        self.vard_fr= [var for var in train_vars if 'resnet_yd' in var.name]
        self.init_vars=self.vard_fr
        self.var_all=self.vard_fr+self.fc_add
        train_optimizer=tf.train.MomentumOptimizer(learning_rate=0.0001,momentum=0.99,name='optimizer')
        self.train_op=train_optimizer.minimize(self.loss,var_list=self.var_all,global_step=self.global_step)
Example #32
  def char_one_hot(self, logit):
    """Creates one hot encoding for a logit of a character.

    Args:
      logit: A tensor with shape [batch_size, num_char_classes].

    Returns:
      A tensor with shape [batch_size, num_char_classes]
    """
    prediction = tf.argmax(logit, axis=1)
    return slim.one_hot_encoding(prediction, self._params.num_char_classes)
Example #33
File: model.py  Project: 812864539/models
  def encode_coordinates_fn(self, net):
    """Adds one-hot encoding of coordinates to different views in the networks.

    For each "pixel" of a feature map it adds one-hot encoded x and y
    coordinates.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]

    Returns:
      a tensor with the same height and width, but altered feature_size.
    """
    mparams = self._mparams['encode_coordinates_fn']
    if mparams.enabled:
      batch_size, h, w, _ = net.shape.as_list()
      x, y = tf.meshgrid(tf.range(w), tf.range(h))
      w_loc = slim.one_hot_encoding(x, num_classes=w)
      h_loc = slim.one_hot_encoding(y, num_classes=h)
      loc = tf.concat([h_loc, w_loc], 2)
      loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
      return tf.concat([net, loc], 3)
    else:
      return net
Example #34
def get_data(dataset,
             batch_size,
             augment=False,
             central_crop_size=None,
             shuffle_config=None,
             shuffle=True):
  """Wraps calls to DatasetDataProviders and shuffle_batch.

  For more details about supported Dataset objects refer to datasets/fsns.py.

  Args:
    dataset: a slim.data.dataset.Dataset object.
    batch_size: number of samples per batch.
    augment: optional, if True does random image distortion.
    central_crop_size: A tuple (crop_width, crop_height).
    shuffle_config: A namedtuple ShuffleBatchConfig.
    shuffle: if True use data shuffling.

  Returns:
    An InputEndpoints tuple with batched images, original images, labels and
    one-hot labels.
  """
  if not shuffle_config:
    shuffle_config = DEFAULT_SHUFFLE_CONFIG

  provider = slim.dataset_data_provider.DatasetDataProvider(
      dataset,
      shuffle=shuffle,
      common_queue_capacity=2 * batch_size,
      common_queue_min=batch_size)
  image_orig, label = provider.get(['image', 'label'])

  image = preprocess_image(
      image_orig, augment, central_crop_size, num_towers=dataset.num_of_views)
  label_one_hot = slim.one_hot_encoding(label, dataset.num_char_classes)

  images, images_orig, labels, labels_one_hot = (tf.train.shuffle_batch(
      [image, image_orig, label, label_one_hot],
      batch_size=batch_size,
      num_threads=shuffle_config.num_batching_threads,
      capacity=shuffle_config.queue_capacity,
      min_after_dequeue=shuffle_config.min_after_dequeue))

  return InputEndpoints(
      images=images,
      images_orig=images_orig,
      labels=labels,
      labels_one_hot=labels_one_hot)
Example #35
def get_data(dataset,
             model_name,
             batch_size = 32,
             shuffle_config = None,
             shuffle=None,
             is_training=True,
             height=0,
             width=0):
    """return input data for Model input
    Args:
        dataset: a slim Dataset object.
        model_name: specify Network.
        shuffle_config: a namedtuple to control shuffle queue.
         fields: {queue_capacity, num_batching_threads, min_after_dequeue}.
        shuffle: whether the data provider shuffles the data.
        is_training: if True, preprocess the image for training.
        width: expected resized width
        height: expected resized height
    """
    if not shuffle_config:
        shuffle_config = DEFAULT_SHUFFLE_CONFIG
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=shuffle,
        common_queue_capacity = 2 * batch_size,
        common_queue_min = batch_size
    )
    [image_orig, label] = provider.get(['image', 'label'])
    tf.summary.image('image_org', tf.expand_dims(image_orig, 0))
    tf.summary.scalar('label_orig', label)
    preprocessing_fn = preprocessing_factory.get_preprocessing(model_name)
    image = preprocessing_fn(image_orig,
                          width,
                          height,
                          is_training)
    label_one_shot = slim.one_hot_encoding(label, dataset.num_classes)
    images, labels, labels_one_hot = (tf.train.shuffle_batch(
        tensors=[image, label, label_one_shot],
        batch_size = batch_size,
        capacity=shuffle_config.queue_capacity,
        num_threads=shuffle_config.num_batching_threads,
        min_after_dequeue=shuffle_config.min_after_dequeue))

    return InputEndpoints(
        images=images,
        labels=labels,
        labels_one_hot=labels_one_hot)
Example #36
 def test_create_summaries_is_runnable(self):
   ocr_model = self.create_model()
   data = data_provider.InputEndpoints(
       images=self.fake_images,
       images_orig=self.fake_images,
       labels=self.fake_labels,
       labels_one_hot=slim.one_hot_encoding(self.fake_labels,
                                            self.num_char_classes))
   endpoints = ocr_model.create_base(
       images=self.fake_images, labels_one_hot=None)
   charset = create_fake_charset(self.num_char_classes)
   summaries = ocr_model.create_summaries(
       data, endpoints, charset, is_training=False)
   with self.test_session() as sess:
     sess.run(tf.global_variables_initializer())
     sess.run(tf.local_variables_initializer())
     tf.tables_initializer().run()
     sess.run(summaries)  # just check it is runnable
Example #37
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    FLAGS = settings()

    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    # Slim dataset contains data sources, decoder, reader and other meta-information
    dataset = mnist.get_split('train', FLAGS.dataset_dir)
    iterations_per_epoch = dataset.num_samples // FLAGS.batch_size # 60,000/24 = 2500

    # images: Tensor (?, 28, 28, 1)
    # labels: Tensor (?)
    images, labels = load_batch(
        dataset,
        FLAGS.batch_size)

    # Tensor(?, 10)
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

    # poses: Tensor(?, 10, 4, 4) activations: (?, 10)
    poses, activations = m_capsules.nets.capsules_net(images, num_classes=10, iterations=3, batch_size=FLAGS.batch_size, name='capsules_em')

    global_step = tf.train.get_or_create_global_step()
    loss = m_capsules.nets.spread_loss(
        one_hot_labels, activations, iterations_per_epoch, global_step, name='spread_loss'
    )
    tf.summary.scalar('losses/spread_loss', loss)

    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_tensor = slim.learning.create_train_op(
        loss, optimizer, global_step=global_step, clip_gradient_norm=4.0
    )

    slim.learning.train(
        train_tensor,
        logdir=FLAGS.log_dir,
        log_every_n_steps=10,
        save_summaries_secs=60,
        saver=tf.train.Saver(max_to_keep=2),
        save_interval_secs=600,
    )
Example #38
def fake_labels(batch_size, seq_length, num_char_classes):
  labels_np = tf.convert_to_tensor(
      np.random.randint(
          low=0, high=num_char_classes, size=(batch_size, seq_length)))
  return slim.one_hot_encoding(labels_np, num_classes=num_char_classes)
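Note (an aside, not from any of the projects above): a hedged equivalence check, assuming TensorFlow 1.x; with the default on/off values, slim.one_hot_encoding(labels, n) produces the same float32 tensor as tf.one_hot(labels, n) for integer labels of any rank, which is why the examples on this page apply it interchangeably to scalar states, label vectors and coordinate grids.

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

labels = tf.constant(np.random.randint(0, 5, size=(2, 3)), dtype=tf.int64)
a = slim.one_hot_encoding(labels, num_classes=5)  # float32, shape [2, 3, 5]
b = tf.one_hot(labels, depth=5)                   # float32, shape [2, 3, 5]

with tf.Session() as sess:
    print(sess.run(tf.reduce_all(tf.equal(a, b))))  # expected: True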
Example #39
def build_losses(pyramid, outputs, gt_boxes, gt_masks,
                 num_classes, base_anchors,
                 rpn_box_lw =1.0, rpn_cls_lw = 1.0,
                 refined_box_lw=1.0, refined_cls_lw=1.0,
                 mask_lw=1.0):
  """Building 3-way output losses, totally 5 losses
  Params:
  ------
  outputs: output of build_heads
  gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
  gt_masks: A tensor of shape (G, ih, iw), {0, 1}
  *_lw: loss weight of rpn, refined and mask losses
  
  Returns:
  -------
  l: a loss tensor
  """

  # losses for pyramid
  losses = []
  rpn_box_losses, rpn_cls_losses = [], []
  refined_box_losses, refined_cls_losses = [], []
  mask_losses = []
  
  # watch some info during training
  rpn_batch = []
  refine_batch = []
  mask_batch = []
  rpn_batch_pos = []
  refine_batch_pos = []
  mask_batch_pos = []

  arg_scope = _extra_conv_arg_scope(activation_fn=None)
  with slim.arg_scope(arg_scope):
      with tf.variable_scope('pyramid'):

        ## assigning gt_boxes
        [assigned_gt_boxes, assigned_layer_inds] = assign_boxes(gt_boxes, [gt_boxes], [2, 3, 4, 5])

        ## build losses for FPN

        for i in range(5, 1, -1):
            p = 'P%d' % i
            stride = 2 ** i
            shape = tf.shape(pyramid[p])
            height, width = shape[1], shape[2]

            splitted_gt_boxes = assigned_gt_boxes[i-2]
            
            ### rpn losses
            # 1. encode ground truth
            # 2. compute distances
            # anchor_scales = [2 **(i-2), 2 ** (i-1), 2 **(i)]
            # all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
            all_anchors = outputs['rpn'][p]['anchor']
            labels, bbox_targets, bbox_inside_weights = \
              anchor_encoder(splitted_gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder')
            boxes = outputs['rpn'][p]['box']
            classes = tf.reshape(outputs['rpn'][p]['cls'], (1, height, width, base_anchors, 2))

            labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                    _filter_negative_samples(tf.reshape(labels, [-1]), [
                        tf.reshape(labels, [-1]),
                        tf.reshape(classes, [-1, 2]),
                        tf.reshape(boxes, [-1, 4]),
                        tf.reshape(bbox_targets, [-1, 4]),
                        tf.reshape(bbox_inside_weights, [-1, 4])
                        ])
            # _, frac_ = _get_valid_sample_fraction(labels)
            rpn_batch.append(
                    tf.reduce_sum(tf.cast(
                        tf.greater_equal(labels, 0), tf.float32
                        )))
            rpn_batch_pos.append(
                    tf.reduce_sum(tf.cast(
                        tf.greater_equal(labels, 1), tf.float32
                        )))
            rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(boxes, bbox_targets)
            rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
            rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
            rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss) 
            tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
            rpn_box_losses.append(rpn_box_loss)

            # NOTE: examples with negative labels are ignored when computing one_hot_encoding and entropy losses,
            # BUT these examples still count when computing the average of softmax_cross_entropy,
            # so the loss becomes smaller by a factor (non_negative_labels / all_labels).
            # The BEST practice would still be to gather only the non-negative examples.
            labels = slim.one_hot_encoding(labels, 2, on_value=1.0, off_value=0.0) # this will set -1 label to all zeros
            rpn_cls_loss = rpn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=classes) 
            rpn_cls_loss = tf.reduce_mean(rpn_cls_loss) 
            tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
            rpn_cls_losses.append(rpn_cls_loss)
            

        ### refined loss
        # 1. encode ground truth
        # 2. compute distances
        rois = outputs['roi']['box']
        
        boxes = outputs['refined']['box']
        classes = outputs['refined']['cls']
        labels, bbox_targets, bbox_inside_weights = \
          roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

        labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]),[
                    tf.reshape(labels, [-1]),
                    tf.reshape(classes, [-1, num_classes]),
                    tf.reshape(boxes, [-1, num_classes * 4]),
                    tf.reshape(bbox_targets, [-1, num_classes * 4]),
                    tf.reshape(bbox_inside_weights, [-1, num_classes * 4])
                    ] )
        # frac, frac_ = _get_valid_sample_fraction(labels, 1)
        refine_batch.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 0), tf.float32
                    )))
        refine_batch_pos.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 1), tf.float32
                    )))

        refined_box_loss = bbox_inside_weights * _smooth_l1_dist(boxes, bbox_targets)
        refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
        refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
        refined_box_loss = refined_box_lw * tf.reduce_mean(refined_box_loss) # * frac_
        tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)
        refined_box_losses.append(refined_box_loss)

        labels = slim.one_hot_encoding(labels, num_classes, on_value=1.0, off_value=0.0)
        refined_cls_loss = refined_cls_lw * tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=classes) 
        refined_cls_loss = tf.reduce_mean(refined_cls_loss) # * frac_
        tf.add_to_collection(tf.GraphKeys.LOSSES, refined_cls_loss)
        refined_cls_losses.append(refined_cls_loss)

        ### mask loss
        # mask of shape (N, h, w, num_classes)
        masks = outputs['mask']['mask']
        # mask_shape = tf.shape(masks)
        # masks = tf.reshape(masks, (mask_shape[0], mask_shape[1],
        #                            mask_shape[2], tf.cast(mask_shape[3]/2, tf.int32), 2))
        labels, mask_targets, mask_inside_weights = \
          mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28, scope='MaskEncoder')
        labels, masks, mask_targets, mask_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]), [
                    tf.reshape(labels, [-1]),
                    masks,
                    mask_targets, 
                    mask_inside_weights, 
                    ])
        # _, frac_ = _get_valid_sample_fraction(labels)
        mask_batch.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 0), tf.float32
                    )))
        mask_batch_pos.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 1), tf.float32
                    )))
        # mask_targets = slim.one_hot_encoding(mask_targets, 2, on_value=1.0, off_value=0.0)
        # mask_binary_loss = mask_lw * tf.losses.softmax_cross_entropy(mask_targets, masks)
        # NOTE: w/o competition between classes. 
        mask_targets = tf.cast(mask_targets, tf.float32)
        mask_loss = mask_lw * tf.nn.sigmoid_cross_entropy_with_logits(labels=mask_targets, logits=masks) 
        mask_loss = tf.reduce_mean(mask_loss) 
        mask_loss = tf.cond(tf.greater(tf.size(labels), 0), lambda: mask_loss, lambda: tf.constant(0.0))
        tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
        mask_losses.append(mask_loss)

  rpn_box_losses = tf.add_n(rpn_box_losses)
  rpn_cls_losses = tf.add_n(rpn_cls_losses)
  refined_box_losses = tf.add_n(refined_box_losses)
  refined_cls_losses = tf.add_n(refined_cls_losses)
  mask_losses = tf.add_n(mask_losses)
  losses = [rpn_box_losses, rpn_cls_losses, refined_box_losses, refined_cls_losses, mask_losses]
  total_loss = tf.add_n(losses)

  rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
  refine_batch = tf.cast(tf.add_n(refine_batch), tf.float32)
  mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
  rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
  refine_batch_pos = tf.cast(tf.add_n(refine_batch_pos), tf.float32)
  mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)
    
  return total_loss, losses, [rpn_batch_pos, rpn_batch, \
                              refine_batch_pos, refine_batch, \
                              mask_batch_pos, mask_batch]