def encode_coordinates_temporal_fn(self, net):
    """Appends one-hot encoded (t, y, x) coordinates to a 5-D feature map.

    For every position of the feature map, a one-hot encoding of its time
    step, its row and its column is concatenated to the feature vector.

    Args:
      net: a tensor of shape [batch_size, time, height, width, num_features].
        `time`, `height` and `width` must be statically known because they
        are used as Python integers; `batch_size` may be dynamic.

    Returns:
      `net` unchanged when the 'encode_coordinates_fn' model parameter is
      disabled; otherwise a tensor of shape
      [batch_size, time, height, width, num_features + time + height + width].
    """
    mparams = self._mparams['encode_coordinates_fn']
    if not mparams.enabled:
        return net

    batch_size, t, h, w, _ = net.shape.as_list()
    if batch_size is None:
        # Fall back to the runtime batch size when it is not known statically.
        batch_size = tf.shape(net)[0]

    # With the default 'xy' indexing every grid has shape [h, w, t].
    x, y, t1 = tf.meshgrid(tf.range(w), tf.range(h), tf.range(t))
    w_loc = slim.one_hot_encoding(x, num_classes=w)
    h_loc = slim.one_hot_encoding(y, num_classes=h)
    t_loc = slim.one_hot_encoding(t1, num_classes=t)
    loc = tf.concat([t_loc, h_loc, w_loc], 3)  # [h, w, t, t + h + w]
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1, 1])
    # Move the time axis in front of height/width to match the layout of net.
    loc = tf.transpose(loc, [0, 3, 1, 2, 4])  # [batch, t, h, w, t + h + w]
    return tf.concat([net, loc], 4)
def _encode_coordinates(self, features):
    """Concatenate one-hot row and column indices to every feature-map cell.

    Args:
        features: rank-4 tensor whose axes 1 and 2 (height and width) are
            statically known. Presumably [batch, height, width, channels];
            confirm against the caller.

    Returns:
        `features` with `height + width` extra channels appended on axis 3.
        The coordinate block is tiled `self.batch_size` times along axis 0.
    """
    _, height, width, _ = features.shape.as_list()
    col_grid, row_grid = tf.meshgrid(tf.range(width), tf.range(height))
    col_one_hot = slim.one_hot_encoding(col_grid, num_classes=width)
    row_one_hot = slim.one_hot_encoding(row_grid, num_classes=height)
    # Rows first, then columns, on the last axis: [height, width, height + width].
    coords = tf.concat([row_one_hot, col_one_hot], 2)
    coords = tf.expand_dims(coords, 0)
    coords = tf.tile(coords, [self.batch_size, 1, 1, 1])
    return tf.concat([features, coords], 3)
def _init_model(self):
    '''
    Build the training graph.

    Creates the global step and the input placeholders (images, identity
    labels, pose labels, illumination labels), derives their one-hot
    encodings, delegates prediction and loss construction to the
    `_predict_drgan_multipie`, `_loss_gan_multipie` and `_loss_compute`
    methods, merges all summaries, groups the trainable variables by
    substring of their name and finally calls `_get_train_op`.

    :return: None; all results are stored as attributes on `self`.
    '''
    # tf.set_random_seed(20)
    # with tf.Graph().as_default():
    self.global_step = slim.get_or_create_global_step()
    self.batch_data = tf.placeholder(
        dtype=tf.float32,
        shape=[None, self.input_size, self.input_size, self.input_channel],
        name='input_images')  # image
    self.input_label = tf.placeholder(dtype=tf.int64, shape=[None], name='input_labels')  # label
    self.input_pose = tf.placeholder(dtype=tf.int64, shape=[None], name='input_poses')  # pose
    self.input_light = tf.placeholder(
        dtype=tf.int64, shape=[None], name='input_illumination')  # illumination
    # self.index = tf.placeholder(tf.int32, None,name='input_nums')
    # Make one-hot labels.
    self.labels = slim.one_hot_encoding(self.input_label, self.class_nums)
    self.pose = slim.one_hot_encoding(self.input_pose, self.pose_c)  # pose code / pose label
    # Split the batch into two halves along axis 0 and swap them.
    # NOTE(review): this presumably requires an even batch size - confirm.
    self.pose_reverse = tf.concat(tf.split(self.pose, 2, axis=0)[::-1], axis=0)
    self.light = slim.one_hot_encoding(self.input_light, self.light_c)
    # Same half-swap for the illumination codes.
    self.light_reverse = tf.concat(tf.split(self.light, 2, axis=0)[::-1], axis=0)
    # self.noise = tf.random_uniform(shape=(self.index,1,1,self.noise_z),minval=-1,maxval=1,dtype=tf.float32,name='input_noise')
    # self.noise_reverse = tf.concat(tf.split(self.noise, 2, axis=0)[::-1], axis=0)
    # Compute the losses.
    self._predict_drgan_multipie()
    self._loss_gan_multipie()
    self._loss_compute()  # pre 1e-3 adv 1e-3 id_p 3e-3 pixel 1 tv 1e-4
    # Merged after the loss methods so that any summaries they registered are included.
    self.summary_train = tf.summary.merge_all()
    # Select the variable lists by substring match on the variable name.
    train_vars = tf.trainable_variables()
    self.varsg = [var for var in train_vars if 'generator' in var.name]
    self.varsd = [var for var in train_vars if 'discriminator' in var.name]
    # NOTE(review): 'recognation_fc' looks like a misspelling of
    # 'recognition_fc'; it must match the scope name used where the layer
    # is created - confirm before changing.
    self.fc_add = [
        var for var in train_vars if 'recognation_fc' in var.name
    ]
    self.vard_fr = [var for var in train_vars if 'resnet_yd' in var.name]
    # self.init_vars=self.vard_fr+self.varsd+self.varsg+self.fc_add
    self.init_vars = self.vard_fr
    # self.var_total=self.varsg+self.varsd+self.vard_fr
    # self.varsd = self.varsd+self.vard_fr+self.fc_add  ### fine-tune the FR net??
    self._get_train_op(self.global_step)
def build_model(x, y, num_classes=2, num_estimator=32, subsample=0.25, is_training=True, reuse=None):
    """Build an ensemble of classifiers, each trained on a sub-sample of the batch.

    Args:
        x: input tensor; its static batch dimension (axis 0) is read and
            used to draw the sub-sample indices.
        y: integer class labels, one per example.
        num_classes: number of classes for the one-hot encoding.
        num_estimator: number of classifiers; classifier `i` uses scope 'H<i>'.
        subsample: fraction of the batch gathered for each classifier.
        is_training: forwarded to `classify`; when False every classifier is
            additionally evaluated on the full batch and casts a vote.
        reuse: forwarded to `classify` for the sub-sampled pass.

    Returns:
        (loss, predictions): the sum of the per-classifier losses and the
        argmax over the accumulated votes (all-zero votes while training).
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)
    loss = 0
    predictions = y * 0
    batch_size = x.get_shape()[0].value

    # models
    for est_id in range(num_estimator):
        # Sample from the minibatch instead of bootstrapping. The indices are
        # drawn with NumPy while the graph is built. TODO something better?
        n_picked = int(round(batch_size * subsample))
        picked = np.random.randint(batch_size, size=(n_picked, ))
        x_sub = tf.gather(x, picked)
        y_sub = tf.gather(y, picked)
        sub_logits = classify(x_sub,
                              num_estimator=num_estimator,
                              num_classes=num_classes,
                              is_training=is_training,
                              reuse=reuse,
                              scope='H%d' % est_id)
        loss = loss + loss_fkt(sub_logits, y_sub)

        if is_training:
            continue

        # majority vote
        full_logits = classify(x,
                               num_estimator=num_estimator,
                               num_classes=num_classes,
                               is_training=is_training,
                               reuse=True,
                               scope='H%d' % est_id)
        vote = slim.one_hot_encoding(
            tf.argmax(slim.softmax(full_logits), 1), num_classes)
        predictions = predictions + vote

    predictions = tf.argmax(predictions, 1)
    return loss, predictions
def __init__(self, lr, s_size, a_size):
    """Build the single-layer agent network and its update op.

    Args:
        lr: learning rate of the gradient-descent optimizer.
        s_size: number of classes used to one-hot encode the state.
        a_size: number of outputs of the fully connected layer.
    """
    # Feed-forward part: the agent takes a state and produces an action.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    encoded_state = slim.one_hot_encoding(self.state_in, s_size)
    action_scores = slim.fully_connected(
        encoded_state,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(action_scores, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # Training part: the reward and the chosen action are fed in to compute
    # the loss, which is then used to update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    # Output entry that belongs to the action that was taken.
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    log_weight = tf.log(self.responsible_weight)
    self.loss = -(log_weight * self.reward_holder)
    sgd = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = sgd.minimize(self.loss)
def get_batch_data():
    """Build the queue-based training input pipeline described by `train_config`.

    Returns:
        (image_batch, label_batch, dataset): the dequeued image batch, the
        matching one-hot label batch and the dataset object returned by
        `dataset_factory.get_dataset`.
    """
    dataset = dataset_factory.get_dataset(
        train_config['dataset_name'],
        train_config['dataset_split_name'],
        train_config['dataset_dir'])

    batch_size = train_config['batch_size']
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        num_readers=train_config['num_readers'],
        common_queue_capacity=20 * batch_size,
        common_queue_min=10 * batch_size)

    # Fall back to the model name when no preprocessing name is configured.
    preprocessing_name = (
        train_config['preprocessing_name'] or train_config['model_name'])
    preprocess = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=True)

    image, label = provider.get(['image', 'label'])
    labels_offset = train_config['labels_offset']
    label = label - labels_offset

    image_size = train_config['train_image_size']
    print('train image size is : ', image_size)
    image = preprocess(image, image_size, image_size)

    images, labels = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=train_config['num_preprocessing_threads'],
        capacity=5 * batch_size)
    # The offset was already subtracted from the labels above, so the
    # number of classes shrinks by the same amount.
    labels = slim.one_hot_encoding(labels, dataset.num_classes - labels_offset)

    prefetch = slim.prefetch_queue.prefetch_queue(
        [images, labels], capacity=2 * train_config['num_clones'])
    image_batch, label_batch = prefetch.dequeue()
    return image_batch, label_batch, dataset
def getImageBatchAndOneHotLabels(dataset_dir, dataset_name, num_readers,
                                 num_preprocessing_threads, batch_size):
    '''
    Build a batched input pipeline that yields images and one-hot labels.

    :param dataset_dir: directory where the tfrecord files are stored
    :param dataset_name: name of the dataset split, e.g. train / validation
    :param num_readers: number of readers passed to the DatasetDataProvider
    :param num_preprocessing_threads: number of threads used by tf.train.batch
    :param batch_size: number of examples per batch; also sizes the provider
        queue (capacity 2 * batch_size, minimum batch_size) and the batching
        queue (capacity 5 * batch_size)
    :return: tuple (dataset, images, labels) where labels are one-hot encoded
        with dataset.num_classes classes
    '''
    dataset = imagenet.get_split(dataset_name, dataset_dir)
    # DataSetProvider on CPU
    # NOTE(review): the original indentation was lost; it is assumed here
    # that the whole pipeline up to the one-hot encoding lives inside the
    # CPU device scope - confirm against the original file.
    with tf.device('/device:CPU:0'):
        # ------- Dataset Provider ---------
        provider_train = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=num_readers,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        [image, label] = provider_train.get(['image', 'label'])
        # Preprocessing of Dataset: resize to the default AlexNet v2 input size.
        train_image_size = alexnet.alexnet_v2.default_image_size
        image = alexnet_preprocessing.preprocess_image(image, train_image_size, train_image_size)
        # Generate Batches
        images, labels = tf.train.batch([image, label],
                                        batch_size=batch_size,
                                        num_threads=num_preprocessing_threads,
                                        capacity=5 * batch_size)
        labels = slim.one_hot_encoding(labels, dataset.num_classes)
    return dataset, images, labels
def det_net_loss(seg_masks_in, reg_masks_in, seg_preds, reg_preds, reg_loss_weight=10.0, epsilon=1e-5):
    """Combine a segmentation cross-entropy with a masked regression loss.

    Both ground-truth masks are resized with nearest-neighbour interpolation
    to the spatial size of `seg_preds` (its axes 1 and 2) before use.

    Args:
        seg_masks_in: rank-3 segmentation mask; a trailing channel axis is
            added before resizing. Presumably holds integer 0/1 labels,
            since it is one-hot encoded with two classes - confirm.
        reg_masks_in: regression targets, resized like the segmentation mask.
        seg_preds: segmentation predictions; `epsilon` is added before the log.
        reg_preds: regression predictions.
        reg_loss_weight: weight of the regression term in the total.
        epsilon: offset added to `seg_preds` inside the logarithm.

    Returns:
        seg_loss + reg_loss_weight * reg_loss.
    """
    with tf.variable_scope('loss'):
        target_hw = seg_preds.get_shape()[1:3]
        nearest = tf.image.ResizeMethod.NEAREST_NEIGHBOR
        seg_target = tf.image.resize_images(
            seg_masks_in[:, :, :, tf.newaxis], target_hw[0], target_hw[1],
            nearest)
        reg_target = tf.image.resize_images(
            reg_masks_in, target_hw[0], target_hw[1], nearest)

        # segmentation loss
        seg_one_hot = slim.one_hot_encoding(seg_target[:, :, :, 0], 2)
        weighted_log = seg_one_hot * tf.log(seg_preds + epsilon)
        seg_loss = -tf.reduce_mean(weighted_log)

        # regression loss, counted only where the segmentation mask is set
        mask = tf.to_float(seg_target)
        squared_error = mask * (reg_preds - reg_target)**2
        reg_loss = tf.reduce_sum(squared_error)
        # The +1.0 keeps the denominator non-zero for an empty mask.
        reg_loss = reg_loss / (tf.reduce_sum(mask) + 1.0)

        return seg_loss + reg_loss_weight * reg_loss
def model_fn(inputs, mode, **kwargs):
    """Build the `mox.ModelSpec` for predict, export or train/eval mode.

    Args:
        inputs: tuple (images, id_or_labels, angles). In train or eval mode
            `id_or_labels` holds labels; in predict mode it holds ids.
        mode: one of the `mox.ModeKeys` values.
        **kwargs: accepted but not used.

    Returns:
        A `mox.ModelSpec` carrying the clipped softmax output and ids
        (predict), an export spec (export) or the softmax cross-entropy
        loss (every other mode).
    """
    images, id_or_labels, angles = inputs
    # Reshape angles from [batch_size] to [batch_size, 1].
    angles = tf.expand_dims(angles, 1)
    # Apply your version of the model.
    logits = model_v1(images, angles, mode)

    if mode == mox.ModeKeys.PREDICT:
        probabilities = tf.nn.softmax(logits)
        # Clip the probabilities to get a lower loss value.
        clipped = tf.clip_by_value(probabilities,
                                   clip_value_min=0.05,
                                   clip_value_max=0.95)
        return mox.ModelSpec(output_info={'id': id_or_labels, 'logits': clipped})

    if mode == mox.ModeKeys.EXPORT:
        predictions = tf.nn.softmax(logits)
        export_spec = mox.ExportSpec(
            inputs_dict={'images': images, 'angles': angles},
            outputs_dict={'predictions': predictions},
            version='model')
        return mox.ModelSpec(export_spec=export_spec)

    labels_one_hot = slim.one_hot_encoding(id_or_labels, 2)
    loss = tf.losses.softmax_cross_entropy(logits=logits,
                                           onehot_labels=labels_one_hot,
                                           label_smoothing=0.0,
                                           weights=1.0)
    return mox.ModelSpec(loss=loss, log_info={'loss': loss})
def __init__(self, lr, state_dim, action_dim):
    """Create the agent's network and the op that trains it.

    Args:
        lr: learning rate of the gradient-descent optimizer.
        state_dim: number of classes used to one-hot encode the state.
        action_dim: number of outputs of the fully connected layer.
    """
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    # One-hot encoding of the state.
    one_hot_state = slim.one_hot_encoding(self.state_in, state_dim)
    layer_out = slim.fully_connected(
        one_hot_state,
        action_dim,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(layer_out, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # Training pipeline
    self.reward = tf.placeholder(name='reward', shape=[1], dtype=tf.float32)
    self.action = tf.placeholder(name='action', shape=[1], dtype=tf.int32)
    # Output entry that belongs to the action that was fed in.
    self.responsible_weight = tf.slice(self.output, self.action, [1])
    neg_log_weight = -tf.log(self.responsible_weight)
    self.loss = neg_log_weight * self.reward
    sgd = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = sgd.minimize(self.loss)
def compute_loss(self):
    """Build the batching, forward-pass and loss ops and return the loss.

    Takes a single (image, label) pair from `self.get_image_labels()`,
    batches it, one-hot encodes the labels, runs `self.network_fn` and
    registers a softmax cross-entropy loss. Scalar summaries named
    "train-accuracy" and "loss" are added along the way.

    Returns:
        The sum (`tf.add_n`) of everything in the `tf.GraphKeys.LOSSES`
        collection.
    """
    image, label = self.get_image_labels()
    # NOTE(review): the original indentation was lost; the extent of the
    # device scope and of the "batching" name scope below is an assumption -
    # confirm against the original file.
    with tf.device("/device:GPU:0"):
        with tf.name_scope("batching"):
            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_batching_threads,
                capacity=FLAGS.batch_queue_size * FLAGS.batch_size,
                shapes=[image.get_shape(), []])
            labels = slim.one_hot_encoding(labels, self.dataset.num_classes)
        # summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        logits, end_points = self.network_fn(images)  # , reuse=gpu_idx!=0)
        correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
        acc = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("train-accuracy", acc)
        # The return value is discarded; the loss is picked up again from the
        # LOSSES collection on the next line.
        tf.losses.softmax_cross_entropy(labels, logits)
        losses = tf.get_collection(tf.GraphKeys.LOSSES, None)  # not sure None is necessary
        # Ignoring other types of losses...
        loss = tf.add_n(losses, name="loss")
        tf.summary.scalar("loss", loss)
    return loss
def model_fn(inputs, mode):
    """Define a ResNet-50 classifier for MoXing-TensorFlow.

    Args:
        inputs: tuple (images, labels).
        mode: run mode forwarded to `mox.get_model_fn`.

    Returns:
        A `mox.ModelSpec` whose loss is the softmax cross-entropy plus the
        regularization losses, and which logs 'loss' and 'accuracy'.
    """
    images, labels = inputs

    # Fetch a resnet50 model function. It takes images and returns logits
    # and end_points; end_points are not needed here, only the logits.
    resnet_fn = mox.get_model_fn(name='resnet_v1_50',
                                 run_mode=mode,
                                 num_classes=data_meta.num_classes,
                                 weight_decay=0.00004)
    logits, _ = resnet_fn(images)

    # Compute the cross-entropy loss.
    labels_one_hot = slim.one_hot_encoding(labels, data_meta.num_classes)
    loss = tf.losses.softmax_cross_entropy(logits=logits,
                                           onehot_labels=labels_one_hot)

    # Fetch the regularization losses and add them to the loss.
    # mox.get_collection must be used here instead of tf.get_collection.
    regularization_loss = tf.add_n(
        mox.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    loss = loss + regularization_loss

    # Compute the classification accuracy (top-1).
    top1_hits = tf.nn.in_top_k(logits, labels, 1)
    accuracy = tf.reduce_mean(tf.cast(top1_hits, tf.float32))

    # Return the ModelSpec class MoXing-TensorFlow uses to define a model.
    log_info = {'loss': loss, 'accuracy': accuracy}
    return mox.ModelSpec(loss=loss, log_info=log_info)
def __init__(self, lr, s_size, a_size):
    """Set up the agent: a one-layer network plus its training op.

    Args:
        lr: learning rate of the gradient-descent optimizer.
        s_size: number of classes used to one-hot encode the state.
        a_size: number of outputs of the fully connected layer.
    """
    # Feed-forward part of the network; this does the actual choosing.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    # TF-Slim (tensorflow/contrib/slim) gives a simpler setup of TF models;
    # one_hot_encoding turns the integer state into a one-hot vector.
    state_one_hot = slim.one_hot_encoding(self.state_in, s_size)
    fc_out = slim.fully_connected(
        state_one_hot,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(fc_out, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # Training procedure: feed the reward and the chosen action into the
    # network to compute the loss, and use it to update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    weighted_log = tf.log(self.responsible_weight) * self.reward_holder
    self.loss = -(weighted_log)
    descent = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = descent.minimize(self.loss)
def build_model(x, y, num_classes=2, is_training=True, num_estimator=None, num_filter=None, reuse=None):
    """Wire the RNN classifier to its loss and its class predictions.

    CAUTION! controller.py uses a function with this name and arguments.

    Args:
        x: model input, passed to `RNN_deepcough` unchanged.
        y: integer class labels.
        num_classes: number of classes (one-hot size and model outputs).
        is_training: forwarded to the model.
        num_estimator: accepted for interface compatibility; not used.
        num_filter: accepted for interface compatibility; not used.
        reuse: forwarded to the model.

    Returns:
        (loss, predictions): mean softmax cross-entropy and the argmax class.
    """
    # preprocess
    one_hot_y = slim.one_hot_encoding(y, num_classes)
    print('input: ', x.get_shape())

    # model
    logits = RNN_deepcough(x,
                           num_outputs=num_classes,
                           reuse=reuse,
                           is_training=is_training)

    # results
    xent = softmax_cross_entropy(logits=logits, onehot_labels=one_hot_y)
    loss = tf.reduce_mean(xent)
    class_probs = slim.softmax(logits)
    predictions = tf.argmax(class_probs, 1)
    return loss, predictions
def _build_model(inputs_queue, clone_batch_size):
    """Builds a clone of train model and registers its focal losses.

    Args:
      inputs_queue: A prefetch queue for images and labels. Each dequeued
        sample is indexed with 'image' and 'label' (and 'counts' when the
        counts head is enabled).
      clone_batch_size: Multiplied by FLAGS.num_classes to obtain the batch
        size used for the class-balancing weights.
        NOTE(review): why the factor is num_classes is not visible here -
        confirm against the input pipeline.

    Returns:
      The softmax of the classification output (the variable is still called
      `logits`). When FLAGS.dataset == 'protein' and FLAGS.add_counts_logits
      is set, a tuple (logits, counts_logits) is returned instead, where
      counts_logits is the softmax of the 5-class counts head.
    """
    samples = inputs_queue.dequeue()
    batch_size = clone_batch_size * FLAGS.num_classes
    inputs = tf.identity(samples['image'], name='image')
    labels = tf.identity(samples['label'], name='label')
    model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
    net, end_points = model.get_features(
        inputs,
        model_options=model_options,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)
    logits, _ = model.classification(net,
                                     end_points,
                                     num_classes=FLAGS.num_classes,
                                     is_training=True)
    if FLAGS.multi_label:
        # NOTE(review): the original indentation was lost; it is assumed that
        # the per-class loop sits inside the name scope - confirm.
        with tf.name_scope('Multilabel_logits'):
            logits = slim.softmax(logits)
            half_batch_size = batch_size / 2
            # One focal loss per class; class index 0 is skipped.
            for i in range(1, FLAGS.num_classes):
                class_logits = tf.identity(logits[:, i],
                                           name='class_logits_%02d' % (i))
                class_labels = tf.identity(labels[:, i],
                                           name='class_labels_%02d' % (i))
                num_positive = tf.reduce_sum(class_labels)
                num_negative = batch_size - num_positive
                # Balance the classes: positives get half_batch / num_positive,
                # negatives half_batch / num_negative, so each group carries
                # the same total weight.
                weights = tf.where(
                    tf.equal(class_labels, 1.0),
                    tf.tile([half_batch_size / num_positive], [batch_size]),
                    tf.tile([half_batch_size / num_negative], [batch_size]))
                train_utils.focal_loss(class_labels,
                                       class_logits,
                                       weights=weights,
                                       scope='class_loss_%02d' % (i))
    else:
        logits = slim.softmax(logits)
        train_utils.focal_loss(labels, logits, scope='cls_loss')
    if (FLAGS.dataset == 'protein') and FLAGS.add_counts_logits:
        # Counts are shifted down by one before the 5-class one-hot encoding.
        counts = tf.identity(samples['counts'] - 1, name='counts')
        one_hot_counts = slim.one_hot_encoding(counts, 5)
        counts_logits, _ = model.classification(net,
                                                end_points,
                                                num_classes=5,
                                                is_training=True,
                                                scope='Counts_logits')
        counts_logits = slim.softmax(counts_logits)
        train_utils.focal_loss(one_hot_counts,
                               counts_logits,
                               scope='counts_loss')
        return logits, counts_logits
    return logits
def __init__(self, lr, s_size, a_size):
    """Build the agent network: it takes a state and returns an action.

    Args:
        lr: learning rate (handed to the Adam optimizer).
        s_size: state size, i.e. the length of the one-hot state vector.
        a_size: action size, i.e. the number of network outputs.
    """
    # 2-1) Input and output elements of the neural network.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    # one_hot_encoding takes (label, size of the one-hot vector). The state
    # is one-hot encoded rather than fed as a plain number so that there are
    # enough weights connecting the input to the network outputs.
    state_vec = slim.one_hot_encoding(self.state_in, s_size)
    raw_output = slim.fully_connected(
        state_vec,
        a_size,  # input, output shape
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    # Flatten the result into a single row.
    # TODO: check the type and shape of raw_output / self.output (tensor?).
    self.output = tf.reshape(raw_output, [-1])
    self.chosen_action = tf.argmax(self.output, 0)  # the selected action

    # 2-2) Training procedure of the neural network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    # From self.output (the per-action values), extract the entry at the
    # index held in self.action_holder.
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    log_weight = tf.log(self.responsible_weight)
    self.loss = -(log_weight * self.reward_holder)
    adam = tf.train.AdamOptimizer(learning_rate=lr)
    self.update = adam.minimize(self.loss)
def char_predictions(self, chars_logit):
    """Returns confidence scores (softmax values) for predicted characters.

    Args:
      chars_logit: chars logits, a tensor with shape
        [batch_size x seq_length x num_char_classes]

    Returns:
      A tuple (ids, log_prob, scores), where:
        ids - predicted characters, a int32 tensor with shape
          [batch_size x seq_length];
        log_prob - a log probability of all characters, a float tensor with
          shape [batch_size, seq_length, num_char_classes];
        scores - corresponding confidence scores for characters, a float
          tensor with shape [batch_size x seq_length].
    """
    log_prob = utils.logits_to_log_prob(chars_logit)
    best_class = tf.argmax(log_prob, axis=2)
    ids = tf.to_int32(best_class, name='predicted_chars')

    # Boolean mask that is True only at the predicted class of each position.
    one_hot_ids = slim.one_hot_encoding(ids, self._params.num_char_classes)
    mask = tf.cast(one_hot_ids, tf.bool)

    all_scores = tf.nn.softmax(chars_logit)
    # boolean_mask flattens its result, so restore the [batch, seq] layout.
    picked_scores = tf.boolean_mask(all_scores, mask, name='char_scores')
    scores = tf.reshape(picked_scores, shape=(-1, self._params.seq_length))
    return ids, log_prob, scores
def __init__(self, lr, s_size, a_size):
    """Construct the agent's value network and its gradient-descent update.

    Args:
        lr: learning rate of the optimizer.
        s_size: number of classes used to one-hot encode the state.
        a_size: number of action values produced by the network.
    """
    # Placeholder that receives the state as input data.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    # One-hot encoded version of the state.
    state_encoding = slim.one_hot_encoding(self.state_in, s_size)
    # Value of every action, computed from the layer weights.
    action_values = slim.fully_connected(
        state_encoding,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    # Flatten to a rank-1 tensor.
    self.output = tf.reshape(action_values, [-1])
    # The action with the highest value is chosen.
    self.chosen_action = tf.argmax(self.output, 0)

    # Training step: the reward and the chosen action are sent into the
    # network, the loss is computed and the network is updated from it.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    # Negative log of the selected output, scaled by the reward.
    scaled_log = tf.log(self.responsible_weight) * self.reward_holder
    self.loss = -(scaled_log)
    trainer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = trainer.minimize(self.loss)
def __init__(self, lr, s_size, a_size):
    """Build the agent network; prints the created tensors while doing so.

    Args:
        lr: learning rate.
        s_size: number of bandits (size of the one-hot state vector).
        a_size: number of arms per bandit (number of network outputs).
    """
    # Feed-forward part of the network: the agent takes a state and
    # produces an action.
    # Environment state input; in this case, the active bandit.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    self.state_in_OH = slim.one_hot_encoding(self.state_in, s_size)
    print(self.state_in, "OH: ", self.state_in_OH)
    layer_output = slim.fully_connected(
        self.state_in_OH,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    # Flattened to rank 1.
    self.output = tf.reshape(layer_output, [-1])
    print(self.output)
    self.chosen_action = tf.argmax(self.output, 0)

    # Training procedure: feed the reward and the chosen action into the
    # network to compute the loss, and use it to update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    log_weight = tf.log(self.responsible_weight)
    self.loss = -(log_weight * self.reward_holder)
    gd_optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = gd_optimizer.minimize(self.loss)
def __init__(self, lr, s_size, a_size):
    """
    Build the agent's single-layer network and its training op.

    :param lr: learning rate
    :param s_size: number of states
    :param a_size: number of actions
    """
    # Feed-forward part: input is the current state, output is the action.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    one_hot_state = slim.one_hot_encoding(self.state_in, s_size)

    # A single-layer neural network.
    layer_out = slim.fully_connected(
        one_hot_state,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(layer_out, [-1])
    self.chosen_action = tf.argmax(self.output, axis=0)

    # Training procedure: feed the reward and the chosen action into the
    # network to compute the loss, then use the loss to update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    reward_weighted_log = tf.log(self.responsible_weight) * self.reward_holder
    self.loss = -(reward_weighted_log)
    sgd = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = sgd.minimize(self.loss)
def _multi_class_l2_loss(pred, labels, EPS=1e-12, num_classes=2000):
    """Squared-error loss between predictions and one-hot encoded labels.

    The mean squared difference is multiplied by `num_classes`, which for a
    [batch, num_classes] prediction equals the per-example sum of squared
    errors averaged over the batch. The loss is also registered through
    `slim.losses.add_loss`.

    Args:
        pred: predictions, comparable element-wise with the one-hot labels.
        labels: integer class labels.
        EPS: unused; kept so existing callers keep working.
        num_classes: number of classes for the one-hot encoding and the
            scaling factor. Defaults to 2000, the value that used to be
            hard-coded.

    Returns:
        The scalar loss tensor.
    """
    one_hot_labels = slim.one_hot_encoding(labels, num_classes)
    losses = tf.squared_difference(pred, one_hot_labels)
    loss = tf.reduce_mean(losses) * num_classes
    slim.losses.add_loss(loss)
    return loss
def __init__(self, lr, s_size, a_size):
    """Build a two-layer variant of the agent network and its update op.

    Args:
        lr: learning rate of the gradient-descent optimizer.
        s_size: number of classes used to one-hot encode the state.
        a_size: number of outputs of the final fully connected layer.
    """
    # Feed-forward part of the network: the agent takes a state and
    # produces an action.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    encoded_state = slim.one_hot_encoding(self.state_in, s_size)

    # Originally a single fully connected layer was defined here (sigmoid
    # or no activation, ones-initialized weights, no bias).
    # Author's note: I want to add a second fully connected layer with,
    # say, 32 units. Am I doing this correctly, and why does the
    # performance drop?
    hidden = slim.fully_connected(
        encoded_state,
        3,
        biases_initializer=None,
        activation_fn=None,
        weights_initializer=tf.ones_initializer())
    scores = slim.fully_connected(
        hidden,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    scores = tf.reshape(scores, [-1])
    self.chosen_action = tf.argmax(scores, 0)

    # Training procedure: feed the reward and the chosen action into the
    # network to compute the loss, and use it to update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(scores, self.action_holder, [1])
    log_weight = tf.log(self.responsible_weight)
    self.loss = -(log_weight * self.reward_holder)
    sgd = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = sgd.minimize(self.loss)
def build_model(x, y, num_classes=2, is_training=True, reuse=None):
    """Wire the DenseNet classifier to its loss and its class predictions.

    CAUTION! controller.py uses a function with this name and arguments.

    Args:
        x: model input; a trailing axis is appended before it is passed to
            `densenet`.
        y: integer class labels.
        num_classes: number of classes.
        is_training: forwarded to `densenet_arg_scope`.
        reuse: forwarded to `densenet`.

    Returns:
        (loss, predictions): softmax cross-entropy loss and the argmax class.
    """
    # preprocess
    one_hot_y = slim.one_hot_encoding(y, num_classes)

    # model
    with slim.arg_scope(densenet_arg_scope(is_training)):
        x = tf.expand_dims(x, -1)
        logits = densenet(x, num_classes, reuse=reuse)

    # results
    xent = tf.losses.softmax_cross_entropy(logits=logits,
                                           onehot_labels=one_hot_y)
    loss = tf.reduce_mean(xent)
    class_probs = slim.softmax(logits)
    predictions = tf.argmax(class_probs, 1)
    return loss, predictions
def __init__(self, learningRate, numberOfStates, numberOfActions):
    """Build the agent network and its training op, grouped in name scopes.

    Args:
        learningRate: learning rate of the gradient-descent optimizer.
        numberOfStates: number of classes used to one-hot encode the state.
        numberOfActions: number of outputs of the fully connected layer.
    """
    with tf.name_scope('input'):
        self.state_in = tf.placeholder(shape=[1], dtype=tf.int32, name='state_in')
        state_in_OH = slim.one_hot_encoding(self.state_in, numberOfStates)
        self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32, name='reward')
    # NOTE(review): the original indentation was lost; it is assumed that
    # the layer below sits outside the 'input' name scope - confirm.
    # Single sigmoid layer without bias, weights initialized to ones.
    outputVector = slim.fully_connected(state_in_OH, numberOfActions,\
        biases_initializer=None,activation_fn=tf.nn.sigmoid,\
        weights_initializer=tf.ones_initializer(), scope='layer1')
    with tf.name_scope('output'):
        self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32, name='action')
        # Flatten the layer output to rank 1.
        self.output = tf.reshape(outputVector, [-1])
        # Output entry at the index fed through action_holder.
        self.selected_output = tf.slice(self.output, self.action_holder, [1])
        self.chosen_action = tf.argmax(
            self.output, 0)  # index of the largest value in output
    with tf.name_scope('calculations'):
        # Negative log of the selected output, scaled by the reward.
        self.loss = -(tf.log(self.selected_output) * self.reward_holder)
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learningRate)
        self.update = optimizer.minimize(self.loss)
def build_model(x, y,
                num_classes=2,
                num_estimator=None,  # misused as the number of convolutions
                num_filter=16,
                is_training=True,
                reuse=None):
    """Wire the convolutional classifier to its loss and its predictions.

    CAUTION! controller.py uses a function with this name and arguments.

    Args:
        x: model input, passed to `classify` unchanged.
        y: integer class labels.
        num_classes: number of classes.
        num_estimator: forwarded to `classify` as its `route` argument.
        num_filter: forwarded to `classify`.
        is_training: forwarded to `classify`.
        reuse: forwarded to `classify`.

    Returns:
        (loss, predictions): mean softmax cross-entropy and the argmax class.
    """
    # preprocess
    one_hot_y = slim.one_hot_encoding(y, num_classes)

    # model
    logits = classify(x,
                      num_classes=num_classes,
                      num_filter=num_filter,
                      route=num_estimator,
                      is_training=is_training,
                      reuse=reuse)

    # results
    xent = softmax_cross_entropy(logits=logits, onehot_labels=one_hot_y)
    loss = tf.reduce_mean(xent)
    class_probs = slim.softmax(logits)
    predictions = tf.argmax(class_probs, 1)
    return loss, predictions
def train():
    """Train SqueezeNet on the flowers 'train' split for 100 steps.

    Reads the dataset from the module-level `flowers_data_dir`, writes
    checkpoints and summaries to the module-level `train_dir`, and prints
    the loss of the final batch.
    """
    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        dataset = flowers.get_split('train', flowers_data_dir)
        images, _, labels = load_batch(dataset)

        # Create the model:
        logits, _ = squeezenet.squeezenet(images,
                                          num_classes=dataset.num_classes,
                                          is_training=True)

        # Specify the loss function:
        one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)
        slim.losses.softmax_cross_entropy(logits, one_hot_labels)
        total_loss = slim.losses.get_total_loss()

        # Create some summaries to visualize the training process:
        tf.summary.scalar('losses/Total Loss', total_loss)

        # Specify the optimizer and create the train op:
        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # Run the training:
        final_loss = slim.learning.train(
            train_op,
            logdir=train_dir,
            number_of_steps=100,  # kept small for speed
            save_interval_secs=600,
            save_summaries_secs=6000,
            log_every_n_steps=1,
        )

        # The loss is a float; '%d' would truncate it to an integer.
        print('Finished training. Final batch loss %f' % final_loss)
def char_predictions(self, chars_logit):
    """Returns confidence scores (softmax values) for predicted characters.

    Args:
      chars_logit: chars logits, a tensor with shape
        [batch_size x seq_length x num_char_classes]

    Returns:
      A tuple (ids, log_prob, scores), where:
        ids - predicted characters, a int32 tensor with shape
          [batch_size x seq_length];
        log_prob - a log probability of all characters, a float tensor with
          shape [batch_size, seq_length, num_char_classes];
        scores - corresponding confidence scores for characters, a float
          tensor with shape [batch_size x seq_length].
    """
    params = self._params
    log_prob = utils.logits_to_log_prob(chars_logit)
    ids = tf.to_int32(tf.argmax(log_prob, axis=2), name="predicted_chars")

    # True only at the predicted class of every sequence position.
    predicted_one_hot = slim.one_hot_encoding(ids, params.num_char_classes)
    is_predicted = tf.cast(predicted_one_hot, tf.bool)

    softmax_scores = tf.nn.softmax(chars_logit)
    flat_scores = tf.boolean_mask(softmax_scores, is_predicted,
                                  name="char_scores")
    # boolean_mask returns a flat tensor; bring back the sequence axis.
    scores = tf.reshape(flat_scores, shape=(-1, params.seq_length))
    return ids, log_prob, scores
def build_model(x, y, num_classes=2, num_estimator=10, is_training=True, reuse=None):
    """Combine several classifiers into one boosted prediction.

    Boosting without additive training: the logits are a running blend of
    the per-classifier predictions, where classifier `i` (scope 'c<i>')
    enters with weight zeta = gamma * 2 / (i + 1) and the blend so far is
    kept with weight 1 - zeta.

    CAUTION! controller.py uses a function with this name and arguments.

    Args:
        x: model input, passed to every classifier unchanged.
        y: integer class labels.
        num_classes: number of classes.
        num_estimator: number of classifiers to blend.
        is_training: forwarded to `classify`.
        reuse: forwarded to `classify`.

    Returns:
        (loss, predictions): softmax cross-entropy (label smoothing 0.05)
        of the blended logits and the argmax class.
    """
    # preprocess
    y = slim.one_hot_encoding(y, num_classes)

    # model
    logits = 0
    for i in range(num_estimator):
        predictions, gamma = classify(x,
                                      num_estimator=num_estimator,
                                      num_classes=num_classes,
                                      is_training=is_training,
                                      reuse=reuse,
                                      scope='c%d' % i)
        zeta = gamma * 2 / (i + 1)
        logits = (1 - zeta) * logits + zeta * predictions

    # results
    loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=logits,
                                        onehot_labels=y,
                                        label_smoothing=0.05))
    predictions = tf.argmax(slim.softmax(logits), 1)
    return loss, predictions
def __init__(self, lr, s_size, a_size, banditos):
    """Reset the default graph and build the agent network in it.

    Args:
        lr: learning rate of the gradient-descent optimizer.
        s_size: number of classes used to one-hot encode the state.
        a_size: number of outputs of the fully connected layer.
        banditos: stored unchanged as `self.bandits`.
    """
    tf.reset_default_graph()
    self.bandits = banditos

    # Feed-forward part.
    self.state_in = tf.placeholder(shape=[1], dtype=tf.int32)
    self.state_in_one_hot = slim.one_hot_encoding(self.state_in, s_size)
    layer_out = slim.fully_connected(
        self.state_in_one_hot,
        a_size,
        biases_initializer=None,
        activation_fn=tf.nn.sigmoid,
        weights_initializer=tf.ones_initializer())
    self.output = tf.reshape(layer_out, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # Training procedure: feed the reward and the chosen action into the
    # network to compute the loss, and use it to update the network.
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)
    self.responsible_weight = tf.slice(self.output, self.action_holder, [1])
    log_weight = tf.log(self.responsible_weight)
    self.loss = -(log_weight * self.reward_holder)
    sgd = tf.train.GradientDescentOptimizer(learning_rate=lr)
    self.update = sgd.minimize(self.loss)

    # Not set up yet; filled in later by other methods.
    self.session = None
    self.optimal_weight = None
    # First trainable variable of the freshly reset graph.
    self.weights = tf.trainable_variables()[0]
    self.init = tf.global_variables_initializer()
def build_model(x, y,
                num_classes=2,
                num_estimator=3,  # misused as the number of convolutions
                num_filter=128,
                is_training=True,
                reuse=None):
    """Wire the 'model_v1' classifier to its loss and its predictions.

    CAUTION! controller.py uses a function with this name and arguments.

    Args:
        x: model input, passed to `classify` unchanged.
        y: integer class labels.
        num_classes: number of classes.
        num_estimator: forwarded to `classify` as its `route` argument.
        num_filter: forwarded to `classify`.
        is_training: forwarded to `classify`.
        reuse: forwarded to `classify`.

    Returns:
        (loss, predictions): the value of `loss_fkt` on the classifier
        output and the argmax class.
    """
    # preprocess
    one_hot_y = slim.one_hot_encoding(y, num_classes)

    # model
    with tf.variable_scope('model_v1'):
        class_scores = classify(x,
                                num_classes=num_classes,
                                num_filter=num_filter,
                                route=num_estimator,
                                is_training=is_training,
                                reuse=reuse,
                                scope='wk')
        loss = loss_fkt(class_scores, one_hot_y)

    # results
    class_probs = slim.softmax(class_scores)
    predictions = tf.argmax(class_probs, 1)
    return loss, predictions
def _init_model(self):
    """Build the recognition training graph.

    Creates the image/label placeholders, the cross-entropy loss, an
    accuracy metric, the merged training summary, the variable lists used
    for initialisation and optimisation, and the momentum train op.

    :return: None; every result is stored on ``self``.
    """
    self.global_step = slim.get_or_create_global_step()

    # Inputs: square images plus integer labels.
    image_shape = [None, self.input_size, self.input_size,
                   self.input_channel]
    self.batch_data = tf.placeholder(
        dtype=tf.float32, shape=image_shape, name='input_images')
    self.batch_label = tf.placeholder(
        dtype=tf.int64, shape=[None], name='input_labels')

    # One-hot targets for the softmax cross-entropy below.
    self.labels = slim.one_hot_encoding(self.batch_label, self.class_nums)

    # Network outputs and loss.
    self.softmax_real, self.logits, self.fc = nets.inference_recognition(
        self.batch_data, self.class_nums)
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=self.labels, logits=self.logits)
    self.loss = tf.reduce_mean(per_example_loss)
    loss_summary = tf.summary.scalar('train/pre_loss', self.loss)

    # Accuracy: arg-max of the softmax output compared with the labels.
    probabilities = tf.reshape(self.softmax_real, [-1, self.class_nums])
    predicted_ids = tf.argmax(probabilities, 1)
    self.predict_rate = tf.equal(predicted_ids, self.batch_label)
    self.accuracy_r = tf.reduce_mean(
        tf.cast(self.predict_rate, tf.float32))
    accuracy_summary = tf.summary.scalar('train/pre_rate', self.accuracy_r)

    self.summary_train = tf.summary.merge(
        [loss_summary, accuracy_summary])

    # Partition the trainable variables by name fragment.
    trainable = tf.trainable_variables()
    self.fc_add = [v for v in trainable if 'recognition_fc' in v.name]
    self.vard_fr = [v for v in trainable if 'resnet_yd' in v.name]
    self.init_vars = self.vard_fr
    self.var_all = self.vard_fr + self.fc_add

    momentum_opt = tf.train.MomentumOptimizer(
        learning_rate=0.0001, momentum=0.99, name='optimizer')
    self.train_op = momentum_opt.minimize(
        self.loss, var_list=self.var_all, global_step=self.global_step)
def char_one_hot(self, logit):
    """Turn character logits into a one-hot vector of the arg-max class.

    Args:
        logit: A tensor with shape [batch_size, num_char_classes].

    Returns:
        A tensor with shape [batch_size, num_char_classes].
    """
    num_classes = self._params.num_char_classes
    return slim.one_hot_encoding(tf.argmax(logit, axis=1), num_classes)
def encode_coordinates_fn(self, net):
    """Adds one-hot encoding of coordinates to different views in the networks.

    For each "pixel" of a feature map it adds one-hot encoded x and y
    coordinates.

    Args:
        net: a tensor of shape=[batch_size, height, width, num_features].
            Height and width must be statically known; the batch size may
            be unknown at graph-construction time.

    Returns:
        ``net`` unchanged when the feature is disabled in the model
        parameters; otherwise a tensor with the same height and width whose
        last dimension is extended by ``height + width`` one-hot coordinate
        features.
    """
    mparams = self._mparams['encode_coordinates_fn']
    if not mparams.enabled:
        return net

    batch_size, h, w, _ = net.shape.as_list()
    if batch_size is None:
        # A static batch size of None cannot be used as a tile multiple;
        # fall back to the dynamic batch dimension so the graph also builds
        # for inputs with a variable batch size.
        batch_size = tf.shape(net)[0]

    x, y = tf.meshgrid(tf.range(w), tf.range(h))
    w_loc = slim.one_hot_encoding(x, num_classes=w)
    h_loc = slim.one_hot_encoding(y, num_classes=h)
    # [h, w, h + w]: row one-hot followed by column one-hot per position.
    loc = tf.concat([h_loc, w_loc], 2)
    # Replicate the coordinate map for every example in the batch.
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
def get_data(dataset,
             batch_size,
             augment=False,
             central_crop_size=None,
             shuffle_config=None,
             shuffle=True):
    """Wraps calls to DatasetDataProvider and shuffle_batch.

    For more details about supported Dataset objects refer to
    datasets/fsns.py.

    Args:
        dataset: a slim.data.dataset.Dataset object.
        batch_size: number of samples per batch.
        augment: optional, if True does random image distortion.
        central_crop_size: a tuple (crop_width, crop_height), forwarded to
            ``preprocess_image``.
        shuffle_config: a namedtuple ShuffleBatchConfig; when falsy,
            DEFAULT_SHUFFLE_CONFIG is used.
        shuffle: if True use data shuffling.

    Returns:
        An InputEndpoints tuple holding the batched preprocessed images,
        original images, labels and one-hot labels.
    """
    config = shuffle_config or DEFAULT_SHUFFLE_CONFIG

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=shuffle,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size)
    raw_image, label = provider.get(['image', 'label'])

    processed_image = preprocess_image(
        raw_image,
        augment,
        central_crop_size,
        num_towers=dataset.num_of_views)
    one_hot_label = slim.one_hot_encoding(label, dataset.num_char_classes)

    batched = tf.train.shuffle_batch(
        [processed_image, raw_image, label, one_hot_label],
        batch_size=batch_size,
        num_threads=config.num_batching_threads,
        capacity=config.queue_capacity,
        min_after_dequeue=config.min_after_dequeue)
    images, images_orig, labels, labels_one_hot = batched

    return InputEndpoints(
        images=images,
        images_orig=images_orig,
        labels=labels,
        labels_one_hot=labels_one_hot)
def get_data(dataset,
             model_name,
             batch_size=32,
             shuffle_config=None,
             shuffle=None,
             is_training=True,
             height=0,
             width=0):
    """Return batched input data for a model.

    Args:
        dataset: a slim Dataset object.
        model_name: selects the preprocessing function from
            ``preprocessing_factory``.
        batch_size: number of samples per batch.
        shuffle_config: a namedtuple controlling the shuffle queue, with
            fields {queue_capacity, num_batching_threads,
            min_after_dequeue}; when falsy, DEFAULT_SHUFFLE_CONFIG is used.
        shuffle: passed to the data provider to control shuffling.
        is_training: forwarded to the preprocessing function.
        height: expected resized height.
        width: expected resized width.

    Returns:
        An InputEndpoints tuple with batched images, labels and one-hot
        labels.
    """
    config = shuffle_config or DEFAULT_SHUFFLE_CONFIG

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=shuffle,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size)
    raw_image, label = provider.get(['image', 'label'])

    # Record the raw sample before any preprocessing is applied.
    tf.summary.image('image_org', tf.expand_dims(raw_image, 0))
    tf.summary.scalar('label_orig', label)

    preprocess = preprocessing_factory.get_preprocessing(model_name)
    # Positional order is (image, width, height, is_training).
    processed_image = preprocess(raw_image, width, height, is_training)
    one_hot_label = slim.one_hot_encoding(label, dataset.num_classes)

    batched = tf.train.shuffle_batch(
        tensors=[processed_image, label, one_hot_label],
        batch_size=batch_size,
        capacity=config.queue_capacity,
        num_threads=config.num_batching_threads,
        min_after_dequeue=config.min_after_dequeue)
    images, labels, labels_one_hot = batched

    return InputEndpoints(
        images=images,
        labels=labels,
        labels_one_hot=labels_one_hot)
def test_create_summaries_is_runnable(self):
    """Smoke test: the summaries built by the model can be evaluated."""
    model = self.create_model()

    one_hot = slim.one_hot_encoding(self.fake_labels, self.num_char_classes)
    data = data_provider.InputEndpoints(
        images=self.fake_images,
        images_orig=self.fake_images,
        labels=self.fake_labels,
        labels_one_hot=one_hot)
    endpoints = model.create_base(
        images=self.fake_images, labels_one_hot=None)
    charset = create_fake_charset(self.num_char_classes)
    summaries = model.create_summaries(
        data, endpoints, charset, is_training=False)

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        sess.run(tf.tables_initializer())
        # No assertion on the values: only check that it is runnable.
        sess.run(summaries)
def main(_):
    """Train the EM capsule network on the MNIST 'train' split.

    Seeds NumPy and TensorFlow from the flags, builds the input pipeline,
    the capsule network and its spread loss, then hands the train op to
    ``slim.learning.train``.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    flags = settings()
    np.random.seed(flags.seed)
    tf.set_random_seed(flags.seed)

    # The slim dataset bundles data sources, decoder, reader and other
    # meta-information.
    dataset = mnist.get_split('train', flags.dataset_dir)
    # e.g. 60,000 / 24 = 2500
    iterations_per_epoch = dataset.num_samples // flags.batch_size

    # images: Tensor (?, 28, 28, 1); labels: Tensor (?)
    images, labels = load_batch(dataset, flags.batch_size)
    # Tensor (?, 10)
    one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes)

    # poses: Tensor (?, 10, 4, 4); activations: Tensor (?, 10)
    poses, activations = m_capsules.nets.capsules_net(
        images,
        num_classes=10,
        iterations=3,
        batch_size=flags.batch_size,
        name='capsules_em')

    global_step = tf.train.get_or_create_global_step()
    loss = m_capsules.nets.spread_loss(
        one_hot_labels,
        activations,
        iterations_per_epoch,
        global_step,
        name='spread_loss')
    tf.summary.scalar('losses/spread_loss', loss)

    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    train_tensor = slim.learning.create_train_op(
        loss, adam, global_step=global_step, clip_gradient_norm=4.0)

    checkpoint_saver = tf.train.Saver(max_to_keep=2)
    slim.learning.train(
        train_tensor,
        logdir=flags.log_dir,
        log_every_n_steps=10,
        save_summaries_secs=60,
        saver=checkpoint_saver,
        save_interval_secs=600)
def fake_labels(batch_size, seq_length, num_char_classes):
    """Create random one-hot character labels.

    Args:
        batch_size: number of label sequences.
        seq_length: number of characters per sequence.
        num_char_classes: exclusive upper bound of the random class ids and
            the number of one-hot classes.

    Returns:
        The one-hot encoding of a random integer tensor of shape
        (batch_size, seq_length).
    """
    random_ids = np.random.randint(
        low=0, high=num_char_classes, size=(batch_size, seq_length))
    id_tensor = tf.convert_to_tensor(random_ids)
    return slim.one_hot_encoding(id_tensor, num_classes=num_char_classes)
def build_losses(pyramid, outputs, gt_boxes, gt_masks,
                 num_classes, base_anchors,
                 rpn_box_lw =1.0, rpn_cls_lw = 1.0,
                 refined_box_lw=1.0, refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses

    The five terms are the RPN box and class losses (accumulated over
    pyramid levels P5..P2), the refined box and class losses, and the mask
    loss. Every individual term is also added to the
    tf.GraphKeys.LOSSES collection.

    Params:
    ------
    pyramid: indexed by 'P2'..'P5'; only the dynamic height and width of
        each entry are read here
    outputs: output of build_heads; read at outputs['rpn'][p] ('anchor',
        'box', 'cls'), outputs['roi']['box'], outputs['refined'] ('box',
        'cls') and outputs['mask']['mask']
    gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
    gt_masks: A tensor of shape (G, ih, iw), {0, 1}
    num_classes: number of classes for the refined class/box heads
    base_anchors: fourth dimension used when reshaping the RPN class
        logits (presumably the number of anchors per location -- confirm)
    *_lw: loss weight of rpn, refined and mask losses

    Returns:
    -------
    total_loss: the sum of the five loss tensors
    losses: [rpn_box, rpn_cls, refined_box, refined_cls, mask] loss tensors
    batch_info: [rpn_batch_pos, rpn_batch, refine_batch_pos, refine_batch,
        mask_batch_pos, mask_batch], counts of samples with label >= 1 and
        label >= 0 that remain after _filter_negative_samples
    """
    # losses for pyramid
    losses = []
    rpn_box_losses, rpn_cls_losses = [], []
    refined_box_losses, refined_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    refine_batch = []
    mask_batch = []
    rpn_batch_pos = []
    refine_batch_pos = []
    mask_batch_pos = []

    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):

            ## assigning gt_boxes
            [assigned_gt_boxes, assigned_layer_inds] = assign_boxes(gt_boxes, [gt_boxes], [2, 3, 4, 5])

            ## build losses for PFN
            # Levels are visited from P5 down to P2.
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                # assigned_gt_boxes is indexed by (level - 2)
                splitted_gt_boxes = assigned_gt_boxes[i-2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                # anchor_scales = [2 **(i-2), 2 ** (i-1), 2 **(i)]
                # all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
                all_anchors = outputs['rpn'][p]['anchor']
                labels, bbox_targets, bbox_inside_weights = \
                    anchor_encoder(splitted_gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder')
                boxes = outputs['rpn'][p]['box']
                classes = tf.reshape(outputs['rpn'][p]['cls'], (1, height, width, base_anchors, 2))

                # Flatten everything to per-anchor rows and filter by label.
                labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                    _filter_negative_samples(tf.reshape(labels, [-1]), [
                        tf.reshape(labels, [-1]),
                        tf.reshape(classes, [-1, 2]),
                        tf.reshape(boxes, [-1, 4]),
                        tf.reshape(bbox_targets, [-1, 4]),
                        tf.reshape(bbox_inside_weights, [-1, 4])
                        ])
                # _, frac_ = _get_valid_sample_fraction(labels)
                rpn_batch.append(
                    tf.reduce_sum(tf.cast(
                        tf.greater_equal(labels, 0), tf.float32
                        )))
                rpn_batch_pos.append(
                    tf.reduce_sum(tf.cast(
                        tf.greater_equal(labels, 1), tf.float32
                        )))
                # Weighted smooth-L1, summed per box, averaged over boxes.
                rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(boxes, bbox_targets)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                # NOTE: examples with negative labels are ignored when computing one_hot_encoding and entropy losses,
                # BUT these examples still count when computing the average of softmax_cross_entropy,
                # so the loss becomes smaller by a factor (non_negative_labels / all_labels).
                # The BEST practice would still be to gather all non-negative examples.
                labels = slim.one_hot_encoding(labels, 2, on_value=1.0, off_value=0.0) # this will set -1 label to all zeros
                rpn_cls_loss = rpn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=classes)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

            # NOTE(review): the flattened source does not show where the
            # pyramid loop ends. The refined and mask sections below read
            # none of the loop variables (i, p, stride, height, width), so
            # they are placed after the loop -- confirm against the
            # original layout.

            ### refined loss
            # 1. encode ground truth
            # 2. compute distances
            rois = outputs['roi']['box']

            boxes = outputs['refined']['box']
            classes = outputs['refined']['cls']

            labels, bbox_targets, bbox_inside_weights = \
                roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

            labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]),[
                    tf.reshape(labels, [-1]),
                    tf.reshape(classes, [-1, num_classes]),
                    tf.reshape(boxes, [-1, num_classes * 4]),
                    tf.reshape(bbox_targets, [-1, num_classes * 4]),
                    tf.reshape(bbox_inside_weights, [-1, num_classes * 4])
                    ] )
            # frac, frac_ = _get_valid_sample_fraction(labels, 1)
            refine_batch.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 0), tf.float32
                    )))
            refine_batch_pos.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 1), tf.float32
                    )))

            refined_box_loss = bbox_inside_weights * _smooth_l1_dist(boxes, bbox_targets)
            # Regrouped into rows of 4, so the mean below runs over every
            # (sample, class) box slot.
            refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
            refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
            refined_box_loss = refined_box_lw * tf.reduce_mean(refined_box_loss) # * frac_
            tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)
            refined_box_losses.append(refined_box_loss)

            labels = slim.one_hot_encoding(labels, num_classes, on_value=1.0, off_value=0.0)
            refined_cls_loss = refined_cls_lw * tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=classes)
            refined_cls_loss = tf.reduce_mean(refined_cls_loss) # * frac_
            tf.add_to_collection(tf.GraphKeys.LOSSES, refined_cls_loss)
            refined_cls_losses.append(refined_cls_loss)

            ### mask loss
            # mask of shape (N, h, w, num_classes)
            masks = outputs['mask']['mask']
            # mask_shape = tf.shape(masks)
            # masks = tf.reshape(masks, (mask_shape[0], mask_shape[1],
            #                            mask_shape[2], tf.cast(mask_shape[3]/2, tf.int32), 2))
            labels, mask_targets, mask_inside_weights = \
                mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28, scope='MaskEncoder')
            labels, masks, mask_targets, mask_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]), [
                    tf.reshape(labels, [-1]),
                    masks,
                    mask_targets,
                    mask_inside_weights,
                    ])
            # _, frac_ = _get_valid_sample_fraction(labels)
            mask_batch.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 0), tf.float32
                    )))
            mask_batch_pos.append(
                tf.reduce_sum(tf.cast(
                    tf.greater_equal(labels, 1), tf.float32
                    )))
            # mask_targets = slim.one_hot_encoding(mask_targets, 2, on_value=1.0, off_value=0.0)
            # mask_binary_loss = mask_lw * tf.losses.softmax_cross_entropy(mask_targets, masks)
            # NOTE: w/o competition between classes.
            mask_targets = tf.cast(mask_targets, tf.float32)
            mask_loss = mask_lw * tf.nn.sigmoid_cross_entropy_with_logits(labels=mask_targets, logits=masks)
            mask_loss = tf.reduce_mean(mask_loss)
            # Use a constant zero loss when no sample is left after filtering.
            mask_loss = tf.cond(tf.greater(tf.size(labels), 0), lambda: mask_loss, lambda: tf.constant(0.0))
            tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
            mask_losses.append(mask_loss)

    # Collapse each per-term list into a single tensor.
    rpn_box_losses = tf.add_n(rpn_box_losses)
    rpn_cls_losses = tf.add_n(rpn_cls_losses)
    refined_box_losses = tf.add_n(refined_box_losses)
    refined_cls_losses = tf.add_n(refined_cls_losses)
    mask_losses = tf.add_n(mask_losses)
    losses = [rpn_box_losses, rpn_cls_losses, refined_box_losses, refined_cls_losses, mask_losses]
    total_loss = tf.add_n(losses)

    # Sample counts reported back to the caller for monitoring.
    rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
    refine_batch = tf.cast(tf.add_n(refine_batch), tf.float32)
    mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
    rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
    refine_batch_pos = tf.cast(tf.add_n(refine_batch_pos), tf.float32)
    mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

    return total_loss, losses, [rpn_batch_pos, rpn_batch, \
                                refine_batch_pos, refine_batch, \
                                mask_batch_pos, mask_batch]