def loss_layer(self, feature_map_i, y_true, anchors):
    '''
    Compute the four loss terms for one detection scale.
    input:
        feature_map_i: feature maps of a certain scale.
            shape: [N, 13, 13, 3*(5 + num_class)] etc.
        y_true: ground truth of the same scale.
            shape: [N, 13, 13, 3, 5 + num_class + 1] etc.
            Channels 0:4 hold the box, 4:5 the object mask, 5:-1 the class
            targets and the last channel the mix-up weight.
        anchors: forwarded to `self.reorg_layer` and used as divisor of the
            box sizes. The original note says shape [9, 2].
            NOTE(review): it is broadcast against [..., 3, 2] tensors, so
            presumably only the anchors of this scale are passed -- confirm.
    return:
        xy_loss, wh_loss, conf_loss, class_loss: each is a sum over the whole
        batch divided by the batch size.
    '''
    # grid size is in [h, w] order -- do not mix it up with [w, h]!
    grid_hw = tf.shape(feature_map_i)[1:3]
    # down-scale ratio along height and width
    ratio = tf.cast(self.img_size / grid_hw, tf.float32)
    # batch size as float; every loss term is divided by it
    batch = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

    x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = \
        self.reorg_layer(feature_map_i, anchors)

    ###########
    # get mask
    ###########

    # shape (416x416 input, 13x13 feature map as example): [N, 13, 13, 3, 1]
    object_mask = y_true[..., 4:5]

    # The ignore mask follows
    # https://github.com/pjreddie/darknet/blob/master/src/yolo_layer.c#L179
    def _has_more_images(img_idx, _mask_ta):
        return tf.less(img_idx, tf.cast(batch, tf.int32))

    def _mask_one_image(img_idx, mask_ta):
        # [13, 13, 3, 4] & [13, 13, 3] ==> [V, 4]
        # V: number of ground-truth boxes of this image
        gt_boxes = tf.boolean_mask(
            y_true[img_idx, ..., 0:4],
            tf.cast(object_mask[img_idx, ..., 0], 'bool'))
        # [13, 13, 3, 4] & [V, 4] ==> [13, 13, 3, V]
        iou = self.box_iou(pred_boxes[img_idx], gt_boxes)
        # [13, 13, 3]
        best_iou = tf.reduce_max(iou, axis=-1)
        # 1.0 where the best overlap is below 0.5, shape [13, 13, 3]
        low_overlap = tf.cast(best_iou < 0.5, tf.float32)
        return img_idx + 1, mask_ta.write(img_idx, low_overlap)

    _, mask_ta = tf.while_loop(
        cond=_has_more_images,
        body=_mask_one_image,
        loop_vars=[0, tf.TensorArray(tf.float32, size=0, dynamic_size=True)])
    # stacked: [N, 13, 13, 3] ==> [N, 13, 13, 3, 1]
    ignore_mask = tf.expand_dims(mask_ta.stack(), -1)

    # xy coordinates relative to their grid cell
    # shape: [N, 13, 13, 3, 2]
    true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
    pred_xy = pred_boxes[..., 0:2] / ratio[::-1] - x_y_offset

    def _stable_log(wh_ratio):
        # exact zeros are replaced by 1 (so their log is 0), then the value
        # is clipped before taking the log, for numerical stability
        wh_ratio = tf.where(condition=tf.equal(wh_ratio, 0),
                            x=tf.ones_like(wh_ratio), y=wh_ratio)
        return tf.log(tf.clip_by_value(wh_ratio, 1e-9, 1e9))

    # log of box size relative to the anchors
    # shape: [N, 13, 13, 3, 2]
    true_tw_th = _stable_log(y_true[..., 2:4] / anchors)
    pred_tw_th = _stable_log(pred_boxes[..., 2:4] / anchors)

    # box size punishment:
    # a box with smaller area gets a bigger weight (taken from the darknet
    # C source code).
    # shape: [N, 13, 13, 3, 1]
    img_h = tf.cast(self.img_size[0], tf.float32)
    img_w = tf.cast(self.img_size[1], tf.float32)
    box_loss_scale = 2. - (y_true[..., 2:3] / img_w) * (y_true[..., 3:4] / img_h)

    ############
    # loss_part
    ############
    # mix_up weight
    # [N, 13, 13, 3, 1]
    mix_w = y_true[..., -1:]

    xy_loss = tf.reduce_sum(
        tf.square(true_xy - pred_xy) * object_mask * box_loss_scale * mix_w) / batch
    wh_loss = tf.reduce_sum(
        tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale * mix_w) / batch

    # shape: [N, 13, 13, 3, 1]
    conf_pos_mask = object_mask
    conf_neg_mask = (1 - object_mask) * ignore_mask
    conf_ce = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=object_mask, logits=pred_conf_logits)
    # TODO: may need to balance the pos-neg by multiplying some weights
    conf_loss = conf_pos_mask * conf_ce + conf_neg_mask * conf_ce
    if self.use_focal_loss:
        alpha = 1.0
        gamma = 2.0
        # TODO: alpha should be a mask array if needed
        focal_mask = alpha * tf.pow(
            tf.abs(object_mask - tf.sigmoid(pred_conf_logits)), gamma)
        conf_loss *= focal_mask
    conf_loss = tf.reduce_sum(conf_loss * mix_w) / batch

    # shape: [N, 13, 13, 3, 1]
    # optional label smoothing of the class targets
    label_target = y_true[..., 5:-1]
    if self.use_label_smooth:
        delta = 0.01
        label_target = (1 - delta) * label_target + delta * 1. / self.class_num
    class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=label_target, logits=pred_prob_logits) * mix_w
    class_loss = tf.reduce_sum(class_loss) / batch

    return xy_loss, wh_loss, conf_loss, class_loss
def build_model(self):
    """Build the TF1 graph: placeholders, BiLSTM encoder, edge features, loss.

    Creates the input placeholders, runs a static bidirectional LSTM over the
    embedded sentence, builds one `n_hidden * 12` "edge" vector per example in
    a `tf.while_loop`, and attaches a softmax layer, the Adam training op,
    prediction/accuracy tensors, summaries and summary writers to `self`.

    Reads from `self`: max_sentence_len, max_entity_len, n_class, n_hidden,
    l2_reg, word2vec, learning_rate, timestamp, batch_size, sess.

    NOTE(review): the source formatting was lost, so the extent of each
    `tf.name_scope` was reconstructed from statement order. Name scopes only
    affect op names, except for `tr_global_step` (a `tf.Variable`), which is
    created inside the 'loss' scope here -- confirm against existing
    checkpoints.
    """
    with tf.name_scope('inputs'):
        self.sentences = tf.placeholder(tf.int32, [None, self.max_sentence_len])
        self.sentence_lens = tf.placeholder(tf.int32, None)
        self.sentence_types = tf.placeholder(tf.float32, [None, self.max_sentence_len])
        self.sentence_entity_loc_1 = tf.placeholder(tf.int32, None)
        self.sentence_entity_loc_2 = tf.placeholder(tf.int32, None)
        # The entity placeholders are kept so existing feed dicts stay valid,
        # although nothing in this graph consumes them.
        self.sentence_entity1 = tf.placeholder(tf.int32, [None, self.max_entity_len])
        self.sentence_entity2 = tf.placeholder(tf.int32, [None, self.max_entity_len])
        self.labels = tf.placeholder(tf.int32, [None, self.n_class])
        self.dropout_keep_prob = tf.placeholder(tf.float32)
        self.index = tf.placeholder(tf.int32)

        inputs = tf.nn.embedding_lookup(self.word2vec, self.sentences)
        inputs = tf.cast(inputs, tf.float32)
        inputs = tf.nn.dropout(inputs, keep_prob=self.dropout_keep_prob)

    with tf.name_scope('weights'):
        weights = {
            'attention': tf.get_variable(
                name='W_l',
                shape=[1, self.n_hidden * 4],
                initializer=tf.contrib.layers.xavier_initializer(),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
            ),
            'softmax': tf.get_variable(
                name='W_r',
                shape=[self.n_hidden * 12, self.n_class],
                initializer=tf.contrib.layers.xavier_initializer(),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
            ),
        }

    with tf.name_scope('biases'):
        biases = {
            'softmax': tf.get_variable(
                name='B_r',
                shape=[self.n_class],
                initializer=tf.zeros_initializer(),
                regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
            ),
        }

    with tf.name_scope('dynamic_rnn'):
        lstm_cell_fw = tf.contrib.rnn.LSTMCell(
            self.n_hidden,
            initializer=tf.orthogonal_initializer(),
        )
        lstm_cell_bw = tf.contrib.rnn.LSTMCell(
            self.n_hidden,
            initializer=tf.orthogonal_initializer(),
        )
        # The static RNN takes a list of per-step tensors, hence the
        # transpose to time-major followed by unstack. The final states are
        # not used.
        outputs, _, _ = tf.nn.static_bidirectional_rnn(
            lstm_cell_fw,
            lstm_cell_bw,
            tf.unstack(tf.transpose(inputs, perm=[1, 0, 2])),
            sequence_length=self.sentence_lens,
            dtype=tf.float32,
            scope='BiLSTM'
        )
        outputs = tf.reshape(tf.concat(outputs, 1),
                             [-1, self.max_sentence_len, self.n_hidden * 2])
        batch_size = tf.shape(outputs)[0]

        # One edge vector per example is accumulated here.
        adj_input = tf.TensorArray(size=batch_size, dtype=tf.float32)
        outputs_iter = tf.TensorArray(tf.float32, 1, dynamic_size=True, infer_shape=False)
        outputs_iter = outputs_iter.unstack(outputs)
        sentence_entity_loc_1_iter = tf.TensorArray(tf.int32, 1, dynamic_size=True, infer_shape=False)
        sentence_entity_loc_1_iter = sentence_entity_loc_1_iter.unstack(self.sentence_entity_loc_1)
        sentence_entity_loc_2_iter = tf.TensorArray(tf.int32, 1, dynamic_size=True, infer_shape=False)
        sentence_entity_loc_2_iter = sentence_entity_loc_2_iter.unstack(self.sentence_entity_loc_2)

        def edge_representation(i, adj_input):
            """Write the edge vector of example `i` into `adj_input`."""
            output = outputs_iter.read(i)
            entity_loc_1 = sentence_entity_loc_1_iter.read(i)
            entity_loc_2 = sentence_entity_loc_2_iter.read(i)

            # Two arrays over the same sentence: word 0 is read once up front
            # and once more inside the loop. Presumably a second copy is used
            # because TensorArray clears an element after it is read by
            # default -- keep both.
            output_iter = tf.TensorArray(tf.float32, 1, dynamic_size=True, infer_shape=False)
            output_iter = output_iter.unstack(output)
            output_iter_ = tf.TensorArray(tf.float32, 1, dynamic_size=True, infer_shape=False)
            output_iter_ = output_iter_.unstack(output)
            output_context = tf.TensorArray(size=self.max_sentence_len, dtype=tf.float32)
            output_word = output_iter_.read(0)

            # Scalar entity positions repeated to the width of a word vector.
            entity_pos_1 = tf.tile(tf.expand_dims(entity_loc_1, -1), [self.n_hidden * 2])
            entity_pos_2 = tf.tile(tf.expand_dims(entity_loc_2, -1), [self.n_hidden * 2])
            # Loop-invariant signed distances between the two entities,
            # computed once instead of once per use.
            pos_diff_12 = tf.cast(tf.subtract(entity_pos_1, entity_pos_2), dtype=tf.float32)
            pos_diff_21 = tf.cast(tf.subtract(entity_pos_2, entity_pos_1), dtype=tf.float32)

            # Defaults built from word 0; replaced below when the loop index
            # equals the entity location.
            # NOTE(review): the default is a concat (rank 1) while the
            # replacement is a stack (rank 2); the later concat/reshape only
            # works if both entities end up with the same rank -- confirm
            # that both locations always fall inside the sentence.
            entity1 = tf.concat([output_word, pos_diff_12], axis=0)
            entity2 = tf.concat([output_word, pos_diff_21], axis=0)

            for index in range(self.max_sentence_len):
                output_word = output_iter.read(index)
                entity1 = tf.cond(
                    tf.equal(index, entity_loc_1),
                    lambda: tf.stack([output_word, pos_diff_12], axis=0),
                    lambda: entity1)
                entity2 = tf.cond(
                    tf.equal(index, entity_loc_2),
                    lambda: tf.stack([output_word, pos_diff_21], axis=0),
                    lambda: entity2)
                output_context = output_context.write(
                    index, tf.concat([output_word, pos_diff_12], 0))

            output_context = tf.squeeze(output_context.stack())
            context_final = tf.transpose(output_context, perm=[1, 0])

            # Attention over the context words.
            u = tf.matmul(weights['attention'], tf.tanh(context_final))
            a = tf.nn.softmax(u)
            context_representation = tf.matmul(context_final, tf.transpose(a, [1, 0]))
            context_representation = tf.squeeze(context_representation)

            entity_concat = tf.concat([entity1, entity2], axis=0)
            entity_concat = tf.reshape(entity_concat, [self.n_hidden * 8])
            edge = tf.concat([entity_concat, context_representation], axis=0)
            adj_input = adj_input.write(i, edge)
            return (i + 1, adj_input)

        def condition(i, adj_input):
            return i < batch_size

        _, input_final = tf.while_loop(cond=condition, body=edge_representation,
                                       loop_vars=(0, adj_input))
        self.input_final = tf.reshape(input_final.stack(), [-1, self.n_hidden * 12])
        self.predict = tf.matmul(self.input_final, weights['softmax']) + biases['softmax']

    with tf.name_scope('loss'):
        self.cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.predict, labels=self.labels))
        self.global_step = tf.Variable(0, name="tr_global_step", trainable=False)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.cost, global_step=self.global_step)

    with tf.name_scope('predict'):
        self.predict_label = tf.argmax(self.predict, 1)
        self.correct_pred = tf.equal(self.predict_label, tf.argmax(self.labels, 1))
        # This is the COUNT of correct predictions in the batch, not a ratio.
        self.accuracy = tf.reduce_sum(tf.cast(self.correct_pred, tf.int32))

    summary_loss = tf.summary.scalar('loss', self.cost)
    summary_acc = tf.summary.scalar('acc', self.accuracy)
    self.train_summary_op = tf.summary.merge([summary_loss, summary_acc])
    self.test_summary_op = tf.summary.merge([summary_loss, summary_acc])
    _dir = 'logs/' + str(self.timestamp) + '_r' + str(self.learning_rate) + '_b' + str(self.batch_size) + '_l' + str(self.l2_reg)
    self.train_summary_writer = tf.summary.FileWriter(_dir + '/train', self.sess.graph)
    self.test_summary_writer = tf.summary.FileWriter(_dir + '/test', self.sess.graph)
def incremental(self, c_encoder, time_length=100, initial_input=None):
    """Generate samples one step at a time with the recurrent cell.

    Each step scales the previous sample, concatenates it with the
    conditioning slice `c_encoder[:, t, :]`, applies a gated recurrent update
    using `self._weight_internal` / `self._weight_external`, and draws the
    next sample with `tf.multinomial` from `self.affine(self.affine_relu(state))`.

    :param c_encoder: conditioning tensor indexed as `c_encoder[:, t, :]`;
        must not be None. The state is created with batch size 1.
    :param time_length: number of steps to generate; when None, the size of
        axis 1 of `c_encoder` is used.
    :param initial_input: optional scalar integer fed as the very first input
        sample; defaults to 128.
    :return: `(sample_array, state)` -- the stacked int32 samples and the
        final recurrent state.

    NOTE(review): source formatting was lost; the extent of the
    `tf.variable_scope(self.scope)` block inside the loop body was
    reconstructed (recurrent update inside, output layers outside) -- confirm.
    """
    with tf.variable_scope("Model"):
        assert c_encoder is not None
        if time_length is None:
            time_length = tf.shape(c_encoder)[1]

        init_time = tf.constant(0, dtype=tf.int32)
        if initial_input is None:
            init_input = tf.constant(128, dtype=tf.int32)
        else:
            # Previously `init_input` was left unbound on this path. The loop
            # variable must be a scalar int32 to match the sample fed back in.
            init_input = tf.reshape(tf.cast(initial_input, tf.int32), [])
        init_state = tf.zeros([1, self.num_units], dtype=tf.float32)
        init_outputs_ta = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True,
                                         clear_after_read=False)

        def condition(times, hidden_state, unused_current_input, outputs_ta):
            return tf.less(times, time_length)

        def body(times, state, current_input, outputs_ta):
            with tf.variable_scope('Scale-Input'):
                # Map the integer sample to roughly [-1, 1].
                inputs = tf.cast(current_input, dtype=tf.float32)
                inputs = tf.truediv(inputs, 127.5) - 1.0
                inputs = tf.reshape(inputs, [1, 1])

            with tf.variable_scope(self.scope):
                ct = c_encoder[:, times, :]
                mel_and_input = tf.concat([inputs, ct], axis=1)

                H = tf.matmul(state, self._weight_internal) + self._bias_internal
                X = tf.matmul(mel_and_input, self._weight_external) + self._bias_external

                Hr, Hu, He_ = tf.split(H, 3, axis=1)
                Xr, Xu, Xe_ = tf.split(X, 3, axis=1)

                u = tf.nn.sigmoid(Xu + Hu)
                r = tf.nn.sigmoid(Xr + Hr)
                candidate = tf.tanh(r * He_ + Xe_)
                state = state * u + candidate * (1 - u)

            relu_outputs = self.affine_relu(state)
            ouput_outputs = self.affine(relu_outputs)

            sample_int64 = tf.multinomial(ouput_outputs, 1, name='multinomial')
            sample_int32 = tf.cast(sample_int64[0, 0], tf.int32)
            # Log the step and the drawn sample (the original referenced an
            # undefined name here, which failed at graph construction).
            sample = tf.Print(sample_int32, [times, sample_int32], message='Generated')
            outputs_ta = outputs_ta.write(times, sample)
            times = times + 1
            return times, state, sample, outputs_ta

        _, state, _, sample_array = tf.while_loop(
            condition,
            body,
            loop_vars=[init_time, init_state, init_input, init_outputs_ta],
            parallel_iterations=10,
            swap_memory=self._hparams.swap_with_cpu,
            name='while')

        sample_array = sample_array.stack()
        return sample_array, state
entry_path = os.path.join(test_dir, entryname) if os.path.isfile(entry_path): test_files.append(entry_path) # START: Computaional Graph graph = tf.Graph() with graph.as_default(): # placeholders input_data = tf.placeholder(tf.float32, [None, X]) sequence_length = tf.placeholder(tf.int32) initial_nn_state = tf.nn.rnn_cell.BasicLSTMCell(NN).zero_state( 1, tf.float32) empty_unstacked_inputs = tf.TensorArray(tf.float32, sequence_length) unstacked_inputs = empty_unstacked_inputs.unstack(input_data) outputs_container = tf.TensorArray( tf.float32, sequence_length) # accumelates the step outputs t = tf.constant(0, dtype=tf.int32) _, _, _, _, final_outputs = tf.while_loop( cond=lambda time, *_: time < sequence_length, body=step_op, loop_vars=(t, init_memory(N, W, R), initial_nn_state, unstacked_inputs, outputs_container), parallel_iterations=32, swap_memory=True) # stack the individual steps outputs into a single (sequence_length x Y) tensor stacked_output = final_outputs.stack()
def inference(self, src_input):
    '''
    Translate `src_input` with the current model.

    `src_input` is a single sequence of source token ids; it is wrapped into
    a batch of one. Returns the stacked target ids written during greedy
    decoding, starting with SOS_ID.
    '''
    # Wrap the single sentence and its length into batch-of-one tensors.
    src_size = tf.convert_to_tensor(value=[len(src_input)], dtype=tf.int32)
    src_input = tf.convert_to_tensor(value=[src_input], dtype=tf.int32)
    src_emb = tf.nn.embedding_lookup(self.src_embedding, src_input)

    # Run the encoder with dynamic_rnn and keep its final state.
    with tf.variable_scope('encoder'):
        enc_outputs, enc_state = tf.nn.dynamic_rnn(
            self.encode_cell, src_emb, src_size, dtype=tf.float32)

    with tf.variable_scope('decoder/rnn/multi_rnn_cell'):
        # A dynamically sized TensorArray stores the generated sentence.
        generated = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True,
                                   clear_after_read=False)
        # SOS is the first decoder input.
        generated = generated.write(0, SOS_ID)
        # Loop state: (decoder state, generated ids, current step).
        initial_loop_vars = (enc_state, generated, 0)

        def continue_loop_condition(state, trg_ids, step):
            '''
            Loop condition: False once the decoder has emitted EOS or the
            maximum number of steps is reached, True otherwise.
            '''
            not_finished = tf.not_equal(trg_ids.read(step), EOS_ID)
            below_limit = tf.less(step, MAX_DECODE_LENGTH - 1)
            return tf.reduce_all(tf.logical_and(not_finished, below_limit))

        def loop_body(state, trg_ids, step):
            '''
            Loop body: run one decoder step on the last generated id and the
            current state, then update the loop variables.
            '''
            trg_input = [trg_ids.read(step)]
            # shape = (batch_size, length, embedding size)
            trg_emb = tf.nn.embedding_lookup(params=self.trg_embedding, ids=trg_input)
            # (batch_size, length, HIDDEN_SIZE)
            dec_outputs, next_state = self.decode_cell(state=state, inputs=trg_emb)
            # (batch_size * length, HIDDEN_SIZE)
            flat_outputs = tf.reshape(dec_outputs, [-1, HIDDEN_SIZE])
            # (batch_size * length, TRG_VOCAB_SIZE)
            logits = tf.matmul(flat_outputs, self.softmax_weight) + self.softmax_bias
            # Greedy choice: take the highest-scoring id.
            next_id = tf.argmax(logits, axis=1, output_type=tf.int32)
            return next_state, trg_ids.write(step + 1, next_id[0]), step + 1

        # Run tf.while_loop and take the final loop variables.
        _, final_ids, _ = tf.while_loop(
            cond=continue_loop_condition,
            body=loop_body,
            loop_vars=initial_loop_vars
        )
        return final_ids.stack()
but if I turn this into a time major tensor it would be [None, batch_size, N] does this means each batch has the same number of seq len? yes it does... """ inputs = tx.Input(n_units=None, dtype=tf.int32) lookup = tx.Lookup(inputs, seq_size=None, lookup_shape=[N, M]) input_seq = lookup.permute_batch_time() # this is a time major sequence so we can look at the number of elements seq_size = tf.shape(input_seq)[0] ta_input = tf.TensorArray(dtype=input_seq.dtype, size=seq_size, tensor_array_name="input_tensors") ta_input = ta_input.unstack(input_seq) ta_output = tf.TensorArray(dtype=tf.float32, size=seq_size, tensor_array_name="output_tensors") init_vars = (0, ta_output) cond = lambda i, _: tf.less(i, seq_size) def body1(i, y): xt = ta_input.read(i) y = y.write(i, 2 * xt) return i + 1, y
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list of tensors; the first `num_layers` entries are the
        yolo_outputs (output of yolo_body or tiny_yolo_body), the rest are
        the y_true tensors (output of preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    print_loss: bool, when True wrap the running loss in tf.Print after each layer

    Returns
    -------
    loss: scalar tensor, the sum over all layers of the xy, wh, confidence
        and class losses, each divided by the batch size

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    # NOTE(review): in the two-layer case index 3 appears in both groups;
    # this must agree with the masks used wherever y_true is produced and
    # predictions are decoded -- confirm before changing.
    anchor_mask = ([[6, 7, 8], [3, 4, 5], [0, 1, 2]]
                   if num_layers == 3 else [[3, 4, 5], [1, 2, 3]])
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[layer])[1:3], K.dtype(y_true[0]))
        for layer in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for layer in range(num_layers):
        object_mask = y_true[layer][..., 4:5]
        true_class_probs = y_true[layer][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer], anchors[anchor_mask[layer]],
            num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[layer][..., :2] * grid_shapes[layer][::-1] - grid
        raw_true_wh = K.log(y_true[layer][..., 2:4] /
                            anchors[anchor_mask[layer]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[layer][..., 2:3] * y_true[layer][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # tf.while_loop is the public entry point; `K.control_flow_ops` is
        # not part of the Keras backend API and is absent in some versions.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m,
                                       loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def map_fn(x):
    """Pack a batch of examples into fixed-length rows (used with flat_map).

    Examples are appended to a partially built row for as long as every key
    still fits within `length[k]`; otherwise the row is flushed through
    `write_packed_example` first. Produces a variable number of packed rows.

    Args:
      x: dictionary of Tensor; `x[k][i]` is the i-th example for key `k`,
        with zeros treated as padding.
    Returns:
      a dictionary of stacked Tensors with, for every key, the packed values,
      `<key>_position` and `<key>_segmentation`.
    """
    partial = empty_example.copy()
    i = tf.zeros([], dtype=tf.int32)
    dynamic_batch_size = tf.shape(x[keys[0]])[0]

    # One output array for the values and one for the positions of each key.
    outputs = {}
    for k in keys:
        for name in (k, k + '_position'):
            outputs[name] = tf.TensorArray(
                tf.int32, size=0, dynamic_size=True,
                element_shape=[length[k]])

    def cond_fn(i, partial, outputs):
        del partial, outputs
        return i < dynamic_batch_size

    def body_fn(i, partial, outputs):
        """Body function for while_loop.

        Args:
          i: integer scalar
          partial: dictionary of Tensor (partially-constructed example)
          outputs: dictionary of TensorArray
        Returns:
          A triple containing the new values of the inputs.
        """
        # Strip padding: keep as many leading items as there are non-zeros.
        one_example = {}
        for k in keys:
            row = tf.cast(x[k][i], tf.int32)
            num_nonzero = tf.reduce_sum(tf.cast(tf.not_equal(row, 0), tf.int32))
            one_example[k] = row[:num_nonzero]

        # The example can be appended only if it fits for every key.
        can_append = True
        for k in keys:
            fits = tf.less_equal(
                tf.size(partial[k]) + tf.size(one_example[k]), length[k])
            can_append = tf.logical_and(can_append, fits)

        def false_fn():
            # Does not fit: flush the current row first.
            return write_packed_example(partial, outputs)

        def true_fn():
            return partial, outputs

        partial, outputs = tf.cond(can_append, true_fn, false_fn)

        new_partial = {}
        for k in keys:
            new_seq = one_example[k][:length[k]]
            new_seq_len = tf.size(new_seq)
            new_partial[k] = tf.concat([partial[k], new_seq], 0)
            new_partial[k + '_position'] = tf.concat(
                [partial[k + '_position'],
                 tf.range(new_seq_len, dtype=tf.int32)], 0)
        partial = new_partial
        return i + 1, partial, outputs

    partial_invariants = {k: tf.TensorShape([None]) for k in keys_etc}
    output_invariants = {k: tf.TensorShape(None) for k in keys_etc}
    i, partial, outputs = tf.while_loop(
        cond_fn, body_fn, (i, partial, outputs),
        shape_invariants=(
            tf.TensorShape([]),
            partial_invariants,
            output_invariants,
        ))

    # Flush whatever is left in the last partially built row.
    partial, outputs = write_packed_example(partial, outputs)
    packed = {k: outputs[k].stack() for k in keys_etc}
    for k in keys:
        # A new segment starts wherever the position is 0; padding
        # (value 0) gets segment id 0.
        segment_starts = tf.cast(tf.equal(packed[k + '_position'], 0), tf.int32)
        not_padding = tf.cast(tf.not_equal(packed[k], 0), tf.int32)
        packed[k + '_segmentation'] = (
            tf.cumsum(segment_starts, axis=1) * not_padding)
    return packed
def yolo_loss(args, anchors, ignore_thresh=.5, seg_loss_weight=0.1, print_loss=False):
    '''Return yolo_loss tensor (single-scale variant with an attention-map term)

    Parameters
    ----------
    args: list of tensors laid out as
        [yolo_output, att_map, y_true, gt_map]
    anchors: array, shape=(N, 2), wh
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss
    seg_loss_weight: float, currently unused; kept for backward compatibility.
        NOTE(review): the attention loss is weighted by a hard-coded 0.5
        instead -- confirm which weight is intended.
    print_loss: bool, currently unused; kept for backward compatibility

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    # Only one scale is sliced out of `args`, so the per-layer indexing below
    # is only valid when num_layers == 1.
    yolo_outputs = args[:1]
    att_map = args[1]
    y_true = args[2:3]
    gt_map = args[3]
    # Two scales were deleted, so the fallback mask is [[0, 1, 2]].
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[0, 1, 2]]
    # x32 gives the original input size
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[layer])[1:3], K.dtype(y_true[0]))
                   for layer in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    def smooth_L1(y_true, y_pred, sigma=3.0):
        """Element-wise smooth L1 loss between `y_true` and `y_pred`.

        With x = |y_true - y_pred|:
            0.5 * sigma^2 * x^2     if x < 1 / sigma^2
            x - 0.5 / sigma^2       otherwise

        Args
            sigma: defines the point where the loss changes from L2 to L1.
        """
        sigma_squared = sigma ** 2
        regression_diff = K.abs(y_true - y_pred)
        return tf.where(
            K.less(regression_diff, 1.0 / sigma_squared),
            0.5 * sigma_squared * K.pow(regression_diff, 2),
            regression_diff - 0.5 / sigma_squared
        )

    for layer in range(num_layers):
        object_mask = y_true[layer][..., 4:5]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[layer], anchors[anchor_mask[layer]],
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[layer][..., :2] * grid_shapes[layer][::-1] - grid
        raw_true_wh = K.log(y_true[layer][..., 2:4] /
                            anchors[anchor_mask[layer]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[layer][..., 2:3] * y_true[layer][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[layer][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # tf.while_loop is the public entry point; `K.control_flow_ops` is
        # not part of the Keras backend API and is absent in some versions.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m,
                                       loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * smooth_L1(
            raw_true_wh, raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        att_loss = K.binary_crossentropy(gt_map, att_map, from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        att_loss = K.sum(att_loss) / mf * 0.5
        loss += xy_loss + wh_loss + confidence_loss + att_loss

    return K.expand_dims(loss, axis=0)
def generate_instances(indices, arch, window_size, max_depth=None, codes_points=None):
    """Build the matrix of word-index training instances for one sentence.

    Every word of the (subsampled) sentence is used as a target word; its
    context words are those inside a randomly shrunk window centered on it.
    The layout of the result depends on the architecture ('skip_gram',
    'cbow') and on the training algorithm ('negative_sampling',
    'hierarchical_softmax'):

    * skip gram yields one instance per (target, context) pair;
    * cbow yields a single instance per target word, holding the context
      padded to `2 * window_size`, the true context size and the target.

    When `codes_points` is given (hierarchical softmax), the word to be
    predicted (the last column: context word for 'skip_gram', target word for
    'cbow') is replaced by its row of `codes_points`, i.e. its 'codes' and
    'points' in the Huffman tree (see `_build_binary_tree`).

    Args:
      indices: rank-1 int tensor, the word indices within a sentence after
        subsampling.
      arch: scalar string, architecture ('skip_gram' or 'cbow').
      window_size: int scalar, num of words on the left or right side of the
        target word within a window.
      max_depth: (Optional) int scalar, the max depth of the Huffman tree.
      codes_points: (Optional) an int tensor of shape
        [vocab_size, 2*max_depth+1] where each row holds the codes (0-1
        binary values) padded to `max_depth`, the points (non-leaf node
        indices) padded to `max_depth`, and as last entry the true length of
        code and point (<= `max_depth`).

    Returns:
      instances: an int64 tensor holding word indices, with shape
        arch=='skip_gram', algm=='negative_sampling': [N, 2]
        arch=='cbow', algm=='negative_sampling': [N, 2*window_size+2]
        arch=='skip_gram', algm=='hierarchical_softmax': [N, 2*max_depth+2]
        arch=='cbow', algm=='hierarchical_softmax':
          [N, 2*window_size+2*max_depth+2]

    Raises:
      ValueError: if `arch` is neither 'skip_gram' nor 'cbow'.
    """
    def per_target_fn(index, init_array):
        """Write the instances of the target word at position `index`."""
        # Shrink the window by a random amount on both sides.
        reduced_size = tf.random.uniform([], maxval=window_size, dtype='int32')
        left_start = tf.maximum(index - window_size + reduced_size, 0)
        right_stop = tf.minimum(index + 1 + window_size - reduced_size,
                                tf.size(indices))
        left = tf.range(left_start, index)
        right = tf.range(index + 1, right_stop)
        context = tf.gather(indices, tf.concat([left, right], axis=0))

        if arch == 'skip_gram':
            # Repeat `indices[index]` once per context word: [N, 2]
            targets = tf.fill(tf.shape(context), indices[index])
            window = tf.stack([targets, context], axis=1)
        elif arch == 'cbow':
            true_size = tf.size(context)
            # Pad `context` to length `2 * window_size`, then append the
            # true size and the target word.
            padded = tf.pad(context, [[0, 2 * window_size - true_size]])
            window = tf.concat(
                [padded, [true_size, indices[index]]], axis=0)
            # [1, 2*window_size + 2]
            window = tf.expand_dims(window, axis=0)
        else:
            raise ValueError('architecture must be skip_gram or cbow.')

        if codes_points is not None:
            # Swap the last column for its codes/points row:
            # [N, 2*max_depth + 2] or [1, 2*window_size+2*max_depth+2]
            tree_info = tf.gather(codes_points, window[:, -1])
            window = tf.concat([window[:, :-1], tree_info], axis=1)

        return index + 1, init_array.write(index, window)

    size = tf.size(indices)
    # A tensor array with one slot per target word.
    init_array = tf.TensorArray('int64', size=size, infer_shape=False)
    _, result_array = tf.while_loop(lambda i, ta: i < size,
                                    per_target_fn,
                                    [0, init_array],
                                    back_prop=False)
    instances = tf.cast(result_array.concat(), 'int64')

    # Restore the static width lost by `infer_shape=False`.
    if arch == 'skip_gram':
        width = 2 if max_depth is None else 2 * max_depth + 2
    elif max_depth is None:
        width = 2 * window_size + 2
    else:
        width = 2 * window_size + 2 * max_depth + 2
    instances.set_shape([None, width])

    return instances
def _create_ta(s):
    """Create a TensorArray of `num_steps` elements matching `s`.

    Each element has dtype `s.dtype` and shape `[batch_size] + s.shape`;
    `num_steps`, `batch_size` and `clear_after_read` come from the enclosing
    scope.
    """
    element_shape = tf.TensorShape([batch_size]).concatenate(s.shape)
    return tf.TensorArray(
        dtype=s.dtype,
        size=num_steps,
        clear_after_read=clear_after_read,
        element_shape=element_shape)
def synthesize(self, samples):
    """Decode acoustic features step by step, feeding each output back in.

    Args:
        samples: the data source to be synthesized; "input" and
            "input_length" are always read, "output" or "speaker" only
            when speaker embeddings are enabled in the hparams
    Returns:
        after_outs: the features returned by `self._synthesize_post_net`
        attn_weights_stack: the attention weights of every decoding step,
            stacked and transposed with `[1, 0, 2]`
    """
    hparams = self.hparams
    inputs = samples["input"]
    input_length = samples["input_length"]
    batch = tf.shape(inputs)[0]
    # shape: [batch, x_steps, eunits]
    encoder_output = self.encoder(inputs, training=False)

    if hparams.use_speaker:
        if hparams.use_pretrained_speaker_model:
            # hasattr(self, 'speaker_embedding') must be True here
            reference = self.cut_acoustic_feature(
                samples["output"], hparams.num_frame_for_embedding)
            speaker_embedding = self.speaker_embedding(reference)
        else:
            speaker_embedding = self.speaker_embedding(samples['speaker'])
        encoder_output = self.concat_speaker_embedding(
            encoder_output, speaker_embedding)

    rnn_states, prev_attn_weight, context = self.initialize_states(
        encoder_output, input_length)
    context_dim = context.shape[-1]
    accum_attn_weight = prev_attn_weight

    out_ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    logit_ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    attn_ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

    # the first decoder input is an all-zero frame
    frame = tf.zeros([batch, self.feat_dim * self.reduction_factor])
    # upper bound on the number of steps, derived from the first input length
    step_limit = (hparams.max_output_length *
                  input_length[0] // self.reduction_factor)
    for step in tf.range(step_limit):
        frame, logit, rnn_states, step_weight, context = self.time_propagate(
            encoder_output, input_length, frame, rnn_states,
            accum_attn_weight, prev_attn_weight, context, training=False)
        # pin the static shapes of the loop-carried tensors
        step_weight = tf.ensure_shape(step_weight, [None, None])
        context = tf.ensure_shape(context, [None, context_dim])

        out_ta = out_ta.write(step, frame)
        logit_ta = logit_ta.write(step, logit)
        attn_ta = attn_ta.write(step, step_weight)

        accum_attn_weight = accum_attn_weight + step_weight
        prev_attn_weight = step_weight

        # stop as soon as any stop probability exceeds the threshold
        if tf.reduce_any(tf.nn.sigmoid(logit) > hparams.end_prob):
            break

    # [batch, y_steps, reduction_factor]
    logits_stack = tf.transpose(logit_ta.stack(), [1, 0, 2])
    # before_outs: [batch, y_steps, feat_dim*reduction_factor]
    before_outs = tf.transpose(out_ta.stack(), [1, 0, 2])
    attn_weights_stack = tf.transpose(attn_ta.stack(), [1, 0, 2])
    after_outs = self._synthesize_post_net(before_outs, logits_stack)
    return after_outs, attn_weights_stack
def call(self, samples, training: bool = None):
    """Teacher-forced forward pass over every frame of `samples['output']`.

    Args:
        samples: mapping that provides "input", "input_length" and
            "output"; "speaker" is read when speaker ids are used instead
            of a pretrained speaker model
        training: forwarded to the encoder, `time_propagate` and the postnet
    Returns:
        before_outs: decoder outputs before the postnet
        after_outs: `before_outs` plus the postnet residual
        logits_stack: the per-step logits
        attn_weights_stack: attention weights, shape
            [batch, y_steps, x_steps]
    """
    hparams = self.hparams
    inputs = samples["input"]
    input_length = samples["input_length"]
    # shape: [batch, x_steps, eunits]
    encoder_output = self.encoder(inputs, training=training)

    if hparams.use_speaker:
        if not hparams.use_pretrained_speaker_model:
            speaker_embedding = self.speaker_embedding(samples['speaker'])
        elif hasattr(self, 'speaker_embedding'):
            reference = self.cut_acoustic_feature(
                samples["output"], hparams.num_frame_for_embedding)
            speaker_embedding = self.speaker_embedding(reference)
        else:
            # for the first time of evaluate_step(not initialize the
            # speaker_embedding model yet): use an all-zero embedding
            batch = tf.shape(encoder_output)[0]
            speaker_embedding = tf.zeros(
                [batch, hparams.speaker_embedding_dim], dtype=tf.float32)
        encoder_output = self.concat_speaker_embedding(
            encoder_output, speaker_embedding)

    if hparams.use_gst:
        reference_state = self.reference_encoder(samples["output"])
        # one copy of the style tokens per batch entry
        tiled_tokens = tf.tile(
            tf.expand_dims(self.gst_tokens, axis=0),
            [tf.shape(encoder_output)[0], 1, 1])
        attended = self.style_attn(
            tiled_tokens,
            tf.tanh(tiled_tokens),
            tf.expand_dims(reference_state, axis=1),
            mask=None)
        # repeat the style embedding along the encoder time axis
        style_embeddings = tf.tile(
            attended[0], [1, tf.shape(encoder_output)[1], 1])
        encoder_output = tf.concat(
            [encoder_output, style_embeddings], axis=-1)

    targets = samples['output']
    ori_lens = tf.shape(targets)[1]
    if self.reduction_factor > 1:
        decoder_inputs = self._pad_and_reshape(targets, ori_lens)
    else:
        decoder_inputs = targets
    decoder_inputs = self.initialize_input_y(decoder_inputs)

    rnn_states, prev_attn_weight, context = self.initialize_states(
        encoder_output, input_length)
    context_dim = context.shape[-1]
    accum_attn_weight = prev_attn_weight

    out_ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    logit_ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    attn_ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

    y_steps = tf.shape(decoder_inputs)[1]
    for step in tf.range(y_steps):
        out, logit, rnn_states, step_weight, context = self.time_propagate(
            encoder_output, input_length, decoder_inputs[:, step, :],
            rnn_states, accum_attn_weight, prev_attn_weight, context,
            training=training)
        # pin the static shapes of the loop-carried tensors
        step_weight = tf.ensure_shape(step_weight, [None, None])
        context = tf.ensure_shape(context, [None, context_dim])

        out_ta = out_ta.write(step, out)
        logit_ta = logit_ta.write(step, logit)
        attn_ta = attn_ta.write(step, step_weight)

        prev_attn_weight = step_weight
        accum_attn_weight = accum_attn_weight + step_weight

    # [batch, y_steps, reduction_factor]
    logits_stack = tf.transpose(logit_ta.stack(), [1, 0, 2])
    logits_stack = self._pad_and_reshape(logits_stack, ori_lens, reverse=True)

    # [batch, y_steps, feat_dim]
    before_outs = tf.transpose(out_ta.stack(), [1, 0, 2])
    before_outs = self._pad_and_reshape(before_outs, ori_lens, reverse=True)
    if hparams.clip_outputs:
        floor = -hparams.clip_max_value - hparams.clip_lower_bound_decay
        before_outs = tf.minimum(
            tf.maximum(before_outs, floor), hparams.clip_max_value)

    # attn_weights_stack, shape: [batch, y_steps, x_steps]
    attn_weights_stack = tf.transpose(attn_ta.stack(), [1, 0, 2])

    # after_outs, shape: [batch, y_steps, feat_dim]
    after_outs = before_outs + self.postnet(before_outs, training=training)
    if hparams.clip_outputs:
        floor = -hparams.clip_max_value - hparams.clip_lower_bound_decay
        after_outs = tf.minimum(
            tf.maximum(after_outs, floor), hparams.clip_max_value)

    return before_outs, after_outs, logits_stack, attn_weights_stack
def build_batch_grided_gt(y_true, mask, size, num_classes, dtype, use_tie_breaker):
    """Scatter ground-truth boxes into a dense per-cell, per-anchor grid.

    Args:
        y_true: mapping with the entries 'bbox', 'classes' and
            'best_anchors'. 'bbox' is indexed as [batch, box, component];
            components 0:2 are scaled by `size` to pick a grid cell and
            components 2:4 are tested against zero to detect padding.
        mask: the anchor ids handled by this output, ex. [1, 2, 3]; a box is
            written only when one of its 'best_anchors' equals an entry.
        size: the dimensions of this output grid, for regular, it
            progresses from 13, to 26, to 52
        num_classes: depth of the one-hot class encoding.
        dtype: dtype of the boxes, classes and of the returned grid.
        use_tie_breaker: when true, every best anchor of a box is
            considered and `depth_track` decides whether a lower-ranked
            anchor may still claim a slot; when false only the first best
            anchor is used.

    Return:
        tf.Tensor of shape
        [batch, size, size, len(mask), num_classes + 4 + 1]; each written
        entry is the concatenation of the box, a constant 1 and the one-hot
        class vector.
    """
    boxes = tf.cast(y_true['bbox'], dtype)
    classes = tf.one_hot(tf.cast(y_true['classes'], dtype=tf.int32),
                         depth=num_classes,
                         dtype=dtype)
    anchors = tf.cast(y_true['best_anchors'], dtype)

    batches = tf.shape(boxes)[0]
    num_boxes = tf.shape(boxes)[1]
    len_masks = tf.shape(mask)[0]

    # dense output grid, all zeros until boxes are scattered in at the end
    full = tf.zeros([batches, size, size, len_masks, num_classes + 4 + 1],
                    dtype=dtype)
    # per-slot marker: 0 = untouched, 1 = last touched by a rank-0 anchor,
    # 2 = claimed by a lower-ranked anchor
    depth_track = tf.zeros((batches, size, size, len_masks), dtype=tf.int32)

    # grid cell indices obtained by scaling box components 0 and 1 by `size`
    x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
    y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)

    # repeat every best-anchor id len_masks times so it can be compared
    # element-wise with `mask`
    anchors = tf.repeat(tf.expand_dims(anchors, axis=-1), len_masks, axis=-1)

    # scatter indices and values are accumulated here, one entry per write
    update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
    update = tf.TensorArray(dtype, size=0, dynamic_size=True)
    const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
    mask = tf.cast(mask, dtype=dtype)

    i = 0  # next free slot in the two TensorArrays
    anchor_id = 0
    for batch in range(batches):
        for box_id in range(num_boxes):
            # skip boxes whose components 2:4 are all zero
            if K.all(tf.math.equal(boxes[batch, box_id, 2:4], 0)):
                continue
            # skip boxes whose components 0:2 fall outside [0, 1)
            if K.any(tf.math.less(boxes[batch, box_id, 0:2], 0.0)) or K.any(
                    tf.math.greater_equal(boxes[batch, box_id, 0:2], 1.0)):
                continue
            if use_tie_breaker:
                for anchor_id in range(tf.shape(anchors)[-1]):
                    index = tf.math.equal(anchors[batch, box_id, anchor_id], mask)
                    if K.any(index):
                        # position of the matching anchor inside `mask`
                        p = tf.cast(K.argmax(tf.cast(index, dtype=tf.int32)),
                                    dtype=tf.int32)
                        uid = 1

                        used = depth_track[batch, y[batch, box_id], x[batch, box_id], p]
                        if anchor_id == 0:
                            # write the box to the update list
                            # the boxes output from yolo are for some reason have the x and y indexes swapped for some reason, I am not sure why
                            """peculiar"""
                            update_index = update_index.write(
                                i, [batch, y[batch, box_id], x[batch, box_id], p])
                            value = K.concatenate([
                                boxes[batch, box_id], const, classes[batch, box_id]
                            ])
                            update = update.write(i, value)
                        elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
                            # a lower-ranked anchor may write only when the
                            # slot is untouched or held by another
                            # lower-ranked anchor
                            uid = 2
                            # write the box to the update list
                            # the boxes output from yolo are for some reason have the x and y indexes swapped for some reason, I am not sure why
                            """peculiar"""
                            update_index = update_index.write(
                                i, [batch, y[batch, box_id], x[batch, box_id], p])
                            value = K.concatenate([
                                boxes[batch, box_id], const, classes[batch, box_id]
                            ])
                            update = update.write(i, value)

                        # record who touched this slot last
                        # NOTE(review): the nesting of this update and of
                        # `i += 1` was reconstructed from flattened source —
                        # confirm against the upstream file
                        depth_track = tf.tensor_scatter_nd_update(
                            depth_track, [(batch, y[batch, box_id], x[batch, box_id], p)],
                            [uid])
                        i += 1
            else:
                # no tie breaking: only the first best anchor is considered
                index = tf.math.equal(anchors[batch, box_id, 0], mask)
                if K.any(index):
                    p = tf.cast(K.argmax(tf.cast(index, dtype=tf.int32)),
                                dtype=tf.int32)
                    update_index = update_index.write(
                        i, [batch, y[batch, box_id], x[batch, box_id], p])
                    value = K.concatenate(
                        [boxes[batch, box_id], const, classes[batch, box_id]])
                    update = update.write(i, value)
                    i += 1

    # if the size of the update list is not 0, do an update, other wise, no boxes and pass an empty grid
    if tf.math.greater(update_index.size(), 0):
        update_index = update_index.stack()
        update = update.stack()
        # scatter-add: entries that share an index are summed
        full = tf.tensor_scatter_nd_add(full, update_index, update)
    return full
def build_model(self):
    """Assemble the network graph layer by layer and store the pieces on `self`.

    In order, this method creates:
      1. a first `models.RCL` layer fed by `self.x`;
      2. the mixture parameters `self.mu`, `self.sigma`, `self.pi` and the
         normalised `self.pi_normed`;
      3. plain RCL layers up to the first entry of `self.em_layers`;
      4. one masked RCL layer per entry of `self.em_layers`, whose weight
         mask is derived from cluster assignments (`self.cluster`,
         `self.max_idx`, `self.w_mask`, `self.w_masked`);
      5. the remaining RCL layers up to `len(self.conv)`;
      6. the feed-forward stack (optionally masked the same way) ending in
         `self.output` / `self.output_layer`.

    Every created layer is printed as it is built. The method returns
    nothing; all results are stored as attributes.

    NOTE(review): the block nesting below was reconstructed from a source
    whose line breaks were lost — confirm the `with` / `for` extents against
    the upstream file.
    """
    print(' {:{length}} : {}'.format('x', self.x, length=12))
    layer_count=0
    self.convs = []
    with tf.name_scope('conv'+str(layer_count+1)):
        layer = models.RCL(input=self.x,
                           weight_size=self.weight_size[layer_count],
                           pool=self.pool[layer_count],
                           pool_size=self.pool_size[layer_count],
                           num_iter=self.iter[layer_count],
                           nonlinearity=self.nonlinearity,
                           use_dropout=self.use_dropout,
                           keep_prob=self.keep_probs[layer_count],
                           use_batchnorm=self.use_batchnorm,
                           std=self.std)
    self.convs.append(layer)
    print(' {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
    layer_count += 1

    # size of the last weight dimension of the next layer; it sets the
    # dimensionality of the mixture parameters below
    length = self.weight_size[layer_count][-1]#layer.get_layer().get_shape()[2].value
    # NOTE(review): mu_init_value and sigma_init_value are never read again
    # in this method
    mu_init_value = np.zeros([self.cluster_num, length])
    sigma_init_value = np.zeros([self.cluster_num, length, length])
    # NOTE(review): bare `cluster_num` here, `self.cluster_num` everywhere
    # else — confirm a module-level `cluster_num` exists
    pi_init_value = np.ones([cluster_num]) / self.cluster_num
    # one mean vector and one [length, length] matrix per cluster
    self.mu = [tf.Variable(tf.random_normal([length], dtype=tf.float64), name='mu'+str(t)) for t in range(self.cluster_num)]
    self.sigma = [tf.Variable(tf.random_normal([length,length], dtype=tf.float64), name='sigma'+str(t)) for t in range(self.cluster_num)]
    self.pi = tf.Variable(tf.multiply(tf.ones([1, self.cluster_num], tf.float64), pi_init_value), trainable=True,name='pi')
    # force the sum of elements of pi vector to be 1.
    self.pi_normed = tf.div(tf.maximum(self.pi, 0.0), tf.reduce_sum(tf.maximum(self.pi, 0.0)))

    ### convs before em
    # NOTE(review): this loop reads self.w_masked and self.b, which this
    # method only assigns further down (self.w_masked) or not at all
    # (self.b) — confirm they are set elsewhere when the loop body runs
    for i in range(layer_count, self.em_layers[0]-1):
        layer = models.RCL(input=layer.get_layer(),
                           weight_size=self.weight_size[layer_count],
                           weight=self.w_masked,
                           biases=self.b,
                           pool=self.pool[layer_count],
                           pool_size=self.pool_size[layer_count],
                           num_iter=self.iter[layer_count],
                           nonlinearity=self.nonlinearity,
                           use_dropout=self.use_dropout,
                           keep_prob=self.keep_probs[layer_count],
                           use_batchnorm=self.use_batchnorm,
                           std=self.std,
                           name='conv'+str(layer_count+1))
        self.convs.append(layer)
        print(' {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
        layer_count += 1

    ### em
    # per-EM-layer bookkeeping: raw weights, masks, masked weights, cluster
    # assignments and, per output unit, the index of the strongest cluster
    self.em_w=[]
    self.w_mask=[]
    self.w_masked=[]
    self.cluster = []
    self.max_idx = []
    for em in range(len(self.em_layers)):
        with tf.name_scope('conv'+str(layer_count+1)+'em'):
            self.em_w.append(tf.Variable(
                tf.random_normal(
                    self.weight_size[layer_count],
                    stddev=self.std,
                    dtype=tf.float32),
                name='w'
            ))
            if em == 0:
                gamma_elem = []
                Q_elem = []
                # range (max - min) of the weights over the first two axes
                self.x_batch = tf.reduce_max(self.em_w[-1], axis=[0,1]) - tf.reduce_min(self.em_w[-1], axis=[0,1])
                for w in range(self.weight_size[layer_count][-2]):
                    # row w is replicated cluster_num times and scored by
                    # gmm_pdf_log against every (mu, sigma) pair
                    x_pdf = gmm_pdf_log(mu=self.mu,
                                        sigma=self.sigma,
                                        x=tf.reshape(tf.tile(self.x_batch[w,:],[self.cluster_num]),[self.cluster_num,-1]), #[3, 100]
                                        sess=self.sess)
                    pi_pdf = tf.multiply(self.pi_normed, x_pdf)
                    # normalise so the entries sum to one; the 1e-30 floor
                    # guards against division by zero
                    gamma_tmp = tf.reshape(tf.div(pi_pdf, tf.maximum(tf.reduce_sum(pi_pdf),1e-30)), [-1])
                    gamma_tmp = tf.stop_gradient(gamma_tmp) # fix the value. do not calculate the gradient of this term.
                    gamma_elem.append(gamma_tmp)
                    tmp = tf.reduce_sum(tf.multiply(gamma_tmp, tf.log(pi_pdf+1e-30)))
                    Q_elem.append(tmp)
                # sum over all rows of gamma * log(pi * pdf)
                self.Q = tf.reduce_sum(Q_elem)
                self.Q_summary = tf.summary.scalar("Q", self.Q)
                self.gamma = tf.stack(gamma_elem)
                # every row is assigned to the cluster with the largest gamma
                self.cluster.append(tf.cast(tf.argmax(self.gamma, axis=1), dtype=tf.int32))
                print(' {:{length}} : {}'.format('cluster', self.cluster[-1], length=12))

                # per cluster: mean of the x_batch rows assigned to it
                i = tf.constant(0)
                w_mean = tf.TensorArray(dtype=tf.float32, size=self.cluster_num)#tf.constant(0.0, shape=tf.TensorShape([]))
                cond = lambda i,w_mean : i<self.cluster_num
                x_batch = tf.reduce_max(self.em_w[-1], axis=[0,1]) - tf.reduce_min(self.em_w[-1], axis=[0,1])
                def func(i,w_mean):
                    mean = tf.reduce_mean(tf.boolean_mask(x_batch, tf.equal(self.cluster[-1],i)), axis=[0])
                    w_mean = w_mean.write(i, mean)
                    return i+1, w_mean
                i, w_mean = tf.while_loop(cond, func, [i,w_mean])
                # per column: index of the cluster with the largest mean
                # NOTE(review): TensorArray.pack() is the pre-1.0 name of
                # stack() — confirm the TensorFlow version in use provides it
                self.max_idx.append(tf.cast(tf.argmax(w_mean.pack(), axis=0), tf.int32))
                print(' {:{length}} : {}'.format('max_idx', self.max_idx[-1], length=12))
            else: # em!= 0
                # later EM layers reuse the previous layer's max_idx as
                # their cluster assignment
                self.cluster.append(self.max_idx[-1])
                print(' {:{length}} : {}'.format('cluster', self.cluster[-1], length=12))

                i = tf.constant(0)
                w_mean = tf.TensorArray(dtype=tf.float32, size=self.cluster_num)#tf.constant(0.0, shape=tf.TensorShape([]))
                cond = lambda i,w_mean : i<self.cluster_num
                x_batch = tf.reduce_max(self.em_w[-1], axis=[0,1]) - tf.reduce_min(self.em_w[-1], axis=[0,1])
                def func(i,w_mean):
                    mean = tf.reduce_mean(tf.boolean_mask(x_batch, tf.equal(self.cluster[-1],i)), axis=[0])
                    w_mean = w_mean.write(i, mean)
                    return i+1, w_mean
                i, w_mean_ = tf.while_loop(cond, func, [i,w_mean])
                self.max_idx.append(tf.cast(tf.argmax(w_mean_.pack(), axis=0), tf.int32))
                print(' {:{length}} : {}'.format('max_idx', self.max_idx[-1], length=12))

            # build the mask: entry i of the array marks the inputs whose
            # cluster equals the strongest cluster of output unit i
            i = tf.constant(0)
            w_mask_array = tf.TensorArray(dtype=tf.float32, size=self.weight_size[layer_count][-1])
            cond2 = lambda i,w_mask_array : i<self.weight_size[layer_count][-1]
            def func2(i, w_mask_array):
                w_mask_array_column = tf.cast(tf.equal(self.cluster[-1], self.max_idx[-1][i]), dtype=tf.float32)
                w_mask_array = w_mask_array.write(i, w_mask_array_column)
                return i+1, w_mask_array
            i, w_mask_array = tf.while_loop(cond2, func2, [i, w_mask_array])
            w_mask_pack = tf.transpose(w_mask_array.pack())
            # replicate the mask weight_size[...][1] times and add a
            # leading axis before multiplying it into the weights
            self.w_mask.append(tf.expand_dims(tf.stack([w_mask_pack for i in range(self.weight_size[layer_count][1])]), 0))
            self.w_masked.append(tf.multiply(self.em_w[-1], self.w_mask[-1]))
            # end if-else
        layer = models.RCL(input=layer.get_layer(),
                           weight_size=self.weight_size[layer_count],
                           weight=self.w_masked[-1],
                           pool=self.pool[layer_count],
                           pool_size=self.pool_size[layer_count],
                           num_iter=self.iter[layer_count],
                           nonlinearity=self.nonlinearity,
                           use_dropout=self.use_dropout,
                           keep_prob=self.keep_probs[layer_count],
                           use_batchnorm=self.use_batchnorm,
                           std=self.std,
                           name='conv'+str(layer_count+1))
        self.convs.append(layer)
        print(' {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
        layer_count += 1
        # stop once every configured conv layer has been built
        if layer_count>=len(self.conv):
            break
    # end for

    ### left conv layers
    for i in range(layer_count, len(self.conv)):
        layer = models.RCL(input=layer.get_layer(),
                           weight_size=self.weight_size[layer_count],
                           pool=self.pool[layer_count],
                           pool_size=self.pool_size[layer_count],
                           num_iter=self.iter[layer_count],
                           nonlinearity=self.nonlinearity,
                           use_dropout=self.use_dropout,
                           keep_prob=self.keep_probs[layer_count],
                           use_batchnorm=self.use_batchnorm,
                           std=self.std,
                           name='conv'+str(layer_count+1))
        self.convs.append(layer)
        print(' {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
        layer_count += 1

    # flatten the last conv output to [-1, feed_forwards[0]]
    network = tf.reshape(layer.get_layer(), shape=[-1, self.feed_forwards[0]])# * self.keep_probs[1]])
    self.flatten = network
    print(' {:{length}} : {}'.format('flatten', self.flatten, length=12))

    if len(self.feed_forwards) == 2:
        # only an output layer: no hidden feed-forward layers
        network = models.feedforward(input = network,
                                     weight_size=[self.feed_forwards[0], self.feed_forwards[1]],
                                     nonlinearity=None,
                                     use_dropout = False,
                                     use_batchnorm = False,
                                     std=self.std,
                                     offset=self.offset,
                                     scale=self.scale,
                                     epsilon=self.epsilon,
                                     name='output')
        self.output = network#.get_layer()
        self.output_layer = network.get_layer()
        print(' {:{length}} : {}'.format('feedforward'+str(1), self.output_layer, length=12))
    else:
        self.forwards=[]
        for f in range(len(self.feed_forwards)-1 -1):
            if layer_count+1+f in self.em_layers:
                # masked feed-forward layer: same clustering/masking scheme
                # as the conv EM layers, applied to a 2-D weight matrix
                with tf.name_scope('feedforward'+str(f+1)+'em'):
                    self.em_w.append(tf.Variable(
                        tf.random_normal(
                            [self.feed_forwards[f], self.feed_forwards[f+1]],
                            stddev=self.std,
                            dtype=tf.float32),
                        name='w'
                    ))
                    conv_len1 = self.convs[-1].get_layer().get_shape()[1].value
                    conv_len2 = self.convs[-1].get_layer().get_shape()[2].value
                    if (f==0) and (conv_len1 > 1 or conv_len2>1):
                        # first dense layer after a conv output larger than
                        # 1x1: repeat the assignment conv_len1*conv_len2
                        # times
                        self.cluster.append(tf.tile(self.max_idx[-1], [conv_len1*conv_len2]))
                    else:
                        self.cluster.append(self.max_idx[-1])
                    print(' {:{length}} : {}'.format('cluster', self.cluster[-1], length=12))

                    i = tf.constant(0)
                    w_mean = tf.TensorArray(dtype=tf.float32, size=self.cluster_num)#tf.constant(0.0, shape=tf.TensorShape([]))
                    cond = lambda i,w_mean : i<self.cluster_num
                    x_batch = self.em_w[-1]
                    def func(i,w_mean):
                        mean = tf.reduce_mean(tf.boolean_mask(x_batch, tf.equal(self.cluster[-1],i)), axis=[0])
                        w_mean = w_mean.write(i, mean)
                        return i+1, w_mean
                    i, w_mean = tf.while_loop(cond, func, [i,w_mean])
                    self.max_idx.append(tf.cast(tf.argmax(w_mean.pack(), axis=0), tf.int32))
                    print(' {:{length}} : {}'.format('max_idx', self.max_idx[-1], length=12))

                    i = tf.constant(0)
                    w_mask_array = tf.TensorArray(dtype=tf.float32, size=self.feed_forwards[f+1])
                    cond2 = lambda i,w_mask_array : i<self.feed_forwards[f+1]
                    def func2(i, w_mask_array):
                        w_mask_array_column = tf.cast(tf.equal(self.cluster[-1], self.max_idx[-1][i]), dtype=tf.float32)
                        w_mask_array = w_mask_array.write(i, w_mask_array_column)
                        return i+1, w_mask_array
                    i, w_mask_array = tf.while_loop(cond2, func2, [i, w_mask_array])
                    w_mask_pack = tf.transpose(w_mask_array.pack())
                    self.w_mask.append(w_mask_pack)
                    self.w_masked.append(tf.multiply(self.em_w[-1], self.w_mask[-1]))
                ###
                network = models.feedforward(input = network,
                                             weight_size=[self.feed_forwards[f], self.feed_forwards[f+1]],
                                             weight=self.w_masked[-1],
                                             nonlinearity=self.nonlinearity,
                                             use_dropout = self.use_dropout,
                                             keep_prob = self.keep_probs[len(self.conv)+f],
                                             use_batchnorm = self.use_batchnorm,
                                             std=self.std,
                                             offset=self.offset,
                                             scale=self.scale,
                                             epsilon=self.epsilon,
                                             name='forward'+str(f+1))
                self.forwards.append(network)
                network = network.get_layer()
                layer_count += 1
                print(' {:{length}} : {}'.format('feedforward'+str(f+1), network, length=12))
            else:
                # ordinary (unmasked) hidden feed-forward layer
                network = models.feedforward(input = network,
                                             weight_size=[self.feed_forwards[f], self.feed_forwards[f+1]],
                                             nonlinearity=self.nonlinearity,
                                             use_dropout = self.use_dropout,
                                             keep_prob = self.keep_probs[len(self.conv)+f],
                                             use_batchnorm = self.use_batchnorm,
                                             std=self.std,
                                             offset=self.offset,
                                             scale=self.scale,
                                             epsilon=self.epsilon,
                                             name='forward'+str(f+1))
                self.forwards.append(network)
                network = network.get_layer()
                layer_count += 1
                print(' {:{length}} : {}'.format('feedforward'+str(f+1), network, length=12))

        # final output layer: no nonlinearity, dropout or batch norm
        network = models.feedforward(input = network,
                                     weight_size=[self.feed_forwards[-2], self.feed_forwards[-1]],
                                     nonlinearity=None,
                                     use_dropout = False,
                                     use_batchnorm = False,
                                     std=self.std,
                                     offset=self.offset,
                                     scale=self.scale,
                                     epsilon=self.epsilon,
                                     name='output')
        self.output = network#.get_layer()
        self.output_layer = network.get_layer()
        print(' {:{length}} : {}'.format('feedforward'+str(f+2), self.output_layer, length=12))
def build_network(d):
    """Build the TF1 graph of the subgraph-existence GNN classifier.

    Creates the input placeholders, a `TGN` message-passing network over
    vertex embeddings, a per-vertex voting MLP, one logit per problem (the
    mean vote of that problem's vertices), confusion-matrix counts, the
    loss and the Adam train step.

    Args:
        d: size of the vertex embeddings; also the width of every hidden
            layer of the voting MLP.

    Returns:
        dict with the keys 'gnn', 'subgraph_exists', 'n_vertices', 'VV',
        'W', 'time_steps', 'last_states', 'predictions', 'TP', 'FP', 'TN',
        'FN', 'acc', 'loss' and 'train_step'.
    """
    # Define hyperparameters
    learning_rate = 2e-5
    l2norm_scaling = 1e-10
    global_norm_gradient_clipping_ratio = 0.65

    # Placeholder for answers to the decision problems (one per problem)
    subgraph_exists = tf.placeholder(tf.float32,
                                     shape=(None, ),
                                     name='subgraph_exists')
    # Placeholder for the list of number of vertices per instance
    n_vertices = tf.placeholder(tf.int32, shape=(None, ), name='n_vertices')
    # Placeholder for the vertex-to-vertex matrix of the whole batch
    VV_matrix = tf.placeholder(tf.float32, shape=(None, None), name="VV")
    # Placeholder for the column matrix of vertex weights
    vertice_weight = tf.placeholder(tf.float32,
                                    shape=(None, 1),
                                    name="vertice_weight")
    # Placeholder for the number of timesteps the GNN is to run for
    time_steps = tf.placeholder(tf.int32, shape=(), name="time_steps")

    # All vertex embeddings are initialized with the same value, which is a
    # trained parameter learned by the network
    total_n = tf.shape(VV_matrix)[1]
    v_init = tf.get_variable(initializer=tf.random_normal((1, d)),
                             dtype=tf.float32,
                             name='V_init')
    vertex_initial_embeddings = tf.tile(
        tf.div(v_init, tf.sqrt(tf.cast(d, tf.float32))), [total_n, 1])

    # Define GNN dictionary
    GNN = {}

    # Configure GNN
    gnn = TGN({
        'V': d,
    }, {'VV': ('V', 'V')}, {
        'V_msg_V': ('V', 'V'),
    }, {
        'V': [{
            'mat': 'VV',
            'msg': 'V_msg_V',
            'var': 'V'
        }],
    },
              name='SUBGRAPH')

    # Populate GNN dictionary
    GNN['gnn'] = gnn
    GNN['subgraph_exists'] = subgraph_exists
    GNN['n_vertices'] = n_vertices
    GNN['VV'] = VV_matrix
    GNN['W'] = vertice_weight
    GNN['time_steps'] = time_steps

    # Define V_vote, which will compute one logit for each vertex.
    # The scope name 'E_vote' is kept as is: it is part of the variable
    # names, so renaming it would break existing checkpoints.
    V_vote_MLP = Mlp(layer_sizes=[d for _ in range(3)],
                     activations=[tf.nn.relu for _ in range(3)],
                     output_size=1,
                     name='E_vote',
                     name_internal_layers=True,
                     kernel_initializer=tf.contrib.layers.xavier_initializer(),
                     bias_initializer=tf.zeros_initializer())

    # Get the last embeddings
    last_states = gnn({
        "VV": VV_matrix,
        'W': vertice_weight
    }, {"V": vertex_initial_embeddings},
                      time_steps=time_steps)
    GNN["last_states"] = last_states
    V_n = last_states['V'].h

    # Compute a vote for each embedding
    V_vote = tf.reshape(V_vote_MLP(V_n), [-1])

    # Compute the number of problems in the batch
    num_problems = tf.shape(n_vertices)[0]

    def _mean_vote_of_problem(i, logits_ta):
        """Write the mean vote of problem `i` into slot `i` of `logits_ta`."""
        # the votes of all problems are laid out back to back, so problem i
        # starts right after the vertices of problems 0..i-1
        start = tf.reduce_sum(n_vertices[0:i])
        problem_votes = V_vote[start:start + n_vertices[i]]
        return (i + 1), logits_ta.write(i, tf.reduce_mean(problem_votes))

    # Compute a logit for each problem
    pred_logits = tf.while_loop(
        lambda i, logits_ta: tf.less(i, num_problems),
        _mean_vote_of_problem,
        [0, tf.TensorArray(size=num_problems, dtype=tf.float32)])[1].stack()

    # Convert logits into probabilities
    GNN['predictions'] = tf.sigmoid(pred_logits)

    # Compute True Positives, False Positives, True Negatives, False
    # Negatives, accuracy
    predicted = tf.round(GNN['predictions'])
    positives = subgraph_exists
    negatives = tf.ones_like(subgraph_exists) - subgraph_exists
    correct = tf.cast(tf.equal(subgraph_exists, predicted), tf.float32)
    wrong = tf.cast(tf.not_equal(subgraph_exists, predicted), tf.float32)

    GNN['TP'] = tf.reduce_sum(tf.multiply(positives, correct))
    # false positive: the label is 0 but the prediction is 1
    GNN['FP'] = tf.reduce_sum(tf.multiply(negatives, wrong))
    GNN['TN'] = tf.reduce_sum(tf.multiply(negatives, correct))
    # false negative: the label is 1 but the prediction is 0
    GNN['FN'] = tf.reduce_sum(tf.multiply(positives, wrong))
    GNN['acc'] = tf.reduce_mean(correct)

    # Define loss
    GNN['loss'] = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=subgraph_exists,
                                                logits=pred_logits))

    # Define optimizer
    optimizer = tf.train.AdamOptimizer(name='Adam',
                                       learning_rate=learning_rate)

    # Compute cost relative to L2 normalization
    trainable_vars = tf.trainable_variables()
    vars_cost = tf.add_n([tf.nn.l2_loss(var) for var in trainable_vars])

    # Define gradients and train step
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(GNN['loss'] + tf.multiply(vars_cost, l2norm_scaling),
                     trainable_vars), global_norm_gradient_clipping_ratio)
    GNN['train_step'] = optimizer.apply_gradients(zip(grads, trainable_vars))

    # Return GNN dictionary
    return GNN
def encode_input(self, encoder_inp, seq_len):
    """Run the encoder on the given input.

    Args:
        encoder_inp: Mapping from input name to tensor. Inputs are time
            major i.e. TxB. Every entry except "word_dur" and
            "speech_frames" holds IDs that are passed through an embedding
            layer; "word_dur" is used as is with a trailing axis added;
            "speech_frames" is processed word by word by one CNN per
            entry of `self.filter_sizes`.
        seq_len: Actual length of input time sequences.

    Returns:
        attention_states: Final encoder output for every input timestep,
            batch major. This tensor is used by attention-enabled decoders.
        final_state: Final state of encoder LSTM
    """
    with variable_scope.variable_scope("encoder"):
        comb_encoder_inputs = None
        # Necessary to sort so that the order of encoder_inputs is
        # maintained. sorted(dict) iterates the keys on both Python 2 and
        # Python 3, unlike dict.iterkeys().
        for key in sorted(encoder_inp):
            print(key)
            if key == "speech_frames":
                # handled separately by the CNN branch below
                continue
            elif key == "word_dur":
                # No embedding for word duration - so just extend dim.
                cur_inputs = tf.expand_dims(encoder_inp[key], -1)
            else:
                emb_matrix = variable_scope.get_variable(
                    "emb_" + key,
                    [self.vocab_size[key], self.embedding_size[key]])
                cur_inputs = embedding_ops.embedding_lookup(
                    emb_matrix, encoder_inp[key])

            if comb_encoder_inputs is None:
                comb_encoder_inputs = cur_inputs
            else:
                comb_encoder_inputs = tf.concat(
                    [comb_encoder_inputs, cur_inputs], 2)

        if "speech_frames" in encoder_inp:
            cnn_outputs = []
            max_words = tf.reduce_max(seq_len)
            for filter_size in self.filter_sizes:
                # one TensorArray element per entry along the first axis of
                # the speech frames
                acoustic_input_ta = tf.TensorArray(size=0,
                                                   dtype=tf.float32,
                                                   dynamic_size=True)
                acoustic_input_ta = acoustic_input_ta.unstack(
                    encoder_inp["speech_frames"])

                cur_filter_size_output_array = tf.TensorArray(
                    size=0, dtype=tf.float32, dynamic_size=True)
                _, _, cur_filter_size_output = tf.while_loop(
                    cond=lambda time_idx, a_t, _: time_idx < max_words,
                    body=self._cnn_word_process(filter_size),
                    loop_vars=(tf.constant(0), acoustic_input_ta,
                               cur_filter_size_output_array))
                # Convert the TensorArray to Tensor
                cnn_outputs.append(cur_filter_size_output.stack())

            # T * B * filter_sizes * 1 * num_filters
            cnn_features = tf.concat(cnn_outputs, 2)
            num_filters_total = self.num_filters * len(self.filter_sizes)
            time_dim = array_ops.shape(cnn_features)[0]
            batch_size = array_ops.shape(cnn_features)[1]
            cnn_features = tf.reshape(
                cnn_features,
                array_ops.stack([time_dim, batch_size, num_filters_total]))

            # speech frames may be the only usable input, in which case
            # there is nothing to concatenate with
            if comb_encoder_inputs is None:
                comb_encoder_inputs = cnn_features
            else:
                comb_encoder_inputs = tf.concat(
                    [comb_encoder_inputs, cnn_features], 2)

        encoder_outputs, encoder_state = rnn.dynamic_rnn(
            self.cell,
            comb_encoder_inputs,
            sequence_length=seq_len,
            dtype=tf.float32,
            time_major=True)
        # Make the attention states batch major
        attention_states = tf.transpose(encoder_outputs, [1, 0, 2])

        return attention_states, encoder_state
def __init__(self,
             batch_size,
             hidden_size,
             embedding_dim,
             dropout_rate,
             grad_clip,
             initial_learning_rate,
             mode='train'):
    """Build the question/passage reading-comprehension graph.

    The graph encodes the question and every passage with bidirectional
    GRUs, computes a gated question-aware passage representation, predicts
    start/end pointers and scores the passages. Everything is driven by
    ``tf.while_loop`` over the batch because each example is sliced
    differently.

    Args:
        batch_size: Static batch size; used for placeholder shapes and as
            the trip count of the per-example loops.
        hidden_size: Number of units of each directional encoder GRU.
        embedding_dim: Size of the pre-embedded input vectors.
        dropout_rate: Output dropout rate of the encoder cells
            (``output_keep_prob = 1 - dropout_rate``).
        grad_clip: Not used by the code in this method.
        initial_learning_rate: Not used by the code in this method.
        mode: 'train' additionally builds ``self.loss``; any value other
            than 'test' creates the target placeholders.

    Attributes set: the input/target placeholders, ``self.p1`` and
    ``self.p2`` (predicted start/end positions) and, in train mode,
    ``self.loss``.

    NOTE(review): several ``TensorArray.stack()`` calls below stack
    elements whose length depends on the passage/question; stacking needs
    identically shaped elements, so this presumably only works when those
    lengths agree within a batch - confirm with real data.
    """

    def _position_softmax(scores):
        # Normalise a [length, 1] score column over its positions. Calling
        # tf.nn.softmax on the column directly normalises over the trailing
        # axis of size 1 and therefore returns all ones.
        flat = tf.nn.softmax(tf.reshape(scores, [1, -1]))
        return tf.reshape(flat, [-1, 1])

    # inputs
    self.inputs_embedded_q = tf.placeholder(
        tf.float32,
        shape=[batch_size, None, embedding_dim],
        name='inputs_embedded_q')
    # Actual length of every question, excluding padding.
    self.inputs_actual_length_q = tf.placeholder(
        tf.int32, [batch_size], name='inputs_actual_length')
    # All passages of one example concatenated along the word axis; they
    # are sliced apart again inside the loops below.
    self.inputs_embedded_concat_p = tf.placeholder(
        tf.float32,
        shape=[batch_size, None, embedding_dim],
        name='inputs_embedded_concat_p')
    self.inputs_actual_length_concat_p = tf.placeholder(
        tf.int32, [batch_size], name='inputs_actual_length_concat_p')
    # passage ranking
    self.passage_numbers = tf.placeholder(
        tf.int32, [batch_size], name='passage_numbers')
    self.passage_word_numbers = tf.placeholder(
        tf.int32, [batch_size, None], name='passage_word_numbers')
    # Character-level inputs are not implemented.

    # targets
    if mode != 'test':
        self.start_position = tf.placeholder(tf.int32, [batch_size])
        self.end_position = tf.placeholder(tf.int32, [batch_size])
        # passage ranking target; padded, actual counts are in
        # self.passage_numbers.
        self.passage_y = tf.placeholder(
            tf.int32, [batch_size, None], name='passage_y')

    with tf.variable_scope("q_encoder", reuse=tf.AUTO_REUSE):
        fcell_q = tf.nn.rnn_cell.GRUCell(hidden_size)
        bcell_q = tf.nn.rnn_cell.GRUCell(hidden_size)
        fcell_q = tf.contrib.rnn.DropoutWrapper(
            fcell_q, output_keep_prob=1 - dropout_rate)
        bcell_q = tf.contrib.rnn.DropoutWrapper(
            bcell_q, output_keep_prob=1 - dropout_rate)
        (fw_outputs_q, bw_outputs_q), _ = \
            tf.nn.bidirectional_dynamic_rnn(
                cell_fw=fcell_q,
                cell_bw=bcell_q,
                inputs=self.inputs_embedded_q,
                sequence_length=self.inputs_actual_length_q,
                dtype=tf.float32)
        u_q = tf.concat((fw_outputs_q, bw_outputs_q), 2)

    with tf.variable_scope("p_encoder", reuse=tf.AUTO_REUSE):
        fcell_p = tf.nn.rnn_cell.GRUCell(hidden_size)
        bcell_p = tf.nn.rnn_cell.GRUCell(hidden_size)
        fcell_p = tf.contrib.rnn.DropoutWrapper(
            fcell_p, output_keep_prob=1 - dropout_rate)
        bcell_p = tf.contrib.rnn.DropoutWrapper(
            bcell_p, output_keep_prob=1 - dropout_rate)

        def p_encoder_one_p(j, start, end, inputs_embedded_concat_p_i,
                            p_w_num_i, fw_p_i, bw_p_i):
            # Encode passage j of one example on its own, so that its first
            # word does not depend on the previous passage's last word.
            inputs_embedded_p = tf.expand_dims(
                inputs_embedded_concat_p_i[start:end, :], 0)
            p_w_num = tf.expand_dims(p_w_num_i[j], 0)
            with tf.variable_scope("p_encoder_one_p", reuse=tf.AUTO_REUSE):
                (fw_outputs_p, bw_outputs_p), _ = \
                    tf.nn.bidirectional_dynamic_rnn(
                        cell_fw=fcell_p,
                        cell_bw=bcell_p,
                        inputs=inputs_embedded_p,
                        sequence_length=p_w_num,
                        dtype=tf.float32)
            # write() returns a new TensorArray; only that object carries
            # the dependency on the write, so it must be the loop variable.
            fw_p_i = fw_p_i.write(j, fw_outputs_p)
            bw_p_i = bw_p_i.write(j, bw_outputs_p)
            start = end
            j = tf.add(j, 1)
            # NOTE(review): `end` becomes the next entry of
            # passage_word_numbers itself, not `start + entry`, although
            # the same entries are used as sequence lengths above; it also
            # reads index p_num_i after the last passage. Confirm whether
            # the entries are counts or cumulative offsets.
            end = p_w_num_i[j]
            return (j, start, end, inputs_embedded_concat_p_i, p_w_num_i,
                    fw_p_i, bw_p_i)

        def p_encoder_one_q(i, fw_p_b, bw_p_b):
            j = tf.constant(0)
            p_w_num_i = self.passage_word_numbers[i]
            start = tf.constant(0)
            end = p_w_num_i[0]
            inputs_embedded_concat_p_i = self.inputs_embedded_concat_p[i]
            p_num_i = self.passage_numbers[i]
            fw_p_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
            bw_p_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
            c = lambda x, y, z, m, n, p, q: tf.less(x, p_num_i)
            u_p_i_res = tf.while_loop(
                cond=c,
                body=p_encoder_one_p,
                loop_vars=(j, start, end, inputs_embedded_concat_p_i,
                           p_w_num_i, fw_p_i, bw_p_i))
            # Drop the per-passage axis again.
            fw_p_i = tf.reshape(u_p_i_res[-2].stack(),
                                shape=[-1, hidden_size])
            bw_p_i = tf.reshape(u_p_i_res[-1].stack(),
                                shape=[-1, hidden_size])
            fw_p_b = fw_p_b.write(i, fw_p_i)
            bw_p_b = bw_p_b.write(i, bw_p_i)
            i = tf.add(i, 1)
            return i, fw_p_b, bw_p_b

        i = tf.constant(0)
        fw_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
        bw_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
        # Loop over the batch because every example is sliced differently.
        c = lambda x, y, z: tf.less(x, batch_size)
        u_p_b_res = tf.while_loop(cond=c,
                                  body=p_encoder_one_q,
                                  loop_vars=(i, fw_p_b, bw_p_b))
        fw_p = u_p_b_res[-2].stack()
        # The backward outputs are the LAST loop variable (the original
        # stacked index -2 twice and so duplicated the forward outputs).
        bw_p = u_p_b_res[-1].stack()
        u_p = tf.concat((fw_p, bw_p), 2)

    with tf.variable_scope("q_p_attention", reuse=tf.AUTO_REUSE):
        w_q_u = tf.get_variable(name='w_q_u',
                                shape=[hidden_size * 2, hidden_size * 2])
        w_p_u = tf.get_variable(name='w_p_u',
                                shape=[hidden_size * 2, hidden_size * 2])
        v = tf.get_variable(name='v', shape=[hidden_size * 2, 1])
        # Gate over the concatenation [passage word, question context].
        w_g = tf.get_variable(name='w_g',
                              shape=[hidden_size * 4, hidden_size * 4])
        cell_v = tf.nn.rnn_cell.GRUCell(hidden_size * 2)

        def attention_step(t, q_i, p_i, len_q_i, state, v_p_p):
            # Attention of the t-th passage word over the question.
            p_i_t = tf.reshape(p_i[t], [1, -1])
            # Cut the padded part of the question away.
            q_i_t = tf.slice(q_i,
                             begin=[0, 0],
                             size=[len_q_i, hidden_size * 2])
            sum_t = tf.matmul(q_i_t, w_q_u) + tf.matmul(p_i_t, w_p_u)
            s_t = tf.matmul(tf.tanh(sum_t), v)  # [len_q, 1]
            a_t = _position_softmax(s_t)
            # Weighted sum of question states: [2h, len_q] x [len_q, 1],
            # transposed to a [1, 2h] row vector.
            c_q_t = tf.transpose(tf.matmul(q_i_t, a_t, transpose_a=True))
            p_c = tf.concat([p_i_t, c_q_t], axis=1)  # [1, 4h]
            g_t = tf.nn.sigmoid(tf.matmul(p_c, w_g))
            # Element-wise gate in (0, 1) applied to the GRU input.
            p_c_gated = g_t * p_c
            out, next_state = cell_v(inputs=p_c_gated, state=state)
            v_p_p = v_p_p.write(t, out)
            t = tf.add(t, 1)
            # Carry the updated state so the GRU is actually recurrent.
            return t, q_i, p_i, len_q_i, next_state, v_p_p

        def atention_one_p(j, q_i, p_i, len_q_i, p_w_num_i, v_p_q):
            state = cell_v.zero_state(batch_size=1, dtype=tf.float32)
            p_w_num_i_j = p_w_num_i[j]
            v_p_p = tf.TensorArray(dtype=tf.float32, size=p_w_num_i_j)
            t = tf.constant(0)
            c = lambda a, x, y, z, s, u: tf.less(a, p_w_num_i_j)
            # NOTE(review): attention_step indexes p_i[t] with t starting
            # at 0 for every passage, i.e. without the passage's offset
            # into the concatenated words - confirm that is intended.
            v_p_p_res = tf.while_loop(cond=c,
                                      body=attention_step,
                                      loop_vars=(t, q_i, p_i, len_q_i,
                                                 state, v_p_p))
            v_p_p = v_p_p_res[-1].stack()
            v_p_q = v_p_q.write(j, v_p_p)
            # Advance the passage index; without this the enclosing
            # while_loop can never terminate.
            return tf.add(j, 1), q_i, p_i, len_q_i, p_w_num_i, v_p_q

        def atention_one_q(i, v_p_b):
            # Attention for all passages of one example.
            p_i = u_p[i]  # encoded passages of example i
            q_i = u_q[i]  # encoded question of example i
            len_q_i = self.inputs_actual_length_q[i]
            j = tf.constant(0)
            p_num_i = self.passage_numbers[i]
            p_w_num_i = self.passage_word_numbers[i]
            v_p_q = tf.TensorArray(dtype=tf.float32, size=p_num_i)
            c = lambda a, x, y, z, s, u: tf.less(a, p_num_i)
            v_p_q_res = tf.while_loop(cond=c,
                                      body=atention_one_p,
                                      loop_vars=(j, q_i, p_i, len_q_i,
                                                 p_w_num_i, v_p_q))
            v_p_q = v_p_q_res[-1].stack()
            v_p_q = tf.reshape(v_p_q, shape=[-1, hidden_size * 2])
            v_p_b = v_p_b.write(i, v_p_q)
            i = tf.add(i, 1)
            return i, v_p_b

        # One result per example of the batch.
        v_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
        c = lambda x, y: tf.less(x, batch_size)
        i = tf.constant(0)  # batch index
        v_p_b_res = tf.while_loop(cond=c,
                                  body=atention_one_q,
                                  loop_vars=(i, v_p_b))
        v_p = v_p_b_res[-1].stack()

    # No self-matching layer is built between attention and output.
    with tf.variable_scope("output_layer", reuse=tf.AUTO_REUSE):
        # First compute the initial pointer state r_q, then the start
        # distribution, then feed its context to get the end distribution.
        with tf.variable_scope("intial_state", reuse=tf.AUTO_REUSE):
            w_u_q = tf.get_variable(
                name='w_u_q', shape=[hidden_size * 2, hidden_size * 2])
            w_q_v = tf.get_variable(
                name='w_q_v', shape=[hidden_size * 2, hidden_size * 2])
            v_q_r = tf.get_variable(name='v_q_r',
                                    shape=[1, hidden_size * 2])
            v2 = tf.get_variable(name='v2', shape=[hidden_size * 2, 1])

            def attention_r_q(i, r_q_b):
                q_i = u_q[i]
                len_q_i = self.inputs_actual_length_q[i]
                q_i = tf.slice(q_i,
                               begin=[0, 0],
                               size=[len_q_i, hidden_size * 2])
                sum_q = tf.matmul(q_i, w_u_q) + tf.matmul(v_q_r, w_q_v)
                s = tf.matmul(tf.tanh(sum_q), v2)
                a = _position_softmax(s)
                # Row vector of size 2 * hidden_size.
                r_q_i = tf.transpose(tf.matmul(tf.transpose(q_i), a))
                r_q_b = r_q_b.write(i, r_q_i)
                i = tf.add(i, 1)
                return i, r_q_b

            r_q_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x, y: tf.less(x, batch_size)
            r_q_b_res = tf.while_loop(cond=c,
                                      body=attention_r_q,
                                      loop_vars=(i, r_q_b))
            # Not squeezed: [batch, 1, 2 * hidden_size].
            r_q = r_q_b_res[-1].stack()

        with tf.variable_scope("answer_recurrent_network",
                               reuse=tf.AUTO_REUSE):
            w_p_h = tf.get_variable(
                shape=[hidden_size * 2, hidden_size * 2], name="w_p_h_s")
            w_a_h = tf.get_variable(
                shape=[hidden_size * 2, hidden_size * 2], name="w_a_h_s")
            v4 = tf.get_variable(shape=[hidden_size * 2, 1], name="v4")
            cell_h = tf.nn.rnn_cell.GRUCell(hidden_size * 2)

            def pointers(i, p_1_b, p_2_b, a_1_b, a_2_b):
                p_i = v_p[i]
                len_p_i = self.inputs_actual_length_concat_p[i]
                p_i_t = tf.slice(p_i,
                                 begin=[0, 0],
                                 size=[len_p_i, hidden_size * 2])
                # start pointer, t = 1
                h_a_1 = r_q[i]  # initial state
                sum_1 = tf.matmul(p_i_t, w_p_h) + tf.matmul(h_a_1, w_a_h)
                s_1 = tf.matmul(tf.tanh(sum_1), v4)  # [len_p, 1]
                a_1 = _position_softmax(s_1)
                a_1_b = a_1_b.write(i, tf.transpose(a_1))
                c_1 = tf.transpose(
                    tf.matmul(p_i_t, a_1, transpose_a=True))
                # Pin the static shape for the GRU cell.
                c_1 = tf.reshape(c_1, [1, hidden_size * 2])
                h_a_1 = tf.reshape(h_a_1, [1, hidden_size * 2])
                h_a_2, _ = cell_h(inputs=c_1, state=h_a_1)
                # argmax yields int64; the result arrays are int32.
                p_1 = tf.cast(tf.argmax(a_1), tf.int32)
                p_1_b = p_1_b.write(i, p_1)
                # end pointer, t = 2
                sum_2 = tf.matmul(p_i_t, w_p_h) + tf.matmul(h_a_2, w_a_h)
                s_2 = tf.matmul(tf.tanh(sum_2), v4)  # [len_p, 1]
                a_2 = _position_softmax(s_2)
                a_2_b = a_2_b.write(i, tf.transpose(a_2))
                p_2 = tf.cast(tf.argmax(a_2), tf.int32)
                p_2_b = p_2_b.write(i, p_2)
                i = tf.add(i, 1)
                return i, p_1_b, p_2_b, a_1_b, a_2_b

            p_1_b = tf.TensorArray(dtype=tf.int32, size=batch_size)
            p_2_b = tf.TensorArray(dtype=tf.int32, size=batch_size)
            a_1_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            a_2_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x, y, z, m, n: tf.less(x, batch_size)
            b_res = tf.while_loop(cond=c,
                                  body=pointers,
                                  loop_vars=(i, p_1_b, p_2_b, a_1_b,
                                             a_2_b))
            p_1 = b_res[1].stack()
            p_2 = b_res[2].stack()
            a_1 = b_res[3].stack()
            a_2 = b_res[4].stack()
            self.p1 = tf.reshape(p_1, [1, -1])
            self.p2 = tf.reshape(p_2, [1, -1])
            a = [tf.reshape(a_1, [1, -1]), tf.reshape(a_2, [1, -1])]

    with tf.variable_scope("passage_ranking", reuse=tf.AUTO_REUSE):
        w_v_q = tf.get_variable(name='w_v_q',
                                shape=[hidden_size * 2, hidden_size * 2])
        w_v_p = tf.get_variable(name='w_v_p',
                                shape=[hidden_size * 2, hidden_size * 2])
        v3 = tf.get_variable(name='v3', shape=[hidden_size * 2, 1])
        v_g = tf.get_variable(name='v_g', shape=[hidden_size * 2, 1])
        # Applied to the concatenation [r_q, r_p] of two 2h-wide rows, so
        # its input width has to be 4h (the original declared 2h).
        w_g_2 = tf.get_variable(name='w_g_2',
                                shape=[hidden_size * 4, hidden_size * 2])

        def attention_r_p_one_passage(j, start, end, v_p_i, r_q_i,
                                      p_w_num_i, r_p_i):
            v_p_i_j = v_p_i[start:end, :]
            sum_p = tf.matmul(v_p_i_j, w_v_p) + tf.matmul(r_q_i, w_v_q)
            s = tf.matmul(tf.tanh(sum_p), v3)  # [passage words, 1]
            a = _position_softmax(s)
            # Row vector [1, 2 * hidden_size].
            r_p_i_j = tf.transpose(tf.matmul(tf.transpose(v_p_i_j), a))
            r_p_i = r_p_i.write(j, r_p_i_j)
            # NOTE(review): here the entries of passage_word_numbers are
            # used as slice offsets, whereas the passage encoder uses them
            # as sequence lengths; index j + 1 is also read after the last
            # passage. Confirm the intended layout.
            start = p_w_num_i[j]
            j = tf.add(j, 1)
            end = p_w_num_i[j]
            return j, start, end, v_p_i, r_q_i, p_w_num_i, r_p_i

        def attention_r_p(i, r_p_b):
            # Split the example's passage representation per passage.
            v_p_i = v_p[i]
            r_q_i = r_q[i]
            p_num_i = self.passage_numbers[i]
            r_p_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
            j = tf.constant(0)
            start = tf.constant(0)
            p_w_num_i = self.passage_word_numbers[i]
            end = p_w_num_i[0]
            c = lambda x, y, z, m, n, p, q: tf.less(x, p_num_i)
            res = tf.while_loop(cond=c,
                                body=attention_r_p_one_passage,
                                loop_vars=(j, start, end, v_p_i, r_q_i,
                                           p_w_num_i, r_p_i))
            r_p_i = tf.squeeze(res[-1].stack(), axis=1)
            r_p_b = r_p_b.write(i, r_p_i)
            i = tf.add(i, 1)
            return i, r_p_b

        r_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
        c = lambda x, y: tf.less(x, batch_size)
        r_p_b_res = tf.while_loop(cond=c,
                                  body=attention_r_p,
                                  loop_vars=(i, r_p_b))
        r_p = r_p_b_res[-1].stack()

        def compute_g_b_one_passage(j, r_q_i, r_p_i, g_i):
            r_p_i_j = tf.reshape(r_p_i[j], shape=[1, -1])
            r_p_q = tf.concat([r_q_i, r_p_i_j], axis=1)
            mul_g = tf.matmul(r_p_q, w_g_2)
            g_j = tf.matmul(tf.tanh(mul_g), v_g)  # single score
            g_i = g_i.write(j, g_j)
            j = tf.add(j, 1)
            return j, r_q_i, r_p_i, g_i

        def compute_g_b(i, g_b):
            r_q_i = r_q[i]
            r_p_i = r_p[i]
            p_num_i = self.passage_numbers[i]
            j = tf.constant(0)
            g_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
            c = lambda x, y, z, m: tf.less(x, p_num_i)
            res = tf.while_loop(cond=c,
                                body=compute_g_b_one_passage,
                                loop_vars=(j, r_q_i, r_p_i, g_i))
            g_i = tf.squeeze(res[-1].stack(), axis=1)  # [passages, 1]
            # Normalise over the passages before storing.
            g_i = _position_softmax(g_i)
            g_b = g_b.write(i, g_i)
            i = tf.add(i, 1)
            return i, g_b

        g_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
        c = lambda x, y: tf.less(x, batch_size)
        g_b_res = tf.while_loop(cond=c,
                                body=compute_g_b,
                                loop_vars=(i, g_b))
        g = tf.squeeze(g_b_res[-1].stack(), axis=2)

    if mode == 'train':
        with tf.variable_scope("loss"):
            # Training needs the distributions `a`, not the argmax
            # positions, so the gold positions are expanded to one-hot
            # vectors of the (dynamic) passage length.

            def write_y(j, pos, y):
                # 1.0 at the gold position, 0.0 elsewhere. The comparison
                # must be a graph op: a Python `if` on tensors is decided
                # once at graph-construction time.
                y = y.write(j, tf.cast(tf.equal(j, pos), tf.float32))
                return tf.add(j, 1), pos, y

            def to_one_hot(i, y1_b, y2_b):
                len_p_i = self.inputs_actual_length_concat_p[i]
                start = self.start_position[i]
                end = self.end_position[i]
                y1 = tf.TensorArray(dtype=tf.float32, size=len_p_i)
                y2 = tf.TensorArray(dtype=tf.float32, size=len_p_i)
                c = lambda x, y, z: tf.less(x, len_p_i)
                j = tf.constant(0)  # word index
                y1_res = tf.while_loop(cond=c,
                                       body=write_y,
                                       loop_vars=(j, start, y1))
                j = tf.constant(0)  # word index
                y2_res = tf.while_loop(cond=c,
                                       body=write_y,
                                       loop_vars=(j, end, y2))
                y1_b = y1_b.write(i, y1_res[-1].stack())
                y2_b = y2_b.write(i, y2_res[-1].stack())
                i = tf.add(i, 1)
                return i, y1_b, y2_b

            # One one-hot vector per example of the batch.
            y1_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            y2_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x, y, z: tf.less(x, batch_size)
            i = tf.constant(0)  # batch index
            res = tf.while_loop(cond=c,
                                body=to_one_hot,
                                loop_vars=(i, y1_b, y2_b))
            y1 = res[-2].stack()
            y2 = res[-1].stack()
            y = [tf.reshape(y1, [1, -1]), tf.reshape(y2, [1, -1])]
            self.loss = 0.0
            for t in range(2):
                # NOTE(review): this is not a cross-entropy - the second
                # term is (1 - log a) rather than log(1 - a) and the sum
                # is not negated. Left unchanged; confirm the intended
                # objective before training with it.
                self.loss += tf.reduce_sum(
                    y[t] * tf.log(a[t]) + (1 - y[t]) *
                    (1 - tf.log(a[t])), 1)
def GetLoss(self, y_true, y_pred):
    '''Compute the detection loss summed over the three output scales.

    Args:
        y_true: Sequence of three label tensors, per the original note
            shaped (batch_size, 13, 13, 3, 5+num_classes),
            (batch_size, 26, 26, 3, 5+num_classes) and
            (batch_size, 52, 52, 3, 5+num_classes). Channels 0:2 are box
            centre xy, 2:4 box wh, 4:5 objectness, 5: class targets.
            NOTE(review): the original note says the coordinates are "not
            yet normalized", but the code multiplies xy by the grid shape
            and wh by the 416x416 image size, i.e. treats them as
            fractions of the image - confirm against the data pipeline.
        y_pred: Sequence of three raw network outputs; each one is
            reshaped to the shape of the matching y_true tensor.

    Returns:
        Scalar tensor: xy + wh + objectness + class loss, each summed over
        all cells and divided by the batch size, accumulated over scales.
    '''
    print('loss_fun:', type(y_true), type(y_pred))
    # Anchor (w, h) in pixels, one row of three anchors per output scale,
    # coarsest grid first.
    anchor_table = [
        [[116, 90], [156, 198], [373, 326]],
        [[30, 61], [62, 45], [59, 119]],
        [[10, 13], [16, 30], [33, 23]],
    ]
    train_iou_thresh = 0.5
    image_size = tf.constant((416, 416), dtype=tf.float32)
    # (layers_num, anchors_num, 2)
    anchors_wh = tf.constant(anchor_table, dtype=tf.float32)
    batch_size = tf.shape(y_true[0])[0]
    batch_size_float = tf.cast(batch_size, dtype=tf.float32)
    loss = 0.0
    for layer_index in range(len(anchor_table)):
        y_true_read = y_true[layer_index]
        y_pred_raw = y_pred[layer_index]
        y_pred_raw = tf.reshape(y_pred_raw, tf.shape(y_true_read))

        # Cell coordinates of the feature grid.
        grid_shape = tf.shape(y_pred_raw)[1:3]  # height, width
        grid_y = tf.range(0, grid_shape[0], dtype=tf.float32)
        grid_x = tf.range(0, grid_shape[1], dtype=tf.float32)
        grid_x, grid_y = tf.meshgrid(grid_x, grid_y)
        grid_x = tf.reshape(grid_x, (grid_shape[0], grid_shape[1], 1, 1))
        grid_y = tf.reshape(grid_y, (grid_shape[0], grid_shape[1], 1, 1))
        grid_xy = tf.concat([grid_x, grid_y], axis=-1)

        # Labels: "read" values are image-relative, "raw" values are in
        # the network's output parameterisation.
        y_true_object = y_true_read[..., 4:5]
        y_true_classes = y_true_read[..., 5:]
        y_true_read_xy = y_true_read[..., 0:2]
        y_true_raw_xy = y_true_read_xy * tf.cast(
            grid_shape[::-1], dtype=tf.float32) - grid_xy
        y_true_read_wh = y_true_read[..., 2:4]
        y_true_raw_wh = tf.math.log(y_true_read_wh * image_size[::-1] /
                                    anchors_wh[layer_index, ...])
        # Cells without an object have wh == 0 and therefore log == -inf;
        # replace those entries with zeros.
        y_true_raw_wh = tf.where(tf.cast(y_true_object, dtype=tf.bool),
                                 y_true_raw_wh,
                                 tf.zeros_like(y_true_raw_wh))

        # Predictions.
        y_pred_object = y_pred_raw[..., 4:5]
        y_pred_classes = y_pred_raw[..., 5:]
        y_pred_raw_xy = y_pred_raw[..., 0:2]
        y_pred_read_xy = (tf.math.sigmoid(y_pred_raw_xy) +
                          grid_xy) / tf.cast(grid_shape[::-1],
                                             dtype=tf.float32)
        y_pred_raw_wh = y_pred_raw[..., 2:4]
        y_pred_read_wh = tf.math.exp(y_pred_raw_wh) * anchors_wh[
            layer_index, ...] / image_size[::-1]

        # Corner boxes (batch_size, h, w, anchors_num, (x1, y1, x2, y2)).
        y_true_read_wh_half = y_true_read_wh / 2
        y_true_read_mins = y_true_read_xy - y_true_read_wh_half
        y_true_read_maxes = y_true_read_xy + y_true_read_wh_half
        y_true_boxes = tf.concat([y_true_read_mins, y_true_read_maxes],
                                 axis=-1)
        y_pred_read_wh_half = y_pred_read_wh / 2
        y_pred_read_mins = y_pred_read_xy - y_pred_read_wh_half
        y_pred_read_maxes = y_pred_read_xy + y_pred_read_wh_half
        y_pred_boxes = tf.concat([y_pred_read_mins, y_pred_read_maxes],
                                 axis=-1)

        ignore_mask = tf.TensorArray(tf.float32, size=1, dynamic_size=True)

        def foreach_batch(batch_index, ignore_mask):
            # Per image: mark predictions whose best IoU with any true
            # box stays below the threshold.
            y_true_boxes_one = y_true_boxes[batch_index, ...]
            y_pred_boxes_one = y_pred_boxes[batch_index, ...]
            y_true_object_one = y_true_object[batch_index, ...]
            y_true_boxes_tmp = tf.boolean_mask(
                y_true_boxes_one,
                tf.cast(y_true_object_one[..., 0], dtype=tf.bool))
            # (boxes_num, 4) => (1, boxes_num, 4)
            y_true_boxes_tmp = tf.expand_dims(y_true_boxes_tmp, axis=0)
            # (h, w, anchors_num, 4) => (h, w, anchors_num, 1, 4)
            y_pred_boxes_tmp = tf.expand_dims(y_pred_boxes_one, axis=-2)
            # (h, w, anchors_num, boxes_num)
            iou = GetIOU(y_pred_boxes_tmp, y_true_boxes_tmp, 'iou')
            # (h, w, anchors_num)
            best_iou = tf.math.reduce_max(iou, axis=-1)
            # Predictions with IoU below the threshold count as
            # background.
            ignore_mask = ignore_mask.write(
                batch_index,
                tf.cast(best_iou < train_iou_thresh, dtype=tf.float32))
            return batch_index + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < batch_size,
                                       foreach_batch, [0, ignore_mask])
        # (batch_size, h, w, anchors_num)
        ignore_mask = ignore_mask.stack()
        ignore_mask = tf.expand_dims(ignore_mask, axis=-1)

        # Loss terms. Smaller boxes get a larger weight.
        boxes_loss_scale = 2 - y_true_read_wh[..., 0:1] * y_true_read_wh[
            ..., 1:2]
        # The extra trailing axis keeps binary_crossentropy (which averages
        # over the last axis) element-wise.
        xy_loss_bc = tf.keras.losses.binary_crossentropy(
            tf.expand_dims(y_true_raw_xy, axis=-1),
            tf.expand_dims(y_pred_raw_xy, axis=-1),
            from_logits=True)
        xy_loss = y_true_object * boxes_loss_scale * xy_loss_bc
        wh_loss = y_true_object * boxes_loss_scale * 0.5 * tf.math.square(
            y_true_raw_wh - y_pred_raw_wh)
        object_loss_bc = tf.keras.losses.binary_crossentropy(
            tf.expand_dims(y_true_object, axis=-1),
            tf.expand_dims(y_pred_object, axis=-1),
            from_logits=True)
        object_loss = y_true_object * object_loss_bc + (
            1 - y_true_object) * object_loss_bc * ignore_mask
        classes_loss_bc = tf.keras.losses.binary_crossentropy(
            tf.expand_dims(y_true_classes, axis=-1),
            tf.expand_dims(y_pred_classes, axis=-1),
            from_logits=True)
        classes_loss = y_true_object * classes_loss_bc

        xy_loss = tf.math.reduce_sum(xy_loss) / batch_size_float
        wh_loss = tf.math.reduce_sum(wh_loss) / batch_size_float
        object_loss = tf.math.reduce_sum(object_loss) / batch_size_float
        classes_loss = tf.math.reduce_sum(classes_loss) / batch_size_float
        loss += xy_loss + wh_loss + object_loss + classes_loss
    return loss
def build_graph(self):
    """Build the tree-structured network over batched, flattened trees.

    Creates the input placeholders, lifts every embedded token into a
    hidden_dim x hidden_dim matrix, then walks the node positions with a
    ``tf.while_loop`` that stores one [c, H] pair per node in a
    TensorArray (leaves use the lifted features, inner nodes call
    ``self.combine_children`` on their children's entries).

    Sets ``self.model`` (per-node outputs, batch major), ``self.last``
    (output for the node at index ``input_lens - 1``) and
    ``self.node_tensors``, next to the placeholders and weights.
    """
    self._define_embedding()

    # embed_dim may be a Dimension-like object exposing ``.value`` or
    # already a plain number; only the latter lacks the attribute.
    try:
        self.embed_dim = self.embed_dim.value
    except AttributeError:
        pass

    # The inputs
    self.inputs = tf.placeholder(tf.int32, [None, self.max_length])
    self.is_leaf = tf.placeholder(tf.bool, [None, self.max_length])
    self.left_children = tf.placeholder(tf.int32, [None, self.max_length])
    self.right_children = tf.placeholder(tf.int32,
                                         [None, self.max_length])
    self.is_node = tf.placeholder(tf.bool, [None, self.max_length])
    self.input_lens = tf.placeholder(tf.int32, [None])
    if self.use_phrases:
        output_shape = [None, self.max_length, self.output_dim]
    else:
        output_shape = [None, self.output_dim]
    self.outputs = tf.placeholder(tf.float32, shape=output_shape)
    self.feats = tf.nn.embedding_lookup(self.embedding, self.inputs)

    # Need to do lift
    # H = tanh(W_lift*c + b_lift)
    # First, we define W_lift. This is actually a 3-D tensor, since it
    # lifts our input vectors into a sqrt(d)-by-sqrt(d) matrix.
    # Initialize with Xavier initialization, then shape into 3D.
    self.W_lift = tf.reshape(
        self.weight_init(self.embed_dim, int(self.hidden_dim**2),
                         'W_lift'),
        [self.embed_dim, self.hidden_dim, self.hidden_dim])
    self.b_lift = tf.reshape(
        self.bias_init(int(self.hidden_dim**2), 'b_lift'),
        [self.hidden_dim, self.hidden_dim])
    self.lifted_feats = tf.nn.tanh(
        tf.tensordot(self.feats, self.W_lift, [[2], [0]]) / 100 +
        self.b_lift)

    # Position-major views so the loop can pick one node position for the
    # whole batch with a single gather.
    self.is_leaf_t = tf.transpose(self.is_leaf)
    self.left_children_t = tf.transpose(self.left_children)
    self.right_children_t = tf.transpose(self.right_children)
    self.lifted_feats_t = tf.transpose(self.lifted_feats, [1, 0, 2, 3])

    # For node combination
    self.W_lstm = self.weight_init(2 * self.hidden_dim_v,
                                   4 * self.hidden_dim_v, 'W_lstm')
    self.b_lstm = self.bias_init(4 * self.hidden_dim_v, 'b_lstm')
    self.W_comb = self.weight_init(self.hidden_dim, self.hidden_dim,
                                   'W_comb')
    # b_comb is created with weight_init, i.e. as a
    # hidden_dim x hidden_dim matrix rather than a bias vector.
    self.b_comb = self.weight_init(self.hidden_dim, self.hidden_dim,
                                   'b_comb')

    node_tensors = tf.TensorArray(tf.float32,
                                  size=self.max_length,
                                  dynamic_size=True,
                                  clear_after_read=False,
                                  infer_shape=True)

    # Loop through the tensors, combining them
    def loop_body(node_tensors, i):
        node_is_leaf = tf.gather(self.is_leaf_t, i)
        left_child = tf.gather(self.left_children_t, i)
        right_child = tf.gather(self.right_children_t, i)
        # keep track of [c, H]; leaves start with c = 0.
        leaf_tensor = tf.stack([
            tf.zeros_like(self.lifted_feats_t[0]),
            tf.gather(self.lifted_feats_t, i)
        ],
                               axis=1)
        node_tensor = tf.where(
            node_is_leaf,
            leaf_tensor,
            # Nothing has been written before position 0, so skip the
            # child gather there; later positions combine the children of
            # the whole batch and tf.where picks per example.
            tf.cond(
                tf.equal(i, 0), lambda: leaf_tensor,
                lambda: self.combine_children(
                    node_tensors.gather(left_child),
                    node_tensors.gather(right_child))))
        node_tensors = node_tensors.write(i, node_tensor)
        i = tf.add(i, 1)
        return node_tensors, i

    # while less than #nodes
    loop_cond = lambda node_tensors, i: \
        tf.less(i, tf.reduce_max(self.input_lens))

    # parallel_iterations=1: every step reads entries written by earlier
    # steps.
    node_tensors, _ = tf.while_loop(loop_cond,
                                    loop_body, [node_tensors, 0],
                                    parallel_iterations=1)

    # Get the last [C, H], and retrieve H from that.
    last_pair = node_tensors.gather(self.input_lens - 1)
    last_H = tf.reshape(self.get_last_val(last_pair),
                        [-1, self.hidden_dim_v])

    # allow for inheritance
    # NOTE(review): the loop writes only max(input_lens) entries, but the
    # stacked result is reshaped to max_length positions - confirm that
    # batches always contain a tree of max_length nodes or that unwritten
    # entries read back as intended.
    hidden_vals = tf.reshape(node_tensors.stack()[:, :, 1],
                             [self.max_length, -1, self.hidden_dim_v])
    self.W_hy = self.weight_init(self.hidden_dim_v, self.output_dim,
                                 'W_hy')
    self.b_y = self.bias_init(self.output_dim, 'b_y')
    # One copy of W_hy per node position for the batched matmul.
    tiled_W_hy = tf.reshape(
        tf.tile(self.W_hy, [self.max_length, 1]),
        [self.max_length, self.hidden_dim_v, self.output_dim])
    self.model = tf.transpose(
        tf.matmul(hidden_vals, tiled_W_hy) + self.b_y, [1, 0, 2])
    self.last = tf.matmul(last_H, self.W_hy) + self.b_y
    self.node_tensors = node_tensors
def __init__(self,
             inp,
             inp_mask,
             seq2seq_gtruth,
             post_gtruth,
             hyper_params=None,
             training=True,
             name='Tacotron',
             reuse=False):
    """Build the computational graph.

    The graph consists of a text encoder (embedding, pre-net, CBHG), a
    learned bank of style tokens, two attention modules (over the encoder
    output and over the style tokens), a teacher-forced decoder driven by
    a ``tf.while_loop``, a CBHG post-net and two L1 losses.

    :param inp: Input tensor fed to the embedding layer; axis 0 is read as
        the batch size and axis 1 as the number of input time steps.
    :param inp_mask: Passed as ``sequence_length`` to the encoder CBHG,
        the ``post_text`` RNN and the text attention module.
    :param seq2seq_gtruth: Decoder target, batch major and rank 3 (it is
        transposed with perm (1, 0, 2)); axis 1 is the number of output
        time steps. Also used for teacher forcing.
    :param post_gtruth: Target for the post-net output.
    :param hyper_params: Hyper-parameter object; a default ``HyperParams``
        is created when None.
    :param training: Forwarded to the dropout layers and CBHG modules.
    :param name: Name of the outer variable scope.
    :param reuse: ``reuse`` flag of the outer variable scope.
        NOTE(review): both ``modules.cbhg`` calls pass ``reuse=False``
        regardless of this argument - confirm that is intended.
    """
    super(Tacotron, self).__init__(name)
    self.hyper_params = HyperParams(
    ) if hyper_params is None else hyper_params

    with tf.variable_scope(name, reuse=reuse):
        self.global_step = tf.Variable(0,
                                       name='global_step',
                                       trainable=False)
        self.learning_rate = tf.Variable(
            self.hyper_params.learning_rate[0],
            name='learning_rate',
            trainable=False,
            dtype=tf.float32)

        batch_size = tf.shape(inp)[0]
        input_time_steps = tf.shape(inp)[1]
        output_time_steps = tf.shape(seq2seq_gtruth)[1]

        ### Encoder [begin]
        with tf.variable_scope('character_embedding'):
            embed_inp = EmbeddingLayer(self.hyper_params.embed_class,
                                       self.hyper_params.embed_dim)(inp)
        with tf.variable_scope("changeToVarible"):
            # A single learned bank of style tokens, tiled so that every
            # batch element attends over the same tokens.
            self.single_style_token = tf.get_variable(
                'style_token', (1, self.hyper_params.styles_kind,
                                self.hyper_params.style_dim),
                dtype=tf.float32)
            self.style_token = tf.tile(self.single_style_token,
                                       (batch_size, 1, 1))
        with tf.variable_scope('encoder_pre_net'):
            pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                embed_inp, 256, tf.nn.relu),
                                           training=training)
            pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                pre_ed_inp, 128, tf.nn.relu),
                                           training=training)
        encoder_output = modules.cbhg(pre_ed_inp,
                                      training=training,
                                      k=16,
                                      bank_filters=128,
                                      projection_filters=(128, 128),
                                      highway_layers=4,
                                      highway_units=128,
                                      bi_gru_units=128,
                                      sequence_length=inp_mask,
                                      name='encoder_cbhg',
                                      reuse=False)
        with tf.variable_scope('post_text'):
            # `unkonwn_parallel_iterations` is not defined in this method;
            # presumably a module-level setting - verify.
            all_outputs, _ = tf.nn.dynamic_rnn(
                cell=GRUCell(256),
                inputs=encoder_output,
                sequence_length=inp_mask,
                dtype=encoder_output.dtype,
                parallel_iterations=unkonwn_parallel_iterations)
            # Time major, so that [-1] selects the last time step.
            # NOTE(review): for sequences shorter than the longest one in
            # the batch this is a padded step, not the last valid one -
            # confirm that is acceptable.
            all_outputs = tf.transpose(all_outputs, [1, 0, 2])
            static_encoder_output = all_outputs[-1]
        ### Encoder [end]

        ### Attention Module
        with tf.variable_scope('attention'):
            att_module = AttentionModule(256,
                                         encoder_output,
                                         sequence_length=inp_mask,
                                         time_major=False)
        with tf.variable_scope("attention_style"):
            att_module_style = AttentionModule(256,
                                               self.style_token,
                                               time_major=False)

        ### Decoder [begin]
        att_cell = GRUCell(256)
        dec_cell = MultiRNNCell(
            [ResidualWrapper(GRUCell(256)) for _ in range(2)])

        # prepare output alpha TensorArray
        with tf.variable_scope('prepare_decode'):
            # Each decoder step emits `reduc` output frames at once.
            reduc = self.hyper_params.reduction_rate
            reduced_time_steps = tf.div(output_time_steps, reduc)
            init_att_cell_state = att_cell.zero_state(
                batch_size, tf.float32)
            init_dec_cell_state = dec_cell.zero_state(
                batch_size, tf.float32)
            init_state_tup = tuple(
                [init_att_cell_state, init_dec_cell_state])
            init_output_ta = tf.TensorArray(size=reduced_time_steps,
                                            dtype=tf.float32)
            init_alpha_ta = tf.TensorArray(size=reduced_time_steps,
                                           dtype=tf.float32)
            init_weight_ta = tf.TensorArray(size=reduced_time_steps,
                                            dtype=tf.float32)
            init_weight_per_ta = tf.TensorArray(size=reduced_time_steps,
                                                dtype=tf.float32)
            init_alpha_style_ta = tf.TensorArray(size=reduced_time_steps,
                                                 dtype=tf.float32)
            time_major_seq2seq_gtruth = tf.transpose(seq2seq_gtruth,
                                                     perm=(1, 0, 2))
            # Teacher-forcing inputs: the ground truth shifted by `reduc`
            # all-zero frames, so step 0 is fed a zero frame.
            indic_array = tf.concat([
                tf.zeros([
                    reduc, batch_size, self.hyper_params.seq2seq_dim
                ]), time_major_seq2seq_gtruth
            ],
                                    axis=0)
            init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
            init_context_style = tf.zeros([batch_size, 256],
                                          dtype=tf.float32)
            init_time = tf.constant(0, dtype=tf.int32)

        cond = lambda x, *_: tf.less(x, reduced_time_steps)

        def body(this_time, old_context, old_context_style, old_output_ta,
                 old_alpha_ta, old_alpha_style_ta, old_weight_ta,
                 old_weight_per_ta, old_state_tup):
            # One decoder step; the argument order matches the loop
            # variables passed to tf.while_loop below.
            with tf.variable_scope('decoder_pre_net'):
                # Index of the last ground-truth frame of the previous
                # group inside the zero-shifted indic_array.
                dec_pre_ed_inp = indic_array[reduc * this_time + reduc -
                                             1]
                dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    dec_pre_ed_inp, 256, tf.nn.relu),
                                                   training=training)
                dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    dec_pre_ed_inp, 128, tf.nn.relu),
                                                   training=training)
            with tf.variable_scope('attention_rnn'):
                att_cell_inp = tf.concat([old_context, dec_pre_ed_inp],
                                         axis=-1)
                att_cell_out, att_cell_state = att_cell(
                    att_cell_inp, old_state_tup[0])
            with tf.variable_scope('attention'):
                # NOTE(review): the state is indexed with [0]; if this
                # GRUCell returns a plain [batch, units] tensor that
                # selects the first batch row - confirm the cell returns
                # a tuple/list state.
                query = att_cell_state[0]
                context, alpha = att_module(query)
                new_alpha_ta = old_alpha_ta.write(this_time, alpha)
            with tf.variable_scope("attention_style"):
                query_style = att_cell_state[0]
                context_style, alpha_style = att_module_style(query_style)
                new_alpha_style_ta = old_alpha_style_ta.write(
                    this_time, alpha_style)
            with tf.variable_scope("weighting"):
                # Two softmax weights that mix the text context and the
                # style context.
                weight_input = tf.concat(
                    [static_encoder_output, dec_pre_ed_inp], axis=-1)
                weighting = tf.layers.dense(weight_input, 128,
                                            tf.nn.sigmoid)
                weighting = tf.layers.dense(weighting, 2, tf.nn.softmax)
                weight_text, weight_style = tf.split(
                    weighting, [1, 1], -1)
                new_weight_ta = old_weight_ta.write(
                    this_time, weight_text)
            with tf.variable_scope('decoder_rnn'):
                weighting_context = weight_text * context + weight_style * context_style
                # Mean share of the style term in the absolute magnitude
                # of the mixed context; recorded only, not fed back.
                weight_per = tf.reduce_mean(
                    tf.abs(weight_style * context_style) /
                    (tf.abs(weight_text * context) +
                     tf.abs(weight_style * context_style)))
                new_weight_per_ta = old_weight_per_ta.write(
                    this_time, weight_per)
                dec_input = tf.layers.dense(
                    tf.concat([att_cell_out, weighting_context], axis=-1),
                    256)
                dec_cell_out, dec_cell_state = dec_cell(
                    dec_input, old_state_tup[1])
                dense_out = tf.layers.dense(
                    dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                new_output_ta = old_output_ta.write(this_time, dense_out)
            new_state_tup = tuple([att_cell_state, dec_cell_state])
            return tf.add(
                this_time, 1
            ), context, context_style, new_output_ta, new_alpha_ta, new_alpha_style_ta, new_weight_ta, new_weight_per_ta, new_state_tup

        # run loop
        _, _, _, seq2seq_output_ta, alpha_ta, alpha_style_ta, weight_ta, weight_per_ta, *_ = tf.while_loop(
            cond,
            body, [
                init_time, init_context, init_context_style,
                init_output_ta, init_alpha_ta, init_alpha_style_ta,
                init_weight_ta, init_weight_per_ta, init_state_tup
            ],
            parallel_iterations=unkonwn_parallel_iterations)

        with tf.variable_scope('reshape_decode'):
            # Stack the per-step outputs, go batch major and unfold the
            # `reduc` frames packed into every step.
            seq2seq_output = tf.reshape(
                seq2seq_output_ta.stack(),
                shape=(reduced_time_steps, batch_size,
                       self.hyper_params.seq2seq_dim * reduc))
            seq2seq_output = tf.reshape(
                tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                shape=(batch_size, output_time_steps,
                       self.hyper_params.seq2seq_dim))
            self.seq2seq_output = seq2seq_output

            alpha_output = tf.reshape(alpha_ta.stack(),
                                      shape=(reduced_time_steps,
                                             batch_size,
                                             input_time_steps))
            alpha_output = tf.expand_dims(
                tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
            self.alpha_output = alpha_output

            alpha_output_style = tf.reshape(
                alpha_style_ta.stack(),
                shape=(reduced_time_steps, batch_size,
                       self.hyper_params.styles_kind))
            alpha_output_style = tf.expand_dims(
                tf.transpose(alpha_output_style, perm=(1, 0, 2)),
                -1)  # batch major
            self.alpha_output_style = alpha_output_style

            weight_ta = tf.reshape(weight_ta.stack(),
                                   shape=(reduced_time_steps, batch_size,
                                          1))
            weight_ta = tf.transpose(weight_ta, perm=(1, 0, 2))
            self.weight_ta = weight_ta

            weight_per_ta = tf.reshape(weight_per_ta.stack(),
                                       shape=(reduced_time_steps, 1))
            self.weight_per_ta = weight_per_ta
        ### Decoder [end]

        ### PostNet [begin]
        post_output = modules.cbhg(
            seq2seq_output,
            training=training,
            k=8,
            bank_filters=128,
            projection_filters=(256, self.hyper_params.seq2seq_dim),
            highway_layers=4,
            highway_units=128,
            bi_gru_units=128,
            sequence_length=None,
            name='decoder_cbhg',
            reuse=False)
        post_output = tf.layers.dense(post_output,
                                      self.hyper_params.post_dim,
                                      name='post_linear_transform')
        self.post_output = post_output
        ### PostNet [end]

        ### Loss
        with tf.variable_scope('loss'):
            self.seq2seq_loss = l1_loss(seq2seq_gtruth, seq2seq_output)
            self.post_loss = l1_loss(post_gtruth, post_output)
            self.loss = self.seq2seq_loss + self.post_loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return the YOLO loss tensor, summed over all output scales.

    Args:
        args: flat sequence. The first `num_layers` entries are the network
            outputs (`yolo_outputs`); the remaining entries are the matching
            ground-truth tensors (`y_true`).
        anchors: anchor sizes. Indexed with a list of indices below, so
            presumably a numpy array of shape (num_anchors, 2) -- TODO confirm
            against the caller.
        num_classes: number of object classes, forwarded to `yolo_head`.
        ignore_thresh: background predictions whose best IoU with any
            ground-truth box is not below this value are excluded from the
            confidence loss.
        print_loss: when true, the running loss is wrapped in `tf.Print` so
            the individual terms are logged each time it is evaluated.

    Returns:
        The accumulated loss (xy + wh + confidence + class over all scales),
        each term divided by the batch size.
    '''
    # One output scale per group of three anchors (default setting).
    num_layers = len(anchors) // 3
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]

    # Anchor indices used by each output scale, coarsest grid first:
    # anchors 6,7,8 -> first output, 3,4,5 -> second, 0,1,2 -> third.
    # NOTE(review): the two-scale fallback uses only two anchors per scale
    # ([3, 4] and [1, 2]); this was changed from the three-per-scale variant
    # that used to be commented out here -- confirm it is intentional.
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4], [1, 2]]

    # Network input size: grid of the first output times 32. The original
    # note explains this as five stride-2 convolutions, i.e. a 2**5 = 32x
    # downscale, e.g. (13, 13) -> (416, 416).
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # Grid size of every output scale, e.g. [(13, 13), (26, 26), (52, 52)].
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))  # batch size as float

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(
            y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # Cells without an object have w = h = 0; avoid log(0) = -inf there.
        raw_true_wh = K.switch(
            object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        # Smaller boxes get a larger weight.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each image of the batch.
        ignore_mask = tf.TensorArray(
            K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b only.
            true_box = tf.boolean_mask(
                y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # tf.while_loop is the public form of the loop that used to be
        # reached through the private alias K.control_flow_ops, which newer
        # Keras backends do not expose.
        _, ignore_mask = tf.while_loop(
            lambda b, *_: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = (
            object_mask * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True)
            + (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True)
            * ignore_mask)
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def yolo_loss(inputs, num_anchors):
    """Compute the YOLO loss summed over the three grid levels.

    Args:
        inputs: flat sequence; the first `num_anchors // 3` entries are the
            model outputs, the rest are the matching ground-truth tensors.
        num_anchors: total number of anchors; a third of it is the number of
            grid levels.

    Returns:
        Sum of the xy, wh, confidence and class losses over all levels, each
        divided by the batch size.

    Note:
        `anchors` and `num_classes` are not parameters; they are read from
        the enclosing scope.
    """
    ignore_thresh = .5  # IoU threshold used when building the ignore mask
    num_layers = num_anchors // 3
    y_pred = inputs[:num_layers]  # values computed by the model
    y_true = inputs[num_layers:]  # reference values
    # Anchor indices that belong to each grid level.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    true_dtype = K.dtype(y_true[0])
    # Input image size: first grid times 32, e.g. (13 x 13) * 32 = (416 x 416).
    input_shape = K.cast(K.shape(y_pred[0])[1:3] * 32, true_dtype)
    # Grid size of every level, e.g. ((13, 13), (26, 26), (52, 52)).
    grid_shapes = [
        K.cast(K.shape(y_pred[level])[1:3], true_dtype)
        for level in range(num_layers)
    ]

    loss = 0
    m = K.shape(y_pred[0])[0]  # batch size (tensor)
    batch_size = K.cast(m, K.dtype(y_pred[0]))

    for level in range(num_layers):
        level_true = y_true[level]
        level_pred = y_pred[level]
        level_anchors = anchors[anchor_mask[level]]

        # Objectness flag (fifth value of every box record).
        object_mask = level_true[..., 4:5]
        # One-hot class part of the ground truth.
        true_class = level_true[..., 5:]

        num_sub_anchors = len(level_anchors)
        anchors_tensor = K.reshape(
            K.constant(level_anchors), [1, 1, 1, num_sub_anchors, 2])

        # Build a grid holding the (x, y) index of every cell.
        grid_shape = K.shape(level_pred)[1:3]  # grid height and width
        rows = K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1])
        grid_y = K.tile(rows, [1, grid_shape[1], 1, 1])
        cols = K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1])
        grid_x = K.tile(cols, [grid_shape[0], 1, 1, 1])
        grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(level_pred))

        # Split the channel axis into (anchor, box record).
        feats = K.reshape(
            level_pred,
            [-1, grid_shape[0], grid_shape[1], num_sub_anchors,
             num_classes + 5])

        # --- box centre ---
        pred_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
            grid_shape[::-1], K.dtype(feats))
        # Ground-truth centre expressed relative to its cell.
        true_xy = level_true[..., :2] * grid_shapes[level][::-1] - grid
        # The larger the box, the smaller its weight.
        box_loss_scale = 2 - level_true[..., 2:3] * level_true[..., 3:4]
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            true_xy, feats[..., 0:2], from_logits=True)

        # --- box width / height ---
        pred_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
            input_shape[::-1], K.dtype(feats))
        true_wh = K.log(
            level_true[..., 2:4] / level_anchors * input_shape[::-1])
        # Keep the log value only where an object is present.
        true_wh = K.switch(object_mask, true_wh, K.zeros_like(true_wh))
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            true_wh - feats[..., 2:4])

        pred_box = K.concatenate([pred_xy, pred_wh])

        # --- ignore mask: one entry per image of the batch ---
        ignore_mask = tf.TensorArray(true_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes (centre, width, height) of image b.
            true_box = tf.boolean_mask(
                level_true[b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = calc_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU stays below the threshold, else 0.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        # --- confidence: object cells plus non-ignored background cells ---
        conf_bce = K.binary_crossentropy(
            object_mask, feats[..., 4:5], from_logits=True)
        confidence_loss = (
            object_mask * conf_bce
            + (1 - object_mask) * conf_bce * ignore_mask)

        # --- class ---
        class_loss = object_mask * K.binary_crossentropy(
            true_class, feats[..., 5:], from_logits=True)

        # Sum every term and normalise by the batch size.
        xy_loss = K.sum(xy_loss) / batch_size
        wh_loss = K.sum(wh_loss) / batch_size
        confidence_loss = K.sum(confidence_loss) / batch_size
        class_loss = K.sum(class_loss) / batch_size
        loss += xy_loss + wh_loss + confidence_loss + class_loss

    return loss
def create_output_ta(spec):
    """Return an empty TensorArray sized for one entry per sequence step.

    Each element has shape `[static_batch_size] + spec.shape` and dtype
    `spec.dtype`. `sequence_length` and `static_batch_size` are read from
    the enclosing scope.
    """
    per_step_shape = tf.TensorShape([static_batch_size]).concatenate(
        spec.shape)
    return tf.TensorArray(
        spec.dtype, size=sequence_length, element_shape=per_step_shape)
def __init__(self, inp, inp_mask, inp_att, decode_time_steps,
             hyper_params=None, name='Tacotron'):
    """Build the inference-time computational graph.

    Every dropout / CBHG call in this graph is built with `training=False`.

    :param inp: input fed to the character embedding layer.
    :param inp_mask: passed as `sequence_length` to the encoder CBHG and the
        attention module.
    :param inp_att: weights multiplied with the style tokens and summed over
        the token axis to form the sentence style vector.
    :param decode_time_steps: number of decoder loop iterations; each one
        emits `reduction_rate` output frames.
    :param hyper_params: hyper-parameter object; a default `HyperParams()`
        is created when omitted.
    :param name: model name, also used as the outer variable scope.
    """
    super(Tacotron, self).__init__(name)
    if hyper_params is None:
        hyper_params = HyperParams()
    self.hyper_params = hyper_params
    hp = self.hyper_params

    # NOTE(review): the scope nesting below was reconstructed from a
    # flattened copy of this method -- confirm it against a checkpoint's
    # variable names before relying on it.
    with tf.variable_scope(name):
        self.global_step = tf.Variable(
            0, name='global_step', trainable=False)

        batch_size = tf.shape(inp)[0]
        input_time_steps = tf.shape(inp)[1]
        reduc = hp.reduction_rate
        output_time_steps = decode_time_steps * reduc

        ### Encoder [begin]
        with tf.variable_scope('character_embedding'):
            embedding = EmbeddingLayer(hp.embed_class, hp.embed_dim)
            embed_inp = embedding(inp)

        with tf.variable_scope("changeToVarible"):
            # One learnable bank of style tokens, tiled over the batch.
            self.single_style_token = tf.get_variable(
                'style_token', (1, hp.styles_kind, hp.style_dim),
                dtype=tf.float32)
            self.style_token = tf.tile(
                self.single_style_token, (batch_size, 1, 1))

        with tf.variable_scope('encoder_pre_net'):
            hidden = tf.layers.dense(embed_inp, 256, tf.nn.relu)
            pre_ed_inp = tf.layers.dropout(hidden, training=False)
            hidden = tf.layers.dense(pre_ed_inp, 128, tf.nn.relu)
            pre_ed_inp = tf.layers.dropout(hidden, training=False)

        encoder_output = modules.cbhg(
            pre_ed_inp, training=False, k=16, bank_filters=128,
            projection_filters=(128, 128), highway_layers=4,
            highway_units=128, bi_gru_units=128,
            sequence_length=inp_mask, name='encoder_cbhg', reuse=False)

        # Style vector: weighted sum of the style tokens, with the weights
        # supplied by the caller. (An earlier, disabled variant predicted
        # these weights from the encoder output instead.)
        inp_att = tf.Print(
            inp_att, [inp_att], message='inp_att', summarize=10)
        sentence_style = tf.reduce_sum(
            tf.expand_dims(inp_att, axis=-1) * self.style_token, axis=1)
        sentence_style = tf.Print(
            sentence_style, [sentence_style], message='style', summarize=10)
        ### Encoder [end]

        ### Attention Module
        with tf.variable_scope('attention'):
            att_module = AttentionModule(
                256, encoder_output, sequence_length=inp_mask,
                time_major=False)

        ### Decoder [begin]
        att_cell = GRUCell(256)
        dec_cell = MultiRNNCell(
            [ResidualWrapper(GRUCell(256)) for _ in range(2)])

        # Initial loop state and the TensorArrays collecting the outputs.
        with tf.variable_scope('prepare_decode'):
            reduced_time_steps = tf.div(output_time_steps, reduc)
            init_att_cell_state = att_cell.zero_state(batch_size, tf.float32)
            init_dec_cell_state = dec_cell.zero_state(batch_size, tf.float32)
            init_state_tup = (init_att_cell_state, init_dec_cell_state)
            init_output_ta = tf.TensorArray(
                size=reduced_time_steps, dtype=tf.float32)
            init_alpha_ta = tf.TensorArray(
                size=reduced_time_steps, dtype=tf.float32)
            init_weight_per_ta = tf.TensorArray(
                size=reduced_time_steps, dtype=tf.float32)
            # All-zero "go" frame fed to the first decoder step.
            go_array = tf.zeros(
                [batch_size, hp.seq2seq_dim], dtype=tf.float32)
            init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
            init_time = tf.constant(0, dtype=tf.int32)

        def cond(step, *_):
            return tf.less(step, reduced_time_steps)

        def body(this_time, old_output_ta, old_alpha_ta, old_weight_per_ta,
                 old_state_tup, last_context, last_output):
            with tf.variable_scope('decoder_pre_net'):
                hidden = tf.layers.dense(last_output, 256, tf.nn.relu)
                dec_pre_ed_inp = tf.layers.dropout(hidden, training=False)
                hidden = tf.layers.dense(dec_pre_ed_inp, 128, tf.nn.relu)
                dec_pre_ed_inp = tf.layers.dropout(hidden, training=False)

            with tf.variable_scope('attention_rnn'):
                att_cell_inp = tf.concat(
                    [last_context, dec_pre_ed_inp], axis=-1)
                att_cell_out, att_cell_state = att_cell(
                    att_cell_inp, old_state_tup[0])

            with tf.variable_scope('attention'):
                query = att_cell_state[0]
                context, alpha = att_module(query)
                new_alpha_ta = old_alpha_ta.write(this_time, alpha)

            with tf.variable_scope('decoder_rnn'):
                # The style vector is added onto the attention context.
                weighting_context = context + sentence_style
                # Mean share of the style vector in the combined magnitude.
                style_abs = tf.abs(sentence_style)
                weight_per = tf.reduce_mean(
                    style_abs / (tf.abs(context) + tf.abs(sentence_style)))
                new_weight_per_ta = old_weight_per_ta.write(
                    this_time, weight_per)
                dec_input = tf.layers.dense(
                    tf.concat([att_cell_out, weighting_context], axis=-1),
                    256)
                dec_cell_out, dec_cell_state = dec_cell(
                    dec_input, old_state_tup[1])
                dense_out = tf.layers.dense(
                    dec_cell_out, hp.seq2seq_dim * reduc)

            new_output_ta = old_output_ta.write(this_time, dense_out)
            # Only the last of the `reduc` frames is fed back as next input.
            new_output = dense_out[:, -hp.seq2seq_dim:]
            new_state_tup = (att_cell_state, dec_cell_state)
            return (tf.add(this_time, 1), new_output_ta, new_alpha_ta,
                    new_weight_per_ta, new_state_tup, context, new_output)

        # run loop
        loop_result = tf.while_loop(
            cond, body,
            [init_time, init_output_ta, init_alpha_ta, init_weight_per_ta,
             init_state_tup, init_context, go_array])
        seq2seq_output_ta = loop_result[1]

        with tf.variable_scope('reshape_decode'):
            # [steps, batch, dim * reduc] -> [batch, steps * reduc, dim]
            time_major_output = tf.reshape(
                seq2seq_output_ta.stack(),
                shape=(reduced_time_steps, batch_size,
                       hp.seq2seq_dim * reduc))
            seq2seq_output = tf.reshape(
                tf.transpose(time_major_output, perm=(1, 0, 2)),
                shape=(batch_size, output_time_steps, hp.seq2seq_dim))
            self.seq2seq_output = seq2seq_output
            # The attention weights and style-weight statistics gathered in
            # the loop are not exposed as attributes here.
        ### Decoder [end]

        ### PostNet [begin]
        post_output = modules.cbhg(
            seq2seq_output, training=False, k=8, bank_filters=128,
            projection_filters=(256, hp.seq2seq_dim), highway_layers=4,
            highway_units=128, bi_gru_units=128, sequence_length=None,
            name='decoder_cbhg', reuse=False)
        post_output = tf.layers.dense(
            post_output, hp.post_dim, name='post_linear_transform')
        self.post_output = post_output
        ### PostNet [end]
def run(self, trajectory, policy_state=None):
    """Replay the policy over every step of `trajectory`.

    Tensors in `trajectory` are read as `[time, batch, ...]` when
    `self._time_major` is true and as `[batch, time, ...]` otherwise; the
    returned actions and policy info use the same layout.

    Args:
        trajectory: The `Trajectory` to run against.
        policy_state: (optional) A nest Tensor with initial step policy
            state. When omitted, the policy's initial state for the batch
            size is used.

    Returns:
        output_actions: A nest of the actions that the policy took.
        output_policy_info: A nest of the policy info that the policy
            emitted.
        policy_state: A nest Tensor with final step policy state.

    Raises:
        TypeError: If `policy_state` structure doesn't match
            `self.policy.policy_state_spec`, or `trajectory` structure
            doesn't match `self.policy.trajectory_spec`.
        ValueError: If `policy_state` doesn't match
            `self.policy.policy_state_spec`, or `trajectory` structure
            doesn't match `self.policy.trajectory_spec`.
        ValueError: If `trajectory` lacks two outer dims.
    """
    trajectory_spec = self._policy.trajectory_spec
    outer_dims = nest_utils.get_outer_shape(trajectory, trajectory_spec)

    num_outer_dims = tf.compat.dimension_value(outer_dims.shape[0])
    if num_outer_dims != 2:
        raise ValueError(
            "Expected two outer dimensions, but saw '{}' dimensions.\n"
            "Trajectory:\n{}.\nTrajectory spec from policy:\n{}.".format(
                tf.compat.dimension_value(outer_dims.shape[0]), trajectory,
                trajectory_spec))

    # Which outer axis is time and which is batch depends on the layout.
    time_axis, batch_axis = (0, 1) if self._time_major else (1, 0)
    sequence_length = outer_dims[time_axis]
    batch_size = outer_dims[batch_axis]
    static_batch_size = tf.compat.dimension_value(
        trajectory.discount.shape[batch_axis])

    if policy_state is None:
        policy_state = self._policy.get_initial_state(batch_size)
    else:
        nest_utils.assert_same_structure(policy_state,
                                         self._policy.policy_state_spec)

    if not self._time_major:
        # Make trajectory time-major.
        trajectory = tf.nest.map_structure(common.transpose_batch_time,
                                           trajectory)

    def to_tensor_array(tensor):
        array = tf.TensorArray(tensor.dtype, size=sequence_length)
        return array.unstack(tensor)

    trajectory_tas = tf.nest.map_structure(to_tensor_array, trajectory)

    def create_output_ta(spec):
        element_shape = tf.TensorShape([static_batch_size]).concatenate(
            spec.shape)
        return tf.TensorArray(
            spec.dtype, size=sequence_length, element_shape=element_shape)

    output_action_tas = tf.nest.map_structure(create_output_ta,
                                              trajectory_spec.action)
    output_policy_info_tas = tf.nest.map_structure(
        create_output_ta, trajectory_spec.policy_info)

    def first_entry(ta):
        return ta.read(0)

    # First time step: step type and observation come from row 0; reward is
    # all zeros and discount all ones.
    time_step = ts.TimeStep(
        step_type=first_entry(trajectory_tas.step_type),
        reward=tf.nest.map_structure(lambda t: tf.zeros_like(t[0]),
                                     trajectory.reward),
        discount=tf.ones_like(trajectory.discount[0]),
        observation=tf.nest.map_structure(first_entry,
                                          trajectory_tas.observation))

    def process_step(time, time_step, policy_state, output_action_tas,
                     output_policy_info_tas):
        """Take an action on the given step, and update output TensorArrays.

        Args:
            time: Step time. The outputs are written at row `time - 1`.
            time_step: Previous step's `TimeStep`.
            policy_state: Policy state tensor or nested structure of tensors.
            output_action_tas: Nest of `tf.TensorArray` containing new
                actions.
            output_policy_info_tas: Nest of `tf.TensorArray` containing new
                policy info.

        Returns:
            policy_state: The next policy state.
            next_output_action_tas: Updated `output_action_tas`.
            next_output_policy_info_tas: Updated `output_policy_info_tas`.
        """
        action_step = self._policy.action(time_step, policy_state)

        def write_previous_row(ta, value):
            return ta.write(time - 1, value)

        next_output_action_tas = tf.nest.map_structure(
            write_previous_row, output_action_tas, action_step.action)
        next_output_policy_info_tas = tf.nest.map_structure(
            write_previous_row, output_policy_info_tas, action_step.info)
        return (action_step.state, next_output_action_tas,
                next_output_policy_info_tas)

    def loop_body(time, time_step, policy_state, output_action_tas,
                  output_policy_info_tas):
        """Runs a step in environment.

        While loop will call multiple times.

        Args:
            time: Step time.
            time_step: Previous step's `TimeStep`.
            policy_state: Policy state tensor or nested structure of tensors.
            output_action_tas: Updated nest of `tf.TensorArray`, the new
                actions.
            output_policy_info_tas: Updated nest of `tf.TensorArray`, the new
                policy info.

        Returns:
            loop_vars for next iteration of tf.while_loop.
        """
        step_result = process_step(time, time_step, policy_state,
                                   output_action_tas, output_policy_info_tas)
        policy_state, next_action_tas, next_policy_info_tas = step_result

        def read_current(ta):
            return ta.read(time)

        def read_previous(ta):
            return ta.read(time - 1)

        # Step type and observation come from row `time`; reward and
        # discount come from row `time - 1`.
        next_time_step = ts.TimeStep(
            step_type=read_current(trajectory_tas.step_type),
            observation=tf.nest.map_structure(read_current,
                                              trajectory_tas.observation),
            reward=tf.nest.map_structure(read_previous,
                                         trajectory_tas.reward),
            discount=read_previous(trajectory_tas.discount))

        return (time + 1, next_time_step, policy_state, next_action_tas,
                next_policy_info_tas)

    # The loop starts at 1 because the time step for row 0 was built above.
    time = tf.constant(1)
    loop_result = tf.while_loop(
        cond=lambda time, *_: time < sequence_length,
        body=loop_body,
        loop_vars=[
            time, time_step, policy_state, output_action_tas,
            output_policy_info_tas
        ],
        back_prop=False,
        name="trajectory_replay_loop")
    (time, time_step, policy_state, output_action_tas,
     output_policy_info_tas) = loop_result

    # Run the last time step
    last_policy_state, output_action_tas, output_policy_info_tas = (
        process_step(time, time_step, policy_state, output_action_tas,
                     output_policy_info_tas))

    def stack_ta(ta):
        stacked = ta.stack()
        if self._time_major:
            return stacked
        # Return to the caller's batch-major layout.
        return common.transpose_batch_time(stacked)

    stacked_output_actions = tf.nest.map_structure(stack_ta,
                                                   output_action_tas)
    stacked_output_policy_info = tf.nest.map_structure(
        stack_ta, output_policy_info_tas)

    return (stacked_output_actions, stacked_output_policy_info,
            last_policy_state)
def _decoder(cell, labels, encoder_output, sequence_length, initial_state,
             dtype=None, scope=None):
    """Run `cell` over the label sequence, one encoder step per label step.

    Args:
        cell: recurrent cell called as `cell([input_t, memory_t], state)`;
            assumed to be GRUCell-like (output and state are the same kind
            of tensor).
        labels: batch-major input sequence `[batch, time, dim]` (the
            original notes mention dim 50 for the author's setup).
        encoder_output: batch-major encoder states `[batch, time, dim]`
            (512 in the original notes). It is read at the same time index
            as `labels`, so it must have at least as many steps.
        sequence_length: mapping; `sequence_length["target"]` is forwarded
            to `_copy_through` to mask steps past each sequence's end.
        initial_state: tensor projected to `cell.output_size` and squashed
            with tanh to form the initial state (the original notes
            describe it as the mean of the encoder output over time --
            TODO confirm against the caller).
        dtype: dtype of the zero output and the variable scope; defaults to
            `labels.dtype`.
        scope: variable scope name; defaults to "decoder".

    Returns:
        dict with "outputs" (`[batch, time, cell.output_size]`) and
        "initial_state" (the transformed initial state).
    """
    # Assume that the underlying cell is GRUCell-like
    batch = tf.shape(labels)[0]
    time_steps = tf.shape(labels)[1]
    dtype = dtype or labels.dtype
    output_size = cell.output_size

    # Output substituted by `_copy_through` for steps past a sequence's end.
    zero_output = tf.zeros([batch, output_size], dtype)

    with tf.variable_scope(scope or "decoder", dtype=dtype):
        # Switch to time-major so each TensorArray element is one time step.
        labels = tf.transpose(labels, [1, 0, 2])
        encoder_output = tf.transpose(encoder_output, [1, 0, 2])

        # NOTE(review): the arrays are float32 regardless of `dtype`.
        input_ta = tf.TensorArray(tf.float32, time_steps,
                                  tensor_array_name="input_array")
        memory_ta = tf.TensorArray(tf.float32, tf.shape(encoder_output)[0],
                                   tensor_array_name="memory_array")
        output_ta = tf.TensorArray(tf.float32, time_steps,
                                   tensor_array_name="output_array")
        # One [batch, dim] slice per time step.
        input_ta = input_ta.unstack(labels)
        memory_ta = memory_ta.unstack(encoder_output)

        # Linear projection with bias followed by tanh gives the initial
        # recurrent state of width `output_size`.
        initial_state = layers.nn.linear(initial_state, output_size, True,
                                         False, scope="s_transform")
        initial_state = tf.tanh(initial_state)

        def loop_func(t, out_ta, state):
            inp_t = input_ta.read(t)
            mem_t = memory_ta.read(t)  # encoder state at the same step

            cell_input = [inp_t, mem_t]
            cell_output, new_state = cell(cell_input, state)
            # Past the end of a sequence, emit zeros and carry the state.
            cell_output = _copy_through(t, sequence_length["target"],
                                        zero_output, cell_output)
            new_state = _copy_through(t, sequence_length["target"], state,
                                      new_state)
            out_ta = out_ta.write(t, cell_output)
            return t + 1, out_ta, new_state

        time = tf.constant(0, dtype=tf.int32, name="time")
        loop_vars = (time, output_ta, initial_state)

        outputs = tf.while_loop(lambda t, *_: t < time_steps, loop_func,
                                loop_vars, parallel_iterations=32,
                                swap_memory=True)

        output_final_ta = outputs[1]
        final_output = output_final_ta.stack()  # [time, batch, output_size]
        final_output.set_shape([None, None, output_size])
        # Back to batch-major: [batch, time, output_size].
        final_output = tf.transpose(final_output, [1, 0, 2])

        result = {"outputs": final_output, "initial_state": initial_state}

    return result
def _unstack_ta(inp):
    """Split `inp` along its first axis into a TensorArray.

    The array has one element per slice of `inp`; each element keeps the
    dtype of `inp` and the static shape of its remaining axes.
    """
    num_elements = tf.shape(inp)[0]
    per_element_shape = inp.get_shape()[1:]
    array = tf.TensorArray(
        dtype=inp.dtype, size=num_elements, element_shape=per_element_shape)
    return array.unstack(inp)
def maml_train_step(self, x_speech_train, x_image_train, x_speech_test,
                    x_image_test, num_steps, meta_optimizer, training=True,
                    stop_gradients=False, clip_norm=None):
    """Run one meta-update over a batch of tasks.

    For every task in the meta-batch, the adapt model is trained for
    `num_steps` inner updates on the task's training pair and evaluated on
    its test pair after each update. The meta-objective is the mean of the
    tasks' test losses at the final inner update; its gradient with respect
    to the base speech and vision models is applied with `meta_optimizer`.

    Args:
        x_speech_train, x_image_train: per-task training inputs, indexed by
            task along axis 0.
        x_speech_test, x_image_test: per-task test inputs, indexed the same
            way.
        num_steps: number of inner updates per task.
        meta_optimizer: a `tf.keras` optimizer, or a callable taking
            `(network_a, network_b, gradients_a, gradients_b)`.
        training: forwarded to the adapt model's train and predict calls.
        stop_gradients: forwarded to the adapt model's `train_step`.
        clip_norm: when given, used for the inner step and to clip each
            network's meta-gradients by global norm.

    Returns:
        `(meta_loss, inner_losses, meta_losses)`; the last two are stacked
        per task and per inner update.

    Raises:
        ValueError: if `meta_optimizer` is neither a keras optimizer nor
            callable.
    """
    # NOTE(review): block nesting was reconstructed from a flattened copy of
    # this method (extent of the tape context, debug check under the clip
    # branch) -- confirm against the original layout.
    meta_batch_size = tf.shape(x_speech_train)[0]
    speech_net = self.speech_model.model
    vision_net = self.vision_model.model

    with tf.GradientTape() as meta_tape:
        # watch vars in case of tf.Tensor's which are not tracked by default
        meta_tape.watch(speech_net.trainable_variables)
        meta_tape.watch(vision_net.trainable_variables)

        # TensorArrays accumulate results in the dynamically unrolled loop
        inner_losses = tf.TensorArray(tf.float32, size=meta_batch_size)
        meta_losses = tf.TensorArray(tf.float32, size=meta_batch_size)

        # train and evaluate the meta-objective on each task in the batch
        for batch_index in tf.range(meta_batch_size):
            task_speech_train = x_speech_train[batch_index]
            task_image_train = x_image_train[batch_index]
            task_speech_test = x_speech_test[batch_index]
            task_image_test = x_image_test[batch_index]

            # train and test losses per inner update for this task
            train_losses = tf.TensorArray(tf.float32, size=num_steps)
            test_losses = tf.TensorArray(tf.float32, size=num_steps)

            # initial "weight update" is the current base model weights
            speech_weight_updates = speech_net.trainable_weights
            vision_weight_updates = vision_net.trainable_weights

            for update_step in tf.range(num_steps):
                # make sure the adapt model carries the previous updates
                model_utils.update_model_weights(
                    self.adapt_model.speech_model.model,
                    speech_weight_updates, self.speech_weights_structure)
                model_utils.update_model_weights(
                    self.adapt_model.vision_model.model,
                    vision_weight_updates, self.vision_weights_structure)

                # update the adapt model on the task training samples
                inner_task_loss, _, _ = self.adapt_model.train_step(
                    task_speech_train, task_image_train,
                    optimizer=self.inner_optimizer, training=training,
                    stop_gradients=stop_gradients, clip_norm=clip_norm)

                # evaluate the updated model on the task test samples
                test_speech_out = self.adapt_model.speech_model.predict(
                    task_speech_test, training=training)
                test_image_out = self.adapt_model.vision_model.predict(
                    task_image_test, training=training)
                meta_task_loss = self.loss(test_speech_out, test_image_out)

                train_losses = train_losses.write(update_step,
                                                  inner_task_loss)
                test_losses = test_losses.write(update_step, meta_task_loss)

            inner_losses = inner_losses.write(batch_index,
                                              train_losses.stack())
            meta_losses = meta_losses.write(batch_index, test_losses.stack())

        # get stacked tensors from the arrays
        inner_losses = inner_losses.stack()
        meta_losses = meta_losses.stack()

        # average the task meta-objectives taken at the final inner update
        final_step_losses = tf.stack(meta_losses)[:, -1]
        meta_loss = tf.reduce_mean(final_step_losses)

    # gradient of the meta-objective w.r.t. the base model variables
    network_s_variables = speech_net.trainable_variables
    network_i_variables = vision_net.trainable_variables
    meta_gradients_s, meta_gradients_i = meta_tape.gradient(
        meta_loss, [network_s_variables, network_i_variables])

    debug_enabled = "debug" in FLAGS and FLAGS.debug

    if debug_enabled:
        for grad in meta_gradients_s + meta_gradients_i:
            if tf.math.count_nonzero(tf.math.is_nan(grad)) >= 1:
                tf.print("NaN grad encountered:", grad)
                tf.print("Loss:", meta_loss)

    # clip gradients by global norm if specified
    if clip_norm is not None:
        meta_gradients_s, global_norm = tf.clip_by_global_norm(
            meta_gradients_s, clip_norm)
        # `global_norm` is overwritten here, so the debug message below
        # reports the vision network's norm only.
        meta_gradients_i, global_norm = tf.clip_by_global_norm(
            meta_gradients_i, clip_norm)
        # debugging in eager mode
        if debug_enabled and global_norm > clip_norm:
            tf.print("Clipping gradients with global norm", global_norm,
                     "to", "clip norm", clip_norm)

    if isinstance(meta_optimizer, tf.keras.optimizers.Optimizer):
        all_gradients = meta_gradients_s + meta_gradients_i
        all_variables = network_s_variables + network_i_variables
        meta_optimizer.apply_gradients(zip(all_gradients, all_variables))
    elif callable(meta_optimizer):
        meta_optimizer(speech_net, vision_net, meta_gradients_s,
                       meta_gradients_i)
    else:
        raise ValueError(
            "Argument `meta_optimizer` should be a tf.keras optimizer or a "
            "callable that takes arguments "
            "(network_a, network_b, gradients_a, gradients_b).")

    return meta_loss, inner_losses, meta_losses
def process_loss(feature_map_i, y_true, anchors):
    """Compute the four loss terms for a single detection scale.

    The raw head output is decoded with ``process_layer`` and compared with
    ``y_true``, whose last axis is sliced as ``[0:4]`` box, ``[4:5]`` object
    mask, ``[5:-1]`` class targets and ``[-1:]`` per-box mix weight.

    Args:
        feature_map_i: Raw feature map of one scale. Axis 0 is used as the
            batch axis and axes 1-2 as the grid size (presumably
            ``[N, H, W, C]`` -- TODO confirm against the caller).
        y_true: Ground-truth tensor for the same scale, laid out as above.
        anchors: Anchor sizes; forwarded to ``process_layer`` and used to
            normalise box widths/heights.

    Returns:
        Tuple ``(xy_loss, wh_loss, conf_loss, class_loss)``. Each term is a
        sum over all elements divided by the batch size.
    """
    # Both entries are `config.image_size`; built once and reused below.
    image_hw = tf.constant([config.image_size, config.image_size])

    feature_shape = tf.shape(feature_map_i)
    num_images = feature_shape[0]
    grid_hw = feature_shape[1:3]
    batch_size = tf.cast(num_images, tf.float32)

    # Image-to-grid downscale ratio, reversed before it is applied to xy.
    ratio = tf.cast(image_hw / grid_hw, tf.float32)
    ratio_reversed = ratio[::-1]

    x_y_offset, pred_boxes, pred_conf, pred_prob = process_layer(
        feature_map_i, anchors)

    # Split the ground truth along its last axis.
    true_boxes = y_true[..., 0:4]
    object_mask = y_true[..., 4:5]
    class_labels = y_true[..., 5:-1]
    mix_w = y_true[..., -1:]

    # ---- ignore mask ------------------------------------------------------
    # Per image: 1.0 where the best `box_iou` value of a prediction against
    # that image's ground-truth boxes is below 0.5, else 0.0.
    def _has_next(image_idx, _):
        return tf.less(image_idx, num_images)

    def _write_ignore_mask(image_idx, mask_array):
        is_object = tf.cast(object_mask[image_idx, ..., 0], 'bool')
        gt_boxes = tf.boolean_mask(true_boxes[image_idx], is_object)
        best_iou = tf.reduce_max(
            box_iou(pred_boxes[image_idx], gt_boxes), axis=-1)
        low_overlap = tf.cast(best_iou < 0.5, tf.float32)
        return image_idx + 1, mask_array.write(image_idx, low_overlap)

    _, ignore_mask = tf.while_loop(
        cond=_has_next,
        body=_write_ignore_mask,
        loop_vars=[0, tf.TensorArray(tf.float32, size=0, dynamic_size=True)])
    ignore_mask = tf.expand_dims(ignore_mask.stack(), -1)

    # ---- regression targets -----------------------------------------------
    # Centre coordinates expressed on the grid, minus the cell offsets.
    true_xy = true_boxes[..., 0:2] / ratio_reversed - x_y_offset
    pred_xy = pred_boxes[..., 0:2] / ratio_reversed - x_y_offset

    def _log_scale(box_wh):
        # Size relative to the anchors; exact zeros become 1 so that the
        # logarithm yields 0 there instead of log(1e-9).
        scale = box_wh / anchors
        scale = tf.where(condition=tf.equal(scale, 0),
                         x=tf.ones_like(scale),
                         y=scale)
        return tf.math.log(tf.clip_by_value(scale, 1e-9, 1e9))

    true_tw_th = _log_scale(true_boxes[..., 2:4])
    pred_tw_th = _log_scale(pred_boxes[..., 2:4])

    # Weight in (…, 2]: the smaller the ground-truth box relative to the
    # image, the closer the weight is to 2.
    rel_w = y_true[..., 2:3] / tf.cast(image_hw[1], tf.float32)
    rel_h = y_true[..., 3:4] / tf.cast(image_hw[0], tf.float32)
    box_loss_scale = 2. - rel_w * rel_h

    # ---- box losses ---------------------------------------------------------
    xy_loss = tf.reduce_sum(
        tf.square(true_xy - pred_xy) * object_mask * box_loss_scale * mix_w
    ) / batch_size
    wh_loss = tf.reduce_sum(
        tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale
        * mix_w
    ) / batch_size

    # ---- confidence loss ----------------------------------------------------
    # Negatives only count where the ignore mask is set.
    negative_mask = (1 - object_mask) * ignore_mask
    positive_term = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=object_mask, logits=pred_conf)
    negative_term = negative_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=object_mask, logits=pred_conf)

    # Focal-style modulation of the confidence term.
    alpha = 0.25
    gamma = 1.5
    focal_weight = alpha * tf.pow(
        tf.abs(object_mask - tf.sigmoid(pred_conf)), gamma)
    conf_loss = (positive_term + negative_term) * focal_weight
    conf_loss = tf.reduce_sum(conf_loss * mix_w) / batch_size

    # ---- classification loss ------------------------------------------------
    # Smooth the class targets towards a uniform value over the classes.
    delta = 0.01
    smoothed_labels = ((1 - delta) * class_labels
                       + delta / len(config.classes))
    class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=smoothed_labels, logits=pred_prob) * mix_w
    class_loss = tf.reduce_sum(class_loss) / batch_size

    return xy_loss, wh_loss, conf_loss, class_loss