Example #1
    def loss_layer(self, feature_map_i, y_true, anchors):
        '''
        Calculate the loss for a single detection scale.
        input:
            feature_map_i: feature map of a certain scale. shape: [N, 13, 13, 3*(5 + num_class)] etc.
            y_true: y_true from a certain scale. shape: [N, 13, 13, 3, 5 + num_class + 1] etc.
            anchors: shape [9, 2]
        '''
        
        # size in [h, w] format! don't get messed up!
        grid_size = tf.shape(feature_map_i)[1:3]
        # the downscale ratio in height and width
        ratio = tf.cast(self.img_size / grid_size, tf.float32)
        # N: batch_size
        N = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

        x_y_offset, pred_boxes, pred_conf_logits, pred_prob_logits = self.reorg_layer(feature_map_i, anchors)

        ###########
        # get mask
        ###########

        # shape: take 416x416 input image and 13*13 feature_map for example:
        # [N, 13, 13, 3, 1]
        object_mask = y_true[..., 4:5]

        # the calculation of the ignore mask is adapted from
        # https://github.com/pjreddie/darknet/blob/master/src/yolo_layer.c#L179
        ignore_mask = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
        def loop_cond(idx, ignore_mask):
            return tf.less(idx, tf.cast(N, tf.int32))
        def loop_body(idx, ignore_mask):
            # shape: [13, 13, 3, 4] & [13, 13, 3]  ==>  [V, 4]
            # V: number of ground-truth boxes in this image
            valid_true_boxes = tf.boolean_mask(y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0], 'bool'))
            # shape: [13, 13, 3, 4] & [V, 4] ==> [13, 13, 3, V]
            iou = self.box_iou(pred_boxes[idx], valid_true_boxes)
            # shape: [13, 13, 3]
            best_iou = tf.reduce_max(iou, axis=-1)
            # shape: [13, 13, 3]
            ignore_mask_tmp = tf.cast(best_iou < 0.5, tf.float32)
            # finally will be shape: [N, 13, 13, 3]
            ignore_mask = ignore_mask.write(idx, ignore_mask_tmp)
            return idx + 1, ignore_mask
        _, ignore_mask = tf.while_loop(cond=loop_cond, body=loop_body, loop_vars=[0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        # shape: [N, 13, 13, 3, 1]
        ignore_mask = tf.expand_dims(ignore_mask, -1)

        # shape: [N, 13, 13, 3, 2]
        pred_box_xy = pred_boxes[..., 0:2]
        pred_box_wh = pred_boxes[..., 2:4]

        # get xy coordinates in one cell from the feature_map
        # numerical range: 0 ~ 1
        # shape: [N, 13, 13, 3, 2]
        true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
        pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

        # get_tw_th
        # numerical range: 0 ~ 1
        # shape: [N, 13, 13, 3, 2]
        true_tw_th = y_true[..., 2:4] / anchors
        pred_tw_th = pred_box_wh / anchors
        # for numerical stability
        true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                              x=tf.ones_like(true_tw_th), y=true_tw_th)
        pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                              x=tf.ones_like(pred_tw_th), y=pred_tw_th)
        true_tw_th = tf.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
        pred_tw_th = tf.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

        # box size penalty:
        # boxes with smaller area get larger weight. This is taken from the yolo darknet C source code.
        # shape: [N, 13, 13, 3, 1]
        box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))

        ############
        # loss_part
        ############
        # mix_up weight
        # [N, 13, 13, 3, 1]
        mix_w = y_true[..., -1:]
        # shape: [N, 13, 13, 3, 1]
        xy_loss = tf.reduce_sum(tf.square(true_xy - pred_xy) * object_mask * box_loss_scale * mix_w) / N
        wh_loss = tf.reduce_sum(tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale * mix_w) / N

        # shape: [N, 13, 13, 3, 1]
        conf_pos_mask = object_mask
        conf_neg_mask = (1 - object_mask) * ignore_mask
        conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=pred_conf_logits)
        conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=object_mask, logits=pred_conf_logits)
        # TODO: may need to balance the pos-neg by multiplying some weights
        conf_loss = conf_loss_pos + conf_loss_neg
        if self.use_focal_loss:
            alpha = 1.0
            gamma = 2.0
            # TODO: alpha should be a mask array if needed
            focal_mask = alpha * tf.pow(tf.abs(object_mask - tf.sigmoid(pred_conf_logits)), gamma)
            conf_loss *= focal_mask
        conf_loss = tf.reduce_sum(conf_loss * mix_w) / N

        # shape: [N, 13, 13, 3, 1]
        # whether to use label smooth
        if self.use_label_smooth:
            delta = 0.01
            label_target = (1 - delta) * y_true[..., 5:-1] + delta * 1. / self.class_num
        else:
            label_target = y_true[..., 5:-1]
        class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_target, logits=pred_prob_logits) * mix_w
        class_loss = tf.reduce_sum(class_loss) / N

        return xy_loss, wh_loss, conf_loss, class_loss
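
Stripped of the YOLO specifics, the ignore-mask computation above is a generic batch-loop pattern: iterate over the batch with tf.while_loop, write one per-image result into a TensorArray, then stack. A minimal standalone sketch (TF 1.x assumed; iou_fn, the shapes, and the 0.5 threshold are placeholders):

import tensorflow as tf

def batch_ignore_mask(pred_boxes, y_true, iou_fn, thresh=0.5):
    # pred_boxes: [N, H, W, A, 4]; y_true: [N, H, W, A, 5+]
    n = tf.shape(pred_boxes)[0]
    object_mask = y_true[..., 4:5]
    ta = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

    def body(idx, ta):
        # keep only the ground-truth boxes actually present in image `idx`
        valid = tf.boolean_mask(y_true[idx, ..., 0:4],
                                tf.cast(object_mask[idx, ..., 0], 'bool'))
        iou = iou_fn(pred_boxes[idx], valid)    # [H, W, A, V]
        best_iou = tf.reduce_max(iou, axis=-1)  # [H, W, A]
        return idx + 1, ta.write(idx, tf.cast(best_iou < thresh, tf.float32))

    _, ta = tf.while_loop(lambda idx, _: idx < n, body, [0, ta])
    return tf.expand_dims(ta.stack(), -1)       # [N, H, W, A, 1]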
Example #2
    def build_model(self):
        with tf.name_scope('inputs'):
            self.sentences = tf.placeholder(tf.int32, [None, self.max_sentence_len])
            self.sentence_lens = tf.placeholder(tf.int32, None)
            self.sentence_types = tf.placeholder(tf.float32, [None, self.max_sentence_len])

            self.sentence_entity_loc_1 = tf.placeholder(tf.int32, None)
            self.sentence_entity_loc_2 = tf.placeholder(tf.int32, None)
            self.sentence_entity1 = tf.placeholder(tf.int32, [None, self.max_entity_len])
            self.sentence_entity2 = tf.placeholder(tf.int32, [None, self.max_entity_len])

            self.labels = tf.placeholder(tf.int32, [None, self.n_class])
            self.dropout_keep_prob = tf.placeholder(tf.float32)
            self.index = tf.placeholder(tf.int32)

            inputs = tf.nn.embedding_lookup(self.word2vec, self.sentences)
            inputs = tf.cast(inputs, tf.float32)
            inputs = tf.nn.dropout(inputs, keep_prob=self.dropout_keep_prob)

            entity1 = tf.nn.embedding_lookup(self.word2vec, self.sentence_entity1)
            entity1 = tf.cast(entity1, tf.float32)
            entity1 = tf.reduce_mean(entity1, 1)

            entity2 = tf.nn.embedding_lookup(self.word2vec, self.sentence_entity2)
            entity2 = tf.cast(entity2, tf.float32)
            entity2 = tf.reduce_mean(entity2, 1)
        with tf.name_scope('weights'):
            weights = {
                'attention': tf.get_variable(
                    name='W_l',
                    shape=[1, self.n_hidden * 4],
                    initializer=tf.contrib.layers.xavier_initializer(),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
                ),
                'softmax': tf.get_variable(
                    name='W_r',
                    shape=[self.n_hidden * 12, self.n_class],
                    initializer=tf.contrib.layers.xavier_initializer(),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
                ),
            }
        with tf.name_scope('biases'):
            biases = {
                'softmax': tf.get_variable(
                    name='B_r',
                    shape=[self.n_class],
                    initializer=tf.zeros_initializer(),
                    regularizer=tf.contrib.layers.l2_regularizer(self.l2_reg)
                ),
            }
        with tf.name_scope('dynamic_rnn'):
            lstm_cell_fw = tf.contrib.rnn.LSTMCell(
                self.n_hidden,
                initializer=tf.orthogonal_initializer(),
            )
            lstm_cell_bw = tf.contrib.rnn.LSTMCell(
                self.n_hidden,
                initializer=tf.orthogonal_initializer(),
            )
            outputs, state, _ = tf.nn.static_bidirectional_rnn(
                lstm_cell_fw,
                lstm_cell_bw,
                tf.unstack(tf.transpose(inputs, perm=[1, 0, 2])),
                sequence_length=self.sentence_lens,
                dtype=tf.float32,
                scope='BiLSTM'
            )
            outputs = tf.reshape(tf.concat(outputs, 1), [-1, self.max_sentence_len, self.n_hidden * 2])
            batch_size = tf.shape(outputs)[0]

            adj_input = tf.TensorArray(size=batch_size, dtype=tf.float32)

            outputs_iter = tf.TensorArray(tf.float32, 1, dynamic_size=True, infer_shape=False)
            outputs_iter = outputs_iter.unstack(outputs)

            sentence_entity_loc_1_iter = tf.TensorArray(tf.int32, 1, dynamic_size=True, infer_shape=False)
            sentence_entity_loc_1_iter = sentence_entity_loc_1_iter.unstack(self.sentence_entity_loc_1)

            sentence_entity_loc_2_iter = tf.TensorArray(tf.int32, 1, dynamic_size=True, infer_shape=False)
            sentence_entity_loc_2_iter = sentence_entity_loc_2_iter.unstack(self.sentence_entity_loc_2)
            def edge_representation(i, adj_input):
                output = outputs_iter.read(i)
                entity_loc_1 = sentence_entity_loc_1_iter.read(i)
                entity_loc_2 = sentence_entity_loc_2_iter.read(i)
                output_iter = tf.TensorArray(tf.float32, 1, dynamic_size=True, infer_shape=False)
                output_iter = output_iter.unstack(output)
                output_iter_ = tf.TensorArray(tf.float32, 1, dynamic_size=True, infer_shape=False)
                output_iter_ = output_iter_.unstack(output)
                output_context = tf.TensorArray(size=self.max_sentence_len, dtype=tf.float32)
                output_word = output_iter_.read(0)
                # broadcast the scalar entity positions up to the hidden dimension
                entity_pos_1 = tf.tile(tf.expand_dims(entity_loc_1, -1), [self.n_hidden * 2])
                entity_pos_2 = tf.tile(tf.expand_dims(entity_loc_2, -1), [self.n_hidden * 2])
                # initial entity representations (overwritten at the entity positions in the loop below)
                entity1 = tf.concat([output_word, tf.cast(tf.subtract(entity_pos_1, entity_pos_2), dtype=tf.float32)], axis=0)
                entity2 = tf.concat([output_word, tf.cast(tf.subtract(entity_pos_2, entity_pos_1), dtype=tf.float32)], axis=0)
                for index in range(0, self.max_sentence_len):
                    output_word = output_iter.read(index)
                    # swap in the word representation at each marked entity position
                    entity1 = tf.cond(tf.equal(index, tf.to_int32(entity_loc_1)),
                                      lambda: tf.stack([output_word, tf.cast(tf.subtract(entity_pos_1, entity_pos_2), dtype=tf.float32)], axis=0),
                                      lambda: entity1)
                    entity2 = tf.cond(tf.equal(index, tf.to_int32(entity_loc_2)),
                                      lambda: tf.stack([output_word, tf.cast(tf.subtract(entity_pos_2, entity_pos_1), dtype=tf.float32)], axis=0),
                                      lambda: entity2)
                    # every word contributes to the context representation
                    output_context = output_context.write(index, tf.concat([output_word, tf.cast(tf.subtract(entity_pos_1, entity_pos_2), dtype=tf.float32)], 0))
                output_context = output_context.stack()
                output_context = tf.squeeze(output_context)
                context_final = tf.transpose(output_context, perm=[1, 0])
                # word-level attention over the context
                u = tf.matmul(weights['attention'], tf.tanh(context_final))
                a = tf.nn.softmax(u)
                context_representation = tf.matmul(context_final, tf.transpose(a, [1, 0]))
                context_representation = tf.squeeze(context_representation)
                entity_concat = tf.concat([entity1, entity2], axis=0)
                entity_concat = tf.reshape(entity_concat, [self.n_hidden * 8])
                edge = tf.concat([entity_concat, context_representation], axis=0)
                adj_input = adj_input.write(i, edge)
                return (i + 1, adj_input)
            def condition(i, adj_input):
                return i < batch_size
            _, input_final = tf.while_loop(cond=condition, body=edge_representation, loop_vars=(0, adj_input))
            self.input_final = tf.reshape(input_final.stack(), [-1, self.n_hidden * 12])
            self.predict = tf.matmul(self.input_final, weights['softmax']) + biases['softmax']
        with tf.name_scope('loss'):
            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = self.predict, labels = self.labels))
            self.global_step = tf.Variable(0, name="tr_global_step", trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.cost, global_step=self.global_step)

        with tf.name_scope('predict'):
            self.predict_label = tf.argmax(self.predict, 1)
            self.correct_pred = tf.equal(self.predict_label, tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_sum(tf.cast(self.correct_pred, tf.int32))
            
        summary_loss = tf.summary.scalar('loss', self.cost)
        summary_acc = tf.summary.scalar('acc', self.accuracy)
        self.train_summary_op = tf.summary.merge([summary_loss, summary_acc])
        self.test_summary_op = tf.summary.merge([summary_loss, summary_acc])
        _dir = 'logs/' + str(self.timestamp) + '_r' + str(self.learning_rate) + '_b' + str(self.batch_size) + '_l' + str(self.l2_reg)
        self.train_summary_writer = tf.summary.FileWriter(_dir + '/train', self.sess.graph)
        self.test_summary_writer = tf.summary.FileWriter(_dir + '/test', self.sess.graph)
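
A hypothetical training step driving the graph above (the placeholder attributes come from build_model; the batch_* arrays are assumptions):

feed = {
    model.sentences: batch_sentences,
    model.sentence_lens: batch_lens,
    model.sentence_entity_loc_1: batch_loc1,
    model.sentence_entity_loc_2: batch_loc2,
    model.sentence_entity1: batch_entity1,
    model.sentence_entity2: batch_entity2,
    model.labels: batch_labels,
    model.dropout_keep_prob: 0.5,
}
_, step, loss = model.sess.run(
    [model.optimizer, model.global_step, model.cost], feed_dict=feed)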
Example #3
    def incremental(self, c_encoder, time_length=100, initial_input=None):
        """
        need to be adjusted lynnn
        :param c_encoder:
        :param time_length:
        :param initial_input:
        :return:
        """
        with tf.variable_scope("Model"):
            assert c_encoder is not None
            c_encoder_length = tf.shape(c_encoder)[1]
            if time_length is None:
                time_length = c_encoder_length
            init_time = tf.constant(0, dtype=tf.int32)
            if initial_input is None:
                init_input = tf.constant(128, dtype=tf.int32)
            else:
                init_input = initial_input

            init_state = tf.zeros([1, self.num_units], dtype=tf.float32)
            init_outputs_ta = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=False)

            def condition(times, hidden_state, unused_current_input, outputs_ta):
                return tf.less(times, time_length)

            def body(times, state, current_input, outputs_ta):
                with tf.variable_scope('Scale-Input'):
                    inputs = tf.cast(current_input, dtype=tf.float32)
                    inputs = tf.truediv(inputs, 127.5) - 1.0
                    inputs = tf.reshape(inputs, [1, 1])
                with tf.variable_scope(self.scope):
                    ct = c_encoder[:, times, :]
                    mel_and_input = tf.concat([inputs, ct], axis=1)

                    H = tf.matmul(state, self._weight_internal) + self._bias_internal
                    X = tf.matmul(mel_and_input, self._weight_external) + self._bias_external

                    Hr, Hu, He_ = tf.split(H, 3, axis=1)
                    Xr, Xu, Xe_ = tf.split(X, 3, axis=1)

                    u = tf.nn.sigmoid(Xu + Hu)
                    r = tf.nn.sigmoid(Xr + Hr)
                    candidate = tf.tanh(r * He_ + Xe_)
                    state = state * u + candidate * (1 - u)

                relu_outputs = self.affine_relu(state)
                output_logits = self.affine(relu_outputs)
                sample_int64 = tf.multinomial(output_logits, 1, name='multinomial')
                sample_int32 = tf.cast(sample_int64[0, 0], tf.int32)
                sample = tf.Print(sample_int32, [times, sample_int32], message='Generated')
                outputs_ta = outputs_ta.write(times, sample)
                times = times + 1
                return times, state, sample, outputs_ta

            times, state, _, sample_array = tf.while_loop(
                condition,
                body,
                loop_vars=[init_time, init_state, init_input, init_outputs_ta],
                parallel_iterations=10,
                swap_memory=self._hparams.swap_with_cpu,
                name='while')

        sample_array = sample_array.stack()
        return sample_array, state
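
For reference, the body above is a hand-rolled GRU step; with the X* and H* projections split in thirds, it computes:

# r_t = sigmoid(Xr + Hr)               (reset gate)
# u_t = sigmoid(Xu + Hu)               (update gate)
# c_t = tanh(r_t * He_ + Xe_)          (candidate state)
# state_t = u_t * state_{t-1} + (1 - u_t) * c_t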
Example #4
        entry_path = os.path.join(test_dir, entryname)
        if os.path.isfile(entry_path):
            test_files.append(entry_path)

    # START: Computational Graph
    graph = tf.Graph()
    with graph.as_default():

        # placeholders
        input_data = tf.placeholder(tf.float32, [None, X])
        sequence_length = tf.placeholder(tf.int32)

        initial_nn_state = tf.nn.rnn_cell.BasicLSTMCell(NN).zero_state(
            1, tf.float32)

        empty_unstacked_inputs = tf.TensorArray(tf.float32, sequence_length)
        unstacked_inputs = empty_unstacked_inputs.unstack(input_data)
        outputs_container = tf.TensorArray(
            tf.float32, sequence_length)  # accumulates the step outputs
        t = tf.constant(0, dtype=tf.int32)

        _, _, _, _, final_outputs = tf.while_loop(
            cond=lambda time, *_: time < sequence_length,
            body=step_op,
            loop_vars=(t, init_memory(N, W, R), initial_nn_state,
                       unstacked_inputs, outputs_container),
            parallel_iterations=32,
            swap_memory=True)

        # stack the individual steps outputs into a single (sequence_length x Y) tensor
        stacked_output = final_outputs.stack()
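
step_op and init_memory are defined elsewhere in this project; the while_loop above only fixes their signatures. A hypothetical skeleton matching those loop_vars (the names, the memory structure, and the output width Y are assumptions):

def step_op(time, memory_state, nn_state, unstacked_inputs, outputs):
    x_t = unstacked_inputs.read(time)  # input for this step, shape [X]
    # ... one controller + memory update would go here ...
    output_t = tf.zeros([Y])           # placeholder step output
    outputs = outputs.write(time, output_t)
    return time + 1, memory_state, nn_state, unstacked_inputs, outputs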
Example #5
    def inference(self, src_input):
        '''
        Translate the input src_input with the current model.
        '''
        src_size = tf.convert_to_tensor(value=[len(src_input)], dtype=tf.int32)
        src_input = tf.convert_to_tensor(value=[src_input], dtype=tf.int32)
        src_emb = tf.nn.embedding_lookup(self.src_embedding, src_input)

        # run the encoder and take out its final state
        with tf.variable_scope('encoder'):
            enc_outputs, enc_state = tf.nn.dynamic_rnn(self.encode_cell,
                                                       src_emb,
                                                       src_size,
                                                       dtype=tf.float32)
        with tf.variable_scope('decoder/rnn/multi_rnn_cell'):
            # use a dynamically-sized TensorArray to store the generated sentence
            init_array = tf.TensorArray(dtype=tf.int32,
                                        size=0,
                                        dynamic_size=True,
                                        clear_after_read=False)

            # write SOS as the first input to the decoder
            init_array = init_array.write(0, SOS_ID)
            # build the loop state variables
            init_loop_var = (enc_state, init_array, 0)

            # loop termination condition
            def continue_loop_condition(state, trg_ids, step):
                '''
                Return False once the decoder emits EOS or the maximum number
                of decoding steps is reached; otherwise return True.
                '''
                return tf.reduce_all(
                    tf.logical_and(tf.not_equal(trg_ids.read(step), EOS_ID),
                                   tf.less(step, MAX_DECODE_LENGTH - 1)))

            # loop body
            def loop_body(state, trg_ids, step):
                '''
                Run the decoder one step on `state` and the latest token in
                `trg_ids`, then update the loop variables.
                '''
                trg_input = [trg_ids.read(step)]
                # shape = (batch_size, length, embedding size)
                trg_emb = tf.nn.embedding_lookup(params=self.trg_embedding,
                                                 ids=trg_input)
                # (batch_size, length, HIDDEN_SIZE)
                dec_outputs, next_state = self.decode_cell(state=state,
                                                           inputs=trg_emb)

                # (batch_size * length, HIDDEN_SIZE)
                outputs = tf.reshape(dec_outputs, [-1, HIDDEN_SIZE])
                # (batch_size * length, TRG_VOCAB_SIZE)
                logits = tf.matmul(outputs,
                                   self.softmax_weight) + self.softmax_bias

                # take the id with the largest logit as the next token
                next_id = tf.argmax(logits, axis=1, output_type=tf.int32)
                trg_ids = trg_ids.write(step + 1, next_id[0])
                return next_state, trg_ids, step + 1

            # run tf.while_loop and return the final state
            state, trg_ids, step = tf.while_loop(
                cond=continue_loop_condition,  # loop condition
                body=loop_body,  # loop body
                loop_vars=init_loop_var  # loop variables
            )
            )
            return trg_ids.stack()
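
A hypothetical driver for the method above (TF 1.x; the model class, checkpoint path, and token ids are placeholders):

model = NMTModel()                               # assumed model class
output_op = model.inference([10, 22, 4, EOS_ID])
with tf.Session() as sess:
    tf.train.Saver().restore(sess, CHECKPOINT_PATH)
    print(sess.run(output_op))                   # decoded target token ids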
Example #6
but if I turn this into a time-major tensor it would be
[None, batch_size, N]. Does this mean each batch has the same sequence
length? Yes, it does.
"""

inputs = tx.Input(n_units=None, dtype=tf.int32)
lookup = tx.Lookup(inputs, seq_size=None, lookup_shape=[N, M])
input_seq = lookup.permute_batch_time()

# this is a time major sequence so we can look at the number of elements
seq_size = tf.shape(input_seq)[0]

ta_input = tf.TensorArray(dtype=input_seq.dtype,
                          size=seq_size,
                          tensor_array_name="input_tensors")
ta_input = ta_input.unstack(input_seq)

ta_output = tf.TensorArray(dtype=tf.float32,
                           size=seq_size,
                           tensor_array_name="output_tensors")
init_vars = (0, ta_output)
cond = lambda i, _: tf.less(i, seq_size)


def body1(i, y):
    xt = ta_input.read(i)
    y = y.write(i, 2 * xt)
    return i + 1, y
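
The snippet stops right before running the loop; the natural completion, with the tensors defined above, is:

# run the loop and stack the per-step results back into a time-major tensor
_, ta_out = tf.while_loop(cond=cond, body=body1, loop_vars=init_vars)
output_seq = ta_out.stack()  # first dimension is time, matching input_seq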
Example #7
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the IoU threshold below which a predicted box is
        excluded from the object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                                                    from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
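
In the Keras YOLOv3 training setups this function usually comes from, yolo_loss is attached to the model as a Lambda layer rather than called directly. A sketch along those lines (model_body and the y_true input tensors are assumed to exist):

from keras.layers import Lambda  # or tensorflow.keras.layers

model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
                    arguments={'anchors': anchors, 'num_classes': num_classes,
                               'ignore_thresh': 0.5})(
                        [*model_body.output, *y_true])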
Example #8
    def map_fn(x):
        """Internal function to flat_map over.

        Consumes a batch of input examples and produces a variable number of
        output examples.

        Args:
          x: a single example

        Returns:
          a tf.data.Dataset
        """
        partial = empty_example.copy()
        i = tf.zeros([], dtype=tf.int32)
        dynamic_batch_size = tf.shape(x[keys[0]])[0]
        outputs = {}
        for k in keys:
            outputs[k] = tf.TensorArray(tf.int32,
                                        size=0,
                                        dynamic_size=True,
                                        element_shape=[length[k]])
            outputs[k + '_position'] = tf.TensorArray(
                tf.int32, size=0, dynamic_size=True, element_shape=[length[k]])

        def cond_fn(i, partial, outputs):
            del partial, outputs
            return i < dynamic_batch_size

        def body_fn(i, partial, outputs):
            """Body function for while_loop.

            Args:
              i: integer scalar
              partial: dictionary of Tensor (partially-constructed example)
              outputs: dictionary of TensorArray

            Returns:
              A triple containing the new values of the inputs.
            """
            can_append = True
            one_example = {}
            for k in keys:
                val = tf.cast(x[k][i], tf.int32)
                # strip trailing padding zeros
                val = val[:tf.reduce_sum(tf.cast(tf.not_equal(val, 0), tf.int32))]
                one_example[k] = val
            for k in keys:
                can_append = tf.logical_and(
                    can_append,
                    tf.less_equal(
                        tf.size(partial[k]) + tf.size(one_example[k]),
                        length[k]))

            def false_fn():
                return write_packed_example(partial, outputs)

            def true_fn():
                return partial, outputs

            partial, outputs = tf.cond(can_append, true_fn, false_fn)
            new_partial = {}
            for k in keys:
                new_seq = one_example[k][:length[k]]
                new_seq_len = tf.size(new_seq)
                new_partial[k] = tf.concat([partial[k], new_seq], 0)
                new_partial[k + '_position'] = tf.concat([
                    partial[k + '_position'],
                    tf.range(new_seq_len, dtype=tf.int32)
                ], 0)
            partial = new_partial
            return i + 1, partial, outputs

        i, partial, outputs = \
            tf.while_loop(
                cond_fn, body_fn, (i, partial, outputs),
                shape_invariants=(
                    tf.TensorShape([]),
                    {k: tf.TensorShape([None]) for k in keys_etc},
                    {k: tf.TensorShape(None) for k in keys_etc},
                )
            )
        partial, outputs = write_packed_example(partial, outputs)
        packed = {k: outputs[k].stack() for k in keys_etc}
        for k in keys:
            packed[k + '_segmentation'] = (tf.cumsum(
                tf.cast(tf.equal(packed[k + '_position'], 0), tf.int32),
                axis=1) * tf.cast(tf.not_equal(packed[k], 0), tf.int32))
        return packed
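
A toy illustration of the layout this packing produces (assumed behaviour, following the docstring): packing the two examples [1, 2] and [3, 4, 5] into one row of length 8 gives

# inputs              = [1, 2, 3, 4, 5, 0, 0, 0]
# inputs_position     = [0, 1, 0, 1, 2, 0, 0, 0]
# inputs_segmentation = [1, 1, 2, 2, 2, 0, 0, 0]
# (segmentation = cumsum(position == 0), masked where inputs == 0)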
Example #9
def yolo_loss(args, anchors, ignore_thresh=.5,seg_loss_weight=0.1, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    ignore_thresh: float, the IoU threshold below which a predicted box is
        excluded from the object confidence loss
    seg_loss_weight: float, weight for the attention-map loss (note: the loop
        below currently hard-codes 0.5 instead)

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:1]
    att_map = args[1]
    y_true = args[2:3]
    gt_map = args[3]
    # two scales were removed, so the mask shrinks from [[6,7,8], [3,4,5], [0,1,2]] to [[0,1,2]]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))  # x32 recovers the original input size
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]  # per-scale grid shapes
    loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        # true_class_probs = y_true[l][..., 5:]  # unused: this variant has no class loss

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])
        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        def smooth_L1(y_true, y_pred,sigma=3.0):
            """ Create a smooth L1 loss functor.

            Args
                sigma: This argument defines the point where the loss changes from L2 to L1.

            Returns
                A functor for computing the smooth L1 loss given target data and predicted data.
            """
            sigma_squared = sigma ** 2

            # compute smooth L1 loss
            # f(x) = 0.5 * (sigma * x)^2          if |x| < 1 / sigma / sigma
            #        |x| - 0.5 / sigma / sigma    otherwise
            regression_diff = y_true - y_pred
            regression_diff = K.abs(regression_diff)
            regression_loss = tf.where(
                K.less(regression_diff, 1.0 / sigma_squared),
                0.5 * sigma_squared * K.pow(regression_diff, 2),
                regression_diff - 0.5 / sigma_squared
            )
            return regression_loss
        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * smooth_L1(raw_true_wh,raw_pred[...,2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        att_loss = K.binary_crossentropy(gt_map, att_map, from_logits=True)
        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        att_loss = K.sum(att_loss) / mf * 0.5
        loss += xy_loss + wh_loss + confidence_loss + att_loss

    return K.expand_dims(loss, axis=0)
Example #10
def generate_instances(indices,
                       arch,
                       window_size,
                       max_depth=None,
                       codes_points=None):
    """Generates matrices holding word indices to be passed to Word2Vec models 
  for each sentence. The shape and contents of output matrices depends on the 
  architecture ('skip_gram', 'cbow') and training algorithm ('negative_sampling'
  , 'hierarchical_softmax').
  It takes as input a list of word indices in a subsampled-sentence, where each
  word is a target word, and their context words are those within the window 
  centered at a target word. For skip gram architecture, `num_context_words` 
  instances are generated for a target word, and for cbow architecture, a single
  instance is generated for a target word.
  If `codes_points` is not None ('hierarchical softmax'), the word to be 
  predicted (context word for 'skip_gram', and target word for 'cbow') are 
  represented by their 'codes' and 'points' in the Huffman tree (See 
  `_build_binary_tree`). 
  Args:
    indices: rank-1 int tensor, the word indices within a sentence after
      subsampling.
    arch: scalar string, architecture ('skip_gram' or 'cbow').
    window_size: int scalar, num of words on the left or right side of
      target word within a window.
    max_depth: (Optional) int scalar, the max depth of the Huffman tree. 
    codes_points: (Optional) an int tensor of shape [vocab_size, 2*max_depth+1] 
      where each row holds the codes (0-1 binary values) padded to `max_depth`, 
      and points (non-leaf node indices) padded to `max_depth`, of each 
      vocabulary word. The last entry is the true length of code and point 
      (<= `max_depth`).
    
  Returns:
    instances: an int tensor holding word indices, with shape being
      when arch=='skip_gram', algm=='negative_sampling'
        shape: [N, 2]
      when arch=='cbow', algm=='negative_sampling'
        shape: [N, 2*window_size+2]
      when arch=='skip_gram', algm=='hierarchical_softmax'
        shape: [N, 2*max_depth+2]
      when arch=='cbow', algm='hierarchical_softmax'
        shape: [N, 2*window_size+2*max_depth+2]
  """
    def per_target_fn(index, init_array):
        """Generate inputs and labels for each target word.

        `index` is the index of the target word in `indices`.
        """
        reduced_size = tf.random.uniform([], maxval=window_size, dtype='int32')
        left = tf.range(tf.maximum(index - window_size + reduced_size, 0),
                        index)
        right = tf.range(
            index + 1,
            tf.minimum(index + 1 + window_size - reduced_size,
                       tf.size(indices)))
        context = tf.concat([left, right], axis=0)
        context = tf.gather(indices, context)

        if arch == 'skip_gram':
            # replicate `indices[index]` to match the size of `context`
            # [N, 2]
            window = tf.stack(
                [tf.fill(tf.shape(context), indices[index]), context], axis=1)
        elif arch == 'cbow':
            true_size = tf.size(context)
            # pad `context` to length `2 * window_size`
            window = tf.concat([
                tf.pad(context, [[0, 2 * window_size - true_size]]),
                [true_size, indices[index]]
            ],
                               axis=0)
            # [1, 2*window_size + 2]
            window = tf.expand_dims(window, axis=0)
        else:
            raise ValueError('architecture must be skip_gram or cbow.')

        if codes_points is not None:
            # [N, 2*max_depth + 2] or [1, 2*window_size+2*max_depth+2]
            window = tf.concat(
                [window[:, :-1],
                 tf.gather(codes_points, window[:, -1])],
                axis=1)
        return index + 1, init_array.write(index, window)

    size = tf.size(indices)
    # initialize a tensor array of length `tf.size(indices)`
    init_array = tf.TensorArray('int64', size=size, infer_shape=False)
    _, result_array = tf.while_loop(lambda i, ta: i < size,
                                    per_target_fn, [0, init_array],
                                    back_prop=False)
    instances = tf.cast(result_array.concat(), 'int64')
    if arch == 'skip_gram':
        if max_depth is None:
            instances.set_shape([None, 2])
        else:
            instances.set_shape([None, 2 * max_depth + 2])
    else:
        if max_depth is None:
            instances.set_shape([None, 2 * window_size + 2])
        else:
            instances.set_shape([None, 2 * window_size + 2 * max_depth + 2])

    return instances
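
A minimal usage sketch (TF 1.x graph mode; toy values, int64 indices to match the TensorArray dtype above):

indices = tf.constant([5, 1, 9, 3, 7], dtype=tf.int64)
instances = generate_instances(indices, 'skip_gram', window_size=2)
with tf.Session() as sess:
    print(sess.run(instances))  # [N, 2] rows of (target, context) indices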
Example #11
 def _create_ta(s):
     return tf.TensorArray(dtype=s.dtype,
                           size=num_steps,
                           clear_after_read=clear_after_read,
                           element_shape=tf.TensorShape(
                               [batch_size]).concatenate(s.shape))
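
A factory like this is typically mapped over a (possibly nested) structure of per-step specs to build one TensorArray per state tensor. A sketch, assuming num_steps, batch_size and clear_after_read are in scope and each s is TensorSpec-like (has .dtype and .shape without the batch dimension):

state_specs = (tf.TensorSpec([8], tf.float32), tf.TensorSpec([16], tf.float32))
tas = tf.nest.map_structure(_create_ta, state_specs)
# each slot of tas[0] then has shape [batch_size, 8], one slot per step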
Example #12
    def synthesize(self, samples):
        """
        Synthesize acoustic features from the input texts
        Args:
            samples: the data source to be synthesized
        Returns:
            after_outs: the corresponding synthesized acoustic features
            attn_weights_stack: the corresponding attention weights
        """
        x0 = samples["input"]
        input_length = samples["input_length"]
        batch = tf.shape(x0)[0]
        encoder_output = self.encoder(
            x0, training=False)  # shape: [batch, x_steps, eunits]

        if self.hparams.use_speaker:
            if self.hparams.use_pretrained_speaker_model:  # hasattr(self, 'speaker_embedding') must be True here
                speaker_feature = samples["output"]
                cut_speaker_feature = self.cut_acoustic_feature(speaker_feature, \
                                                                self.hparams.num_frame_for_embedding)
                speaker_embedding = self.speaker_embedding(cut_speaker_feature)
            else:
                speaker_embedding = self.speaker_embedding(samples['speaker'])
            encoder_output = self.concat_speaker_embedding(
                encoder_output, speaker_embedding)

        prev_rnn_states, prev_attn_weight, prev_context = \
            self.initialize_states(encoder_output, input_length)
        context_dim = prev_context.shape[-1]
        accum_attn_weight = prev_attn_weight
        outs = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
        logits = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
        attn_weights = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

        out = tf.zeros([batch, self.feat_dim * self.reduction_factor])
        max_output_len = self.hparams.max_output_length * input_length[
            0] // self.reduction_factor
        for y_index in tf.range(max_output_len):
            out, logit, prev_rnn_states, new_weight, prev_context = \
                self.time_propagate(encoder_output,
                                    input_length,
                                    out,
                                    prev_rnn_states,
                                    accum_attn_weight,
                                    prev_attn_weight,
                                    prev_context,
                                    training=False)

            new_weight = tf.ensure_shape(new_weight, [None, None])
            prev_context = tf.ensure_shape(prev_context, [None, context_dim])
            outs = outs.write(y_index, out)
            logits = logits.write(y_index, logit)
            attn_weights = attn_weights.write(y_index, new_weight)
            prev_attn_weight = new_weight
            accum_attn_weight += new_weight
            probs = tf.nn.sigmoid(logit)
            time_to_end = probs > self.hparams.end_prob
            time_to_end = tf.reduce_any(time_to_end)
            if time_to_end:
                break

        logits_stack = tf.transpose(
            logits.stack(), [1, 0, 2])  # [batch, y_steps, reduction_factor]
        # before_outs: [batch, y_steps, feat_dim*reduction_factor]
        before_outs = tf.transpose(outs.stack(), [1, 0, 2])
        attn_weights_stack = tf.transpose(attn_weights.stack(), [1, 0, 2])
        after_outs = self._synthesize_post_net(before_outs, logits_stack)
        return after_outs, attn_weights_stack
Example #13
    def call(self, samples, training: bool = None):
        x0 = samples["input"]
        input_length = samples["input_length"]
        encoder_output = self.encoder(
            x0, training=training)  # shape: [batch, x_steps, eunits]

        if self.hparams.use_speaker:
            if self.hparams.use_pretrained_speaker_model:
                if hasattr(self, 'speaker_embedding'):
                    speaker_feature = samples["output"]
                    cut_speaker_feature = self.cut_acoustic_feature(speaker_feature, \
                                                                    self.hparams.num_frame_for_embedding)
                    speaker_embedding = self.speaker_embedding(
                        cut_speaker_feature)
                else:  # first call of evaluate_step: the speaker_embedding model has not been initialized yet
                    batch = tf.shape(encoder_output)[0]
                    fake_embedding = tf.zeros(
                        [batch, self.hparams.speaker_embedding_dim],
                        dtype=tf.float32)
                    speaker_embedding = fake_embedding
            else:
                speaker_embedding = self.speaker_embedding(samples['speaker'])
            encoder_output = self.concat_speaker_embedding(
                encoder_output, speaker_embedding)

        if self.hparams.use_gst:
            reference_state = self.reference_encoder(samples["output"])
            style_embeddings = self.style_attn(
                tf.tile(tf.expand_dims(self.gst_tokens, axis=0),
                        [tf.shape(encoder_output)[0], 1, 1]),
                tf.tanh(
                    tf.tile(tf.expand_dims(self.gst_tokens, axis=0),
                            [tf.shape(encoder_output)[0], 1, 1])),
                tf.expand_dims(reference_state, axis=1),
                mask=None)[0]
            style_embeddings = tf.tile(style_embeddings,
                                       [1, tf.shape(encoder_output)[1], 1])
            encoder_output = tf.concat([encoder_output, style_embeddings],
                                       axis=-1)

        y0 = samples['output']
        ori_lens = tf.shape(samples['output'])[1]
        if self.reduction_factor > 1:
            y0 = self._pad_and_reshape(samples['output'], ori_lens)
        y0 = self.initialize_input_y(y0)
        prev_rnn_states, prev_attn_weight, prev_context = \
            self.initialize_states(encoder_output, input_length)
        context_dim = prev_context.shape[-1]
        accum_attn_weight = prev_attn_weight
        outs = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
        logits = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
        attn_weights = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

        y_steps = tf.shape(y0)[1]
        for y_index in tf.range(y_steps):
            out, logit, prev_rnn_states, new_weight, prev_context = \
                self.time_propagate(encoder_output,
                                    input_length,
                                    y0[:, y_index, :],
                                    prev_rnn_states,
                                    accum_attn_weight,
                                    prev_attn_weight,
                                    prev_context,
                                    training=training)

            new_weight = tf.ensure_shape(new_weight, [None, None])
            prev_context = tf.ensure_shape(prev_context, [None, context_dim])
            outs = outs.write(y_index, out)
            logits = logits.write(y_index, logit)
            attn_weights = attn_weights.write(y_index, new_weight)
            accum_attn_weight += new_weight
            prev_attn_weight = new_weight
        logits_stack = tf.transpose(
            logits.stack(), [1, 0, 2])  # [batch, y_steps, reduction_factor]
        logits_stack = self._pad_and_reshape(logits_stack,
                                             ori_lens,
                                             reverse=True)
        before_outs = tf.transpose(outs.stack(),
                                   [1, 0, 2])  # [batch, y_steps, feat_dim]
        before_outs = self._pad_and_reshape(before_outs,
                                            ori_lens,
                                            reverse=True)
        if self.hparams.clip_outputs:
            maximum = -self.hparams.clip_max_value - self.hparams.clip_lower_bound_decay
            maximum = tf.maximum(before_outs, maximum)
            before_outs = tf.minimum(maximum, self.hparams.clip_max_value)
        # attn_weights_stack, shape: # [batch, y_steps, x_steps]
        attn_weights_stack = tf.transpose(attn_weights.stack(), [1, 0, 2])
        # after_outs, shape: [batch, y_steps, feat_dim]
        after_outs = before_outs + self.postnet(before_outs, training=training)
        if self.hparams.clip_outputs:
            maximum = -self.hparams.clip_max_value - self.hparams.clip_lower_bound_decay
            maximum = tf.maximum(after_outs, maximum)
            after_outs = tf.minimum(maximum, self.hparams.clip_max_value)

        return before_outs, after_outs, logits_stack, attn_weights_stack
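
The two identical clipping blocks above amount to a small helper; equivalently:

def clip_outputs(x, clip_max_value, clip_lower_bound_decay):
    # clamp x into [-(clip_max_value + clip_lower_bound_decay), clip_max_value]
    lower = -clip_max_value - clip_lower_bound_decay
    return tf.minimum(tf.maximum(x, lower), clip_max_value)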
Example #14
def build_batch_grided_gt(y_true, mask, size, num_classes, dtype,
                          use_tie_breaker):
    """
    convert ground truth for use in loss functions
    Args:
        y_true: tf.Tensor[] ground truth [box coords[0:4], classes_onehot[0:-1], best_fit_anchor_box]
        mask: list of the anchor boxes corresponding to this output, e.g. [1, 2, 3] tells this layer to predict only the first 3 anchors in the total.
        size: the dimensions of this output; for the regular model it progresses from 13 to 26 to 52

    Return:
        tf.Tensor[] of shape [batch, size, size, #of_anchors, 4, 1, num_classes]
    """
    boxes = tf.cast(y_true['bbox'], dtype)
    classes = tf.one_hot(tf.cast(y_true['classes'], dtype=tf.int32),
                         depth=num_classes,
                         dtype=dtype)
    anchors = tf.cast(y_true['best_anchors'], dtype)

    batches = tf.shape(boxes)[0]
    num_boxes = tf.shape(boxes)[1]
    len_masks = tf.shape(mask)[0]

    full = tf.zeros([batches, size, size, len_masks, num_classes + 4 + 1],
                    dtype=dtype)
    depth_track = tf.zeros((batches, size, size, len_masks), dtype=tf.int32)

    x = tf.cast(boxes[..., 0] * tf.cast(size, dtype=dtype), dtype=tf.int32)
    y = tf.cast(boxes[..., 1] * tf.cast(size, dtype=dtype), dtype=tf.int32)

    anchors = tf.repeat(tf.expand_dims(anchors, axis=-1), len_masks, axis=-1)

    update_index = tf.TensorArray(tf.int32, size=0, dynamic_size=True)
    update = tf.TensorArray(dtype, size=0, dynamic_size=True)
    const = tf.cast(tf.convert_to_tensor([1.]), dtype=dtype)
    mask = tf.cast(mask, dtype=dtype)

    i = 0
    anchor_id = 0
    for batch in range(batches):
        for box_id in range(num_boxes):
            if K.all(tf.math.equal(boxes[batch, box_id, 2:4], 0)):
                continue
            if K.any(tf.math.less(boxes[batch, box_id, 0:2], 0.0)) or K.any(
                    tf.math.greater_equal(boxes[batch, box_id, 0:2], 1.0)):
                continue
            if use_tie_breaker:
                for anchor_id in range(tf.shape(anchors)[-1]):
                    index = tf.math.equal(anchors[batch, box_id, anchor_id],
                                          mask)
                    if K.any(index):
                        p = tf.cast(K.argmax(tf.cast(index, dtype=tf.int32)),
                                    dtype=tf.int32)
                        uid = 1

                        used = depth_track[batch, y[batch, box_id],
                                           x[batch, box_id], p]
                        if anchor_id == 0:
                            # write the box to the update list. The boxes output
                            # by yolo have the x and y indexes swapped; the
                            # reason for this is unclear.
                            update_index = update_index.write(
                                i,
                                [batch, y[batch, box_id], x[batch, box_id], p])
                            value = K.concatenate([
                                boxes[batch, box_id], const, classes[batch,
                                                                     box_id]
                            ])
                            update = update.write(i, value)
                        elif tf.math.equal(used, 2) or tf.math.equal(used, 0):
                            uid = 2
                            # write the box to the update list. The boxes output
                            # by yolo have the x and y indexes swapped; the
                            # reason for this is unclear.
                            update_index = update_index.write(
                                i,
                                [batch, y[batch, box_id], x[batch, box_id], p])
                            value = K.concatenate([
                                boxes[batch, box_id], const, classes[batch,
                                                                     box_id]
                            ])
                            update = update.write(i, value)

                        depth_track = tf.tensor_scatter_nd_update(
                            depth_track,
                            [(batch, y[batch, box_id], x[batch, box_id], p)],
                            [uid])
                        i += 1
            else:
                index = tf.math.equal(anchors[batch, box_id, 0], mask)
                if K.any(index):
                    p = tf.cast(K.argmax(tf.cast(index, dtype=tf.int32)),
                                dtype=tf.int32)
                    update_index = update_index.write(
                        i, [batch, y[batch, box_id], x[batch, box_id], p])
                    value = K.concatenate(
                        [boxes[batch, box_id], const, classes[batch, box_id]])
                    update = update.write(i, value)
                    i += 1

    # if the update list is non-empty, apply the updates; otherwise there are no boxes and the grid stays empty
    if tf.math.greater(update_index.size(), 0):
        update_index = update_index.stack()
        update = update.stack()
        full = tf.tensor_scatter_nd_add(full, update_index, update)
    return full
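
The final scatter is the only place the accumulated updates touch the grid. A toy illustration of tf.tensor_scatter_nd_add with the same [batch, y, x, anchor] index layout:

grid = tf.zeros([1, 4, 4, 3, 6])                     # empty target grid
idx = tf.constant([[0, 1, 2, 0]])                    # one cell: b=0, y=1, x=2, anchor=0
val = tf.constant([[0.5, 0.5, 0.2, 0.2, 1.0, 1.0]])  # box + conf + one-hot class
grid = tf.tensor_scatter_nd_add(grid, idx, val)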
Example #15
    def build_model(self): 
        print('    {:{length}} : {}'.format('x', self.x, length=12))
        layer_count=0
        self.convs = []
        with tf.name_scope('conv'+str(layer_count+1)):
            layer = models.RCL(input=self.x, 
                               weight_size=self.weight_size[layer_count],
                               pool=self.pool[layer_count],
                               pool_size=self.pool_size[layer_count], 
                               num_iter=self.iter[layer_count], 
                               nonlinearity=self.nonlinearity, 
                               use_dropout=self.use_dropout,
                               keep_prob=self.keep_probs[layer_count], 
                               use_batchnorm=self.use_batchnorm, 
                               std=self.std)
            self.convs.append(layer)
            print('    {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
            layer_count += 1
        #
        length = self.weight_size[layer_count][-1]  # i.e. layer.get_layer().get_shape()[2].value
        mu_init_value = np.zeros([self.cluster_num, length])
        sigma_init_value = np.zeros([self.cluster_num, length, length])
        pi_init_value = np.ones([self.cluster_num]) / self.cluster_num
        self.mu = [tf.Variable(tf.random_normal([length], dtype=tf.float64), name='mu'+str(t)) for t in range(self.cluster_num)]
        self.sigma = [tf.Variable(tf.random_normal([length,length], dtype=tf.float64), name='sigma'+str(t)) for t in range(self.cluster_num)]
        self.pi = tf.Variable(tf.multiply(tf.ones([1, self.cluster_num], tf.float64), pi_init_value),
                            trainable=True,name='pi')
        # force the sum of elements of pi vector to be 1.
        self.pi_normed = tf.div(tf.maximum(self.pi, 0.0), tf.reduce_sum(tf.maximum(self.pi, 0.0))) 
        
        ### convs before em
        for i in range(layer_count, self.em_layers[0]-1):
            layer = models.RCL(input=layer.get_layer(),
                               weight_size=self.weight_size[layer_count],
                               weight=self.w_masked,
                               biases=self.b,
                               pool=self.pool[layer_count],
                               pool_size=self.pool_size[layer_count], 
                               num_iter=self.iter[layer_count], 
                               nonlinearity=self.nonlinearity, 
                               use_dropout=self.use_dropout,
                               keep_prob=self.keep_probs[layer_count], 
                               use_batchnorm=self.use_batchnorm, 
                               std=self.std,
                               name='conv'+str(layer_count+1))
            self.convs.append(layer)
            print('    {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
            layer_count += 1
        #        
        ### em
        self.em_w=[]
        self.w_mask=[]
        self.w_masked=[]
        self.cluster = []
        self.max_idx = []
        for em in range(len(self.em_layers)):
            with tf.name_scope('conv'+str(layer_count+1)+'em'):
                self.em_w.append(tf.Variable(tf.random_normal(self.weight_size[layer_count], stddev=self.std, dtype=tf.float32), name='w'))
                if em == 0:
                    gamma_elem = []
                    Q_elem = []
                    self.x_batch = tf.reduce_max(self.em_w[-1], axis=[0,1]) - tf.reduce_min(self.em_w[-1], axis=[0,1])
                    for w in range(self.weight_size[layer_count][-2]):
                        x_pdf = gmm_pdf_log(mu=self.mu,
                                            sigma=self.sigma,
                                            x=tf.reshape(tf.tile(self.x_batch[w, :], [self.cluster_num]), [self.cluster_num, -1]),  # [3, 100]
                                            sess=self.sess)
                        pi_pdf = tf.multiply(self.pi_normed, x_pdf)
                        gamma_tmp = tf.reshape(tf.div(pi_pdf,
                                                      tf.maximum(tf.reduce_sum(pi_pdf),1e-30)),
                                               [-1])
                        gamma_tmp = tf.stop_gradient(gamma_tmp) # fix the value. do not calculate the gradient of this term.
                        gamma_elem.append(gamma_tmp)
                        tmp = tf.reduce_sum(tf.multiply(gamma_tmp,
                                                        tf.log(pi_pdf + 1e-30)))
                        Q_elem.append(tmp)
                    self.Q = tf.reduce_sum(Q_elem)
                    self.Q_summary = tf.summary.scalar("Q", self.Q)
                    self.gamma = tf.stack(gamma_elem)
                    self.cluster.append(tf.cast(tf.argmax(self.gamma, axis=1), dtype=tf.int32))
                    print('      {:{length}} : {}'.format('cluster', self.cluster[-1], length=12))

                    i = tf.constant(0)
                    w_mean = tf.TensorArray(dtype=tf.float32, size=self.cluster_num)
                    cond = lambda i,w_mean : i<self.cluster_num
                    x_batch = tf.reduce_max(self.em_w[-1], axis=[0,1]) - tf.reduce_min(self.em_w[-1], axis=[0,1])
                    def func(i,w_mean):
                        mean = tf.reduce_mean(tf.boolean_mask(x_batch, tf.equal(self.cluster[-1],i)), axis=[0])
                        w_mean = w_mean.write(i, mean)
                        return i+1, w_mean
                    i, w_mean = tf.while_loop(cond, func, [i,w_mean])
                    self.max_idx.append(tf.cast(tf.argmax(w_mean.stack(), axis=0), tf.int32))
                    print('      {:{length}} : {}'.format('max_idx', self.max_idx[-1], length=12))
                else:
                    # em != 0
                    self.cluster.append(self.max_idx[-1])
                    print('      {:{length}} : {}'.format('cluster', self.cluster[-1], length=12))
                    #
                    i = tf.constant(0)
                    w_mean = tf.TensorArray(dtype=tf.float32, size=self.cluster_num)
                    cond = lambda i,w_mean : i<self.cluster_num
                    x_batch = tf.reduce_max(self.em_w[-1], axis=[0,1]) - tf.reduce_min(self.em_w[-1], axis=[0,1])
                    def func(i,w_mean):
                        mean = tf.reduce_mean(tf.boolean_mask(x_batch, tf.equal(self.cluster[-1],i)), axis=[0])
                        w_mean = w_mean.write(i, mean)
                        return i+1, w_mean
                    i, w_mean_ = tf.while_loop(cond, func, [i,w_mean])
                    self.max_idx.append(tf.cast(tf.argmax(w_mean_.stack(), axis=0), tf.int32))
                    print('      {:{length}} : {}'.format('max_idx', self.max_idx[-1], length=12))
                i = tf.constant(0)
                w_mask_array = tf.TensorArray(dtype=tf.float32, size=self.weight_size[layer_count][-1])
                cond2 = lambda i,w_mask_array : i<self.weight_size[layer_count][-1]
                def func2(i, w_mask_array):
                    w_mask_array_column = tf.cast(tf.equal(self.cluster[-1], self.max_idx[-1][i]), dtype=tf.float32)
                    w_mask_array = w_mask_array.write(i, w_mask_array_column)
                    return i+1, w_mask_array
                i, w_mask_array = tf.while_loop(cond2, func2, [i, w_mask_array])
                w_mask_pack = tf.transpose(w_mask_array.stack())
                self.w_mask.append(tf.expand_dims(tf.stack([w_mask_pack for i in range(self.weight_size[layer_count][1])]), 0))
                self.w_masked.append(tf.multiply(self.em_w[-1], self.w_mask[-1]))
            # end if-else
            layer = models.RCL(input=layer.get_layer(),
                               weight_size=self.weight_size[layer_count],
                               weight=self.w_masked[-1],
                               pool=self.pool[layer_count],
                               pool_size=self.pool_size[layer_count], 
                               num_iter=self.iter[layer_count], 
                               nonlinearity=self.nonlinearity, 
                               use_dropout=self.use_dropout,
                               keep_prob=self.keep_probs[layer_count], 
                               use_batchnorm=self.use_batchnorm, 
                               std=self.std,
                              name='conv'+str(layer_count+1))
            self.convs.append(layer)
            print('    {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
            layer_count += 1
            if layer_count>=len(self.conv):
                break
        # end for
        
        ### left conv layers
        for i in range(layer_count, len(self.conv)):
            layer = models.RCL(input=layer.get_layer(),
                               weight_size=self.weight_size[layer_count],
                               pool=self.pool[layer_count],
                               pool_size=self.pool_size[layer_count], 
                               num_iter=self.iter[layer_count], 
                               nonlinearity=self.nonlinearity, 
                               use_dropout=self.use_dropout,
                               keep_prob=self.keep_probs[layer_count], 
                               use_batchnorm=self.use_batchnorm, 
                               std=self.std,
                               name='conv'+str(layer_count+1))
            self.convs.append(layer)
            print('    {:{length}} : {}'.format('conv'+str(layer_count+1), layer.get_layer(), length=12))
            layer_count += 1
        
        network = tf.reshape(layer.get_layer(), shape=[-1, self.feed_forwards[0]])
        self.flatten = network
        print('    {:{length}} : {}'.format('flatten', self.flatten, length=12))
        
        if len(self.feed_forwards) == 2:
            network = models.feedforward(input = network,
                                         weight_size=[self.feed_forwards[0], self.feed_forwards[1]],
                                         nonlinearity=None,
                                         use_dropout = False, 
                                         use_batchnorm = False,
                                         std=self.std,
                                         offset=self.offset,
                                         scale=self.scale,
                                         epsilon=self.epsilon, 
                                         name='output')
            self.output = network#.get_layer()
            self.output_layer = network.get_layer()
            print('    {:{length}} : {}'.format('feedforward'+str(1), self.output_layer, length=12))
        else:
            self.forwards=[]
            for f in range(len(self.feed_forwards) - 2):
                if layer_count+1+f in self.em_layers:
                    with tf.name_scope('feedforward'+str(f+1)+'em'):
                        self.em_w.append(tf.Variable( tf.random_normal( [self.feed_forwards[f], self.feed_forwards[f+1]], stddev=self.std, dtype=tf.float32), name='w' ))
                        conv_len1 = self.convs[-1].get_layer().get_shape()[1].value
                        conv_len2 = self.convs[-1].get_layer().get_shape()[2].value
                        if (f==0) and (conv_len1 > 1 or conv_len2>1):
                            self.cluster.append(tf.tile(self.max_idx[-1], [conv_len1*conv_len2]))
                        else:
                            self.cluster.append(self.max_idx[-1])
                        print('      {:{length}} : {}'.format('cluster', self.cluster[-1], length=12))
                        #
                        i = tf.constant(0)
                        w_mean = tf.TensorArray(dtype=tf.float32, size=self.cluster_num)
                        cond = lambda i,w_mean : i<self.cluster_num
                        x_batch = self.em_w[-1]
                        def func(i,w_mean):
                            mean = tf.reduce_mean(tf.boolean_mask(x_batch, tf.equal(self.cluster[-1],i)), axis=[0])
                            w_mean = w_mean.write(i, mean)
                            return i+1, w_mean
                        i, w_mean = tf.while_loop(cond, func, [i,w_mean])
                        self.max_idx.append(tf.cast(tf.argmax(w_mean.stack(), axis=0), tf.int32))
                        print('      {:{length}} : {}'.format('max_idx', self.max_idx[-1], length=12))
                        #
                        i = tf.constant(0)
                        w_mask_array = tf.TensorArray(dtype=tf.float32, size=self.feed_forwards[f+1])
                        cond2 = lambda i,w_mask_array : i<self.feed_forwards[f+1]
                        def func2(i, w_mask_array):
                            w_mask_array_column = tf.cast(tf.equal(self.cluster[-1], self.max_idx[-1][i]), dtype=tf.float32)
                            w_mask_array = w_mask_array.write(i, w_mask_array_column)
                            return i+1, w_mask_array
                        i, w_mask_array = tf.while_loop(cond2, func2, [i, w_mask_array])
                        w_mask_pack = tf.transpose(w_mask_array.stack())
                        self.w_mask.append(w_mask_pack)
                        self.w_masked.append(tf.multiply(self.em_w[-1], self.w_mask[-1]))
                    ###                        
                    network  = models.feedforward(input = network, 
                                                  weight_size=[self.feed_forwards[f], self.feed_forwards[f+1]],
                                                  weight=self.w_masked[-1],
                                                  nonlinearity=self.nonlinearity, 
                                                  use_dropout = self.use_dropout, 
                                                  keep_prob = self.keep_probs[len(self.conv)+f], 
                                                  use_batchnorm = self.use_batchnorm,
                                                  std=self.std,
                                                  offset=self.offset,
                                                  scale=self.scale,
                                                  epsilon=self.epsilon, 
                                                  name='forward'+str(f+1))
                    self.forwards.append(network)
                    network = network.get_layer()
                    layer_count += 1
                    print('    {:{length}} : {}'.format('feedforward'+str(f+1), network, length=12))
                else:
                    network  = models.feedforward(input = network, 
                                                  weight_size=[self.feed_forwards[f], self.feed_forwards[f+1]],
                                                  nonlinearity=self.nonlinearity, 
                                                  use_dropout = self.use_dropout, 
                                                  keep_prob = self.keep_probs[len(self.conv)+f], 
                                                  use_batchnorm = self.use_batchnorm,
                                                  std=self.std,
                                                  offset=self.offset,
                                                  scale=self.scale,
                                                  epsilon=self.epsilon, 
                                                  name='forward'+str(f+1))
                    self.forwards.append(network)
                    network = network.get_layer()
                    layer_count += 1
                    print('    {:{length}} : {}'.format('feedforward'+str(f+1), network, length=12))
                #
            network =  models.feedforward(input = network,
                                         weight_size=[self.feed_forwards[-2], self.feed_forwards[-1]],
                                         nonlinearity=None,
                                         use_dropout = False, 
                                         use_batchnorm = False,
                                         std=self.std,
                                         offset=self.offset,
                                         scale=self.scale,
                                         epsilon=self.epsilon, 
                                         name='output')
            self.output = network#.get_layer()
            self.output_layer = network.get_layer()
            print('    {:{length}} : {}'.format('feedforward'+str(f+2), self.output_layer, length=12))
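The EM branch above turns per-column densities into cluster responsibilities (gamma) and an objective Q. A minimal sketch of that E-step for a single weight column, assuming `log_pdf` stands in for the per-cluster log-densities (the `gmm_pdf_log` helper is not shown in this snippet):

import tensorflow as tf

cluster_num = 3
log_pdf = tf.constant([-1.2, -0.3, -2.5])                  # per-cluster log-density of one column (hypothetical values)
pi = tf.constant([0.5, 0.3, 0.2])                          # normalized mixing weights
pi_pdf = pi * tf.exp(log_pdf)                              # unnormalized posteriors
gamma = pi_pdf / tf.maximum(tf.reduce_sum(pi_pdf), 1e-30)  # responsibilities, sum to 1
# E-step objective: responsibilities are treated as constants via stop_gradient
Q = tf.reduce_sum(tf.stop_gradient(gamma) * tf.math.log(pi_pdf + 1e-30))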
Ejemplo n.º 16
0
def build_network(d):
    # Define hyperparameters
    learning_rate = 2e-5
    l2norm_scaling = 1e-10
    global_norm_gradient_clipping_ratio = 0.65

    # Placeholder for answers to the decision problems (one per problem)
    subgraph_exists = tf.placeholder(tf.float32,
                                     shape=(None, ),
                                     name='subgraph_exists')
    # Placeholder for the list of numbers of vertices per instance
    n_vertices = tf.placeholder(tf.int32, shape=(None, ), name='n_vertices')
    # Placeholder for the adjacency matrix connecting each edge to its source and target vertices
    VV_matrix = tf.placeholder(tf.float32, shape=(None, None), name="VV")
    # Placeholder for the column matrix of vertex weights
    vertice_weight = tf.placeholder(tf.float32,
                                    shape=(None, 1),
                                    name="vertice_weight")
    # Placeholder for the number of timesteps the GNN is to run for
    time_steps = tf.placeholder(tf.int32, shape=(), name="time_steps")

    # All vertex embeddings are initialized with the same value, which is a trained parameter learned by the network
    total_n = tf.shape(VV_matrix)[1]
    v_init = tf.get_variable(initializer=tf.random_normal((1, d)),
                             dtype=tf.float32,
                             name='V_init')
    vertex_initial_embeddings = tf.tile(
        tf.div(v_init, tf.sqrt(tf.cast(d, tf.float32))), [total_n, 1])

    # Define GNN dictionary
    GNN = {}

    # Configure GNN
    gnn = TGN({
        'V': d,
    }, {'VV': ('V', 'V')}, {
        'V_msg_V': ('V', 'V'),
    }, {
        'V': [{
            'mat': 'VV',
            'msg': 'V_msg_V',
            'var': 'V'
        }],
    },
              name='SUBGRAPH')

    # Populate GNN dictionary
    GNN['gnn'] = gnn
    GNN['subgraph_exists'] = subgraph_exists
    GNN['n_vertices'] = n_vertices
    GNN['VV'] = VV_matrix
    GNN['W'] = vertice_weight
    GNN['time_steps'] = time_steps

    # Define V_vote, which will compute one logit for each vertex
    # <--- André: The network which asks each node if it thinks it's part of the subgraph, right?
    V_vote_MLP = Mlp(layer_sizes=[d for _ in range(3)],
                     activations=[tf.nn.relu for _ in range(3)],
                     output_size=1,
                     name='E_vote',
                     name_internal_layers=True,
                     kernel_initializer=tf.contrib.layers.xavier_initializer(),
                     bias_initializer=tf.zeros_initializer())

    # Get the last embeddings
    last_states = gnn({
        "VV": VV_matrix,
        'W': vertice_weight
    }, {"V": vertex_initial_embeddings},
                      time_steps=time_steps)
    GNN["last_states"] = last_states
    V_n = last_states['V'].h

    # Compute a vote for each embedding
    #E_vote = tf.reshape(E_vote_MLP( tf.concat([E_n,target_cost],axis=1) ), [-1])
    V_vote = tf.reshape(V_vote_MLP(V_n), [-1])

    # Compute the number of problems in the batch
    num_problems = tf.shape(n_vertices)[0]

    # Compute a logit probability for each problem <- I'll look into this
    pred_logits = tf.while_loop(
        lambda i, pred_logits: tf.less(i, num_problems), lambda i, pred_logits:
        ((i + 1),
         pred_logits.write(
             i,
             tf.reduce_mean(V_vote[tf.reduce_sum(n_vertices[
                 0:i]):tf.reduce_sum(n_vertices[0:i]) + n_vertices[i]]))),
        [0, tf.TensorArray(size=num_problems, dtype=tf.float32)])[1].stack()
    # Convert logits into probabilities
    GNN['predictions'] = tf.sigmoid(pred_logits)

    # Compute True Positives, False Positives, True Negatives, False Negatives, accuracy
    GNN['TP'] = tf.reduce_sum(
        tf.multiply(
            subgraph_exists,
            tf.cast(tf.equal(subgraph_exists, tf.round(GNN['predictions'])),
                    tf.float32)))
    GNN['FP'] = tf.reduce_sum(
        tf.multiply(
            subgraph_exists,
            tf.cast(
                tf.not_equal(subgraph_exists, tf.round(GNN['predictions'])),
                tf.float32)))
    GNN['TN'] = tf.reduce_sum(
        tf.multiply(
            tf.ones_like(subgraph_exists) - subgraph_exists,
            tf.cast(tf.equal(subgraph_exists, tf.round(GNN['predictions'])),
                    tf.float32)))
    GNN['FN'] = tf.reduce_sum(
        tf.multiply(
            tf.ones_like(subgraph_exists) - subgraph_exists,
            tf.cast(
                tf.not_equal(subgraph_exists, tf.round(GNN['predictions'])),
                tf.float32)))
    GNN['acc'] = tf.reduce_mean(
        tf.cast(tf.equal(subgraph_exists, tf.round(GNN['predictions'])),
                tf.float32))

    # Define loss
    GNN['loss'] = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=subgraph_exists,
                                                logits=pred_logits))

    # Define optimizer
    optimizer = tf.train.AdamOptimizer(name='Adam',
                                       learning_rate=learning_rate)

    # Compute cost relative to L2 normalization
    vars_cost = tf.add_n(
        [tf.nn.l2_loss(var) for var in tf.trainable_variables()])

    # Define gradients and train step
    grads, _ = tf.clip_by_global_norm(
        tf.gradients(GNN['loss'] + tf.multiply(vars_cost, l2norm_scaling),
                     tf.trainable_variables()),
        global_norm_gradient_clipping_ratio)
    GNN['train_step'] = optimizer.apply_gradients(
        zip(grads, tf.trainable_variables()))

    # Return GNN dictionary
    return GNN
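The while_loop above pools V_vote into one logit per problem by slicing on cumulative vertex counts. Assuming segment-style pooling is acceptable, the same logits can be computed without the explicit loop (a sketch in TF 2.x eager mode):

import tensorflow as tf

n_vertices = tf.constant([3, 2])                 # two problems with 3 and 2 vertices
V_vote = tf.constant([0.1, 0.3, 0.2, 0.9, 0.7])  # one vote per vertex
segment_ids = tf.repeat(tf.range(tf.shape(n_vertices)[0]), n_vertices)  # [0, 0, 0, 1, 1]
pred_logits = tf.math.segment_mean(V_vote, segment_ids)                 # [0.2, 0.8]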
Ejemplo n.º 17
0
    def encode_input(self, encoder_inp, seq_len):
        """Run the encoder on gives input.

        Args:
            encoder_inp: Input IDs that are time major i.e. TxB. These IDs are
                first passed through embedding layer before feeding to first
                LSTM layer.
            seq_len: Actual length of input time sequences.
        Returns:
            attention_states: Final encoder output for every input timestep.
                This tensor is used by attention-enabled decoders.
            final_state: Final state of encoder LSTM
        """
        with variable_scope.variable_scope("encoder"):
            comb_encoder_inputs = None
            embedding = {}
            # Necessary to sort so that the order of encoder_inputs is
            # maintained
            for idx, key in enumerate(sorted(encoder_inp.keys())):
                print(key)
                if key == "speech_frames":
                    continue
                elif key == "word_dur":
                    cur_inputs = encoder_inp[key]
                    # No embedding for word duration - so just extend dim.
                    cur_inputs = tf.expand_dims(cur_inputs, -1)
                else:
                    embedding[key] = variable_scope.get_variable(
                        "emb_" + key,
                        [self.vocab_size[key], self.embedding_size[key]])

                    cur_inputs = embedding_ops.embedding_lookup(
                        embedding[key], encoder_inp[key])
                if comb_encoder_inputs is None:
                    comb_encoder_inputs = cur_inputs
                else:
                    comb_encoder_inputs = tf.concat(
                        [comb_encoder_inputs, cur_inputs], 2)
            if "speech_frames" in encoder_inp:
                cnn_outputs = []
                max_words = tf.reduce_max(seq_len)
                for i, filter_size in enumerate(self.filter_sizes):
                    acoustic_input_ta = tf.TensorArray(size=0,
                                                       dtype=tf.float32,
                                                       dynamic_size=True)
                    acoustic_input_ta = acoustic_input_ta.unstack(
                        encoder_inp["speech_frames"])
                    cur_filter_size_output_array = tf.TensorArray(
                        size=0, dtype=tf.float32, dynamic_size=True)
                    _, _, cur_filter_size_output = tf.while_loop(
                        cond=lambda time_idx, a_t, _: time_idx < max_words,
                        body=self._cnn_word_process(filter_size),
                        loop_vars=(tf.constant(0), acoustic_input_ta,
                                   cur_filter_size_output_array))
                    # Convert the TensorArray to Tensor
                    cur_filter_size_output = cur_filter_size_output.stack()
                    cnn_outputs.append(cur_filter_size_output)

                # T * B * filter_sizes * 1 * num_filters
                cnn_features = tf.concat(cnn_outputs, 2)
                num_filters_total = self.num_filters * len(self.filter_sizes)

                time_dim = array_ops.shape(cnn_features)[0]
                batch_size = array_ops.shape(cnn_features)[1]
                cnn_features = tf.reshape(
                    cnn_features,
                    array_ops.stack([time_dim, batch_size, num_filters_total]))

                comb_encoder_inputs = tf.concat(
                    [comb_encoder_inputs, cnn_features], 2)

            encoder_outputs, encoder_state = rnn.dynamic_rnn(
                self.cell,
                comb_encoder_inputs,
                sequence_length=seq_len,
                dtype=tf.float32,
                time_major=True)
            # Make the attention states batch major
            attention_states = tf.transpose(encoder_outputs, [1, 0, 2])

        return attention_states, encoder_state
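The encoder above embeds each discrete input feature separately and concatenates the results along the channel axis before the RNN. A minimal sketch of that lookup-and-concat step, with hypothetical vocabulary and embedding sizes:

import tensorflow as tf

T, B = 5, 2                                      # time-major inputs: T x B
ids_a = tf.zeros([T, B], dtype=tf.int32)         # feature "a" token IDs
ids_b = tf.ones([T, B], dtype=tf.int32)          # feature "b" token IDs
emb_a = tf.Variable(tf.random.normal([100, 8]))  # vocab 100, dim 8
emb_b = tf.Variable(tf.random.normal([50, 4]))   # vocab 50, dim 4
x = tf.concat([tf.nn.embedding_lookup(emb_a, ids_a),
               tf.nn.embedding_lookup(emb_b, ids_b)], axis=2)  # T x B x 12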
Ejemplo n.º 18
0
    def __init__(self,
                 batch_size,
                 hidden_size,
                 embedding_dim,
                 dropout_rate,
                 grad_clip,
                 initial_learning_rate,
                 mode='train'):
        # inputs
        self.inputs_embedded_q = tf.placeholder(
            tf.float32,
            shape=[batch_size, None, embedding_dim],
            name='inputs_embedded_q')
        self.inputs_actual_length_q = tf.placeholder(
            tf.int32, [batch_size],
            name='inputs_actual_length')  # actual length of each input sentence, excluding padding
        self.inputs_embedded_concat_p = tf.placeholder(
            tf.float32,
            shape=[batch_size, None, embedding_dim],
            name='inputs_embedded_concat_p'
        )  # could just concat everything first; no, that would still need padding here; keep the original approach and slice instead
        self.inputs_actual_length_concat_p = tf.placeholder(
            tf.int32, [batch_size], name='inputs_actual_length_concat_p')
        # passage ranking
        self.passage_numbers = tf.placeholder(tf.int32, [batch_size],
                                              name='passage_numbers')
        self.passage_word_numbers = tf.placeholder(tf.int32,
                                                   [batch_size, None],
                                                   name='passage_word_numbers')

        # char-level inputs are not handled yet; an off-the-shelf implementation could be used

        # targets
        if mode != 'test':
            self.start_position = tf.placeholder(tf.int32, [batch_size])
            self.end_position = tf.placeholder(tf.int32, [batch_size])
            # self.y_1 = tf.placeholder(tf.float32, [None])
            # self.y_2 = tf.placeholder(tf.float32, [None])
            # passage_ranking
            self.passage_y = tf.placeholder(
                tf.int32, [batch_size, None],
                name='passage_y')  # also needs padding; the actual lengths are given above

        with tf.variable_scope("q_encoder", reuse=tf.AUTO_REUSE):
            fcell_q = tf.nn.rnn_cell.GRUCell(hidden_size)
            bcell_q = tf.nn.rnn_cell.GRUCell(hidden_size)
            fcell_q = tf.contrib.rnn.DropoutWrapper(
                fcell_q,
                output_keep_prob=1 - dropout_rate)  # there are 3 dropout options; which one should be used??
            bcell_q = tf.contrib.rnn.DropoutWrapper(bcell_q,
                                                    output_keep_prob=1 -
                                                    dropout_rate)
            (fw_outputs_q, bw_outputs_q), (fw_final_state_q, bw_final_state_q) = \
            tf.nn.bidirectional_dynamic_rnn(cell_fw=fcell_q,
                                            cell_bw=bcell_q,
                                            inputs=self.inputs_embedded_q,
                                            sequence_length=self.inputs_actual_length_q,
                                            dtype=tf.float32)
            u_q = tf.concat((fw_outputs_q, bw_outputs_q), 2)
            # print u_q  # the outputs are root, root_1; the preceding embedding is shared

        with tf.variable_scope("p_encoder", reuse=tf.AUTO_REUSE):
            fcell_p = tf.nn.rnn_cell.GRUCell(hidden_size)
            bcell_p = tf.nn.rnn_cell.GRUCell(hidden_size)
            fcell_p = tf.contrib.rnn.DropoutWrapper(
                fcell_p,
                output_keep_prob=1 - dropout_rate)  # there are 3 dropout options; which one should be used??
            bcell_p = tf.contrib.rnn.DropoutWrapper(bcell_p,
                                                    output_keep_prob=1 -
                                                    dropout_rate)

            def p_encoder_one_p(j, start, end, inputs_embedded_concat_p_i,
                                p_w_num_i, fw_p_i, bw_p_i):
                inputs_embedded_p = tf.expand_dims(
                    inputs_embedded_concat_p_i[start:end, :], 0)
                p_w_num = tf.expand_dims(p_w_num_i[j], 0)
                with tf.variable_scope("p_encoder_one_p", reuse=tf.AUTO_REUSE):
                    (fw_outputs_p, bw_outputs_p), (fw_final_state_p, bw_final_state_p) = \
                        tf.nn.bidirectional_dynamic_rnn(cell_fw=fcell_p,
                                                        cell_bw=bcell_p,
                                                        inputs=inputs_embedded_p,
                                                        sequence_length=p_w_num,
                                                        dtype=tf.float32)
                    # is this really equivalent? probably not quite: the first word of one passage should not depend on the last word of the previous one
                fw_p_i = fw_p_i.write(j, fw_outputs_p)
                bw_p_i = bw_p_i.write(j, bw_outputs_p)
                start = end
                j = tf.add(j, 1)
                end = p_w_num_i[j]
                return j, start, end, inputs_embedded_concat_p_i, p_w_num_i, fw_p_i, bw_p_i

            def p_encoder_one_q(i, fw_p_b, bw_p_b):
                j = tf.constant(0)
                p_w_num_i = self.passage_word_numbers[i]
                start = tf.constant(0)
                end = p_w_num_i[0]
                inputs_embedded_concat_p_i = self.inputs_embedded_concat_p[i]
                p_num_i = self.passage_numbers[i]
                fw_p_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
                bw_p_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
                c = lambda x, y, z, m, n, p, q: tf.less(x, p_num_i)
                b = lambda x, y, z, m, n, p, q: p_encoder_one_p(
                    x, y, z, m, n, p, q)
                u_p_i_res = tf.while_loop(
                    cond=c,
                    body=b,
                    loop_vars=(j, start, end, inputs_embedded_concat_p_i,
                               p_w_num_i, fw_p_i, bw_p_i))
                fw_p_i = u_p_i_res[-2].stack()
                bw_p_i = u_p_i_res[-1].stack()
                # print 'fw_p_i, bw_p_i', fw_p_i, bw_p_i
                fw_p_i = tf.reshape(fw_p_i, shape=[-1, hidden_size])  # just drops one dimension
                bw_p_i = tf.reshape(bw_p_i, shape=[-1, hidden_size])  # just drops one dimension
                # print 'fw_p_i, bw_p_i', fw_p_i, bw_p_i
                fw_p_b = fw_p_b.write(i, fw_p_i)
                bw_p_b = bw_p_b.write(i, bw_p_i)
                i = tf.add(i, 1)
                return i, fw_p_b, bw_p_b

            i = tf.constant(0)
            fw_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            bw_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x, y, z: tf.less(x, batch_size)  # no need to transpose, slicing the first dim would do; no, the slicing differs per example, so handle them separately
            b = lambda x, y, z: p_encoder_one_q(x, y, z)
            u_p_b_res = tf.while_loop(cond=c,
                                      body=b,
                                      loop_vars=(i, fw_p_b, bw_p_b))
            fw_p = u_p_b_res[-2].stack()
            bw_p = u_p_b_res[-1].stack()
            # print 'fw_p, bw_p', fw_p, bw_p
            u_p = tf.concat((fw_p, bw_p), 2)
            # print 'u_p', u_p
            # reshape it back to the original shape; slice again later when splitting

        with tf.variable_scope("q_p_attention", reuse=tf.AUTO_REUSE):
            w_q_u = tf.get_variable(name='w_q_u',
                                    shape=[hidden_size * 2, hidden_size * 2])
            w_p_u = tf.get_variable(name='w_p_u',
                                    shape=[hidden_size * 2, hidden_size * 2])
            # w_p_v = tf.get_variable(name='w_p_v', shape=[hidden_size*2, hidden_size*2])
            v = tf.get_variable(name='v', shape=[hidden_size * 2, 1])
            w_g = tf.get_variable(name='w_g',
                                  shape=[hidden_size * 4,
                                         hidden_size * 4])  # the inputs are concatenated again here, hence hidden_size*4
            cell_v = tf.nn.rnn_cell.GRUCell(hidden_size * 2)

            # attention over the t-th word of the passage
            def attention_step(t, q_i, p_i, len_q_i, state, v_p_p):
                p_i_t = tf.reshape(p_i[t], [1, -1])  # note that -1 works here; reshape to one row
                q_i_t = tf.slice(q_i,
                                 begin=[0, 0],
                                 size=[len_q_i,
                                       hidden_size * 2])  # this strips the padded part

                # sum_t = tf.matmul(w_q_u, q_i_t) + tf.matmul(w_p_u, p_i_t)  # this works too!!!
                # + tf.matmul(w_p_v, tf.transpose(v_p_t_1))  # decide whether to include this term
                sum_t = tf.matmul(q_i_t, w_q_u) + tf.matmul(
                    p_i_t, w_p_u)  # fewer transposes, less computation
                # print sum_t  # (?, 150)
                s_t = tf.matmul(tf.tanh(sum_t), v)  # column vector, question length
                # print s_t  # (?, 1); the ? should resolve to 150
                a_t = tf.nn.softmax(s_t, axis=0)  # normalize over the question positions
                a_t = tf.reshape(a_t, [-1, 1])
                c_q_t = tf.transpose(tf.matmul(tf.transpose(q_i_t), a_t))  # row vector
                # print 'c_q_t', c_q_t  # (1, ?), likewise should be 150

                p_c = tf.concat([p_i_t, c_q_t],
                                axis=1)  # row vector, dimension hidden_size*4
                g_t = tf.nn.sigmoid(tf.matmul(p_c,
                                              w_g))  # row vector, dimension hidden_size*4
                # print p_c, g_t  # (1, ?), should be 300; (1, 300)
                # method: element-wise multiply the gate output with the vector being controlled;
                # rationale: the gate output is a real-valued vector with entries between 0 and 1
                p_c_gated = g_t * p_c  # plain element-wise multiplication is enough
                # print p_c_gated  # row vector, (1, 300)
                out, next_state = cell_v(inputs=p_c_gated,
                                         state=state)  # are out and state the same??
                # print 'state', state
                # print 'out', out
                v_p_p = v_p_p.write(t, out)  # figure out how to split this part later

                t = tf.add(t, 1)

                return t, q_i, p_i, len_q_i, next_state, v_p_p

            # i.e. extend i,t -> i,j,t
            def atention_one_p(j, q_i, p_i, len_q_i, p_w_num_i, v_p_q):
                state = cell_v.zero_state(batch_size=1,
                                          dtype=tf.float32)  # wait, should this be bidirectional?
                p_w_num_i_j = p_w_num_i[j]
                v_p_p = tf.TensorArray(dtype=tf.float32, size=p_w_num_i_j)
                t = tf.constant(0)

                c = lambda a, x, y, z, s, u: tf.less(a, p_w_num_i_j)
                b = lambda a, x, y, z, s, u: attention_step(a, x, y, z, s, u)
                v_p_p_res = tf.while_loop(cond=c,
                                          body=b,
                                          loop_vars=(t, q_i, p_i, len_q_i,
                                                     state, v_p_p))

                v_p_p = v_p_p_res[-1].stack()
                # print 'v_p_p', v_p_p
                v_p_q = v_p_q.write(j, v_p_p)

                j = tf.add(j, 1)
                return j, q_i, p_i, len_q_i, p_w_num_i, v_p_q

            # attention over all passages of one question
            def atention_one_q(i, v_p_b):
                p_i = u_p[i]  # the passages of one question
                q_i = u_q[i]  # the corresponding question
                len_q_i = self.inputs_actual_length_q[i]
                # print state
                j = tf.constant(0)
                p_num_i = self.passage_numbers[i]
                p_w_num_i = self.passage_word_numbers[i]
                v_p_q = tf.TensorArray(dtype=tf.float32, size=p_num_i)

                c = lambda a, x, y, z, s, u: tf.less(a, p_num_i)
                b = lambda a, x, y, z, s, u: atention_one_p(a, x, y, z, s, u)
                v_p_q_res = tf.while_loop(cond=c,
                                          body=b,
                                          loop_vars=(j, q_i, p_i, len_q_i,
                                                     p_w_num_i, v_p_q))

                v_p_q = v_p_q_res[-1].stack()
                # print 'v_p_q', v_p_q
                v_p_q = tf.reshape(v_p_q,
                                   shape=[-1, hidden_size * 2])  # what shape should this be?
                # print 'v_p_q', v_p_q
                v_p_b = v_p_b.write(i, v_p_q)
                # print 'temp', temp

                i = tf.add(i, 1)

                return i, v_p_b

            v_p_b = tf.TensorArray(dtype=tf.float32,
                                   size=batch_size)  # holds the result for each batch element
            c = lambda x, y: tf.less(x, batch_size)  # loop over the batch
            b = lambda x, y: atention_one_q(x, y)
            i = tf.constant(0)  # batch index
            v_p_b_res = tf.while_loop(cond=c, body=b,
                                      loop_vars=(i,
                                                 v_p_b))  # this traces once rather than iterating; a Python for loop would unroll
            v_p = v_p_b_res[-1].stack()  # this is v_p: stacks the per-example arrays into one higher-dimensional array
            # print 'v_p', v_p

        # with tf.variable_scope("self-matching"): # 这里s_net似乎删掉了r_net的self-matching部分

        with tf.variable_scope("output_layer", reuse=tf.AUTO_REUSE):
            # first compute r_q, the initial state of h
            # then compute a, p, c
            # use c as input to produce the next h

            with tf.variable_scope("intial_state", reuse=tf.AUTO_REUSE):
                w_u_q = tf.get_variable(
                    name='w_u_q', shape=[hidden_size * 2, hidden_size * 2])
                w_q_v = tf.get_variable(
                    name='w_q_v', shape=[hidden_size * 2, hidden_size * 2])
                v_q_r = tf.get_variable(name='v_q_r',
                                        shape=[1, hidden_size * 2
                                               ])  # probably a vector; or could it be a matrix??
                v2 = tf.get_variable(name='v2', shape=[hidden_size * 2, 1])

                def attention_r_q(i, r_q_b):
                    q_i = u_q[i]
                    # print 'q_i', q_i
                    len_q_i = self.inputs_actual_length_q[i]
                    q_i = tf.slice(q_i,
                                   begin=[0, 0],
                                   size=[len_q_i, hidden_size * 2
                                         ])  # plain list indexing would work as well, as below; leave it for now
                    # print 'q_i', q_i
                    sum_q = tf.matmul(q_i, w_u_q) + tf.matmul(v_q_r, w_q_v)
                    # print sum_q
                    s = tf.matmul(tf.tanh(sum_q), v2)
                    # print s
                    a = tf.nn.softmax(s, axis=0)  # normalize over the question positions
                    a = tf.reshape(a, [-1, 1])
                    r_q_i = tf.transpose(tf.matmul(tf.transpose(q_i),
                                                   a))  # transpose back to a row vector
                    # print 'r_q_i', r_q_i  # should still be hidden*2
                    r_q_b = r_q_b.write(i, r_q_i)
                    i = tf.add(i, 1)
                    return i, r_q_b

                r_q_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
                c = lambda x, y: tf.less(x, batch_size)  # loop over the batch
                b = lambda x, y: attention_r_q(x, y)
                r_q_b_res = tf.while_loop(cond=c, body=b, loop_vars=(i, r_q_b))
                r_q = r_q_b_res[-1].stack()
                # print 'r_q', r_q  # not squeezed, so shape [b, 1, hidden*2]

            with tf.variable_scope("answer_recurrent_network",
                                   reuse=tf.AUTO_REUSE):

                w_p_h = tf.get_variable(
                    shape=[hidden_size * 2, hidden_size * 2], name="w_p_h_s")
                w_a_h = tf.get_variable(
                    shape=[hidden_size * 2, hidden_size * 2], name="w_a_h_s")
                v4 = tf.get_variable(shape=[hidden_size * 2, 1], name="v4")
                cell_h = tf.nn.rnn_cell.GRUCell(hidden_size * 2)

                def pointers(i, p_1_b, p_2_b, a_1_b, a_2_b):
                    p_i = v_p[i]
                    len_p_i = self.inputs_actual_length_concat_p[i]
                    p_i_t = tf.slice(p_i,
                                     begin=[0, 0],
                                     size=[len_p_i, hidden_size * 2])

                    # t takes the values 1 and 2 (start and end); see the subscripts in the paper's loss
                    # start, t=1
                    h_a_1 = r_q[i]  # initial state
                    sum_1 = tf.matmul(p_i_t, w_p_h) + tf.matmul(h_a_1, w_a_h)
                    s_1 = tf.matmul(tf.tanh(sum_1), v4)  # column vector, passage length N
                    a_1 = tf.nn.softmax(s_1, axis=0)  # normalize over the passage positions
                    a_1 = tf.reshape(a_1, [-1, 1])
                    a_1_b = a_1_b.write(i, tf.transpose(a_1))  # stored as a row vector
                    c_1 = tf.transpose(tf.matmul(tf.transpose(p_i_t), a_1))  # row vector
                    c_1 = tf.reshape(c_1, [1, hidden_size * 2])  # must be pinned to this shape
                    h_a_1 = tf.reshape(h_a_1, [1, hidden_size * 2])
                    # print 'c_1', c_1  # (1,?),同样应150
                    # print 'h_a_1', h_a_1
                    h_a_2, state = cell_h(inputs=c_1, state=h_a_1)
                    p_1 = tf.cast(tf.argmax(a_1, axis=0), tf.int32)  # predicted start index
                    p_1_b = p_1_b.write(i, p_1)

                    # end, t=2
                    sum_2 = tf.matmul(p_i_t, w_p_h) + tf.matmul(h_a_2, w_a_h)
                    s_2 = tf.matmul(tf.tanh(sum_2), v4)  # column vector, passage length N
                    a_2 = tf.nn.softmax(s_2, axis=0)
                    a_2 = tf.reshape(a_2, [-1, 1])
                    a_2_b = a_2_b.write(i, tf.transpose(a_2))
                    p_2 = tf.cast(tf.argmax(a_2, axis=0), tf.int32)  # predicted end index
                    p_2_b = p_2_b.write(i, p_2)

                    i = tf.add(i, 1)

                    return i, p_1_b, p_2_b, a_1_b, a_2_b

                p_1_b = tf.TensorArray(dtype=tf.int32, size=batch_size)
                p_2_b = tf.TensorArray(dtype=tf.int32, size=batch_size)
                a_1_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
                a_2_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
                c = lambda x, y, z, m, n: tf.less(x, batch_size)  # loop over the batch
                b = lambda x, y, z, m, n: pointers(x, y, z, m, n)
                b_res = tf.while_loop(cond=c,
                                      body=b,
                                      loop_vars=(i, p_1_b, p_2_b, a_1_b,
                                                 a_2_b))
                p_1 = b_res[1].stack()
                p_2 = b_res[2].stack()
                # print 'p_1', p_1
                # print 'p_2', p_2
                a_1 = b_res[3].stack()
                a_2 = b_res[4].stack()
                # print 'a_1', a_1
                # print 'a_2', a_2
                # self.p = [tf.reshape(p_1, [1, -1]), tf.reshape(p_2, [1, -1])]
                self.p1 = tf.reshape(p_1, [1, -1])
                self.p2 = tf.reshape(p_2, [1, -1])
                a = [tf.reshape(a_1, [1, -1]), tf.reshape(a_2, [1, -1])]
                # print p, a

        with tf.variable_scope("passage_ranking", reuse=tf.AUTO_REUSE):
            w_v_q = tf.get_variable(name='w_v_q',
                                    shape=[hidden_size * 2, hidden_size * 2])
            w_v_p = tf.get_variable(name='w_v_p',
                                    shape=[hidden_size * 2, hidden_size * 2])
            v3 = tf.get_variable(name='v3', shape=[hidden_size * 2, 1])
            v_g = tf.get_variable(name='v_g', shape=[hidden_size * 2, 1])
            w_g_2 = tf.get_variable(name='w_g_2',
                                    shape=[hidden_size * 2, hidden_size * 2])

            def attention_r_p_one_passage(j, start, end, v_p_i, r_q_i,
                                          p_w_num_i, r_p_i):
                v_p_i_j = v_p_i[start:end, :]
                # print 'v_p_i_j', v_p_i_j
                sum_p = tf.matmul(v_p_i_j, w_v_p) + tf.matmul(r_q_i, w_v_q)
                # print 'sum_p', sum_p  # [p_w_n, hidden*2]
                s = tf.matmul(tf.tanh(sum_p), v3)
                # print 's', s  # [p_w_n, 1]
                a = tf.nn.softmax(s, axis=0)  # normalize over the words of this passage
                # print 'a', a
                r_p_i_j = tf.transpose(tf.matmul(tf.transpose(v_p_i_j),
                                                 a))  # transpose back to a row vector
                # print 'r_p_i_j', r_p_i_j  # [1, hidden*2]
                r_p_i = r_p_i.write(j, r_p_i_j)
                start = p_w_num_i[j]
                j = tf.add(j, 1)
                end = p_w_num_i[j]
                return j, start, end, v_p_i, r_q_i, p_w_num_i, r_p_i

            def attention_r_p(i, r_p_b):
                v_p_i = v_p[i]  # the key part: split it into the individual passages here
                # print 'v_p_i', v_p_i, v_p_i[:self.passage_word_numbers[i][0],:]
                r_q_i = r_q[i]
                p_num_i = self.passage_numbers[i]
                r_p_i = tf.TensorArray(dtype=tf.float32,
                                       size=p_num_i)  # surprisingly this works!!!
                # print r_p_i
                j = tf.constant(0)
                start = tf.constant(0)
                p_w_num_i = self.passage_word_numbers[i]
                end = p_w_num_i[0]
                c = lambda x, y, z, m, n, p, q: tf.less(x, p_num_i)
                b = lambda x, y, z, m, n, p, q: attention_r_p_one_passage(
                    x, y, z, m, n, p, q)
                res = tf.while_loop(cond=c,
                                    body=b,
                                    loop_vars=(j, start, end, v_p_i, r_q_i,
                                               p_w_num_i, r_p_i))
                r_p_i = tf.squeeze(res[-1].stack(), axis=1)
                # print 'r_p_i', r_p_i
                r_p_b = r_p_b.write(i, r_p_i)
                i = tf.add(i, 1)
                return i, r_p_b

            r_p_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x, y: tf.less(x, batch_size)  # loop over the batch
            b = lambda x, y: attention_r_p(x, y)
            r_p_b_res = tf.while_loop(cond=c, body=b, loop_vars=(i, r_p_b))
            r_p = r_p_b_res[-1].stack()

            # print 'r_p', r_p

            def compute_g_b_one_passage(j, r_q_i, r_p_i, g_i):
                r_p_i_j = tf.reshape(r_p_i[j], shape=[1, -1])
                # print r_q_i, r_p_i_j
                r_p_q = tf.concat([r_q_i, r_p_i_j], axis=1)
                mul_g = tf.matmul(r_p_q, w_g_2)
                g_j = tf.matmul(tf.tanh(mul_g), v_g)  # this is a scalar
                # print 'g_j', g_j
                g_i = g_i.write(j, g_j)
                j = tf.add(j, 1)
                return j, r_q_i, r_p_i, g_i

            def compute_g_b(i, g_b):
                r_q_i = r_q[i]
                r_p_i = r_p[i]
                p_num_i = self.passage_numbers[i]
                j = tf.constant(0)
                g_i = tf.TensorArray(dtype=tf.float32, size=p_num_i)
                c = lambda x, y, z, m: tf.less(x, p_num_i)
                b = lambda x, y, z, m: compute_g_b_one_passage(x, y, z, m)
                res = tf.while_loop(cond=c,
                                    body=b,
                                    loop_vars=(j, r_q_i, r_p_i, g_i))
                g_i = tf.squeeze(res[-1].stack(), axis=1)  # a vector
                # print 'g_i', g_i
                # normalize before appending to g_b
                g_i = tf.nn.softmax(g_i)
                g_b = g_b.write(i, g_i)
                i = tf.add(i, 1)
                return i, g_b

            # this again has to be done per batch
            g_b = tf.TensorArray(dtype=tf.float32, size=batch_size)
            c = lambda x, y: tf.less(x, batch_size)  # loop over the batch
            b = lambda x, y: compute_g_b(x, y)
            g_b_res = tf.while_loop(cond=c, body=b, loop_vars=(i, g_b))
            g = tf.squeeze(g_b_res[-1].stack(), axis=2)
            # print 'g', g

        if mode == 'train':
            with tf.variable_scope("loss"):
                # no: training needs a, not p
                # first build a y vector from the two positions
                # the length is not fixed at this point; seems to require a lambda??

                def write_y(j, pos, y):
                    y = y.write(j, tf.cast(tf.equal(j, pos), tf.float32))
                    return j + 1, pos, y

                def to_one_hot(i, y1_b, y2_b):
                    len_p_i = self.inputs_actual_length_concat_p[i]
                    start = self.start_position[i]
                    end = self.end_position[i]
                    y1 = tf.TensorArray(dtype=tf.float32, size=len_p_i)
                    y2 = tf.TensorArray(dtype=tf.float32, size=len_p_i)
                    c = lambda x, y, z: tf.less(x, len_p_i)  # loop over the positions
                    b = lambda x, y, z: write_y(x, y, z)
                    j = tf.constant(0)  # position index
                    y1_res = tf.while_loop(cond=c,
                                           body=b,
                                           loop_vars=(j, start, y1))
                    j = tf.constant(0)  # position index
                    y2_res = tf.while_loop(cond=c,
                                           body=b,
                                           loop_vars=(j, end, y2))
                    y1_i = y1_res[-1].stack()
                    y2_i = y2_res[-1].stack()
                    y1_b = y1_b.write(i, y1_i)
                    y2_b = y2_b.write(i, y2_i)
                    i = tf.add(i, 1)
                    return i, y1_b, y2_b

                y1_b = tf.TensorArray(dtype=tf.float32,
                                      size=batch_size)  # holds the result for each batch element
                y2_b = tf.TensorArray(dtype=tf.float32,
                                      size=batch_size)  # holds the result for each batch element
                c = lambda x, y, z: tf.less(x, batch_size)  # loop over the batch
                b = lambda x, y, z: to_one_hot(x, y, z)
                i = tf.constant(0)  # batch index
                res = tf.while_loop(cond=c, body=b,
                                    loop_vars=(i, y1_b,
                                               y2_b))  # traces once rather than iterating; a Python for loop would unroll
                y1 = res[-2].stack()
                y2 = res[-1].stack()
                y = [tf.reshape(y1, [1, -1]), tf.reshape(y2, [1, -1])]
                # print 'y', y

                self.loss = 0.0
                for t in range(2):
                    # binary cross-entropy between the one-hot targets and the attention distributions
                    self.loss += -tf.reduce_sum(
                        y[t] * tf.log(a[t]) + (1 - y[t]) * tf.log(1 - a[t]),
                        1)
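The nested while_loops in the loss block build the one-hot start/end targets position by position. Assuming all concatenated passages share one padded length (the original keeps per-example lengths), tf.one_hot yields the same targets in a single call:

import tensorflow as tf

start_position = tf.constant([2, 0])  # batch of start indices
end_position = tf.constant([4, 1])    # batch of end indices
padded_len = 6                        # hypothetical common padded length
y1 = tf.one_hot(start_position, depth=padded_len, dtype=tf.float32)  # shape [B, padded_len]
y2 = tf.one_hot(end_position, depth=padded_len, dtype=tf.float32)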
Ejemplo n.º 19
0
    def GetLoss(self, y_true, y_pred):
        '''
        Compute the loss value.
        y_true: coordinates not yet normalized, [(batch_size, 13, 13, 3, 5+num_classes), (batch_size, 26, 26, 3, 5+num_classes), (batch_size, 52, 52, 3, 5+num_classes)]
        y_pred: [(batch_size, 13, 13, 3, 5+num_classes), (batch_size, 26, 26, 3, 5+num_classes), (batch_size, 52, 52, 3, 5+num_classes)]
        '''
        print('loss_fun:', type(y_true), type(y_pred))
        layers_size = [[13, 13], [26, 26], [52, 52]]
        anchors_wh = [
            [[116, 90], [156, 198], [373, 326]],
            [[30, 61], [62, 45], [59, 119]],
            [[10, 13], [16, 30], [33, 23]],
        ]
        classes_num = 80
        train_iou_thresh = 0.5
        image_size = tf.constant((416, 416), dtype=tf.float32)
        # (layers_num, anchors_num, 2)
        anchors_wh = tf.constant(anchors_wh, dtype=tf.float32)
        # anchors_wh = anchors_wh / image_size
        anchors_num = tf.shape(anchors_wh)[1]
        layers_size = tf.constant(layers_size, dtype=tf.int32)
        layers_num = tf.shape(layers_size)[0]
        classes_num = tf.constant(classes_num, dtype=tf.int32)
        batch_size = tf.shape(y_true[0])[0]
        batch_size_float = tf.cast(batch_size, dtype=tf.float32)
        loss = 0.0
        for layer_index in range(3):
            y_true_read = y_true[layer_index]
            y_pred_raw = y_pred[layer_index]
            y_pred_raw = tf.reshape(y_pred_raw, tf.shape(y_true_read))
            # coordinates of the feature-grid cells in the actual image
            grid_shape = tf.shape(y_pred_raw)[1:3]  # height, width
            grid_y = tf.range(0, grid_shape[0], dtype=tf.float32)
            grid_x = tf.range(0, grid_shape[1], dtype=tf.float32)
            grid_x, grid_y = tf.meshgrid(grid_x, grid_y)
            grid_x = tf.reshape(grid_x, (grid_shape[0], grid_shape[1], 1, 1))
            grid_y = tf.reshape(grid_y, (grid_shape[0], grid_shape[1], 1, 1))
            grid_xy = tf.concat([grid_x, grid_y], axis=-1)
            # compute the real coordinates and the grid-relative coordinates
            # y_true
            y_true_object = y_true_read[..., 4:5]
            y_true_classes = y_true_read[..., 5:]
            y_true_read_xy = y_true_read[..., 0:2]
            # tf.print('grid_xy:', tf.math.reduce_max(grid_xy), tf.math.reduce_min(grid_xy))
            # tf.print('grid_shape:', grid_shape[::-1])
            y_true_raw_xy = y_true_read_xy * tf.cast(
                grid_shape[::-1], dtype=tf.float32) - grid_xy
            # tf.print('y_true_raw_xy:', tf.math.reduce_max(y_true_raw_xy), tf.math.reduce_min(y_true_raw_xy))
            # tf.print('y_true_object:', tf.math.reduce_max(y_true_object), tf.math.reduce_min(y_true_object))
            # y_true_raw_xy = y_true_object * y_true_raw_xy
            # tf.print('y_true_raw_xy:', tf.math.reduce_max(y_true_raw_xy), tf.math.reduce_min(y_true_raw_xy))

            y_true_read_wh = y_true_read[..., 2:4]
            y_true_raw_wh = tf.math.log(y_true_read_wh * image_size[::-1] /
                                        anchors_wh[layer_index, ...])
            y_true_raw_wh = tf.where(tf.cast(y_true_object,
                                             dtype=tf.bool), y_true_raw_wh,
                                     tf.zeros_like(y_true_raw_wh))
            # tf.print('y_true_raw_wh:', tf.math.reduce_max(y_true_raw_wh), tf.math.reduce_min(y_true_raw_wh))

            # y_pred
            y_pred_object = y_pred_raw[..., 4:5]
            y_pred_classes = y_pred_raw[..., 5:]
            y_pred_raw_xy = y_pred_raw[..., 0:2]
            # tf.print('y_pred_raw_xy:', tf.math.reduce_max(y_pred_raw_xy), tf.math.reduce_min(y_pred_raw_xy))
            y_pred_read_xy = (tf.math.sigmoid(y_pred_raw_xy) +
                              grid_xy) / tf.cast(grid_shape[::-1],
                                                 dtype=tf.float32)

            y_pred_raw_wh = y_pred_raw[..., 2:4]
            # tf.print('y_pred_raw_wh:', tf.math.reduce_max(y_pred_raw_wh), tf.math.reduce_min(y_pred_raw_wh))
            y_pred_read_wh = tf.math.exp(y_pred_raw_wh) * anchors_wh[
                layer_index, ...] / image_size[::-1]
            # y_pred_read_wh = tf.where(tf.math.is_inf(y_pred_read_wh), tf.zeros_like(y_pred_read_wh), y_pred_read_wh)

            # y_pred_object = tf.math.sigmoid(y_pred_object)
            # y_pred_classes = tf.math.sigmoid(y_pred_classes)

            # box coordinates (batch_size, h, w, anchors_num, (x1, y1, x2, y2))
            y_true_read_wh_half = y_true_read_wh / 2
            y_true_read_mins = y_true_read_xy - y_true_read_wh_half
            y_true_read_maxes = y_true_read_xy + y_true_read_wh_half
            y_true_boxes = tf.concat([y_true_read_mins, y_true_read_maxes],
                                     axis=-1)
            y_pred_read_wh_half = y_pred_read_wh / 2
            y_pred_read_mins = y_pred_read_xy - y_pred_read_wh_half
            y_pred_read_maxes = y_pred_read_xy + y_pred_read_wh_half
            y_pred_boxes = tf.concat([y_pred_read_mins, y_pred_read_maxes],
                                     axis=-1)

            ignore_mask = tf.TensorArray(tf.float32, size=1, dynamic_size=True)

            def foreach_batch(batch_index, ignore_mask):
                y_true_boxes_one = y_true_boxes[batch_index, ...]
                y_pred_boxes_one = y_pred_boxes[batch_index, ...]
                y_true_object_one = y_true_object[batch_index, ...]
                y_true_boxes_tmp = tf.boolean_mask(
                    y_true_boxes_one,
                    tf.cast(y_true_object_one[..., 0], dtype=tf.bool))
                # compute the IOU
                # (boxes_num, 4) => (1, boxes_num, 4)
                y_true_boxes_tmp = tf.expand_dims(y_true_boxes_tmp, axis=0)
                y_pred_boxes_tmp = y_pred_boxes_one
                # (h, w, anchors_num, 4) => (h, w, anchors_num, 1, 4)
                y_pred_boxes_tmp = tf.expand_dims(y_pred_boxes_tmp, axis=-2)
                # (h, w, anchors_num, boxes_num)
                iou = GetIOU(y_pred_boxes_tmp, y_true_boxes_tmp, 'iou')
                # (h, w, anchors_num)
                best_iou = tf.math.reduce_max(iou, axis=-1)
                # treat boxes with IOU < 0.5 as background
                ignore_mask = ignore_mask.write(
                    batch_index,
                    tf.cast(best_iou < train_iou_thresh, dtype=tf.float32))
                return batch_index + 1, ignore_mask

            # (batch_size, h, w, anchors_num, y_true_boxes_num)
            _, ignore_mask = tf.while_loop(lambda b, *args: b < batch_size,
                                           foreach_batch, [0, ignore_mask])
            ignore_mask = ignore_mask.stack()
            # (batch_size, h, w, anchors_num)
            ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
            # ignore_mask = tf.where(tf.math.is_nan(ignore_mask), tf.zeros_like(ignore_mask), ignore_mask)
            # tf.print('ignore_mask:', tf.math.reduce_max(ignore_mask), tf.math.reduce_min(ignore_mask))
            # compute the loss terms
            boxes_loss_scale = 2 - y_true_read_wh[..., 0:1] * y_true_read_wh[
                ..., 1:2]
            # tf.print('boxes_loss_scale:', tf.math.reduce_max(boxes_loss_scale), tf.math.reduce_min(boxes_loss_scale))

            xy_loss_bc = tf.keras.losses.binary_crossentropy(
                tf.expand_dims(y_true_raw_xy, axis=-1),
                tf.expand_dims(y_pred_raw_xy, axis=-1),
                from_logits=True)
            xy_loss = y_true_object * boxes_loss_scale * xy_loss_bc
            wh_loss = y_true_object * boxes_loss_scale * 0.5 * tf.math.square(
                y_true_raw_wh - y_pred_raw_wh)
            object_loss_bc = tf.keras.losses.binary_crossentropy(
                tf.expand_dims(y_true_object, axis=-1),
                tf.expand_dims(y_pred_object, axis=-1),
                from_logits=True)
            # tf.print('object_loss_bc:', tf.math.reduce_max(object_loss_bc), tf.math.reduce_min(object_loss_bc))
            object_loss = y_true_object * object_loss_bc + (
                1 - y_true_object) * object_loss_bc * ignore_mask
            classes_loss_bc = tf.keras.losses.binary_crossentropy(
                tf.expand_dims(y_true_classes, axis=-1),
                tf.expand_dims(y_pred_classes, axis=-1),
                from_logits=True)
            # tf.print('classes_loss_bc:', tf.math.reduce_max(classes_loss_bc), tf.math.reduce_min(classes_loss_bc))
            classes_loss = y_true_object * classes_loss_bc

            xy_loss = tf.math.reduce_sum(xy_loss) / batch_size_float
            wh_loss = tf.math.reduce_sum(wh_loss) / batch_size_float
            object_loss = tf.math.reduce_sum(object_loss) / batch_size_float
            classes_loss = tf.math.reduce_sum(classes_loss) / batch_size_float
            # tf.print('loss:', xy_loss, wh_loss, object_loss, classes_loss)
            loss += xy_loss + wh_loss + object_loss + classes_loss
        # tf.print('loss:', loss)
        return loss
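GetIOU above is defined elsewhere in this codebase; for reference, a minimal broadcast IoU over corner boxes (an assumption about its behaviour, not its actual implementation) can be written as:

import tensorflow as tf

def broadcast_iou(pred_boxes, true_boxes):
    # pred_boxes: (..., 1, 4) and true_boxes: (1, boxes_num, 4),
    # both as [x_min, y_min, x_max, y_max]; broadcasting yields (..., boxes_num)
    inter_mins = tf.maximum(pred_boxes[..., 0:2], true_boxes[..., 0:2])
    inter_maxes = tf.minimum(pred_boxes[..., 2:4], true_boxes[..., 2:4])
    inter_wh = tf.maximum(inter_maxes - inter_mins, 0.0)
    inter_area = inter_wh[..., 0] * inter_wh[..., 1]
    pred_wh = pred_boxes[..., 2:4] - pred_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    pred_area = pred_wh[..., 0] * pred_wh[..., 1]
    true_area = true_wh[..., 0] * true_wh[..., 1]
    return inter_area / (pred_area + true_area - inter_area + 1e-9)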
Ejemplo n.º 20
0
    def build_graph(self):
        self._define_embedding()
        # self.embed_dim may be a tf.Dimension rather than an int; unwrap it
        try:
            self.embed_dim = self.embed_dim.value
        except AttributeError:
            pass

        # The inputs
        self.inputs = tf.placeholder(tf.int32, [None, self.max_length])
        self.is_leaf = tf.placeholder(tf.bool, [None, self.max_length])
        self.left_children = tf.placeholder(tf.int32, [None, self.max_length])
        self.right_children = tf.placeholder(tf.int32, [None, self.max_length])
        self.is_node = tf.placeholder(tf.bool, [None, self.max_length])
        self.input_lens = tf.placeholder(tf.int32, [None])
        if self.use_phrases:
            output_shape = [None, self.max_length, self.output_dim]
        else:
            output_shape = [None, self.output_dim]
        self.outputs = tf.placeholder(tf.float32, shape=output_shape)

        self.feats = tf.nn.embedding_lookup(self.embedding, self.inputs)

        # Need to do lift
        # H = tanh(W_lift*c + b_lift)

        # First, we define W_lift. This is actually a 3-D tensor, since it
        # lifts our input vectors into a sqrt(d)-by-sqrt(d) matrix.
        # Initialize with Xavier initialization, then shape into 3D.
        self.W_lift = tf.reshape(
            self.weight_init(self.embed_dim, int(self.hidden_dim**2),
                             'W_lift'),
            [self.embed_dim, self.hidden_dim, self.hidden_dim])
        self.b_lift = tf.reshape(
            self.bias_init(int(self.hidden_dim**2), 'b_lift'),
            [self.hidden_dim, self.hidden_dim])

        self.lifted_feats = tf.nn.tanh(
            tf.tensordot(self.feats, self.W_lift, [[2], [0]]) / 100 +
            self.b_lift)

        # 224D
        self.is_leaf_t = tf.transpose(self.is_leaf)
        self.left_children_t = tf.transpose(self.left_children)
        self.right_children_t = tf.transpose(self.right_children)
        self.lifted_feats_t = tf.transpose(self.lifted_feats, [1, 0, 2, 3])

        # For node combination
        self.W_lstm = self.weight_init(2 * self.hidden_dim_v,
                                       4 * self.hidden_dim_v, 'W_lstm')
        self.b_lstm = self.bias_init(4 * self.hidden_dim_v, 'b_lstm')
        self.W_comb = self.weight_init(self.hidden_dim, self.hidden_dim,
                                       'W_comb')
        self.b_comb = self.weight_init(self.hidden_dim, self.hidden_dim,
                                       'b_comb')
        #self.b_comb2 = self.weight_init(self.hidden_dim, self.hidden_dim, 'b_comb2')
        # maybe xavier init here
        x = np.sqrt(6.0 / self.hidden_dim_v)
        #self.c_init = tf.Variable(tf.random_uniform(tf.shape(self.lifted_feats_t[0]), minval=-x, maxval=x), name="c_init")

        node_tensors = tf.TensorArray(
            tf.float32,
            size=self.max_length,
            #element_shape=(2, self.inputs.shape[0], self.hidden_dim, self.hidden_dim),
            dynamic_size=True,
            clear_after_read=False,
            infer_shape=True)

        # So TF doesn't complain. We're not going to use this value.
        #node_tensors = node_tensors.write(0, [self.lifted_feats_t[0], self.lifted_feats_t[0]])
        #x = node_tensors.gather([0])

        # From 224D github
        # Loop through the tensors, combining them
        def loop_body(node_tensors, i):
            node_is_leaf = tf.gather(self.is_leaf_t, i)
            left_child = tf.gather(self.left_children_t, i)
            right_child = tf.gather(self.right_children_t, i)
            leaf_tensor = tf.stack([
                tf.zeros_like(self.lifted_feats_t[0]),
                tf.gather(self.lifted_feats_t, i)
            ],
                                   axis=1)
            # batchy
            # keep track of [c, H]
            node_tensor = tf.where(
                node_is_leaf,
                leaf_tensor,
                # the things i do for batching
                tf.cond(
                    tf.equal(i, 0), lambda: leaf_tensor,
                    lambda: self.combine_children(
                        node_tensors.gather(left_child),
                        node_tensors.gather(right_child))))
            node_tensors = node_tensors.write(i, node_tensor)
            i = tf.add(i, 1)
            return node_tensors, i

        # while less than #nodes
        loop_cond = lambda node_tensors, i: \
            tf.less(i, tf.reduce_max(self.input_lens))
        # loop thru
        node_tensors, _ = tf.while_loop(loop_cond,
                                        loop_body, [node_tensors, 0],
                                        parallel_iterations=1)

        # Get the last [C, H], and retrieve H from that.
        last_pair = node_tensors.gather(self.input_lens - 1)
        last_H = tf.reshape(self.get_last_val(last_pair),
                            [-1, self.hidden_dim_v])  # allow for inheritance

        hidden_vals = tf.reshape(node_tensors.stack()[:, :, 1],
                                 [self.max_length, -1, self.hidden_dim_v])

        self.W_hy = self.weight_init(self.hidden_dim_v, self.output_dim,
                                     'W_hy')
        self.b_y = self.bias_init(self.output_dim, 'b_y')
        tiled_W_hy = tf.reshape(
            tf.tile(self.W_hy, [self.max_length, 1]),
            [self.max_length, self.hidden_dim_v, self.output_dim])

        self.model = tf.transpose(
            tf.matmul(hidden_vals, tiled_W_hy) + self.b_y, [1, 0, 2])
        self.last = tf.matmul(last_H, self.W_hy) + self.b_y
        self.node_tensors = node_tensors
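The lift above contracts the embedding axis against a 3-D weight tensor; a minimal shape check of that tf.tensordot pattern, with toy dimensions (TF 2.x eager):

import tensorflow as tf

feats = tf.random.normal([2, 7, 50])     # (batch, max_length, embed_dim)
W_lift = tf.random.normal([50, 10, 10])  # (embed_dim, hidden_dim, hidden_dim)
lifted = tf.tensordot(feats, W_lift, [[2], [0]])
print(lifted.shape)                      # (2, 7, 10, 10): one 10x10 matrix per token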
    def __init__(self,
                 inp,
                 inp_mask,
                 seq2seq_gtruth,
                 post_gtruth,
                 hyper_params=None,
                 training=True,
                 name='Tacotron',
                 reuse=False):
        """
        Build the computational graph.
        :param inp:
        :param inp_mask:
        :param seq2seq_gtruth:
        :param post_gtruth:
        :param hyper_params:
        :param training:
        :param name:
        """
        super(Tacotron, self).__init__(name)
        self.hyper_params = HyperParams(
        ) if hyper_params is None else hyper_params
        with tf.variable_scope(name, reuse=reuse):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)
            self.learning_rate = tf.Variable(
                self.hyper_params.learning_rate[0],
                name='learning_rate',
                trainable=False,
                dtype=tf.float32)

            batch_size = tf.shape(inp)[0]
            input_time_steps = tf.shape(inp)[1]
            output_time_steps = tf.shape(seq2seq_gtruth)[1]

            ### Encoder [begin]
            with tf.variable_scope('character_embedding'):
                embed_inp = EmbeddingLayer(self.hyper_params.embed_class,
                                           self.hyper_params.embed_dim)(inp)
            with tf.variable_scope("changeToVarible"):
                self.single_style_token = tf.get_variable(
                    'style_token', (1, self.hyper_params.styles_kind,
                                    self.hyper_params.style_dim),
                    dtype=tf.float32)
                self.style_token = tf.tile(self.single_style_token,
                                           (batch_size, 1, 1))
            with tf.variable_scope('encoder_pre_net'):
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    embed_inp, 256, tf.nn.relu),
                                               training=training)
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    pre_ed_inp, 128, tf.nn.relu),
                                               training=training)
            encoder_output = modules.cbhg(pre_ed_inp,
                                          training=training,
                                          k=16,
                                          bank_filters=128,
                                          projection_filters=(128, 128),
                                          highway_layers=4,
                                          highway_units=128,
                                          bi_gru_units=128,
                                          sequence_length=inp_mask,
                                          name='encoder_cbhg',
                                          reuse=False)

            with tf.variable_scope('post_text'):
                all_outputs, _ = tf.nn.dynamic_rnn(
                    cell=GRUCell(256),
                    inputs=encoder_output,
                    sequence_length=inp_mask,
                    dtype=encoder_output.dtype,
                    parallel_iterations=unkonwn_parallel_iterations)
                all_outputs = tf.transpose(all_outputs, [1, 0, 2])
                static_encoder_output = all_outputs[-1]
            ### Encoder [end]

            ### Attention Module
            with tf.variable_scope('attention'):
                att_module = AttentionModule(256,
                                             encoder_output,
                                             sequence_length=inp_mask,
                                             time_major=False)
            with tf.variable_scope("attention_style"):
                att_module_style = AttentionModule(256,
                                                   self.style_token,
                                                   time_major=False)

            ### Decoder [begin]
            att_cell = GRUCell(256)
            dec_cell = MultiRNNCell(
                [ResidualWrapper(GRUCell(256)) for _ in range(2)])
            # prepare output alpha TensorArray
            with tf.variable_scope('prepare_decode'):
                reduc = self.hyper_params.reduction_rate
                reduced_time_steps = tf.div(output_time_steps, reduc)
                init_att_cell_state = att_cell.zero_state(
                    batch_size, tf.float32)
                init_dec_cell_state = dec_cell.zero_state(
                    batch_size, tf.float32)
                init_state_tup = tuple(
                    [init_att_cell_state, init_dec_cell_state])
                init_output_ta = tf.TensorArray(size=reduced_time_steps,
                                                dtype=tf.float32)
                init_alpha_ta = tf.TensorArray(size=reduced_time_steps,
                                               dtype=tf.float32)
                init_weight_ta = tf.TensorArray(size=reduced_time_steps,
                                                dtype=tf.float32)
                init_weight_per_ta = tf.TensorArray(size=reduced_time_steps,
                                                    dtype=tf.float32)
                init_alpha_style_ta = tf.TensorArray(size=reduced_time_steps,
                                                     dtype=tf.float32)
                time_major_seq2seq_gtruth = tf.transpose(seq2seq_gtruth,
                                                         perm=(1, 0, 2))
                indic_array = tf.concat([
                    tf.zeros([
                        reduc, batch_size, self.hyper_params.seq2seq_dim
                    ]), time_major_seq2seq_gtruth
                ],
                                        axis=0)
                init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
                init_context_style = tf.zeros([batch_size, 256],
                                              dtype=tf.float32)
                init_time = tf.constant(0, dtype=tf.int32)
            cond = lambda x, *_: tf.less(x, reduced_time_steps)

            def body(this_time, old_context, old_context_style, old_output_ta,
                     old_alpha_ta, old_alpha_style_ta, old_weight_ta,
                     old_weight_per_ta, old_state_tup):
                with tf.variable_scope('decoder_pre_net'):
                    dec_pre_ed_inp = indic_array[reduc * this_time + reduc - 1]
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 256, tf.nn.relu),
                                                       training=training)
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 128, tf.nn.relu),
                                                       training=training)
                with tf.variable_scope('attention_rnn'):
                    att_cell_inp = tf.concat([old_context, dec_pre_ed_inp],
                                             axis=-1)
                    att_cell_out, att_cell_state = att_cell(
                        att_cell_inp, old_state_tup[0])
                with tf.variable_scope('attention'):
                    query = att_cell_state[0]
                    context, alpha = att_module(query)
                    new_alpha_ta = old_alpha_ta.write(this_time, alpha)
                with tf.variable_scope("attention_style"):
                    query_style = att_cell_state[0]
                    context_style, alpha_style = att_module_style(query_style)
                    new_alpha_style_ta = old_alpha_style_ta.write(
                        this_time, alpha_style)
                with tf.variable_scope("weighting"):
                    # weight_dec_pre_ed_inp = tf.expand_dims(tf.layers.dense(dec_pre_ed_inp, 256, tf.nn.sigmoid), axis=1)
                    weight_input = tf.concat(
                        [static_encoder_output, dec_pre_ed_inp], axis=-1)
                    weighting = tf.layers.dense(weight_input, 128,
                                                tf.nn.sigmoid)
                    weighting = tf.layers.dense(weighting, 2, tf.nn.softmax)
                    # weighting = tf.nn.softmax(weighting)
                    weight_text, weight_style = tf.split(weighting, [1, 1], -1)
                    # weighting = tf.nn.softmax(weighting)
                    new_weight_ta = old_weight_ta.write(this_time, weight_text)
                with tf.variable_scope('decoder_rnn'):
                    weighting_context = weight_text * context + weight_style * context_style
                    weight_per = tf.reduce_mean(
                        tf.abs(weight_style * context_style) /
                        (tf.abs(weight_text * context) +
                         tf.abs(weight_style * context_style)))
                    new_weight_per_ta = old_weight_per_ta.write(
                        this_time, weight_per)
                    dec_input = tf.layers.dense(
                        tf.concat([att_cell_out, weighting_context], axis=-1),
                        256)
                    dec_cell_out, dec_cell_state = dec_cell(
                        dec_input, old_state_tup[1])
                    dense_out = tf.layers.dense(
                        dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                    new_output_ta = old_output_ta.write(this_time, dense_out)
                new_state_tup = tuple([att_cell_state, dec_cell_state])
                return tf.add(
                    this_time, 1
                ), context, context_style, new_output_ta, new_alpha_ta, new_alpha_style_ta, new_weight_ta, new_weight_per_ta, new_state_tup

            # run loop
            _, _, _, seq2seq_output_ta, alpha_ta, alpha_style_ta, weight_ta, weight_per_ta, *_ = tf.while_loop(
                cond,
                body, [
                    init_time, init_context, init_context_style,
                    init_output_ta, init_alpha_ta, init_alpha_style_ta,
                    init_weight_ta, init_weight_per_ta, init_state_tup
                ],
                parallel_iterations=unkonwn_parallel_iterations)

            with tf.variable_scope('reshape_decode'):
                seq2seq_output = tf.reshape(
                    seq2seq_output_ta.stack(),
                    shape=(reduced_time_steps, batch_size,
                           self.hyper_params.seq2seq_dim * reduc))
                seq2seq_output = tf.reshape(
                    tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                    shape=(batch_size, output_time_steps,
                           self.hyper_params.seq2seq_dim))
                self.seq2seq_output = seq2seq_output

                alpha_output = tf.reshape(alpha_ta.stack(),
                                          shape=(reduced_time_steps,
                                                 batch_size, input_time_steps))
                alpha_output = tf.expand_dims(
                    tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
                self.alpha_output = alpha_output

                alpha_output_style = tf.reshape(
                    alpha_style_ta.stack(),
                    shape=(reduced_time_steps, batch_size,
                           self.hyper_params.styles_kind))
                alpha_output_style = tf.expand_dims(
                    tf.transpose(alpha_output_style, perm=(1, 0, 2)),
                    -1)  # batch major
                self.alpha_output_style = alpha_output_style

                weight_ta = tf.reshape(weight_ta.stack(),
                                       shape=(reduced_time_steps, batch_size,
                                              1))
                weight_ta = tf.transpose(weight_ta, perm=(1, 0, 2))
                self.weight_ta = weight_ta

                weight_per_ta = tf.reshape(weight_per_ta.stack(),
                                           shape=(reduced_time_steps, 1))
                self.weight_per_ta = weight_per_ta
            ### Decoder [end]

            ### PostNet [begin]
            post_output = modules.cbhg(
                seq2seq_output,
                training=training,
                k=8,
                bank_filters=128,
                projection_filters=(256, self.hyper_params.seq2seq_dim),
                highway_layers=4,
                highway_units=128,
                bi_gru_units=128,
                sequence_length=None,
                name='decoder_cbhg',
                reuse=False)
            post_output = tf.layers.dense(post_output,
                                          self.hyper_params.post_dim,
                                          name='post_linear_transform')
            self.post_output = post_output
            ### PostNet [end]

        ### Loss
        with tf.variable_scope('loss'):
            self.seq2seq_loss = l1_loss(seq2seq_gtruth, seq2seq_output)
            self.post_loss = l1_loss(post_gtruth, post_output)
            self.loss = self.seq2seq_loss + self.post_loss
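l1_loss is defined elsewhere in this project; a plausible minimal version (an assumption, not the project's actual implementation) is simply the mean absolute error:

def l1_loss(gtruth, output):
    # mean absolute difference between ground truth and prediction
    return tf.reduce_mean(tf.abs(gtruth - output))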
Ejemplo n.º 22
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor
    
    num_layers: number of output layers, one third of the number of anchors;
    args: the first 3 entries are the yolo_outputs predictions, the last 3 are the y_true ground truths;
    anchor_mask: index groups of the anchor boxes, three per group in descending order; 6/7/8 match the
        13x13 grid, 3/4/5 match 26x26 and 0/1/2 match 52x52, i.e. [[6, 7, 8], [3, 4, 5], [0, 1, 2]];
    input_shape: K.shape(yolo_outputs[0])[1:3], positions 1-2 of the shape of the first prediction map
        yolo_outputs[0], i.e. the (13, 13) in (?, 13, 13, 18). Multiplied by 32 this gives the YOLO
        input size, (416, 416), since the network contains 5 convolutions of stride (2, 2) and
        therefore downsamples by a factor of 32 = 2^5;
    grid_shapes: analogous to input_shape, K.shape(yolo_outputs[l])[1:3]; a list with the dimensions
        of the three prediction maps, i.e. [(13, 13), (26, 26), (52, 52)];
    m: position 0 of the shape of the first prediction map, K.shape(yolo_outputs[0])[0], the number
        of images fed into the model, i.e. the batch size;
    mf: m cast to float, K.cast(m, K.dtype(yolo_outputs[0]));
    loss: the loss value, initialized to 0;
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    #anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4], [1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))  # modification 1
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
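In keras-yolo3-style training scripts this loss is usually attached to the model with a Lambda layer; a sketch under that assumption (model_body and the y_true input tensors are assumed to exist elsewhere):

from keras.layers import Lambda

# hypothetical wiring: the three prediction maps and the three ground-truth
# tensors are concatenated into the `args` list that yolo_loss expects
model_loss = Lambda(yolo_loss,
                    output_shape=(1,),
                    name='yolo_loss',
                    arguments={'anchors': anchors,
                               'num_classes': num_classes,
                               'ignore_thresh': 0.5})([*model_body.output, *y_true])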
Ejemplo n.º 23
0
def yolo_loss(inputs, num_anchors):
    # NOTE: `anchors` and `num_classes` are used below but never passed in;
    # they are assumed to be defined in the enclosing scope.
    ignore_thresh = .5  # IoU threshold below which a predicted box is treated as background
    num_layers = num_anchors // 3  # number of anchors at each grid level
    y_pred = inputs[:num_layers]  # model predictions pulled from the inputs
    y_true = inputs[num_layers:]  # ground-truth values pulled from the inputs
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # anchor mask for each grid level

    # Input image size ((13 x 13) * 32 = (416 x 416)), cast to the dtype of y_true[0]
    input_shape = K.cast(K.shape(y_pred[0])[1:3] * 32, K.dtype(y_true[0]))

    # Two-dimensional array with the grid dimensions ((13, 13), (26, 26), (52, 52))
    grid_shapes = [K.cast(K.shape(y_pred[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]

    loss = 0  # loss value

    # Read off the number of elements
    m = K.shape(y_pred[0])[0]  # batch size
    batch_size = K.cast(m, K.dtype(y_pred[0]))  # cast to the dtype of y_pred[0]

    for l in range(num_layers):  # iterate over all three grid levels
        # Mask for the level-l grid based on the objectness probability
        # (5th entry in the list of common parameters).
        # object_mask holds only the object-detection probabilities.
        object_mask = y_true[l][..., 4:5]  # returns data of the form ([0][0][0][0]...[1]...[0])

        # Analogous slice for the level-l grid with the one-hot encoding (where the class position is stored).
        # true_class holds only the OHE class representation for this anchor level.
        true_class = y_true[l][..., 5:]  # returns data of the form ([0][0][0][0]...[1]...[0])

        num_sub_anchors = len(anchors[anchor_mask[l]])  # number of anchors at a single grid level (3)

        # Reshape this grid level's anchors and store them in anchors_tensor
        anchors_tensor = K.reshape(K.constant(anchors[anchor_mask[l]]), [1, 1, 1, num_sub_anchors, 2])

        # Build a 2-D array grid with values [[[0, 0], [0, 1], [0, 2], ..., [0, k]],
        #                                     [[1, 0], [1, 1], [1, 2], ..., [1, k]],
        #                                     ...
        #                                     [[k, 0], [k, 1], [k, 2], ..., [k, k]]]
        # where k is the grid dimension. The array holds the grid-cell indices.
        grid_shape = K.shape(y_pred[l])[1:3]  # grid width and height
        grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), [1, grid_shape[1], 1, 1])  # vertical line
        grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), [grid_shape[0], 1, 1, 1])  # horizontal line
        grid = K.concatenate([grid_x, grid_y])  # join them
        grid = K.cast(grid, K.dtype(y_pred[l]))  # cast to the dtype of y_pred[l]

        # Reshape y_pred[l]
        feats = K.reshape(y_pred[l], [-1, grid_shape[0], grid_shape[1], num_sub_anchors, num_classes + 5])

        # Loss on the object-center coordinates.
        # Recover the object-center coordinates from the predicted values
        pred_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
        # Invert the computation for the original y_true center coordinates
        true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid  # true bounding-box center coordinates
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]  # the larger the box, the smaller the weight (see the worked example after this function)
        # binary_crossentropy between the true and predicted values (object_mask keeps only the cells that matter)
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(true_xy, feats[..., 0:2], from_logits=True)

        # Loss on the width and height.
        # Recover the width and height from the predicted values
        pred_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
        # Invert the computation for the original y_true width and height
        true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # Keep the width and height only where object_mask = 1
        true_wh = K.switch(object_mask, true_wh, K.zeros_like(true_wh))
        # Width-and-height loss term
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(true_wh - feats[..., 2:4])

        # Join the values into a single array
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Confidence loss: first discard all detections whose best IoU is below ignore_thresh.

        # TensorArray that will hold the entries to be ignored
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')  # cast object_mask to 'bool'

        # Loop body that flags the data to ignore.
        # It walks over every element of the batch (b < m),
        # fetches the true bounding-box parameters for the current cell,
        # computes the IoU of the true and predicted boxes, and
        # marks the prediction as kept or ignored depending on best_iou < ignore_thresh.
        def loop_body(b, ignore_mask):
            # true_box gets the first 4 parameters (center, width and height) of every
            # element whose object_mask_bool value is True
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            # IoU between the predicted bounding boxes (pred_box) and the original ones (true_box)
            iou = calc_iou(pred_box[b], true_box)
            # Best-matching true box per prediction
            best_iou = K.max(iou, axis=-1)
            # Write 1 or 0 into ignore_mask depending on (best_iou < ignore_thresh)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask  # advance the counter and return ignore_mask

        # Loop over all elements up to m (m = batch size)
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()  # turn ignore_mask into a tensor
        ignore_mask = K.expand_dims(ignore_mask, -1)  # append one more dimension at the end of ignore_mask

        # Confidence loss:
        # 1st term covers the cells that contain an object
        # 2nd term covers the cells that do not, gated by ignore_mask
        confidence_loss = (
            object_mask * K.binary_crossentropy(object_mask, feats[..., 4:5], from_logits=True) +
            (1 - object_mask) * K.binary_crossentropy(object_mask, feats[..., 4:5], from_logits=True) * ignore_mask
            )

        # Classification loss
        class_loss = object_mask * K.binary_crossentropy(true_class, feats[..., 5:], from_logits=True)

        # Total loss
        xy_loss = K.sum(xy_loss) / batch_size
        wh_loss = K.sum(wh_loss) / batch_size
        confidence_loss = K.sum(confidence_loss) / batch_size
        class_loss = K.sum(class_loss) / batch_size
        loss += xy_loss + wh_loss + confidence_loss + class_loss

    return loss  # return the loss value
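A quick worked example of the size weighting referenced above: with normalized width and height w = h = 0.25, box_loss_scale = 2 - 0.25 * 0.25 = 1.9375, while a box covering the whole image (w = h = 1) gives 2 - 1 = 1, so coordinate errors on small boxes are penalized almost twice as heavily.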
Ejemplo n.º 24
0
def create_output_ta(spec):
    return tf.TensorArray(spec.dtype,
                          size=sequence_length,
                          element_shape=(tf.TensorShape([
                              static_batch_size
                          ]).concatenate(spec.shape)))
    def __init__(self,
                 inp,
                 inp_mask,
                 inp_att,
                 decode_time_steps,
                 hyper_params=None,
                 name='Tacotron'):
        """
        Build the computational graph.
        :param inp:
        :param inp_mask:
        :param decode_time_steps:
        :param hyper_params:
        :param name:
        """
        super(Tacotron, self).__init__(name)
        self.hyper_params = HyperParams(
        ) if hyper_params is None else hyper_params

        with tf.variable_scope(name):
            self.global_step = tf.Variable(0,
                                           name='global_step',
                                           trainable=False)

            batch_size = tf.shape(inp)[0]
            input_time_steps = tf.shape(inp)[1]
            reduc = self.hyper_params.reduction_rate
            output_time_steps = decode_time_steps * reduc

            ### Encoder [begin]
            with tf.variable_scope('character_embedding'):
                embed_inp = EmbeddingLayer(self.hyper_params.embed_class,
                                           self.hyper_params.embed_dim)(inp)
            with tf.variable_scope("changeToVarible"):
                self.single_style_token = tf.get_variable(
                    'style_token', (1, self.hyper_params.styles_kind,
                                    self.hyper_params.style_dim),
                    dtype=tf.float32)
                self.style_token = tf.tile(self.single_style_token,
                                           (batch_size, 1, 1))
            with tf.variable_scope('encoder_pre_net'):
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    embed_inp, 256, tf.nn.relu),
                                               training=False)
                pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                    pre_ed_inp, 128, tf.nn.relu),
                                               training=False)
            encoder_output = modules.cbhg(pre_ed_inp,
                                          training=False,
                                          k=16,
                                          bank_filters=128,
                                          projection_filters=(128, 128),
                                          highway_layers=4,
                                          highway_units=128,
                                          bi_gru_units=128,
                                          sequence_length=inp_mask,
                                          name='encoder_cbhg',
                                          reuse=False)
            inp_att = tf.Print(inp_att, [inp_att],
                               message='inp_att',
                               summarize=10)
            sentence_style = tf.reduce_sum(tf.expand_dims(inp_att, axis=-1) *
                                           self.style_token,
                                           axis=1)
            sentence_style = tf.Print(sentence_style, [sentence_style],
                                      message='style',
                                      summarize=10)

            # with tf.variable_scope('post_text'):
            #     all_outputs, _ = tf.nn.dynamic_rnn(cell=GRUCell(256), inputs=encoder_output, sequence_length=inp_mask,
            #                                    dtype=encoder_output.dtype, parallel_iterations=unkonwn_parallel_iterations)
            #     all_outputs = tf.transpose(all_outputs, [1, 0, 2])
            #     static_encoder_output = all_outputs[-1]
            # ### Encoder [end]
            #
            # sentence_style_att = tf.layers.dense(static_encoder_output, 256, tf.nn.relu)
            # sentence_style_att = tf.layers.dense(sentence_style_att, 64, tf.nn.relu)
            # sentence_style = tf.layers.dense(sentence_style_att, 10, tf.nn.softmax)
            #
            # sentence_style = tf.cond(tf.equal(ctr_flag, 1), lambda: ctr_attention, lambda: sentence_style)
            # sentence_style = tf.Print(sentence_style, [sentence_style], message='att', summarize=10)
            # sentence_style = tf.reduce_sum(tf.expand_dims(sentence_style, axis=-1) * self.style_token, axis=1)
            # sentence_style = tf.Print(sentence_style, [sentence_style], message='style', summarize=10)
            # sentence_style = tf.cond(tf.equal(ctr_flag, 1),
            #                         lambda: tf.reduce_sum(tf.expand_dims(sentence_style, axis=-1) * self.style_token,
            #                                               axis=1),
            #                         lambda: sentence_style)

            ### Attention Module
            with tf.variable_scope('attention'):
                att_module = AttentionModule(256,
                                             encoder_output,
                                             sequence_length=inp_mask,
                                             time_major=False)

            ### Decoder [begin]
            att_cell = GRUCell(256)
            dec_cell = MultiRNNCell(
                [ResidualWrapper(GRUCell(256)) for _ in range(2)])
            # prepare output alpha TensorArray
            with tf.variable_scope('prepare_decode'):
                # prepare output alpha TensorArray
                reduced_time_steps = tf.div(output_time_steps, reduc)
                init_att_cell_state = att_cell.zero_state(
                    batch_size, tf.float32)
                init_dec_cell_state = dec_cell.zero_state(
                    batch_size, tf.float32)
                init_state_tup = tuple(
                    [init_att_cell_state, init_dec_cell_state])
                init_output_ta = tf.TensorArray(size=reduced_time_steps,
                                                dtype=tf.float32)
                init_alpha_ta = tf.TensorArray(size=reduced_time_steps,
                                               dtype=tf.float32)
                init_weight_per_ta = tf.TensorArray(size=reduced_time_steps,
                                                    dtype=tf.float32)
                go_array = tf.zeros(
                    [batch_size, self.hyper_params.seq2seq_dim],
                    dtype=tf.float32)
                init_context = tf.zeros([batch_size, 256], dtype=tf.float32)
                init_time = tf.constant(0, dtype=tf.int32)
            cond = lambda x, *_: tf.less(x, reduced_time_steps)

            def body(this_time, old_output_ta, old_alpha_ta, old_weight_per_ta,
                     old_state_tup, last_context, last_output):
                with tf.variable_scope('decoder_pre_net'):
                    dec_pre_ed_inp = last_output
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 256, tf.nn.relu),
                                                       training=False)
                    dec_pre_ed_inp = tf.layers.dropout(tf.layers.dense(
                        dec_pre_ed_inp, 128, tf.nn.relu),
                                                       training=False)
                with tf.variable_scope('attention_rnn'):
                    # dec_pre_ed_inp = tf.Print(dec_pre_ed_inp, [dec_pre_ed_inp[0]], message='dec', summarize=10)
                    att_cell_inp = tf.concat([last_context, dec_pre_ed_inp],
                                             axis=-1)
                    att_cell_out, att_cell_state = att_cell(
                        att_cell_inp, old_state_tup[0])
                with tf.variable_scope('attention'):
                    query = att_cell_state[0]
                    context, alpha = att_module(query)
                    new_alpha_ta = old_alpha_ta.write(this_time, alpha)
                with tf.variable_scope('decoder_rnn'):
                    weighting_context = context + sentence_style
                    weight_per = tf.reduce_mean(
                        tf.abs(sentence_style) /
                        (tf.abs(context) + tf.abs(sentence_style)))
                    new_weight_per_ta = old_weight_per_ta.write(
                        this_time, weight_per)
                    dec_input = tf.layers.dense(
                        tf.concat([att_cell_out, weighting_context], axis=-1),
                        256)
                    # dec_input = tf.layers.dense(tf.concat([att_cell_out, context], axis=-1), 256)
                    dec_cell_out, dec_cell_state = dec_cell(
                        dec_input, old_state_tup[1])
                    dense_out = tf.layers.dense(
                        dec_cell_out, self.hyper_params.seq2seq_dim * reduc)
                    new_output_ta = old_output_ta.write(this_time, dense_out)
                    new_output = dense_out[:, -self.hyper_params.seq2seq_dim:]
                new_state_tup = tuple([att_cell_state, dec_cell_state])
                return tf.add(this_time, 1), new_output_ta, new_alpha_ta, \
                       new_weight_per_ta, new_state_tup, context, new_output

            # run loop
            _, seq2seq_output_ta, alpha_ta, weight_per_ta, *_ = tf.while_loop(
                cond, body, [
                    init_time, init_output_ta, init_alpha_ta,
                    init_weight_per_ta, init_state_tup, init_context, go_array
                ])
            with tf.variable_scope('reshape_decode'):
                seq2seq_output = tf.reshape(
                    seq2seq_output_ta.stack(),
                    shape=(reduced_time_steps, batch_size,
                           self.hyper_params.seq2seq_dim * reduc))
                seq2seq_output = tf.reshape(
                    tf.transpose(seq2seq_output, perm=(1, 0, 2)),
                    shape=(batch_size, output_time_steps,
                           self.hyper_params.seq2seq_dim))
                self.seq2seq_output = seq2seq_output

                # alpha_output = tf.reshape(alpha_ta.stack(),
                #                           shape=(reduced_time_steps, batch_size, input_time_steps))
                # alpha_output = tf.expand_dims(tf.transpose(alpha_output, perm=(1, 0, 2)), -1)
                # self.alpha_output = alpha_output
                #
                # alpha_output_style = tf.reshape(alpha_style_ta.stack(),
                #                                 shape=(reduced_time_steps, batch_size, self.hyper_params.styles_kind))
                # alpha_output_style = tf.expand_dims(tf.transpose(alpha_output_style, perm=(1, 0, 2)), -1)  # batch major
                # self.alpha_output_style = alpha_output_style
                #
                # weight_ta = tf.reshape(weight_ta.stack(), shape=(reduced_time_steps, batch_size, 1))
                # weight_ta = tf.transpose(weight_ta, perm=(1, 0, 2))
                # self.weight_ta = weight_ta
                #
                # weight_per_ta = tf.reshape(weight_per_ta.stack(), shape=(reduced_time_steps, 1))
                # self.weight_per_ta = weight_per_ta
            ### Decoder [end]

            ### PostNet [begin]
            post_output = modules.cbhg(
                seq2seq_output,
                training=False,
                k=8,
                bank_filters=128,
                projection_filters=(256, self.hyper_params.seq2seq_dim),
                highway_layers=4,
                highway_units=128,
                bi_gru_units=128,
                sequence_length=None,
                name='decoder_cbhg',
                reuse=False)
            post_output = tf.layers.dense(post_output,
                                          self.hyper_params.post_dim,
                                          name='post_linear_transform')
            self.post_output = post_output
Ejemplo n.º 26
0
    def run(self, trajectory, policy_state=None):
        """Apply the policy to trajectory steps and store actions/info.

    If `self.time_major == True`, the tensors in `trajectory` are assumed to
    have shape `[time, batch, ...]`.  Otherwise they are assumed to
    have shape `[batch, time, ...]`.

    Args:
      trajectory: The `Trajectory` to run against.
        If the replay class was created with `time_major=True`, then
        the tensors in trajectory must be shaped `[time, batch, ...]`.
        Otherwise they must be shaped `[batch, time, ...]`.
      policy_state: (optional) A nest Tensor with initial step policy state.

    Returns:
      output_actions: A nest of the actions that the policy took.
        If the replay class was created with `time_major=True`, then
        the tensors here will be shaped `[time, batch, ...]`.  Otherwise
        they'll be shaped `[batch, time, ...]`.
      output_policy_info: A nest of the policy info that the policy emitted.
        If the replay class was created with `time_major=True`, then
        the tensors here will be shaped `[time, batch, ...]`.  Otherwise
        they'll be shaped `[batch, time, ...]`.
      policy_state: A nest Tensor with final step policy state.

    Raises:
      TypeError: If `policy_state` structure doesn't match
        `self.policy.policy_state_spec`, or `trajectory` structure doesn't
        match `self.policy.trajectory_spec`.
      ValueError: If `policy_state` doesn't match
        `self.policy.policy_state_spec`, or `trajectory` structure doesn't
        match `self.policy.trajectory_spec`.
      ValueError: If `trajectory` lacks two outer dims.
    """
        trajectory_spec = self._policy.trajectory_spec
        outer_dims = nest_utils.get_outer_shape(trajectory, trajectory_spec)

        if tf.compat.dimension_value(outer_dims.shape[0]) != 2:
            raise ValueError(
                "Expected two outer dimensions, but saw '{}' dimensions.\n"
                "Trajectory:\n{}.\nTrajectory spec from policy:\n{}.".format(
                    tf.compat.dimension_value(outer_dims.shape[0]), trajectory,
                    trajectory_spec))
        if self._time_major:
            sequence_length = outer_dims[0]
            batch_size = outer_dims[1]
            static_batch_size = tf.compat.dimension_value(
                trajectory.discount.shape[1])
        else:
            batch_size = outer_dims[0]
            sequence_length = outer_dims[1]
            static_batch_size = tf.compat.dimension_value(
                trajectory.discount.shape[0])

        if policy_state is None:
            policy_state = self._policy.get_initial_state(batch_size)
        else:
            nest_utils.assert_same_structure(policy_state,
                                             self._policy.policy_state_spec)

        if not self._time_major:
            # Make trajectory time-major.
            trajectory = tf.nest.map_structure(common.transpose_batch_time,
                                               trajectory)

        trajectory_tas = tf.nest.map_structure(
            lambda t: tf.TensorArray(t.dtype, size=sequence_length).unstack(t),
            trajectory)

        def create_output_ta(spec):
            return tf.TensorArray(spec.dtype,
                                  size=sequence_length,
                                  element_shape=(tf.TensorShape([
                                      static_batch_size
                                  ]).concatenate(spec.shape)))

        output_action_tas = tf.nest.map_structure(create_output_ta,
                                                  trajectory_spec.action)
        output_policy_info_tas = tf.nest.map_structure(
            create_output_ta, trajectory_spec.policy_info)

        read0 = lambda ta: ta.read(0)
        zeros_like0 = lambda t: tf.zeros_like(t[0])
        ones_like0 = lambda t: tf.ones_like(t[0])
        time_step = ts.TimeStep(
            step_type=read0(trajectory_tas.step_type),
            reward=tf.nest.map_structure(zeros_like0, trajectory.reward),
            discount=ones_like0(trajectory.discount),
            observation=tf.nest.map_structure(read0,
                                              trajectory_tas.observation))

        def process_step(time, time_step, policy_state, output_action_tas,
                         output_policy_info_tas):
            """Take an action on the given step, and update output TensorArrays.

      Args:
        time: Step time.  Describes which row to read from the trajectory
          TensorArrays and which location to write into in the output
          TensorArrays.
        time_step: Previous step's `TimeStep`.
        policy_state: Policy state tensor or nested structure of tensors.
        output_action_tas: Nest of `tf.TensorArray` containing new actions.
        output_policy_info_tas: Nest of `tf.TensorArray` containing new
          policy info.

      Returns:
        policy_state: The next policy state.
        next_output_action_tas: Updated `output_action_tas`.
        next_output_policy_info_tas: Updated `output_policy_info_tas`.
      """
            action_step = self._policy.action(time_step, policy_state)
            policy_state = action_step.state
            write_ta = lambda ta, t: ta.write(time - 1, t)
            next_output_action_tas = tf.nest.map_structure(
                write_ta, output_action_tas, action_step.action)
            next_output_policy_info_tas = tf.nest.map_structure(
                write_ta, output_policy_info_tas, action_step.info)

            return (action_step.state, next_output_action_tas,
                    next_output_policy_info_tas)

        def loop_body(time, time_step, policy_state, output_action_tas,
                      output_policy_info_tas):
            """Runs a step in environment.

      While loop will call multiple times.

      Args:
        time: Step time.
        time_step: Previous step's `TimeStep`.
        policy_state: Policy state tensor or nested structure of tensors.
        output_action_tas: Updated nest of `tf.TensorArray`, the new actions.
        output_policy_info_tas: Updated nest of `tf.TensorArray`, the new
          policy info.

      Returns:
        loop_vars for next iteration of tf.while_loop.
      """
            policy_state, next_output_action_tas, next_output_policy_info_tas = (
                process_step(time, time_step, policy_state, output_action_tas,
                             output_policy_info_tas))

            ta_read = lambda ta: ta.read(time)
            ta_read_prev = lambda ta: ta.read(time - 1)
            time_step = ts.TimeStep(
                step_type=ta_read(trajectory_tas.step_type),
                observation=tf.nest.map_structure(ta_read,
                                                  trajectory_tas.observation),
                reward=tf.nest.map_structure(ta_read_prev,
                                             trajectory_tas.reward),
                discount=ta_read_prev(trajectory_tas.discount))

            return (time + 1, time_step, policy_state, next_output_action_tas,
                    next_output_policy_info_tas)

        time = tf.constant(1)
        time, time_step, policy_state, output_action_tas, output_policy_info_tas = (
            tf.while_loop(cond=lambda time, *_: time < sequence_length,
                          body=loop_body,
                          loop_vars=[
                              time, time_step, policy_state, output_action_tas,
                              output_policy_info_tas
                          ],
                          back_prop=False,
                          name="trajectory_replay_loop"))

        # Run the last time step
        last_policy_state, output_action_tas, output_policy_info_tas = (
            process_step(time, time_step, policy_state, output_action_tas,
                         output_policy_info_tas))

        def stack_ta(ta):
            t = ta.stack()
            if not self._time_major:
                t = common.transpose_batch_time(t)
            return t

        stacked_output_actions = tf.nest.map_structure(stack_ta,
                                                       output_action_tas)
        stacked_output_policy_info = tf.nest.map_structure(
            stack_ta, output_policy_info_tas)

        return (stacked_output_actions, stacked_output_policy_info,
                last_policy_state)
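The indexing above is deliberately shifted: the action computed from the step read at index t - 1 is written to output slot t - 1, while the next TimeStep is assembled from observation t and reward/discount t - 1, with the final step handled after the loop. A toy illustration of that write-behind TensorArray pattern (TF 2.x eager, not the replay class itself):

import tensorflow as tf

obs = tf.constant([10., 20., 30.])  # time-major "observations"
obs_ta = tf.TensorArray(tf.float32, size=3).unstack(obs)
out_ta = tf.TensorArray(tf.float32, size=3)
for t in range(1, 3):               # the loop body acts on the previous step...
    out_ta = out_ta.write(t - 1, obs_ta.read(t - 1) * 2.)
out_ta = out_ta.write(2, obs_ta.read(2) * 2.)  # ...and the last step runs after the loop
print(out_ta.stack())               # tf.Tensor([20. 40. 60.], shape=(3,), dtype=float32)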
def _decoder(cell,
             labels,
             encoder_output,
             sequence_length,
             initial_state,
             dtype=None,
             scope=None):  # inputs: shifted_tgt_inputs [?, ?, 50],
    # memory: encoder_output [?, ?, 512],
    # initial_state [?, 512]: reduce_mean(encoder_output, axis=1), the encoder's global feature
    # Assume that the underlying cell is GRUCell-like
    batch = tf.shape(labels)[0]
    time_steps = tf.shape(labels)[1]
    dtype = dtype or labels.dtype
    output_size = cell.output_size  #256
    zero_output = tf.zeros([batch, output_size], dtype)  #[batch,256]
    zero_value = tf.zeros([batch, encoder_output.shape[-1].value],
                          dtype)  #  [batch,512]

    with tf.variable_scope(scope or "decoder", dtype=dtype):
        labels = tf.transpose(labels, [1, 0, 2])  # [num_chars, batch, 50]
        encoder_output = tf.transpose(encoder_output,
                                      [1, 0, 2])  # [num_chars, batch, 512]

        input_ta = tf.TensorArray(tf.float32,
                                  time_steps,
                                  tensor_array_name="input_array")
        memory_ta = tf.TensorArray(tf.float32,
                                   tf.shape(encoder_output)[0],
                                   tensor_array_name="memory_array")
        output_ta = tf.TensorArray(tf.float32,
                                   time_steps,
                                   tensor_array_name="output_array")

        input_ta = input_ta.unstack(
            labels
        )  # input_ta is indexed 0, 1, ..., num_chars - 1; unstack splits labels
        # along axis 0 into num_chars tensors of shape [batch, 50]
        memory_ta = memory_ta.unstack(encoder_output)  # num_chars tensors of shape [batch, 512]
        initial_state = layers.nn.linear(initial_state,
                                         output_size,
                                         True,
                                         False,
                                         scope="s_transform")
        initial_state = tf.tanh(
            initial_state)  # linear transform plus bias, then tanh: the GRU initial state [batch, 256]

        def loop_func(t, out_ta, state):
            inp_t = input_ta.read(t)
            mem_t = memory_ta.read(t)  # inputs at time step t

            cell_input = [inp_t, mem_t]
            cell_output, new_state = cell(
                cell_input, state)  # for this modified GRU, cell_output and new_state are the same tensor
            cell_output = _copy_through(t, sequence_length["target"],
                                        zero_output, cell_output)
            new_state = _copy_through(t, sequence_length["target"], state,
                                      new_state)

            out_ta = out_ta.write(t, cell_output)

            return t + 1, out_ta, new_state

        time = tf.constant(0, dtype=tf.int32, name="time")
        loop_vars = (time, output_ta, initial_state)

        outputs = tf.while_loop(lambda t, *_: t < time_steps,
                                loop_func,
                                loop_vars,
                                parallel_iterations=32,
                                swap_memory=True)

        output_final_ta = outputs[1]
        final_output = output_final_ta.stack()  # [num_chars, batch_size, 256]
        final_output.set_shape([None, None, output_size])
        final_output = tf.transpose(final_output,
                                    [1, 0, 2])  # [batch_size, num_chars, 256]
        result = {"outputs": final_output, "initial_state": initial_state}

    return result
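_copy_through is a helper defined elsewhere; judging by its call sites, it freezes outputs and state once t passes the target sequence length, so a minimal sketch (an assumption, not necessarily the original implementation) would be:

def _copy_through(time, length, output, new_output):
    # once time >= length for a batch element, keep propagating the old value
    copy_cond = (time >= length)
    return tf.where(copy_cond, output, new_output)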
Ejemplo n.º 28
0
def _unstack_ta(inp):
    return tf.TensorArray(dtype=inp.dtype,
                          size=tf.shape(inp)[0],
                          element_shape=inp.get_shape()[1:]).unstack(inp)
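A small usage check of this helper (TF 2.x eager): unstacking a (3, 2) tensor yields a TensorArray of three (2,)-shaped elements that can be read back by index.

import tensorflow as tf

x = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
ta = _unstack_ta(x)
print(ta.size())   # 3
print(ta.read(0))  # tf.Tensor([1. 2.], shape=(2,), dtype=float32)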
Ejemplo n.º 29
0
    def maml_train_step(self,
                        x_speech_train,
                        x_image_train,
                        x_speech_test,
                        x_image_test,
                        num_steps,
                        meta_optimizer,
                        training=True,
                        stop_gradients=False,
                        clip_norm=None):

        meta_batch_size = tf.shape(x_speech_train)[0]

        with tf.GradientTape() as meta_tape:

            # watch vars in case of tf.Tensor's which are not tracked by default
            meta_tape.watch(self.speech_model.model.trainable_variables)
            meta_tape.watch(self.vision_model.model.trainable_variables)

            # use tf.TensorArray to accumulate results in the dynamically unrolled loop
            inner_losses = tf.TensorArray(tf.float32, size=meta_batch_size)
            meta_losses = tf.TensorArray(tf.float32, size=meta_batch_size)

            # train and evaluate meta-objective on each task in the batch
            for batch_index in tf.range(meta_batch_size):
                x_s_1 = x_speech_train[batch_index]
                x_i_1 = x_image_train[batch_index]
                x_s_2 = x_speech_test[batch_index]
                x_i_2 = x_image_test[batch_index]

                # accumulate train and test losses per update for each task
                train_losses = tf.TensorArray(tf.float32, size=num_steps)
                test_losses = tf.TensorArray(tf.float32, size=num_steps)

                # initial "weight update" with current model weights
                speech_weight_updates = self.speech_model.model.trainable_weights
                vision_weight_updates = self.vision_model.model.trainable_weights

                # # create a model copy starting with the exact weight variables from
                # # the base model so we can update the model on the current task and
                # # then take gradients w.r.t. the base weights on the meta-objective
                # # NOTE: not using variable assign which has no grad ... solutions?
                # self.adapt_model.speech_model.model = self.clone_speech_network_func(
                #     self.speech_model.model)

                # self.adapt_model.vision_model.model = self.clone_vision_network_func(
                #     self.vision_model.model)

                for update_step in tf.range(num_steps):
                    # make sure the model has the previous updates (Python state issue?)
                    model_utils.update_model_weights(
                        self.adapt_model.speech_model.model,
                        speech_weight_updates, self.speech_weights_structure)

                    model_utils.update_model_weights(
                        self.adapt_model.vision_model.model,
                        vision_weight_updates, self.vision_weights_structure)

                    # update model on task training samples
                    inner_task_loss, y_s_1, y_i_1 = self.adapt_model.train_step(
                        x_s_1,
                        x_i_1,
                        optimizer=self.inner_optimizer,
                        training=training,
                        stop_gradients=stop_gradients,
                        clip_norm=clip_norm)

                    # compute transformations for `x_speech` and `x_image` and
                    # evaluate meta-objective of updated model on task test samples
                    y_s_2 = self.adapt_model.speech_model.predict(
                        x_s_2, training=training)
                    y_i_2 = self.adapt_model.vision_model.predict(
                        x_i_2, training=training)
                    meta_task_loss = self.loss(y_s_2, y_i_2)

                    train_losses = train_losses.write(update_step,
                                                      inner_task_loss)
                    test_losses = test_losses.write(update_step,
                                                    meta_task_loss)

                inner_losses = inner_losses.write(batch_index,
                                                  train_losses.stack())
                meta_losses = meta_losses.write(batch_index,
                                                test_losses.stack())

            # get stacked tensors from the array
            inner_losses = inner_losses.stack()
            meta_losses = meta_losses.stack()

            # average across task meta-objectives (at the final update step);
            # meta_losses is already a stacked tensor of shape [batch, num_steps]
            meta_loss = tf.reduce_mean(meta_losses[:, -1])

        # compute gradient of meta-objective and update MAML model
        network_s_variables = self.speech_model.model.trainable_variables
        network_i_variables = self.vision_model.model.trainable_variables

        meta_gradients_s, meta_gradients_i = meta_tape.gradient(
            meta_loss, [network_s_variables, network_i_variables])

        if "debug" in FLAGS and FLAGS.debug:
            for grad in meta_gradients_s + meta_gradients_i:
                if tf.math.count_nonzero(tf.math.is_nan(grad)) >= 1:
                    tf.print("NaN grad encountered:", grad)
                    tf.print("Loss:", meta_loss)

        # clip gradients by global norm if specified
        if clip_norm is not None:
            meta_gradients_s, global_norm_s = tf.clip_by_global_norm(
                meta_gradients_s, clip_norm)

            meta_gradients_i, global_norm_i = tf.clip_by_global_norm(
                meta_gradients_i, clip_norm)

            # debugging in eager mode
            if "debug" in FLAGS and FLAGS.debug:
                for global_norm in (global_norm_s, global_norm_i):
                    if global_norm > clip_norm:
                        tf.print("Clipping gradients with global norm",
                                 global_norm, "to clip norm", clip_norm)

        if isinstance(meta_optimizer, tf.keras.optimizers.Optimizer):
            meta_optimizer.apply_gradients(
                zip(meta_gradients_s + meta_gradients_i,
                    network_s_variables + network_i_variables))

        elif callable(meta_optimizer):
            meta_optimizer(self.speech_model.model, self.vision_model.model,
                           meta_gradients_s, meta_gradients_i)

        else:
            raise ValueError(
                "Argument `meta_optimizer` should be a tf.keras optimizer or a "
                "callable that takes arguments "
                "(network_a, network_b, gradients_a, gradients_b).")

        return meta_loss, inner_losses, meta_losses
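Since meta_optimizer may also be a plain callable with the signature named in the ValueError above, a minimal custom optimizer might look like this (the function name and step size are illustrative, not part of the original code):

def sgd_meta_optimizer(network_a, network_b, gradients_a, gradients_b):
    # plain SGD on the meta-gradients; 1e-3 is an arbitrary step size
    for var, grad in zip(network_a.trainable_variables, gradients_a):
        if grad is not None:
            var.assign_sub(1e-3 * grad)
    for var, grad in zip(network_b.trainable_variables, gradients_b):
        if grad is not None:
            var.assign_sub(1e-3 * grad)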
Ejemplo n.º 30
0
def process_loss(feature_map_i, y_true, anchors):
    grid_size = tf.shape(feature_map_i)[1:3]
    ratio = tf.cast(
        tf.constant([config.image_size, config.image_size]) / grid_size,
        tf.float32)
    batch_size = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

    x_y_offset, pred_boxes, pred_conf, pred_prob = process_layer(
        feature_map_i, anchors)

    object_mask = y_true[..., 4:5]

    def loop_cond(idx, _):
        return tf.less(idx, tf.cast(batch_size, tf.int32))

    def loop_body(idx, mask):
        # gather the gt boxes of image idx: [V, 4]
        valid_true_boxes = tf.boolean_mask(
            y_true[idx, ..., 0:4], tf.cast(object_mask[idx, ..., 0], 'bool'))
        # IoU of each predicted box against every gt box; keep the best match
        iou = box_iou(pred_boxes[idx], valid_true_boxes)
        best_iou = tf.reduce_max(iou, axis=-1)
        # 1.0 where the best IoU is below 0.5: treated as background later on
        ignore_mask_tmp = tf.cast(best_iou < 0.5, tf.float32)
        mask = mask.write(idx, ignore_mask_tmp)
        return idx + 1, mask

    ignore_mask = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

    _, ignore_mask = tf.while_loop(cond=loop_cond,
                                   body=loop_body,
                                   loop_vars=[0, ignore_mask])
    ignore_mask = ignore_mask.stack()
    ignore_mask = tf.expand_dims(ignore_mask, -1)

    pred_box_xy = pred_boxes[..., 0:2]
    pred_box_wh = pred_boxes[..., 2:4]

    true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
    pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

    true_tw_th = y_true[..., 2:4] / anchors
    pred_tw_th = pred_box_wh / anchors
    true_tw_th = tf.where(condition=tf.equal(true_tw_th, 0),
                          x=tf.ones_like(true_tw_th),
                          y=true_tw_th)
    pred_tw_th = tf.where(condition=tf.equal(pred_tw_th, 0),
                          x=tf.ones_like(pred_tw_th),
                          y=pred_tw_th)
    true_tw_th = tf.math.log(tf.clip_by_value(true_tw_th, 1e-9, 1e9))
    pred_tw_th = tf.math.log(tf.clip_by_value(pred_tw_th, 1e-9, 1e9))

    # normalize gt width/height by the input image size
    image_size_f = tf.cast(config.image_size, tf.float32)
    box_loss_scale_1 = y_true[..., 2:3] / image_size_f
    box_loss_scale_2 = y_true[..., 3:4] / image_size_f

    # in [1, 2]: smaller boxes receive a larger loss weight
    box_loss_scale = 2. - box_loss_scale_1 * box_loss_scale_2

    mix_w = y_true[..., -1:]  # per-box mixing weight stored in the last channel
    xy_loss = tf.reduce_sum(
        tf.square(true_xy - pred_xy) * object_mask * box_loss_scale *
        mix_w) / batch_size
    wh_loss = tf.reduce_sum(
        tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale *
        mix_w) / batch_size

    # positives: anchors assigned to a gt box; negatives: background anchors
    # whose best IoU against every gt box is below the 0.5 threshold
    conf_pos_mask = object_mask
    conf_neg_mask = (1 - object_mask) * ignore_mask
    conf_loss_pos = conf_pos_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=object_mask, logits=pred_conf)
    conf_loss_neg = conf_neg_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=object_mask, logits=pred_conf)
    conf_loss = conf_loss_pos + conf_loss_neg

    # focal-loss-style weighting: down-weight locations the model
    # already classifies confidently
    alpha = 0.25
    gamma = 1.5
    focal_mask = alpha * tf.pow(tf.abs(object_mask - tf.sigmoid(pred_conf)),
                                gamma)
    conf_loss *= focal_mask

    conf_loss = tf.reduce_sum(conf_loss * mix_w) / batch_size

    # label smoothing: soften the one-hot class targets toward uniform
    delta = 0.01
    label_target = (1 - delta) * y_true[..., 5:-1] + delta * 1. / len(
        config.classes)

    class_loss = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=label_target, logits=pred_prob) * mix_w
    class_loss = tf.reduce_sum(class_loss) / batch_size

    return xy_loss, wh_loss, conf_loss, class_loss
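For context, process_loss is typically applied once per detection scale and the four components summed; a sketch under the assumption of three feature maps with matching per-scale anchor sets (feature_maps, y_true_list and anchor_group are illustrative names):

def compute_total_loss(feature_maps, y_true_list, anchor_group):
    # anchor_group: three [3, 2] anchor sets, one per scale
    total = [0., 0., 0., 0.]
    for i, feature_map in enumerate(feature_maps):
        losses = process_loss(feature_map, y_true_list[i], anchor_group[i])
        total = [t + l for t, l in zip(total, losses)]
    xy_loss, wh_loss, conf_loss, class_loss = total
    return xy_loss + wh_loss + conf_loss + class_loss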