Example #1
        def func_point_logits(dec_h, enc_e, enc_len):
            '''
            Args:
                dec_h : shape(b_sz, h_dec_sz)
                enc_e : shape(b_sz, tstp_enc, dec_emb_sz)
                enc_len : shape(b_sz,)
            '''

            dec_h_ex = tf.expand_dims(dec_h, axis=1)  # shape(b_sz, 1, h_dec_sz)
            dec_h_ex = tf.tile(
                dec_h_ex, [1, tstp_enc, 1])  # shape(b_sz, tstp_enc, h_dec_sz)
            linear_concat = tf.concat(
                axis=2, values=[dec_h_ex, enc_e
                                ])  # shape(b_sz, tstp_enc, h_dec_sz+dec_emb_sz)
            point_linear = TfUtils.last_dim_linear(  # shape(b_sz, tstp_enc, h_dec_sz)
                linear_concat,
                output_size=h_dec_sz,
                bias=False,
                scope='Ptr_W')
            point_v = TfUtils.last_dim_linear(  # shape(b_sz, tstp_enc, 1)
                tf.tanh(point_linear),
                output_size=1,
                bias=False,
                scope='Ptr_V')
            point_logits = tf.squeeze(point_v,
                                      axis=[2])  # shape(b_sz, tstp_enc)
            mask = TfUtils.mkMask(enc_len,
                                  maxLen=tstp_enc)  # shape(b_sz, tstp_enc)
            point_logits = tf.where(mask, point_logits,
                                    tf.ones_like(point_logits) *
                                    small_num)  # shape(b_sz, tstp_enc)

            return point_logits
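Example #1 (and several snippets below) call two project helpers, TfUtils.last_dim_linear and TfUtils.mkMask, whose code is not shown here. The following is only a minimal sketch of what they plausibly do, reconstructed from the call sites; the argument names and exact behaviour are assumptions, not the project's real implementation:

    import tensorflow as tf

    def mkMask(seq_lengths, maxLen):
        # Assumed behaviour: boolean mask of shape seq_lengths.shape + [maxLen],
        # True for valid positions and False for padding.
        shape_of_input = tf.shape(seq_lengths)
        flat_len = tf.reshape(seq_lengths, [-1])               # collapse leading dims
        flat_mask = tf.sequence_mask(flat_len, maxlen=maxLen)  # (prod(leading), maxLen)
        out_shape = tf.concat([shape_of_input, tf.reshape(maxLen, [-1])], axis=0)
        return tf.reshape(flat_mask, out_shape)

    def last_dim_linear(inputs, output_size, bias, scope):
        # Assumed behaviour: one shared linear layer applied to the last axis of
        # `inputs`, whatever its rank.
        with tf.variable_scope(scope):
            input_size = int(inputs.get_shape()[-1])
            W = tf.get_variable('W', shape=[input_size, output_size])
            flat = tf.reshape(inputs, [-1, input_size])        # collapse leading dims
            out = tf.matmul(flat, W)
            if bias:
                b = tf.get_variable('b', shape=[output_size],
                                    initializer=tf.zeros_initializer())
                out = out + b
            out_shape = tf.concat([tf.shape(inputs)[:-1], [output_size]], axis=0)
            return tf.reshape(out, out_shape)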
Example #2
        def basic_lstm_model(inputs):
            print "Loading basic lstm model.."
            for i in range(self.config.rnn_numLayers):
                with tf.variable_scope('rnnLayer' + str(i)):
                    lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
                    outputs, _ = tf.nn.dynamic_rnn(
                        lstm_cell,
                        inputs,
                        self.ph_seqLen,  #(b_sz, tstp, h_sz)
                        dtype=tf.float32,
                        swap_memory=True,
                        scope='basic_lstm_model_layer-' + str(i))
                    inputs = outputs  #b_sz, tstp, h_sz
            mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2)  #b_sz, tstp, 1

            aggregate_state = TfUtils.reduce_avg(outputs,
                                                 self.ph_seqLen,
                                                 dim=1)  #b_sz, h_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.hidden_size])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs,
                                        self.config.hidden_size,
                                        bias=True,
                                        scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state,
                                    self.config.class_num,
                                    bias=True,
                                    scope='fnn_softmax')
            return logits
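TfUtils.reduce_avg shows up in almost every example as a length-aware average over the time axis. A rough, self-contained stand-in for the three-argument form used here (again a guess from the call sites, not the actual helper):

    import tensorflow as tf

    def reduce_avg(inputs, lengths, dim):
        # Assumed behaviour: mean of `inputs` over axis `dim`, counting only the
        # first `lengths` steps of each sequence (one trailing feature axis).
        maxLen = tf.shape(inputs)[dim]
        mask = tf.sequence_mask(lengths, maxLen, dtype=inputs.dtype)     # (..., maxLen)
        masked_sum = tf.reduce_sum(inputs * tf.expand_dims(mask, -1), axis=dim)
        denom = tf.cast(tf.expand_dims(lengths, -1), inputs.dtype)
        return masked_sum / tf.maximum(denom, 1.0)                       # guard zero-length rows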
Example #3
        def basic_cnn_model(inputs):
            in_channel = self.config.embed_size
            filter_sizes = self.config.filter_sizes
            out_channel = self.config.num_filters
            input = inputs
            for layer in range(self.config.cnn_numLayers):
                with tf.name_scope("conv-layer-" + str(layer)):
                    conv_outputs = []
                    for i, filter_size in enumerate(filter_sizes):
                        with tf.variable_scope("conv-maxpool-%d" %
                                               filter_size):
                            # Convolution Layer
                            filter_shape = [
                                filter_size, in_channel, out_channel
                            ]
                            W = tf.get_variable(name='W', shape=filter_shape)
                            b = tf.get_variable(name='b', shape=[out_channel])
                            conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                                input,
                                W,
                                stride=1,
                                padding="SAME",
                                name="conv")
                            # Apply nonlinearity
                            h = tf.nn.relu(tf.nn.bias_add(conv, b),
                                           name="relu")
                            conv_outputs.append(h)
                    input = tf.concat(
                        axis=2, values=conv_outputs
                    )  #b_sz, tstp, out_channel*len(filter_sizes)
                    in_channel = out_channel * len(filter_sizes)
            # Maxpooling
            # mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32)  # (b_sz, tstp)
            mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            pooled = tf.reduce_max(
                input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
                [1])  #(b_sz, out_channel*len(filter_sizes))
            #size (b_sz, out_channel*len(filter_sizes))
            inputs = tf.reshape(pooled,
                                shape=[b_sz, out_channel * len(filter_sizes)])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs,
                                        self.config.embed_size,
                                        bias=True,
                                        scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state,
                                    self.config.class_num,
                                    bias=True,
                                    scope='fnn_softmax')
            return logits
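The fully connected stacks in these classifiers go through TfUtils.linear. A plausible minimal version, matching how it is called (2-D input, optional bias, a variable-scope name); treat it as an illustration rather than the project's code:

    import tensorflow as tf

    def linear(inputs, output_size, bias=True, scope=None):
        # Assumed behaviour: a single dense layer, inputs @ W (+ b),
        # with `inputs` of shape (batch, input_size).
        with tf.variable_scope(scope or 'linear'):
            input_size = int(inputs.get_shape()[-1])
            W = tf.get_variable('W', shape=[input_size, output_size])
            out = tf.matmul(inputs, W)
            if bias:
                b = tf.get_variable('b', shape=[output_size],
                                    initializer=tf.zeros_initializer())
                out = out + b
            return out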
def average_sentence_as_vector(fetch_output, lengths):
    """
    fetch_output: shape=(batch_size, num_sentence, len_sentence, embed_size)
    lengths: shape=(batch_size, num_sentence)
    maxLen: scalar
    """
    mask = TfUtils.mkMask(
        lengths,
        tf.shape(fetch_output)[-2])  #(batch_size, num_sentence, len_sentence)
    avg = TfUtils.reduce_avg(fetch_output, tf.expand_dims(mask, -1),
                             tf.expand_dims(lengths, -1),
                             -2)  #(batch_size, num_sentence, embed_size)
    return avg
        def basic_cbow_model(inputs):
            mask = TfUtils.mkMask(self.ph_seqLen, tstp) # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2) #b_sz, tstp, 1

            aggregate_state = TfUtils.reduce_avg(inputs, self.ph_seqLen, dim=1) #b_sz, emb_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.embed_size])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs, self.config.embed_size, bias=True, scope='fnn_layer-'+str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state, self.config.class_num, bias=True, scope='fnn_softmax')
            return logits
Example #6
        def lstm_sentence_rep(input):
            with tf.variable_scope('lstm_sentence_rep_scope') as scope:
                input = tf.reshape(input,
                                   shape=[b_sz * tstps_en, -1, emb_sz
                                          ])  #(b_sz*tstps_en, len_sen, emb_sz)
                length = tf.reshape(self.ph_input_encoder_sentence_len,
                                    shape=[-1])  #(b_sz*tstps_en)

                lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(h_sz)
                """tup(shape(b_sz*tstp_enc, len_sen, h_sz))"""
                rep_out, _ = tf.nn.bidirectional_dynamic_rnn(  # tup(shape(b_sz*tstp_enc, len_sen, h_sz))
                    lstm_cell,
                    lstm_cell,
                    input,
                    length,
                    dtype=tf.float32,
                    swap_memory=True,
                    time_major=False,
                    scope='sentence_encode')

                rep_out = tf.concat(axis=2,
                                    values=rep_out)  #(b_sz*tstps_en, len_sen, h_sz*2)
                rep_out = TfUtils.reduce_avg(
                    rep_out, length, dim=1)  # shape(b_sz*tstps_en, h_sz*2)
                output = tf.reshape(rep_out,
                                    shape=[b_sz, tstps_en, 2 * h_sz
                                           ])  #(b_sz, tstps_en, h_sz*2)

            return output, None, None
Example #7
 def get_dec_in():
     dec_in = TfUtils.batch_embed_lookup(encoder_inputs, order_index)    # shape(b_sz, tstp_dec, s_emb_sz)
     bos = get_bos(s_emb_sz)                                             # shape(b_sz, s_emb_sz)
     bos = tf.expand_dims(bos, 1)                                        # shape(b_sz, 1, s_emb_sz)
     dec_in = tf.concat(axis=1, values=[bos, dec_in])                    # shape(b_sz, tstp_dec+1, s_emb_sz)
     dec_in = dec_in[:, :-1, :]                                          # shape(b_sz, tstp_dec, s_emb_sz)
     return dec_in
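get_dec_in relies on TfUtils.batch_embed_lookup to pick, for every batch element, the encoder rows named by order_index. One way such a per-example gather could look, a sketch assuming int32 indices into the second axis:

    import tensorflow as tf

    def batch_embed_lookup(embedding, ids):
        # embedding: (b_sz, n, emb_sz); ids: (b_sz, ...) of row indices into each
        # example's own `n` rows.  Output shape: ids.shape + (emb_sz,).
        b_sz = tf.shape(embedding)[0]
        ids_shape = tf.shape(ids)
        flat_ids = tf.cast(tf.reshape(ids, [b_sz, -1]), tf.int32)        # (b_sz, k)
        k = tf.shape(flat_ids)[1]
        batch_idx = tf.tile(tf.expand_dims(tf.range(b_sz), 1), [1, k])   # (b_sz, k)
        gather_idx = tf.stack([batch_idx, flat_ids], axis=-1)            # (b_sz, k, 2)
        flat_out = tf.gather_nd(embedding, gather_idx)                   # (b_sz, k, emb_sz)
        out_shape = tf.concat([ids_shape, tf.shape(embedding)[-1:]], axis=0)
        return tf.reshape(flat_out, out_shape)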
 def get_initial_state(hidden_sz):
     '''
     Args:
         hidden_sz: must be a python determined number
     '''
     avg_in_x = TfUtils.reduce_avg(
         encoder_inputs,  # shape(b_sz, s_emb_sz)
         enc_lengths,
         dim=1)
     state = TfUtils.linear(
         avg_in_x,
         hidden_sz,  # shape(b_sz, hidden_sz)
         bias=False,
         scope='initial_transformation')
     state = rnn_cell.LSTMStateTuple(state, tf.zeros_like(state))
     return state
Example #9
    def fetch_input(self, embedding, seqIds, scope):
        '''

        Args:
            embedding: embedding matrix to lookup from
            seqIds: sequence ids

        Returns:
            output: shape(b_sz, maxSeqLen, fetch_h_sz)
        '''
        inputs = tf.nn.embedding_lookup(embedding,
                                        seqIds)  # shape(b_sz, tstp, emb_sz)
        if self.config.cnn_after_embed:
            with tf.variable_scope('cnn_after_embed_%s' % scope):
                filter_shape = [
                    3, self.config.embed_size, self.config.embed_size
                ]
                W = tf.get_variable(name='W', shape=filter_shape)
                b = tf.get_variable(name='b', shape=[self.config.embed_size])
                conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                    inputs,
                    W,
                    stride=1,
                    padding="SAME",
                    name="conv")

            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            inputs = h  # shape(b_sz, tstp, emb_sz)
        inputs = TfUtils.Dropout(inputs,
                                 self.config.dropout,
                                 train=self.ph_train)

        return inputs
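TfUtils.Dropout apparently wraps standard dropout behind a boolean `train` switch (self.ph_train looks like a placeholder). A hypothetical equivalent; whether config.dropout is a keep-probability or a drop rate is an assumption here:

    import tensorflow as tf

    def Dropout(inputs, keep_prob, train):
        # Apply dropout only when the scalar bool tensor `train` is True;
        # `keep_prob` is treated as a keep-probability (TF1 convention).
        return tf.cond(train,
                       lambda: tf.nn.dropout(inputs, keep_prob=keep_prob),
                       lambda: inputs)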
Example #10
    def add_loss_op(self, logits, sparse_label, dec_lengths):
        reg_loss = tf.add_n([tf.nn.l2_loss(v) for v in
                             tf.trainable_variables()
                             if v != self.embedding]) * self.config.reg

        valid_loss = TfUtils.seq_loss(logits, sparse_label, dec_lengths)
        train_loss = reg_loss + valid_loss
        return train_loss, valid_loss
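TfUtils.seq_loss is the training criterion in Examples #10 and #11. A rough guess at its behaviour is a padding-masked sparse cross-entropy averaged over the valid positions; the real helper may reduce differently (e.g. per example):

    import tensorflow as tf

    def seq_loss(logits, labels, lengths):
        # logits: (b_sz, tstp, class_num); labels: (b_sz, tstp) int class ids;
        # lengths: (b_sz,) valid steps per sequence.
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)                                # (b_sz, tstp)
        mask = tf.sequence_mask(lengths, tf.shape(logits)[1], dtype=tf.float32)
        return tf.reduce_sum(xent * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)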
Example #11
        def add_loss_op(logits, title_label, content_label, tit_len,
                        content_len):
            '''
            Returns:
                loss
            '''
            title_logits, content_logits = logits

            loss1 = TfUtils.seq_loss(title_logits, title_label, tit_len)
            loss2 = TfUtils.seq_loss(content_logits, content_label,
                                     content_len)

            loss = tf.reduce_mean(loss1 + loss2)
            reg_loss = tf.add_n([
                tf.nn.l2_loss(v) for v in tf.trainable_variables()
                if v not in [self.embed_w]
            ])
            return loss + self.config.reg * reg_loss
Example #12
        def Dense(output_for_title, output_for_content):
            '''
            Get the logits for final classification.

            Returns:
                logits_for_title: shape(b_sz, seq_title, class_num)
                logits_for_content: shape(b_sz, seq_content, class_num)
            '''
            batch_size = tf.shape(output_for_title)[0]
            # batch_dim = self.config.embed_size + self.config.num_filters * len(self.config.filter_sizes) * 3
            batch_dim = 2 * self.config.embed_size + self.config.num_filters * len(
                self.config.filter_sizes) * 3
            print(batch_dim)

            loop_input_title = tf.reshape(output_for_title, [-1, batch_dim])
            loop_input_content = tf.reshape(output_for_content,
                                            [-1, batch_dim])
            if self.config.dense_hidden[-1] != self.config.class_num:
                raise ValueError(
                    'last hidden layer should be %d, but get %d' %
                    (self.config.class_num, self.config.dense_hidden[-1]))
            for i, hid_num in enumerate(self.config.dense_hidden):
                loop_input_title = TfUtils.linear(loop_input_title,
                                                  output_size=hid_num,
                                                  bias=True,
                                                  scope='dense-tit-layer-%d' %
                                                  i)
                if i < len(self.config.dense_hidden) - 1:
                    loop_input_title = tf.nn.relu(loop_input_title)

                loop_input_content = TfUtils.linear(
                    loop_input_content,
                    output_size=hid_num,
                    bias=True,
                    scope='dense-con-layer-%d' % i)
                if i < len(self.config.dense_hidden) - 1:
                    loop_input_content = tf.nn.relu(loop_input_content)

            logits = (tf.reshape(loop_input_title,
                                 [batch_size, -1, self.config.class_num]),
                      tf.reshape(loop_input_content,
                                 [batch_size, -1, self.config.class_num]))
            return logits
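To make batch_dim concrete: with hypothetical settings embed_size=100, num_filters=128 and filter_sizes=[3, 4, 5] (assumed values, not the project's actual config), batch_dim = 2*100 + 128*3*3 = 1352, so every title/content position is flattened to a 1352-dimensional vector before entering the dense layers.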
Example #13
 def domain_layer(output, seq_len):
     W_classifier = tf.get_variable(
         shape=[2 * lstm_dim, 2],
         initializer=tf.truncated_normal_initializer(
             stddev=1.0 / math.sqrt(float(2))),
         name='W_classifier')
     bias = tf.Variable(tf.zeros([2]), name='class_bias')
     output_avg = TfUtils.reduce_avg(output, seq_len, 1)
     logits = tf.matmul(output_avg, W_classifier) + bias
     return logits
Example #14
        def basic_cbow_model(inputs):
            mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2)  #b_sz, tstp, 1

            aggregate_state = TfUtils.reduce_avg(inputs, self.ph_seqLen,
                                                 dim=1)  #b_sz, emb_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.embed_size])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs,
                                        self.config.embed_size,
                                        bias=True,
                                        scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state,
                                    self.config.class_num,
                                    bias=True,
                                    scope='fnn_softmax')
            return logits
Example #15
        def func_point_logits(dec_h, enc_ptr, enc_len):
            '''
            Args:
                dec_h : shape(b_sz, tstp_dec, h_dec_sz)
                enc_ptr : shape(b_sz, tstp_dec, tstp_enc, Ptr_sz)
                enc_len : shape(b_sz,)
            '''
            dec_h_ex = tf.expand_dims(
                dec_h, axis=2)  # shape(b_sz, tstp_dec, 1, h_dec_sz)
            dec_h_ex = tf.tile(dec_h_ex,
                               [1, 1, tstp_enc, 1
                                ])  # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz)
            linear_concat = tf.concat(axis=3, values=[
                dec_h_ex, enc_ptr
            ])  # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz+ Ptr_sz)
            point_linear = TfUtils.last_dim_linear(  # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz)
                linear_concat,
                output_size=h_dec_sz,
                bias=False,
                scope='Ptr_W')
            point_v = TfUtils.last_dim_linear(  # shape(b_sz, tstp_dec, tstp_enc, 1)
                tf.tanh(point_linear),
                output_size=1,
                bias=False,
                scope='Ptr_V')

            point_logits = tf.squeeze(
                point_v, axis=[3])  # shape(b_sz, tstp_dec, tstp_enc)

            enc_len = tf.expand_dims(enc_len, 1)  # shape(b_sz, 1)
            enc_len = tf.tile(enc_len, [1, tstp_dec])  # shape(b_sz, tstp_dec)
            mask = TfUtils.mkMask(
                enc_len, maxLen=tstp_enc)  # shape(b_sz, tstp_dec, tstp_enc)
            point_logits = tf.where(
                mask,
                point_logits,  # shape(b_sz, tstp_dec, tstp_enc)
                tf.ones_like(point_logits) * small_num)

            return point_logits
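For orientation only, a hypothetical way the masked pointer logits might be consumed afterwards (not part of the original snippet):

    # point_logits: shape(b_sz, tstp_dec, tstp_enc); padded encoder slots were pushed to small_num
    point_prob = tf.nn.softmax(point_logits)           # pointer distribution per decoder step
    point_idx = tf.argmax(point_logits, axis=-1)       # greedy pointer choice, shape(b_sz, tstp_dec)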
        def basic_lstm_model(inputs):
            print "Loading basic lstm model.."
            for i in range(self.config.rnn_numLayers):
                with tf.variable_scope('rnnLayer'+str(i)):
                    lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
                    outputs, _ = tf.nn.dynamic_rnn(lstm_cell, inputs, self.ph_seqLen,  #(b_sz, tstp, h_sz)
                                                   dtype=tf.float32, swap_memory=True,
                                                   scope='basic_lstm_model_layer-'+str(i))
                    inputs = outputs #b_sz, tstp, h_sz
            mask = TfUtils.mkMask(self.ph_seqLen, tstp) # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2) #b_sz, tstp, 1

            aggregate_state = TfUtils.reduce_avg(outputs, self.ph_seqLen, dim=1) #b_sz, h_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.hidden_size])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs, self.config.hidden_size, bias=True, scope='fnn_layer-'+str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state, self.config.class_num, bias=True, scope='fnn_softmax')
            return logits
        def basic_cnn_model(inputs):
            in_channel = self.config.embed_size
            filter_sizes = self.config.filter_sizes
            out_channel = self.config.num_filters
            input = inputs
            for layer in range(self.config.cnn_numLayers):
                with tf.name_scope("conv-layer-"+ str(layer)):
                    conv_outputs = []
                    for i, filter_size in enumerate(filter_sizes):
                        with tf.variable_scope("conv-maxpool-%d" % filter_size):
                            # Convolution Layer
                            filter_shape = [filter_size, in_channel, out_channel]
                            W = tf.get_variable(name='W', shape=filter_shape)
                            b = tf.get_variable(name='b', shape=[out_channel])
                            conv = tf.nn.conv1d(                # size (b_sz, tstp, out_channel)
                              input,
                              W,
                              stride=1,
                              padding="SAME",
                              name="conv")
                            # Apply nonlinearity
                            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                            conv_outputs.append(h)
                    input = tf.concat(axis=2, values=conv_outputs) #b_sz, tstp, out_channel*len(filter_sizes)
                    in_channel = out_channel * len(filter_sizes)
            # Maxpooling
            # mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32)  # (b_sz, tstp)
            mask = TfUtils.mkMask(self.ph_seqLen, tstp) # b_sz, tstp
            pooled = tf.reduce_max(input*tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2), [1]) #(b_sz, out_channel*len(filter_sizes))
            #size (b_sz, out_channel*len(filter_sizes))
            inputs = tf.reshape(pooled, shape=[b_sz, out_channel*len(filter_sizes)])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs, self.config.embed_size, bias=True, scope='fnn_layer-'+str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state, self.config.class_num, bias=True, scope='fnn_softmax')
            return logits
Example #18
    def snt_encoder_cnn(self, seqInput, seqLen):
        '''
        CNN encoder

        Args:
            seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
            seqLen:   length for each sequence in the batch

        Returns:
            output: shape(b_sz, dim_h)
        '''
        input_shape = tf.shape(seqInput)
        b_sz = input_shape[0]
        tstp = input_shape[1]

        in_channel = self.config.embed_size
        filter_sizes = self.config.filter_sizes
        out_channel = self.config.num_filters
        input = seqInput
        for layer in range(self.config.cnn_numLayers):
            with tf.variable_scope("conv-layer-" + str(layer)):
                conv_outputs = []
                for i, filter_size in enumerate(filter_sizes):
                    with tf.variable_scope("conv-maxpool-%d" % filter_size):
                        # Convolution Layer
                        filter_shape = [filter_size, in_channel, out_channel]
                        W = tf.get_variable(name='W', shape=filter_shape)
                        b = tf.get_variable(name='b', shape=[out_channel])
                        conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                            input,
                            W,
                            stride=1,
                            padding="SAME",
                            name="conv")
                        # Apply nonlinearity
                        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                        conv_outputs.append(h)
                input = tf.concat(
                    axis=2, values=conv_outputs
                )  # b_sz, tstp, out_channel*len(filter_sizes)
                in_channel = out_channel * len(filter_sizes)

        mask = TfUtils.mkMask(seqLen, tstp)  # b_sz, tstp

        pooled = tf.reduce_mean(
            input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2), [1])
        # size (b_sz, out_channel*len(filter_sizes))
        snt_enc = tf.reshape(pooled,
                             shape=[b_sz, out_channel * len(filter_sizes)])
        return snt_enc, input
Example #19
        def loop_fn(time, cell_output, cell_state, hit_mask):
            """
            Args:
                cell_output: shape(b_sz, h_dec_sz) ==> d
                cell_state: tup(shape(b_sz, h_dec_sz))
                pointer_logits_ta: pointer logits tensorArray
                hit_mask: shape(b_sz, tstp_enc)
            """

            if cell_output is None:  # time == 0
                next_cell_state = init_state
                next_input = bos  # shape(b_sz, dec_emb_sz)
                next_idx = tf.zeros(shape=[b_sz],
                                    dtype=tf.int32)  # shape(b_sz,)
                elements_finished = tf.zeros(shape=[b_sz],
                                             dtype=tf.bool,
                                             name='elem_finished')
                next_hit_mask = tf.zeros(shape=[b_sz, tstp_enc],
                                         dtype=tf.bool,
                                         name='hit_mask')
            else:

                next_cell_state = cell_state

                encoder_e = enc(
                    cell_output, encoder_inputs,
                    enc_lengths)  # shape(b_sz, tstp_enc, dec_emb_sz)
                next_idx = func_point_idx(cell_output, encoder_e, enc_lengths,
                                          hit_mask)  # shape(b_sz,)

                cur_hit_mask = tf.one_hot(
                    next_idx,
                    on_value=True,  # shape(b_sz, tstp_enc)
                    off_value=False,
                    depth=tstp_enc,
                    dtype=tf.bool)
                next_hit_mask = tf.logical_or(
                    hit_mask,
                    cur_hit_mask,  # shape(b_sz, tstp_enc)
                    name='next_hit_mask')

                next_input = TfUtils.batch_embed_lookup(
                    encoder_inputs, next_idx)  # shape(b_sz, s_emb_sz)

                elements_finished = (time >= dec_lengths)  # shape(b_sz,)

            return (elements_finished, next_input, next_cell_state,
                    next_hit_mask, next_idx)
Example #20
    def snt_encoder_cbow(self, seqInput, seqLen):
        '''
        Take the average word representation as sentence representation

        Args:
            seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
            seqLen:   length for each sequence in the batch

        Returns:
            output: shape(b_sz, dim_h)
        '''

        aggregate_state = TfUtils.reduce_avg(seqInput, seqLen,
                                             dim=1)  # b_sz, emb_sz

        return aggregate_state
Example #21
    def snt_encoder_lstm_avg(self, seqInput, seqLen):
        '''
        Take the average of output as sentence representation

        Args:
            seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
            seqLen:   length for each sequence in the batch

        Returns:
            output: shape(b_sz, dim_h)
        '''
        lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
        output, states = tf.nn.dynamic_rnn(cell=lstm_cell,
                                           inputs=seqInput,
                                           sequence_length=seqLen,
                                           dtype=tf.float32,
                                           swap_memory=True,
                                           scope='snt_enc')
        snt_enc = TfUtils.reduce_avg(output, lengths=seqLen, dim=1)
        return snt_enc
Example #22
        def enc(dec_h, in_x, lengths, fake_call=False):
            '''
            Args:
                inputs: shape(b_sz, tstp_enc, enc_emb_sz)

            '''
            def func_f(in_x, in_h, in_h_hat, fake_call=False):
                if fake_call:
                    return s_emb_sz + h_enc_sz * 4

                in_x_sz = int(in_x.get_shape()[-1])
                in_h_sz = int(in_h.get_shape()[-1])
                if not in_x_sz:
                    raise ValueError('last dimension of the first' +
                                     ' arg should be known, while got %s' %
                                     (str(type(in_x_sz))))
                if not in_h_sz:
                    raise ValueError('last dimension of the second' +
                                     ' arg should be known, while got %s' %
                                     (str(type(in_h_sz))))
                res = tf.concat(axis=2, values=[in_x, in_h, in_h_hat])
                return res

            if fake_call:
                return func_f(None, None, None, fake_call=True)
            inputs = func_enc_input(dec_h, in_x)

            lstm_out, _ = tf.nn.bidirectional_dynamic_rnn(cell_enc,
                                                          cell_enc,
                                                          inputs,
                                                          lengths,
                                                          swap_memory=True,
                                                          dtype=tf.float32,
                                                          scope='sent_encoder')
            enc_out = tf.concat(axis=2,
                                values=lstm_out)  # shape(b_sz, tstp_enc, h_enc_sz*2)
            enc_out = tf.reshape(enc_out, [b_sz, tstp_enc, h_enc_sz * 2])

            enc_out_hat = TfUtils.self_attn(enc_out, lengths)
            res = func_f(in_x, enc_out, enc_out_hat)
            return res  # shape(b_sz, tstp_enc, dec_emb_sz)
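Both this encoder and the attend helper in Example #23 below depend on TfUtils.self_attn. Its output evidently has the same shape as its input, so a length-masked dot-product self-attention is a reasonable stand-in; the real helper may use a different scoring function entirely:

    import tensorflow as tf

    def self_attn(inputs, lengths):
        # inputs: (b_sz, tstp, h_sz); lengths: (b_sz,).  Each position attends over
        # the valid positions of its own sequence and receives a context vector of
        # the same size.
        tstp = tf.shape(inputs)[1]
        scores = tf.matmul(inputs, inputs, transpose_b=True)             # (b_sz, tstp, tstp)
        key_mask = tf.sequence_mask(lengths, tstp, dtype=tf.float32)     # (b_sz, tstp)
        scores += (1.0 - tf.expand_dims(key_mask, 1)) * (-1e10)          # hide padded keys
        weights = tf.nn.softmax(scores)
        return tf.matmul(weights, inputs)                                # (b_sz, tstp, h_sz)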
Example #23
 def attend(enc_h, enc_len):
     '''
     Args:
         enc_h: shape(b_sz, tstp_dec, tstp_enc, h_enc_sz*2)
         enc_len: shape(b_sz)
     '''
     enc_len = tf.expand_dims(enc_len, 1)  # shape(b_sz, 1)
     attn_enc_len = tf.tile(enc_len, [1, tstp_dec])
     attn_enc_len = tf.reshape(attn_enc_len, [b_sz * tstp_dec])
     attn_enc_h = tf.reshape(
         enc_h,  # shape(b_sz*tstp_dec, tstp_enc, h_enc_sz*2)
         [b_sz * tstp_dec, tstp_enc,
          int(enc_h.get_shape()[-1])])
     attn_out = TfUtils.self_attn(  # shape(b_sz*tstp_dec, tstp_enc, h_enc_sz*2)
         attn_enc_h, attn_enc_len)
     h_hat = tf.reshape(
         attn_out,  # shape(b_sz, tstp_dec, tstp_enc, h_enc_sz*2)
         [
             b_sz, tstp_dec, tstp_enc,
             int(attn_out.get_shape()[-1])
         ])
     return h_hat