Example #1
        def basic_lstm_model(inputs):
            print "Loading basic lstm model.."
            for i in range(self.config.rnn_numLayers):
                with tf.variable_scope('rnnLayer' + str(i)):
                    lstm_cell = rnn_cell.BasicLSTMCell(self.config.hidden_size)
                    outputs, _ = tf.nn.dynamic_rnn(
                        lstm_cell,
                        inputs,
                        self.ph_seqLen,  #(b_sz, tstp, h_sz)
                        dtype=tf.float32,
                        swap_memory=True,
                        scope='basic_lstm_model_layer-' + str(i))
                    inputs = outputs  #b_sz, tstp, h_sz
            mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp (unused below; reduce_avg masks internally)
            mask = tf.expand_dims(mask, axis=2)  # b_sz, tstp, 1

            aggregate_state = TfUtils.reduce_avg(outputs,
                                                 self.ph_seqLen,
                                                 dim=1)  #b_sz, h_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.hidden_size])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs,
                                        self.config.hidden_size,
                                        bias=True,
                                        scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state,
                                    self.config.class_num,
                                    bias=True,
                                    scope='fnn_softmax')
            return logits
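Every example in this listing leans on TfUtils helpers that are never shown. A minimal sketch of plausible implementations, assuming mkMask builds a boolean length mask with tf.sequence_mask and reduce_avg is a length-normalized masked sum (written here for the dim=1 case used above):

    import tensorflow as tf

    def mkMask(seq_len, max_len):
        # Boolean mask, True at positions < seq_len; shape = seq_len.shape + (max_len,)
        return tf.sequence_mask(seq_len, maxlen=max_len)

    def reduce_avg(inputs, lengths, dim=1):
        # Masked mean over `dim`: zero out padded steps, sum, divide by true length.
        max_len = tf.shape(inputs)[dim]
        mask = tf.cast(mkMask(lengths, max_len), inputs.dtype)      # (b_sz, tstp)
        mask = tf.expand_dims(mask, axis=-1)                        # (b_sz, tstp, 1)
        summed = tf.reduce_sum(inputs * mask, axis=dim)             # (b_sz, h_sz)
        denom = tf.cast(tf.expand_dims(lengths, -1), inputs.dtype)  # (b_sz, 1)
        return summed / tf.maximum(denom, tf.ones_like(denom))      # guard zero-length rows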
Example #2
        def func_point_logits(dec_h, enc_e, enc_len):
            '''
            Args:
                dec_h : shape(b_sz, h_dec_sz)
                enc_e : shape(b_sz, tstp_enc, dec_emb_sz)
                enc_len : shape(b_sz,)
            '''

            dec_h_ex = tf.expand_dims(dec_h, axis=1)        # shape(b_sz, 1, h_dec_sz)
            dec_h_ex = tf.tile(dec_h_ex, [1, tstp_enc, 1])  # shape(b_sz, tstp_enc, h_dec_sz)
            linear_concat = tf.concat(
                axis=2, values=[dec_h_ex, enc_e])  # shape(b_sz, tstp_enc, h_dec_sz + dec_emb_sz)
            point_linear = TfUtils.last_dim_linear(  # shape(b_sz, tstp_enc, h_dec_sz)
                linear_concat,
                output_size=h_dec_sz,
                bias=False,
                scope='Ptr_W')
            point_v = TfUtils.last_dim_linear(  # shape(b_sz, tstp_enc, 1)
                tf.tanh(point_linear),
                output_size=1,
                bias=False,
                scope='Ptr_V')
            point_logits = tf.squeeze(point_v, axis=[2])  # shape(b_sz, tstp_enc)
            mask = TfUtils.mkMask(enc_len,
                                  maxLen=tstp_enc)  # shape(b_sz, tstp_enc)
            # Replace padded positions with a large negative constant before softmax
            point_logits = tf.where(mask, point_logits,
                                    tf.ones_like(point_logits) *
                                    small_num)  # shape(b_sz, tstp_enc)

            return point_logits
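The free variable small_num is assumed to be a large negative constant; replacing padded logits with it drives their softmax probability to near zero. A self-contained sketch of the idiom:

    import tensorflow as tf

    small_num = -1e10  # assumption: defined in the enclosing scope of the snippet

    logits = tf.constant([[1.0, 2.0, 3.0, 4.0]])
    mask = tf.constant([[True, True, False, False]])  # only the first two steps are valid
    masked = tf.where(mask, logits, tf.ones_like(logits) * small_num)
    probs = tf.nn.softmax(masked)  # padded positions get near-zero probability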
Example #3
        def basic_cnn_model(inputs):
            in_channel = self.config.embed_size
            filter_sizes = self.config.filter_sizes
            out_channel = self.config.num_filters
            input = inputs
            for layer in range(self.config.cnn_numLayers):
                with tf.name_scope("conv-layer-" + str(layer)):
                    conv_outputs = []
                    for i, filter_size in enumerate(filter_sizes):
                        with tf.variable_scope("conv-maxpool-%d" %
                                               filter_size):
                            # Convolution Layer
                            filter_shape = [
                                filter_size, in_channel, out_channel
                            ]
                            W = tf.get_variable(name='W', shape=filter_shape)
                            b = tf.get_variable(name='b', shape=[out_channel])
                            conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                                input,
                                W,
                                stride=1,
                                padding="SAME",
                                name="conv")
                            # Apply nonlinearity
                            h = tf.nn.relu(tf.nn.bias_add(conv, b),
                                           name="relu")
                            conv_outputs.append(h)
                    input = tf.concat(
                        axis=2, values=conv_outputs
                    )  #b_sz, tstp, out_channel*len(filter_sizes)
                    in_channel = out_channel * len(filter_sizes)
            # Max-pooling over time, restricted to valid positions
            # mask = tf.sequence_mask(self.ph_seqLen, tstp, dtype=tf.float32)  # (b_sz, tstp)
            mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            # Zeroing padded steps is safe before reduce_max because the ReLU
            # outputs are non-negative.
            pooled = tf.reduce_max(
                input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2),
                [1])  # (b_sz, out_channel*len(filter_sizes))
            inputs = tf.reshape(pooled,
                                shape=[b_sz, out_channel * len(filter_sizes)])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs,
                                        self.config.embed_size,
                                        bias=True,
                                        scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state,
                                    self.config.class_num,
                                    bias=True,
                                    scope='fnn_softmax')
            return logits
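These classifier snippets read their hyperparameters off a self.config object the listing never shows. A hypothetical minimal config, with attribute names inferred from the accesses above and purely illustrative values:

    class Config(object):
        # Hypothetical hyperparameters inferred from the snippets; values are illustrative.
        embed_size = 128          # word-embedding dimension (conv in_channel at layer 0)
        hidden_size = 256         # LSTM / FNN hidden size
        class_num = 5             # number of output classes
        rnn_numLayers = 1         # stacked LSTM layers
        cnn_numLayers = 2         # stacked conv blocks
        fnn_numLayers = 2         # fully connected layers before the softmax
        filter_sizes = [3, 4, 5]  # 1-D conv kernel widths
        num_filters = 100         # out_channel per kernel width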
Example #4
def average_sentence_as_vector(fetch_output, lengths):
    """
    fetch_output: shape=(batch_size, num_sentence, len_sentence, embed_size)
    lengths: shape=(batch_size, num_sentence)
    maxLen: scalar
    """
    mask = TfUtils.mkMask(
        lengths,
        tf.shape(fetch_output)[-2])  #(batch_size, num_sentence, len_sentence)
    avg = TfUtils.reduce_avg(fetch_output, tf.expand_dims(mask, -1),
                             tf.expand_dims(lengths, -1),
                             -2)  #(batch_size, num_sentence, embed_size)
    return avg
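A hypothetical call, just to make the shapes concrete (tensor names are illustrative):

    # fetch_output: (batch_size, num_sentence, len_sentence, embed_size) word vectors
    # lengths:      (batch_size, num_sentence) true word count per sentence
    sent_vecs = average_sentence_as_vector(fetch_output, lengths)
    # sent_vecs:    (batch_size, num_sentence, embed_size)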
Example #5
    def snt_encoder_cnn(self, seqInput, seqLen):
        '''
        CNN encoder

        Args:
            seqInput: encoder input, shape(b_sz, maxSeqLen, dim_x)
            seqLen:   length for each sequence in the batch

        Returns:
            snt_enc: pooled sentence encoding, shape(b_sz, out_channel*len(filter_sizes))
            input:   per-step conv features, shape(b_sz, tstp, out_channel*len(filter_sizes))
        '''
        input_shape = tf.shape(seqInput)
        b_sz = input_shape[0]
        tstp = input_shape[1]

        in_channel = self.config.embed_size
        filter_sizes = self.config.filter_sizes
        out_channel = self.config.num_filters
        input = seqInput
        for layer in range(self.config.cnn_numLayers):
            with tf.variable_scope("conv-layer-" + str(layer)):
                conv_outputs = []
                for i, filter_size in enumerate(filter_sizes):
                    with tf.variable_scope("conv-maxpool-%d" % filter_size):
                        # Convolution Layer
                        filter_shape = [filter_size, in_channel, out_channel]
                        W = tf.get_variable(name='W', shape=filter_shape)
                        b = tf.get_variable(name='b', shape=[out_channel])
                        conv = tf.nn.conv1d(  # size (b_sz, tstp, out_channel)
                            input,
                            W,
                            stride=1,
                            padding="SAME",
                            name="conv")
                        # Apply nonlinearity
                        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                        conv_outputs.append(h)
                input = tf.concat(
                    axis=2, values=conv_outputs
                )  # b_sz, tstp, out_channel*len(filter_sizes)
                in_channel = out_channel * len(filter_sizes)

        mask = TfUtils.mkMask(seqLen, tstp)  # b_sz, tstp

        pooled = tf.reduce_mean(
            input * tf.expand_dims(tf.cast(mask, dtype=tf.float32), 2), [1])
        # size (b_sz, out_channel*len(filter_sizes))
        snt_enc = tf.reshape(pooled,
                             shape=[b_sz, out_channel * len(filter_sizes)])
        return snt_enc, input
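A hypothetical invocation, assuming a model instance whose config matches the sketch above (placeholder and variable names are illustrative):

    # `model` is assumed to be an instance of the (unshown) encoder class; `config` its Config.
    seqInput = tf.placeholder(tf.float32, [None, None, config.embed_size])  # (b_sz, maxSeqLen, dim_x)
    seqLen = tf.placeholder(tf.int32, [None])                               # (b_sz,)
    snt_enc, step_feats = model.snt_encoder_cnn(seqInput, seqLen)
    # snt_enc:    (b_sz, num_filters * len(filter_sizes)) masked mean-pooled encoding
    # step_feats: (b_sz, tstp, num_filters * len(filter_sizes)) per-step conv features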
Example #7
        def basic_cbow_model(inputs):
            mask = TfUtils.mkMask(self.ph_seqLen, tstp)  # b_sz, tstp
            mask = tf.expand_dims(mask, axis=2)  #b_sz, tstp, 1

            aggregate_state = TfUtils.reduce_avg(inputs, self.ph_seqLen,
                                                 dim=1)  #b_sz, emb_sz
            inputs = aggregate_state
            inputs = tf.reshape(inputs, [-1, self.config.embed_size])

            for i in range(self.config.fnn_numLayers):
                inputs = TfUtils.linear(inputs,
                                        self.config.embed_size,
                                        bias=True,
                                        scope='fnn_layer-' + str(i))
                inputs = tf.nn.tanh(inputs)
            aggregate_state = inputs
            logits = TfUtils.linear(aggregate_state,
                                    self.config.class_num,
                                    bias=True,
                                    scope='fnn_softmax')
            return logits
Example #8
        def func_point_logits(dec_h, enc_ptr, enc_len):
            '''
            Args:
                dec_h : shape(b_sz, tstp_dec, h_dec_sz)
                enc_ptr : shape(b_sz, tstp_dec, tstp_enc, Ptr_sz)
                enc_len : shape(b_sz,)
            '''
            dec_h_ex = tf.expand_dims(dec_h, axis=2)           # shape(b_sz, tstp_dec, 1, h_dec_sz)
            dec_h_ex = tf.tile(dec_h_ex, [1, 1, tstp_enc, 1])  # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz)
            linear_concat = tf.concat(
                axis=3, values=[dec_h_ex, enc_ptr])  # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz + Ptr_sz)
            point_linear = TfUtils.last_dim_linear(  # shape(b_sz, tstp_dec, tstp_enc, h_dec_sz)
                linear_concat,
                output_size=h_dec_sz,
                bias=False,
                scope='Ptr_W')
            point_v = TfUtils.last_dim_linear(  # shape(b_sz, tstp_dec, tstp_enc, 1)
                tf.tanh(point_linear),
                output_size=1,
                bias=False,
                scope='Ptr_V')

            point_logits = tf.squeeze(
                point_v, axis=[3])  # shape(b_sz, tstp_dec, tstp_enc)

            enc_len = tf.expand_dims(enc_len, 1)  # shape(b_sz, 1)
            enc_len = tf.tile(enc_len, [1, tstp_dec])  # shape(b_sz, tstp_dec)
            mask = TfUtils.mkMask(
                enc_len, maxLen=tstp_enc)  # shape(b_sz, tstp_dec, tstp_enc)
            point_logits = tf.where(
                mask,
                point_logits,  # shape(b_sz, tstp_dec, tstp_enc)
                tf.ones_like(point_logits) * small_num)

            return point_logits
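As in Example #2, these masked logits would presumably feed a softmax over the encoder axis; a sketch of turning them into per-step pointer choices (names assumed):

    point_prob = tf.nn.softmax(point_logits)    # softmax over the last axis: (b_sz, tstp_dec, tstp_enc)
    point_idx = tf.argmax(point_prob, axis=-1)  # (b_sz, tstp_dec) chosen encoder positions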