Example #1
def bi_lstm_class(input_,
                  n_hidden=256,
                  n_steps=32,
                  n_input=54,
                  num_class=10,
                  name='class_bi_lstm'):
    with tf.variable_scope(name):

        input_x = tf.unstack(input_, n_steps, 1)
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        x = []
        for i in range(n_steps - 1):
            x.append(tf.concat([input_x[i], input_x[i + 1] - input_x[i]], 1))

        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                        [lstm_bw_cell],
                                                        x,
                                                        dtype=tf.float32)
        except Exception:
            outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                  [lstm_bw_cell],
                                                  x,
                                                  dtype=tf.float32)

        h = tf.concat(outputs, 1)

        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)

        h, h_w, h_b = linear(h, num_class, 'd_h4_lin', with_w=True)

        return h
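
A minimal usage sketch for the block above, assuming TensorFlow 1.x with `tf.contrib.rnn` imported as `rnn` and a DCGAN-style `linear(input_, output_size, scope, with_w=True)` helper in scope (the snippet calls it but does not define it); the placeholder name and shape are illustrative assumptions:

import tensorflow as tf
from tensorflow.contrib import rnn

# Hypothetical input: a batch of 32-step sequences with 54 features per step
sequences = tf.placeholder(tf.float32, [None, 32, 54], name='sequences')
logits = bi_lstm_class(sequences, n_steps=32, n_input=54, num_class=10)
probs = tf.nn.softmax(logits)  # per-class probabilities, shape [batch, 10]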
Example #2
def bidirectional_lstm(input_,
                       cond,
                       n_hidden=256,
                       n_steps=32,
                       n_input=54,
                       name='bidirec_lstm'):
    with tf.variable_scope(name):

        print('new_lstm discrim')
        # weights = tf.get_variable('weights', [4096, 1],
        #                     initializer=tf.random_normal_initializer(stddev=0.02))

        # biases = tf.get_variable('biases', [1], initializer=tf.constant_initializer(0.0))

        # Prepare data shape to match `bidirectional_rnn` function requirements
        # Current data input shape: (batch_size, n_steps, n_input)
        # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

        # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
        input_x = tf.unstack(input_, n_steps, 1)

        # Calculate shifts
        x = []
        for i in range(n_steps - 1):
            x.append(
                tf.concat([input_x[i], input_x[i + 1] - input_x[i], cond], 1))

        # Define lstm cells with tensorflow
        # Forward direction cell
        lstm_fw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)
        # Backward direction cell
        lstm_bw_cell = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0)

        # Get lstm cell output
        try:
            outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                        [lstm_bw_cell],
                                                        x,
                                                        dtype=tf.float32)
        except Exception:  # Old TensorFlow version only returns outputs not states
            outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                  [lstm_bw_cell],
                                                  x,
                                                  dtype=tf.float32)

        h = tf.concat(outputs, 1)

        h, h_w, h_b = linear(h, 1024, 'd_h3_lin', with_w=True)
        h = tf.nn.relu(h)

        h, h_w, h_b = linear(h, 1, 'd_h4_lin', with_w=True)

        return h
Example #3
    def __call__(self, inputs, name, training=False):
        """
        Runs the bidirectional LSTM, produces outputs and saves both forward and backward states as well as gradients.
        :param inputs: The inputs should be a list of shape [sequence_length, batch_size, 64]
        :param name: Name to give to the tensorflow op
        :param training: Flag that indicates if this is a training or evaluation stage
        :return: Returns the LSTM outputs, as well as the forward and backward hidden states.
        """
        with tf.name_scope('bid-lstm' + name), tf.variable_scope(
                'bid-lstm', reuse=self.reuse):
            with tf.variable_scope("encoder"):
                fw_lstm_cells_encoder = [
                    rnn.LSTMCell(num_units=self.layer_sizes[i],
                                 activation=tf.nn.tanh)
                    for i in range(len(self.layer_sizes))
                ]
                bw_lstm_cells_encoder = [
                    rnn.LSTMCell(num_units=self.layer_sizes[i],
                                 activation=tf.nn.tanh)
                    for i in range(len(self.layer_sizes))
                ]

                outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                    fw_lstm_cells_encoder,
                    bw_lstm_cells_encoder,
                    inputs,
                    dtype=tf.float32)
            print("out shape", tf.stack(outputs, axis=0).get_shape().as_list())
            with tf.variable_scope("decoder"):
                fw_lstm_cells_decoder = [
                    rnn.LSTMCell(num_units=self.layer_sizes[i],
                                 activation=tf.nn.tanh)
                    for i in range(len(self.layer_sizes))
                ]
                bw_lstm_cells_decoder = [
                    rnn.LSTMCell(num_units=self.layer_sizes[i],
                                 activation=tf.nn.tanh)
                    for i in range(len(self.layer_sizes))
                ]
                outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                    fw_lstm_cells_decoder,
                    bw_lstm_cells_decoder,
                    outputs,
                    dtype=tf.float32)

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='bid-lstm')
        return outputs, output_state_fw, output_state_bw
Example #4
def BiRNN(X, weights, biases):
    x = tf.unstack(X, n_steps, 1)
    lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

    try:
        outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                    [lstm_bw_cell],
                                                    x,
                                                    dtype=tf.float32)
    except Exception:  # older TensorFlow versions only return outputs, not states
        outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                              [lstm_bw_cell],
                                              x,
                                              dtype=tf.float32)

    # Linear activation on the last time step's output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example #5
def BiRNN(x, weights, biases):
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, num_input)

    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
    x = tf.unstack(x, timesteps, axis=1)

    # Define lstm cells with tensorflow
    # Forward direction cell
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    # Backward direction cell
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Get lstm cell output
    try:
        outputs, _, _ = rnn.static_bidirectional_rnn(cell_fw=lstm_fw_cell,
                                                     cell_bw=lstm_bw_cell,
                                                     inputs=x,
                                                     dtype=tf.float32)
    except Exception:  # older TensorFlow versions only return outputs, not states
        outputs = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                              [lstm_bw_cell],
                                              x,
                                              dtype=tf.float32)

    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
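
A hedged sketch of the variables the snippet above expects but does not define; `weights['out']` needs `2 * num_hidden` rows because the forward and backward outputs are concatenated per time step (the concrete values below are illustrative assumptions):

num_hidden = 128   # must match the cell size used above
num_classes = 10
weights = {'out': tf.Variable(tf.random_normal([2 * num_hidden, num_classes]))}
biases = {'out': tf.Variable(tf.random_normal([num_classes]))}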
Example #6
    def __call__(self, inputs, name, training=False):
        """
        Runs the bidirectional LSTM, produces outputs and saves both forward and backward states as well as gradients.
        :param inputs: The inputs should be a list of shape [timestep_size, batch_size, length]
        :param name: Name to give to the tensorflow op
        :param training: Flag that indicates if this is a training or evaluation stage
        :return: Returns the LSTM outputs
        """
#        print (inputs.shape, 'lstm inputs.shape')
        with tf.name_scope('bid-lstm' + name), tf.variable_scope('bid-lstm', reuse=self.reuse):
            fw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                             for i in range(len(self.layer_sizes))]
            bw_lstm_cells = [rnn.LSTMCell(num_units=self.layer_sizes[i], activation=tf.nn.tanh)
                             for i in range(len(self.layer_sizes))]

            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells,
                bw_lstm_cells,
                inputs,
                dtype=tf.float32
            )
#        print (outputs.shape, 'lstm outputs.shape')
        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bid-lstm')
        return outputs
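
A minimal usage sketch for this method, assuming the owning class (called BidirectionalLSTM in the later examples) sets `self.layer_sizes` and `self.reuse = False` in a constructor that is not shown here; shapes and names are illustrative assumptions:

lstm = BidirectionalLSTM(layer_sizes=[64, 64])           # hypothetical constructor signature
sequence = tf.placeholder(tf.float32, [None, 8, 32])     # [batch, time, features]
inputs = tf.unstack(sequence, num=8, axis=1)             # list of 8 [batch, 32] tensors
outputs = lstm(inputs, name='_encoder')                  # list of 8 [batch, 2 * 64] tensors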
Example #7
    def __common_model(self, X, timesteps, num_hidden, layers, w1, b1):
        x = tf.unstack(X, timesteps, 1)
        fw_lstm_cells_encoder = [
            self.__lstm_cell(num_hidden) for i in range(layers)
        ]
        bw_lstm_cells_encoder = [
            self.__lstm_cell(num_hidden) for i in range(layers)
        ]
        outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
            fw_lstm_cells_encoder, bw_lstm_cells_encoder, x, dtype=tf.float32)
        #Checking size
        print("outputs len:", len(outputs))
        print("outputs[0].shape:", outputs[0].shape)
        outputs = tf.reshape(outputs, [timesteps, -1, num_hidden * 2])
        print("R_outputs[0].shape:", outputs.shape)

        # Sort, first batch dimension
        sorted_outputs = tf.transpose(outputs, (1, 0, 2))
        print("sorted_outputs.shape:", sorted_outputs)

        # list is reshaped in order to multiply with the matrix
        ######################################batch * timesteps, num_hidden * 2
        outputs = tf.reshape(sorted_outputs, [-1, num_hidden * 2])

        # Vector Z is calculated
        return tf.matmul(
            outputs,
            w1) + b1  # batch * timesteps, self.num_input  * self.d_vector
Example #8
    def g_embedding_biLSTM(self, inputs, reuse=False, layers=False, num=None):

        if layers:
            layer_sizes = [32 for i in range(num)]
            # layer_sizes = [32, 32, 32, 32]
        else:
            layer_sizes = [32]

        with tf.variable_scope('encoder', reuse=reuse) as scope:
            if reuse:
                scope.reuse_variables()

            # fw_lstm_cells_encoder = [rnn.LSTMCell(num_units=layer_sizes[i], activation=tf.nn.tanh)
            #                          for i in range(len(layer_sizes))]
            # bw_lstm_cells_encoder = [rnn.LSTMCell(num_units=layer_sizes[i], activation=tf.nn.tanh)
            #                          for i in range(len(layer_sizes))]

            fw_lstm_cells_encoder = [self._lstm_cell() for i in range(len(layer_sizes))]
            bw_lstm_cells_encoder = [self._lstm_cell() for i in range(len(layer_sizes))]
            outputs, outputs_state_fw, outputs_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells_encoder, bw_lstm_cells_encoder, inputs, dtype=tf.float32)

        self.g_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope='encoder')

        return outputs
Example #9
    def __call__(self, inputs, name, training=False):
        # Defining __call__ on a class makes its instances callable, like overloading the call operator
        # lstm = BidirectionalLSTM(): runs __init__
        # lstm(...): calling the instance again with (self, inputs, name, training=False) runs __call__
        """
        Runs the bidirectional LSTM, produces outputs and saves both forward and backward states as well as gradients.
        :param inputs: The inputs should be a list of shape [sequence_length, batch_size, 64]
        :param name: Name to give to the tensorflow op
        :param training: Flag that indicates if this is a training or evaluation stage
        :return: Returns the LSTM outputs, as well as the forward and backward hidden states.
        """
        with tf.name_scope('bid-lstm' + name), tf.variable_scope(
                'bid-lstm', reuse=self.reuse):
            fw_lstm_cells = [
                rnn.LSTMCell(num_units=self.layer_sizes[i],
                             activation=tf.nn.tanh)
                for i in range(len(self.layer_sizes))
            ]
            bw_lstm_cells = [
                rnn.LSTMCell(num_units=self.layer_sizes[i],
                             activation=tf.nn.tanh)
                for i in range(len(self.layer_sizes))
            ]

            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells, bw_lstm_cells, inputs, dtype=tf.float32)

        self.reuse = True
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='bid-lstm')
        return outputs, output_state_fw, output_state_bw
Example #10
    def __call__(self, inputs, name):
        """"
         只要定义类型的时候,实现__call__函数,这个类型就成为可调用的,相当于重载了括号运算符
         lstm = BidirectionalLSTM():执行__init__
         lstm():再次调用()里是(self, inputs, name, training=False):执行__call__
        """
        with tf.name_scope('bid_lstm' + name), tf.variable_scope(
                'bid_lstm', reuse=self.reuse):
            fw_lstm_cells = [
                rnn.LSTMCell(num_units=self.layer_sizes[i],
                             activation=tf.nn.tanh)
                for i in range(len(self.layer_sizes))
            ]
            bw_lstm_cells = [
                rnn.LSTMCell(num_units=self.layer_sizes[i],
                             activation=tf.nn.tanh)
                for i in range(len(self.layer_sizes))
            ]

            # Bidirectional LSTM: outputs are the forward (fw) and backward (bw) LSTM outputs concatenated at each step
            outputs, output_state_fw, output_state_bw = rnn.stack_bidirectional_rnn(
                fw_lstm_cells, bw_lstm_cells, inputs, dtype=tf.float32)

        self.reuse = True  # share the LSTM variables on later calls
        self.variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                           scope='bid_lstm')
        # print(outputs.shape, output_state_fw.shape, output_state_bw.shape)
        return outputs, output_state_fw, output_state_bw
Example #11
        def simple_stack_bilstm(x,layers,num_hidden,timesteps,name):
            # Unstack to get a list of 'timesteps' tensors of shape (batch_size, num_input)
            x = tf.unstack(x, timesteps, 1)
            # Define lstm cells with tensorflow
            lstm_fw_cell = [rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, reuse=tf.AUTO_REUSE, name=name+"_f_"+str(i)) for i in range(layers)]
            lstm_bw_cell = [rnn.BasicLSTMCell(num_hidden, forget_bias=1.0, reuse=tf.AUTO_REUSE, name=name+"_b_"+str(i)) for i in range(layers)]

            outputs, _, _ = rnn.stack_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
            #print(type(outputs))
            return outputs
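
A hedged usage sketch for `simple_stack_bilstm`, assuming the function is in scope and TensorFlow 1.x with `tf.contrib.rnn` imported as `rnn`; the placeholder shape is an illustrative assumption:

batch_x = tf.placeholder(tf.float32, [None, 28, 28])    # [batch, timesteps, num_input]
outputs = simple_stack_bilstm(batch_x, layers=2, num_hidden=128,
                              timesteps=28, name='blstm')
last_output = outputs[-1]                               # [batch, 2 * 128]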
Example #12
    def build_model(self, metadata_path=None, embedding_weights=None):
        self.embedding_weights, self.config = ops.embedding_layer(
            metadata_path[0], embedding_weights[0])
        self.pos_embedding_weights, self.config = ops.embedding_layer(
            metadata_path[1], embedding_weights[1], name='pos_embedding')
        self.embedded_input = tf.nn.embedding_lookup(self.embedding_weights,
                                                     self.input)
        self.embedded_pos = tf.nn.embedding_lookup(self.pos_embedding_weights,
                                                   self.pos)

        self.merged_input = tf.concat([self.embedded_input, self.embedded_pos],
                                      axis=-1)
        cells_fw, cells_bw = [], []
        for layer in range(self.args['rnn_layers']):
            cells_fw.append(
                tf.contrib.rnn.LSTMCell(self.args['hidden_units'],
                                        state_is_tuple=True))
            cells_bw.append(
                tf.contrib.rnn.LSTMCell(self.args['hidden_units'],
                                        state_is_tuple=True))

        self.rnn_output, _, _ = stack_bidirectional_rnn(
            cells_fw,
            cells_bw,
            tf.unstack(tf.transpose(self.merged_input, perm=[1, 0, 2])),
            dtype=tf.float32,
            sequence_length=self.input_lengths)

        weight, bias = self.weight_and_bias(2 * self.args['hidden_units'],
                                            self.args['n_classes'])
        self.rnn_output = tf.reshape(
            tf.transpose(tf.stack(self.rnn_output), perm=[1, 0, 2]),
            [-1, 2 * self.args['hidden_units']])
        self.rnn_output = dropout(self.rnn_output,
                                  keep_prob=self.args['dropout'])
        logits = tf.matmul(self.rnn_output, weight) + bias
        prediction = tf.nn.softmax(logits)
        self.prediction = tf.reshape(
            prediction,
            [-1, self.args.get("sequence_length"), self.args['n_classes']])
        open_targets = tf.reshape(self.output, [-1, self.args['n_classes']])
        with tf.name_scope("loss"):
            #self.loss = self.cost()
            self.loss = tf.losses.softmax_cross_entropy(open_targets, logits)

            if self.args["l2_reg_beta"] > 0.0:
                self.regularizer = ops.get_regularizer(
                    self.args["l2_reg_beta"])
                self.loss = tf.reduce_mean(self.loss + self.regularizer)
        with tf.name_scope('accuracy'):
            self.correct_prediction = tf.equal(tf.argmax(prediction, 1),
                                               tf.argmax(open_targets, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(self.correct_prediction, tf.float32))
Example #13
def biRNN():
    inputs = tf.reshape(X, [-1], name='flattened_input')
    embedded_inputs = tf.nn.embedding_lookup(word_embeddings, inputs)

    embedded_inputs = tf.reshape(embedded_inputs, [-1, time_steps, 128])
    embedded_inputs = tf.unstack(embedded_inputs, time_steps, 1)

    lstm_layer_fw = rnn.BasicLSTMCell(num_units, forget_bias=1)
    lstm_layer_bw = rnn.BasicLSTMCell(num_units, forget_bias=1)
    conc_outputs, final_state_fw, final_state_bw = rnn.stack_bidirectional_rnn(
        [lstm_layer_fw], [lstm_layer_bw], embedded_inputs, dtype=tf.float32)

    # Concatenate the final forward and backward hidden states
    return tf.concat([final_state_fw[-1].h, final_state_bw[-1].h], axis=1)
Example #14
def BiRNN(x, weight, biases):
    x = tf.transpose(x, [1, 0, 2])  # the axis to split along goes first
    x = tf.reshape(x, [-1, n_input])
    x = tf.split(x, n_steps)  # axis = 0
    # After splitting, x is a list of n_steps tensors of shape (batch_size, n_input),
    # which is the input format stack_bidirectional_rnn expects
    lstm_fw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],
                                                [lstm_bw_cell],
                                                x,
                                                dtype=tf.float32)
    # outputs: n_steps tensors of shape (batch_size, 2 * n_hidden); use the last time step
    return tf.matmul(outputs[-1], weight) + biases
Example #15
def RNN(x, weights, biases):
    
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, n_steps, n_input)
    # Required shape: 'n_steps' tensors list of shape (batch_size, n_input)

    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, n_steps, 1)

    # Define a lstm cell with tensorflow
    #lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    #lstm_cell = rnn.GRUCell(n_hidden)
    lstm_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple = True, reuse = tf.get_variable_scope().reuse)
    #lstm_cell_bk = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple = True, reuse = tf.get_variable_scope().reuse)
    lstm_cell_bk = rnn.LSTMBlockCell(n_hidden, forget_bias=1.0 )
    
    # make the deep rnn
    no_of_layers = 3 # layer number of drnn
    stacked_lstm = rnn.MultiRNNCell(
        [
        #rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple = True, reuse = tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden, use_peepholes=True, forget_bias=1.0, state_is_tuple = True, reuse = tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
        ]
        )
    
    stacked_lstm_bk = rnn.MultiRNNCell(
        [
        #rnn.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple = True, reuse = tf.get_variable_scope().reuse)
        rnn.LSTMCell(n_hidden, use_peepholes=True, forget_bias=1.0, state_is_tuple = True, reuse = tf.get_variable_scope().reuse)
        for _ in range(no_of_layers)
        ]
        )
    
    # providing the dropout for rnn
    #lstm_cell = rnn.DropoutWrapper(lstm_cell, output_keep_prob=0.5) # for rnn
    stacked_lstm = rnn.DropoutWrapper(stacked_lstm, output_keep_prob=0.5) # for deep rnn
    
    # Get lstm cell output
    #outputs, states = rnn.static_rnn(lstm_cell, x, dtype=tf.float32) # single layer rnn
    #outputs, states = rnn.static_rnn(stacked_lstm,, x, dtype=tf.float32) # deep rnn
    #outputs, states, states_bk = rnn.static_bidirectional_rnn(lstm_cell, lstm_cell_bk, x, dtype=tf.float32) # single layer dirnn
    #outputs, states, states_bk = rnn.static_bidirectional_rnn(stacked_lstm, stacked_lstm_bk, x, dtype=tf.float32) # deep dirnn
    outputs, states, states_bk = rnn.stack_bidirectional_rnn(
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        [rnn.GRUCell(n_hidden) for _ in range(no_of_layers)],
        x, dtype=tf.float32)  # deep bidirectional rnn
    
    # Linear activation, using rnn inner loop last output
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Example #16
    def inference(self, inputs, seq_len=None, reuse=False):
        """
        Inputs
        ------
        inputs : float, shape=[batch_size, seq_length=100, PLAYERS=11, COLS=98, ROWS=46]
            real(from data) or fake(from G)
        seq_len :
            temporarily not used

        Return
        ------
        decision : bool
            real(from data) or fake(from G)
        """
        with tf.variable_scope('D', reuse=reuse) as scope:
            # unstack, axis=1 -> [batch, time, feature]
            print(inputs)
            inputs = tf.transpose(inputs, perm=[0, 1, 3, 4, 2])
            print(inputs)
            inputs = tf.unstack(inputs, num=self.seq_length, axis=1)
            blstm_input = []
            output_list = []
            for time_step in range(self.seq_length):
                with tf.variable_scope('conv') as scope:
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()
                    filters_list = [32, 64, 128, 256]
                    next_input = inputs[time_step]
                    for i in range(len(filters_list)):
                        with tf.variable_scope('conv' + str(i)) as scope:
                            conv = layers.conv2d(
                                inputs=next_input,
                                num_outputs=filters_list[i],
                                kernel_size=[5, 5],
                                stride=2,
                                padding='SAME',
                                activation_fn=tf.nn.relu,
                                weights_initializer=layers.xavier_initializer(
                                    uniform=False),
                                weights_regularizer=None,
                                biases_initializer=tf.zeros_initializer(),
                                reuse=scope.reuse,
                                scope=scope)
                            next_input = conv
                    with tf.variable_scope('fc') as scope:
                        flat_input = layers.flatten(next_input)
                        fc = layers.fully_connected(
                            inputs=flat_input,
                            num_outputs=self.hidden_size,
                            activation_fn=tf.nn.relu,
                            weights_initializer=layers.xavier_initializer(
                                uniform=False),
                            biases_initializer=tf.zeros_initializer(),
                            reuse=scope.reuse,
                            scope=scope)
                        blstm_input.append(fc)
            with tf.variable_scope('stack_blstm') as scope:
                stack_blstm, _, _ = rnn.stack_bidirectional_rnn(
                    cells_fw=[
                        self.__lstm_cell() for _ in range(self.rnn_layers)
                    ],
                    cells_bw=[
                        self.__lstm_cell() for _ in range(self.rnn_layers)
                    ],
                    inputs=blstm_input,
                    dtype=tf.float32,
                    sequence_length=seq_len)
            with tf.variable_scope('output') as scope:
                for i, out_blstm in enumerate(stack_blstm):
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    with tf.variable_scope('fc') as scope:
                        fc = layers.fully_connected(
                            inputs=out_blstm,
                            num_outputs=1,
                            activation_fn=self.__leaky_relu,
                            weights_initializer=layers.xavier_initializer(
                                uniform=False),
                            biases_initializer=tf.zeros_initializer(),
                            reuse=scope.reuse,
                            scope=scope)
                        output_list.append(fc)
            # stack, axis=1 -> [batch, time, feature]
            decisions = tf.stack(output_list, axis=1)
            print('decisions', decisions)
            decision = tf.reduce_mean(decisions, axis=1)
            print('decision', decision)
            return decision
Example #17
    def __init__(self, args, training=True):
        self.args = args
        if not training:
            args.batch_size = 1
        if args.model == 'rnn':
            cell_fn = rnn.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn.BasicLSTMCell
        elif args.model == 'nas':
            cell_fn = rnn.NASCell
        else:
            raise Exception("model type not supported: {}".format(args.model))

        #list of num_layers forward cells. Each cell is an unrollable RNN of variable length.
        self.cells_fw = []
        for _ in range(args.num_layers):
            cell = cell_fn(
                args.rnn_size)  #rnn_size is the dimension of the hidden layer
            if training and (args.output_keep_prob < 1.0
                             or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(
                    cell,
                    input_keep_prob=args.input_keep_prob,
                    output_keep_prob=args.output_keep_prob)
            self.cells_fw.append(
                cell)  #cells is num_layers of cell stacked together

        #list of backward cells
        self.cells_bw = []
        for _ in range(args.num_layers):
            cell = cell_fn(args.rnn_size)
            if training and (args.output_keep_prob < 1.0
                             or args.input_keep_prob < 1.0):
                cell = rnn.DropoutWrapper(
                    cell,
                    input_keep_prob=args.input_keep_prob,
                    output_keep_prob=args.output_keep_prob)
            self.cells_bw.append(cell)

        #placeholder for input data
        self.input_data = tf.placeholder(tf.int32,
                                         [args.batch_size, args.seq_length])

        #define initial hidden states of each cell as the default zero_state
        self.initial_states_fw = tuple([
            self.cells_fw[i].zero_state(args.batch_size, tf.float32)
            for i in range(args.num_layers)
        ])
        self.initial_states_bw = tuple([
            self.cells_bw[i].zero_state(args.batch_size, tf.float32)
            for i in range(args.num_layers)
        ])

        #We define an embedding. This is a look-up table for every item in the vocabulary, for a rnn_size-dimensional hidden vector.
        #This embedding will be learned over time as a part of back-propagation.
        embedding = tf.get_variable("embedding",
                                    [args.vocab_size, args.rnn_size])
        #we look up our examples in the embedding to expand the input to rnn_size dimensions.
        inputs = tf.nn.embedding_lookup(embedding, self.input_data)

        # dropout beta testing: double check which one should affect next line
        if training and args.output_keep_prob:
            inputs = tf.nn.dropout(inputs, args.output_keep_prob)

        #split the input items one by one. If char_level, split everything into letters. If word_level, split into words.
        inputs = tf.split(inputs, args.seq_length, 1)
        #inputs is a length seq_length list of batch_size x rnn_size tensors
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs
                  ]  #get rid of the 1-dimension at axis 1, flatten

        #define bidirectional_rnn layer
        #outputs: batch_size x rnn_size and there are seq_length number of outputs. Outputs at every step!
        self.outputs, self.final_state_fw, self.final_state_bw = rnn.stack_bidirectional_rnn(
            self.cells_fw,
            self.cells_bw,
            inputs,
            self.initial_states_fw,
            self.initial_states_bw,
            tf.float32,
            scope="rnnlm")
Example #18
    def __D(self, inputs, seq_len=None, reuse=False):
        """
        Inputs
        ------
        inputs : float, shape=[batch, length, 272]
            real(from data) or fake(from G)
        seq_len :
            temporarily not used

        Return
        ------
        decision : bool
            real(from data) or fake(from G)
        """
        with tf.variable_scope('D', reuse=reuse) as scope:
            # unstack, axis=1 -> [batch, time, feature]
            inputs = tf.unstack(inputs, num=self.seq_length, axis=1)
            blstm_input = []
            output_list = []
            with tf.variable_scope('fully_connect_input') as scope:
                for time_step in range(self.seq_length):
                    if time_step > 0:
                        tf.get_variable_scope().reuse_variables()
                    fully_connect_input = layers.fully_connected(
                        inputs=inputs[time_step],
                        num_outputs=self.hidden_size,
                        activation_fn=self.__leaky_relu,
                        weights_initializer=layers.xavier_initializer(
                            uniform=False),
                        biases_initializer=tf.constant_initializer(),
                        scope=scope)
                    self.__summarize('fully_connect_input', fully_connect_input, collections=[
                        'D'], postfix='Activation')
                    blstm_input.append(fully_connect_input)
            with tf.variable_scope('stack_bi_lstm') as scope:
                out_blstm_list, _, _ = rnn.stack_bidirectional_rnn(
                    cells_fw=[self.__lstm_cell()
                              for _ in range(self.rnn_layers)],
                    cells_bw=[self.__lstm_cell()
                              for _ in range(self.rnn_layers)],
                    inputs=blstm_input,
                    dtype=tf.float32,
                    sequence_length=seq_len,
                    scope=scope
                )
            with tf.variable_scope('fully_connect') as scope:
                for i, out_blstm in enumerate(out_blstm_list):
                    self.__summarize('out_blstm', out_blstm, collections=[
                        'D'], postfix='Activation')
                    if i > 0:
                        tf.get_variable_scope().reuse_variables()
                    fconnect = layers.fully_connected(
                        inputs=out_blstm,
                        num_outputs=1,
                        activation_fn=self.__leaky_relu,
                        weights_initializer=layers.xavier_initializer(
                            uniform=False),
                        biases_initializer=tf.zeros_initializer(),
                        scope=scope)
                    self.__summarize('fconnect', fconnect, collections=[
                        'D'], postfix='Activation')
                    output_list.append(fconnect)
            # print(output_list)
            # stack, axis=1 -> [batch, time, feature]
            decisions = tf.stack(output_list, axis=1)
            print('decisions', decisions)
            decision = tf.reduce_mean(decisions, axis=1)
            print('decision', decision)

            return decision
Example #19
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)

#outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x1,
#                                              dtype=tf.float32)

#outputs, _, _ = rnn.stack_bidirectional_rnn([lstm_fw_cell],[lstm_bw_cell], x1,
#                                              dtype=tf.float32)

stacked_rnn = []
stacked_bw_rnn = []
for i in range(3):
    stacked_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
    stacked_bw_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))

outputs, _, _ = rnn.stack_bidirectional_rnn(stacked_rnn,
                                            stacked_bw_rnn,
                                            x1,
                                            dtype=tf.float32)

pred = tf.contrib.layers.fully_connected(outputs[-1],
                                         n_classes,
                                         activation_fn=None)

# Define loss and optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
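
A hedged training-loop sketch for the graph above; `x`, `y`, `batch_size`, `n_steps`, `n_input`, and the `mnist` feeder are assumptions about the surrounding script rather than part of the original snippet:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        # mnist is assumed to be a tutorial-style MNIST feeder (flattened 28*28 images)
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        batch_x = batch_x.reshape((batch_size, n_steps, n_input))
        sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
        if step % 100 == 0:
            acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            print('step', step, 'train accuracy', acc)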
Example #20
    def __init__(self, conf, pre_word2vec=None, activate_fn=tf.nn.tanh):
        pretrained_w2v = conf.pretrained_w2v
        max_position = conf.max_position
        pos_dim = conf.pos_dim
        num_relation = conf.num_relation
        len_sentence = conf.len_sentence
        num_hidden = conf.num_hidden
        batch_size = conf.batch_size
        reg_weight = conf.reg_weight
        network_type = conf.network_type

        # CNN-specific config settings
        num_filters = conf.num_filters
        filter_sizes = [2, 3, 4]
        word_embedding_dim = 50  #subject to change

        # Note that the first dimension of input_sentence and input_y are different because each row in input_y is per
        # triple, whereas each row in input_sentences corresponds to a single sentence and a triple consists of multiple
        # sentences. We use input_triple_index to align sentences with corresponding label
        # for example, label of input_sentences[input_triple_index[0]:input_triple_index[1]] is input[0]
        self.input_sentences = tf.placeholder(dtype=tf.int32,
                                              shape=[None, len_sentence],
                                              name='input_sentence')
        self.input_pos1 = tf.placeholder(dtype=tf.int32,
                                         shape=[None, len_sentence],
                                         name='input_position1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32,
                                         shape=[None, len_sentence],
                                         name='input_position2')
        self.input_y = tf.placeholder(dtype=tf.int32,
                                      shape=[None, num_relation],
                                      name='input_y')
        self.input_triple_index = tf.placeholder(dtype=tf.int32,
                                                 shape=[None],
                                                 name='input_triple_index')

        num_sentences = self.input_triple_index[-1]

        with tf.device('/gpu:1'):
            if pretrained_w2v:
                self.word2vec = tf.get_variable(initializer=pre_word2vec,
                                                name="word_embedding")
            else:
                self.word2vec = tf.get_variable(
                    shape=[conf.voca_size, conf.word_embedding_dim],
                    name="word_embedding")

            self.pos2vec1 = tf.get_variable(shape=[max_position, pos_dim],
                                            name="pos2vec1")
            self.pos2vec2 = tf.get_variable(shape=[max_position, pos_dim],
                                            name="pos2vec2")

        # concatenate word embedding + position embeddings
        # input_forward.shape = [num_sentence, len_sentence, w2v_dim+2*conf.pos_dim]
        input_forward = tf.concat([
            tf.nn.embedding_lookup(self.word2vec, self.input_sentences),
            tf.nn.embedding_lookup(self.pos2vec1, self.input_pos1),
            tf.nn.embedding_lookup(self.pos2vec2, self.input_pos2)
        ], 2)

        if network_type == 'cnn':
            with tf.device('/gpu:1'):
                #input_forward = tf.unstack(input_forward, len_sentence, 1)
                input_forward = tf.expand_dims(
                    input_forward, -1)  #as conv2d expects 4 rank input
                pooled_outputs = []
                for i, filter_size in enumerate(filter_sizes):
                    with tf.name_scope("conv-maxpool-%s" % filter_size):
                        filter_shape = [
                            filter_size, word_embedding_dim, 1, num_filters
                        ]
                        W = tf.Variable(tf.truncated_normal(filter_shape,
                                                            stddev=0.1),
                                        name="W")
                        b = tf.Variable(tf.constant(0.1, shape=[num_filters]),
                                        name="b")

                        #Convolution layer
                        conv = tf.nn.conv2d(input_forward,
                                            W,
                                            strides=[1, 1, 1, 1],
                                            padding="SAME",
                                            name="conv")

                        #Activation function (ReLu) layer
                        nl = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

                        #Max-pooling layer
                        pooled = tf.nn.max_pool(
                            nl,
                            ksize=[1, len_sentence - filter_size + 1, 1, 1],
                            strides=[1, 1, 1, 1],
                            padding="SAME",
                            name="pool")

                        pooled_outputs.append(pooled)
                # Combine all pooled features
                num_filters_total = num_filters * len(filter_sizes)
                self.pool = tf.concat(pooled_outputs, 3)

                # m dim : input forward (?)
                sentence_embedding = tf.reshape(self.pool,
                                                [-1, num_filters_total])
                sentence_embedding = tf.nn.dropout(sentence_embedding, 0.5)
                #num_hidden = 16 #(sentence_embedding n dimension)
                h_sentence = sentence_embedding
                num_hidden = num_filters_total
        elif network_type == 'rnn':
            with tf.variable_scope("RNN"):

                def create_rnn_cells(num_units):
                    """return list of rnn cells"""
                    cells = [
                        rnn.GRUCell(num_units, activation=activate_fn)
                        for _ in range(conf.num_layer)
                    ]
                    if conf.dropout and conf.is_train:
                        return [rnn.DropoutWrapper(cell) for cell in cells]
                    else:
                        return cells

                input_forward = tf.unstack(input_forward, len_sentence, 1)

                # construct rnn with high-level api
                if conf.bidirectional:
                    output_rnn, _, _ = rnn.stack_bidirectional_rnn(
                        create_rnn_cells(num_hidden),
                        create_rnn_cells(num_hidden),
                        input_forward,
                        dtype=tf.float32)
                    num_hidden = 2 * num_hidden  # dimension of concatenated fw-bw outputs
                    output_hidden = tf.reshape(
                        tf.concat(output_rnn, 1),
                        [num_sentences, len_sentence, num_hidden])
                else:
                    output_rnn, _ = rnn.static_rnn(
                        create_rnn_cells(num_hidden)[0],
                        input_forward,
                        dtype=tf.float32)
                    output_hidden = tf.reshape(
                        tf.concat(output_rnn, 1),
                        [num_sentences, len_sentence, num_hidden])

                # word-level attention layer, represent a sentence as a weighted sum of word vectors
                with tf.variable_scope("word-attn"):
                    if conf.word_attn:
                        word_attn = tf.get_variable('W', shape=[num_hidden, 1])
                        word_weight = tf.matmul(
                            tf.reshape(
                                output_hidden,
                                [num_sentences * len_sentence, num_hidden]),
                            word_attn)
                        word_weight = tf.reshape(word_weight,
                                                 [num_sentences, len_sentence])
                        sentence_embedding = tf.matmul(
                            tf.reshape(tf.nn.softmax(word_weight),
                                       [num_sentences, 1, len_sentence]),
                            output_hidden)
                        sentence_embedding = tf.reshape(
                            sentence_embedding, [num_sentences, num_hidden])
                    else:
                        sentence_embedding = tf.reduce_mean(output_hidden, 1)

            with tf.variable_scope("fc-hidden"):
                h_sentence = tf.layers.dense(sentence_embedding,
                                             num_hidden,
                                             activation=activate_fn,
                                             name='fc-hidden')

        # sentence-level attention layer, represent a triple as a weighted sum of sentences
        with tf.device('/gpu:1'), tf.variable_scope("sentence-attn"):
            attn_weight = tf.get_variable("W", shape=[num_hidden, 1])
            if conf.use_multiplier:
                multiplier = tf.get_variable("A", shape=[num_hidden])
            triple_embeddings = list()

            for i in range(batch_size):
                target_sentences = h_sentence[self.input_triple_index[i]:self.
                                              input_triple_index[i + 1]]

                if conf.sent_attn:
                    num_triple_sentence = self.input_triple_index[
                        i + 1] - self.input_triple_index[i]
                    if conf.use_multiplier:
                        tmp = tf.multiply(target_sentences, multiplier)
                    else:
                        tmp = target_sentences
                    sentence_weight = tf.reshape(
                        tf.nn.softmax(
                            tf.reshape(tf.matmul(tmp, attn_weight),
                                       [num_triple_sentence])),
                        [1, num_triple_sentence])
                    triple_embedding = tf.squeeze(
                        tf.matmul(sentence_weight,
                                  target_sentences))  # [num_hidden]
                else:
                    # use mean vector if sentence-level attention layer is not used
                    triple_embedding = tf.squeeze(
                        tf.reduce_mean(target_sentences, 0))
                triple_embeddings.append(triple_embedding)

            triple_embeddings = tf.reshape(triple_embeddings, [-1, num_hidden])
            triple_output = tf.layers.dense(triple_embeddings,
                                            num_relation,
                                            name='fc-output')

        # Optmization preparation step
        self.prob = tf.nn.softmax(triple_output)
        self.predictions = tf.argmax(self.prob, axis=1, name="predictions")
        self.total_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(logits=triple_output,
                                                    labels=self.input_y),
            name="loss")
        self.accuracy = tf.reduce_mean(tf.cast(
            tf.equal(self.predictions, tf.argmax(self.input_y, 1)), "float"),
                                       name="accuracy")

        tf.summary.scalar("loss", self.total_loss)
        # regularization
        self.l2_loss = tf.contrib.layers.apply_regularization(
            regularizer=tf.contrib.layers.l2_regularizer(reg_weight),
            weights_list=tf.trainable_variables())
        self.final_loss = self.total_loss + self.l2_loss
        tf.summary.scalar("l2_loss", self.l2_loss)
        tf.summary.scalar("final_loss", self.final_loss)
Example #21
n_input = 28                # MNIST data input (img: 28 * 28)
n_steps = 28                # number of time steps
n_hidden = 128              # number of hidden units
n_classes = 10              # number of MNIST classes (0-9)

tf.reset_default_graph()

# Define placeholders
x = tf.placeholder('float',[None,n_steps,n_input])
y = tf.placeholder('float',[None,n_classes])

x1 = tf.unstack(x,n_steps,1)
lstm_fw_cell = rnn.BasicLSTMCell(n_hidden,forget_bias = 1.0)
# backward cell
lstm_bw_cell = rnn.BasicLSTMCell(n_hidden,forget_bias = 1.0)
outputs,_ ,_ = rnn.stack_bidirectional_rnn([lstm_fw_cell],[lstm_bw_cell],x1,dtype = tf.float32)

print(outputs[0].shape,len(outputs))

pred = tf.contrib.layers.fully_connected(outputs[-1],n_classes,activation_fn = None)

# Define hyperparameters
learning_rate = 0.01
batch_size = 128
time_steps = 28
# Loss function: cross entropy (see p. 107, last line)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))  # tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
# equivalent to
# cost = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)

# Use a gradient-descent optimizer
Example #22
x = tf.placeholder('float', [None, n_steps*n_input])
y = tf.placeholder('float', [None, n_classes])

x1 = tf.reshape(x, [-1, 28, 28])
x1 = tf.unstack(x1, n_steps, 1)

stacked_fw_rnn = []
stacked_bw_rnn = []
for i in range(3):
    stacked_fw_rnn.append(BasicLSTMCell(n_hidden))
    stacked_bw_rnn.append(BasicLSTMCell(n_hidden))

fw_mcell = MultiRNNCell(stacked_fw_rnn)
bw_mcell = MultiRNNCell(stacked_bw_rnn)

outputs, _, _ = stack_bidirectional_rnn([fw_mcell], [bw_mcell], x1, dtype=tf.float32)

# output = tf.concat(outputs, 2)

pred = fully_connected(outputs[-1], n_classes, activation_fn=None)

cost = tf.reduce_mean(tf.reduce_sum(tf.square(pred - y)))

global_step = tf.Variable(0, trainable=False)
initial_learning_rate = 0.01

learning_rate = tf.train.exponential_decay(initial_learning_rate,
                                           global_step=global_step,
                                           decay_steps=3,
                                           decay_rate=0.9)
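
The decayed learning rate above only changes if `global_step` is incremented; a minimal sketch of wiring it into an optimizer (the choice of Adam here is an assumption, not from the original snippet):

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
    cost, global_step=global_step)  # advancing global_step drives the decay schedule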
Example #23
    def inference(self,
                  inputs,
                  conds,
                  seq_len=None,
                  reuse=False,
                  if_log_scalar_summary=False,
                  log_scope_name=''):
        """
        Inputs
        ------
        inputs : float, shape=[batch_size, seq_length=100, features=10]
            real(from data) or fake(from G)
        conds : float, shape=[batch_size, seq_length=100, features=13]

        Return
        ------
        score : float
            real(from data) or fake(from G)
        """
        concat_ = tf.concat([conds, inputs], axis=-1)
        inputs_ = tf.unstack(concat_, num=self.seq_length, axis=1)
        with tf.variable_scope('C_inference') as scope:
            output_list = []
            if reuse:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope('stack_bi_lstm') as scope:
                out_blstm_list, _, _ = rnn.stack_bidirectional_rnn(
                    cells_fw=[
                        self.__lstm_cell() for _ in range(self.num_layers)
                    ],
                    cells_bw=[
                        self.__lstm_cell() for _ in range(self.num_layers)
                    ],
                    inputs=inputs_,
                    dtype=tf.float32,
                    sequence_length=seq_len,
                    scope=scope)
            for i, out_blstm in enumerate(out_blstm_list):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                with tf.variable_scope('fully_connect') as scope:
                    fconnect = layers.fully_connected(
                        inputs=out_blstm,
                        num_outputs=1,
                        activation_fn=libs.leaky_relu,
                        weights_initializer=layers.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer(),
                        scope=scope)
                output_list.append(fconnect)
            # stack, axis=1 -> [batch, time, feature]
            decisions = tf.stack(output_list, axis=1)
            final_ = tf.reduce_mean(decisions, axis=1)
            final_ = tf.reshape(final_, shape=[self.batch_size])

            with tf.name_scope('heuristic_penalty') as scope:
                # 0. prepare data
                ball_pos = tf.reshape(
                    conds[:, :, :2],
                    shape=[self.batch_size, self.seq_length, 1, 2])
                teamB_pos = tf.reshape(
                    inputs, shape=[self.batch_size, self.seq_length, 5, 2])
                basket_right_x = tf.constant(
                    self.data_factory.BASKET_RIGHT[0],
                    dtype=tf.float32,
                    shape=[self.batch_size, self.seq_length, 1, 1])
                basket_right_y = tf.constant(
                    self.data_factory.BASKET_RIGHT[1],
                    dtype=tf.float32,
                    shape=[self.batch_size, self.seq_length, 1, 1])
                basket_pos = tf.concat([basket_right_x, basket_right_y],
                                       axis=-1)

                vec_ball_2_teamB = ball_pos - teamB_pos  # [128,100,5,2]
                vec_ball_2_basket = ball_pos - basket_pos  # [128,100,1,2]
                b2teamB_dot_b2basket = tf.matmul(
                    vec_ball_2_teamB, vec_ball_2_basket,
                    transpose_b=True)  # [128,100,5,1]
                b2teamB_dot_b2basket = tf.reshape(
                    b2teamB_dot_b2basket,
                    shape=[self.batch_size, self.seq_length, 5])
                dist_ball_2_teamB = tf.norm(vec_ball_2_teamB,
                                            ord='euclidean',
                                            axis=-1)
                dist_ball_2_basket = tf.norm(vec_ball_2_basket,
                                             ord='euclidean',
                                             axis=-1)
                one_sub_cosine = 1 - b2teamB_dot_b2basket / \
                    (dist_ball_2_teamB * dist_ball_2_basket)
                heuristic_penalty_all = one_sub_cosine * dist_ball_2_teamB
                heuristic_penalty_min = tf.reduce_min(heuristic_penalty_all,
                                                      axis=-1)
                heuristic_penalty = tf.reduce_mean(heuristic_penalty_min)

                if self.if_trainable_lambda:
                    trainable_lambda = tf.get_variable(
                        'trainable_heuristic_penalty_lambda',
                        shape=[],
                        dtype=tf.float32,
                        initializer=tf.constant_initializer(value=1.0))
                else:
                    trainable_lambda = tf.constant(
                        self.heuristic_penalty_lambda)

            # logging
            if if_log_scalar_summary:
                with tf.name_scope(log_scope_name):
                    tf.summary.scalar('heuristic_penalty',
                                      heuristic_penalty,
                                      collections=['C'])
                    tf.summary.scalar('trainable_lambda',
                                      trainable_lambda,
                                      collections=['C'])

            return final_ - trainable_lambda * heuristic_penalty
Example #24
    def __init__(self, conf, pre_word2vec=None, activate_fn=tf.nn.tanh):
        pretrained_w2v = conf.pretrained_w2v
        max_position = conf.max_position
        pos_dim = conf.pos_dim
        num_relation = conf.num_relation
        len_sentence = conf.len_sentence
        num_hidden = conf.num_hidden
        batch_size = conf.batch_size
        reg_weight = conf.reg_weight

        # Note that the first dimension of input_sentence and input_y are different because each row in input_y is per
        # triple, whereas each row in input_sentences corresponds to a single sentence and a triple consists of multiple
        # sentences. We use input_triple_index to align sentences with corresponding label
        # for example, label of input_sentences[input_triple_index[0]:input_triple_index[1]] is input[0]
        self.input_sentences = tf.placeholder(dtype=tf.int32,
                                              shape=[None, len_sentence],
                                              name='input_sentence')
        self.input_pos1 = tf.placeholder(dtype=tf.int32,
                                         shape=[None, len_sentence],
                                         name='input_position1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32,
                                         shape=[None, len_sentence],
                                         name='input_position2')
        self.input_y = tf.placeholder(dtype=tf.int32,
                                      shape=[None, num_relation],
                                      name='input_y')
        self.input_triple_index = tf.placeholder(dtype=tf.int32,
                                                 shape=[None],
                                                 name='input_triple_index')

        num_sentences = self.input_triple_index[-1]

        if pretrained_w2v:
            self.word2vec = tf.get_variable(initializer=pre_word2vec,
                                            name="word_embedding")
        else:
            self.word2vec = tf.get_variable(
                shape=[conf.voca_size, conf.word_embedding_dim],
                name="word_embedding")

        self.pos2vec1 = tf.get_variable(shape=[max_position, pos_dim],
                                        name="pos2vec1")
        self.pos2vec2 = tf.get_variable(shape=[max_position, pos_dim],
                                        name="pos2vec2")

        # concatenate word embedding + position embeddings
        # input_forward.shape = [num_sentence, len_sentence, w2v_dim+2*conf.pos_dim]
        input_forward = tf.concat([
            tf.nn.embedding_lookup(self.word2vec, self.input_sentences),
            tf.nn.embedding_lookup(self.pos2vec1, self.input_pos1),
            tf.nn.embedding_lookup(self.pos2vec2, self.input_pos2)
        ], 2)

        with tf.variable_scope("RNN"):

            def create_rnn_cells(num_units):
                """return list of rnn cells"""
                cells = [
                    rnn.GRUCell(num_units, activation=activate_fn)
                    for _ in range(conf.num_layer)
                ]
                if conf.dropout and conf.is_train:
                    return [rnn.DropoutWrapper(cell) for cell in cells]
                else:
                    return cells

            input_forward = tf.unstack(input_forward, len_sentence, 1)

            # construct rnn with high-level api
            if conf.bidirectional:
                output_rnn, _, _ = rnn.stack_bidirectional_rnn(
                    create_rnn_cells(num_hidden),
                    create_rnn_cells(num_hidden),
                    input_forward,
                    dtype=tf.float32)
                num_hidden = 2 * num_hidden  # dimension of concatenated fw-bw outputs
                output_hidden = tf.reshape(tf.concat(
                    output_rnn, 1), [num_sentences, len_sentence, num_hidden])
            else:
                output_rnn, _ = rnn.static_rnn(create_rnn_cells(num_hidden)[0],
                                               input_forward,
                                               dtype=tf.float32)
                output_hidden = tf.reshape(tf.concat(
                    output_rnn, 1), [num_sentences, len_sentence, num_hidden])

            # word-level attention layer, represent a sentence as a weighted sum of word vectors
            with tf.variable_scope("word-attn"):
                if conf.word_attn:
                    word_attn = tf.get_variable('W', shape=[num_hidden, 1])
                    word_weight = tf.matmul(
                        tf.reshape(output_hidden,
                                   [num_sentences * len_sentence, num_hidden]),
                        word_attn)
                    word_weight = tf.reshape(word_weight,
                                             [num_sentences, len_sentence])
                    sentence_embedding = tf.matmul(
                        tf.reshape(tf.nn.softmax(word_weight),
                                   [num_sentences, 1, len_sentence]),
                        output_hidden)
                    sentence_embedding = tf.reshape(
                        sentence_embedding, [num_sentences, num_hidden])
                else:
                    sentence_embedding = tf.reduce_mean(output_hidden, 1)

        with tf.variable_scope("fc-hidden"):
            h_sentence = tf.layers.dense(sentence_embedding,
                                         num_hidden,
                                         activation=activate_fn,
                                         name='fc-hidden')

        # sentence-level attention layer, represent a triple as a weighted sum of sentences
        with tf.variable_scope("sentence-attn"):
            attn_weight = tf.get_variable("W", shape=[num_hidden, 1])
            multiplier = tf.get_variable("A", shape=[num_hidden])
            triple_embeddings = list()

            for i in range(batch_size):
                target_sentences = h_sentence[self.input_triple_index[i]:self.
                                              input_triple_index[i + 1]]

                if conf.sent_attn:
                    num_triple_sentence = self.input_triple_index[
                        i + 1] - self.input_triple_index[i]
                    tmp = tf.multiply(target_sentences, multiplier)
                    sentence_weight = tf.reshape(
                        tf.nn.softmax(
                            tf.reshape(tf.matmul(tmp, attn_weight),
                                       [num_triple_sentence])),
                        [1, num_triple_sentence])
                    triple_embedding = tf.squeeze(
                        tf.matmul(sentence_weight,
                                  target_sentences))  # [num_hidden]
                else:
                    # use mean vector if sentence-level attention layer is not used
                    triple_embedding = tf.squeeze(
                        tf.reduce_mean(target_sentences, 0))
                triple_embeddings.append(triple_embedding)

            triple_embeddings = tf.reshape(triple_embeddings, [-1, num_hidden])
            triple_output = tf.layers.dense(triple_embeddings,
                                            num_relation,
                                            name='fc-output')

        self.prob = tf.nn.softmax(triple_output)
        self.predictions = tf.argmax(self.prob, axis=1, name="predictions")
        self.total_loss = tf.reduce_sum(
            tf.nn.softmax_cross_entropy_with_logits(logits=triple_output,
                                                    labels=self.input_y),
            name="loss")
        self.accuracy = tf.reduce_mean(tf.cast(
            tf.equal(self.predictions, tf.argmax(self.input_y, 1)), "float"),
                                       name="accuracy")

        tf.summary.scalar("loss", self.total_loss)
        # regularization
        self.l2_loss = tf.contrib.layers.apply_regularization(
            regularizer=tf.contrib.layers.l2_regularizer(reg_weight),
            weights_list=tf.trainable_variables())
        self.final_loss = self.total_loss + self.l2_loss
        tf.summary.scalar("l2_loss", self.l2_loss)
        tf.summary.scalar("final_loss", self.final_loss)
Example #25
# Define placeholders
x = tf.placeholder('float',[None,n_steps,n_input])
y = tf.placeholder('float',[None,n_classes])

x1 = tf.unstack(x,n_steps,1)

stacked_rnn = []
stacked_bw_rnn = []
for i in range(3):
    stacked_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
    stacked_bw_rnn.append(tf.contrib.rnn.LSTMCell(n_hidden))
    pass
mcell = tf.contrib.rnn.MultiRNNCell(stacked_rnn)
mcell_bw = tf.contrib.rnn.MultiRNNCell(stacked_bw_rnn)

outputs,_ ,_ = rnn.stack_bidirectional_rnn([mcell],[mcell_bw],x1,dtype = tf.float32)

print(outputs[0].shape,len(outputs))

pred = tf.contrib.layers.fully_connected(outputs[-1],n_classes,activation_fn = None)

# Define hyperparameters
learning_rate = 0.01
batch_size = 128
time_steps = 28
# Loss function: cross entropy (see p. 107, last line)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))  # tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))
# equivalent to
# cost = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)

# Use a gradient-descent optimizer