Example #1
def conv_block(input_tensor, kernel, filters, name, strides=(2, 2)):
    """ Function to create block of ResNet network which include
    three convolution layers and one skip-connection layer.

    Args:
        input_tensor: input tensorflow layer
        kernel: tuple of kernel size in convolution layer
        filters: list of nums filters in convolution layers
        name: name of block
        strides: typle of strides in convolution layer

    Output:
        x: Block output layer """
    filters1, filters2, filters3 = filters
    x = tf.layers.conv2d(input_tensor, filters1, (1, 1), strides, name='convfir' + name, activation=tf.nn.relu,\
                         kernel_initializer=xavier())

    x = tf.layers.conv2d(x, filters2, kernel, name='convsec' + name, activation=tf.nn.relu, padding='SAME',\
                         kernel_initializer=xavier())

    x = tf.layers.conv2d(x, filters3, (1, 1), name='convthr' + name,\
                         kernel_initializer=xavier())

    shortcut = tf.layers.conv2d(input_tensor, filters3, (1, 1), strides, name='short' + name, \
                         kernel_initializer=xavier())
    x = tf.concat([x, shortcut], axis=1)
    x = tf.nn.relu(x)
    return x
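Note: every snippet on this page calls xavier() without showing where it comes from. A reasonable assumption for this TF 1.x code (Example #16 below imports the conv2d variant the same way) is the alias sketched here; the original projects may import it slightly differently.

import tensorflow as tf  # TensorFlow 1.x
# Assumed alias, not shown in the snippets themselves.
from tensorflow.contrib.layers import xavier_initializer as xavier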
Example #2
    def __init__(self, args, wrd_emb):
        self.max_doc_len = args['max_doc_len']
        self.max_sen_len = args['max_sen_len']
        self.cls_cnt = args['cls_cnt']
        self.embedding = args['embedding']
        self.emb_dim = args['emb_dim']
        self.hidden_size = self.emb_dim
        self.prd_cnt = args['prd_cnt']
        self.l2_rate = args['l2_rate']
        self.debug = args['debug']

        self.best_dev_acc = .0
        self.best_test_acc = .0
        self.best_test_rmse = .0

        # initializers for parameters
        self.w_init = xavier()
        self.b_init = tf.initializers.zeros()
        self.e_init = xavier()

        # embeddings in the model
        self.wrd_emb = wrd_emb
        self.prd_emb = var('prd_emb', [self.prd_cnt, self.emb_dim],
                           self.e_init)
        self.embeddings = [self.wrd_emb, self.prd_emb]
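A minimal sketch of the args dictionary this constructor reads, inferred from the keys accessed above; the concrete values are illustrative placeholders, not the original hyper-parameters.

import numpy as np

args = {
    'max_doc_len': 40,                                 # sentences per document
    'max_sen_len': 50,                                 # tokens per sentence
    'cls_cnt': 5,                                      # number of output classes
    'embedding': np.zeros((50000, 300), np.float32),   # stand-in for pretrained word vectors
    'emb_dim': 300,
    'prd_cnt': 1000,                                   # number of products
    'l2_rate': 1e-5,
    'debug': False,
}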
Example #3
def identity_block(input_tensor, kernel, filters, name):
    """ Function to create block of ResNet network which include
    three convolution layers.

    Args:
        input_tensor: input tensorflow layer.
        kernel: tuple of kernel size in convolution layer.
        filters: list of nums filters in convolution layers.
        name: name of block.

    Output:
        x: Block output layer """
    filters1, filters2, filters3 = filters
    x = tf.layers.conv2d(input_tensor, filters1, (1, 1), name='convfir' + name, activation=tf.nn.relu,\
                         kernel_initializer=xavier())

    x = tf.layers.conv2d(x, filters2, kernel, name='convsec' + name, activation=tf.nn.relu, padding='SAME',\
                         kernel_initializer=xavier())

    x = tf.layers.conv2d(x, filters3, (1, 1), name='convthr' + name,\
                         kernel_initializer=xavier())


    x = tf.concat([x, input_tensor], axis=1)
    x = tf.nn.relu(x)
    return x
Example #4
    def __init__(self, args):
        self.max_doc_len = args['max_doc_len']
        self.max_sen_len = args['max_sen_len']
        self.cls_cnt = args['cls_cnt']
        self.emb_dim = args['emb_dim']
        self.hidden_size = args['hidden_size']
        self.usr_cnt = args['usr_cnt']
        self.prd_cnt = args['prd_cnt']
        self.l2_rate = args['l2_rate']
        self.debug = args['debug']
        self.lambda1 = args['lambda1']
        self.lambda2 = args['lambda2']
        self.lambda3 = args['lambda3']

        self.best_dev_acc = .0
        self.best_test_acc = .0
        self.best_test_rmse = .0

        # initializers for parameters
        self.weights_initializer = xavier()
        self.biases_initializer = tf.initializers.zeros()
        self.emb_initializer = xavier()

        hsize = self.hidden_size

        # embeddings in the model
        with tf.variable_scope('emb'):
            self.embeddings = {
                # 'wrd_emb': const(self.embedding, name='wrd_emb', dtype=tf.float32),
                # 'wrd_emb': tf.Variable(self.embedding, name='wrd_emb', dtype=tf.float32),
                'usr_emb':
                var('usr_emb', [self.usr_cnt, hsize], self.emb_initializer),
                'prd_emb':
                var('prd_emb', [self.prd_cnt, hsize], self.emb_initializer)
            }
Example #5
 def build_model(self):
     self.feats = tf.placeholder(tf.float32, [1, self.obs_size],
                                 name='input_feats')
     self.init_c = tf.placeholder(tf.float32, [1, self.hidden_dim])
     self.init_h = tf.placeholder(tf.float32, [1, self.hidden_dim])
     self.action = tf.placeholder(tf.int32, name='real_action')
     self.action_mask = tf.placeholder(tf.float32, [self.act_size],
                                       name='action_mask')
     Wi = tf.get_variable('Wi', [self.obs_size, self.hidden_dim],
                          initializer=xavier())
     bi = tf.get_variable('bi', [self.hidden_dim],
                          initializer=tf.constant_initializer(0.))
     projected = tf.matmul(self.feats, Wi) + bi
     lstm = tf.contrib.rnn.LSTMCell(self.hidden_dim, state_is_tuple=True)
     lstm_op, self.state = lstm(inputs=projected,
                                state=(self.init_c, self.init_h))
     reshaped = tf.concat(axis=1, values=(self.state.c, self.state.h))
     Wo = tf.get_variable('Wo', [2 * self.hidden_dim, self.act_size],
                          initializer=xavier())
     bo = tf.get_variable('bo', [self.act_size],
                          initializer=tf.constant_initializer(0.))
     self.logits = tf.matmul(reshaped, Wo) + bo
     self.probs = tf.multiply(tf.squeeze(tf.nn.softmax(self.logits)),
                              self.action_mask)
     self.pred = tf.argmax(self.probs, axis=0)
     self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
         logits=self.logits, labels=self.action)  #??
     self.train_op = tf.train.AdadeltaOptimizer(self.lr).minimize(self.loss)
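A hedged sketch of one forward step through this graph; the model instance (net), its obs_size/hidden_dim/act_size attributes, and the zero initial state are assumptions drawn from the placeholders above.

import numpy as np
import tensorflow as tf

net.build_model()   # assumes the constructor already set obs_size, hidden_dim, act_size, lr
sess = tf.Session()
sess.run(tf.global_variables_initializer())

zero_state = np.zeros([1, net.hidden_dim], dtype=np.float32)
probs, (c, h) = sess.run(
    [net.probs, net.state],
    feed_dict={net.feats: np.random.rand(1, net.obs_size).astype(np.float32),
               net.init_c: zero_state,
               net.init_h: zero_state,
               net.action_mask: np.ones(net.act_size, dtype=np.float32)})
# c and h can be fed back as init_c / init_h for the next dialogue turn.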
Example #6
 def lstm(inputs, sequence_length, hidden_size, scope):
     cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0.,
                                       initializer=xavier())
     cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2, forget_bias=0.,
                                       initializer=xavier())
     outputs, state = tf.nn.bidirectional_dynamic_rnn(
         cell_fw=cell_fw, cell_bw=cell_bw, inputs=inputs,
         sequence_length=sequence_length, dtype=tf.float32, scope=scope)
     outputs = tf.concat(outputs, axis=2)
     return outputs, state
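A hedged usage sketch for the bi-LSTM helper above (TF 1.x); the batch, time, and embedding sizes are illustrative assumptions.

import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 50, 300], name='token_embeddings')
seq_len = tf.placeholder(tf.int32, [None], name='sequence_length')

# Each direction gets hidden_size // 2 units, so the concatenated output is hidden_size wide.
outputs, state = lstm(inputs, seq_len, hidden_size=256, scope='sentence_lstm')
# outputs: [batch, 50, 256]; state: (forward LSTMStateTuple, backward LSTMStateTuple)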
Example #7
    def EncoderCNN(self, is_training, init_vec=None):

        with tf.variable_scope("sentence-encoder",
                               dtype=tf.float32,
                               initializer=xavier(),
                               reuse=tf.AUTO_REUSE):
            input_dim = self.input_embedding.shape[2]
            input_sentence = tf.expand_dims(self.input_embedding, axis=1)
            with tf.variable_scope("conv2d"):
                conv_kernel = self._GetVar(
                    init_vec=init_vec,
                    key='convkernel',
                    name='kernel',
                    shape=[1, 3, input_dim, FLAGS.hidden_size],
                    trainable=True)
                conv_bias = self._GetVar(init_vec=init_vec,
                                         key='convbias',
                                         name='bias',
                                         shape=[FLAGS.hidden_size],
                                         trainable=True)
            x = tf.layers.conv2d(inputs=input_sentence,
                                 filters=FLAGS.hidden_size,
                                 kernel_size=[1, 3],
                                 strides=[1, 1],
                                 padding='same',
                                 reuse=tf.AUTO_REUSE)
            x = tf.reduce_max(x, axis=2)
            x = tf.nn.relu(tf.squeeze(x, 1))

        return x
Example #8
    def EncoderLSTM(self, is_training, init_vec=None):

        with tf.variable_scope("sentence-encoder",
                               dtype=tf.float32,
                               initializer=xavier(),
                               reuse=tf.AUTO_REUSE):
            input_sentence = tf.layers.dropout(self.input_embedding,
                                               rate=self.keep_prob,
                                               training=is_training)
            fw_cell = tf.contrib.rnn.BasicLSTMCell(FLAGS.hidden_size,
                                                   state_is_tuple=True)
            bw_cell = tf.contrib.rnn.BasicLSTMCell(FLAGS.hidden_size,
                                                   state_is_tuple=True)
            outputs, states = tf.nn.bidirectional_dynamic_rnn(
                fw_cell,
                bw_cell,
                input_sentence,
                sequence_length=self.len,
                dtype=tf.float32,
                scope='bi-dynamic-rnn')
            fw_states, bw_states = states
            if isinstance(fw_states, tuple):
                fw_states = fw_states[0]
                bw_states = bw_states[0]
            x = tf.concat([fw_states, bw_states], axis=1)

        return x
Example #9
def func_module(input_layer, num_inputs, num_outputs):
    """ final module which estimates some function (value, q, policy, etc)
    """
    out = input_layer
    out_weights = tf.Variable(xavier()([num_inputs, num_outputs]))
    out = tf.matmul(out, out_weights)
    return out
def fc_module(input_layer, hiddens, activation_fn=tf.nn.relu):
    """ fully connected module
    """
    out = input_layer
    for num_outputs in hiddens:
        out = fc(out,
                 num_outputs=num_outputs,
                 activation_fn=activation_fn,
                 weights_initializer=xavier())
    return out
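A sketch of composing these two helpers into a small value head; fc is assumed to be an alias for tf.contrib.layers.fully_connected (its keyword arguments match that signature), and the observation size is illustrative.

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected as fc  # assumed alias used by fc_module

obs = tf.placeholder(tf.float32, [None, 84], name='observation')
hidden = fc_module(obs, hiddens=[64, 64])                  # two ReLU layers, Xavier-initialized
value = func_module(hidden, num_inputs=64, num_outputs=1)  # linear value estimate, shape [None, 1]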
Example #11
    def __init__(self, args):
        self.embedding = args['embedding']
        self.wrd_emb = const(self.embedding, name='wrd_emb', dtype=tf.float32)
        self.model = NSCLA(args, self.wrd_emb)
        self.l2_rate = args['l2_rate']
        self.cls_cnt = args['cls_cnt']
        self.embedding_lr = args['embedding_lr']
        self.temperature = args['temperature']
        self.align_rate = args['align_rate']
        self.task_cnt = args['task_cnt']
        self.best_test_acc = 0.
        self.best_dev_acc = 0.
        self.best_test_rmse = 0.
        self.hidden_size = args['emb_dim']

        # initializers for parameters
        self.w_init = xavier()
        self.b_init = tf.initializers.zeros()
        self.e_init = xavier()
Example #12
    def resnet(self):
        """ Simple implementation of Resnet.
        Args:
            self

        Outputs:
            Method return list with len = 2 and some params:
            [0][0]: indices - Placeholder which takes batch indices.
            [0][1]: all_data - Placeholder which takes all images.
            [0][2]; all_lables - Placeholder for lables.
            [0][3]: loss - Value of loss function.
            [0][4]: train - List of train optimizers.
            [0][5]: prob - softmax output, need to prediction.

            [1][0]: accuracy - Current accuracy
            [1][1]: session - tf session """
        with tf.Graph().as_default():
            indices = tf.placeholder(tf.int32, shape=[None, 1])
            all_data = tf.placeholder(tf.float32, shape=[50000, 28, 28])
            input_batch = tf.gather_nd(all_data, indices)
            x1_to_tens = tf.reshape(input_batch, shape=[-1, 28, 28, 1])

            net1 = tf.layers.conv2d(x1_to_tens, 32, (7, 7), strides=(2, 2), padding='SAME', activation=tf.nn.relu, \
                kernel_initializer=xavier(), name='11')
            net1 = tf.layers.max_pooling2d(net1, (2, 2), (2, 2))

            net1 = conv_block(net1, 3, [32, 32, 128], name='22', strides=(1, 1))

            net1 = identity_block(net1, 3, [32, 32, 128], name='33')

            net1 = conv_block(net1, 3, [64, 64, 256], name='53', strides=(1, 1))
            net1 = identity_block(net1, 3, [64, 64, 256], name='63')

            net1 = tf.layers.average_pooling2d(net1, (7, 7), strides=(1, 1))
            net1 = tf.contrib.layers.flatten(net1)

            with tf.variable_scope('dense3'):
                net1 = tf.layers.dense(net1, 10, kernel_initializer=tf.contrib.layers.xavier_initializer())


            prob1 = tf.nn.softmax(net1)
            all_lables = tf.placeholder(tf.float32, [None, 10])

            y = tf.gather_nd(all_lables, indices)

            loss1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net1, labels=y), name='loss3')
            train1 = tf.train.MomentumOptimizer(0.03, 0.8, use_nesterov=True).minimize(loss1)
            lables_hat1 = tf.cast(tf.argmax(net1, axis=1), tf.float32, name='lables_3at')
            lables1 = tf.cast(tf.argmax(y, axis=1), tf.float32, name='labl3es')

            accuracy1 = tf.reduce_mean(tf.cast(tf.equal(lables_hat1, lables1), tf.float32, name='a3ccuracy'))
            session = tf.Session()
            session.run(tf.global_variables_initializer())
        return [[indices, all_data, all_lables, loss1, train1, prob1], [accuracy1, session]]
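A hedged sketch of one training step with the handles returned above; the model instance, the zero stand-in arrays, and the batch size are assumptions.

import numpy as np

[[indices, all_data, all_labels, loss, train, prob], [accuracy, sess]] = model.resnet()

train_images = np.zeros((50000, 28, 28), np.float32)     # stand-in for the real image array
train_labels_onehot = np.zeros((50000, 10), np.float32)  # stand-in for one-hot labels
batch_idx = np.random.randint(0, 50000, size=(64, 1))    # row indices consumed by tf.gather_nd

_, step_loss, step_acc = sess.run(
    [train, loss, accuracy],
    feed_dict={indices: batch_idx, all_data: train_images, all_labels: train_labels_onehot})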
Example #13
def lstm(inputs,
         sequence_length,
         hidden_size,
         scope,
         bidirectional=True,
         lstm_cells=None):
    if bidirectional:
        if lstm_cells is None:
            cell_fw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2,
                                              forget_bias=0.,
                                              initializer=xavier())
            cell_bw = tf.nn.rnn_cell.LSTMCell(hidden_size // 2,
                                              forget_bias=0.,
                                              initializer=xavier())
        else:
            cell_fw, cell_bw = lstm_cells
        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=inputs,
            sequence_length=sequence_length,
            dtype=tf.float32,
            scope=scope)
        outputs = tf.concat(outputs, axis=2)
    else:
        if lstm_cells is None:
            cell = tf.nn.rnn_cell.LSTMCell(hidden_size,
                                           forget_bias=0.,
                                           initializer=xavier())
        else:
            cell = lstm_cells
        outputs, state = tf.nn.dynamic_rnn(cell=cell,
                                           inputs=inputs,
                                           sequence_length=sequence_length,
                                           dtype=tf.float32,
                                           scope=scope)
        outputs = tf.concat(outputs, axis=2)
    return outputs, state
Example #14
File: ml3.py Project: liqy2019/DAML
    def __init__(self, args):
        self.embedding = args['embedding']
        self.wrd_emb = const(self.embedding, name='wrd_emb', dtype=tf.float32)
        self.model_cnt = 3
        self.l2_rate = args['l2_rate']
        self.cls_cnt = args['cls_cnt']
        self.embedding_lr = args['embedding_lr']
        self.temperature = args['temperature']
        self.align_rate = args['align_rate']
        self.task_cnt = args['task_cnt']
        self.best_test_acc = 0.
        self.best_dev_acc = 0.
        self.best_test_rmse = 0.
        self.hidden_size = args['emb_dim']

        self.model = []
        for i in range(self.model_cnt):
            with tf.variable_scope(f'model{i}'):
                self.model.append(NSCLA(args, self.wrd_emb))

        # initializers for parameters
        self.w_init = xavier()
        self.b_init = tf.initializers.zeros()
        self.e_init = xavier()
Example #15
 def _GetVar(self,
             init_vec,
             key,
             name,
             shape=None,
             initializer=xavier(),
             trainable=True):
     if init_vec is not None and key in init_vec:
         print('using pretrained {} and is {}'.format(
             key, 'trainable' if trainable else 'not trainable'))
         return tf.get_variable(name=name,
                                initializer=init_vec[key],
                                trainable=trainable)
     else:
         return tf.get_variable(name=name,
                                shape=shape,
                                initializer=initializer,
                                trainable=trainable)
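A small sketch of the two paths through _GetVar, meant to be called from inside a method of the same class; the key names and shapes are illustrative.

import numpy as np

init_vec = {'wordvec': np.random.randn(50000, 300).astype(np.float32)}

# Key present: the pretrained matrix is used and the shape comes from the value itself.
word_emb = self._GetVar(init_vec=init_vec, key='wordvec', name='word_embedding')

# Key absent: a fresh [300, 128] variable is created with the default Xavier initializer.
proj = self._GetVar(init_vec=init_vec, key='projmat', name='projection', shape=[300, 128])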
Example #16
def get_initializer(params):
    if params.initializer == "uniform":
        max_val = params.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif params.initializer == 'pixellink':
        from tensorflow.contrib.layers import xavier_initializer_conv2d as xavier
        return xavier()
    elif params.initializer == "normal":
        return tf.random_normal_initializer(0.0, params.initializer_gain)
    elif params.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="normal")
    elif params.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(params.initializer_gain,
                                               mode="fan_avg",
                                               distribution="uniform")
    else:
        raise ValueError("Unrecognized initializer: %s" % params.initializer)
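A hedged usage sketch; params is assumed to be any object exposing initializer and initializer_gain attributes, as the branches above require.

import tensorflow as tf

class Params(object):
    initializer = "uniform_unit_scaling"
    initializer_gain = 1.0

init = get_initializer(Params())
w = tf.get_variable("proj", shape=[512, 512], initializer=init)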
Example #17
 def conv2d(self, input, shape, name):
     with tf.variable_scope(name):
         conv = tf.nn.conv2d(input,
                             tf.get_variable('kernel',
                                             dtype=tf.float32,
                                             shape=shape,
                                             initializer=xavier(),
                                             trainable=True),
                             strides=(1, 1, 1, 1),
                             padding="SAME",
                             name='conv')
         conv = tf.nn.bias_add(conv,
                               tf.get_variable(
                                   'bias',
                                   shape=(shape[3], ),
                                   trainable=True,
                                   initializer=tf.zeros_initializer()),
                               name='biasadd')
         return conv
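A sketch of calling the wrapper above on an image batch from inside the same class; the shape argument is [kernel_height, kernel_width, in_channels, out_channels], and the sizes shown are illustrative.

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
features = self.conv2d(images, shape=[3, 3, 3, 64], name='conv1')  # -> [None, 224, 224, 64]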
Example #18
    def EncoderPCNN(self, is_training, init_vec=None):

        with tf.variable_scope("sentence-encoder",
                               dtype=tf.float32,
                               initializer=xavier(),
                               reuse=tf.AUTO_REUSE):
            input_dim = self.input_embedding.shape[2]
            mask_embedding = tf.constant(
                [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
            pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
            input_sentence = tf.expand_dims(self.input_embedding, axis=1)
            with tf.variable_scope("conv2d"):
                conv_kernel = self._GetVar(
                    init_vec=init_vec,
                    key='convkernel',
                    name='kernel',
                    shape=[1, 3, input_dim, FLAGS.hidden_size],
                    trainable=True)
                conv_bias = self._GetVar(init_vec=init_vec,
                                         key='convbias',
                                         name='bias',
                                         shape=[FLAGS.hidden_size],
                                         trainable=True)
            x = tf.layers.conv2d(inputs=input_sentence,
                                 filters=FLAGS.hidden_size,
                                 kernel_size=[1, 3],
                                 strides=[1, 1],
                                 padding='same',
                                 reuse=tf.AUTO_REUSE)
            x = tf.reshape(x, [-1, FLAGS.max_length, FLAGS.hidden_size, 1])
            x = tf.reduce_max(
                tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_length, 3]) *
                tf.transpose(x, [0, 2, 1, 3]),
                axis=2)
            x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))

        return x
Example #19
    def __init__(self):

        # Some shortcuts for the dimensions we need
        HID_HID = [cfg['lstm_size'], cfg['lstm_size']]
        IN_HID = [cfg['embeddings_size'], cfg['lstm_size']]
        HID = [1, cfg['lstm_size']]

        # The hidden vector is the output
        self.output_size = HID

        # The state consists of the cell and the hidden vectors, and both have
        # the same dimensions
        self.state_size = tf.TensorShape(HID_HID)

        # W are the matrices which multiply the input, and U are the matrices
        # which multiply the previous hidden state

        # Input variables
        self.Wi = tf.get_variable('Wi', IN_HID, initializer=xavier())
        self.Ui = tf.get_variable('Ui', HID_HID, initializer=xavier())
        self.bi = tf.get_variable('bi', HID, initializer=xavier())

        # Modulation variables
        self.Wm = tf.get_variable('Wm', IN_HID, initializer=xavier())
        self.Um = tf.get_variable('Um', HID_HID, initializer=xavier())
        self.bm = tf.get_variable('bm', HID, initializer=xavier())

        # Forget variables
        self.Wf = tf.get_variable('Wf', IN_HID, initializer=xavier())
        self.Uf = tf.get_variable('Uf', HID_HID, initializer=xavier())
        self.bf = tf.get_variable('bf', HID, initializer=xavier())

        # Reveal variables
        self.Wr = tf.get_variable('Wr', IN_HID, initializer=xavier())
        self.Ur = tf.get_variable('Ur', HID_HID, initializer=xavier())
        self.br = tf.get_variable('br', HID, initializer=xavier())
Example #20
    def __init__(self, parameters, neurons_hidden, categories, learning_rate,
                 reg_lambda):
        """
        :param parameters: number of features in the input layer
        :param neurons_hidden: number of hidden units and layers. list of form [num_layer1, num_layer2, ...]
        :param categories: number of categories in the output layer
        :param learning_rate: learning rate
        :param reg_lambda: L2 regularization value
        """

        self.in_vector = tf.placeholder(tf.float32, [None, parameters],
                                        name='input')
        self.target_vect = tf.placeholder(tf.int64, [None], name='target')
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.class_weights = tf.placeholder(tf.float32, [categories],
                                            name='class_weights')

        self.weights = []

        # Generate Fully Connected Layers
        self.hidden_layers = []
        for i, num_neurons in enumerate(neurons_hidden):
            with tf.variable_scope('fully_connected-%d' % i):

                # We use Xavier initializer instead of sampling from a gaussian
                w = tf.get_variable(
                    'W',
                    shape=(parameters if i == 0 else neurons_hidden[i - 1],
                           neurons_hidden[i]),
                    initializer=xavier(),
                    regularizer=l2_regularizer(reg_lambda))

                b = tf.Variable(tf.constant(0.1, shape=[neurons_hidden[i]]),
                                name="b")

                self.hidden_layers.append(
                    tf.nn.relu(
                        tf.nn.xw_plus_b(self.in_vector
                                        if i == 0 else self.hidden_layers[-1],
                                        w,
                                        b,
                                        name="ffn")))
        # Apply dropout
        with tf.name_scope('dropout'):
            self.drop = tf.nn.dropout(self.hidden_layers[-1],
                                      self.dropout_keep_prob)

        # Get output
        with tf.variable_scope("output"):
            w = tf.get_variable('W',
                                shape=[neurons_hidden[-1], categories],
                                initializer=xavier(),
                                regularizer=l2_regularizer(reg_lambda))

            b = tf.Variable(tf.constant(0.1, shape=[categories]), name="b")
            self.scores = tf.nn.xw_plus_b(self.drop, w, b, name="scores")
            self.predictions = tf.nn.softmax(self.scores, name='predictions')
            self.category = tf.argmax(self.scores, 1)

        # CalculateMean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.target_vect)

            # Weighted loss depending on class frequency
            scale = tf.gather(self.class_weights, self.target_vect)

            self.loss = tf.reduce_mean(losses * scale) + \
                        sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))  # Weight Decay
            tf.summary.scalar('loss', self.loss)

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(
                tf.argmax(tf.nn.softmax(self.scores), 1), self.target_vect)
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   tf.float32),
                                           name="accuracy")
            tf.summary.scalar('accuracy', self.accuracy)

        # Adam Optimizer with exponential decay and gradient clipping
        with tf.name_scope("Optimizer"):
            step = tf.Variable(0, trainable=False)
            rate = tf.train.exponential_decay(learning_rate, step, 1, 0.9999)
            optimizer = tf.train.AdamOptimizer(rate)
            tvars = tf.trainable_variables()
            gradients = tf.gradients(self.loss, tvars)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5)
            self.train_op = optimizer.apply_gradients(zip(
                clipped_gradients, tvars),
                                                      global_step=step)

        # Keep track of gradient values and sparsity
        for gradient, variable in zip(clipped_gradients, tvars):
            if isinstance(gradient, ops.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient
            tf.summary.histogram(variable.name, variable)
            tf.summary.histogram(variable.name + "/gradients", grad_values)
            tf.summary.histogram(variable.name + "/gradient_norm",
                                 clip_ops.global_norm([grad_values]))
            tf.summary.scalar(variable.name + "/grad/sparsity",
                              tf.nn.zero_fraction(gradient))

        self.merged = tf.summary.merge_all()
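A hedged sketch of constructing and stepping this classifier; the class name (FFNClassifier) is hypothetical since only __init__ is shown, the feed values are random placeholders, and the parameter meanings follow the docstring above.

import numpy as np
import tensorflow as tf

# Hypothetical class name for the constructor shown above.
clf = FFNClassifier(parameters=300, neurons_hidden=[128, 64], categories=5,
                    learning_rate=1e-3, reg_lambda=1e-4)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
_, cur_loss = sess.run([clf.train_op, clf.loss],
                       feed_dict={clf.in_vector: np.random.rand(32, 300).astype(np.float32),
                                  clf.target_vect: np.random.randint(0, 5, size=32),
                                  clf.dropout_keep_prob: 0.5,
                                  clf.class_weights: np.ones(5, dtype=np.float32)})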
Example #21
    def bulid(self,init_vec):

        with tf.variable_scope("embedding-lookup", initializer=xavier(), dtype=tf.float32):

            temp_word_embedding = self._GetVar(init_vec=init_vec, key='wordvec', name='temp_word_embedding',
                shape=[FLAGS.vocabulary_size, FLAGS.word_size], trainable=True)
            unk_word_embedding = self._GetVar(init_vec=init_vec, key='unkvec', name='unk_embedding',
                shape=[FLAGS.word_size], trainable=True)
            word_embedding = tf.concat([temp_word_embedding, tf.reshape(unk_word_embedding, [1, FLAGS.word_size]),
                tf.reshape(tf.constant(np.zeros(FLAGS.word_size), dtype=tf.float32), [1, FLAGS.word_size])], 0)
            temp_pos1_embedding = self._GetVar(init_vec=init_vec, key='pos1vec', name='temp_pos1_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
            temp_pos2_embedding = self._GetVar(init_vec=init_vec, key='pos2vec', name='temp_pos2_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size], trainable=True)
            pos1_embedding = tf.concat([temp_pos1_embedding,
                tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)), [1, FLAGS.pos_size])], 0)
            pos2_embedding = tf.concat([temp_pos2_embedding,
                tf.reshape(tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)), [1, FLAGS.pos_size])], 0)

            input_word = tf.nn.embedding_lookup(word_embedding, self.word)  # N,max_len,d
            input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
            input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)
            input_embedding = tf.concat(values = [input_word, input_pos1, input_pos2], axis = -1)

            #input_word_type = tf.nn.embedding_lookup(word_embedding, self.word_type)  # N,max_len,d
            #input_pos1_type = tf.nn.embedding_lookup(pos1_embedding, self.pos1_type)
            #input_pos2_type = tf.nn.embedding_lookup(pos2_embedding, self.pos2_type)
            #input_embedding_type = tf.concat(values = [input_word_type, input_pos1_type, input_pos2_type], axis = -1)

            temp_type_embedding = tf.get_variable('type_embedding', shape=[FLAGS.type_num,FLAGS.type_dim] ,initializer=xavier(), dtype=tf.float32)
            type_embedding = tf.concat([tf.reshape(tf.constant(np.zeros(FLAGS.type_dim),dtype=tf.float32),[1,FLAGS.type_dim]),temp_type_embedding],0)

            #en1_type = tf.nn.embedding_lookup(type_embedding, self.en1_type)    # batchsize,max_type_num,type_dim
            #en2_type = tf.nn.embedding_lookup(type_embedding, self.en2_type)
            #en1_type = tf.divide(tf.reduce_sum(en1_type, axis=1), tf.expand_dims(self.en1_type_len, axis=1))
            #en2_type = tf.divide(tf.reduce_sum(en2_type, axis=1), tf.expand_dims(self.en2_type_len, axis=1))
            #x_type = tf.concat([en1_type, en2_type], -1)

            #att_type = tf.get_variable('att_type', [FLAGS.type_dim,1],initializer=xavier())
            #att_1_type = tf.get_variable('att_1_type', [FLAGS.type_dim,50],initializer=xavier())
            #att_2_type = tf.get_variable('att_2_type', [50,1],initializer=xavier())
            #padding = tf.constant(np.zeros(FLAGS.max_type_num)*(-1e8),dtype=tf.float32)
            #en1_type_stack, en2_type_stack = [],[]
            #for i in range(FLAGS.batch_size):
            #    #temp_alpha_1 = tf.squeeze(en1_type[i] @ att_type , -1)  # max_type_num,type_dim * type_dim,1 = max_type_num,1
            #    #temp_alpha_2 = tf.squeeze(en2_type[i] @ att_type , -1)
            #    temp_alpha_1 = tf.squeeze(tf.nn.tanh(en1_type[i] @ att_1_type ) @ att_2_type, -1)
            #    temp_alpha_2 = tf.squeeze(tf.nn.tanh(en2_type[i] @ att_1_type ) @ att_2_type, -1)
            #    temp_alpha_1 = tf.where(tf.equal(self.en1_type_mask[i], 1), temp_alpha_1, padding)
            #    temp_alpha_2 = tf.where(tf.equal(self.en2_type_mask[i], 1), temp_alpha_2, padding) # max_type_num
            #    temp_alpha_1 = tf.nn.softmax(temp_alpha_1)
            #    temp_alpha_2 = tf.nn.softmax(temp_alpha_2)
            #    en1_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_1,0) @ en1_type[i],0)) # 1,max_type_num * max_type_num,type_dim = 1,type_dim = type_dim
            #    en2_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_2,0) @ en2_type[i],0))
            #en1_type_stack = tf.stack(en1_type_stack)
            #en2_type_stack = tf.stack(en2_type_stack)
            #x_type = tf.concat([en1_type_stack, en2_type_stack], -1)

        with tf.variable_scope("entity_typing"):

            input_type_1 = tf.concat(values = [input_word, input_pos1], axis = -1)
            input_type_2 = tf.concat(values = [input_word, input_pos2], axis = -1)

            input_type_1 = tf.concat(values = [input_type_1, input_type_2], axis = 0)

            lstm_cell_forward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)
            lstm_cell_backward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)

            #lstm_cell_forward = tf.contrib.rnn.DropoutWrapper(lstm_cell_forward, output_keep_prob=0.5)
            #lstm_cell_backward = tf.contrib.rnn.DropoutWrapper(lstm_cell_backward, output_keep_prob=0.5)
            #print(self.len.get_shape().as_list())
            #print(input_embedding.get_shape().as_list())
            #(all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(lstm_cell_forward,lstm_cell_backward,input_embedding_type,dtype=tf.float32,sequence_length=self.len_type)
            seq_len = tf.concat([self.len, self.len], 0)
            (all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(lstm_cell_forward, lstm_cell_backward, input_type_1, dtype=tf.float32, sequence_length=seq_len)
            (fw_outputs,bw_outputs) = (all_states)  # N,max_len,grusize
            outputs_1 = tf.concat([fw_outputs,bw_outputs],-1) # N,max_len,grusize*2

            #(all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(lstm_cell_forward,lstm_cell_backward,input_type_2,dtype=tf.float32,sequence_length=self.len)
            #(fw_outputs,bw_outputs) = (all_states)  # N,max_len,grusize
            #outputs_2 = tf.concat([fw_outputs,bw_outputs],-1) # N,max_len,grusize*2
            #(fw_state,bw_state) = (last_states)
            #(_,h_f) = fw_state
            #(_,h_b) = bw_state
            #states = tf.concat([h_f,h_b],-1)

            ET_att_1 = tf.get_variable('ET_att_1', [FLAGS.rnn_size*2,128],initializer=xavier())
            ET_att_2 = tf.get_variable('ET_att_2', [128,1],initializer=xavier())
            #padding = tf.constant(np.zeros((FLAGS.batch_size,FLAGS.max_len))*(-1e8),dtype=tf.float32)
            padding_1 = tf.ones_like(self.mask, dtype=tf.float32) * tf.constant([-1e8])
            padding_2 = tf.ones_like(self.mask, dtype=tf.float32) * tf.constant([-1e8])
            padding = tf.concat([padding_1, padding_2], 0)
            mask = tf.concat([self.mask, self.mask], 0)

            outputs_1_ = tf.reshape(outputs_1,[-1,FLAGS.rnn_size*2])
            temp_alpha_1 = tf.reshape(tf.nn.relu(outputs_1_ @ ET_att_1) @ ET_att_2, [-1,FLAGS.max_len])
            temp_alpha_1 = tf.where(tf.equal(mask, 0), padding, temp_alpha_1)
            alpha_1 = tf.nn.softmax(temp_alpha_1,-1)    # N,max_len
            outputs_1 = tf.reshape(tf.expand_dims(alpha_1,1) @ outputs_1, [-1,FLAGS.rnn_size*2])

            #outputs_2_ = tf.reshape(outputs_2,[-1,FLAGS.rnn_size*2])
            #temp_alpha_2 = tf.reshape(tf.nn.relu(outputs_2_ @ ET_att_1) @ ET_att_2, [-1,FLAGS.max_len])
            #temp_alpha_2 = tf.where(tf.equal(self.mask, 0), padding, temp_alpha_2)
            #alpha_2 = tf.nn.softmax(temp_alpha_2,-1)    # N,max_len
            #outputs_2 = tf.reshape(tf.expand_dims(alpha_2,1) @ outputs_2, [-1,FLAGS.rnn_size*2])

            ET_sent_att_1 = tf.get_variable('ET_sent_att_1', [FLAGS.rnn_size*2,128],initializer=xavier())
            ET_sent_att_2 = tf.get_variable('ET_sent_att_2', [128,1],initializer=xavier())
            alpha_type_sent_1 = tf.squeeze(tf.nn.tanh(outputs_1 @ ET_sent_att_1) @ ET_sent_att_2 , -1)
            #alpha_type_sent_2 = tf.squeeze(tf.nn.tanh(outputs_2 @ ET_sent_att_1) @ ET_sent_att_2 , -1)

            type_repre_1 = []
            type_repre_2 = []
            for i in range(FLAGS.batch_size):
                m = outputs_1[self.scope[i]:self.scope[i+1]]# (n , hidden_size)
                sent_score = tf.nn.softmax(alpha_type_sent_1[self.scope[i]:self.scope[i+1]])
                type_repre_1.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score,0), m)))

                #m = outputs_2[self.scope[i]:self.scope[i+1]]# (n , hidden_size)
                #sent_score = tf.nn.softmax(alpha_type_sent_2[self.scope[i]:self.scope[i+1]])
                #type_repre_2.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score,0), m)))
            for i in range(FLAGS.batch_size):
                m = outputs_1[self.scope[i]+FLAGS.batch_size:self.scope[i+1]+FLAGS.batch_size]# (n , hidden_size)
                sent_score = tf.nn.softmax(alpha_type_sent_1[self.scope[i]+FLAGS.batch_size:self.scope[i+1]+FLAGS.batch_size])
                type_repre_2.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score,0), m)))

            type_repre_1 = tf.layers.dropout(tf.stack(type_repre_1), rate = 1 - self.keep_prob, training = self.istrain)
            type_repre_2 = tf.layers.dropout(tf.stack(type_repre_2), rate = 1 - self.keep_prob, training = self.istrain)

            ent1_word = tf.nn.embedding_lookup(word_embedding, self.en1_word)
            ent2_word = tf.nn.embedding_lookup(word_embedding, self.en2_word)

            #en1_outputs = tf.concat([outputs,ent1_word],-1)
            #en2_outputs = tf.concat([outputs,ent2_word],-1)
            en1_outputs = tf.concat([type_repre_1,ent1_word],-1)
            en2_outputs = tf.concat([type_repre_2,ent2_word],-1)

            ET_matrix = self._GetVar(init_vec=init_vec, key='disckernel',
                name='ET_matrix', shape=[39, FLAGS.rnn_size*2 + FLAGS.word_size])
            ET_bias = self._GetVar(init_vec=init_vec, key='discbias',
                name='ET_bias', shape=[39], initializer=tf.zeros_initializer())

            logits_1 = tf.matmul(en1_outputs, ET_matrix, transpose_b=True) + ET_bias
            logits_2 = tf.matmul(en2_outputs, ET_matrix, transpose_b=True) + ET_bias
            #print(logits_1.get_shape().as_list())
            #label_onehot_1 = tf.one_hot(indices=self.en1_type, depth=39, dtype=tf.int32)
            #label_onehot_2 = tf.one_hot(indices=self.en2_type, depth=39, dtype=tf.int32)

            #loss_1 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot_1,logits=logits_1))
            #loss_2 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot_2,logits=logits_2))
            loss_1 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en1_type,logits=logits_1))
            loss_2 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en2_type,logits=logits_2))

            output_1 = tf.nn.sigmoid(logits_1) # batchsize, 39
            output_2 = tf.nn.sigmoid(logits_2)
            ones = tf.ones_like(logits_1)
            zeros = tf.zeros_like(logits_1)
            self.output_1 = tf.where(tf.greater(output_1, 0.5), ones, zeros)    # batch_size, 39
            self.output_2 = tf.where(tf.greater(output_2, 0.5), ones, zeros)
            en1_type_len = tf.reduce_sum(self.output_1[:,1:],keepdims=True,axis=-1)
            en2_type_len = tf.reduce_sum(self.output_2[:,1:],keepdims=True,axis=-1)
            #en1_type_len = tf.reduce_sum(self.output_1,keepdims=True,axis=-1)
            #en2_type_len = tf.reduce_sum(self.output_2,keepdims=True,axis=-1)
            ones = tf.ones_like(en1_type_len)
            en1_type_len_ = tf.where(tf.equal(en1_type_len, 0), ones, en1_type_len)
            en2_type_len_ = tf.where(tf.equal(en2_type_len, 0), ones, en2_type_len)
            en1_type = (self.output_1 @ type_embedding) / en1_type_len_
            en2_type = (self.output_2 @ type_embedding) / en2_type_len_

            #self.output_1 = tf.nn.softmax(logits_1,-1)
            #self.output_2 = tf.nn.softmax(logits_2,-1)
            #output_1 = tf.argmax(self.output_1,-1)
            #output_2 = tf.argmax(self.output_2,-1)
            #output_1 = tf.to_int32(output_1)
            #output_2 = tf.to_int32(output_2)
            #print(self.output_2 .get_shape().as_list())
            #en1_type = tf.nn.embedding_lookup(type_embedding, output_1)
            #en2_type = tf.nn.embedding_lookup(type_embedding, output_2)
            #print(en1_type.get_shape().as_list())
            x_type = tf.concat([en1_type, en2_type], -1)

        with tf.variable_scope("encoder"):

            input_dim = input_embedding.shape[-1]
            mask_embedding = tf.constant([[0,0,0],[1,0,0],[0,1,0],[0,0,1]], dtype=np.float32)
            pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
            input_sentence = tf.expand_dims(input_embedding, axis=1)
            with tf.variable_scope("conv2d"):
                conv_kernel = self._GetVar(init_vec=init_vec,key='convkernel',name='kernel',
                    shape=[1,3,input_dim,FLAGS.hidden_size],trainable=True)
                conv_bias = self._GetVar(init_vec=init_vec,key='convbias',name='bias',shape=[FLAGS.hidden_size],trainable=True)
            x = tf.layers.conv2d(inputs = input_sentence, filters=FLAGS.hidden_size,
                kernel_size=[1,3], strides=[1, 1], padding='same', reuse=tf.AUTO_REUSE)

            sequence = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size])

            x = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size, 1])
            x = tf.reduce_max(tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_len, 3]) * tf.transpose(x,[0, 2, 1, 3]), axis = 2)
            x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))

        with tf.variable_scope("selector"):

            attention_1 = tf.get_variable('attention_1', [self.hidden_size,300],initializer=xavier())
            attention_2 = tf.get_variable('attention_2', [300,1],initializer=xavier())
            alpha = tf.squeeze(tf.nn.tanh(x @ attention_1) @ attention_2 , -1)

            bag_repre = []
            for i in range(FLAGS.batch_size):
                m = x[self.scope[i]:self.scope[i+1]]# (n , hidden_size)
                sent_score = tf.nn.softmax(alpha[self.scope[i]:self.scope[i+1]])
                bag_repre.append(tf.squeeze(tf.matmul(tf.expand_dims(sent_score,0), m)))
            bag_repre = tf.layers.dropout(tf.stack(bag_repre), rate = 1 - self.keep_prob, training = self.istrain)

        with tf.variable_scope("loss"):

            discrimitive_matrix = self._GetVar(init_vec=init_vec, key='disckernel',
                name='discrimitive_matrix', shape=[53, self.hidden_size + FLAGS.type_dim *2])
            bias = self._GetVar(init_vec=init_vec, key='discbias',
                name='bias', shape=[53], initializer=tf.zeros_initializer())

            bag_repre_type = tf.concat([bag_repre,x_type],-1)

            self.logit = tf.matmul(bag_repre_type, discrimitive_matrix, transpose_b=True) + bias
            self.output = tf.nn.softmax(self.logit,-1)

            label_onehot = tf.one_hot(indices=self.label, depth=FLAGS.num_classes, dtype=tf.int32)

            regularizer = tf.contrib.layers.l2_regularizer(0.00001)
            l2_loss = tf.contrib.layers.apply_regularization(regularizer=regularizer, weights_list=tf.trainable_variables())
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=label_onehot,logits=self.logit)) + l2_loss + loss_1 + loss_2
Example #22
    def bulid(self, init_vec):

        with tf.variable_scope("embedding-lookup",
                               initializer=xavier(),
                               dtype=tf.float32):

            temp_word_embedding = self._GetVar(
                init_vec=init_vec,
                key='wordvec',
                name='temp_word_embedding',
                shape=[FLAGS.vocabulary_size, FLAGS.word_size],
                trainable=True)
            unk_word_embedding = self._GetVar(init_vec=init_vec,
                                              key='unkvec',
                                              name='unk_embedding',
                                              shape=[FLAGS.word_size])
            word_embedding = tf.concat([
                temp_word_embedding,
                tf.reshape(unk_word_embedding, [1, FLAGS.word_size]),
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.word_size), dtype=tf.float32),
                    [1, FLAGS.word_size])
            ], 0)
            temp_pos1_embedding = self._GetVar(
                init_vec=init_vec,
                key='pos1vec',
                name='temp_pos1_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size])
            temp_pos2_embedding = self._GetVar(
                init_vec=init_vec,
                key='pos2vec',
                name='temp_pos2_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size])
            pos1_embedding = tf.concat([
                temp_pos1_embedding,
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                    [1, FLAGS.pos_size])
            ], 0)
            pos2_embedding = tf.concat([
                temp_pos2_embedding,
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                    [1, FLAGS.pos_size])
            ], 0)

            input_word = tf.nn.embedding_lookup(word_embedding,
                                                self.word)  # N,max_len,d
            input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
            input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)
            input_embedding = tf.concat(
                values=[input_word, input_pos1, input_pos2], axis=-1)

            temp_type_embedding = tf.get_variable(
                'type_embedding',
                shape=[FLAGS.type_num, FLAGS.type_dim],
                initializer=xavier(),
                dtype=tf.float32)
            type_embedding = tf.concat([
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.type_dim), dtype=tf.float32),
                    [1, FLAGS.type_dim]), temp_type_embedding
            ], 0)

            en1_type = tf.nn.embedding_lookup(
                type_embedding,
                self.en1_type)  # batchsize,max_type_num,type_dim
            en2_type = tf.nn.embedding_lookup(type_embedding, self.en2_type)
            #en1_type = tf.divide(tf.reduce_sum(en1_type, axis=1), tf.expand_dims(self.en1_type_len, axis=1))
            #en2_type = tf.divide(tf.reduce_sum(en2_type, axis=1), tf.expand_dims(self.en2_type_len, axis=1))
            x_type = tf.concat([en1_type, en2_type], -1)
            '''#att_type = tf.get_variable('att_type', [FLAGS.type_dim,1],initializer=xavier())
            att_1_type = tf.get_variable('att_1_type', [FLAGS.type_dim,50],initializer=xavier())
            att_2_type = tf.get_variable('att_2_type', [50,1],initializer=xavier())
            padding = tf.constant(np.zeros(FLAGS.max_type_num)*(-1e8),dtype=tf.float32)
            en1_type_stack, en2_type_stack = [],[]
            for i in range(FLAGS.batch_size):
                #temp_alpha_1 = tf.squeeze(en1_type[i] @ att_type , -1)  # max_type_num,type_dim * type_dim,1 = max_type_num,1
                #temp_alpha_2 = tf.squeeze(en2_type[i] @ att_type , -1)
                temp_alpha_1 = tf.squeeze(tf.nn.tanh(en1_type[i] @ att_1_type ) @ att_2_type, -1)
                temp_alpha_2 = tf.squeeze(tf.nn.tanh(en2_type[i] @ att_1_type ) @ att_2_type, -1)
                temp_alpha_1 = tf.where(tf.equal(self.en1_type_mask[i], 1), temp_alpha_1, padding)
                temp_alpha_2 = tf.where(tf.equal(self.en2_type_mask[i], 1), temp_alpha_2, padding) # max_type_num
                temp_alpha_1 = tf.nn.softmax(temp_alpha_1)
                temp_alpha_2 = tf.nn.softmax(temp_alpha_2)
                en1_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_1,0) @ en1_type[i],0)) # 1,max_type_num * max_type_num,type_dim = 1,type_dim = type_dim
                en2_type_stack.append(tf.squeeze(tf.expand_dims(temp_alpha_2,0) @ en2_type[i],0))
            en1_type_stack = tf.stack(en1_type_stack)
            en2_type_stack = tf.stack(en2_type_stack)
            x_type = tf.concat([en1_type_stack, en2_type_stack], -1)'''

        with tf.variable_scope("encoder"):

            input_dim = input_embedding.shape[-1]
            mask_embedding = tf.constant(
                [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
            pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
            input_sentence = tf.expand_dims(input_embedding, axis=1)
            with tf.variable_scope("conv2d"):
                conv_kernel = self._GetVar(
                    init_vec=init_vec,
                    key='convkernel',
                    name='kernel',
                    shape=[1, 3, input_dim, FLAGS.hidden_size],
                    trainable=True)
                conv_bias = self._GetVar(init_vec=init_vec,
                                         key='convbias',
                                         name='bias',
                                         shape=[FLAGS.hidden_size],
                                         trainable=True)
            x = tf.layers.conv2d(inputs=input_sentence,
                                 filters=FLAGS.hidden_size,
                                 kernel_size=[1, 3],
                                 strides=[1, 1],
                                 padding='same',
                                 reuse=tf.AUTO_REUSE)

            sequence = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size])

            x = tf.reshape(x, [-1, FLAGS.max_len, FLAGS.hidden_size, 1])
            x = tf.reduce_max(
                tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_len, 3]) *
                tf.transpose(x, [0, 2, 1, 3]),
                axis=2)
            x = tf.nn.relu(tf.reshape(x, [-1, FLAGS.hidden_size * 3]))

        with tf.variable_scope("selector"):

            attention_1 = tf.get_variable('attention_1',
                                          [self.hidden_size, 300],
                                          initializer=xavier())
            attention_2 = tf.get_variable('attention_2', [300, 1],
                                          initializer=xavier())
            alpha = tf.squeeze(tf.nn.tanh(x @ attention_1) @ attention_2, -1)

            bag_repre = []
            for i in range(FLAGS.batch_size):
                m = x[self.scope[i]:self.scope[i + 1]]  # (n , hidden_size)
                sent_score = tf.nn.softmax(alpha[self.scope[i]:self.scope[i +
                                                                          1]])
                #m = x[self.scope[i][0]:self.scope[i][1]]# (n , hidden_size)
                #sent_score = tf.nn.softmax(alpha[self.scope[i][0]:self.scope[i][1]])
                bag_repre.append(
                    tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))
            bag_repre = tf.layers.dropout(tf.stack(bag_repre),
                                          rate=1 - self.keep_prob,
                                          training=self.istrain)

        with tf.variable_scope("loss"):

            discrimitive_matrix = self._GetVar(
                init_vec=init_vec,
                key='disckernel',
                name='discrimitive_matrix',
                shape=[53, self.hidden_size + FLAGS.type_dim * 2])
            bias = self._GetVar(init_vec=init_vec,
                                key='discbias',
                                name='bias',
                                shape=[53],
                                initializer=tf.zeros_initializer())

            bag_repre_type = tf.concat([bag_repre, x_type], -1)

            self.logit = tf.matmul(
                bag_repre_type, discrimitive_matrix, transpose_b=True) + bias
            self.output = tf.nn.softmax(self.logit, -1)

            label_onehot = tf.one_hot(indices=self.label,
                                      depth=FLAGS.num_classes,
                                      dtype=tf.int32)

            regularizer = tf.contrib.layers.l2_regularizer(0.00001)
            l2_loss = tf.contrib.layers.apply_regularization(
                regularizer=regularizer, weights_list=tf.trainable_variables())
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=label_onehot, logits=self.logit)) + l2_loss
Example #23
    def __init__(self, is_training, init_vec):

        self.word = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_word')
        self.pos1 = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_pos1')
        self.pos2 = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_pos2')
        self.mask = tf.placeholder(dtype=tf.int32,
                                   shape=[None, FLAGS.max_length],
                                   name='input_mask')
        self.len = tf.placeholder(dtype=tf.int32,
                                  shape=[None],
                                  name='input_len')
        self.label_index = tf.placeholder(dtype=tf.int32,
                                          shape=[None],
                                          name='label_index')
        self.label = tf.placeholder(
            dtype=tf.float32,
            shape=[FLAGS.batch_size, FLAGS.num_classes],
            name='input_label')
        self.scope = tf.placeholder(dtype=tf.int32,
                                    shape=[FLAGS.batch_size + 1],
                                    name='scope')
        self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
        self.hier = init_vec['relation_levels'].shape[1]
        self.relation_levels = tf.constant(
            init_vec['relation_levels'],
            shape=[FLAGS.num_classes, self.hier],
            dtype=tf.int32,
            name='relation_levels')
        self.layer = (1 + np.max(init_vec['relation_levels'], 0)).astype(
            np.int32)

        word_size = FLAGS.word_size
        vocab_size = FLAGS.vocabulary_size - 2

        with tf.variable_scope("embedding-lookup",
                               initializer=xavier(),
                               dtype=tf.float32):

            temp_word_embedding = self._GetVar(init_vec=init_vec,
                                               key='wordvec',
                                               name='temp_word_embedding',
                                               shape=[vocab_size, word_size],
                                               trainable=True)
            unk_word_embedding = self._GetVar(init_vec=init_vec,
                                              key='unkvec',
                                              name='unk_embedding',
                                              shape=[word_size],
                                              trainable=True)
            word_embedding = tf.concat([
                temp_word_embedding,
                tf.reshape(unk_word_embedding, [1, word_size]),
                tf.reshape(tf.constant(np.zeros(word_size), dtype=tf.float32),
                           [1, word_size])
            ], 0)

            temp_pos1_embedding = self._GetVar(
                init_vec=init_vec,
                key='pos1vec',
                name='temp_pos1_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size],
                trainable=True)
            temp_pos2_embedding = self._GetVar(
                init_vec=init_vec,
                key='pos2vec',
                name='temp_pos2_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size],
                trainable=True)
            pos1_embedding = tf.concat([
                temp_pos1_embedding,
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                    [1, FLAGS.pos_size])
            ], 0)
            pos2_embedding = tf.concat([
                temp_pos2_embedding,
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                    [1, FLAGS.pos_size])
            ], 0)

            input_word = tf.nn.embedding_lookup(word_embedding, self.word)
            input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
            input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)
            self.input_embedding = tf.concat(
                values=[input_word, input_pos1, input_pos2], axis=2)

        self.hidden_size, self.sentence_encoder = self._GetEncoder(
            FLAGS.model, is_training)
Example #24
    def __init__(self, is_training, init_vec=None):

        NN.__init__(self, is_training, init_vec)

        x = self.sentence_encoder(is_training, init_vec)

        with tf.variable_scope("sentence-level-attention",
                               initializer=xavier(),
                               dtype=tf.float32):

            relation_matrix = self._GetVar(
                init_vec=init_vec,
                key='relmat',
                name='relation_matrix',
                shape=[FLAGS.num_classes, self.hidden_size])

            current_relation = tf.nn.embedding_lookup(relation_matrix,
                                                      self.label_index)
            attention_logit = tf.reduce_sum(x * current_relation, 1)

            tower_repre = []
            for i in range(FLAGS.batch_size):
                sen_matrix = x[self.scope[i]:self.scope[i + 1]]
                attention_score = tf.nn.softmax(
                    tf.reshape(
                        attention_logit[self.scope[i]:self.scope[i + 1]],
                        [1, -1]))
                final_repre = tf.reshape(
                    tf.matmul(attention_score, sen_matrix), [self.hidden_size])
                tower_repre.append(final_repre)
            stack_repre = tf.layers.dropout(tf.stack(tower_repre),
                                            rate=1 - self.keep_prob,
                                            training=is_training)

        with tf.variable_scope("loss", dtype=tf.float32, initializer=xavier()):

            discrimitive_matrix = self._GetVar(
                init_vec=init_vec,
                key='discmat',
                name='discrimitive_matrix',
                shape=[FLAGS.num_classes, self.hidden_size])

            bias = self._GetVar(init_vec=init_vec,
                                key='disc_bias',
                                name='bias',
                                shape=[FLAGS.num_classes],
                                initializer=tf.zeros_initializer())

            logits = tf.matmul(
                stack_repre, discrimitive_matrix, transpose_b=True) + bias
            self.output = tf.nn.softmax(logits)

            regularizer = tf.contrib.layers.l2_regularizer(FLAGS.weight_decay)
            l2_loss = tf.contrib.layers.apply_regularization(
                regularizer=regularizer, weights_list=tf.trainable_variables())
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.label, logits=logits)) + l2_loss

            tf.summary.scalar('loss', self.loss)
            self.predictions = tf.argmax(logits, 1, name="predictions")
            self.correct_predictions = tf.equal(self.predictions,
                                                tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions,
                                                   "float"),
                                           name="accuracy")

        if not is_training:

            with tf.variable_scope("test"):
                test_attention_logit = tf.matmul(x,
                                                 relation_matrix,
                                                 transpose_b=True)
                test_tower_output = []
                for i in range(FLAGS.batch_size):
                    test_attention_score = tf.nn.softmax(
                        tf.transpose(test_attention_logit[
                            self.scope[i]:self.scope[i + 1], :]))
                    test_final_repre = tf.matmul(
                        test_attention_score,
                        x[self.scope[i]:self.scope[i + 1]])
                    test_logits = tf.matmul(test_final_repre,
                                            discrimitive_matrix,
                                            transpose_b=True) + bias * 3
                    test_output = tf.diag_part(tf.nn.softmax(test_logits))
                    test_tower_output.append(test_output)
                test_stack_output = tf.reshape(
                    tf.stack(test_tower_output),
                    [FLAGS.batch_size, FLAGS.num_classes])
                self.test_output = test_stack_output
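
In the sentence-level attention above, each sentence encoding in a bag (the slice x[self.scope[i]:self.scope[i + 1]]) is scored against the embedding of the bag's relation label, the scores are softmax-normalized within the bag, and the bag is represented by the weighted sum. A minimal NumPy sketch of one bag, with made-up sizes, mirrors the per-bag loop:

import numpy as np

# Sketch of selective attention over a single bag (made-up sizes).
hidden_size = 4
sen_matrix = np.random.randn(3, hidden_size)      # 3 sentence encodings in the bag
current_relation = np.random.randn(hidden_size)   # embedding of the bag's relation label

attention_logit = sen_matrix @ current_relation   # one score per sentence
attention_score = np.exp(attention_logit)
attention_score /= attention_score.sum()          # softmax within the bag

final_repre = attention_score @ sen_matrix        # weighted bag representation
print(final_repre.shape)                          # (hidden_size,)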
Ejemplo n.º 25
0
    def __init__(self, is_training, init_vec):

        NN.__init__(self, is_training, init_vec)

        x = self.sentence_encoder(is_training, init_vec)

        with tf.variable_scope("sentence-level-attention",
                               initializer=xavier(),
                               dtype=tf.float32):

            relation_matrixs = []

            for i in range(self.hier):
                relation_matrixs.append(
                    self._GetVar(init_vec=init_vec,
                                 key='relmat' + str(i),
                                 name='relation_matrix_l' + str(i),
                                 shape=[self.layer[i], self.hidden_size]))

            label_layer = tf.nn.embedding_lookup(self.relation_levels,
                                                 self.label_index)
            attention_logits = []
            for i in range(self.hier):
                current_relation = tf.nn.embedding_lookup(
                    relation_matrixs[i], label_layer[:, i])
                attention_logits.append(tf.reduce_sum(current_relation * x, 1))

            attention_logits_stack = tf.stack(attention_logits)
            attention_score_hidden = tf.concat([
                tf.nn.softmax(
                    attention_logits_stack[:, self.scope[i]:self.scope[i + 1]])
                for i in range(FLAGS.batch_size)
            ], 1)

            tower_repre = []
            for i in range(FLAGS.batch_size):
                sen_matrix = x[self.scope[i]:self.scope[i + 1]]
                layer_score = attention_score_hidden[
                    :, self.scope[i]:self.scope[i + 1]]
                layer_repre = tf.reshape(layer_score @ sen_matrix, [-1])
                tower_repre.append(layer_repre)

            stack_repre = tf.layers.dropout(tf.stack(tower_repre),
                                            rate=1 - self.keep_prob,
                                            training=is_training)

        with tf.variable_scope("loss", dtype=tf.float32, initializer=xavier()):

            discrimitive_matrix = self._GetVar(
                init_vec=init_vec,
                key='disckernel',
                name='discrimitive_matrix',
                shape=[FLAGS.num_classes, self.hidden_size * self.hier])

            bias = self._GetVar(init_vec=init_vec,
                                key='discbias',
                                name='bias',
                                shape=[FLAGS.num_classes],
                                initializer=tf.zeros_initializer())

            logits = tf.matmul(
                stack_repre, discrimitive_matrix, transpose_b=True) + bias
            regularizer = tf.contrib.layers.l2_regularizer(FLAGS.weight_decay)
            l2_loss = tf.contrib.layers.apply_regularization(
                regularizer=regularizer, weights_list=tf.trainable_variables())
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=self.label, logits=logits)) + l2_loss

            self.output = tf.nn.softmax(logits)
            tf.summary.scalar('loss', self.loss)
            self.predictions = tf.argmax(logits, 1, name="predictions")
            self.correct_predictions = tf.equal(self.predictions,
                                                tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions,
                                                   "float"),
                                           name="accuracy")

        if not is_training:

            with tf.variable_scope("test"):

                test_attention_scores = []
                for i in range(self.hier):
                    current_relation = tf.nn.embedding_lookup(
                        relation_matrixs[i], self.relation_levels[:, i])
                    current_logit = tf.matmul(current_relation,
                                              x,
                                              transpose_b=True)
                    current_score = tf.concat([
                        tf.nn.softmax(
                            current_logit[:, self.scope[j]:self.scope[j + 1]])
                        for j in range(FLAGS.batch_size)
                    ], 1)
                    test_attention_scores.append(current_score)
                test_attention_scores_stack = tf.stack(test_attention_scores,
                                                       1)

                test_tower_output = []
                for i in range(FLAGS.batch_size):
                    test_sen_matrix = tf.tile(
                        tf.expand_dims(x[self.scope[i]:self.scope[i + 1]], 0),
                        [FLAGS.num_classes, 1, 1])
                    test_layer_score = test_attention_scores_stack[
                        :, :, self.scope[i]:self.scope[i + 1]]
                    test_layer_repre = tf.reshape(
                        test_layer_score @ test_sen_matrix,
                        [FLAGS.num_classes, -1])
                    test_logits = tf.matmul(test_layer_repre,
                                            discrimitive_matrix,
                                            transpose_b=True) + bias
                    test_output = tf.diag_part(tf.nn.softmax(test_logits))
                    test_tower_output.append(test_output)

                test_stack_output = tf.reshape(
                    tf.stack(test_tower_output),
                    [FLAGS.batch_size, FLAGS.num_classes])
                self.test_output = test_stack_output
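
This variant keeps one relation matrix per hierarchy level, computes a separate attention distribution per level, and concatenates the self.hier weighted sums, which is why discrimitive_matrix has width self.hidden_size * self.hier. A small NumPy shape sketch (made-up sizes) of layer_score @ sen_matrix reshaped to a flat vector:

import numpy as np

# Shape sketch of the hierarchical attention above (made-up sizes).
hier, hidden_size, n_sent = 3, 4, 5                  # 3 hierarchy levels, 5 sentences in a bag
x = np.random.randn(n_sent, hidden_size)             # sentence encodings of one bag
queries = np.random.randn(hier, hidden_size)         # one relation query per level

logits = queries @ x.T                               # (hier, n_sent) scores
scores = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)
layer_repre = (scores @ x).reshape(-1)               # (hier * hidden_size,)
print(layer_repre.shape)                             # matches the classifier width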
Ejemplo n.º 26
0
    def freeznet(self, config=None):
        """ Simple implementation of ResNet with FreezeOut method.
        Args:
            config: dict with params:
                -iteartions: Total number iteration for train model.
                -degree: 1 or 3.
                -learning_rate: initial learning rate.
                -scaled: True or False.
        Outputs:
            Method return list with len = 2 and some params:
            [0][0]: indices - Plcaeholder which takes batch indices.
            [0][1]: all_data - Placeholder which takes all images.
            [0][2]; all_lables - Placeholder for lables.
            [0][3]: loss - Value of loss function.
            [0][4]: train - List of train optimizers.
            [0][5]: prob - softmax output, need to prediction.
            [1][0]: accuracy - Current accuracy
            [1][1]: session - tf session """
        iteration = config['iteration']
        learning_rate = config['learning_rate']
        scaled = config['scaled']

        with tf.Graph().as_default():

            indices = tf.placeholder(tf.int32, shape=[None, 1], name='indices')
            all_data = tf.placeholder(tf.float32, shape=[50000, 28, 28], name='all_data')
            input_batch = tf.gather_nd(all_data, indices, name='input_batch')
            input_batch = tf.reshape(input_batch, shape=[-1, 28, 28, 1], name='x_to_tens')

            net = tf.layers.conv2d(input_batch, 32, (7, 7), strides=(2, 2), padding='SAME', activation=tf.nn.relu, \
                                   kernel_initializer=xavier(), name='1')
            net = tf.layers.max_pooling2d(net, (2, 2), (2, 2), name='max_pool')

            net = conv_block(net, 3, [32, 32, 128], name='2', strides=(1, 1))
            net = identity_block(net, 3, [32, 32, 128], name='3')

            net = conv_block(net, 3, [64, 64, 256], name='4', strides=(1, 1))
            net = identity_block(net, 3, [64, 64, 256], name='5')

            net = tf.layers.average_pooling2d(net, (7, 7), strides=(1, 1))
            net = tf.contrib.layers.flatten(net)

            with tf.variable_scope('dense'):
                net = tf.layers.dense(net, 10, kernel_initializer=tf.contrib.layers.xavier_initializer(), name='dense')

            prob = tf.nn.softmax(net, name='soft')
            all_labels = tf.placeholder(tf.float32, [None, 10], name='all_labels')
            y = tf.gather_nd(all_labels, indices, name='y')

            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=y), name='loss')
            global_steps = []
            train = []

            for i in range(1, 6):
                global_steps.append(tf.Variable(0, trainable=False, name='var_{}'.format(i)))
                train.append(create_train(tf.train.MomentumOptimizer, str(i), \
                                          global_steps[-1], loss, iteration * (i / 10 + 0.5) ** config['degree'], \
                                           iteration, learning_rate, scaled))

            labels_hat = tf.cast(tf.argmax(net, axis=1), tf.float32, name='labels_hat')
            labels = tf.cast(tf.argmax(y, axis=1), tf.float32, name='labels')
            accuracy = tf.reduce_mean(tf.cast(tf.equal(labels_hat, labels), tf.float32, name='accuracy'))

            session = tf.Session()
            session.run(tf.global_variables_initializer())

        return [[indices, all_data, all_labels, loss, train, prob], [accuracy, session]]
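
The docstring above fixes the config keys and the layout of the returned list. A hypothetical usage sketch follows; the instance name model and the random toy data are placeholders and not from the source, while the config keys match what the code actually reads:

import numpy as np

# Hypothetical usage of freeznet; `model` stands in for an instance of the
# class that defines it, and the data below is random toy data.
config = {'iteration': 10000, 'degree': 3, 'learning_rate': 0.01, 'scaled': True}
(indices, all_data, all_labels, loss, train, prob), (accuracy, session) = \
    model.freeznet(config)

images = np.random.rand(50000, 28, 28).astype(np.float32)
labels = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, 50000)]
batch = np.random.randint(0, 50000, size=(128, 1)).astype(np.int32)

# One illustrative training step on a random batch of indices.
feed = {indices: batch, all_data: images, all_labels: labels}
_, batch_loss, batch_acc = session.run([train, loss, accuracy], feed_dict=feed)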
Ejemplo n.º 27
0
    def extractor(self, is_training, init_vec=None):
        with tf.variable_scope("sentence_encoder",
                               dtype=tf.float32,
                               initializer=xavier(),
                               reuse=tf.AUTO_REUSE):
            entity = tf.expand_dims(self.input_entity, axis=1) * tf.ones(
                shape=[1, FLAGS.max_length, 1], dtype=tf.float32)
            word_with_entity = tf.concat([self.sentence, entity], 2)
            dim_word_entity = word_with_entity.shape[2]
            t_cnn = 0.05
            "gate entity aware"
            pos_info = bn_dense_layer_v2(self.input_embedding,
                                         dim_word_entity,
                                         True,
                                         0.,
                                         'pos_info',
                                         'tanh',
                                         wd=0.,
                                         keep_prob=1.,
                                         is_train=is_training)
            word_gated_cnn = bn_dense_layer_v2(word_with_entity / t_cnn,
                                               dim_word_entity,
                                               True,
                                               0.,
                                               'word_gated',
                                               'sigmoid',
                                               False,
                                               wd=0.,
                                               keep_prob=1.,
                                               is_train=is_training)
            final_vector_cnn = word_gated_cnn * word_with_entity + (
                1 - word_gated_cnn) * pos_info
            "pcnn"
            mask_embedding = tf.constant(
                [[0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
            pcnn_mask = tf.nn.embedding_lookup(mask_embedding, self.mask)
            cnn_input = tf.expand_dims(final_vector_cnn, axis=1)
            with tf.variable_scope('conv2d_pos'):
                conv_kernel = self._GetVar(
                    init_vec=None,
                    key='convkernel',
                    name='kernel_pos',
                    shape=[1, 3, dim_word_entity, FLAGS.hidden_size],
                    trainable=True)
                conv_bias = self._GetVar(init_vec=None,
                                         key='convbias',
                                         name='bias_pos',
                                         shape=[FLAGS.hidden_size],
                                         trainable=True)
            x = tf.layers.conv2d(inputs=cnn_input,
                                 filters=FLAGS.hidden_size,
                                 kernel_size=[1, 3],
                                 strides=[1, 1],
                                 padding='same',
                                 reuse=tf.AUTO_REUSE)
            x = tf.reshape(x, [-1, FLAGS.max_length, FLAGS.hidden_size, 1])
            pcnn_x = tf.reshape(pcnn_mask, [-1, 1, FLAGS.max_length, 3]) * \
                     tf.transpose(x, [0, 2, 1, 3])
            output = tf.nn.relu(
                tf.reshape(tf.reduce_max(pcnn_x, 2),
                           [-1, FLAGS.hidden_size * 3]))
        return output
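
The mask_embedding lookup above implements piecewise max-pooling: self.mask labels every position with 0 (padding) or a segment id in {1, 2, 3}, the lookup turns each position into a one-hot over the three segments (all zeros for padding), and multiplying before the max restricts each of the three pooled maxima to its own segment. A standalone NumPy sketch with made-up sizes mirrors the same multiply-then-max:

import numpy as np

# Sketch of the piecewise max-pooling used above (made-up sizes).
max_length, hidden_size = 6, 2
x = np.random.randn(max_length, hidden_size)             # conv features for one sentence
mask = np.array([1, 1, 2, 2, 3, 0])                      # segment ids: 0 = padding, 1..3 = pieces
mask_embedding = np.array([[0, 0, 0],
                           [1, 0, 0],
                           [0, 1, 0],
                           [0, 0, 1]], dtype=np.float32)
piece_mask = mask_embedding[mask]                        # (max_length, 3), one-hot per segment

# For each piece, zero out the other positions and take the max over the sentence.
pieces = piece_mask[:, None, :] * x[:, :, None]          # (max_length, hidden_size, 3)
pooled = pieces.max(axis=0).reshape(-1)                  # (hidden_size * 3,)
print(pooled.shape)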
Ejemplo n.º 28
0
    def bulid(self, init_vec):

        with tf.variable_scope("embedding-lookup",
                               initializer=xavier(),
                               dtype=tf.float32):

            temp_word_embedding = self._GetVar(
                init_vec=init_vec,
                key='wordvec',
                name='temp_word_embedding',
                shape=[FLAGS.vocabulary_size, FLAGS.word_size],
                trainable=True)
            unk_word_embedding = self._GetVar(init_vec=init_vec,
                                              key='unkvec',
                                              name='unk_embedding',
                                              shape=[FLAGS.word_size],
                                              trainable=True)
            word_embedding = tf.concat([
                temp_word_embedding,
                tf.reshape(unk_word_embedding, [1, FLAGS.word_size]),
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.word_size), dtype=tf.float32),
                    [1, FLAGS.word_size])
            ], 0)
            temp_pos1_embedding = self._GetVar(
                init_vec=init_vec,
                key='pos1vec',
                name='temp_pos1_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size],
                trainable=True)
            temp_pos2_embedding = self._GetVar(
                init_vec=init_vec,
                key='pos2vec',
                name='temp_pos2_embedding',
                shape=[FLAGS.pos_num, FLAGS.pos_size],
                trainable=True)
            pos1_embedding = tf.concat([
                temp_pos1_embedding,
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                    [1, FLAGS.pos_size])
            ], 0)
            pos2_embedding = tf.concat([
                temp_pos2_embedding,
                tf.reshape(
                    tf.constant(np.zeros(FLAGS.pos_size, dtype=np.float32)),
                    [1, FLAGS.pos_size])
            ], 0)

            input_word = tf.nn.embedding_lookup(word_embedding,
                                                self.word)  # N,max_len,d
            input_pos1 = tf.nn.embedding_lookup(pos1_embedding, self.pos1)
            input_pos2 = tf.nn.embedding_lookup(pos2_embedding, self.pos2)

        with tf.variable_scope("entity_typing"):

            input_type_1 = tf.concat(values=[input_word, input_pos1], axis=-1)
            input_type_2 = tf.concat(values=[input_word, input_pos2], axis=-1)

            input_type_1 = tf.concat(values=[input_type_1, input_type_2],
                                     axis=0)

            lstm_cell_forward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)
            lstm_cell_backward = tf.contrib.rnn.BasicLSTMCell(FLAGS.rnn_size)

            seq_len = tf.concat([self.len, self.len], 0)  # avoid shadowing the builtin len
            (all_states, last_states) = tf.nn.bidirectional_dynamic_rnn(
                lstm_cell_forward,
                lstm_cell_backward,
                input_type_1,
                dtype=tf.float32,
                sequence_length=seq_len)
            (fw_outputs, bw_outputs) = (all_states)  # N,max_len,grusize
            outputs_1 = tf.concat([fw_outputs, bw_outputs],
                                  -1)  # N,max_len,grusize*2

            ET_att_1 = tf.get_variable('ET_att_1', [FLAGS.rnn_size * 2, 128],
                                       initializer=xavier())
            ET_att_2 = tf.get_variable('ET_att_2', [128, 1],
                                       initializer=xavier())
            padding_1 = tf.ones_like(self.mask,
                                     dtype=tf.float32) * tf.constant([-1e8])
            padding = tf.concat([padding_1, padding_1], 0)
            mask = tf.concat([self.mask, self.mask], 0)

            outputs_1_ = tf.reshape(outputs_1, [-1, FLAGS.rnn_size * 2])
            temp_alpha_1 = tf.reshape(
                tf.nn.relu(outputs_1_ @ ET_att_1) @ ET_att_2,
                [-1, FLAGS.max_len])
            temp_alpha_1 = tf.where(tf.equal(mask, 0), padding, temp_alpha_1)
            alpha_1 = tf.nn.softmax(temp_alpha_1, -1)  # N,max_len
            outputs_1 = tf.reshape(
                tf.expand_dims(alpha_1, 1) @ outputs_1,
                [-1, FLAGS.rnn_size * 2])

            ET_sent_att_1 = tf.get_variable('ET_sent_att_1',
                                            [FLAGS.rnn_size * 2, 128],
                                            initializer=xavier())
            ET_sent_att_2 = tf.get_variable('ET_sent_att_2', [128, 1],
                                            initializer=xavier())
            alpha_type_sent_1 = tf.squeeze(
                tf.nn.tanh(outputs_1 @ ET_sent_att_1) @ ET_sent_att_2, -1)

            type_repre_1 = []
            type_repre_2 = []
            for i in range(FLAGS.batch_size):
                m = outputs_1[self.scope[i]:self.scope[i + 1]]  # (n, hidden_size)
                sent_score = tf.nn.softmax(
                    alpha_type_sent_1[self.scope[i]:self.scope[i + 1]])
                type_repre_1.append(
                    tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))

            for i in range(FLAGS.batch_size):
                m = outputs_1[self.scope[i] +
                              FLAGS.batch_size:self.scope[i + 1] +
                              FLAGS.batch_size]  # (n , hidden_size)
                sent_score = tf.nn.softmax(
                    alpha_type_sent_1[self.scope[i] +
                                      FLAGS.batch_size:self.scope[i + 1] +
                                      FLAGS.batch_size])
                type_repre_2.append(
                    tf.squeeze(tf.matmul(tf.expand_dims(sent_score, 0), m)))

            type_repre_1 = tf.layers.dropout(tf.stack(type_repre_1),
                                             rate=1 - self.keep_prob,
                                             training=self.istrain)
            type_repre_2 = tf.layers.dropout(tf.stack(type_repre_2),
                                             rate=1 - self.keep_prob,
                                             training=self.istrain)

            ent1_word = tf.nn.embedding_lookup(word_embedding, self.en1_word)
            ent2_word = tf.nn.embedding_lookup(word_embedding, self.en2_word)

            en1_outputs = tf.concat([type_repre_1, ent1_word], -1)
            en2_outputs = tf.concat([type_repre_2, ent2_word], -1)

            ET_matrix = self._GetVar(
                init_vec=init_vec,
                key='disckernel',
                name='ET_matrix',
                shape=[39, FLAGS.rnn_size * 2 + FLAGS.word_size])
            ET_bias = self._GetVar(init_vec=init_vec,
                                   key='discbias',
                                   name='ET_bias',
                                   shape=[39],
                                   initializer=tf.zeros_initializer())

            logits_1 = tf.matmul(en1_outputs, ET_matrix,
                                 transpose_b=True) + ET_bias
            logits_2 = tf.matmul(en2_outputs, ET_matrix,
                                 transpose_b=True) + ET_bias

            loss_1 = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en1_type,
                                                        logits=logits_1))
            loss_2 = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.en2_type,
                                                        logits=logits_2))

            output_1 = tf.nn.sigmoid(logits_1)  # batchsize, 39
            output_2 = tf.nn.sigmoid(logits_2)
            ones = tf.ones_like(logits_1)
            zeros = tf.zeros_like(logits_1)
            self.output_1 = tf.where(tf.greater(output_1, 0.5), ones,
                                     zeros)  # batch_size, 39
            self.output_2 = tf.where(tf.greater(output_2, 0.5), ones, zeros)

        with tf.variable_scope("loss"):

            regularizer = tf.contrib.layers.l2_regularizer(0.00001)
            l2_loss = tf.contrib.layers.apply_regularization(
                regularizer=regularizer, weights_list=tf.trainable_variables())
            self.loss = l2_loss + loss_1 + loss_2
Ejemplo n.º 29
0
    def __init__(self, is_training, init_vec):
        NN.__init__(self, is_training, init_vec)
        x = self.sentence_encoder(is_training, init_vec)
        with tf.variable_scope('bag-vote',
                               initializer=xavier(),
                               dtype=tf.float32):
            hier3_relation_matrix = self._GetVar(
                init_vec=None,
                key=None,
                name='hier3_relation_matrix',
                initializer=tf.orthogonal_initializer(),
                shape=[FLAGS.num_classes, self.hidden_size])
            hier2_relation_matrix = self._GetVar(
                init_vec=None,
                key=None,
                name='hier2_relation_matrix',
                initializer=tf.orthogonal_initializer(),
                shape=[FLAGS.num_hier2_classes, self.hidden_size])
            hier1_relation_matrix = self._GetVar(
                init_vec=None,
                key=None,
                name='hier1_relation_matrix',
                initializer=tf.orthogonal_initializer(),
                shape=[FLAGS.num_hier1_classes, self.hidden_size])

            "hierarchical_rank1"
            hier1_logits = tf.matmul(x,
                                     hier1_relation_matrix,
                                     transpose_b=True)
            hier1_index = tf.nn.softmax(hier1_logits, -1)
            hier1_relation = tf.matmul(hier1_index, hier1_relation_matrix)
            "gate"
            concat_hier1 = tf.concat([x, hier1_relation], 1)
            alpha_hier1 = bn_dense_layer_v2(concat_hier1,
                                            self.hidden_size,
                                            True,
                                            scope='gate_hier1',
                                            activation='sigmoid',
                                            is_train=is_training)
            context_hier1 = alpha_hier1 * x + (1 -
                                               alpha_hier1) * hier1_relation
            "MLP linear"
            middle_hier1 = bn_dense_layer_v2(context_hier1,
                                             1024,
                                             False,
                                             scope='mlp_activation_hier1',
                                             activation='relu',
                                             is_train=is_training)
            output_hier1 = bn_dense_layer_v2(middle_hier1,
                                             self.hidden_size,
                                             False,
                                             scope='mlp_linear_hier1',
                                             activation='linear',
                                             is_train=is_training)
            "add&norm"
            output_hier1 += x
            output_hier1 = tf.contrib.layers.layer_norm(output_hier1)

            "hierarchical_rank2"
            hier2_logits = tf.matmul(x,
                                     hier2_relation_matrix,
                                     transpose_b=True)
            hier2_index = tf.nn.softmax(hier2_logits, -1)
            hier2_relation = tf.matmul(hier2_index, hier2_relation_matrix)
            "gate_hier2"
            concat_hier2 = tf.concat([x, hier2_relation], 1)
            alpha_hier2 = bn_dense_layer_v2(concat_hier2,
                                            self.hidden_size,
                                            True,
                                            scope='gate_hier2',
                                            activation='sigmoid',
                                            is_train=is_training)
            context_hier2 = alpha_hier2 * x + (1 -
                                               alpha_hier2) * hier2_relation
            "MLP linear"
            middle_hier2 = bn_dense_layer_v2(context_hier2,
                                             1024,
                                             False,
                                             scope='mlp_activation_hier2',
                                             activation='relu',
                                             is_train=is_training)
            output_hier2 = bn_dense_layer_v2(middle_hier2,
                                             self.hidden_size,
                                             False,
                                             scope='mlp_linear_hier2',
                                             activation='linear',
                                             is_train=is_training)
            "add&norm"
            output_hier2 += x
            output_hier2 = tf.contrib.layers.layer_norm(output_hier2)

            "hierarchical_rank3"
            hier3_logits = tf.matmul(x,
                                     hier3_relation_matrix,
                                     transpose_b=True)
            hier3_index = tf.nn.softmax(hier3_logits, -1)
            hier3_relation = tf.matmul(hier3_index, hier3_relation_matrix)
            "gate_hier3"
            concat_hier3 = tf.concat([x, hier3_relation], 1)
            alpha_hier3 = bn_dense_layer_v2(concat_hier3,
                                            self.hidden_size,
                                            True,
                                            scope='gate_hier3',
                                            activation='sigmoid',
                                            is_train=is_training)
            context_hier3 = alpha_hier3 * x + (1 -
                                               alpha_hier3) * hier3_relation
            "MLP linear"
            middle_hier3 = bn_dense_layer_v2(context_hier3,
                                             1024,
                                             False,
                                             scope='mlp_activation_hier3',
                                             activation='relu',
                                             is_train=is_training)
            output_hier3 = bn_dense_layer_v2(middle_hier3,
                                             self.hidden_size,
                                             False,
                                             scope='mlp_linear_hier3',
                                             activation='linear',
                                             is_train=is_training)
            "add&norm"
            output_hier3 += x
            output_hier3 = tf.contrib.layers.layer_norm(output_hier3)

            output_hier = tf.concat([output_hier1, output_hier2, output_hier3],
                                    1)
            prob_bag_hier3 = bn_dense_layer_v2(
                output_hier,
                1,
                True,
                scope='self-attn-hier3',
                activation='linear',
                is_train=is_training)  #->(bs, 1)

            tower_repre = []
            for i in range(FLAGS.batch_size):
                prob_hier3 = tf.nn.softmax(
                    tf.reshape(prob_bag_hier3[self.scope[i]:self.scope[i + 1]],
                               [1, -1]))
                sen_hier3 = tf.reshape(
                    tf.matmul(prob_hier3,
                              output_hier[self.scope[i]:self.scope[i + 1]]),
                    [self.hidden_size * 3])
                tower_repre.append(sen_hier3)
            stack_repre = tf.stack(tower_repre)

            fusion_repre = tf.layers.dropout(stack_repre,
                                             rate=1 - self.keep_prob,
                                             training=is_training)

            with tf.variable_scope("loss",
                                   dtype=tf.float32,
                                   initializer=xavier()):
                discrimitive_matrix = self._GetVar(
                    init_vec=None,
                    key='discmat',
                    name='discrimitive_matrix',
                    initializer=tf.orthogonal_initializer(),
                    shape=[FLAGS.num_classes, 3 * self.hidden_size])
                bias = self._GetVar(init_vec=None,
                                    key='disc_bias',
                                    name='bias',
                                    shape=[FLAGS.num_classes])
                logits = tf.matmul(
                    fusion_repre, discrimitive_matrix, transpose_b=True) + bias
                regularizer = tf.contrib.layers.l2_regularizer(
                    FLAGS.weight_decay)
                l2_loss = tf.contrib.layers.apply_regularization(
                    regularizer=regularizer,
                    weights_list=tf.trainable_variables())
                n_hier1 = tf.cast(FLAGS.num_hier1_classes - 1, tf.float32)
                p_hier1 = 1.0 - 0.1
                q_hier1 = 0.1 / n_hier1
                soft_hier1 = tf.one_hot(tf.cast(self.sen_hier1, tf.int32),
                                        depth=FLAGS.num_hier1_classes,
                                        on_value=p_hier1,
                                        off_value=q_hier1)
                hier1_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=soft_hier1, logits=hier1_logits))

                n_hier2 = tf.cast(FLAGS.num_hier2_classes - 1, tf.float32)
                p_hier2 = 1.0 - 0.1
                q_hier2 = 0.1 / n_hier2
                soft_hier2 = tf.one_hot(tf.cast(self.sen_hier2, tf.int32),
                                        depth=FLAGS.num_hier2_classes,
                                        on_value=p_hier2,
                                        off_value=q_hier2)

                hier2_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=soft_hier2, logits=hier2_logits))
                n_hier3 = tf.cast(FLAGS.num_classes - 1, tf.float32)
                p_hier3 = 1.0 - 0.1
                q_hier3 = 0.1 / n_hier3
                soft_hier3 = tf.one_hot(tf.cast(self.label_index, tf.int32),
                                        depth=FLAGS.num_classes,
                                        on_value=p_hier3,
                                        off_value=q_hier3)
                hier3_loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        labels=soft_hier3, logits=hier3_logits))

                self.loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(labels=self.label,
                                                            logits=logits)
                ) + hier3_loss + hier2_loss + hier1_loss + l2_loss
                self.output = tf.nn.softmax(logits)
                tf.summary.scalar('loss', self.loss)
                self.predictions = tf.argmax(logits, 1, name="predictions")
                self.correct_predictions = tf.equal(self.predictions,
                                                    tf.argmax(self.label, 1))
                self.accuracy = tf.reduce_mean(tf.cast(
                    self.correct_predictions, "float"),
                                               name="accuracy")

        if not is_training:
            self.test_output = self.output
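
The auxiliary hierarchical losses above use label smoothing built directly with tf.one_hot: the gold class gets probability 0.9 and the remaining 0.1 of the mass is spread evenly over the other classes through the on/off values. A tiny standalone sketch, with a hypothetical class count, of the target vector that construction produces:

import numpy as np

# Label-smoothing targets as built above (hypothetical class count).
num_classes = 5
label_index = 2
n = num_classes - 1
p, q = 1.0 - 0.1, 0.1 / n                    # on_value / off_value passed to tf.one_hot
soft_target = np.full(num_classes, q)
soft_target[label_index] = p
print(soft_target, soft_target.sum())        # distribution sums to 1.0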