Example #1
    def __init__(self, feature_type=None, elmo_type=None, pre_embed=None):

        if feature_type is None and elmo_type is None:
            exit(0)

        self.lr = Params.lr
        self.word_dim = Params.word_dim

        self.num_classes = Params.num_classes
        self.batch_size = Params.batch_size
        self.hidden_dim = Params.hidden_dim
        self.sen_len = Params.sen_max_len
        self.sen_num = Params.doc_max_sen

        self.batch_sen_num = self.batch_size * self.sen_num

        self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
        self.input_word = tf.placeholder(dtype=tf.int32,
                                         shape=[None, self.sen_len],
                                         name='input_word')
        self.input_word_ELMO = tf.placeholder(dtype=tf.int32,
                                              shape=[None, None],
                                              name='input_word_ELMO')
        self.input_label = tf.placeholder(dtype=tf.float32,
                                          shape=[None, self.num_classes],
                                          name='input_label')

        if feature_type in ['word', 'char']:

            self.word_embedding = tf.get_variable(initializer=pre_embed,
                                                  name='word_embedding')
            all_input_words = tf.nn.embedding_lookup(self.word_embedding,
                                                     self.input_word)
            all_input_words = tf.nn.dropout(all_input_words, self.keep_prob)

            layer1_forward = self.LSTM()
            layer1_backward = self.LSTM()
            with tf.variable_scope('LSTM'):
                all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                    layer1_forward,
                    layer1_backward,
                    all_input_words,
                    dtype=tf.float32)
            all_output_words = tf.concat(axis=2, values=all_output_words)

            self.attention_w1 = tf.get_variable(name='attention_w1',
                                                shape=[2 * self.hidden_dim, 1])
            word_alpha = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(
                        tf.matmul(
                            tf.reshape(all_output_words,
                                       [-1, 2 * self.hidden_dim]),
                            self.attention_w1), [self.batch_sen_num, -1]), 1),
                [self.batch_sen_num, 1, -1])
            all_output_sens = tf.reshape(
                tf.matmul(word_alpha, all_output_words),
                [-1, 2 * self.hidden_dim])
            all_output_sens = tf.reshape(
                all_output_sens, [-1, self.sen_num, 2 * self.hidden_dim])

        if elmo_type in ['word', 'char']:
            if elmo_type == 'word':
                options_file = Params.elmo_word_options_file
                weight_file = Params.elmo_word_weight_file
                embed_file = Params.elmo_word_embed_file
            else:
                options_file = Params.elmo_char_options_file
                weight_file = Params.elmo_char_weight_file
                embed_file = Params.elmo_char_embed_file

            bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_sen_num)
            bilm_embedding_op = bilm(self.input_word_ELMO)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            bilm_representation = bilm_embedding['weighted_op']
            bilm_representation = tf.nn.dropout(bilm_representation,
                                                self.keep_prob)

            layer2_forward = self.LSTM()
            layer2_backward = self.LSTM()

            with tf.variable_scope('LSTM_ELMO'):
                elmo_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                    layer2_forward,
                    layer2_backward,
                    bilm_representation,
                    dtype=tf.float32)

            elmo_output_words = tf.concat(axis=2, values=elmo_output_words)

            self.attention_w2 = tf.get_variable(name='attention_w2',
                                                shape=[2 * self.hidden_dim, 1])

            elmo_word_alpha = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(
                        tf.matmul(
                            tf.reshape(elmo_output_words,
                                       [-1, 2 * self.hidden_dim]),
                            self.attention_w2), [self.batch_sen_num, -1]), 1),
                [self.batch_sen_num, 1, -1])
            elmo_output_sens = tf.reshape(
                tf.matmul(elmo_word_alpha, elmo_output_words),
                [-1, 2 * self.hidden_dim])
            elmo_output_sens = tf.reshape(
                elmo_output_sens, [-1, self.sen_num, 2 * self.hidden_dim])

        if feature_type is not None and elmo_type is not None:
            all_output_sens = tf.concat(
                axis=2, values=[all_output_sens, elmo_output_sens])
        elif elmo_type is not None:
            all_output_sens = elmo_output_sens

        all_output_sens = tf.nn.dropout(all_output_sens, self.keep_prob)

        layer3_forward = self.LSTM()
        layer3_backward = self.LSTM()

        with tf.variable_scope('LSTM-SEN'):
            final_output_sens, _ = tf.nn.bidirectional_dynamic_rnn(
                layer3_forward,
                layer3_backward,
                all_output_sens,
                dtype=tf.float32)

        final_output_sens = tf.concat(axis=2, values=final_output_sens)

        self.attention_w3 = tf.get_variable(name='attention_w3',
                                            shape=[2 * self.hidden_dim, 1])

        sen_alpha = tf.reshape(
            tf.nn.softmax(
                tf.reshape(
                    tf.matmul(
                        tf.reshape(final_output_sens,
                                   [-1, 2 * self.hidden_dim]),
                        self.attention_w3), [-1, self.sen_num]), 1),
            [-1, 1, self.sen_num])
        self.doc_rep = tf.reshape(tf.matmul(sen_alpha, final_output_sens),
                                  [-1, 2 * self.hidden_dim])

        self.doc_rep = tf.nn.dropout(self.doc_rep, self.keep_prob)

        out = tf.layers.dense(self.doc_rep,
                              self.num_classes,
                              use_bias=True,
                              activation=None)

        self.prob = tf.nn.softmax(out, 1)
        self.prediction = tf.argmax(self.prob, 1, name="prediction")
        self.accuracy = tf.cast(
            tf.equal(self.prediction, tf.argmax(self.input_label, 1)), "float")

        self.classfier_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=out,
                                                    labels=self.input_label))
        self._classfier_train_op = tf.train.AdamOptimizer(self.lr).minimize(
            self.classfier_loss)
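
Not shown in the snippet: with use_character_inputs=False, the input_word_ELMO placeholder expects token ids produced by the library's TokenBatcher, which adds <S>/</S> markers around each sentence (the same +2 length offset that appears in later examples). A minimal sketch, assuming a hypothetical vocabulary file vocab.txt matching the embedding_weight_file:

    from bilm import TokenBatcher

    # vocab.txt is a placeholder path; it must be the vocabulary the token embedding file was built from.
    batcher = TokenBatcher('vocab.txt')
    sentences = [['this', 'is', 'one', 'sentence'], ['another', 'one']]
    # elmo_ids has shape [n_sentences, max_sentence_len + 2]; feed it to input_word_ELMO.
    elmo_ids = batcher.batch_sentences(sentences)
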
Example #2
    def create_model(self, share_dense=True):
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, n_sub, n_sent],
                                      name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32,
                                       shape=[None, n_sub, 4],
                                       name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            self.word_encoding = tf.nn.embedding_lookup(
                self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        elif self.main_feature.lower() in [
                'elmo_word', 'elmo_char', 'elmo_qiuqiu'
        ]:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file

            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        else:
            exit('wrong feature')

        all_input_expanded = tf.expand_dims(self.word_encoding, -1)

        c_outputs = []
        for c in range(n_sub):
            pooled_outputs = []
            for i, filter_size in enumerate(filter_sizes):
                with tf.variable_scope('conv-maxpool-{}-{}'.format(
                        c, filter_size)):
                    # convolution layer
                    filter_shape = [filter_size, self.embed_size, 1, n_filters]
                    W = tf.get_variable('W',
                                        initializer=tf.truncated_normal(
                                            filter_shape, stddev=0.1))
                    b = tf.get_variable('b',
                                        initializer=tf.constant(
                                            0.1, shape=[n_filters]))
                    conv = tf.nn.conv2d(all_input_expanded,
                                        W,
                                        strides=[1] * 4,
                                        padding='VALID',
                                        name='conv')
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                    pooled = tf.nn.max_pool(
                        h,
                        ksize=[1, self.max_len - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name='pool')
                    pooled_outputs.append(pooled)
            num_filters_total = n_filters * len(filter_sizes)
            h_pool = tf.concat(pooled_outputs, 3)
            h_pool_flatten = tf.reshape(h_pool, [-1, 1, num_filters_total])
            h_drop = tf.nn.dropout(h_pool_flatten, self.dropout_keep_prob)
            dense = tf.layers.dense(h_drop, 4, activation=None)
            c_outputs.append(dense)

        self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, 10, 4])
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y,
                                                            [-1, 4])))
            # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y2, [-1,4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight *
                                       (y[:, 0] * tf.log(y_[:, 0])) -
                                       class1_weight *
                                       (y[:, 1] * tf.log(y_[:, 1])) -
                                       class2_weight *
                                       (y[:, 2] * tf.log(y_[:, 2])) -
                                       class3_weight *
                                       (y[:, 3] * tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
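
The method above references module-level names (n_sub, n_sent, filter_sizes, n_filters) defined elsewhere in the project. Plausible values, for illustration only, consistent with the [-1, 10, 4] reshape in the snippet:

    # Hypothetical configuration; the real project defines these constants elsewhere.
    n_sub = 10                    # number of aspect subtasks
    n_sent = 4                    # sentiment classes per subtask
    filter_sizes = [2, 3, 4, 5]   # CNN window widths
    n_filters = 128               # feature maps per filter size
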
Example #3
File: main_sum.py  Project: kmounlp/NER
def model_fn(features, labels, mode, params):
    # For serving features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    (features['chars'],
                     features['nchars']), features['elmo_input'])

    # Read vocabs and inputs
    ((words, nwords), (chars, nchars), elmo_inputs) = features

    dropout = params['dropout']
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    vocab_chars = tf.contrib.lookup.index_table_from_file(
        params['chars'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open() as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1
    with Path(params['chars']).open(encoding="utf8") as f:
        num_chars = sum(1 for _ in f) + params['num_oov_buckets']
    options_file = "./options.json"
    weight_file = "./weights.hdf5"

    bilm = BidirectionalLanguageModel(options_file=options_file,
                                      weight_file=weight_file,
                                      use_character_inputs=True)
    ops = bilm(elmo_inputs)
    weight_op = weight_layers("nerelmo", ops)['weighted_op']
    # Char Embeddings
    char_ids = vocab_chars.lookup(chars)
    variable = tf.get_variable('chars_embeddings',
                               [num_chars, params['dim_chars']], tf.float32)
    char_embeddings = tf.nn.embedding_lookup(variable, char_ids)
    char_embeddings = tf.layers.dropout(char_embeddings,
                                        rate=dropout,
                                        training=training)

    # Char LSTM
    dim_words = tf.shape(char_embeddings)[1]
    dim_chars = tf.shape(char_embeddings)[2]
    flat = tf.reshape(char_embeddings, [-1, dim_chars, params['dim_chars']])
    t = tf.transpose(flat, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['char_lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    _, (_, output_fw) = lstm_cell_fw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    _, (_, output_bw) = lstm_cell_bw(t,
                                     dtype=tf.float32,
                                     sequence_length=tf.reshape(nchars, [-1]))
    output = tf.concat([output_fw, output_bw], axis=-1)
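    # The literal 50 below assumes char_lstm_size == 25 (forward and backward final states concatenated).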
    char_embeddings = tf.reshape(output, [-1, dim_words, 50])

    # Word Embeddings
    word_ids = vocab_words.lookup(words)
    fasttext = np.load(params['fasttext'])['embeddings']  # np.array
    variable = np.vstack([fasttext, [[0.] * params['dim']]])
    variable = tf.Variable(variable, dtype=tf.float32)  #, trainable=False)
    word_embeddings = tf.nn.embedding_lookup(variable, word_ids)

    # Concatenate Word and Char Embeddings
    embeddings = tf.concat([word_embeddings, char_embeddings, weight_op],
                           axis=-1)
    embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(embeddings, perm=[1, 0, 2])  # Need time-major
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
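
A sketch of wiring a model_fn with this signature into tf.estimator; the params keys mirror the ones read above, but every path and size here is a placeholder rather than a value from the project:

    import tensorflow as tf

    params = {
        'dropout': 0.5, 'num_oov_buckets': 1,
        'words': 'vocab.words.txt', 'chars': 'vocab.chars.txt', 'tags': 'vocab.tags.txt',
        'fasttext': 'fasttext.npz', 'dim': 300, 'dim_chars': 100,
        'char_lstm_size': 25,  # the reshape to 50 in model_fn implies 2 * 25
        'lstm_size': 100,
    }
    estimator = tf.estimator.Estimator(model_fn, model_dir='results/model', params=params)
    # estimator.train(train_input_fn) and estimator.evaluate(eval_input_fn) would use the
    # project's input_fn, which yields ((words, nwords), (chars, nchars), elmo_input) features.
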
Example #4
    def create_model(self, share_dense=True, concat_sub=True):
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None,n_sub,4], name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None,n_sub,4], name='input_y2')

        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32, shape=[None,self.max_len], name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding, name='word_embedding')
            self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob) # new

        elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
            self.input_x = tf.placeholder(dtype=tf.int32, shape=[None,self.max_len+2], name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file
            self.bilm = BidirectionalLanguageModel(options_file,
                                                    weight_file,
                                                    use_character_inputs=False,
                                                    embedding_weight_file=embed_file,
                                                    max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output', bilm_embedding_op,l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob) # new

        else:
            exit('wrong feature')

        c_outputs = []
        for c in range(n_sub):
            with tf.variable_scope('lstm-{}'.format(c)):
                # self.forward = self.LSTM()
                # self.backward = self.LSTM()
                # x, _ = tf.nn.bidirectional_dynamic_rnn(self.forward,self.backward, self.word_encoding, dtype=tf.float32)
                # x = tf.concat(x, -1)
                #### cudnn lstm ####
                self.forward_lstm = cudnn_rnn.CudnnLSTM(num_layers=1, num_units=self.hidden_dim, direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
                self.forward_gru = cudnn_rnn.CudnnGRU(num_layers=1, num_units=self.hidden_dim, direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION, dtype=tf.float32)
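                # CudnnLSTM / CudnnGRU expect time-major input [time, batch, channels], hence the transposes around them.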
                x, _ = self.forward_lstm(tf.transpose(self.word_encoding, [1, 0, 2]))
                x, _ = self.forward_gru(x)
                x = tf.transpose(x, [1, 0, 2])

            with tf.variable_scope('pooling-{}'.format(c)):
                max_pooled = tf.reshape(tf.reduce_max(x, 1), [-1, 2*self.hidden_dim])
                avg_pooled = tf.reshape(tf.reduce_mean(x, 1), [-1, 2*self.hidden_dim])

                att_w = tf.get_variable(shape=[2*self.hidden_dim,self.hidden_dim], name='att_w')
                att_b = tf.get_variable(shape=[self.hidden_dim],name='att_b')
                att_v = tf.get_variable(shape=[self.hidden_dim,1],name='att_v')

                x_reshape = tf.reshape(x, [-1, 2*self.hidden_dim])
                score = tf.reshape(tf.matmul(tf.nn.tanh(tf.matmul(x_reshape, att_w) + att_b), att_v), [-1, 1, self.max_len])
                alpha = tf.nn.softmax(score, axis=-1)
                att_pooled = tf.reshape(tf.matmul(alpha, x), [-1, 2*self.hidden_dim])

                concat_pooled = tf.concat((max_pooled, att_pooled, avg_pooled), -1)

                concat_pooled = tf.nn.dropout(concat_pooled, self.dropout_keep_prob)
                dense = tf.layers.dense(concat_pooled, 4, activation=None)
                c_outputs.append(dense)

        self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, 10, 4])
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y, [-1,4])))
            # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y2, [-1,4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight * (y[:, 0]*tf.log(y_[:, 0]))
                                        -class1_weight * (y[:, 1]*tf.log(y_[:, 1]))
                                        -class2_weight * (y[:, 2]*tf.log(y_[:, 2]))
                                        -class3_weight * (y[:, 3]*tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
Example #5
    def _check_weighted_layer(self, l2_coef, do_layer_norm, use_top_only):
        # create the Batcher
        vocab_file = os.path.join(FIXTURES, 'vocab_test.txt')
        batcher = Batcher(vocab_file, 50)

        # load the model
        options_file = os.path.join(FIXTURES, 'options.json')
        weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
        character_ids = tf.placeholder('int32', (None, None, 50))
        model = BidirectionalLanguageModel(
            options_file, weight_file, max_batch_size=4)
        bilm_ops = model(character_ids)

        weighted_ops = []
        for k in range(2):
            ops = weight_layers(str(k), bilm_ops, l2_coef=l2_coef,
                                do_layer_norm=do_layer_norm,
                                use_top_only=use_top_only)
            weighted_ops.append(ops)

        # initialize
        self.sess.run(tf.global_variables_initializer())

        n_expected_trainable_weights = 2 * (1 + int(not use_top_only))
        self.assertEqual(len(tf.trainable_variables()),
                         n_expected_trainable_weights)
        # and one regularizer per weighted layer
        n_expected_reg_losses = 2 * int(not use_top_only)
        self.assertEqual(
            len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
            n_expected_reg_losses,
        )

        # Set the variables.
        weights = [[np.array([0.1, 0.3, 0.5]), np.array([1.1])],
                   [np.array([0.2, 0.4, 0.6]), np.array([0.88])]]
        for k in range(2):
            with tf.variable_scope('', reuse=True):
                if not use_top_only:
                    W = tf.get_variable('{}_ELMo_W'.format(k))
                    _ = self.sess.run([W.assign(weights[k][0])])
                gamma = tf.get_variable('{}_ELMo_gamma'.format(k))
                _ = self.sess.run([gamma.assign(weights[k][1])])

        # make some data
        sentences = [
            ['The', 'first', 'sentence', '.'],
            ['The', 'second'],
            ['Third']
        ]
        X_chars = batcher.batch_sentences(sentences)

        ops = model(character_ids)
        lm_embeddings, mask, weighted0, weighted1 = self.sess.run(
            [ops['lm_embeddings'], ops['mask'],
             weighted_ops[0]['weighted_op'], weighted_ops[1]['weighted_op']],
            feed_dict={character_ids: X_chars}
        )
        actual_elmo = [weighted0, weighted1]

        # check the mask first
        expected_mask = [[True, True, True, True],
                         [True, True, False, False],
                         [True, False, False, False]]
        self.assertTrue((expected_mask == mask).all())

        # Now compute the actual weighted layers
        for k in range(2):
            normed_weights = np.exp(weights[k][0] + 1.0 / 3) / np.sum(
                np.exp(weights[k][0] + 1.0 / 3))
            # masked layer normalization
            expected_elmo = np.zeros((3, 4, lm_embeddings.shape[-1]))
            if not use_top_only:
                for j in range(3):  # number of LM layers
                    if do_layer_norm:
                        mean = np.mean(lm_embeddings[:, j, :, :][mask])
                        std = np.std(lm_embeddings[:, j, :, :][mask])
                        normed_lm_embed = (lm_embeddings[:, j, :, :] - mean) / (
                                std + 1E-12)
                        expected_elmo += normed_weights[j] * normed_lm_embed
                    else:
                        expected_elmo += normed_weights[j] * lm_embeddings[
                                                             :, j, :, :]
            else:
                expected_elmo += lm_embeddings[:, -1, :, :]

            # the scale parameter
            expected_elmo *= weights[k][1]
            self.assertTrue(
                np.allclose(expected_elmo, actual_elmo[k], atol=1e-6)
            )
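
For reference, the same objects used outside the test harness: a minimal sketch of computing ELMo vectors for a batch of sentences with the character-based model (all file paths here are placeholders):

    import tensorflow as tf
    from bilm import Batcher, BidirectionalLanguageModel, weight_layers

    batcher = Batcher('vocab.txt', 50)  # placeholder vocab path; 50 max characters per token
    character_ids = tf.placeholder('int32', (None, None, 50))
    bilm = BidirectionalLanguageModel('options.json', 'weights.hdf5')  # placeholder paths
    ops = bilm(character_ids)
    elmo = weight_layers('input', ops, l2_coef=0.0)['weighted_op']

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ids = batcher.batch_sentences([['A', 'short', 'sentence', '.']])
        vectors = sess.run(elmo, feed_dict={character_ids: ids})
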
Example #6
    def create_model(self, concat_sub=True):
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 10, 4],
                                      name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32,
                                       shape=[None, n_sub, 4],
                                       name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            self.word_encoding = tf.nn.embedding_lookup(
                self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        elif self.main_feature.lower() in [
                'elmo_word', 'elmo_char', 'elmo_qiuqiu'
        ]:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file

            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        else:
            exit('wrong feature')

        self.layer_embedding = tf.get_variable(shape=[10, self.hidden_dim],
                                               name='layer_embedding')
        layer_reshape = tf.reshape(self.layer_embedding,
                                   [1, 10, 1, self.hidden_dim])
        layer_reshape_tile = tf.tile(layer_reshape,
                                     [self.batch_size, 1, self.max_len, 1])

        self.forward = self.LSTM()
        self.backwad = self.LSTM()
        self.forward2 = self.LSTM()
        self.backwad2 = self.LSTM()

        with tf.variable_scope('sentence_encode'):
            s1_out, _ = tf.nn.bidirectional_dynamic_rnn(self.forward,
                                                        self.backwad,
                                                        self.word_encoding,
                                                        dtype=tf.float32)
        # output_sentence = 0.5*(all_output_words[0] + all_output_words[1])
        s1_out = tf.concat(axis=2, values=s1_out)
        s1_reshape = tf.reshape(s1_out,
                                [-1, 1, self.max_len, 2 * self.hidden_dim])
        s1_tile = tf.tile(s1_reshape, [1, 10, 1, 1])  # replicate the first-layer LSTM output 10 times (once per subtask)

        s2_input = tf.reshape(tf.concat((s1_tile, layer_reshape_tile), -1),
                              [-1, self.max_len, 3 * self.hidden_dim])

        with tf.variable_scope('sentence_encode2'):
            s2_out, _ = tf.nn.bidirectional_dynamic_rnn(self.forward2,
                                                        self.backwad2,
                                                        s2_input,
                                                        dtype=tf.float32)
        # output_sentence = 0.5*(all_output_words[0] + all_output_words[1])
        s2_out = tf.reshape(tf.concat(axis=-1, values=s2_out),
                            [-1, 10, self.max_len, 2 * self.hidden_dim])
        res_out = s2_out + s1_tile
        res_dense = tf.layers.dense(res_out,
                                    self.hidden_dim,
                                    activation=tf.nn.relu)

        res_layer_concat = tf.reshape(
            tf.concat((res_dense, layer_reshape_tile), -1),
            [-1, 2 * self.hidden_dim])

        self.att_w = tf.get_variable(
            shape=[2 * self.hidden_dim, self.hidden_dim], name='att_w')
        self.att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
        self.att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')

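        # Additive (Bahdanau-style) attention: score = v^T tanh(W x + b), softmaxed over token positions.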
        score = tf.reshape(
            tf.matmul(
                tf.nn.tanh(
                    tf.matmul(res_layer_concat, self.att_w) + self.att_b),
                self.att_v), [-1, 1, self.max_len])
        alpha = tf.nn.softmax(score)
        layer_sentence = tf.reshape(
            tf.matmul(
                alpha,
                tf.reshape(res_out, [-1, self.max_len, 2 * self.hidden_dim])),
            [-1, n_sub, 2 * self.hidden_dim])

        if concat_sub:
            # whether to also concatenate the layer_sub (subtask embedding) information
            layer_sub = tf.reshape(self.layer_embedding,
                                   [1, n_sub, self.hidden_dim])
            layer_sub_tile = tf.tile(layer_sub, [self.batch_size, 1, 1])

            layer_total = tf.concat((layer_sentence, layer_sub_tile), -1)
            outputs = tf.reshape(layer_total, [-1, 3 * self.hidden_dim])
        else:
            outputs = tf.reshape(layer_sentence, [-1, 2 * self.hidden_dim])

        self.logits = tf.layers.dense(outputs, 4, activation=None)
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, 10, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y,
                                                            [-1, 4])))
            self.loss += tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y2,
                                                            [-1, 4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight *
                                       (y[:, 0] * tf.log(y_[:, 0])) -
                                       class1_weight *
                                       (y[:, 1] * tf.log(y_[:, 1])) -
                                       class2_weight *
                                       (y[:, 2] * tf.log(y_[:, 2])) -
                                       class3_weight *
                                       (y[:, 3] * tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
Example #7
    def create_model(self):
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, 10, 4],
                                      name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32,
                                       shape=[None, n_sub, 4],
                                       name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        self.input_ids = tf.placeholder(dtype=tf.int32,
                                        shape=[None, 190],
                                        name='input_ids')
        self.mask_ids = tf.placeholder(dtype=tf.int32,
                                       shape=[None, 190],
                                       name='mask_ids')
        self.type_ids = tf.placeholder(dtype=tf.int32,
                                       shape=[None, 190],
                                       name='type_ids')
        self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')

        #  bert_hidden_size = bert_output_layer.shape[-1].value
        #  hidden_size = output_layer.shape[-1].value

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            self.word_encoding = tf.nn.embedding_lookup(
                self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        elif self.main_feature.lower() in [
                'elmo_word', 'elmo_char', 'elmo_qiuqiu'
        ]:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file

            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        else:
            exit('wrong feature')

        self.layer_embedding = tf.get_variable(shape=[10, self.hidden_dim],
                                               name='layer_embedding')

        self.forward = self.LSTM()
        self.backwad = self.LSTM()
        # self.forward2 = self.LSTM()
        # self.backwad2 = self.LSTM()

        # add point
        self.forward2 = self.GRU()
        self.backwad2 = self.GRU()

        # BERT usage: modeling here is presumably the modeling module from the google-research/bert repo
        bert_config = modeling.BertConfig.from_json_file(
            self.config.BERT_CONFIG_FILES)

        bert_model = modeling.BertModel(config=bert_config,
                                        is_training=self.is_training,
                                        input_ids=self.input_ids,
                                        input_mask=self.mask_ids,
                                        token_type_ids=self.type_ids)
        if self.is_training is not None:
            print('bert config hidden dropout -- ---',
                  bert_config.hidden_dropout_prob)
            print('bert config hidden dropout -- ---',
                  bert_config.attention_probs_dropout_prob)
        self.word_encoding = bert_model.get_sequence_output()
        all_layer_output = bert_model.get_all_encoder_layers()
        self.word_encoding = (all_layer_output[0] + all_layer_output[1] +
                              all_layer_output[2] + all_layer_output[3]) / 4
        with tf.variable_scope('sentence_encode'):
            all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                self.forward,
                self.backwad,
                self.word_encoding,
                dtype=tf.float32)
        # output_sentence = 0.5*(all_output_words[0] + all_output_words[1])
        output_sentence = tf.concat(axis=2, values=all_output_words)

        with tf.variable_scope('sentence_encode2'):
            all_output_words, _ = tf.nn.bidirectional_dynamic_rnn(
                self.forward2,
                self.backwad2,
                output_sentence,
                dtype=tf.float32)
        # output_sentence = 0.5*(all_output_words[0] + all_output_words[1])
        output_sentence = tf.concat(axis=2, values=all_output_words)
        output_sentence = tf.layers.dense(output_sentence,
                                          self.hidden_dim,
                                          activation=tf.nn.tanh)
        sentence_reshape = tf.reshape(output_sentence,
                                      [-1, 1, self.max_len, self.hidden_dim])
        sentence_reshape_tile = tf.tile(sentence_reshape,
                                        [1, 10, 1, 1])  # replicate the sentence encoding 10 times

        layer_reshape = tf.reshape(self.layer_embedding,
                                   [1, 10, 1, self.hidden_dim])
        layer_reshape_tile = tf.tile(layer_reshape,
                                     [self.batch_size, 1, self.max_len, 1])

        embed_concat = tf.reshape(
            tf.concat(axis=3,
                      values=[sentence_reshape_tile, layer_reshape_tile]),
            [-1, 2 * self.hidden_dim])

        self.att_w = tf.get_variable(
            shape=[2 * self.hidden_dim, self.hidden_dim], name='att_w')
        self.att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
        self.att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')

        score = tf.reshape(
            tf.matmul(
                tf.nn.tanh(tf.matmul(embed_concat, self.att_w) + self.att_b),
                self.att_v), [-1, 10, self.max_len])
        alpha = tf.nn.softmax(score, axis=2)
        layer_sentence = tf.matmul(alpha, output_sentence)

        layer_reshape2 = tf.reshape(self.layer_embedding,
                                    [1, 10, self.hidden_dim])
        layer_reshape2_tile = tf.tile(layer_reshape2, [self.batch_size, 1, 1])
        layer_sentence = tf.concat(
            axis=2, values=[layer_sentence, layer_reshape2_tile])
        layer_sentence = tf.reshape(layer_sentence, [-1, 2 * self.hidden_dim])

        layer_sentence = tf.layers.dense(layer_sentence,
                                         self.hidden_dim,
                                         activation=tf.nn.relu)

        # add point
        layer_sentence = tf.nn.dropout(layer_sentence, self.dropout_keep_prob)

        self.logits = tf.layers.dense(layer_sentence, 4, activation=None)
        y_ = tf.nn.softmax(self.logits, axis=1)
        self.prob = tf.reshape(y_, [-1, 10, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y,
                                                            [-1, 4])))
            self.loss += tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y2,
                                                            [-1, 4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight *
                                       (y[:, 0] * tf.log(y_[:, 0])) -
                                       class1_weight *
                                       (y[:, 1] * tf.log(y_[:, 1])) -
                                       class2_weight *
                                       (y[:, 2] * tf.log(y_[:, 2])) -
                                       class3_weight *
                                       (y[:, 3] * tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
Example #8
    def _check_weighted_layer(self, l2_coef, do_layer_norm, use_top_only):
        # create the Batcher
        vocab_file = os.path.join(FIXTURES, 'vocab_test.txt')
        batcher = Batcher(vocab_file, 50)

        # load the model
        options_file = os.path.join(FIXTURES, 'options.json')
        weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
        character_ids = tf.placeholder('int32', (None, None, 50))
        model = BidirectionalLanguageModel(
            options_file, weight_file, max_batch_size=4)
        bilm_ops = model(character_ids)

        weighted_ops = []
        for k in range(2):
            ops = weight_layers(str(k), bilm_ops, l2_coef=l2_coef, 
                                     do_layer_norm=do_layer_norm,
                                     use_top_only=use_top_only)
            weighted_ops.append(ops)

        # initialize
        self.sess.run(tf.global_variables_initializer())

        n_expected_trainable_weights = 2 * (1 + int(not use_top_only))
        self.assertEqual(len(tf.trainable_variables()),
                         n_expected_trainable_weights)
        # and one regularizer per weighted layer
        n_expected_reg_losses = 2 * int(not use_top_only)
        self.assertEqual(
            len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
            n_expected_reg_losses,
        )

        # Set the variables.
        weights = [[np.array([0.1, 0.3, 0.5]), np.array([1.1])],
                   [np.array([0.2, 0.4, 0.6]), np.array([0.88])]]
        for k in range(2):
            with tf.variable_scope('', reuse=True):
                if not use_top_only:
                    W = tf.get_variable('{}_ELMo_W'.format(k))
                    _ = self.sess.run([W.assign(weights[k][0])])
                gamma = tf.get_variable('{}_ELMo_gamma'.format(k))
                _ = self.sess.run([gamma.assign(weights[k][1])])

        # make some data
        sentences = [
            ['The', 'first', 'sentence', '.'],
            ['The', 'second'],
            ['Third']
        ]
        X_chars = batcher.batch_sentences(sentences)

        ops = model(character_ids)
        lm_embeddings, mask, weighted0, weighted1 = self.sess.run(
            [ops['lm_embeddings'], ops['mask'],
             weighted_ops[0]['weighted_op'], weighted_ops[1]['weighted_op']],
            feed_dict={character_ids: X_chars}
        )
        actual_elmo = [weighted0, weighted1]

        # check the mask first
        expected_mask = [[True, True, True, True],
                         [True, True, False, False],
                         [True, False, False, False]]
        self.assertTrue((expected_mask == mask).all())

        # Now compute the actual weighted layers
        for k in range(2):
            normed_weights = np.exp(weights[k][0] + 1.0 / 3) / np.sum(
                                  np.exp(weights[k][0] + 1.0 / 3))
            # masked layer normalization
            expected_elmo = np.zeros((3, 4, lm_embeddings.shape[-1]))
            if not use_top_only:
                for j in range(3):  # number of LM layers
                    if do_layer_norm:
                        mean = np.mean(lm_embeddings[:, j, :, :][mask])
                        std = np.std(lm_embeddings[:, j, :, :][mask])
                        normed_lm_embed = (lm_embeddings[:, j, :, :] - mean) / (
                            std + 1E-12)
                        expected_elmo += normed_weights[j] * normed_lm_embed
                    else:
                        expected_elmo += normed_weights[j] * lm_embeddings[
                                                                    :, j, :, :]
            else:
                expected_elmo += lm_embeddings[:, -1, :, :]

            # the scale parameter
            expected_elmo *= weights[k][1]
            self.assertTrue(
                np.allclose(expected_elmo, actual_elmo[k], atol=1e-6)
            )
    def create_model(self, share_dense=True, concat_sub=True):
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None,n_sub,4], name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32, shape=[None,n_sub,4], name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32, name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32, name='output_keep_prob')

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32, shape=[None,self.max_len], name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding, name='word_embedding')
            self.word_encoding = tf.nn.embedding_lookup(self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob) # new

        elif self.main_feature.lower() in ['elmo_word', 'elmo_char', 'elmo_qiuqiu']:
            self.input_x = tf.placeholder(dtype=tf.int32, shape=[None,self.max_len+2], name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file

            self.bilm = BidirectionalLanguageModel(options_file,
                                                    weight_file,
                                                    use_character_inputs=False,
                                                    embedding_weight_file=embed_file,
                                                    max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output', bilm_embedding_op,l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding, self.dropout_keep_prob) # new

        else:
            exit('wrong feature')

        inputs_expanded = tf.expand_dims(self.word_encoding, -1)
        n_filters = 128
        filter_shape = [3, self.embed_size, 1, n_filters]
        W = tf.get_variable(initializer=tf.truncated_normal(filter_shape, stddev=0.1), name='W')
        b = tf.get_variable('b', initializer=tf.constant(0.1, shape=[n_filters]))
        conv = tf.nn.conv2d(inputs_expanded, W, strides=[1]*4, padding='VALID', name='conv2d')
        h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
        h = tf.reshape(h, [-1, self.max_len-3+1, n_filters])

        self.forward = self.LSTM()
        self.backward = self.LSTM()
        x, _ = tf.nn.bidirectional_dynamic_rnn(self.forward,self.backward, h, dtype=tf.float32)
        x = tf.concat(x, -1)
        output_sentence = tf.layers.dense(x, self.hidden_dim, activation=tf.nn.relu)

        x_reshape = tf.reshape(output_sentence, [-1, 1, self.max_len-3+1, self.hidden_dim])
        x_tile = tf.tile(x_reshape, [1, n_sub, 1, 1])  # replicate the sentence representation n_sub times

        sub_embedding = tf.get_variable(shape=[n_sub, self.hidden_dim], name='sub_embedding')
        sub_reshape = tf.reshape(sub_embedding, [1, n_sub, 1, self.hidden_dim])
        sub_tile = tf.tile(sub_reshape, [self.batch_size, 1, self.max_len-3+1, 1])

        embed_concat = tf.reshape(tf.concat((x_tile, sub_tile), -1), [-1, 2*self.hidden_dim])

        att_w = tf.get_variable(shape=[2*self.hidden_dim, self.hidden_dim], name='att_w')
        att_b = tf.get_variable(shape=[self.hidden_dim], name='att_b')
        att_v = tf.get_variable(shape=[self.hidden_dim, 1], name='att_v')

        score = tf.matmul(tf.nn.tanh(tf.matmul(embed_concat, att_w) + att_b), att_v)
        score_fit = tf.reshape(score, [-1, n_sub, self.max_len-3+1])
        alpha = tf.nn.softmax(score_fit)

        layer_sentence = tf.matmul(alpha, output_sentence)

        if concat_sub:
            # whether to concatenate the subject embedding (layer_sub) onto the sentence vector
            layer_sub = tf.reshape(sub_embedding, [1, n_sub, self.hidden_dim])
            layer_sub_tile = tf.tile(layer_sub, [self.batch_size, 1, 1])

            layer_total = tf.concat((layer_sentence, layer_sub_tile), -1)
            outputs = tf.reshape(layer_total, [-1, 2*self.hidden_dim])
        else:
            outputs = tf.reshape(layer_sentence, [-1, self.hidden_dim])

        # classify the branch output above (one 4-way softmax per subject); using
        # `outputs` keeps the logits [-1, 4], matching the reshaped labels below
        self.logits = tf.layers.dense(outputs, 4, activation=None)
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y, [-1,4])))
            # self.loss += tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=tf.reshape(self.input_y2, [-1,4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight * (y[:, 0]*tf.log(y_[:, 0]))
                                        -class1_weight * (y[:, 1]*tf.log(y_[:, 1]))
                                        -class2_weight * (y[:, 2]*tf.log(y_[:, 2]))
                                        -class3_weight * (y[:, 3]*tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
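
A hedged training-step sketch for the graph built by the method above; the Adam optimizer, learning rate, session driver, and the arrays batch_x / batch_y are illustrative assumptions, not part of the original listing:

# Hypothetical driver; `model` is an instance of the surrounding class after
# create_model() has been called.
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(model.loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {model.input_x: batch_x,          # [batch_size, max_len] token ids
            model.input_y: batch_y,          # [batch_size, n_sub, 4] one-hot labels
            model.dropout_keep_prob: 0.5,
            model.output_keep_prob: 1.0}
    _, loss_val = sess.run([train_op, model.loss], feed_dict=feed)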
    def create_model(self, share_dense=True, concat_sub=True):
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, n_sub, 4],
                                      name='input_y')
        self.input_y2 = tf.placeholder(dtype=tf.float32,
                                       shape=[None, n_sub, 4],
                                       name='input_y2')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            # look up the trainable embedding variable (not the raw initializer array)
            self.word_encoding = tf.nn.embedding_lookup(
                self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        elif self.main_feature.lower() in [
                'elmo_word', 'elmo_char', 'elmo_qiuqiu'
        ]:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file
            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        else:
            exit('wrong feature')

        c_outputs = []
        for c in range(n_sub):
            with tf.variable_scope('lstm-{}'.format(c)):
                # self.forward = self.LSTM()
                # self.backward = self.LSTM()
                # x, _ = tf.nn.bidirectional_dynamic_rnn(self.forward,self.backward, self.word_encoding, dtype=tf.float32)
                # x = tf.concat(x, -1)
                #### cudnn LSTM (expects time-major input [time, batch, dim]; requires a GPU) ####
                self.forward = cudnn_rnn.CudnnLSTM(
                    num_layers=1,
                    num_units=self.hidden_dim,
                    direction=cudnn_rnn.CUDNN_RNN_BIDIRECTION,
                    dtype=tf.float32)
                x, _ = self.forward(tf.transpose(self.word_encoding,
                                                 [1, 0, 2]))
                x = tf.transpose(x, [1, 0, 2])

            with tf.variable_scope('conv-{}'.format(c)):
                inputs_expanded = tf.expand_dims(x, -1)
                filter_shape = [3, 2 * self.hidden_dim, 1, n_filters]
                W = tf.get_variable(initializer=tf.truncated_normal(
                    filter_shape, stddev=0.1),
                                    name='W')
                b = tf.get_variable('b',
                                    initializer=tf.constant(0.1,
                                                            shape=[n_filters]))
                conv = tf.nn.conv2d(inputs_expanded,
                                    W,
                                    strides=[1] * 4,
                                    padding='VALID',
                                    name='conv2d')
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                max_pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, self.max_len - 3 + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name='max_pool')
                avg_pooled = tf.nn.avg_pool(
                    h,
                    ksize=[1, self.max_len - 3 + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name='avg_pool')
                concat_pooled = tf.reshape(
                    tf.concat((max_pooled, avg_pooled), -1),
                    [-1, 2 * n_filters])

                concat_pooled = tf.nn.dropout(concat_pooled,
                                              self.dropout_keep_prob)
                dense = tf.layers.dense(concat_pooled, 4, activation=None)
                c_outputs.append(dense)

        self.logits = tf.reshape(tf.concat(c_outputs, axis=1), [-1, n_sub, 4])
        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y,
                                                            [-1, 4])))
            self.loss += tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y2,
                                                            [-1, 4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight *
                                       (y[:, 0] * tf.log(y_[:, 0])) -
                                       class1_weight *
                                       (y[:, 1] * tf.log(y_[:, 1])) -
                                       class2_weight *
                                       (y[:, 2] * tf.log(y_[:, 2])) -
                                       class3_weight *
                                       (y[:, 3] * tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
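
Both create_model variants above read module-level constants that are not included in this excerpt: n_sub (the number of subject/aspect heads, evidently 10 in the original project) and, in the second variant, n_filters, which is only defined locally (as 128) in the first. A placeholder definition, with illustrative values only, might be:

# Assumed module-level configuration (values are illustrative, not from the listing)
n_sub = 10       # number of subject/aspect heads
n_filters = 128  # convolution filters per subject branch, mirroring the first variant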
Example #11
0
File: main2.py Project: kmounlp/NER
def model_fn(features, labels, mode, params):
    # For serving, features are a bit different
    if isinstance(features, dict):
        features = ((features['words'], features['nwords']),
                    features['elmo_input'])

    # Read vocabs and inputs
    dropout = params['dropout']
    ((words, nwords), elmo_inputs) = features
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    vocab_words = tf.contrib.lookup.index_table_from_file(
        params['words'], num_oov_buckets=params['num_oov_buckets'])
    with Path(params['tags']).open(encoding="utf8") as f:
        indices = [idx for idx, tag in enumerate(f) if tag.strip() != 'O']
        num_tags = len(indices) + 1

    options_file = r"C:\Users\NLP-Ho\Downloads\bilm-tf-master\output_path\to\checkpoint\options.json"
    weight_file = r"C:\Users\NLP-Ho\Downloads\bilm-tf-master\output_path\to\weights.hdf5"
    bilm = BidirectionalLanguageModel(options_file=options_file,
                                      weight_file=weight_file,
                                      use_character_inputs=True)
    ops = bilm(elmo_inputs)
    weight_op = weight_layers("nerelmo", ops, use_top_only=True)['weighted_op']
    # elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
    # embeddings = elmo(
    #     inputs={
    #         "tokens": words,
    #         "sequence_len": nwords
    #     },
    #     signature="tokens",
    #     as_dict=True)["elmo"]
    # Word Embeddings
    # from allennlp.modules.elmo import Elmo, batch_to_ids
    #
    # options_file = r"C:\Users\NLP-Ho\Downloads\bilm-tf-master\output_path\to\checkpoint\options.json"
    # weight_file = r"C:\Users\NLP-Ho\Downloads\bilm-tf-master\output_path\to\weights.hdf5"
    #
    # elmo = Elmo(options_file, weight_file, 2, dropout=0)
    #
    # # use batch_to_ids to convert sentences to character ids
    # character_ids = batch_to_ids(words)
    # # print(character_ids[0].shape)
    # # print(len(character_ids))
    #
    # embeddings = elmo(character_ids)
    # print(embeddings['elmo_representations'])
    # word_ids = vocab_words.lookup(words)
    # BiLM = BidirectionalLanguageModel(options_file, weight_file)
    # ops = BiLM(word_ids)
    # weight_op = weight_layers("name", ops)['weighted_op']

    # glove = np.load(params['W2V'])['embeddings']  # np.array
    # variable = np.vstack([glove, [[0.]*params['dim']]])
    # variable = tf.Variable(variable, dtype=tf.float32, trainable=False)
    # embeddings = tf.nn.embedding_lookup(variable, word_ids)
    # embeddings = tf.layers.dropout(embeddings, rate=dropout, training=training)

    # LSTM
    t = tf.transpose(weight_op, perm=[1, 0, 2])
    lstm_cell_fw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.LSTMBlockFusedCell(params['lstm_size'])
    lstm_cell_bw = tf.contrib.rnn.TimeReversedFusedRNN(lstm_cell_bw)
    output_fw, _ = lstm_cell_fw(t, dtype=tf.float32, sequence_length=nwords)
    output_bw, _ = lstm_cell_bw(t, dtype=tf.float32, sequence_length=nwords)
    output = tf.concat([output_fw, output_bw], axis=-1)
    output = tf.transpose(output, perm=[1, 0, 2])
    output = tf.layers.dropout(output, rate=dropout, training=training)

    # CRF
    logits = tf.layers.dense(output, num_tags)
    crf_params = tf.get_variable("crf", [num_tags, num_tags], dtype=tf.float32)
    pred_ids, _ = tf.contrib.crf.crf_decode(logits, crf_params, nwords)

    if mode == tf.estimator.ModeKeys.PREDICT:
        # Predictions
        reverse_vocab_tags = tf.contrib.lookup.index_to_string_table_from_file(
            params['tags'])
        pred_strings = reverse_vocab_tags.lookup(tf.to_int64(pred_ids))
        predictions = {'pred_ids': pred_ids, 'tags': pred_strings}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        # Loss
        vocab_tags = tf.contrib.lookup.index_table_from_file(params['tags'])
        tags = vocab_tags.lookup(labels)
        log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
            logits, tags, nwords, crf_params)
        loss = tf.reduce_mean(-log_likelihood)

        # Metrics
        weights = tf.sequence_mask(nwords)
        metrics = {
            'acc': tf.metrics.accuracy(tags, pred_ids, weights),
            'precision': precision(tags, pred_ids, num_tags, indices, weights),
            'recall': recall(tags, pred_ids, num_tags, indices, weights),
            'f1': f1(tags, pred_ids, num_tags, indices, weights),
        }
        for metric_name, op in metrics.items():
            tf.summary.scalar(metric_name, op[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              eval_metric_ops=metrics)

        elif mode == tf.estimator.ModeKeys.TRAIN:
            train_op = tf.train.AdamOptimizer().minimize(
                loss, global_step=tf.train.get_or_create_global_step())
            return tf.estimator.EstimatorSpec(mode,
                                              loss=loss,
                                              train_op=train_op)
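
A hedged sketch of how this model_fn could be wired into a tf.estimator.Estimator; the params values, model_dir, and the train/eval input functions are assumptions, and the input functions must yield the ((words, nwords), elmo_input) feature structure consumed above:

# Illustrative wiring only; train_input_fn / eval_input_fn are hypothetical
# functions producing ((words, nwords), elmo_input) features and tag labels.
params = {
    'dropout': 0.5,
    'num_oov_buckets': 1,
    'lstm_size': 100,
    'words': 'vocab.words.txt',   # assumed vocabulary file
    'tags': 'vocab.tags.txt',     # assumed tag file
}
estimator = tf.estimator.Estimator(model_fn, model_dir='results/model', params=params)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn, throttle_secs=120)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)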
    def create_model(self, share_dense=True):
        self.input_y = tf.placeholder(dtype=tf.float32,
                                      shape=[None, n_sub, n_sent],
                                      name='input_y')
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                name='dropout_keep_prob')
        self.output_keep_prob = tf.placeholder(dtype=tf.float32,
                                               name='output_keep_prob')

        if self.main_feature.lower() in ['word', 'char']:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len],
                                          name='input_x')
            self.word_embedding = tf.get_variable(initializer=self.embedding,
                                                  name='word_embedding')
            # look up the trainable embedding variable (not the raw initializer array)
            self.word_encoding = tf.nn.embedding_lookup(
                self.word_embedding, self.input_x)
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        elif self.main_feature.lower() in [
                'elmo_word', 'elmo_char', 'elmo_qiuqiu'
        ]:
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_len + 2],
                                          name='input_x')
            if self.main_feature == 'elmo_word':
                options_file = self.config.elmo_word_options_file
                weight_file = self.config.elmo_word_weight_file
                embed_file = self.config.elmo_word_embed_file
            elif self.main_feature == 'elmo_char':
                options_file = self.config.elmo_char_options_file
                weight_file = self.config.elmo_char_weight_file
                embed_file = self.config.elmo_char_embed_file
            elif self.main_feature == 'elmo_qiuqiu':
                options_file = self.config.elmo_qiuqiu_options_file
                weight_file = self.config.elmo_qiuqiu_weight_file
                embed_file = self.config.elmo_qiuqiu_embed_file

            self.bilm = BidirectionalLanguageModel(
                options_file,
                weight_file,
                use_character_inputs=False,
                embedding_weight_file=embed_file,
                max_batch_size=self.batch_size)
            bilm_embedding_op = self.bilm(self.input_x)
            bilm_embedding = weight_layers('output',
                                           bilm_embedding_op,
                                           l2_coef=0.0)
            self.word_encoding = bilm_embedding['weighted_op']
            self.word_encoding = tf.nn.dropout(self.word_encoding,
                                               self.dropout_keep_prob)  # new

        else:
            exit('wrong feature')

        rcnn_outputs = []
        for i in range(n_sub):
            with tf.variable_scope('rcnn_output_%d' % i):
                output_bigru = self.bi_gru(self.word_encoding, hidden_size)
                output = self.textcnn(output_bigru, self.max_len)
                rcnn_outputs.append(output)

        n_filter_total = n_filter * len(filter_sizes)
        outputs = tf.reshape(tf.concat(rcnn_outputs, 1),
                             (-1, n_sub, n_filter_total))

        if share_dense:
            cnn_outputs = tf.reshape(outputs, (-1, n_filter_total))
            W = tf.get_variable('W', shape=[n_filter_total, self.n_classes])
            b = tf.get_variable('b',
                                initializer=tf.constant(0.1,
                                                        shape=[self.n_classes
                                                               ]))
            self.logits = tf.nn.xw_plus_b(cnn_outputs, W, b, name='scores')
        else:
            cnn_outputs = tf.reshape(tf.concat(outputs, 1),
                                     (-1, n_sub, n_filter_total))
            W = tf.get_variable(
                'W', shape=[self.batch_size, n_filter_total, self.n_classes])
            b = tf.get_variable('b',
                                initializer=tf.constant(0.1,
                                                        shape=[self.n_classes
                                                               ]))
            self.logits = tf.nn.xw_plus_b(cnn_outputs, W, b, name='scores')

        y_ = tf.nn.softmax(self.logits)
        self.prob = tf.reshape(y_, [-1, n_sub, 4])
        self.prediction = tf.argmax(self.prob, 2, name="prediction")

        if not self.config.balance:
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=self.logits,
                                                        labels=tf.reshape(
                                                            self.input_y,
                                                            [-1, 4])))
        else:
            #  class0_weight = 0.882 * self.n_classes  # weight coefficient for class 0
            #  class1_weight = 0.019 * self.n_classes  # weight coefficient for class 1
            #  class2_weight = 0.080 * self.n_classes  # weight coefficient for class 2
            #  class3_weight = 0.019 * self.n_classes  # weight coefficient for class 3
            class0_weight = 1  # weight coefficient for class 0
            class1_weight = 3  # weight coefficient for class 1
            class2_weight = 3  # weight coefficient for class 2
            class3_weight = 3  # weight coefficient for class 3
            #  coe = tf.constant([1., 1., 1., 1.])
            #  y = tf.reshape(self.input_y, [-1, 4]) * coe
            #  self.loss = -tf.reduce_mean(y * tf.log(y_))

            y = tf.reshape(self.input_y, [-1, 4])
            self.loss = tf.reduce_mean(-class0_weight *
                                       (y[:, 0] * tf.log(y_[:, 0])) -
                                       class1_weight *
                                       (y[:, 1] * tf.log(y_[:, 1])) -
                                       class2_weight *
                                       (y[:, 2] * tf.log(y_[:, 2])) -
                                       class3_weight *
                                       (y[:, 3] * tf.log(y_[:, 3])))
            #  tf.reduce_mean(-class1_weight*tf.reduce_sum(y_[:,0] * tf.log(y[:,0])-class2_weight*tf.reduce_sum(y_[:,1] * tf.log(y[:,1])-class3_weight*tf.reduce_sum(y_[:,2] * tf.log(y[:,2]))

        return self
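
This last variant calls two helpers, self.bi_gru and self.textcnn, and reads hidden_size, n_filter, filter_sizes, and n_sent from module scope; none of these appear in the excerpt. A minimal sketch of what they could look like as methods of the same (unnamed) class, with assumed names and values:

# Assumed module-level configuration (illustrative values only)
hidden_size = 256          # GRU units per direction
n_filter = 128             # filters per kernel size in textcnn
filter_sizes = [2, 3, 4]   # convolution kernel widths
n_sent = 4                 # label width per subject; the loss reshapes labels to [-1, 4]

def bi_gru(self, inputs, hidden_size):
    # Hypothetical helper: one bidirectional GRU layer over the token axis.
    cell_fw = tf.nn.rnn_cell.GRUCell(hidden_size)
    cell_bw = tf.nn.rnn_cell.GRUCell(hidden_size)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                 dtype=tf.float32)
    return tf.concat(outputs, -1)  # [batch, max_len, 2 * hidden_size]

def textcnn(self, inputs, max_len):
    # Hypothetical helper: parallel 1-D convolutions with max-over-time pooling,
    # returning [batch, n_filter * len(filter_sizes)] to match n_filter_total above.
    pooled = []
    for k in filter_sizes:
        conv = tf.layers.conv1d(inputs, filters=n_filter, kernel_size=k,
                                activation=tf.nn.relu)
        pooled.append(tf.reduce_max(conv, axis=1))
    return tf.concat(pooled, -1)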