def __init__(self, config, features, dropout_keep_prob, init_embeddings=None):
    """Constructor for PartLabelModel.

    Args:
        config: `Config` instance that describes the model.
        features: dict used to pass the model inputs and the labels used
            during training.
        dropout_keep_prob: float32 Tensor of shape [] holding the dropout
            keep probability.
        init_embeddings: (optional) float32 np.ndarray of shape
            [vocab_size, embedding_size] used to initialize the embeddings.

    Raises:
        ValueError: If multi-tag is not enabled in config.
    """
    super(PartLabelModel, self).__init__()
    input_ids = features["input_ids"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.seq_length = seq_length

    x, batch_size, feat_size = model_utils.input_embedding(
        input_ids, config, init_embeddings=init_embeddings)
    x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
    x = tf.nn.dropout(x, dropout_keep_prob)

    with tf.variable_scope('rnn'):
        (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            inputs=x,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backward_output], axis=2)

    with tf.variable_scope('output'):
        scores = layers.fully_connected(inputs=output,
                                        num_outputs=config.num_classes,
                                        activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param,
                                            self.seq_length)

    with tf.variable_scope('noise_correct'):
        pure_noise_matrix = tf.Variable(config.noise_matrix,
                                        dtype=tf.float32,
                                        name='noise_matrix',
                                        trainable=False)
        tf.logging.info(f"\n{config.noise_matrix}")
        if config.fix_noise:
            noise_matrix = pure_noise_matrix
        else:
            # Interpolate between the fixed noise matrix and the identity
            # with a learnable per-row rate.
            eye_matrix = tf.Variable(np.eye(4), dtype=tf.float32,
                                     name='eye_matrix', trainable=False)
            rate = tf.Variable(np.ones([4, 1]), dtype=tf.float32, name='rate')
            norm_rate = tf.sigmoid(rate)
            noise_matrix = tf.broadcast_to(norm_rate, [4, 4]) * pure_noise_matrix + \
                tf.broadcast_to(1 - norm_rate, [4, 4]) * eye_matrix

    with tf.variable_scope('loss'):
        if config.multitag:
            # Number of candidate labels at each position.
            candidate_label_num = tf.reduce_sum(self.label_ids, axis=2)

            # Fully labeled data: CRF negative log-likelihood. Weakly
            # labeled sequences get length 0 so they are masked out of
            # this term.
            gt_bool = tf.cast(self.label_ids, dtype=tf.bool)
            full_label_data = tf.equal(
                tf.reduce_max(candidate_label_num, axis=-1), 1)
            full_label_seq_len = tf.where(full_label_data, self.seq_length,
                                          tf.zeros_like(self.seq_length))
            self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                scores, gt_bool, full_label_seq_len, transition_param)
            nll_loss = -self.log_likelihood

            # Weakly labeled data: dot-product loss.
            gt_float = tf.cast(self.label_ids, dtype=tf.float32)
            prob = tf.nn.softmax(scores, axis=-1)
            # Mask selecting the partially labeled positions.
            partial_label_mask = tf.cast(tf.logical_and(
                candidate_label_num > 1,
                candidate_label_num < config.num_classes),
                dtype=tf.float32)
            partial_label_factor = 1.0 / (
                1e-12 + tf.reduce_sum(partial_label_mask, axis=-1))
            if config.log_dot_loss:
                # Logarithmic constraint.
                dot_loss = -partial_label_factor * tf.reduce_sum(
                    partial_label_mask * tf.log(
                        tf.clip_by_value(tf.einsum(
                            "bld, bld->bl", gt_float,
                            tf.einsum("ji, blj->bli", noise_matrix, prob)),
                            clip_value_min=1e-16,
                            clip_value_max=1)),
                    axis=-1)
            else:
                # Linear constraint.
                dot_loss = partial_label_factor * tf.reduce_sum(
                    partial_label_mask *
                    (1 - tf.clip_by_value(tf.einsum(
                        "bld, bld->bl", gt_float,
                        tf.einsum("ji, blj->bli", noise_matrix, prob)),
                        clip_value_min=0,
                        clip_value_max=1)),
                    axis=-1)
        else:
            raise ValueError("PartLabelModel requires multi-tag labels")
        self.loss = tf.reduce_mean(nll_loss + dot_loss)
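
# A minimal NumPy sketch (values illustrative only, not part of the model)
# of the noise correction and dot loss above: a learnable rate interpolates
# between the prior noise matrix and the identity, the corrected
# distribution is noise_matrix^T @ prob (the "ji, blj->bli" einsum), and
# the dot loss measures how much corrected mass falls on the candidate set.
def _demo_noise_corrected_dot_loss():
    import numpy as np

    num_classes = 4
    # Hypothetical noise matrix: entry [j, i] is the probability of
    # observing label j when the true label is i.
    pure_noise = np.full((num_classes, num_classes), 0.1)
    np.fill_diagonal(pure_noise, 0.7)

    rate = 1.0 / (1.0 + np.exp(-np.ones((num_classes, 1))))  # sigmoid(1)
    noise_matrix = rate * pure_noise + (1 - rate) * np.eye(num_classes)

    prob = np.array([0.6, 0.2, 0.1, 0.1])   # model distribution at one position
    gt = np.array([1.0, 1.0, 0.0, 0.0])     # two candidate labels
    corrected = noise_matrix.T @ prob
    dot = np.clip(gt @ corrected, 0.0, 1.0)
    print(1.0 - dot)                          # linear dot loss
    print(-np.log(np.clip(dot, 1e-16, 1.0)))  # logarithmic dot loss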
def __init__(self, config, features, dropout_keep_prob, init_embeddings=None):
    """Constructor for AAAIModel.

    Args:
        config: `Config` instance that describes the model.
        features: dict used to pass the model inputs and the labels used
            during training.
        dropout_keep_prob: float32 Tensor of shape [] holding the dropout
            keep probability.
        init_embeddings: (optional) float32 np.ndarray of shape
            [vocab_size, embedding_size] used to initialize the embeddings.

    Raises:
        ValueError: The config is invalid or one of the input tensor
            shapes is invalid.
    """
    super(AAAIModel, self).__init__()
    input_ids = features["input_ids"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.seq_length = seq_length

    x, batch_size, feat_size = model_utils.input_embedding(
        input_ids, config, init_embeddings=init_embeddings)
    x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
    x = tf.nn.dropout(x, dropout_keep_prob)

    with tf.variable_scope('rnn'):
        (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            inputs=x,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backward_output], axis=2)

    with tf.variable_scope('output'):
        scores = layers.fully_connected(inputs=output,
                                        num_outputs=config.num_classes,
                                        activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param,
                                            self.seq_length)
        # self.prediction = tf.cast(tf.argmax(scores, axis=2), tf.int32)

    with tf.variable_scope('loss'):
        candidate_label_num = tf.reduce_sum(self.label_ids, axis=2)
        exact_label_mask = tf.cast(tf.equal(candidate_label_num, 1),
                                   dtype=tf.float32)
        part_label_mask = tf.cast(tf.logical_and(
            candidate_label_num > 1,
            candidate_label_num < config.num_classes),
            dtype=tf.float32)
        # first_term_factor = 1.0 / (1e-12 + tf.reduce_sum(exact_label_mask, axis=-1))
        j_l0_norm = 1.0 / (1e-12 + tf.reduce_sum(
            part_label_mask * tf.cast(
                config.num_classes - candidate_label_num, dtype=tf.float32),
            axis=-1))
        gt = tf.cast(self.label_ids, dtype=tf.float32)
        # Open up partially labeled positions to all labels so the
        # multi-tag CRF term does not constrain them.
        masked_gt = tf.expand_dims(tf.cast(part_label_mask, dtype=tf.int64), -1) * \
            tf.ones_like(self.label_ids) + self.label_ids
        masked_gt = tf.cast(masked_gt, dtype=tf.bool)
        self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
            scores, masked_gt, self.seq_length, transition_param)
        first_term = -self.log_likelihood
        # Penalize probability mass assigned to non-candidate labels at
        # partially labeled positions.
        second_term = -j_l0_norm * tf.reduce_sum(
            part_label_mask * tf.einsum(
                "bld, bld->bl", 1.0 - gt,
                tf.log(1.0 - tf.nn.sigmoid(scores))),
            axis=-1)
        self.loss = tf.reduce_mean(first_term + second_term)
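
# A small NumPy sketch (illustrative only) of the masked_gt trick above:
# positions whose candidate set is partial (more than one but fewer than
# num_classes labels) are expanded to "all labels allowed" so the multi-tag
# CRF term leaves them unconstrained; the second loss term then handles
# those positions separately.
def _demo_masked_gt():
    import numpy as np

    num_classes = 4
    # One sequence of length 3: exact label, partial labels, exact label.
    label_ids = np.array([[[1, 0, 0, 0],
                           [1, 1, 0, 0],
                           [0, 0, 0, 1]]], dtype=np.int64)
    candidate_num = label_ids.sum(axis=2)                       # [[1, 2, 1]]
    part_mask = (candidate_num > 1) & (candidate_num < num_classes)
    masked_gt = part_mask[..., None] * np.ones_like(label_ids) + label_ids
    masked_gt = masked_gt.astype(bool)
    print(masked_gt.astype(int))
    # [[[1 0 0 0]
    #   [1 1 1 1]      <- partial position opened up to all labels
    #   [0 0 0 1]]]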
def __init__(self, config, features, dropout_keep_prob, init_embeddings=None):
    """Constructor for AttendedDictModel."""
    super(AttendedDictModel, self).__init__()
    input_ids = features["input_ids"]
    input_dicts = features["input_dicts"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.label_ids = label_ids
    self.dict = input_dicts
    self.seq_length = seq_length
    dict_shape = model_utils.get_shape_list(input_dicts, expected_rank=3)
    self.dict_dim = dict_shape[2]

    x, batch_size, feat_size = model_utils.input_embedding(
        input_ids, config, init_embeddings=init_embeddings)
    x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
    x = tf.nn.dropout(x, dropout_keep_prob)

    with tf.variable_scope('character'):
        (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            inputs=x,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backward_output], axis=2)

    with tf.variable_scope('dict_attention'):
        # Sigmoid gate over the dictionary features, predicted from the
        # character-level representation.
        dict_attention = layers.fully_connected(inputs=output,
                                                num_outputs=self.dict_dim,
                                                activation_fn=tf.sigmoid)
        # [B, L, D]
        self.dict = tf.cast(self.dict, dtype=tf.float32)
        attend_dict = tf.multiply(self.dict, dict_attention)

    with tf.variable_scope('dict'):
        (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            inputs=attend_dict,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        dict_output = tf.concat([forward_output, backward_output], axis=2)

    with tf.variable_scope('output'):
        output = tf.concat([dict_output, output], axis=2)
        scores = layers.fully_connected(inputs=output,
                                        num_outputs=config.num_classes,
                                        activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param,
                                            self.seq_length)

    with tf.variable_scope('loss'):
        if config.multitag:
            self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
            self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
        else:
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
        self.loss = tf.reduce_mean(-self.log_likelihood)
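
# A minimal NumPy sketch (toy shapes, hypothetical weights) of the
# dictionary-attention gating in AttendedDictModel above: a sigmoid gate
# predicted from the character-level BiLSTM output rescales each dictionary
# feature elementwise before the dictionary BiLSTM consumes it. The dense
# projection below stands in for layers.fully_connected.
def _demo_dict_attention_gate():
    import numpy as np

    batch, length, hidden, dict_dim = 1, 3, 5, 4
    rng = np.random.RandomState(0)
    char_output = rng.normal(size=(batch, length, 2 * hidden))  # BiLSTM output
    dict_feats = rng.randint(0, 2, size=(batch, length, dict_dim)).astype(float)

    w = rng.normal(size=(2 * hidden, dict_dim))
    gate = 1.0 / (1.0 + np.exp(-(char_output @ w)))             # sigmoid gate
    attended_dict = dict_feats * gate                           # elementwise
    print(attended_dict.shape)  # (1, 3, 4)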
def __init__(self, config, features, dropout_keep_prob, init_embeddings=None):
    """Constructor for BaselineModel.

    Args:
        config: `Config` instance that describes the model.
        features: dict used to pass the model inputs and the labels used
            during training.
        dropout_keep_prob: float32 Tensor of shape [] holding the dropout
            keep probability.
        init_embeddings: (optional) float32 np.ndarray of shape
            [vocab_size, embedding_size] used to initialize the unigram
            character embeddings.
    """
    super(BaselineModel, self).__init__()
    input_ids = features["input_ids"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.input_ids = input_ids
    self.label_ids = label_ids
    self.seq_length = seq_length

    # Embed the inputs.
    x, batch_size, feat_size = model_utils.input_embedding(
        input_ids, config, init_embeddings=init_embeddings)
    # Flatten the embedded features.
    x = tf.reshape(x, [batch_size, -1, feat_size * config.embedding_size])
    x = tf.nn.dropout(x, dropout_keep_prob)

    with tf.variable_scope('rnn'):
        (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_utils.multi_lstm_cell(
                config.hidden_size, config.num_hidden_layers,
                dropout_keep_prob),
            cell_bw=model_utils.multi_lstm_cell(
                config.hidden_size, config.num_hidden_layers,
                dropout_keep_prob),
            inputs=x,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backward_output], axis=2)

    with tf.variable_scope('output'):
        scores = layers.fully_connected(inputs=output,
                                        num_outputs=config.num_classes,
                                        activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param,
                                            self.seq_length)

    # with tf.variable_scope('noise_correct'):
    #     pure_noise_matrix = tf.Variable(config.noise_matrix, dtype=tf.float32,
    #                                     name='noise_matrix', trainable=False)
    #     tf.logging.info(f"\n{config.noise_matrix}")
    #     eye_matrix = tf.Variable(np.eye(4), dtype=tf.float32,
    #                              name='eye_matrix', trainable=False)
    #     rate = tf.Variable(np.ones([4, 1]), dtype=tf.float32, name='rate')
    #     norm_rate = tf.sigmoid(rate)
    #     noise_matrix = tf.broadcast_to(norm_rate, [4, 4]) * pure_noise_matrix + \
    #         tf.broadcast_to((1 - norm_rate), [4, 4]) * eye_matrix
    #     candidate_label_num = tf.reduce_sum(self.label_ids, axis=2)
    #     part_label_mask = tf.expand_dims(tf.cast(
    #         tf.logical_and(candidate_label_num > 1,
    #                        candidate_label_num < config.num_classes),
    #         dtype=tf.float32), axis=-1)
    #     scores = part_label_mask * tf.einsum("ji, blj->bli", noise_matrix, scores) + \
    #         (1 - part_label_mask) * scores

    with tf.variable_scope('loss'):
        if config.multitag:
            # With multi-tag labels, use crf_multitag_log_likelihood.
            self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
            self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
        else:
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)
        self.loss = tf.reduce_mean(-self.log_likelihood)
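
# A self-contained TF1 sketch (toy data, not tied to this repo) of the
# baseline objective above in its non-multitag branch: linear-chain CRF
# negative log-likelihood for training and Viterbi decoding for prediction,
# using the standard tf.contrib.crf API.
def _demo_crf_baseline():
    import numpy as np
    import tensorflow as tf

    batch, length, num_classes = 2, 5, 4
    scores = tf.constant(
        np.random.randn(batch, length, num_classes), dtype=tf.float32)
    labels = tf.constant(
        np.random.randint(0, num_classes, size=(batch, length)),
        dtype=tf.int32)
    seq_len = tf.constant([5, 3], dtype=tf.int32)

    # crf_log_likelihood creates the transition matrix when none is given.
    log_likelihood, transitions = tf.contrib.crf.crf_log_likelihood(
        scores, labels, seq_len)
    loss = tf.reduce_mean(-log_likelihood)
    prediction, _ = tf.contrib.crf.crf_decode(scores, transitions, seq_len)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run([loss, prediction]))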
def __init__(self, config, features, dropout_keep_prob, init_embeddings=None):
    """Constructor for AttendedInputModel."""
    super(AttendedInputModel, self).__init__()
    input_ids = features["input_ids"]
    input_dicts = features["input_dicts"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.label_ids = label_ids
    self.dict = input_dicts
    self.seq_length = seq_length
    dict_shape = model_utils.get_shape_list(input_dicts, expected_rank=3)
    self.dict_dim = dict_shape[2]

    x, self.batch_size, feat_size = model_utils.input_embedding(
        input_ids, config, init_embeddings=init_embeddings)

    # with tf.variable_scope('dict'):
    #     self.dict = tf.cast(self.dict, dtype=tf.float32)
    #     (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
    #         cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
    #                                             config.num_hidden_layers,
    #                                             dropout_keep_prob),
    #         cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
    #                                             config.num_hidden_layers,
    #                                             dropout_keep_prob),
    #         inputs=self.dict,
    #         sequence_length=self.seq_length,
    #         dtype=tf.float32)
    #     dict_output = tf.concat([forward_output, backward_output], axis=2)
    dict_output = tf.cast(self.dict, dtype=tf.float32)

    with tf.variable_scope('input_attention'):
        input_attention = layers.fully_connected(inputs=dict_output,
                                                 num_outputs=feat_size,
                                                 activation_fn=tf.sigmoid)
        input_bias = layers.fully_connected(inputs=dict_output,
                                            num_outputs=feat_size,
                                            activation_fn=tf.sigmoid)
        # [B, L, F] * [B, L, F, E] -> [B, L, F, E]
        input_attention = tf.expand_dims(input_attention, -1)
        attend_input = tf.multiply(x, input_attention) + tf.expand_dims(
            input_bias, axis=-1)
        attend_input = tf.reshape(
            attend_input,
            [self.batch_size, -1, feat_size * config.embedding_size])
        attend_input = tf.nn.dropout(attend_input, dropout_keep_prob)

    with tf.variable_scope('character'):
        (forward_output, backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                config.num_hidden_layers,
                                                dropout_keep_prob),
            inputs=attend_input,
            sequence_length=self.seq_length,
            dtype=tf.float32)
        output = tf.concat([forward_output, backward_output], axis=2)

    with tf.variable_scope('output'):
        output = tf.concat([dict_output, output], axis=2)
        scores = layers.fully_connected(inputs=output,
                                        num_outputs=config.num_classes,
                                        activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param,
                                            self.seq_length)

    # with tf.variable_scope('loss'):
    #     # crf
    #     if config.multitag:
    #         self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
    #         self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
    #             scores, self.label_ids, self.seq_length, transition_param)
    #     else:
    #         self.log_likelihood, _ = crf.crf_log_likelihood(
    #             scores, self.label_ids, self.seq_length, transition_param)
    #     self.loss = tf.reduce_mean(-self.log_likelihood)

    with tf.variable_scope('noise_correct'):
        pure_noise_matrix = tf.Variable(config.noise_matrix,
                                        dtype=tf.float32,
                                        name='noise_matrix',
                                        trainable=False)
        tf.logging.info(f"\n{config.noise_matrix}")
        if config.fix_noise:
            noise_matrix = pure_noise_matrix
        else:
            # Interpolate between the fixed noise matrix and the identity
            # with a learnable per-row rate.
            eye_matrix = tf.Variable(np.eye(4), dtype=tf.float32,
                                     name='eye_matrix', trainable=False)
            rate = tf.Variable(np.ones([4, 1]), dtype=tf.float32, name='rate')
            norm_rate = tf.sigmoid(rate)
            noise_matrix = tf.broadcast_to(norm_rate, [4, 4]) * pure_noise_matrix + \
                tf.broadcast_to(1 - norm_rate, [4, 4]) * eye_matrix

    with tf.variable_scope('loss'):
        if config.multitag:
            prob = tf.nn.softmax(scores, axis=-1)
            candidate_label_num = tf.reduce_sum(self.label_ids, axis=2)
            # Weakly labeled sequences get length 0 so they are masked out
            # of the CRF term.
            full_label_data = tf.equal(
                tf.reduce_max(candidate_label_num, axis=-1), 1)
            self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
            full_label_seq_len = tf.where(full_label_data, self.seq_length,
                                          tf.zeros_like(self.seq_length))
            self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                scores, self.label_ids, full_label_seq_len, transition_param)
            gt = tf.cast(self.label_ids, dtype=tf.float32)
            nll_loss = -self.log_likelihood
            part_label_mask = tf.cast(tf.logical_and(
                candidate_label_num > 1,
                candidate_label_num < config.num_classes),
                dtype=tf.float32)
            j_l0_norm = 1.0 / (1e-12 + tf.reduce_sum(part_label_mask,
                                                     axis=-1))
            if config.log_dot_loss:
                # Logarithmic dot loss.
                dot_loss = -j_l0_norm * tf.reduce_sum(
                    part_label_mask * tf.log(
                        tf.clip_by_value(tf.einsum(
                            "bld, bld->bl", gt,
                            tf.einsum("ji, blj->bli", noise_matrix, prob)),
                            clip_value_min=1e-16,
                            clip_value_max=1)),
                    axis=-1)
            else:
                # Linear dot loss.
                dot_loss = j_l0_norm * tf.reduce_sum(
                    part_label_mask *
                    (1 - tf.clip_by_value(tf.einsum(
                        "bld, bld->bl", gt,
                        tf.einsum("ji, blj->bli", noise_matrix, prob)),
                        clip_value_min=0,
                        clip_value_max=1)),
                    axis=-1)
        else:
            raise ValueError("AttendedInputModel requires multi-tag labels")
        self.loss = tf.reduce_mean(nll_loss + 0.01 * dot_loss)
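
# A toy NumPy sketch (illustrative shapes, hypothetical weights) of the
# feature-level input attention above: a gate and a bias, both predicted
# from the dictionary features, are broadcast over the embedding dimension
# to rescale and shift each feature's embedding before the character
# BiLSTM. The dense projections stand in for layers.fully_connected.
def _demo_input_attention():
    import numpy as np

    batch, length, feat, emb, dict_dim = 1, 3, 2, 4, 5
    rng = np.random.RandomState(0)
    x = rng.normal(size=(batch, length, feat, emb))          # embeddings
    dict_feats = rng.normal(size=(batch, length, dict_dim))

    w_gate = rng.normal(size=(dict_dim, feat))
    w_bias = rng.normal(size=(dict_dim, feat))
    gate = 1.0 / (1.0 + np.exp(-(dict_feats @ w_gate)))      # [B, L, F]
    bias = 1.0 / (1.0 + np.exp(-(dict_feats @ w_bias)))      # [B, L, F]

    attended = x * gate[..., None] + bias[..., None]         # [B, L, F, E]
    attended = attended.reshape(batch, -1, feat * emb)       # flatten for RNN
    print(attended.shape)  # (1, 3, 8)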