def __init__(self, hp, voca_size, method, is_training=True):
    """Build a classifier on top of two concatenated BERT encoders.

    Two ``bert.BertModel`` instances are created under the variable scopes
    ``"part1"`` and ``"part2"``; both read the same input placeholders.
    Their sequence outputs are concatenated along axis 2 and passed to a
    ``Classification`` head.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        method: selects the dtype of ``self.rf_mask``: float32 for
            0, 1, 3, 4, 5, 6 and int32 for 2. For any other value no
            ``rf_mask`` placeholder is created.
        is_training: forwarded to both BERT models.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    max_len = hp.seq_max
    head = Classification(data_generator.NLI.nli_info.num_classes)

    # Placeholders are created in a fixed order so that their
    # auto-generated graph names stay stable.
    input_ids, input_mask, segment_ids = (
        tf.placeholder(tf.int64, [None, max_len]) for _ in range(3)
    )
    label_ids = tf.placeholder(tf.int64, [None])

    if method in (0, 1, 3, 4, 5, 6):
        mask_dtype = tf.float32
    elif method == 2:
        mask_dtype = tf.int32
    else:
        mask_dtype = None
    if mask_dtype is not None:
        self.rf_mask = tf.placeholder(mask_dtype, [None, max_len])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    # Both encoders receive exactly the same arguments; only the enclosing
    # variable scope differs.
    encoder_kwargs = dict(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )
    with tf.variable_scope("part1"):
        self.model1 = bert.BertModel(**encoder_kwargs)
    with tf.variable_scope("part2"):
        self.model2 = bert.BertModel(**encoder_kwargs)

    first_out = self.model1.get_sequence_output()
    second_out = self.model2.get_sequence_output()
    joined = tf.concat([first_out, second_out], axis=2)

    # The third positional argument is passed through as in every other
    # call site; its meaning is defined by Classification.predict.
    self.pred, self.loss = head.predict(joined, label_ids, True)
    self.logits = head.logits
    self.sout = tf.nn.softmax(self.logits)
    self.acc = head.acc
def __init__(self, hp, voca_size, method, is_training=True):
    """Build a BERT classifier that also exposes per-layer gradients.

    The encoder is ``bert_get_hidden.BertModel``. After the
    ``Classification`` head is attached, the gradient of the logits is
    taken with respect to every entry of ``self.model.all_layer_outputs``
    and with respect to ``self.model.embedding_output``.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        method: selects the dtype of ``self.rf_mask``: float32 for
            0, 1, 3, 4, 5, 6 and int32 for ``METHOD_CROSSENT`` or
            ``METHOD_HINGE``. Otherwise no ``rf_mask`` is created.
        is_training: forwarded to the BERT model.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    max_len = hp.seq_max
    head = Classification(data_generator.NLI.nli_info.num_classes)

    # Creation order is kept fixed so auto-generated placeholder names
    # do not change.
    input_ids, input_mask, segment_ids = (
        tf.placeholder(tf.int64, [None, max_len]) for _ in range(3)
    )
    label_ids = tf.placeholder(tf.int64, [None])

    if method in (0, 1, 3, 4, 5, 6):
        self.rf_mask = tf.placeholder(tf.float32, [None, max_len])
    elif method in (METHOD_CROSSENT, METHOD_HINGE):
        self.rf_mask = tf.placeholder(tf.int32, [None, max_len])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    self.model = bert_get_hidden.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    self.pred, self.loss = head.predict(
        self.model.get_sequence_output(), label_ids, True)
    self.logits = head.logits
    self.sout = tf.nn.softmax(self.logits)

    # One tf.gradients result per encoder layer, in layer order, followed
    # by the gradient with respect to the embedding output.
    self.all_layer_grads = [
        tf.gradients(self.logits, layer_out)
        for layer_out in self.model.all_layer_outputs
    ]
    self.grad_emb = tf.gradients(self.logits, self.model.embedding_output)
def __init__(self, hp, voca_size, is_training=True):
    """Build a BERT encoder with a two-class ``Classification`` head.

    Scalar summaries named ``'loss'`` and ``'acc'`` are registered for
    the head's loss and accuracy.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        is_training: forwarded to the BERT model.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    max_len = hp.seq_max
    head = Classification(2)

    # Three [batch, max_len] int64 inputs followed by the label vector.
    input_ids, input_mask, segment_ids = (
        tf.placeholder(tf.int64, [None, max_len]) for _ in range(3)
    )
    label_ids = tf.placeholder(tf.int64, [None])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    self.model = bert.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    self.pred, self.loss = head.predict(
        self.model.get_sequence_output(), label_ids, True)
    self.logits = head.logits
    self.sout = tf.nn.softmax(self.logits)
    self.acc = head.acc

    for tag, value in (('loss', self.loss), ('acc', self.acc)):
        tf.summary.scalar(tag, value)
def __init__(self, hp, voca_size, num_class_list, is_training=True):
    """Build one BERT encoder shared by two classification heads.

    The first head uses ``num_class_list[0]`` classes and the labels in
    ``self.y1``. The second head uses ``num_class_list[1]`` classes and
    the labels in ``self.y2``, and is built under the variable scope
    ``"cls2"``. Per-head summary ops are stored in the two dictionaries
    of ``self.summary_list``.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        num_class_list: indexable; items 0 and 1 give the class counts
            of the first and second head.
        is_training: forwarded to the BERT model.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    max_len = hp.seq_max

    def named_int64(name, shape):
        # Every input of this model is an explicitly named int64 placeholder.
        return tf.placeholder(tf.int64, shape, name=name)

    input_ids = named_int64("input_ids", [None, max_len])
    input_mask = named_int64("input_mask", [None, max_len])
    segment_ids = named_int64("segment_ids", [None, max_len])
    self.x_list = [input_ids, input_mask, segment_ids]

    self.y1 = named_int64("y1", [None])
    self.y2 = named_int64("y2", [None])
    self.y = [self.y1, self.y2]

    first_summaries, second_summaries = {}, {}
    self.summary_list = [first_summaries, second_summaries]

    self.model = bert.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    # First head: built in the enclosing (default) scope.
    head_a = Classification(num_class_list[0])
    self.pred, self.loss = head_a.predict(
        self.model.get_sequence_output(), self.y1, True)
    self.logits = head_a.logits
    self.sout = tf.nn.softmax(self.logits)
    self.acc = head_a.acc
    first_summaries['loss1'] = tf.summary.scalar('loss', self.loss)
    first_summaries['acc1'] = tf.summary.scalar('acc', self.acc)

    # Second head.
    # NOTE(review): the flattened source does not show how many statements
    # were indented under this `with`; the whole second-head group is
    # placed inside it here -- confirm against the original layout.
    with tf.variable_scope("cls2"):
        head_b = Classification(num_class_list[1])
        self.pred2, self.loss2 = head_b.predict(
            self.model.get_sequence_output(), self.y2, True)
        self.logits2 = head_b.logits
        self.sout2 = tf.nn.softmax(self.logits2)
        self.acc2 = head_b.acc
        second_summaries['loss2'] = tf.summary.scalar('loss2', self.loss2)
        second_summaries['acc2'] = tf.summary.scalar('acc2', self.acc2)

    self.logit_list = [self.logits, self.logits2]
    self.loss_list = [self.loss, self.loss2]
    self.pred_list = [self.pred, self.pred2]
def __init__(self, hp, voca_size, is_training):
    """Build a ``bert.BertEmbeddingInOut`` classifier with token scores.

    Besides the usual id/mask/segment inputs, the encoder is handed two
    extra placeholders (``self.encoded_embedding_in`` and
    ``self.attention_mask_in``); the encoder's own ``embedding_output``
    and ``attention_mask`` are exposed as ``self.encoded_embedding_out``
    and ``self.attention_mask_out``. A dense layer named
    ``"aux_conflict"`` produces one score per token, from which
    ``self.rl_loss`` and ``self.verbose_loss`` are derived.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        is_training: forwarded to the encoder.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    max_len = hp.seq_max
    head = Classification(data_generator.NLI.nli_info.num_classes)

    # Placeholder creation order is kept fixed so auto-generated names
    # stay the same.
    input_ids, input_mask, segment_ids = (
        tf.placeholder(tf.int64, [None, max_len]) for _ in range(3)
    )
    label_ids = tf.placeholder(tf.int64, [None])
    self.rf_mask = tf.placeholder(tf.int32, [None, max_len])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    self.encoded_embedding_in = tf.placeholder(
        tf.float32, [None, max_len, hp.hidden_units])
    self.attention_mask_in = tf.placeholder(
        tf.float32, [None, max_len, max_len])

    external_inputs = (self.encoded_embedding_in, self.attention_mask_in)
    self.model = bert.BertEmbeddingInOut(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
        # Keyword spelling matches the callee's parameter name.
        embeddding_as_input=external_inputs,
    )
    self.encoded_embedding_out = self.model.embedding_output
    self.attention_mask_out = self.model.attention_mask

    self.pred, self.loss = head.predict(
        self.model.get_sequence_output(), label_ids, True)
    self.logits = head.logits
    self.sout = tf.nn.softmax(self.logits)
    self.acc = head.acc
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('acc', self.acc)

    # One raw score per token, flattened to [-1, max_len].
    token_scores = tf.layers.dense(
        self.model.get_sequence_output(), 1, name="aux_conflict")
    self.conf_logits = tf.reshape(token_scores, [-1, max_len])

    # Scores weighted by the mask (cast to float) and summed per example.
    weighted_sum = tf.reduce_sum(
        self.conf_logits * tf.cast(self.rf_mask, tf.float32), axis=1)

    # relu(score + 1), summed per example and averaged over the batch.
    shifted = tf.nn.relu(self.conf_logits + 1)
    self.verbose_loss = tf.reduce_mean(tf.reduce_sum(shifted, axis=1))
    self.rl_loss = tf.reduce_mean(weighted_sum)
def __init__(self, hp, voca_size, method, is_training=True):
    """Build a BERT NLI classifier plus a method-specific auxiliary loss.

    The main head is a ``Classification`` over
    ``data_generator.NLI.nli_info.num_classes`` classes. ``method`` then
    selects how the per-token output ``self.conf_logits`` and the
    auxiliary loss ``self.rl_loss`` are built from a dense layer named
    ``"aux_conflict"``:

    * ``0``: sigmoid scores; sum of ``scores * rf_mask`` per example.
    * ``1``: layer-normalised scores; sum of ``scores * rf_mask``.
    * ``METHOD_CROSSENT``: two-way softmax cross-entropy against
      ``one_hot(rf_mask, 2)``; ``conf_logits`` is ``P(class 1) - 0.5``.
    * ``3``: scores plus a learned scalar ``self.bias``;
      ``relu(1 - scores * rf_mask)``; also sets ``self.hinge_loss``.
    * ``4``: layer-normalised scores; ``tf.losses.hinge_loss``.
    * ``5`` / ``6``: ``tf_module.correlation_coefficient_loss`` between
      the scores and ``-rf_mask`` (``5`` also sets ``self.labels``).
    * ``METHOD_HINGE``: raw scores; summed ``tf.losses.hinge_loss``.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        method: auxiliary-loss selector (see above); it also picks the
            dtype of ``self.rf_mask`` (float32 for 0, 1, 3, 4, 5, 6;
            int32 for ``METHOD_CROSSENT`` / ``METHOD_HINGE``).
        is_training: forwarded to the BERT model.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    seq_length = hp.seq_max
    head = Classification(data_generator.NLI.nli_info.num_classes)

    # `placeholder` is the bare name already in scope for this method
    # (not `tf.placeholder`); creation order is kept fixed.
    input_ids, input_mask, segment_ids = (
        placeholder(tf.int64, [None, seq_length]) for _ in range(3)
    )
    label_ids = placeholder(tf.int64, [None])

    if method in (0, 1, 3, 4, 5, 6):
        self.rf_mask = placeholder(tf.float32, [None, seq_length])
    elif method in (METHOD_CROSSENT, METHOD_HINGE):
        self.rf_mask = placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids

    self.model = bert.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    self.pred, self.loss = head.predict(
        self.model.get_sequence_output(), label_ids, True)
    self.logits = head.logits
    self.sout = tf.nn.softmax(self.logits)
    self.acc = head.acc
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('acc', self.acc)

    def per_token_scores():
        # Single-unit "aux_conflict" projection, flattened to
        # [-1, seq_length]; shared by every branch except METHOD_CROSSENT.
        projected = tf.layers.dense(
            self.model.get_sequence_output(), 1, name="aux_conflict")
        return tf.reshape(projected, [-1, seq_length])

    def hinge_against_mask():
        # Hinge loss of conf_logits against the labels `rf_mask > 0`.
        positive = tf.greater(self.rf_mask, 0)
        return tf.losses.hinge_loss(positive, self.conf_logits)

    if method == 0:
        self.conf_logits = tf.nn.sigmoid(per_token_scores())
        per_example = tf.reduce_sum(
            self.conf_logits * tf.cast(self.rf_mask, tf.float32), axis=1)
        self.rl_loss = tf.reduce_mean(per_example)
    elif method == 1:
        self.conf_logits = tf.contrib.layers.layer_norm(per_token_scores())
        per_example = tf.reduce_sum(
            self.conf_logits * self.rf_mask, axis=1)
        self.rl_loss = tf.reduce_mean(per_example)
    elif method == METHOD_CROSSENT:
        two_way = tf.layers.dense(
            self.model.get_sequence_output(), 2, name="aux_conflict")
        class_probs = tf.nn.softmax(two_way)
        xent = tf.losses.softmax_cross_entropy(
            onehot_labels=tf.one_hot(self.rf_mask, 2), logits=two_way)
        self.conf_logits = class_probs[:, :, 1] - 0.5
        self.rl_loss = tf.reduce_mean(xent)
    elif method == 3:
        scores = per_token_scores()
        self.bias = tf.Variable(0.0)
        self.conf_logits = (scores + self.bias)
        margin = tf.nn.relu(1 - self.conf_logits * self.rf_mask)
        self.rl_loss = tf.reduce_mean(tf.reduce_mean(margin, axis=1))
        self.hinge_loss = tf.reduce_sum(hinge_against_mask())
    elif method == 4:
        self.conf_logits = tf.contrib.layers.layer_norm(per_token_scores())
        self.rl_loss = hinge_against_mask()
    elif method == 5:
        scores = per_token_scores()
        self.conf_logits = scores
        self.labels = tf.cast(tf.greater(self.rf_mask, 0), tf.float32)
        self.rl_loss = tf.reduce_mean(
            tf_module.correlation_coefficient_loss(scores, -self.rf_mask))
    elif method == 6:
        scores = per_token_scores()
        self.conf_logits = scores
        self.rl_loss = tf.reduce_mean(
            tf_module.correlation_coefficient_loss(scores, -self.rf_mask))
    elif method == METHOD_HINGE:
        self.conf_logits = per_token_scores()
        self.rl_loss = tf.reduce_sum(hinge_against_mask())

    # NOTE(review): placed after the if/elif chain so it applies to every
    # method; the flattened source does not show whether it belonged only
    # to the METHOD_HINGE branch -- confirm against the original layout.
    self.conf_softmax = tf.nn.softmax(self.conf_logits, axis=-1)
def __init__(self, hp, voca_size, method, is_training=True):
    """Build a BERT NLI classifier with a location-selected 3-way head.

    In addition to the main ``Classification`` head, a dense layer maps
    every token to three logits. The logits at the positions given by
    ``self.f_loc1`` and ``self.f_loc2`` are picked out as
    ``self.logits1`` and ``self.logits2``. ``self.loss_paired`` is the
    mean softmax cross-entropy of ``self.logits1`` against ``self.y1``;
    the cross-entropy for the second location is built but not added.

    Args:
        hp: hyper-parameter object. The attributes ``hidden_units``,
            ``num_blocks``, ``num_heads``, ``intermediate_size``,
            ``type_vocab_size`` and ``seq_max`` are read from it.
        voca_size: forwarded to ``bert.BertConfig`` as ``vocab_size``.
        method: selects the dtype of ``self.rf_mask``: float32 for
            0, 1, 3, 4, 5, 6 and int32 for 2. Otherwise no ``rf_mask``
            is created.
        is_training: forwarded to the BERT model.
    """
    bert_config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    max_len = hp.seq_max
    head = Classification(data_generator.NLI.nli_info.num_classes)
    task2_num_classes = 3

    # Placeholder creation order is kept fixed so auto-generated names
    # stay the same.
    input_ids, input_mask, segment_ids = (
        tf.placeholder(tf.int64, [None, max_len]) for _ in range(3)
    )
    label_ids = tf.placeholder(tf.int64, [None])

    if method in (0, 1, 3, 4, 5, 6):
        self.rf_mask = tf.placeholder(tf.float32, [None, max_len])
    elif method == 2:
        self.rf_mask = tf.placeholder(tf.int32, [None, max_len])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids
    self.y1 = tf.placeholder(tf.int64, [None], name="y1")
    self.y2 = tf.placeholder(tf.int64, [None], name="y2")
    self.f_loc1 = tf.placeholder(tf.int64, [None], name="f_loc1")
    self.f_loc2 = tf.placeholder(tf.int64, [None], name="f_loc2")

    self.model = bert.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
    )

    self.pred, self.loss = head.predict(
        self.model.get_sequence_output(), label_ids, True)
    self.logits = head.logits
    self.sout = tf.nn.softmax(self.logits)
    self.acc = head.acc

    # Per-token logits for the second task; task2_num_classes is 3, the
    # same width the dense layer was given as a literal before.
    token_logits = tf.layers.dense(
        self.model.get_sequence_output(), task2_num_classes)

    def pick_at(per_token, position):
        # A one-hot over positions, reshaped to [-1, max_len, 1], zeroes
        # every token except the chosen one; summing over axis 1 leaves
        # that token's logits.
        selector = tf.reshape(
            tf.one_hot(position, max_len), [-1, max_len, 1])
        return tf.reduce_sum(per_token * selector, axis=1)

    self.logits1 = pick_at(token_logits, self.f_loc1)
    self.logits2 = pick_at(token_logits, self.f_loc2)

    onehot_first = tf.one_hot(self.y1, task2_num_classes)
    onehot_second = tf.one_hot(self.y2, task2_num_classes)
    xent_first = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.logits1, labels=onehot_first)
    # Built to keep the graph identical, but not part of loss_paired.
    xent_second = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.logits2, labels=onehot_second)
    self.loss_paired = tf.reduce_mean(xent_first)