def call(self, pooled_output, label_ids):
    # Apply dropout to the pooled representation only at training time.
    if self.is_training:
        pooled_output = dropout(pooled_output, 0.1)
    self.pooled_output = pooled_output

    # Classification head with explicit variables (rather than tf.layers.dense)
    # so the variable names and initialization match BERT's output layer.
    output_weights = tf1.get_variable(
        "output_weights", [self.num_classes, self.hidden_size],
        initializer=tf1.truncated_normal_initializer(stddev=0.02))
    output_bias = tf1.get_variable(
        "output_bias", [self.num_classes],
        initializer=tf1.zeros_initializer())
    logits = tf.matmul(pooled_output, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    self.logits = logits
    self.preds = tf.cast(tf.argmax(logits, axis=-1), tf.int32)

    # Sparse cross-entropy over integer label ids.
    self.loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=label_ids)
    self.loss = tf.reduce_mean(self.loss_arr)
    self.acc = tf_module.accuracy(self.logits, label_ids)

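# Standalone sketch (not from the original source): the explicit head above
# computes pooled · Wᵀ + b, i.e. an ordinary dense/linear layer; a NumPy
# shape check with dummy sizes.
import numpy as np

hidden_size_demo, num_classes_demo = 8, 3
pooled_demo = np.random.randn(2, hidden_size_demo).astype(np.float32)
W_demo = np.random.randn(num_classes_demo, hidden_size_demo).astype(np.float32) * 0.02
b_demo = np.zeros(num_classes_demo, np.float32)
logits_demo = pooled_demo @ W_demo.T + b_demo  # matmul(..., transpose_b=True) + bias_add
assert logits_demo.shape == (2, num_classes_demo)
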
def __init__(self, hp, voca_size, num_classes, is_training, feature_loc=0):
    # Inputs: token ids [batch_size, seq_max] and integer labels [batch_size].
    self.x = tf.placeholder(dtype=tf.int32, shape=[None, hp.seq_max])
    self.y = tf.placeholder(tf.int32, shape=(None,))

    self.enc = transformer_encode(self.x, hp, voca_size, is_training)

    # Final linear projection on the feature token at position feature_loc.
    self.logits = tf.layers.dense(self.enc[:, feature_loc, :], num_classes,
                                  name="cls_dense")
    self.acc = tf_module.accuracy(self.logits, self.y)

    # Loss: softmax cross-entropy against one-hot labels.
    labels = tf.one_hot(self.y, num_classes)
    self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.logits, labels=labels)
    self.loss = tf.reduce_mean(self.loss_arr)
    self.f1 = tf_module.f1(self.logits, self.y)

def __init__(self, num_classes, ssdr_config, core_model, seq_length, is_training):
    super(WSSDRWrapperInterface, self).__init__()
    placeholder = tf.compat.v1.placeholder
    bert_config = BertConfig.from_json_file(os.path.join(data_path, "bert_config.json"))
    def_max_length = FLAGS.max_def_length
    loc_max_length = FLAGS.max_loc_length
    tf_logging.debug("WSSDRWrapper init()")
    tf_logging.debug("seq_length %d" % seq_length)
    tf_logging.debug("def_max_length %d" % def_max_length)
    tf_logging.debug("loc_max_length %d" % loc_max_length)

    # Main sequence inputs.
    self.input_ids = placeholder(tf.int64, [None, seq_length], name="input_ids")
    self.input_mask_ = placeholder(tf.int64, [None, seq_length], name="input_mask")
    self.segment_ids = placeholder(tf.int64, [None, seq_length], name="segment_ids")

    # Dictionary-definition inputs.
    self.d_location_ids = placeholder(tf.int64, [None, loc_max_length], name="d_location_ids")
    self.d_input_ids = placeholder(tf.int64, [None, def_max_length], name="d_input_ids")
    self.d_input_mask = placeholder(tf.int64, [None, def_max_length], name="d_input_mask")
    self.d_segment_ids = placeholder(tf.int64, [None, def_max_length], name="d_segment_ids")
    self.ab_mapping = placeholder(tf.int64, [None, 1], name="ab_mapping")
    if ssdr_config.use_ab_mapping_mask:
        self.ab_mapping_mask = placeholder(tf.int64, [None, FLAGS.def_per_batch],
                                           name="ab_mapping_mask")
    else:
        self.ab_mapping_mask = None

    # y_lookup: [batch, seq_length], 1 at the positions listed in d_location_ids.
    y_lookup = get_y_lookup_from_location_ids(self.d_location_ids, seq_length)
    self.y_cls = placeholder(tf.int64, [None])

    self.network = core_model(
        config=bert_config,
        ssdr_config=ssdr_config,
        is_training=is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask_,
        token_type_ids=self.segment_ids,
        d_input_ids=self.d_input_ids,
        d_input_mask=self.d_input_mask,
        d_segment_ids=self.d_segment_ids,
        d_location_ids=self.d_location_ids,
        ab_mapping=self.ab_mapping,
        ab_mapping_mask=self.ab_mapping_mask,
        use_one_hot_embeddings=False,
    )

    # Classification head on the pooled output.
    self.cls_logits = keras.layers.Dense(num_classes)(self.network.get_pooled_output())
    self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.cls_logits, labels=self.y_cls)
    self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

    # Per-token lookup head: for each position, predicts whether a dictionary
    # lookup applies there.
    self.lookup_logits = keras.layers.Dense(2)(self.network.get_sequence_output())
    self.lookup_p_at_1 = tf_module.p_at_1(self.lookup_logits[:, :, 1], y_lookup)
    self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.lookup_logits, labels=y_lookup)
    self.y_lookup = y_lookup
    self.lookup_loss_per_example = tf.reduce_sum(self.lookup_loss_arr, axis=-1)
    self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
    self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)

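# Sketch (assumption, not the repo's actual helper): one plausible behavior of
# get_y_lookup_from_location_ids — a [batch, seq_length] indicator that is 1
# at every position listed in d_location_ids. How padding entries are treated
# in the real helper is unknown; here index 0 is naively marked as well.
import numpy as np

def y_lookup_sketch(d_location_ids, seq_length):
    batch = d_location_ids.shape[0]
    y = np.zeros([batch, seq_length], np.int64)
    for row, locs in enumerate(d_location_ids):
        y[row, locs] = 1
    return y

# y_lookup_sketch(np.array([[2, 5, 0]]), 8) -> [[1, 0, 1, 0, 0, 1, 0, 0]]
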
def __init__(self, hp, voca_size, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    seq_length = hp.seq_max
    use_tpu = False
    input_ids = tf.placeholder(tf.int64, [None, seq_length])
    input_mask = tf.placeholder(tf.int64, [None, seq_length])
    segment_ids = tf.placeholder(tf.int64, [None, seq_length])
    s_portion = tf.placeholder(tf.float32, [None])
    d_portion = tf.placeholder(tf.float32, [None])
    s_sum = tf.placeholder(tf.int64, [None])
    d_sum = tf.placeholder(tf.int64, [None])
    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = [s_portion, d_portion]
    self.y_sum = [s_sum, d_sum]
    use_one_hot_embeddings = use_tpu

    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    enc = self.model.get_sequence_output()
    pool = tf.layers.dense(enc[:, 0, :], hp.hidden_units, name="pool")
    s_logits = tf.layers.dense(pool, 2, name="cls_dense_support")
    d_logits = tf.layers.dense(pool, 2, name="cls_dense_dispute")

    loss = 0
    self.acc = []
    for logits, y, mask_sum in [(s_logits, self.y[0], s_sum),
                                (d_logits, self.y[1], d_sum)]:
        # Binarize the float portion targets at 0.5 to get class labels.
        label_ids = tf.cast(tf.greater(y, 0.5), tf.int32)
        labels = tf.one_hot(label_ids, 2)
        acc = tf_module.accuracy(logits, label_ids)
        self.acc.append(acc)
        loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        # Weight each example's loss by its support/dispute count.
        loss_arr = loss_arr * tf.cast(mask_sum, tf.float32)
        loss += tf.reduce_sum(loss_arr)

    self.loss = loss
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('s_acc', self.acc[0])
    tf.summary.scalar('d_acc', self.acc[1])

def __init__(self, hp, hp_aux, voca_size, num_classes, is_training, feature_loc=0):
    # Inputs: token ids [batch_size, seq_max] and integer labels [batch_size].
    self.x = tf.placeholder(dtype=tf.int32, shape=[None, hp.seq_max])
    self.y = tf.placeholder(tf.int32, shape=(None,))

    # Auxiliary 3-way classifier whose padded logits are injected into the
    # main encoder as an extra feature. A separate name is used so the
    # constructor's num_classes is not shadowed.
    with tf.variable_scope("aux"):
        self.y_aux = tf.placeholder(tf.int32, shape=(None,))
        aux_num_classes = 3
        enc_aux = transformer_encode(self.x, hp_aux, voca_size, is_training)
        aux_logit = tf.layers.dense(enc_aux[:, 0, :], aux_num_classes, name="cls_dense")
        labels = tf.one_hot(self.y_aux, aux_num_classes)
        loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=aux_logit, labels=labels)
        self.aux_loss = tf.reduce_mean(loss_arr)
        self.aux_acc = tf_module.accuracy(aux_logit, self.y_aux)

        # Zero-pad the aux logits up to the encoder width so they can be fed
        # to transformer_aux as one extra feature vector.
        aux_v = tf.reshape(aux_logit, [-1, 1, aux_num_classes])
        added_dim = hp.hidden_units - aux_num_classes
        aux_v = tf.pad(aux_v, [(0, 0), (0, 0), (0, added_dim)])

    self.enc = transformer_aux(self.x, hp, voca_size, is_training, aux_v)

    # Final linear projection on the feature token.
    self.logits = tf.layers.dense(self.enc[:, feature_loc, :], num_classes,
                                  name="cls_dense")
    self.acc = tf_module.accuracy(self.logits, self.y)

    labels = tf.one_hot(self.y, num_classes)
    self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.logits, labels=labels)
    self.loss = tf.reduce_mean(self.loss_arr)

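# Sketch (not from the original source): shape check for the aux-feature
# injection above — the 3-dim aux logits are zero-padded up to the encoder
# width before being passed to the main encoder.
import numpy as np

hidden_units_demo, aux_classes_demo = 8, 3
aux_logit_demo = np.random.randn(2, aux_classes_demo).astype(np.float32)
aux_v_demo = aux_logit_demo.reshape(-1, 1, aux_classes_demo)
aux_v_demo = np.pad(aux_v_demo,
                    [(0, 0), (0, 0), (0, hidden_units_demo - aux_classes_demo)])
assert aux_v_demo.shape == (2, 1, hidden_units_demo)
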
def __init__(self, num_classes, seq_length, is_training):
    super(DictReaderWrapper, self).__init__()
    placeholder = tf.compat.v1.placeholder
    bert_config = BertConfig.from_json_file(os.path.join(data_path, "bert_config.json"))
    def_max_length = FLAGS.max_def_length
    loc_max_length = FLAGS.max_loc_length
    tf_logging.debug("DictReaderWrapper init()")
    tf_logging.debug("seq_length %d" % seq_length)
    tf_logging.debug("def_max_length %d" % def_max_length)
    tf_logging.debug("loc_max_length %d" % loc_max_length)

    self.input_ids = placeholder(tf.int64, [None, seq_length])
    self.input_mask_ = placeholder(tf.int64, [None, seq_length])
    self.segment_ids = placeholder(tf.int64, [None, seq_length])
    self.d_input_ids = placeholder(tf.int64, [None, def_max_length])
    self.d_input_mask = placeholder(tf.int64, [None, def_max_length])
    self.d_location_ids = placeholder(tf.int64, [None, loc_max_length])
    self.y_cls = placeholder(tf.int64, [None])
    self.y_lookup = placeholder(tf.int64, [None, seq_length])

    self.network = DictReaderModel(
        config=bert_config,
        d_config=bert_config,
        is_training=is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask_,
        d_input_ids=self.d_input_ids,
        d_input_mask=self.d_input_mask,
        d_location_ids=self.d_location_ids,
        use_target_pos_emb=True,
        token_type_ids=self.segment_ids,
        use_one_hot_embeddings=False,
    )

    self.cls_logits = keras.layers.Dense(num_classes)(self.network.pooled_output)
    self.cls_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.cls_logits, labels=self.y_cls)
    self.cls_loss = tf.reduce_mean(self.cls_loss_arr)

    self.lookup_logits = keras.layers.Dense(2)(self.network.sequence_output)
    self.lookup_loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.lookup_logits, labels=self.y_lookup)
    self.lookup_loss_per_example = tf.reduce_mean(self.lookup_loss_arr, axis=-1)
    self.lookup_loss = tf.reduce_mean(self.lookup_loss_per_example)
    self.acc = tf_module.accuracy(self.cls_logits, self.y_cls)

def predict_ex(self, enc, Y, mode):
    feature_loc = 0
    pooled = enc[:, feature_loc, :]
    logits = tf.keras.layers.Dense(self.num_classes, name="cls_dense")(pooled)
    preds = tf.argmax(logits, axis=-1)
    self.acc = tf_module.accuracy(logits, Y)
    self.logits = logits
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        self.loss_arr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=Y)
        self.loss = tf.reduce_mean(self.loss_arr)
        return preds, self.loss
    else:
        return preds

def predict_ex(enc, Y, mode):
    # Nested helper: captures self, hp, and batch_dyn from the enclosing scope.
    feature_loc = 0
    logits_raw = tf.layers.dense(
        enc[:, feature_loc, :],
        data_generator.NLI.nli_info.num_classes,
        name="cls_dense")
    if hp.use_reorder:
        # Swap classes 0 and 1 before candidate selection.
        logits_reorder = [logits_raw[:, 1], logits_raw[:, 0], logits_raw[:, 2]]
        logits_candidate = tf.stack(logits_reorder, axis=1)  # [-1, 3]
    else:
        logits_candidate = logits_raw

    # Group logits into num_v candidates per example and keep, for each
    # example, the candidate with the smallest probability of class 0.
    logits_candidate = tf.reshape(
        logits_candidate,
        [-1, hp.num_v, data_generator.NLI.nli_info.num_classes])
    soft_candidate = tf.nn.softmax(logits_candidate)
    active_arg = tf.cast(tf.argmin(soft_candidate[:, :, 0], axis=1),
                         dtype=tf.int32)  # [batch]
    indice = tf.stack([tf.range(batch_dyn), active_arg], axis=1)
    logits = tf.gather_nd(logits_candidate, indice)

    labels = tf.one_hot(Y, data_generator.NLI.nli_info.num_classes)
    preds = tf.to_int32(tf.argmax(logits, axis=-1))
    self.acc = tf_module.accuracy(logits, Y)
    self.logits = logits
    tf.summary.scalar("acc", self.acc)
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        # Down-weight examples predicted as class 0:
        #   pred != 0 (TP / FP): weight 1.0
        #   pred == 0 (FN / TN): weight 0.1
        neg = tf.cast(tf.equal(preds, 0), tf.float32) * tf.constant(0.1)
        pos = tf.cast(tf.not_equal(preds, 0), tf.float32)
        weight_losses = self.loss_arr * (pos + neg)
        self.loss = tf.reduce_mean(weight_losses)
        tf.summary.scalar("loss", self.loss)
        return preds, self.loss
    else:
        return preds

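# Sketch (not from the original source): the candidate-selection step above in
# NumPy. Each example has num_v candidate logit rows; the row whose softmax
# probability of class 0 is smallest is kept, which is what the
# tf.argmin / tf.gather_nd pair computes.
import numpy as np

def select_candidate(logits_candidate):
    # logits_candidate: [batch, num_v, num_classes]
    e = np.exp(logits_candidate - logits_candidate.max(-1, keepdims=True))
    soft = e / e.sum(-1, keepdims=True)
    active = soft[:, :, 0].argmin(axis=1)                     # [batch]
    return logits_candidate[np.arange(len(active)), active]  # gather_nd

demo = np.random.randn(2, 4, 3).astype(np.float32)
assert select_candidate(demo).shape == (2, 3)
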
def predict_ex(self, enc, Y, mode):
    feature_loc = 0
    logits = tf.compat.v1.layers.dense(enc[:, feature_loc, :], self.num_classes,
                                       name="cls_dense")
    labels = tf.one_hot(Y, self.num_classes)
    preds = tf.cast(tf.argmax(input=logits, axis=-1), dtype=tf.int32)
    self.acc = tf_module.accuracy(logits, Y)
    self.logits = logits
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        self.loss_arr = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=labels)
        self.loss = tf.reduce_mean(input_tensor=self.loss_arr)
        return preds, self.loss
    else:
        return preds

def __init__(self, hp, voca_size, method, is_training=True):
    config = bert.BertConfig(
        vocab_size=voca_size,
        hidden_size=hp.hidden_units,
        num_hidden_layers=hp.num_blocks,
        num_attention_heads=hp.num_heads,
        intermediate_size=hp.intermediate_size,
        type_vocab_size=hp.type_vocab_size,
    )
    seq_length = hp.seq_max
    use_tpu = False

    # `placeholder` is assumed to be an alias for tf.compat.v1.placeholder.
    input_ids = placeholder(tf.int64, [None, seq_length])
    input_mask = placeholder(tf.int64, [None, seq_length])
    segment_ids = placeholder(tf.int64, [None, seq_length])
    label_ids = placeholder(tf.int64, [None])
    # Real-valued relevance mask for most methods; integer mask for the
    # cross-entropy and hinge variants.
    if method in [0, 1, 3, 4, 5, 6]:
        self.rf_mask = placeholder(tf.float32, [None, seq_length])
    elif method in [METHOD_CROSSENT, METHOD_HINGE]:
        self.rf_mask = placeholder(tf.int32, [None, seq_length])

    self.x_list = [input_ids, input_mask, segment_ids]
    self.y = label_ids
    use_one_hot_embeddings = use_tpu

    self.model = bert.BertModel(
        config=config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    pooled = self.model.get_pooled_output()
    # Pass the dropout probability via rate=; the positional argument of
    # tf.nn.dropout is keep_prob in TF1, which would invert the intent.
    pooled = tf.nn.dropout(pooled, rate=hp.dropout_rate)
    logits = tf.layers.dense(pooled, data_generator.NLI.nli_info.num_classes,
                             name="cls_dense")
    labels = tf.one_hot(label_ids, data_generator.NLI.nli_info.num_classes)
    self.acc = tf_module.accuracy(logits, label_ids)
    self.logits = logits
    tf.summary.scalar("acc", self.acc)
    self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits, labels=labels)
    self.loss = tf.reduce_mean(self.loss_arr)
    tf.summary.scalar("loss", self.loss)

def predict_ex(self, enc, Y, mode):
    feature_loc = 0
    logits = tf.layers.dense(enc[:, feature_loc, :], self.num_classes,
                             name="cls_dense")
    labels = tf.one_hot(Y, self.num_classes)
    preds = tf.to_int32(tf.argmax(logits, axis=-1))
    self.acc = tf_module.accuracy(logits, Y)
    self.logits = logits
    tf.summary.scalar("acc", self.acc)
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=labels)
        self.loss = tf.reduce_mean(self.loss_arr)
        tf.summary.scalar("loss", self.loss)
        return preds, self.loss
    else:
        return preds

def __init__(self, hp, hp_aux, voca_size, is_training):
    # Inputs: token ids and per-position target ids, both [batch_size, seq_max].
    self.x = tf.placeholder(dtype=tf.int32, shape=[None, hp.seq_max])
    self.y = tf.placeholder(tf.int32, shape=(None, hp.seq_max))

    # Auxiliary 3-way classifier whose padded logits are injected into the
    # main encoder as an extra feature.
    with tf.variable_scope("aux"):
        self.y_aux = tf.placeholder(tf.int32, shape=(None,))
        num_classes = 3
        enc_aux = transformer_encode(self.x, hp_aux, voca_size, is_training)
        aux_logit = tf.layers.dense(enc_aux[:, 0, :], num_classes, name="cls_dense")
        labels = tf.one_hot(self.y_aux, num_classes)
        loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=aux_logit, labels=labels)
        self.aux_loss = tf.reduce_mean(loss_arr)
        self.aux_acc = tf_module.accuracy(aux_logit, self.y_aux)

        aux_v = tf.reshape(aux_logit, [-1, 1, num_classes])
        added_dim = hp.hidden_units - num_classes
        aux_v = tf.pad(aux_v, [(0, 0), (0, 0), (0, added_dim)])

    self.enc = transformer_aux(self.x, hp, voca_size, is_training, aux_v)

    # Final linear projection to the vocabulary. Dropping the last position
    # (assumed to be the appended aux vector) keeps the logits aligned with
    # self.y.
    self.logits = tf.layers.dense(self.enc[:, :-1], voca_size)
    self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

    # Token accuracy over non-padding positions (id 0 is padding).
    self.istarget = tf.to_float(tf.not_equal(self.y, 0))
    self.acc = tf.reduce_sum(
        tf.to_float(tf.equal(self.preds, self.y)) * self.istarget
    ) / tf.reduce_sum(self.istarget)
    tf.summary.scalar('acc', self.acc)

    if is_training:
        # Label-smoothed cross entropy over non-padding positions; the
        # smoothing factor is assumed to come from the hyperparameters.
        loss_list, weight = padded_cross_entropy(self.logits, self.y,
                                                 hp.label_smoothing,
                                                 reduce_sum=False)
        self.loss = tf.reduce_mean(loss_list)
        tf.summary.scalar('loss', self.loss)

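# Sketch (assumption, not the repo's padded_cross_entropy): label-smoothed
# cross entropy that masks padding (token id 0), in the style of the
# tensor2tensor helper of the same name. The real implementation may differ.
import numpy as np

def log_softmax(x):
    m = x.max(-1, keepdims=True)
    return x - m - np.log(np.exp(x - m).sum(-1, keepdims=True))

def padded_cross_entropy_sketch(logits, labels, smoothing):
    # logits: [batch, length, vocab] float; labels: [batch, length] int
    vocab = logits.shape[-1]
    confidence = 1.0 - smoothing
    soft = np.full(logits.shape, smoothing / (vocab - 1), np.float32)
    np.put_along_axis(soft, labels[..., None], confidence, axis=-1)
    xent = -(soft * log_softmax(logits)).sum(-1)   # [batch, length]
    weights = (labels != 0).astype(np.float32)     # zero out padding positions
    return xent * weights, weights
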
def __init__(self, hp, voca_size, num_classes, is_training):
    input_len = hp.seq_max
    self.x = tf.placeholder(dtype=tf.int32, shape=[None, input_len])
    self.y = tf.placeholder(tf.int32, shape=(None,))

    def extract_feature(sent):
        enc = transformer_encode(sent, hp, voca_size, is_training)
        return tf.layers.dense(enc[:, 0], hp.feature_size)

    with tf.variable_scope("feature_encoder"):
        feature1 = extract_feature(self.x)
    self.feature1 = feature1

    self.logits = tf.layers.dense(feature1, num_classes)
    self.acc = tf_module.accuracy(self.logits, self.y)

    # Loss
    labels = tf.one_hot(self.y, num_classes)
    self.loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.logits, labels=labels)
    self.loss = tf.reduce_mean(self.loss_arr)

def __init__(self, hp, voca_size, num_classes, is_training, feature_loc=0):
    # Labels: class 0 ~ num_classes-1 are ordinary labels;
    # y == num_classes marks pairs of items that come from the same user.
    self.x = tf.placeholder(dtype=tf.int32, shape=[None, hp.seq_max])
    self.y = tf.placeholder(tf.int32, shape=(None,))
    self.x_pair = tf.placeholder(dtype=tf.int32, shape=[None, 2, hp.seq_max])
    x_pair_flat = tf.reshape(self.x_pair, [-1, hp.seq_max])

    with tf.variable_scope("consist_classifier"):
        enc = transformer_encode(self.x, hp, voca_size, is_training)
        self.logits = tf.layers.dense(enc[:, feature_loc, :], num_classes,
                                      name="cls_dense")
    # Same classifier (shared weights) applied to both items of each pair.
    with tf.variable_scope("consist_classifier", reuse=True):
        enc2 = transformer_encode(x_pair_flat, hp, voca_size, is_training)
        logits2 = tf.layers.dense(enc2[:, feature_loc, :], num_classes,
                                  name="cls_dense")

    # Supervised loss on labeled examples.
    labels = tf.one_hot(self.y, num_classes)
    self.s_loss_arr = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=self.logits, labels=labels)
    self.acc = tf_module.accuracy(self.logits, self.y)

    logit_pair = tf.reshape(logits2, [-1, 2, num_classes])
    pred = tf.nn.softmax(logit_pair)
    self.pred_pair = pred

    def conflict_loss(pred):
        # Low when the two items of a pair take opposite stances
        # (class 1 = Pros, class 2 = Against), high when they agree.
        Pros = 1
        Against = 2
        l = (pred[:, 0, Pros] * tf.log(pred[:, 1, Against])
             + pred[:, 0, Against] * tf.log(pred[:, 1, Pros]))
        return -l

    # idk : "I Don't Know" regularizer. For each class, the total probability
    # mass over the batch is kept close to its expectation under p = 0.33 via
    # a normal approximation, with a slack of 4 nats before any penalty.
    def idk_loss(pred):
        s = 0
        for i in range(3):
            pred_no = pred[:, :, i]
            p = 0.33
            n_items = tf.reduce_sum(tf.ones_like(pred_no))
            mean = n_items * p
            dev = tf.sqrt(n_items * p * (1 - p))
            normal = tf.distributions.Normal(loc=mean, scale=dev)
            l = -tf.log(normal.prob(tf.reduce_sum(pred_no)))
            s += tf.maximum(l - 4, 0)
        return s

    self.idk_loss = idk_loss(pred)
    self.consist_loss = hp.alpha * (tf.reduce_mean(conflict_loss(pred))
                                    + 0.2 * self.idk_loss)
    self.supervised_loss = (1 - hp.alpha) * tf.reduce_mean(self.s_loss_arr)
    self.loss = self.supervised_loss + self.consist_loss

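# Sketch (not from the original source): conflict_loss above on toy
# probabilities, to make the sign convention concrete. The loss is small when
# the two items of a pair take opposite stances and large when they agree.
import numpy as np

PROS, AGAINST = 1, 2
pred_demo = np.array([[[0.1, 0.8, 0.1],     # item 0: mostly Pros
                       [0.1, 0.1, 0.8]]])   # item 1: mostly Against
l_demo = (pred_demo[:, 0, PROS] * np.log(pred_demo[:, 1, AGAINST])
          + pred_demo[:, 0, AGAINST] * np.log(pred_demo[:, 1, PROS]))
print(-l_demo)  # ~0.41; with item 1 also mostly Pros it rises to ~1.86
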