def create_model(cf, input_ids, input_mask, segment_ids, labels, is_training=True):
    """Build the XLNet classification graph and return its losses and logits.

    :param cf: configuration object. This function reads ``model_config_path``,
        ``summary_type``, ``use_summ_proj``, ``cls_scope``, ``task_name`` and
        ``num_labels`` from it and also hands the whole object to
        ``xlnet.create_run_config``.
    :param input_ids: 2-D tensor of token ids; the size of axis 0 is used as
        the batch size.
    :param input_mask: 2-D mask tensor, transposed the same way as
        ``input_ids``.
    :param segment_ids: 2-D segment-id tensor, transposed the same way as
        ``input_ids``.
    :param labels: label tensor, reshaped to a vector of length batch size.
    :param is_training: forwarded to ``xlnet.create_run_config``.
    :return: ``(total_loss, per_example_loss, logits)`` where ``total_loss``
        is the mean of ``per_example_loss``.
    """
    batch_size = tf.shape(input_ids)[0]

    # Swap the two axes of every input before it reaches XLNetModel
    # (presumably batch-major -> sequence-major; confirm against XLNetModel).
    ids_t = tf.transpose(input_ids, [1, 0])
    segments_t = tf.transpose(segment_ids, [1, 0])
    mask_t = tf.transpose(input_mask, [1, 0])
    flat_labels = tf.reshape(labels, [batch_size])

    model = xlnet.XLNetModel(
        xlnet_config=xlnet.XLNetConfig(json_path=cf.model_config_path),
        run_config=xlnet.create_run_config(is_training, True, cf),
        input_ids=ids_t,
        seg_ids=segments_t,
        input_mask=mask_t,
    )
    pooled = model.get_pooled_out(cf.summary_type, cf.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # An explicit cls_scope wins; otherwise fall back to the lower-cased
        # task name.
        scope_suffix = cf.cls_scope or cf.task_name.lower()
        head_scope = "classification_{}".format(scope_suffix)

        per_example_loss, logits = modeling.classification_loss(
            hidden=pooled,
            labels=flat_labels,
            n_class=cf.num_labels,
            initializer=model.get_initializer(),
            scope=head_scope,
            return_logits=True,
        )
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits
def make_xlnet_graph(input_ids, input_mask, segment_ids, label_ids,
                     model_config_path, num_labels, is_training_placeholder,
                     tune=False):
    """Build an XLNet classifier graph for fine-tuning or for inference.

    :param input_ids: token-id tensor, passed to ``XLNetModel`` unchanged.
    :param input_mask: mask tensor, passed to ``XLNetModel`` unchanged.
    :param segment_ids: segment-id tensor, passed to ``XLNetModel`` as
        ``seg_ids``.
    :param label_ids: labels handed to ``classification_loss``.
    :param model_config_path: path of the JSON file read by ``XLNetConfig``.
    :param num_labels: number of classes of the classification head.
    :param is_training_placeholder: value used as ``RunConfig.is_training``.
    :param tune: when true return ``(logits, total_loss)``; otherwise return
        the softmax probabilities only.
    :return: ``(logits, total_loss)`` if ``tune`` else ``probs``.
    """
    model_config = XLNetConfig(json_path=model_config_path)

    # Everything is set to its default value.
    run_config = RunConfig(
        is_training=is_training_placeholder,
        use_tpu=False,
        use_bfloat16=False,
        dropout=0.1,
        dropatt=0.1,
        init="normal",
        init_range=0.1,
        init_std=0.1,
        clamp_len=-1,
    )
    model = XLNetModel(
        xlnet_config=model_config,
        run_config=run_config,
        input_ids=input_ids,
        seg_ids=segment_ids,
        input_mask=input_mask,
    )

    # summary_type="last":  last vector of the final layer's hidden sequence
    # summary_type="first": first vector of the final layer's hidden sequence
    # summary_type="mean":  mean vector of the final layer's hidden sequence
    # summary_type="attn":  multi-head attention over the final layer's
    #                       hidden sequence
    # use_proj=True:  apply a linear transform + tanh to the summary vector
    #                 (same as BERT)
    # use_proj=False: return the summary vector as is
    pooled = model.get_pooled_out(summary_type="last", use_summ_proj=True)

    # Linear transform of the summary vector without an activation function
    # (act_fn), followed by the cross-entropy loss.
    per_example_loss, logits = classification_loss(
        hidden=pooled,
        labels=label_ids,
        n_class=num_labels,
        initializer=model.get_initializer(),
        scope="classification_layer",
        return_logits=True,
    )

    if tune:
        # Loss layer.
        return logits, tf.reduce_mean(per_example_loss)

    # Probability layer.
    return tf.nn.softmax(logits, axis=-1, name='probs')
def __init__(self, model_config_path, is_training, FLAGS, input_ids,
             segment_ids, input_mask, label, n_class):
    """Assemble the XLNet classifier graph, its train op and an accuracy op.

    :param model_config_path: path of the JSON file read by
        ``xlnet.XLNetConfig``.
    :param is_training: forwarded to ``xlnet.create_run_config``.
    :param FLAGS: settings object. ``cls_scope``, ``summary_type`` and
        ``use_summ_proj`` are read here, and the whole object is passed to
        ``xlnet.create_run_config`` and ``model_utils.get_train_op``.
    :param input_ids: 2-D token-id tensor; stored transposed on ``self``.
    :param segment_ids: 2-D segment-id tensor; stored transposed on ``self``.
    :param input_mask: 2-D mask tensor; stored transposed on ``self``.
    :param label: labels used for the loss and, one-hot encoded, for the
        accuracy.
    :param n_class: number of classes.
    """
    self.xlnet_config = xlnet.XLNetConfig(json_path=model_config_path)
    self.run_config = xlnet.create_run_config(is_training, True, FLAGS)

    # Swap the two axes of every input before it reaches XLNetModel
    # (presumably batch-major -> sequence-major; confirm against XLNetModel).
    self.input_ids = tf.transpose(input_ids, [1, 0])
    self.segment_ids = tf.transpose(segment_ids, [1, 0])
    self.input_mask = tf.transpose(input_mask, [1, 0])

    self.model = xlnet.XLNetModel(
        xlnet_config=self.xlnet_config,
        run_config=self.run_config,
        input_ids=self.input_ids,
        seg_ids=self.segment_ids,
        input_mask=self.input_mask,
    )

    pooled = self.model.get_pooled_out(
        FLAGS.summary_type, FLAGS.use_summ_proj)
    # The classification head uses FLAGS.cls_scope verbatim as its scope.
    self.per_example_loss, self.logits = modeling.classification_loss(
        hidden=pooled,
        labels=label,
        n_class=n_class,
        initializer=self.model.get_initializer(),
        scope=FLAGS.cls_scope,
        return_logits=True,
    )
    self.total_loss = tf.reduce_mean(self.per_example_loss)

    with tf.name_scope("train_op"):
        # get_train_op returns three values; only the first one is kept.
        train_outputs = model_utils.get_train_op(FLAGS, self.total_loss)
        self.train_op, _, _ = train_outputs

    with tf.name_scope("acc"):
        target_one_hot = tf.one_hot(label, n_class)
        self.acc = self.accuracy(self.logits, target_one_hot)
def get_classification_loss(FLAGS, features, n_class, is_training):
    """Build the loss graph for a downstream classification task.

    :param FLAGS: settings object. ``model_config_path``, ``summary_type``,
        ``use_summ_proj``, ``cls_scope`` and ``task_name`` are read here, and
        the whole object is passed to ``xlnet.create_run_config`` and
        ``modeling.classification_loss``.
    :param features: mapping with the keys ``"input_ids"``,
        ``"segment_ids"``, ``"input_mask"`` and ``"label_ids"``.
    :param n_class: number of classes of the classification head.
    :param is_training: forwarded to ``xlnet.create_run_config``.
    :return: ``(total_loss, per_example_loss, logits, p)``. ``total_loss`` is
        the mean of ``per_example_loss``; ``p`` is the third value returned
        by ``modeling.classification_loss`` (its meaning is not visible from
        this function).
    """
    batch_size = tf.shape(features["input_ids"])[0]

    # Swap the two axes of every input before it reaches XLNetModel
    # (presumably batch-major -> sequence-major; confirm against XLNetModel).
    ids_t = tf.transpose(features["input_ids"], [1, 0])
    segments_t = tf.transpose(features["segment_ids"], [1, 0])
    mask_t = tf.transpose(features["input_mask"], [1, 0])
    flat_labels = tf.reshape(features["label_ids"], [batch_size])

    model = xlnet.XLNetModel(
        xlnet_config=xlnet.XLNetConfig(json_path=FLAGS.model_config_path),
        run_config=xlnet.create_run_config(is_training, True, FLAGS),
        input_ids=ids_t,
        seg_ids=segments_t,
        input_mask=mask_t,
    )
    pooled = model.get_pooled_out(FLAGS.summary_type, FLAGS.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):
        # An explicit cls_scope wins; otherwise fall back to the lower-cased
        # task name.
        scope_suffix = FLAGS.cls_scope or FLAGS.task_name.lower()
        head_scope = "classification_{}".format(scope_suffix)

        per_example_loss, logits, p = modeling.classification_loss(
            hidden=pooled,
            labels=flat_labels,
            n_class=n_class,
            initializer=model.get_initializer(),
            scope=head_scope,
            return_logits=True,
            FLAGS=FLAGS,
        )
        total_loss = tf.reduce_mean(per_example_loss)

    return total_loss, per_example_loss, logits, p