import tensorflow as tf  # TensorFlow 1.x (placeholders, variable scopes)

# These modules ship with the original XLNet repository
# (https://github.com/zihangdai/xlnet): xlnet.py, modeling.py, model_utils.py.
import xlnet
import modeling
import model_utils
from xlnet import XLNetConfig, RunConfig, XLNetModel
from modeling import classification_loss


def create_model(cf,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 is_training=True):
    '''
    Build the classification model.
    :param cf: FLAGS-like config (model_config_path, summary_type, num_labels, ...)
    :param input_ids: int32 token ids, shape [batch, seq_len]
    :param input_mask: float32 mask, 1.0 for padding positions (XLNet convention)
    :param segment_ids: int32 segment ids, shape [batch, seq_len]
    :param labels: int32 class labels, shape [batch]
    :param is_training: whether dropout is active
    :return: (total_loss, per_example_loss, logits)
    '''
    bsz_per_core = tf.shape(input_ids)[0]
    # XLNetModel expects time-major inputs: [batch, seq_len] -> [seq_len, batch].
    inp = tf.transpose(input_ids, [1, 0])
    seg_id = tf.transpose(segment_ids, [1, 0])
    inp_mask = tf.transpose(input_mask, [1, 0])
    label = tf.reshape(labels, [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=cf.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, cf)  # True -> is_finetune

    xlnet_model = xlnet.XLNetModel(xlnet_config=xlnet_config,
                                   run_config=run_config,
                                   input_ids=inp,
                                   seg_ids=seg_id,
                                   input_mask=inp_mask)
    summary = xlnet_model.get_pooled_out(cf.summary_type, cf.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):

        if cf.cls_scope:
            cls_scope = "classification_{}".format(cf.cls_scope)
        else:
            cls_scope = "classification_{}".format(cf.task_name.lower())

        per_example_loss, logits = modeling.classification_loss(
            hidden=summary,
            labels=label,
            n_class=cf.num_labels,
            initializer=xlnet_model.get_initializer(),
            scope=cls_scope,
            return_logits=True)

        total_loss = tf.reduce_mean(per_example_loss)

        return total_loss, per_example_loss, logits
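

# A minimal usage sketch, not part of the original snippet: wires create_model
# to TF 1.x placeholders. `cf` is assumed to expose model_config_path,
# summary_type, use_summ_proj, num_labels, cls_scope/task_name, and the fields
# create_run_config reads; the function name and shapes are illustrative.
def build_create_model_graph(cf, seq_len=128):
    input_ids = tf.placeholder(tf.int32, [None, seq_len], name="input_ids")
    input_mask = tf.placeholder(tf.float32, [None, seq_len], name="input_mask")
    segment_ids = tf.placeholder(tf.int32, [None, seq_len], name="segment_ids")
    labels = tf.placeholder(tf.int32, [None], name="labels")
    return create_model(cf, input_ids, input_mask, segment_ids, labels,
                        is_training=True)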


def make_xlnet_graph(input_ids,
                     input_mask,
                     segment_ids,
                     label_ids,
                     model_config_path,
                     num_labels,
                     is_training_placeholder,
                     tune=False):
    xlnet_config = XLNetConfig(json_path=model_config_path)
    # All values set to the defaults.
    kwargs = dict(is_training=is_training_placeholder,
                  use_tpu=False,
                  use_bfloat16=False,
                  dropout=0.1,
                  dropatt=0.1,
                  init="normal",
                  init_range=0.1,
                  init_std=0.1,
                  clamp_len=-1)
    run_config = RunConfig(**kwargs)
    xlnet_model = XLNetModel(xlnet_config=xlnet_config,
                             run_config=run_config,
                             input_ids=input_ids,
                             seg_ids=segment_ids,
                             input_mask=input_mask)
    # summary_type="last", 마지막 레이어 히든 벡터 시퀀스의 마지막 벡터
    # summary_type="first", 마지막 레이어 히든 벡터 시퀀스의 첫번째 벡터
    # summary_type="mean", 마지막 레이어 히든 벡터 시퀀스의 평균 벡터
    # summary_type="attn", 마지막 레이어 히든 벡터 시퀀스에 멀티 헤드 어텐션 적용
    # use_proj=True, 이미 만든 summary 벡터에 선형변환 + tanh 적용 (BERT와 동일)
    # use_proj=False, 이미 만든 summary 벡터를 그대로 리턴
    summary = xlnet_model.get_pooled_out(summary_type="last",
                                         use_summ_proj=True)
    # Linear transform of the summary vector (no activation), then cross-entropy loss.
    per_example_loss, logits = classification_loss(
        hidden=summary,
        labels=label_ids,
        n_class=num_labels,
        initializer=xlnet_model.get_initializer(),
        scope="classification_layer",
        return_logits=True)
    if tune:
        # loss layer
        total_loss = tf.reduce_mean(per_example_loss)
        return logits, total_loss
    else:
        # prob layer: softmax over the logits for inference
        probs = tf.nn.softmax(logits, axis=-1, name='probs')
        return probs
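

# A minimal usage sketch, not part of the original snippet. The official
# XLNetModel expects time-major inputs ([seq_len, batch]) and make_xlnet_graph
# passes its tensors through untransposed, so the placeholders here are
# created time-major; the function name is illustrative. is_training is a
# bool placeholder so dropout can be toggled at run time.
def build_xlnet_eval_graph(model_config_path, num_labels, seq_len=128):
    input_ids = tf.placeholder(tf.int32, [seq_len, None], name="input_ids")
    input_mask = tf.placeholder(tf.float32, [seq_len, None], name="input_mask")
    segment_ids = tf.placeholder(tf.int32, [seq_len, None], name="segment_ids")
    label_ids = tf.placeholder(tf.int32, [None], name="label_ids")
    is_training = tf.placeholder(tf.bool, name="is_training")
    return make_xlnet_graph(input_ids, input_mask, segment_ids, label_ids,
                            model_config_path, num_labels, is_training,
                            tune=False)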


class XLNetClassifier(object):
    # NOTE: the class statement was missing from this snippet; XLNetClassifier
    # is a hypothetical stand-in name.
    def __init__(self, model_config_path, is_training, FLAGS, input_ids,
                 segment_ids, input_mask, label, n_class):
        '''
        Build an XLNet classification graph.
        :param model_config_path: path to the XLNet config json
        :param is_training: whether dropout is active
        :param FLAGS: parsed flags (cls_scope, summary_type, use_summ_proj, ...)
        :param input_ids: int32 token ids, shape [batch, seq_len]
        :param segment_ids: int32 segment ids, shape [batch, seq_len]
        :param input_mask: float32 mask, 1.0 for padding positions
        :param label: int32 class labels, shape [batch]
        :param n_class: number of output classes
        '''
        self.xlnet_config = xlnet.XLNetConfig(json_path=model_config_path)
        self.run_config = xlnet.create_run_config(is_training, True, FLAGS)
        self.input_ids = tf.transpose(input_ids, [1, 0])
        self.segment_ids = tf.transpose(segment_ids, [1, 0])
        self.input_mask = tf.transpose(input_mask, [1, 0])

        self.model = xlnet.XLNetModel(xlnet_config=self.xlnet_config,
                                      run_config=self.run_config,
                                      input_ids=self.input_ids,
                                      seg_ids=self.segment_ids,
                                      input_mask=self.input_mask)

        cls_scope = FLAGS.cls_scope
        summary = self.model.get_pooled_out(FLAGS.summary_type,
                                            FLAGS.use_summ_proj)
        self.per_example_loss, self.logits = modeling.classification_loss(
            hidden=summary,
            labels=label,
            n_class=n_class,
            initializer=self.model.get_initializer(),
            scope=cls_scope,
            return_logits=True)

        self.total_loss = tf.reduce_mean(self.per_example_loss)

        with tf.name_scope("train_op"):
            self.train_op, _, _ = model_utils.get_train_op(
                FLAGS, self.total_loss)

        with tf.name_scope("acc"):
            one_hot_target = tf.one_hot(label, n_class)
            self.acc = self.accuracy(self.logits, one_hot_target)
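
    def accuracy(self, logits, one_hot_target):
        # Not shown in the original snippet: a minimal sketch of the helper
        # referenced above, assuming it computes the fraction of examples
        # whose argmax prediction matches the one-hot target.
        correct = tf.equal(tf.argmax(logits, axis=-1),
                           tf.argmax(one_hot_target, axis=-1))
        return tf.reduce_mean(tf.cast(correct, tf.float32))
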
def get_classification_loss(
        FLAGS, features, n_class, is_training):
    """Loss for downstream classification tasks."""

    bsz_per_core = tf.shape(features["input_ids"])[0]

    inp = tf.transpose(features["input_ids"], [1, 0])
    seg_id = tf.transpose(features["segment_ids"], [1, 0])
    inp_mask = tf.transpose(features["input_mask"], [1, 0])
    label = tf.reshape(features["label_ids"], [bsz_per_core])

    xlnet_config = xlnet.XLNetConfig(json_path=FLAGS.model_config_path)
    run_config = xlnet.create_run_config(is_training, True, FLAGS)

    xlnet_model = xlnet.XLNetModel(
        xlnet_config=xlnet_config,
        run_config=run_config,
        input_ids=inp,
        seg_ids=seg_id,
        input_mask=inp_mask)

    summary = xlnet_model.get_pooled_out(
        FLAGS.summary_type, FLAGS.use_summ_proj)

    with tf.variable_scope("model", reuse=tf.AUTO_REUSE):

        if FLAGS.cls_scope:
            cls_scope = "classification_{}".format(FLAGS.cls_scope)
        else:
            cls_scope = "classification_{}".format(FLAGS.task_name.lower())

        # This variant of classification_loss also takes FLAGS and returns a
        # third value `p` alongside the per-example loss and logits.
        per_example_loss, logits, p = modeling.classification_loss(
            hidden=summary,
            labels=label,
            n_class=n_class,
            initializer=xlnet_model.get_initializer(),
            scope=cls_scope,
            return_logits=True,
            FLAGS=FLAGS)

        total_loss = tf.reduce_mean(per_example_loss)

        return total_loss, per_example_loss, logits, p
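

# A minimal usage sketch, not part of the original snippet: how an
# Estimator-style model_fn might call get_classification_loss. `features`
# must carry "input_ids", "input_mask", "segment_ids" and "label_ids";
# the function name is illustrative.
def model_fn_sketch(FLAGS, features, n_class, is_training):
    total_loss, per_example_loss, logits, p = get_classification_loss(
        FLAGS, features, n_class, is_training)
    preds = tf.argmax(logits, axis=-1, output_type=tf.int32)
    return total_loss, preds, p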