def _create_model_from_scratch(albert_config, is_training, input_ids,
                               input_mask, segment_ids, use_one_hot_embeddings):
  """Creates an ALBERT model from scratch (as opposed to hub)."""
  model = modeling.AlbertModel(
      config=albert_config,
      is_training=is_training,
      input_ids=input_ids,
      input_mask=input_mask,
      token_type_ids=segment_ids,
      use_one_hot_embeddings=use_one_hot_embeddings)
  output_layer = model.get_pooled_output()
  return output_layer
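A minimal usage sketch for the helper above; the `tensorflow.compat.v1` import, the `albert` import path, the 128-token sequence length, and the config filename are assumptions rather than part of the original example:

import tensorflow.compat.v1 as tf   # assumes the TF1-style API used throughout
from albert import modeling         # import path is an assumption

# Hypothetical config file; any valid ALBERT config JSON works here.
albert_config = modeling.AlbertConfig.from_json_file("albert_config.json")
max_seq_len = 128  # placeholder value
input_ids = tf.placeholder(tf.int32, [None, max_seq_len], name="input_ids")
input_mask = tf.placeholder(tf.int32, [None, max_seq_len], name="input_mask")
segment_ids = tf.placeholder(tf.int32, [None, max_seq_len], name="segment_ids")

pooled = _create_model_from_scratch(albert_config, is_training=False,
                                    input_ids=input_ids,
                                    input_mask=input_mask,
                                    segment_ids=segment_ids,
                                    use_one_hot_embeddings=False)
# `pooled` has shape [batch_size, hidden_size] and can feed a task head.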
Example #2
    def __init__(self, is_training):
        # Whether the graph is being built for training
        self.is_training = is_training
        # Input placeholders
        self.input_ids = tf.placeholder(tf.int32,
                                        shape=[None, hp.sequence_length],
                                        name='input_ids')
        self.input_masks = tf.placeholder(tf.int32,
                                          shape=[None, hp.sequence_length],
                                          name='input_masks')
        self.segment_ids = tf.placeholder(tf.int32,
                                          shape=[None, hp.sequence_length],
                                          name='segment_ids')
        self.label_ids = tf.placeholder(tf.float32,
                                        shape=[None, hp.num_labels],
                                        name='label_ids')
        # Load the ALBERT model (note: the config variable is still named bert_config)
        self.model = modeling.AlbertModel(config=bert_config,
                                          is_training=self.is_training,
                                          input_ids=self.input_ids,
                                          input_mask=self.input_masks,
                                          token_type_ids=self.segment_ids,
                                          use_one_hot_embeddings=False)
        # Sequence output from ALBERT: [batch, seq_len, hidden]
        output_layer_init = self.model.get_sequence_output()
        # Apply a TextCNN layer on top of the sequence output
        output_layer = cell_textcnn(output_layer_init, self.is_training)
        # Hidden size
        hidden_size = output_layer.shape[-1].value
        # Fully-connected classification layer
        with tf.name_scope("Full-connection"):
            output_weights = tf.get_variable(
                "output_weights", [hp.num_labels, hidden_size],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            output_bias = tf.get_variable("output_bias", [hp.num_labels],
                                          initializer=tf.zeros_initializer())
            logits = tf.nn.bias_add(
                tf.matmul(output_layer, output_weights, transpose_b=True),
                output_bias)
            # Multi-label probabilities via an element-wise sigmoid
            self.probabilities = tf.nn.sigmoid(logits)
        with tf.variable_scope("Prediction"):
            # Prediction
            zero = tf.zeros_like(logits)
            one = tf.ones_like(logits)
            self.predictions = tf.where(logits < 0.5, x=zero, y=one)
        with tf.variable_scope("loss"):
            # Per-label accuracy, logged to TensorBoard during training
            if self.is_training:
                self.accuracy = tf.reduce_mean(
                    tf.to_float(tf.equal(self.predictions, self.label_ids)))
                tf.summary.scalar('accuracy', self.accuracy)

            # Restore a fine-tuned checkpoint if one exists; otherwise
            # initialize from the pre-trained ALBERT checkpoint
            ckpt = tf.train.get_checkpoint_state(hp.saved_model_path)
            checkpoint_suffix = ".index"
            if ckpt and tf.gfile.Exists(ckpt.model_checkpoint_path +
                                        checkpoint_suffix):
                print('=' * 10, 'Restoring model from checkpoint!', '=' * 10)
                print("%s - Restoring model from checkpoint ~%s" %
                      (time_now_string(), ckpt.model_checkpoint_path))
            else:
                print('=' * 10, 'Loading pre-trained ALBERT weights for the first time!', '=' * 10)
                tvars = tf.trainable_variables()
                if hp.init_checkpoint:
                    (assignment_map, initialized_variable_names) = \
                      modeling.get_assignment_map_from_checkpoint(tvars,
                                                                  hp.init_checkpoint)
                    tf.train.init_from_checkpoint(hp.init_checkpoint,
                                                  assignment_map)

            # Loss and Optimizer
            if self.is_training:
                # Global_step
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=self.label_ids, logits=logits)
                self.loss = tf.reduce_mean(per_example_loss)
                # Optimizer with learning-rate warmup from the optimization module
                train_examples = processor.get_train_examples(hp.data_dir)
                num_train_steps = int(
                    len(train_examples) / hp.batch_size * hp.num_train_epochs)
                num_warmup_steps = int(num_train_steps * hp.warmup_proportion)
                print('num_train_steps', num_train_steps)
                self.optimizer = optimization.create_optimizer(
                    self.loss,
                    hp.learning_rate,
                    num_train_steps,
                    num_warmup_steps,
                    hp.use_tpu,
                    Global_step=self.global_step)

                # Loss summary and merged summaries for TensorBoard
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()
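`cell_textcnn` is not shown in this snippet. A plausible minimal sketch, assuming a Kim-style TextCNN (parallel convolutions over the token axis followed by max-over-time pooling); the function name, filter sizes, filter count, and dropout rate are illustrative guesses, not the original implementation:

def cell_textcnn_sketch(inputs, is_training, filter_sizes=(2, 3, 4),
                        num_filters=128):
    # inputs: [batch, seq_len, hidden] from model.get_sequence_output()
    inputs_expanded = tf.expand_dims(inputs, -1)  # add a channel axis
    pooled_outputs = []
    for size in filter_sizes:
        conv = tf.layers.conv2d(inputs_expanded, num_filters,
                                kernel_size=(size, inputs.shape[-1].value),
                                activation=tf.nn.relu)
        # Max-over-time pooling: keep the strongest activation per filter
        pooled = tf.reduce_max(conv, axis=1)
        pooled_outputs.append(tf.reshape(pooled, [-1, num_filters]))
    output = tf.concat(pooled_outputs, axis=-1)  # [batch, n_sizes * num_filters]
    return tf.layers.dropout(output, rate=0.1, training=is_training)

A hedged driver sketch for the class above, assuming it is named `TextCNNClassifier` (the snippet only shows its __init__) and that `hp` exposes the fields referenced there; the all-zeros batch is purely illustrative:

import numpy as np

model = TextCNNClassifier(is_training=True)  # hypothetical class name
batch_size = 8  # placeholder
feed = {
    model.input_ids: np.zeros([batch_size, hp.sequence_length], np.int32),
    model.input_masks: np.ones([batch_size, hp.sequence_length], np.int32),
    model.segment_ids: np.zeros([batch_size, hp.sequence_length], np.int32),
    # Multi-hot label vectors, one row per example
    model.label_ids: np.zeros([batch_size, hp.num_labels], np.float32),
}
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss, step = sess.run([model.optimizer, model.loss, model.global_step],
                             feed_dict=feed)
    print("step %d, loss %.4f" % (step, loss))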