Example #1
import os

import numpy as np
import tensorflow as tf
from transformers import BertConfig, TFBertForSequenceClassification

# Checkpoint callback that saves the model weights after every epoch.
checkpoint_path = "bert16_ckpt/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True)

# True: train a new model and checkpoint it; False: restore the latest checkpoint.
save_model = True


# Build an untrained BERT sequence classifier with three output labels.
config = BertConfig(num_labels=3, return_dict=True, model_type='bert-base-uncased')
model = TFBertForSequenceClassification(config=config)

if save_model:
    # Train from scratch, checkpointing the weights after every epoch.
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer, loss=model.compute_loss, metrics=['accuracy'])

    model.fit(
        train_dataset[0],
        np.array(y_list),
        epochs=5,
        batch_size=BATCH_SIZE,
        callbacks=[cp_callback]
        )
else:
    # Restore the weights from the most recent checkpoint instead of retraining.
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    model.load_weights(latest)

# Predict logits on the validation inputs and convert them to class probabilities.
preds = model.predict(val_dataset[0])["logits"]
preds_proba = tf.keras.backend.softmax(preds, axis=1)
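
This example assumes that train_dataset, val_dataset, y_list, and BATCH_SIZE are defined elsewhere, with train_dataset[0] and val_dataset[0] holding the tokenized input-id arrays. A minimal, hypothetical sketch of how such inputs could be prepared with the Hugging Face tokenizer (the texts, labels, and max_length below are placeholders, not part of the original):

from transformers import BertTokenizerFast

# Placeholder raw data; substitute the real texts and integer labels.
train_texts = ["first example sentence", "second example sentence"]
val_texts = ["a held-out sentence"]
y_list = [0, 2]
BATCH_SIZE = 16

tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

# Encode to fixed-length id arrays so that train_dataset[0] / val_dataset[0]
# above index the input_ids tensors.
train_enc = tokenizer(train_texts, padding="max_length", truncation=True,
                      max_length=128, return_tensors="np")
val_enc = tokenizer(val_texts, padding="max_length", truncation=True,
                    max_length=128, return_tensors="np")
train_dataset = [train_enc["input_ids"]]
val_dataset = [val_enc["input_ids"]]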
Example #2
    def protected_fit(self, training, validation, model_kwargs=None):
        """a function that accepts a training dataset and a validation dataset
        containing design values 'x' and prediction values 'y' in a model-based
        optimization problem and fits an approximate model

        Arguments:

        training: DatasetBuilder
            an instance of a subclass of the DatasetBuilder class which has
            a set of design values 'x' and prediction values 'y', and defines
            batching and sampling methods for those attributes
        validation: DatasetBuilder
            an instance of a subclass of the DatasetBuilder class which has
            a set of design values 'x' and prediction values 'y', and defines
            batching and sampling methods for those attributes
        model_kwargs: dict
            a dictionary of keyword arguments that parameterize the
            architecture and learning algorithm of the model

        Returns:

        model: Any
            any format of machine learning model that will be stored
            in the self.params["model"] attribute for later use

        """

        # these parameters control the neural network architecture
        hidden_size = model_kwargs["hidden_size"]
        num_heads = model_kwargs["num_heads"]
        dropout_rate = model_kwargs["dropout_rate"]
        feed_forward_size = model_kwargs["feed_forward_size"]
        activation = model_kwargs["activation"]
        num_blocks = model_kwargs["num_blocks"]

        # these parameters control the model training
        epochs = model_kwargs["epochs"]
        shuffle_buffer = model_kwargs["shuffle_buffer"]
        learning_rate = model_kwargs["learning_rate"]

        # determine the vocab size to initialize the model with
        num_classes = 1
        if self.feature_extractor is not None:
            if self.feature_extractor.is_discrete(self.internal_dataset):
                num_classes = self.feature_extractor\
                    .num_classes(self.internal_dataset)
        elif isinstance(training, DiscreteDataset):
            num_classes = training.num_classes

        # obtain the expected shape of inputs to the model
        input_shape = training.input_shape
        if isinstance(training, DiscreteDataset) and training.is_logits:
            input_shape = input_shape[:-1]

        # if the feature extraction model is given, assume its input shape
        if self.feature_extractor is not None:
            input_shape = self.feature_extractor\
                .input_shape(self.internal_dataset)

        # build the hugging face model from a configuration
        model = TFBert(transformers.BertConfig(
            vocab_size=num_classes,
            num_labels=1,
            hidden_size=hidden_size,
            num_hidden_layers=num_blocks,
            num_attention_heads=num_heads,
            intermediate_size=feed_forward_size,
            hidden_act=activation,
            hidden_dropout_prob=dropout_rate,
            attention_probs_dropout_prob=dropout_rate,
            max_position_embeddings=input_shape[0],
            initializer_range=0.02,
            layer_norm_eps=1e-12,
            position_embedding_type='absolute'))

        # estimate the number of training steps per epoch
        steps = int(math.ceil(training.dataset_size
                              / self.internal_batch_size))

        # compile the tensorflow model for training
        lr = keras.experimental.CosineDecay(
            learning_rate, steps * epochs, alpha=0.0)
        optimizer = keras.optimizers.Adam(learning_rate=lr)
        model.compile(optimizer=optimizer,
                      loss=tf.keras.losses.MeanSquaredError())

        # if the feature extraction model is given, assume its format
        if self.feature_extractor is not None:
            input_key = ("input_ids" if self.feature_extractor
                         .is_discrete(self.internal_dataset)
                         else "inputs_embeds")
        elif isinstance(training, DiscreteDataset):
            input_key = "input_ids"
        else:
            input_key = "inputs_embeds"

        # create a tensorflow dataset generator for training
        training = self.create_tensorflow_dataset(
            training, batch_size=self.internal_batch_size,
            shuffle_buffer=shuffle_buffer, repeat=epochs)

        # create a tensorflow dataset generator for validation
        validation = self.create_tensorflow_dataset(
            validation, batch_size=self.internal_batch_size,
            shuffle_buffer=self.internal_batch_size, repeat=1)

        # convert to the huggingface transformer input format
        training = training.map(
            lambda x, y: ({input_key: x}, y),
            num_parallel_calls=tf.data.experimental.AUTOTUNE)

        validation = validation.map(
            lambda x, y: ({input_key: x}, y),
            num_parallel_calls=tf.data.experimental.AUTOTUNE)

        # fit the model to a tensorflow dataset
        model.fit(training, steps_per_epoch=steps,
                  epochs=epochs, validation_data=validation)

        # return the trained model
        return model
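
For reference, every key that protected_fit reads from model_kwargs has to be supplied by the caller. A minimal sketch of assembling that dictionary and invoking the method; the trainer instance and dataset variables below are assumptions, not names taken from the source:

# Hypothetical hyperparameters covering every key read by protected_fit.
model_kwargs = dict(
    hidden_size=64,
    num_heads=2,
    dropout_rate=0.1,
    feed_forward_size=256,
    activation="gelu",
    num_blocks=4,
    epochs=20,
    shuffle_buffer=5000,
    learning_rate=1e-4,
)

# `trainer` stands in for an instance of the surrounding class, and
# `train_split` / `val_split` for its DatasetBuilder training and
# validation splits; per the docstring, the returned model is kept in
# self.params["model"] for later use.
model = trainer.protected_fit(train_split, val_split, model_kwargs=model_kwargs)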