import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report
from transformers import BertConfig, TFBertForSequenceClassification

save_model = True

# build a three-class BERT classifier; loading the configuration with
# from_pretrained (rather than passing model_type to the constructor)
# actually fetches the bert-base-uncased settings
config = BertConfig.from_pretrained(
    'bert-base-uncased', num_labels=3, return_dict=True)
model = TFBertForSequenceClassification(config=config)

if save_model:

    # fine-tune the classifier, checkpointing weights via cp_callback;
    # compute_loss is the model's built-in classification loss in the
    # transformers release this snippet targets
    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
    model.compile(optimizer=optimizer,
                  loss=model.compute_loss,
                  metrics=['accuracy'])
    model.fit(train_dataset[0], np.array(y_list),
              epochs=5, batch_size=BATCH_SIZE,
              callbacks=[cp_callback])

else:

    # otherwise restore the most recently saved weights from disk
    latest = tf.train.latest_checkpoint(checkpoint_dir)
    model.load_weights(latest)

# predict logits on the validation set; preds_proba holds the class
# probabilities, and argmax over the logits gives the predicted classes
preds = model.predict(val_dataset[0])["logits"]
preds_proba = tf.keras.backend.softmax(preds, axis=1)
classes = np.argmax(preds, axis=-1)
score = classification_report(y_val, classes, digits=3)
print(score)
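# The snippet above assumes train_dataset, val_dataset, y_list, y_val,
# BATCH_SIZE, checkpoint_dir, and cp_callback are defined earlier. As a
# minimal sketch (an assumption, not the original setup), the checkpoint
# callback it relies on could be created like this, with a hypothetical
# checkpoint_dir path:
import os
import tensorflow as tf

checkpoint_dir = "training_checkpoints"
checkpoint_path = os.path.join(checkpoint_dir, "cp-{epoch:04d}.ckpt")

# save only the weights at the end of each epoch so that
# tf.train.latest_checkpoint can locate and restore the newest
# checkpoint in the else-branch above
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1)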
def protected_fit(self, training, validation, model_kwargs=None):
    """Accept a training dataset and a validation dataset containing
    design values 'x' and prediction values 'y' in a model-based
    optimization problem and fit an approximate model.

    Arguments:

    training: DatasetBuilder
        an instance of a subclass of the DatasetBuilder class which has
        a set of design values 'x' and prediction values 'y', and
        defines batching and sampling methods for those attributes
    validation: DatasetBuilder
        an instance of a subclass of the DatasetBuilder class which has
        a set of design values 'x' and prediction values 'y', and
        defines batching and sampling methods for those attributes
    model_kwargs: dict
        a dictionary of keyword arguments that parameterize the
        architecture and learning algorithm of the model

    Returns:

    model: Any
        any format of machine learning model that will be stored in the
        self.params["model"] attribute for later use

    """

    # these parameters control the neural network architecture
    hidden_size = model_kwargs["hidden_size"]
    num_heads = model_kwargs["num_heads"]
    dropout_rate = model_kwargs["dropout_rate"]
    feed_forward_size = model_kwargs["feed_forward_size"]
    activation = model_kwargs["activation"]
    num_blocks = model_kwargs["num_blocks"]

    # these parameters control the model training
    epochs = model_kwargs["epochs"]
    shuffle_buffer = model_kwargs["shuffle_buffer"]
    learning_rate = model_kwargs["learning_rate"]

    # determine the vocab size to initialize the model with
    num_classes = 1
    if self.feature_extractor is not None:
        if self.feature_extractor.is_discrete(self.internal_dataset):
            num_classes = self.feature_extractor\
                .num_classes(self.internal_dataset)
    elif isinstance(training, DiscreteDataset):
        num_classes = training.num_classes

    # obtain the expected shape of inputs to the model
    input_shape = training.input_shape
    if isinstance(training, DiscreteDataset) and training.is_logits:
        input_shape = input_shape[:-1]

    # if a feature extraction model is given, assume its input shape
    if self.feature_extractor is not None:
        input_shape = self.feature_extractor\
            .input_shape(self.internal_dataset)

    # build the hugging face model from a configuration
    model = TFBert(transformers.BertConfig(
        vocab_size=num_classes,
        num_labels=1,
        hidden_size=hidden_size,
        num_hidden_layers=num_blocks,
        num_attention_heads=num_heads,
        intermediate_size=feed_forward_size,
        hidden_act=activation,
        hidden_dropout_prob=dropout_rate,
        attention_probs_dropout_prob=dropout_rate,
        max_position_embeddings=input_shape[0],
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        position_embedding_type='absolute'))

    # estimate the number of training steps per epoch
    steps = int(math.ceil(
        training.dataset_size / self.internal_batch_size))

    # compile the tensorflow model for training with a cosine
    # learning rate schedule that decays to zero
    lr = tf.keras.experimental.CosineDecay(
        learning_rate, steps * epochs, alpha=0.0)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.MeanSquaredError())

    # if a feature extraction model is given, assume its input format
    if self.feature_extractor is not None:
        input_key = ("input_ids" if self.feature_extractor
                     .is_discrete(self.internal_dataset)
                     else "inputs_embeds")
    elif isinstance(training, DiscreteDataset):
        input_key = "input_ids"
    else:
        input_key = "inputs_embeds"

    # create a tensorflow dataset generator for training
    training = self.create_tensorflow_dataset(
        training, batch_size=self.internal_batch_size,
        shuffle_buffer=shuffle_buffer, repeat=epochs)

    # create a tensorflow dataset generator for validation
    validation = self.create_tensorflow_dataset(
        validation, batch_size=self.internal_batch_size,
        shuffle_buffer=self.internal_batch_size, repeat=1)

    # convert both datasets to the huggingface transformer input format
    training = training.map(
        lambda x, y: ({input_key: x}, y),
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    validation = validation.map(
        lambda x, y: ({input_key: x}, y),
        num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # fit the model to the tensorflow dataset
    model.fit(training, steps_per_epoch=steps,
              epochs=epochs, validation_data=validation)

    # return the trained model
    return model
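# A hedged sketch of how protected_fit might be invoked; `surrogate`,
# `train_split`, and `val_split` are hypothetical names standing in for
# an instance of the class that defines protected_fit (with the
# feature_extractor, internal_dataset, internal_batch_size, and
# create_tensorflow_dataset members used above) and its DatasetBuilder
# splits. The model_kwargs keys mirror those read at the top of the
# function; the values below are illustrative only.
model_kwargs = dict(
    hidden_size=64, num_heads=2, dropout_rate=0.1,
    feed_forward_size=256, activation="gelu", num_blocks=4,
    epochs=20, shuffle_buffer=5000, learning_rate=1e-4)

# the fitted model is stored for later use, matching the docstring's
# description of the self.params["model"] attribute
surrogate.params["model"] = surrogate.protected_fit(
    train_split, val_split, model_kwargs=model_kwargs)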