def train(
    image_files,
    labels,
    domain,
    image_width=224,
    image_height=224,
    epochs=1,
    batch_size=16,
    test_ratio=0.2,
    seed=None,
):
    """
    Train a VGG16 model on the provided image files. This will create a new MLflow run and log
    all parameters, metrics and the resulting model with MLflow. The resulting model is an
    instance of KerasImageClassifierPyfunc - a custom python function model that embeds all
    necessary preprocessing together with the VGG16 Keras model. The resulting model can be
    applied directly to base64-encoded image data.

    :param image_height: Height of the input image in pixels.
    :param image_width: Width of the input image in pixels.
    :param image_files: List of image files to be used for training.
    :param labels: List of labels for the image files.
    :param domain: Dictionary representing the domain of the response.
                   Provides mapping label-name -> label-id.
    :param epochs: Number of epochs to train the model for.
    :param batch_size: Batch size used during training.
    :param test_ratio: Fraction of the dataset to be used for validation. This data will not be
                       used during training.
    :param seed: Random seed. Used e.g. when splitting the dataset into train / validation.
    """
    assert len(set(labels)) == len(domain)
    input_shape = (image_width, image_height, 3)

    with mlflow.start_run() as run:
        # Log the training hyperparameters to MLflow.
        mlflow.log_param("epochs", str(epochs))
        mlflow.log_param("batch_size", str(batch_size))
        mlflow.log_param("validation_ratio", str(test_ratio))
        if seed:
            mlflow.log_param("seed", str(seed))

        def _read_image(filename):
            with open(filename, "rb") as f:
                return f.read()

        with tf.Graph().as_default() as g:
            with tf.Session(graph=g).as_default():
                # Decode and resize the images, one-hot encode the labels.
                dims = input_shape[:2]
                x = np.array(
                    [decode_and_resize_image(_read_image(x), dims) for x in image_files]
                )
                y = np_utils.to_categorical(np.array(labels), num_classes=len(domain))
                train_size = 1 - test_ratio
                x_train, x_valid, y_train, y_valid = train_test_split(
                    x, y, random_state=seed, train_size=train_size
                )
                model = _create_model(input_shape=input_shape, classes=len(domain))
                model.compile(
                    optimizer=keras.optimizers.SGD(decay=1e-5, nesterov=True, momentum=0.9),
                    loss=keras.losses.categorical_crossentropy,
                    metrics=["accuracy"],
                )
                sorted_domain = sorted(domain.keys(), key=lambda x: domain[x])
                model.fit(
                    x=x_train,
                    y=y_train,
                    validation_data=(x_valid, y_valid),
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[
                        # Custom callback (defined elsewhere in this example) that logs
                        # metrics and the resulting model to the active MLflow run.
                        MLflowLogger(
                            model=model,
                            x_train=x_train,
                            y_train=y_train,
                            x_valid=x_valid,
                            y_valid=y_valid,
                            artifact_path="model",
                            domain=sorted_domain,
                            image_dims=input_shape,
                        )
                    ],
                )
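For context, here is a hedged sketch of how this train() function might be driven. The run_training helper, the "flowers" directory layout (one sub-directory per class), and all parameter values below are illustrative assumptions, not part of the example above:

import os

def run_training(image_dir="flowers", epochs=10, batch_size=16, seed=97531):
    # Assumed layout: image_dir/<class-name>/<image file>, one sub-directory per class.
    image_files, labels, domain = [], [], {}
    for class_id, class_name in enumerate(sorted(os.listdir(image_dir))):
        class_dir = os.path.join(image_dir, class_name)
        if not os.path.isdir(class_dir):
            continue
        domain[class_name] = class_id  # label-name -> label-id, as train() expects
        for fname in os.listdir(class_dir):
            image_files.append(os.path.join(class_dir, fname))
            labels.append(class_id)
    train(image_files, labels, domain, epochs=epochs, batch_size=batch_size, seed=seed)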
import time

import keras
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import tensorflow as tf
from keras.utils import np_utils
from sklearn.model_selection import train_test_split

# decode_and_resize_image, _create_model, MLflowLogger and SlackUpdate are helpers
# defined elsewhere in this project and are not shown in this listing.


def train(
    image_files,
    labels,
    domain,
    image_width=224,
    image_height=224,
    epochs=1,
    batch_size=16,
    test_ratio=0.2,
    seed=None,
):
    """
    Train a VGG16 model on the provided image files. This will create a new MLflow run and log
    all parameters, metrics and the resulting model with MLflow. The resulting model is an
    instance of KerasImageClassifierPyfunc - a custom python function model that embeds all
    necessary preprocessing together with the VGG16 Keras model. The resulting model can be
    applied directly to base64-encoded image data.

    :param image_height: Height of the input image in pixels.
    :param image_width: Width of the input image in pixels.
    :param image_files: List of image files to be used for training.
    :param labels: List of labels for the image files.
    :param domain: Dictionary representing the domain of the response.
                   Provides mapping label-name -> label-id.
    :param epochs: Number of epochs to train the model for.
    :param batch_size: Batch size used during training.
    :param test_ratio: Fraction of the dataset to be used for validation. This data will not be
                       used during training.
    :param seed: Random seed. Used e.g. when splitting the dataset into train / validation.
    """
    assert len(set(labels)) == len(domain)
    input_shape = (image_width, image_height, 3)

    # Point MLflow at a remote tracking server if one is available.
    # mlflow.set_tracking_uri('http://mlflow-tracking-host:port')

    # Create (and set) an experiment with a quasi-unique, timestamp-based name.
    mlflow.set_experiment(str(int(time.time()))[2:] + 'flower-v1')

    with mlflow.start_run() as run:
        mlflow.log_param("epochs", str(epochs))
        mlflow.log_param("batch_size", str(batch_size))
        mlflow.log_param("validation_ratio", str(test_ratio))
        if seed:
            mlflow.log_param("seed", str(seed))

        def _read_image(filename):
            with open(filename, "rb") as f:
                return f.read()

        with tf.Graph().as_default() as g:
            with tf.Session(graph=g).as_default():
                dims = input_shape[:2]
                x = np.array(
                    [decode_and_resize_image(_read_image(x), dims) for x in image_files]
                )
                y = np_utils.to_categorical(np.array(labels), num_classes=len(domain))
                train_size = 1 - test_ratio
                x_train, x_valid, y_train, y_valid = train_test_split(
                    x, y, random_state=seed, train_size=train_size
                )
                model = _create_model(input_shape=input_shape, classes=len(domain))
                model.compile(
                    optimizer=keras.optimizers.SGD(decay=1e-5, nesterov=True, momentum=0.9),
                    loss=keras.losses.categorical_crossentropy,
                    metrics=["accuracy"],
                )
                sorted_domain = sorted(domain.keys(), key=lambda x: domain[x])

                # Custom callback that posts training progress to Slack.
                slack_update = SlackUpdate(
                    channel='#slack-after-dark',
                    slack_webhook_url='https://hooks.slack.com/services/T/B/G',
                )

                history = model.fit(
                    x=x_train,
                    y=y_train,
                    validation_data=(x_valid, y_valid),
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[
                        MLflowLogger(
                            model=model,
                            x_train=x_train,
                            y_train=y_train,
                            x_valid=x_valid,
                            y_valid=y_valid,
                            artifact_path="model",
                            domain=sorted_domain,
                            image_dims=input_shape,
                        ),
                        slack_update,
                    ],
                )

                # Plot training & validation accuracy (see https://keras.io/visualization/).
                # Note: older Keras versions use the 'acc'/'val_acc' history keys;
                # newer versions use 'accuracy'/'val_accuracy'.
                plt.figure()
                plt.plot(history.history['acc'])
                plt.plot(history.history['val_acc'])
                plt.title('Model accuracy')
                plt.ylabel('Accuracy')
                plt.xlabel('Epoch')
                plt.legend(['Train', 'Validation'], loc='upper left')
                # Save before plt.show(): once the figure window is closed, savefig()
                # would write out an empty image.
                plt.savefig('training_accuracy.png')
                plt.show()

                # Plot training & validation loss on a fresh figure.
                plt.figure()
                plt.plot(history.history['loss'])
                plt.plot(history.history['val_loss'])
                plt.title('Model loss')
                plt.ylabel('Loss')
                plt.xlabel('Epoch')
                plt.legend(['Train', 'Validation'], loc='upper left')
                plt.savefig('training_loss.png')
                plt.show()
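SlackUpdate is referenced above but not defined in this listing. Below is a minimal sketch of what such a Keras callback could look like, assuming it simply posts a message to the Slack incoming-webhook URL at the start of training and after each epoch. Only the class name and constructor arguments mirror the call above; the requests-based posting and the message format are assumptions.

import json

import keras
import requests


class SlackUpdate(keras.callbacks.Callback):
    """Posts training progress to a Slack channel via an incoming webhook."""

    def __init__(self, channel, slack_webhook_url):
        super(SlackUpdate, self).__init__()
        self.channel = channel
        self.slack_webhook_url = slack_webhook_url

    def _post(self, text):
        # Slack incoming webhooks accept a JSON payload with a "text" field.
        payload = {"channel": self.channel, "text": text}
        requests.post(
            self.slack_webhook_url,
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
        )

    def on_train_begin(self, logs=None):
        self._post("Training started.")

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self._post(
            "Epoch {} finished - loss: {:.4f}, val_loss: {:.4f}".format(
                epoch + 1,
                logs.get("loss", float("nan")),
                logs.get("val_loss", float("nan")),
            )
        )

    def on_train_end(self, logs=None):
        self._post("Training finished.")

Note that a failed webhook call in this sketch would raise and abort training, so a production version would likely wrap the post in a try/except and log the error instead.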