def fine_tune_model(model_file, weights_file, model_type, image_dir, nb_gpu):
    # No kruft plz
    clear_session()

    # Config
    height = constants.SIZES['basic']
    width = height

    if nb_gpu <= 1:
        print("[INFO] training with 1 GPU...")
        model = init_model(model_file, model_type=model_type,
                           weights_file=weights_file)
    else:
        print("[INFO] training with {} GPUs...".format(nb_gpu))

        # We'll store a copy of the model on *every* GPU and then combine
        # the results from the gradient updates on the CPU.
        with tf.device("/cpu:0"):
            # initialize the model
            model = init_model(model_file)

        # make the model parallel
        model = multi_gpu_model(model, gpus=nb_gpu)

        # # Add dense layer for merged model
        # from keras import backend as K
        # x = model.output
        # predictions = K.bias_add(x, 0)
        # # predictions = Dense(2)(x)
        # model = Model(inputs=model.input, outputs=predictions)

    # Load checkpoint if one is found; otherwise fall back to a default
    # weights filename for this run
    if os.path.exists(weights_file):
        print("loading ", weights_file)
        model.load_weights(weights_file)
    else:
        weights_file = "weights.tune.{}.{}.gpu{}.hdf5".format(
            model_type, height, nb_gpu)

    # Get all model callbacks
    callbacks_list = callbacks.make_callbacks(weights_file)

    print('Compile model')
    opt = SGD(momentum=.9)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    # Get training/validation data via generators
    train_generator, validation_generator = generators.create_generators(
        height, width, image_dir=image_dir, nb_gpu=nb_gpu)

    print('Start training!')
    start = time.time()
    history = model.fit_generator(
        train_generator,
        callbacks=callbacks_list,
        epochs=constants.TOTAL_EPOCHS,
        # steps_per_epoch=constants.STEPS_PER_EPOCH,
        steps_per_epoch=train_generator.samples // (
            constants.GENERATOR_BATCH_SIZE * nb_gpu),
        shuffle=True,
        # Having crazy threading issues? Set workers to zero if you see
        # an error like `freeze_support()`.
        max_queue_size=100,
        workers=NUM_CPU,
        use_multiprocessing=True,
        validation_data=validation_generator,
        validation_steps=constants.VALIDATION_STEPS)
    print('Total time:', time.time() - start)

    # Save it for later
    print('Saving Model ...')
    model.save("nsfw.{}x{}.{}.gpu{}.h5".format(
        width, height, model_type, nb_gpu))

    # Grab the history object dictionary
    H = history.history

    # Plot the training loss and accuracy
    N = np.arange(0, len(H["loss"]))
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(N, H["loss"], label="train_loss")
    plt.plot(N, H["val_loss"], label="test_loss")
    plt.plot(N, H["acc"], label="train_acc")
    plt.plot(N, H["val_acc"], label="test_acc")
    plt.title("Inception Model on NSFW Data")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()

    # Save the figure
    plt.savefig('gpu_{}_tune_lines.jpg'.format(nb_gpu))
    plt.close()
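
# A minimal usage sketch for fine_tune_model. The file names and image
# directory below are hypothetical placeholders (assumed, not shipped with
# this code); constants, callbacks, and generators must be importable.
#
# fine_tune_model(model_file='nsfw.299x299.inception_v3.gpu1.h5',
#                 weights_file='weights.tune.inception_v3.299.gpu1.hdf5',
#                 model_type='inception_v3',
#                 image_dir='data/train',
#                 nb_gpu=1)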
def train_model(model_type, weights_file, image_dir, batch_size, total_epochs,
                nb_classes, nb_gpu, output_filename=None):
    # No kruft plz
    clear_session()

    # Config
    height = constants.SIZES['basic']
    width = height

    if nb_gpu <= 1:
        print("[INFO] training with 1 GPU...")
        model = build_model(weights_file,
                            type=model_type,
                            shape=(height, width, 3),
                            nb_output=nb_classes)
    else:
        print("[INFO] training with {} GPUs...".format(nb_gpu))

        # We'll store a copy of the model on *every* GPU and then combine
        # the results from the gradient updates on the CPU.
        with tf.device("/cpu:0"):
            # initialize the model
            model = build_model(weights_file,
                                type=model_type,
                                shape=(height, width, 3),
                                nb_output=nb_classes)

        # make the model parallel
        model = multi_gpu_model(model, gpus=nb_gpu)
        model.summary()

        # Put a fresh classification head on the merged model
        x = model.output
        print('multi gpu output:', x)
        x = Flatten()(x)
        predictions = Dense(nb_classes, activation='softmax',
                            name='new_outputs')(x)
        model = Model(inputs=model.input, outputs=predictions)

    if not os.path.exists(weights_file):
        weights_file = "weights.{}.{}.gpu{}.epoch{}.batch{}.cls{}.hdf5".format(
            model_type, height, nb_gpu, total_epochs, batch_size, nb_classes)

    # Get all model callbacks
    callbacks_list = callbacks.make_callbacks(weights_file)

    print('Compile model')
    # Originally Adam, but research says SGD with a scheduler generalizes
    # better.
    # opt = Adam(lr=0.001, amsgrad=True)
    opt = SGD(momentum=.9)
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    # Get training/validation data via generators. ResNet50 weights expect
    # Caffe-style preprocessing; everything else uses TensorFlow-style.
    if model_type in ['resnet50']:
        mode = 'caffe'
    else:
        mode = 'tf'
    train_generator, validation_generator = generators.create_generators(
        height, width, image_dir=image_dir, batch_s=batch_size,
        mode=mode, nb_gpu=nb_gpu)

    print('Start training!')
    cpu_count = multiprocessing.cpu_count()
    start = time.time()
    history = model.fit_generator(
        train_generator,
        callbacks=callbacks_list,
        epochs=total_epochs,
        steps_per_epoch=train_generator.samples // (batch_size * nb_gpu),
        shuffle=True,
        # Having crazy threading issues? Set workers to zero if you see
        # an error like `freeze_support()`.
        max_queue_size=100,
        workers=cpu_count,
        use_multiprocessing=True,
        validation_data=validation_generator,
        validation_steps=constants.VALIDATION_STEPS)
    print('Total time:', time.time() - start)

    # Save it for later
    print('Saving Model ...')
    output = "nsfw.{}x{}.{}.gpu{}.h5".format(width, height, model_type, nb_gpu)
    if output_filename is not None:
        output = output_filename
    model.save(output)

    # Grab the history object dictionary
    H = history.history

    # Plot the training loss and accuracy
    N = np.arange(0, len(H["loss"]))
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(N, H["loss"], label="train_loss")
    plt.plot(N, H["val_loss"], label="test_loss")
    plt.plot(N, H["acc"], label="train_acc")
    plt.plot(N, H["val_acc"], label="test_acc")
    plt.title("Inception Model on NSFW Data")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()

    # Save the figure
    plt.savefig('gpu_{}_train_lines.jpg'.format(nb_gpu))
    plt.close()
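
# A minimal sketch of the "SGD with scheduler" idea noted in train_model,
# assuming classic step decay. The initial_lr/drop/epochs_drop values are
# illustrative, not tuned, and make_callbacks() may already supply its own
# schedule, so wire this in only if it does not.
#
# import math
# from keras.callbacks import LearningRateScheduler
#
# def step_decay(epoch):
#     """Halve the learning rate every epochs_drop epochs."""
#     initial_lr, drop, epochs_drop = 0.01, 0.5, 10.0
#     return initial_lr * math.pow(drop, math.floor(epoch / epochs_drop))
#
# callbacks_list.append(LearningRateScheduler(step_decay, verbose=1))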