def train(cfg: Config, tub_paths: str, model: str, model_type: str) -> \
        tf.keras.callbacks.History:
    """
    Train the model

    Creates the model selected by ``model_type``, loads the records of all
    given tubs, splits them into a training and a validation set and runs
    ``kl.train`` on them. If ``model`` ends in ``.tflite`` the model is
    trained and saved under the same name with an ``.h5`` extension and
    converted to TFLite afterwards.

    :param cfg:         donkey config. Attributes read here:
                        DEFAULT_MODEL_TYPE, PRINT_MODEL_SUMMARY, BATCH_SIZE,
                        MAX_EPOCHS, VERBOSE_TRAIN, MIN_DELTA and
                        EARLY_STOP_PATIENCE.
    :param tub_paths:   comma separated string of tub paths. A list/tuple of
                        paths is accepted too. '~' is expanded in each path.
    :param model:       output path of the model, '~' is expanded.
    :param model_type:  type of model to create, falls back to
                        cfg.DEFAULT_MODEL_TYPE if empty. Any type containing
                        'linear' is trained as 'linear'.
    :return:            the history object returned by ``kl.train``
    :raises AssertionError: if the validation pipeline has no batches
    """
    model_name, model_ext = os.path.splitext(model)
    is_tflite = model_ext == '.tflite'
    if is_tflite:
        # train and save as keras model first, convert at the very end
        model = f'{model_name}.h5'

    if not model_type:
        model_type = cfg.DEFAULT_MODEL_TYPE

    # Callers hand over either one comma separated string or an already
    # split sequence of paths; str.split() only exists on the former.
    if isinstance(tub_paths, str):
        tubs = tub_paths.split(',')
    else:
        tubs = list(tub_paths)
    all_tub_paths = [os.path.expanduser(tub) for tub in tubs]
    output_path = os.path.expanduser(model)
    train_type = 'linear' if 'linear' in model_type else model_type

    kl = get_model_by_type(train_type, cfg)
    if cfg.PRINT_MODEL_SUMMARY:
        print(kl.model.summary())

    dataset = TubDataset(cfg, all_tub_paths)
    training_records, validation_records = dataset.train_test_split()
    print(f'Records # Training {len(training_records)}')
    print(f'Records # Validation {len(validation_records)}')

    training_pipe = BatchSequence(kl, cfg, training_records, is_train=True)
    validation_pipe = BatchSequence(kl, cfg, validation_records,
                                    is_train=False)

    dataset_train = training_pipe.create_tf_data().prefetch(
        tf.data.experimental.AUTOTUNE)
    dataset_validate = validation_pipe.create_tf_data().prefetch(
        tf.data.experimental.AUTOTUNE)
    train_size = len(training_pipe)
    val_size = len(validation_pipe)

    # Raised explicitly instead of using an assert statement so that the
    # check is still performed when python runs with -O.
    if not val_size > 0:
        raise AssertionError("Not enough validation data, decrease the batch "
                             "size or add more data.")

    history = kl.train(model_path=output_path,
                       train_data=dataset_train,
                       train_steps=train_size,
                       batch_size=cfg.BATCH_SIZE,
                       validation_data=dataset_validate,
                       validation_steps=val_size,
                       epochs=cfg.MAX_EPOCHS,
                       verbose=cfg.VERBOSE_TRAIN,
                       min_delta=cfg.MIN_DELTA,
                       patience=cfg.EARLY_STOP_PATIENCE)

    if is_tflite:
        tf_lite_model_path = f'{os.path.splitext(output_path)[0]}.tflite'
        keras_model_to_tflite(output_path, tf_lite_model_path)

    return history
def main():
    """
    Command line entry point: parse the arguments, train a model and, if the
    requested model file ends in '.tflite', convert the trained '.h5' model
    to TFLite.

    Options read from the docopt result: --tubs (comma separated tub paths),
    --model (output model path) and --type (model type, falls back to
    cfg.DEFAULT_MODEL_TYPE when not given).
    """
    args = docopt(__doc__)
    cfg = donkeycar.load_config()
    tubs = args['--tubs']
    model = args['--model']
    model_type = args['--type']
    print(model_type)

    model_name, model_ext = os.path.splitext(model)
    is_tflite = model_ext == '.tflite'
    if is_tflite:
        # training always writes an .h5 file, it is converted below
        model = f'{model_name}.h5'

    if not model_type:
        model_type = cfg.DEFAULT_MODEL_TYPE

    # normalise every tub to an absolute posix path with '~' expanded
    data_paths = [Path(os.path.expanduser(tub)).absolute().as_posix()
                  for tub in tubs.split(',')]
    output_path = os.path.expanduser(model)

    # train() takes the tubs as ONE comma separated string and splits it
    # itself, so the normalised paths are joined again before the call.
    train(cfg, ','.join(data_paths), output_path, model_type)

    if is_tflite:
        tflite_model_path = f'{os.path.splitext(output_path)[0]}.tflite'
        keras_model_to_tflite(output_path, tflite_model_path)
def train(cfg: Config, tub_paths: str, model: str = None,
          model_type: str = None, transfer: str = None, comment: str = None) \
        -> tf.keras.callbacks.History:
    """
    Train the model

    Resolves model name, number and training type through
    ``get_model_train_details``, trains the model on the records of all
    given tubs, optionally converts it to TFLite and finally records the
    run in the pilot database.

    :param cfg:         donkey config. Attributes read here: MODELS_PATH,
                        PRINT_MODEL_SUMMARY, BATCH_SIZE, MAX_EPOCHS,
                        VERBOSE_TRAIN, MIN_DELTA, EARLY_STOP_PATIENCE and
                        SHOW_PLOT.
    :param tub_paths:   comma separated string of tub paths. A list/tuple of
                        paths is accepted too. '~' is expanded in each path.
    :param model:       model name/path, passed to get_model_train_details
    :param model_type:  model type, passed to get_model_train_details
    :param transfer:    optional path of a model that is loaded into the
                        new model before training
    :param comment:     optional text stored in the database entry
    :return:            the history object returned by ``kl.train``
    :raises AssertionError: if the validation pipeline has no batches
    """
    database = PilotDatabase(cfg)
    model_name, model_num, train_type, is_tflite = \
        get_model_train_details(cfg, database, model, model_type)

    # the keras model is always written into the models directory
    output_path = os.path.join(cfg.MODELS_PATH, model_name + '.h5')
    kl = get_model_by_type(train_type, cfg)
    if transfer:
        kl.load(transfer)
    if cfg.PRINT_MODEL_SUMMARY:
        print(kl.model.summary())

    # Callers hand over either one comma separated string or an already
    # split sequence of paths; str.split() only exists on the former.
    if isinstance(tub_paths, str):
        tubs = tub_paths.split(',')
        tubs_entry = tub_paths
    else:
        tubs = list(tub_paths)
        # keep the database field a comma separated string in both cases
        tubs_entry = ','.join(tubs)
    all_tub_paths = [os.path.expanduser(tub) for tub in tubs]

    dataset = TubDataset(cfg, all_tub_paths)
    training_records, validation_records = dataset.train_test_split()
    print(f'Records # Training {len(training_records)}')
    print(f'Records # Validation {len(validation_records)}')

    training_pipe = BatchSequence(kl, cfg, training_records, is_train=True)
    validation_pipe = BatchSequence(kl, cfg, validation_records,
                                    is_train=False)

    dataset_train = training_pipe.create_tf_data().prefetch(
        tf.data.experimental.AUTOTUNE)
    dataset_validate = validation_pipe.create_tf_data().prefetch(
        tf.data.experimental.AUTOTUNE)
    train_size = len(training_pipe)
    val_size = len(validation_pipe)

    # Raised explicitly instead of using an assert statement so that the
    # check is still performed when python runs with -O.
    if not val_size > 0:
        raise AssertionError("Not enough validation data, decrease the batch "
                             "size or add more data.")

    history = kl.train(model_path=output_path,
                       train_data=dataset_train,
                       train_steps=train_size,
                       batch_size=cfg.BATCH_SIZE,
                       validation_data=dataset_validate,
                       validation_steps=val_size,
                       epochs=cfg.MAX_EPOCHS,
                       verbose=cfg.VERBOSE_TRAIN,
                       min_delta=cfg.MIN_DELTA,
                       patience=cfg.EARLY_STOP_PATIENCE,
                       show_plot=cfg.SHOW_PLOT)

    if is_tflite:
        tf_lite_model_path = f'{os.path.splitext(output_path)[0]}.tflite'
        keras_model_to_tflite(output_path, tf_lite_model_path)

    # book-keeping: one database entry per training run
    database_entry = {
        'Number': model_num,
        'Name': model_name,
        'Type': str(kl),
        'Tubs': tubs_entry,
        'Time': time(),
        'History': history.history,
        'Transfer': os.path.basename(transfer) if transfer else None,
        'Comment': comment,
        'Config': str(cfg)
    }
    database.add_entry(database_entry)
    database.write()

    return history
def go_train(kl, cfg, train_gen, val_gen, gen_records, model_name,
             steps_per_epoch, val_steps, continuous, verbose, save_best=None):
    """
    Fit ``kl.model`` on the given generators, report the result, optionally
    plot the loss curves and optionally export a TFLite model.

    :param kl:              pilot object whose ``model`` is fitted
    :param cfg:             config object; reads SEND_BEST_MODEL_TO_PI,
                            MIN_DELTA, EARLY_STOP_PATIENCE, MAX_EPOCHS,
                            USE_EARLY_STOP, VERBOSE_TRAIN,
                            PRUNE_VAL_LOSS_DEGRADATION_LIMIT, SHOW_PLOT,
                            model_type and TARGET_H / TARGET_W / TARGET_D
    :param train_gen:       generator of training batches
    :param val_gen:         generator of validation batches
    :param gen_records:     mapping whose values are records holding an
                            'image_path' entry (used for Coral calibration)
    :param model_name:      output path of the model, '~' is expanded
    :param steps_per_epoch: training steps per epoch, must be >= 2
    :param val_steps:       validation steps per epoch
    :param continuous:      if truthy train for 100000 epochs and never
                            stop early
    :param verbose:         verbosity handed to the callbacks
    :param save_best:       optional checkpoint callback, a MyCPCallback is
                            created when None
    :raises Exception:      if steps_per_epoch < 2
    """
    t_begin = time.time()
    model_path = os.path.expanduser(model_name)

    send_model_cb = None
    if cfg.SEND_BEST_MODEL_TO_PI:
        send_model_cb = on_best_model

    # Checkpoint callback: stores the model whenever val_loss improves and
    # hands the best one to send_model_cb.
    if save_best is None:
        save_best = MyCPCallback(send_model_cb=send_model_cb,
                                 filepath=model_path,
                                 monitor='val_loss',
                                 verbose=verbose,
                                 save_best_only=True,
                                 mode='min',
                                 cfg=cfg)

    # Early-stopping callback: ends the fit once val_loss stops improving.
    early_stop = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=cfg.MIN_DELTA,
        patience=cfg.EARLY_STOP_PATIENCE,
        verbose=verbose,
        mode='auto')

    if steps_per_epoch < 2:
        raise Exception(
            "Too little data to train. Please record more records.")

    epochs = 100000 if continuous else cfg.MAX_EPOCHS

    callbacks_list = [save_best]
    if cfg.USE_EARLY_STOP and not continuous:
        callbacks_list = callbacks_list + [early_stop]

    history = kl.model.fit_generator(
        train_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=cfg.VERBOSE_TRAIN,
        validation_data=val_gen,
        callbacks=callbacks_list,
        validation_steps=val_steps,
        workers=1,
        use_multiprocessing=False)

    # best validation loss of the run plus the configured tolerance
    full_model_val_loss = min(history.history['val_loss'])
    max_val_loss = full_model_val_loss + cfg.PRUNE_VAL_LOSS_DEGRADATION_LIMIT

    elapsed = time.time() - t_begin
    print("Training completed in %s." %
          str(datetime.timedelta(seconds=round(elapsed))))
    print("\n\n----------- Best Eval Loss :%f ---------" % save_best.best)

    if cfg.SHOW_PLOT:
        try:
            if not do_plot:
                print("not saving loss graph because matplotlib not set up.")
            else:
                metrics = history.history
                # accuracy is only recorded for some models
                # (e.g. categorical outputs)
                has_acc = 'angle_out_acc' in metrics

                plt.figure(1)
                if has_acc:
                    plt.subplot(121)

                # loss curves
                plt.plot(metrics['loss'])
                plt.plot(metrics['val_loss'])
                plt.title('model loss')
                plt.ylabel('loss')
                plt.xlabel('epoch')
                plt.legend(['train', 'validate'], loc='upper right')

                # accuracy curves
                if has_acc:
                    plt.subplot(122)
                    plt.plot(metrics['angle_out_acc'])
                    plt.plot(metrics['val_angle_out_acc'])
                    plt.title('model angle accuracy')
                    plt.ylabel('acc')
                    plt.xlabel('epoch')

                plt.savefig(model_path + '_loss_acc_%f.%s'
                            % (save_best.best, figure_format))
                plt.show()
        except Exception as ex:
            print("problems with loss graph: {}".format(ex))

    # TFLite export, optionally int-quantized for the Coral TPU
    if "tflite" in cfg.model_type:
        print("\n\n--------- Saving TFLite Model ---------")
        tflite_fnm = model_path.replace(".h5", ".tflite")
        assert (".tflite" in tflite_fnm)

        prepare_for_coral = "coral" in cfg.model_type

        representative_dataset_gen = None
        if prepare_for_coral:
            # collect at most max_items records to calibrate the quantizer
            max_items = 1000
            data_list = []
            for _key, _record in gen_records.items():
                data_list.append(_record)
                if len(data_list) == max_items:
                    break

            stride = 1
            num_calibration_steps = len(data_list) // stride

            def representative_dataset_gen():
                # yields one batch of `stride` scaled images per step so the
                # quantizer sees the expected range of input data
                for step in range(num_calibration_steps):
                    lo = step * stride
                    inputs = [load_scaled_image_arr(rec['image_path'], cfg)
                              for rec in data_list[lo:lo + stride]]
                    yield [
                        np.array(inputs, dtype=np.float32).reshape(
                            stride, cfg.TARGET_H, cfg.TARGET_W, cfg.TARGET_D)
                    ]

        from donkeycar.parts.tflite import keras_model_to_tflite
        keras_model_to_tflite(model_path, tflite_fnm,
                              representative_dataset_gen)
        print("Saved TFLite model:", tflite_fnm)
        if prepare_for_coral:
            print("compile for Coral w: edgetpu_compiler", tflite_fnm)
            os.system("edgetpu_compiler " + tflite_fnm)

    # TensorRT export: only the banner is printed here
    if "tensorrt" in cfg.model_type:
        print("\n\n--------- Saving TensorRT Model ---------")
def go_train(kl, cfg, train_gen, val_gen, gen_records, model_name,
             steps_per_epoch, val_steps, continuous, verbose, save_best=None):
    """
    Fit ``kl.model`` on the given generators, report the result, optionally
    plot the loss curves, optionally export a TFLite model and optionally
    prune the CNN iteratively.

    :param kl:              pilot object whose ``model`` is fitted; when
                            pruning, ``kl.model`` is replaced and
                            ``kl.compile()`` is called
    :param cfg:             config object; reads MIN_DELTA,
                            EARLY_STOP_PATIENCE, MAX_EPOCHS, USE_EARLY_STOP,
                            VERBOSE_TRAIN (or the legacy VEBOSE_TRAIN),
                            PRUNE_VAL_LOSS_DEGRADATION_LIMIT, SHOW_PLOT,
                            model_type, TARGET_H / TARGET_W / TARGET_D,
                            PRUNE_CNN, PRUNE_PERCENT_TARGET and
                            PRUNE_PERCENT_PER_ITERATION
    :param train_gen:       generator of training batches
    :param val_gen:         generator of validation batches
    :param gen_records:     mapping whose values are records holding an
                            'image_path' entry (used for Coral calibration
                            and handed to SequencePredictionGenerator)
    :param model_name:      output path of the model, '~' is expanded
    :param steps_per_epoch: training steps per epoch, must be >= 2
    :param val_steps:       validation steps per epoch
    :param continuous:      if truthy train for 100000 epochs and never
                            stop early
    :param verbose:         verbosity handed to the callbacks
    :param save_best:       optional checkpoint callback, a MyCPCallback is
                            created when None
    :raises Exception:      if steps_per_epoch < 2
    """
    start = time.time()

    model_path = os.path.expanduser(model_name)

    # checkpoint to save model after each epoch and send best to the pi.
    if save_best is None:
        save_best = MyCPCallback(send_model_cb=on_best_model,
                                 filepath=model_path,
                                 monitor='val_loss',
                                 verbose=verbose,
                                 save_best_only=True,
                                 mode='min',
                                 cfg=cfg)

    # stop training if the validation error stops improving.
    early_stop = keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=cfg.MIN_DELTA,
        patience=cfg.EARLY_STOP_PATIENCE,
        verbose=verbose,
        mode='auto')

    if steps_per_epoch < 2:
        raise Exception(
            "Too little data to train. Please record more records.")

    if continuous:
        epochs = 100000
    else:
        epochs = cfg.MAX_EPOCHS

    workers_count = 1
    use_multiprocessing = False

    callbacks_list = [save_best]

    if cfg.USE_EARLY_STOP and not continuous:
        callbacks_list.append(early_stop)

    # The config attribute was misspelled 'VEBOSE_TRAIN' here. Prefer the
    # correct spelling, accept the legacy one so old config files keep
    # working, and fall back to the `verbose` argument if neither is set.
    fit_verbosity = getattr(cfg, 'VERBOSE_TRAIN',
                            getattr(cfg, 'VEBOSE_TRAIN', verbose))

    history = kl.model.fit_generator(train_gen,
                                     steps_per_epoch=steps_per_epoch,
                                     epochs=epochs,
                                     verbose=fit_verbosity,
                                     validation_data=val_gen,
                                     callbacks=callbacks_list,
                                     validation_steps=val_steps,
                                     workers=workers_count,
                                     use_multiprocessing=use_multiprocessing)

    # Loss limit for the pruning loop below: a pruned model is only kept
    # while its best val_loss stays under this value.
    full_model_val_loss = min(history.history['val_loss'])
    max_val_loss = full_model_val_loss + cfg.PRUNE_VAL_LOSS_DEGRADATION_LIMIT

    duration_train = time.time() - start
    print("Training completed in %s." %
          str(datetime.timedelta(seconds=round(duration_train))))

    print("\n\n----------- Best Eval Loss :%f ---------" % save_best.best)

    if cfg.SHOW_PLOT:
        try:
            if do_plot:
                plt.figure(1)

                # Only do accuracy if we have that data
                # (e.g. categorical outputs)
                if 'angle_out_acc' in history.history:
                    plt.subplot(121)

                # summarize history for loss
                plt.plot(history.history['loss'])
                plt.plot(history.history['val_loss'])
                plt.title('model loss')
                plt.ylabel('loss')
                plt.xlabel('epoch')
                plt.legend(['train', 'validate'], loc='upper right')

                # summarize history for acc
                if 'angle_out_acc' in history.history:
                    plt.subplot(122)
                    plt.plot(history.history['angle_out_acc'])
                    plt.plot(history.history['val_angle_out_acc'])
                    plt.title('model angle accuracy')
                    plt.ylabel('acc')
                    plt.xlabel('epoch')

                plt.savefig(model_path + '_loss_acc_%f.png' % save_best.best)
                plt.show()
            else:
                print("not saving loss graph because matplotlib not set up.")
        except Exception as ex:
            # plotting is best effort, never fail the training because of it
            print("problems with loss graph: {}".format(ex))

    # Save tflite, optionally in the int quant format for Coral TPU
    if "tflite" in cfg.model_type:
        print("\n\n--------- Saving TFLite Model ---------")
        tflite_fnm = model_path.replace(".h5", ".tflite")
        assert (".tflite" in tflite_fnm)

        prepare_for_coral = "coral" in cfg.model_type

        if prepare_for_coral:
            # compile a list of records to calibrate the quantization
            data_list = []
            max_items = 1000
            for key, _record in gen_records.items():
                data_list.append(_record)
                if len(data_list) == max_items:
                    break

            stride = 1
            num_calibration_steps = len(data_list) // stride

            # a generator function to help train the quantizer with the
            # expected range of data from inputs
            def representative_dataset_gen():
                start = 0
                end = stride
                for _ in range(num_calibration_steps):
                    batch_data = data_list[start:end]
                    inputs = []

                    for record in batch_data:
                        filename = record['image_path']
                        img_arr = load_scaled_image_arr(filename, cfg)
                        inputs.append(img_arr)

                    start += stride
                    end += stride

                    # The calibration samples are forced to float32 so the
                    # converter never receives float64/uint8 arrays,
                    # whatever dtype the image loader produced.
                    yield [
                        np.array(inputs, dtype=np.float32).reshape(
                            stride, cfg.TARGET_H, cfg.TARGET_W, cfg.TARGET_D)
                    ]
        else:
            representative_dataset_gen = None

        from donkeycar.parts.tflite import keras_model_to_tflite
        keras_model_to_tflite(model_path, tflite_fnm,
                              representative_dataset_gen)
        print("Saved TFLite model:", tflite_fnm)
        if prepare_for_coral:
            print("compile for Coral w: edgetpu_compiler", tflite_fnm)
            os.system("edgetpu_compiler " + tflite_fnm)

    # Save tensorrt
    if "tensorrt" in cfg.model_type:
        print("\n\n--------- Saving TensorRT Model ---------")
        # TODO RAHUL
        # flatten model_path
        # convert to uff
        # print("Saved TensorRT model:", uff_filename)

    if cfg.PRUNE_CNN:
        base_model_path = splitext(model_name)[0]
        cnn_channels = get_total_channels(kl.model)
        print('original model with {} channels'.format(cnn_channels))
        prune_gen = SequencePredictionGenerator(gen_records, cfg)
        target_channels = int(
            cnn_channels * (1 - (float(cfg.PRUNE_PERCENT_TARGET) / 100.0)))

        print(
            'Target channels of {0} remaining with {1:.00%} percent removal per iteration'
            .format(target_channels, cfg.PRUNE_PERCENT_PER_ITERATION / 100))

        prune_loss = 0
        # prune -> re-train -> evaluate until the channel target is reached
        # or the validation loss degrades past max_val_loss
        while cnn_channels > target_channels:
            save_best.reset_best()
            model, channels_deleted = prune(kl.model, prune_gen, 1, cfg)
            cnn_channels -= channels_deleted
            kl.model = model
            kl.compile()
            kl.model.summary()

            # stop training if the validation error stops improving.
            early_stop = keras.callbacks.EarlyStopping(
                monitor='val_loss',
                min_delta=cfg.MIN_DELTA,
                patience=cfg.EARLY_STOP_PATIENCE,
                verbose=verbose,
                mode='auto')

            history = kl.model.fit_generator(
                train_gen,
                steps_per_epoch=steps_per_epoch,
                epochs=epochs,
                verbose=fit_verbosity,
                validation_data=val_gen,
                validation_steps=val_steps,
                workers=workers_count,
                callbacks=[early_stop],
                use_multiprocessing=use_multiprocessing)

            prune_loss = min(history.history['val_loss'])
            print('prune val_loss this iteration: {}'.format(prune_loss))

            # Keep the pruned model only while its loss is still below the
            # limit, otherwise stop pruning.
            if prune_loss < max_val_loss:
                model.save('{}_prune_{}_filters.h5'.format(
                    base_model_path, cnn_channels))
            else:
                break

        print('pruning stopped at {} with a target of {}'.format(
            cnn_channels, target_channels))
'''
Usage:
    tflite_convert.py --model="mymodel.h5" --out="mymodel.tflite"

Note:
    may require tensorflow > 1.11 or
    pip install tf-nightly
'''
import os

from docopt import docopt
from donkeycar.parts.tflite import keras_model_to_tflite

# Parse the command line against the usage text above.
args = docopt(__doc__)

# Expand '~' in the input and the output path, in that order.
in_model, out_model = (os.path.expanduser(args[option])
                       for option in ('--model', '--out'))

# Convert the keras model file into a TFLite model file.
keras_model_to_tflite(in_model, out_model)