def evaluate_model(args, model, input_shape):
    # eval data generator
    eval_datagen = ImageDataGenerator(preprocessing_function=preprocess)
    eval_generator = eval_datagen.flow_from_directory(args.val_data_path,
                                                      target_size=input_shape,
                                                      batch_size=args.batch_size)

    # get optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate,
                              average_type=None, decay_type=None)

    # start evaluate
    model.compile(optimizer=optimizer,
                  metrics=['accuracy', 'top_k_categorical_accuracy'],
                  loss='categorical_crossentropy')
    print('Evaluate on {} samples, with batch size {}.'.format(
        eval_generator.samples, args.batch_size))
    scores = model.evaluate_generator(eval_generator,
                                      steps=eval_generator.samples // args.batch_size,
                                      max_queue_size=10,
                                      workers=1,
                                      use_multiprocessing=False,
                                      verbose=1)
    print('Evaluate loss:', scores[0])
    print('Top-1 accuracy:', scores[1])
    print('Top-k accuracy:', scores[2])
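# Illustrative usage sketch (not part of the original script): wiring evaluate_model()
# above to a minimal argparse CLI. The option names mirror the attributes the function
# reads (val_data_path, batch_size, optimizer, learning_rate); the --model_path option,
# the default values and the 224x224 input shape below are assumptions for demonstration.
import argparse
from tensorflow.keras.models import load_model

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Evaluate a trained classifier')
    parser.add_argument('--model_path', type=str, required=True,
                        help='path to trained .h5 model file')
    parser.add_argument('--val_data_path', type=str, required=True,
                        help='path to validation image dataset')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    args = parser.parse_args()

    # load the saved model without its training config; evaluate_model() recompiles it
    model = load_model(args.model_path, compile=False)
    evaluate_model(args, model, input_shape=(224, 224))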
def main(args): annotation_file = args.annotation_file log_dir = os.path.join('logs', '000') classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) print('classes_path =', classes_path) print('class_names = ', class_names) print('num_classes = ', num_classes) anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), monitor='val_loss', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) print('num_train = ', num_train) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 #Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required' # get different model type & train&val data generator if num_anchors == 9: # YOLOv3 use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False elif num_anchors == 6: # Tiny YOLOv3 use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = True elif num_anchors == 5: # YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False else: raise ValueError('Unsupported anchors number') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( args.model_type, dataset[num_train:], anchors, class_names, 
args.model_image_size, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # get train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) # support multi-gpu training template_model = None if args.gpu_num >= 2: # keep the template model for saving result template_model = model model = multi_gpu_model(model, gpus=args.gpu_num) # recompile multi gpu model model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch ##################################################################################################### epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model if args.model_pruning: if template_model is not None: template_model = sparsity.strip_pruning(template_model) else: model = sparsity.strip_pruning(model) if template_model is not None: template_model.save(os.path.join(log_dir, 'trained_final.h5')) else: model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args): log_dir = 'logs/' class_names = get_classes(args.classes_path) num_classes = len(class_names) if args.matchpoint_path: matchpoints = get_matchpoints(args.matchpoint_path) else: matchpoints = None # choose model type if args.tiny: num_channels = 128 #input_size = (192, 192) else: num_channels = 256 #input_size = (256, 256) input_size = args.model_image_size # get train/val dataset train_dataset = hourglass_dataset(args.dataset_path, class_names, input_size=input_size, is_train=True, matchpoints=matchpoints) val_dataset = hourglass_dataset(args.dataset_path, class_names, input_size=input_size, is_train=False) train_gen = train_dataset.generator(args.batch_size, args.num_stacks, sigma=1, is_shuffle=True, rot_flag=True, scale_flag=True, h_flip_flag=True, v_flip_flag=True) model_type = get_model_type(args.num_stacks, args.mobile, args.tiny, input_size) # callbacks for training process tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') eval_callback = EvalCallBack(log_dir, val_dataset, class_names, input_size, model_type) terminate_on_nan = TerminateOnNaN() callbacks = [tensorboard, eval_callback, terminate_on_nan] # prepare optimizer #optimizer = RMSprop(lr=5e-4) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # get train model, doesn't specify input size model = get_hourglass_model(num_classes, args.num_stacks, num_channels, mobile=args.mobile) print( 'Create {} Stacked Hourglass model with stack number {}, channel number {}. train input size {}' .format('Mobile' if args.mobile else '', args.num_stacks, num_channels, input_size)) model.summary() if args.weights_path: model.load_weights(args.weights_path, by_name=True) #, skip_mismatch=True) print('Load weights {}.'.format(args.weights_path)) # support multi-gpu training template_model = None if args.gpu_num >= 2: # keep the template model for saving result template_model = model model = multi_gpu_model(model, gpus=args.gpu_num) model.compile(optimizer=optimizer, loss=mean_squared_error) # start training model.fit_generator(generator=train_gen, steps_per_epoch=train_dataset.get_dataset_size() // args.batch_size, epochs=args.total_epoch, initial_epoch=args.init_epoch, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) if template_model is not None: template_model.save(os.path.join(log_dir, 'trained_final.h5')) else: model.save(os.path.join(log_dir, 'trained_final.h5')) return
def main(args):
    # path to the dataset annotation file
    annotation_file = args.annotation_file
    # directory where training logs and weights will be saved
    log_dir = os.path.join('logs', '000')
    # path to the class names file
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    # load the anchors
    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False,
                          write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 mode='min',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min',
                                  patience=10, verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50,
                                   verbose=1, mode='min')
    terminate_on_nan = TerminateOnNaN()
    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # load the train & val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # pick data generator & model builder according to the model type
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper
        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper
        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(args.model_type, dataset[num_train:], anchors, class_names,
                                     args.model_image_size, args.model_pruning, log_dir,
                                     eval_epoch_interval=args.eval_epoch_interval,
                                     save_eval_checkpoint=args.save_eval_checkpoint,
                                     elim_grid_sense=args.elim_grid_sense)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # support multi-gpu training
    if args.gpu_num >= 2:
        # devices_list=["/gpu:0", "/gpu:1"]
        devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)]
        strategy = tf.distribute.MirroredStrategy(devices=devices_list)
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        with strategy.scope():
            # get multi-gpu train model
            model = get_train_model(args.model_type, anchors, num_classes,
                                    weights_path=args.weights_path,
                                    freeze_level=freeze_level,
                                    optimizer=optimizer,
                                    label_smoothing=args.label_smoothing,
                                    elim_grid_sense=args.elim_grid_sense,
                                    model_pruning=args.model_pruning,
                                    pruning_end_step=pruning_end_step)
    else:
        # get normal train model
        model = get_train_model(args.model_type, anchors, num_classes,
                                weights_path=args.weights_path,
                                freeze_level=freeze_level,
                                optimizer=optimizer,
                                label_smoothing=args.label_smoothing,
                                elim_grid_sense=args.elim_grid_sense,
                                model_pruning=args.model_pruning,
                                pruning_end_step=pruning_end_step)

    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(
        num_train, num_val, args.batch_size, input_shape))
    # To improve results, run the first few epochs as transfer learning with frozen
    # layers (init_epoch ~ transfer_epoch)
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment, rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape,
                                       anchors, num_classes,
                                       multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=epochs,
        initial_epoch=initial_epoch,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate,
                                  decay_type=args.decay_type, decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    if args.gpu_num >= 2:
        with strategy.scope():
            for i in range(len(model.layers)):
                model.layers[i].trainable = True
            model.compile(optimizer=optimizer,
                          loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change
    else:
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=optimizer,
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(
        num_train, num_val, args.batch_size, input_shape))
    # After the transfer stage, train the remaining epochs with all layers unfrozen
    # (transfer_epoch ~ total_epoch). If this stage is not needed, or training takes
    # too long, set total_epoch equal to the transfer epochs to skip the fine-tuning
    # below. Adjust according to your machine's capability.
    model.fit_generator(
        data_generator(dataset[:num_train], args.batch_size, input_shape,
                       anchors, num_classes, args.enhance_augment, rescale_interval,
                       multi_anchor_assign=args.multi_anchor_assign),
        steps_per_epoch=max(1, num_train // args.batch_size),
        #validation_data=val_data_generator,
        validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape,
                                       anchors, num_classes,
                                       multi_anchor_assign=args.multi_anchor_assign),
        validation_steps=max(1, num_val // args.batch_size),
        epochs=args.total_epoch,
        initial_epoch=epochs,
        #verbose=1,
        workers=1,
        use_multiprocessing=False,
        max_queue_size=10,
        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        model = sparsity.strip_pruning(model)
    model.save(os.path.join(log_dir, 'trained_final.h5'))
def train(args, model, input_shape, strategy):
    log_dir = os.path.join('logs', '000')

    # callbacks for training process
    checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-val_loss{val_loss:.3f}-val_accuracy{val_accuracy:.3f}-val_top_k_categorical_accuracy{val_top_k_categorical_accuracy:.3f}.h5'),
                                 monitor='val_accuracy',
                                 mode='max',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False,
                          write_grads=False, write_images=False, update_freq='batch')
    terminate_on_nan = TerminateOnNaN()
    learn_rates = [0.05, 0.01, 0.005, 0.001, 0.0005]
    lr_scheduler = LearningRateScheduler(lambda epoch: learn_rates[epoch // 30])
    checkpoint_clean = CheckpointCleanCallBack(log_dir, max_val_keep=3)
    callbacks = [logging, checkpoint, lr_scheduler, terminate_on_nan, checkpoint_clean]

    # data generator
    train_datagen = ImageDataGenerator(
        preprocessing_function=preprocess,
        #featurewise_center=False,
        #samplewise_center=False,
        #featurewise_std_normalization=False,
        #samplewise_std_normalization=False,
        #zca_whitening=False,
        #zca_epsilon=1e-06,
        zoom_range=0.25,
        brightness_range=[0.5, 1.5],
        channel_shift_range=0.1,
        shear_range=0.2,
        rotation_range=30,
        width_shift_range=0.05,
        height_shift_range=0.05,
        vertical_flip=True,
        horizontal_flip=True,
        #rescale=1./255,
        #validation_split=0.1,
        fill_mode='constant',
        cval=0.,
        data_format=None,
        dtype=None)
    test_datagen = ImageDataGenerator(preprocessing_function=preprocess)
    train_generator = train_datagen.flow_from_directory(
        args.train_data_path,
        target_size=input_shape,
        batch_size=args.batch_size,
        color_mode='rgb',
        classes=None,
        class_mode='categorical',
        shuffle=True,
        #save_to_dir='check',
        #save_prefix='augmented_',
        #save_format='jpg',
        interpolation='nearest')
    test_generator = test_datagen.flow_from_directory(
        args.val_data_path,
        target_size=input_shape,
        batch_size=args.batch_size,
        color_mode='rgb',
        classes=None,
        class_mode='categorical',
        shuffle=True,
        #save_to_dir='check',
        #save_prefix='augmented_',
        #save_format='jpg',
        interpolation='nearest')

    # get optimizer
    if args.decay_type:
        callbacks.remove(lr_scheduler)
    steps_per_epoch = max(1, train_generator.samples // args.batch_size)
    decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch)
    optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=None,
                              decay_type=args.decay_type, decay_steps=decay_steps)

    # get loss
    losses = CategoricalCrossentropy(label_smoothing=args.label_smoothing)

    # model compile
    if strategy:
        with strategy.scope():
            model.compile(optimizer=optimizer,
                          metrics=['accuracy', 'top_k_categorical_accuracy'],
                          loss=losses)
    else:
        model.compile(optimizer=optimizer,
                      metrics=['accuracy', 'top_k_categorical_accuracy'],
                      loss=losses)

    # start training
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(
        train_generator.samples, test_generator.samples, args.batch_size))
    model.fit_generator(
        train_generator,
        steps_per_epoch=train_generator.samples // args.batch_size,
        epochs=args.total_epoch,
        workers=cpu_count() - 1,  # parallelize image data feeding but leave one CPU core idle
        initial_epoch=args.init_epoch,
        use_multiprocessing=True,
        max_queue_size=10,
        validation_data=test_generator,
        validation_steps=test_generator.samples // args.batch_size,
        callbacks=callbacks)

    # Finally store model
    model.save(os.path.join(log_dir, 'trained_final.h5'))
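# Note on the step schedule in train() above (an observation, not a change to the
# original code): with five entries in learn_rates, the scheduler lambda indexes out
# of range once epoch reaches 150. If longer runs are ever needed, a clamped variant
# such as this sketch keeps the same 30-epoch steps while holding the final rate.
from tensorflow.keras.callbacks import LearningRateScheduler

def make_step_scheduler(rates, step=30):
    # hold the last rate once the schedule is exhausted
    return LearningRateScheduler(lambda epoch: rates[min(epoch // step, len(rates) - 1)])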
def main(args): annotation_file = args.annotation_file log_dir = os.path.join('logs', '000') classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, mode='min', patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min') terminate_on_nan = TerminateOnNaN() callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan ] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 #Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32' # get different model type & train&val data generator if args.model_type.startswith( 'scaled_yolo4_') or args.model_type.startswith('yolo5_'): # Scaled-YOLOv4 & YOLOv5 entrance, use yolo5 submodule but now still yolo3 data generator # TODO: create new yolo5 data generator to apply YOLOv5 anchor assignment get_train_model = get_yolo5_train_model data_generator = yolo5_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo5DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) #val_data_generator = Yolo5DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = False elif args.model_type.startswith('yolo3_') or args.model_type.startswith( 'yolo4_'): #if num_anchors == 9: # YOLOv3 & v4 entrance, use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = False elif args.model_type.startswith( 'tiny_yolo3_') or args.model_type.startswith('tiny_yolo4_'): #elif num_anchors == 6: # Tiny YOLOv3 & v4 entrance, use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = 
Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = True elif args.model_type.startswith('yolo2_') or args.model_type.startswith( 'tiny_yolo2_'): #elif num_anchors == 5: # YOLOv2 & Tiny YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper # tf.keras.Sequence style data generator #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False else: raise ValueError('Unsupported model type') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( args.model_type, dataset[num_train:], anchors, class_names, args.model_image_size, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint, elim_grid_sense=args.elim_grid_sense) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype( np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [ sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0) ] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=None, decay_type=None) # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list) print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) else: # get normal train model model = get_train_model(args.model_type, anchors, num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type or args.average_type: # rebuild optimizer to apply learning rate decay or weights averager, # only after unfreeze all layers if args.decay_type: callbacks.remove(reduce_lr) if args.average_type == 'ema' or args.average_type == 'swa': # weights averager need tensorflow-addons, # which request TF 2.x and have version compatibility import tensorflow_addons as tfa callbacks.remove(checkpoint) avg_checkpoint = tfa.callbacks.AverageModelCheckpoint( filepath=os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'), update_weights=True, monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1) callbacks.append(avg_checkpoint) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=args.average_type, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") if args.gpu_num >= 2: with strategy.scope(): for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change else: for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={ 'yolo_loss': lambda y_true, y_pred: y_pred }) # recompile to apply the change print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(num_train, num_val, args.batch_size, input_shape)) #model.fit_generator(train_data_generator, model.fit_generator( data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, multi_anchor_assign=args.multi_anchor_assign), steps_per_epoch=max(1, num_train // args.batch_size), #validation_data=val_data_generator, validation_data=data_generator( dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(os.path.join(log_dir, 'trained_final.h5'))
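# Background sketch for the 'ema'/'swa' branch above (an assumption, not taken from
# this project's get_optimizer() implementation): AverageModelCheckpoint only works
# when the model was compiled with one of the tensorflow-addons averaged-optimizer
# wrappers, roughly as below.
import tensorflow as tf
import tensorflow_addons as tfa

base_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
ema_optimizer = tfa.optimizers.MovingAverage(base_optimizer)  # average_type == 'ema'
swa_optimizer = tfa.optimizers.SWA(base_optimizer)            # average_type == 'swa'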
def main(args): annotation_file = args.annotation_file classes_path = args.classes_path class_names = get_classes(classes_path) num_classes = len(class_names) anchors = get_anchors(args.anchors_path) num_anchors = len(anchors) log_dir_path = args.log_directory try: log_dir = os.path.join('logs', log_dir_path) except TypeError: date_now = datetime.now() log_dir_folder_name = f'{date_now.strftime("%Y_%m_%d_%H%M%S")}_{args.model_type}_TransferEp_{args.transfer_epoch}_TotalEP_{args.total_epoch}' log_dir = os.path.realpath(os.path.join( 'logs', log_dir_folder_name )) # get freeze level according to CLI option if args.weights_path: freeze_level = 0 else: freeze_level = 1 if args.freeze_level is not None: freeze_level = args.freeze_level # How many percentage of layers to unfreeze in fine tuning unfreeze_level = args.unfreeze_level # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint( filepath=log_dir + os.sep + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', monitor='val_loss', mode='min', verbose=1, save_weights_only=False, save_best_only=True, period=1 ) reduce_lr = ReduceLROnPlateau( monitor='val_loss', factor=0.5, mode='min', patience=10, verbose=1, cooldown=0, min_lr=1e-10 ) early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1, mode='min') terminate_on_nan = TerminateOnNaN() callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan] # get train&val dataset dataset = get_dataset(annotation_file) if args.val_annotation_file: val_dataset = get_dataset(args.val_annotation_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset) * val_split) num_train = len(dataset) - num_val # assign multiscale interval if args.multiscale: rescale_interval = args.rescale_interval else: rescale_interval = -1 # Doesn't rescale # model input shape check input_shape = args.model_image_size assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'model_image_size should be multiples of 32' # get different model type & train&val data generator if num_anchors == 9: # YOLOv3 use 9 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = False elif num_anchors == 6: # Tiny YOLOv3 use 6 anchors get_train_model = get_yolo3_train_model data_generator = yolo3_data_generator_wrapper # tf.keras.Sequence style data generator # train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval, args.multi_anchor_assign) # val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes, multi_anchor_assign=args.multi_anchor_assign) tiny_version = True elif num_anchors == 5: # YOLOv2 use 5 anchors get_train_model = get_yolo2_train_model data_generator = yolo2_data_generator_wrapper # tf.keras.Sequence style data generator # train_data_generator = 
Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval) # val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes) tiny_version = False else: raise ValueError('Unsupported anchors number') # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack( model_type=args.model_type, annotation_lines=dataset[num_train:], anchors=anchors, class_names=class_names, model_image_size=args.model_image_size, model_pruning=args.model_pruning, log_dir=log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint, elim_grid_sense=args.elim_grid_sense ) callbacks.append(eval_callback) # prepare train/val data shuffle callback if args.data_shuffle: shuffle_callback = DatasetShuffleCallBack(dataset) callbacks.append(shuffle_callback) # prepare model pruning config pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch if args.model_pruning: pruning_callbacks = [sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)] callbacks = callbacks + pruning_callbacks # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list = ["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list) print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_train_model( model_type=args.model_type, anchors=anchors, num_classes=num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step ) else: # get normal train model model = get_train_model( model_type=args.model_type, anchors=anchors, num_classes=num_classes, weights_path=args.weights_path, freeze_level=freeze_level, optimizer=optimizer, label_smoothing=args.label_smoothing, elim_grid_sense=args.elim_grid_sense, model_pruning=args.model_pruning, pruning_end_step=pruning_end_step ) if args.show_history: model.summary() layers_count = len(model.layers) print(f'Total layers: {layers_count}') # Transfer training some epochs with frozen layers first if needed, to get a stable loss. 
initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape)) # model.fit_generator(train_data_generator, """ Transfer training steps, train with freeze layers """ model.fit( data_generator( annotation_lines=dataset[:num_train], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, enhance_augment=args.enhance_augment, rescale_interval=rescale_interval, multi_anchor_assign=args.multi_anchor_assign ), steps_per_epoch=max(1, num_train // args.batch_size), # validation_data=val_data_generator, validation_data=data_generator( annotation_lines=dataset[num_train:], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, multi_anchor_assign=args.multi_anchor_assign ), validation_steps=max(1, num_val // args.batch_size), epochs=epochs, initial_epoch=initial_epoch, # verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks ) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, num_train // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body fine_tune_layers = int(layers_count * unfreeze_level) print(f"Unfreeze {unfreeze_level * 100}% of layers and continue training, to fine-tune.") print(f"Unfroze {fine_tune_layers} layers of {layers_count}") if args.gpu_num >= 2: with strategy.scope(): for i in range(layers_count - fine_tune_layers, layers_count): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change else: for i in range(layers_count - fine_tune_layers, layers_count): model.layers[i].trainable = True model.compile(optimizer=optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape)) """ Fine-tuning steps, more memory will be used. 
LR (Learning Rate) will be decayed """ # model.fit_generator(train_data_generator, model.fit( # The YOLO data augmentation generator tool data_generator( annotation_lines=dataset[:num_train], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, enhance_augment=args.enhance_augment, rescale_interval=rescale_interval, multi_anchor_assign=args.multi_anchor_assign ), steps_per_epoch=max(1, num_train // args.batch_size), # validation_data=val_data_generator, # Validation generator validation_data=data_generator( annotation_lines=dataset[num_train:], batch_size=args.batch_size, input_shape=input_shape, anchors=anchors, num_classes=num_classes, multi_anchor_assign=args.multi_anchor_assign ), validation_steps=max(1, num_val // args.batch_size), epochs=args.total_epoch, initial_epoch=epochs, # verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks ) # Finally store model if args.model_pruning: model = sparsity.strip_pruning(model) model.save(os.path.join(log_dir, 'trained_final.h5'))
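# Worked example of the unfreeze_level arithmetic used in the fine-tuning stage above.
# The numbers are illustrative only: 250 layers and unfreeze_level=0.4 are assumptions.
layers_count = 250
unfreeze_level = 0.4                                   # unfreeze the last 40% of layers
fine_tune_layers = int(layers_count * unfreeze_level)  # -> 100
# layers 150..249 become trainable, the first 150 stay frozen
print(layers_count - fine_tune_layers, layers_count - 1)  # 150 249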
def main(args):
    model_type = "yolo3_darknet_spp"  # yolo3_darknet_spp, yolo3_darknet
    current_dir = os.path.dirname(__file__) + "/"
    print("current_dir == ", current_dir)
    annotation_file = current_dir + "sample/trainval/train.txt"
    val_annotation_file = current_dir + "sample/trainval/val.txt"
    classes_path = current_dir + "sample/trainval/train_classes.txt"
    anchors_path = current_dir + "sample/trainval/yolo_anchors.txt"
    weights_path = current_dir + "weights/yolov3-spp.h5"
    load_weights_path = None  # None or "{weights path}"
    is_one_stage_train = True
    learning_rate_1 = 1e-4
    learning_rate_2 = 1e-5
    epoch_1 = args.max_epochs_1
    epoch_2 = args.max_epochs_2
    batch_size_1 = args.batch_size_1
    batch_size_2 = args.batch_size_2
    freeze_level = 2
    model_image_size = (416, 416)
    val_split = 0.1
    label_smoothing = 0
    enhance_augment = None  # enhance data augmentation type (None/mosaic)
    rescale_interval = 0  # Number of iterations (batches) between input size rescaling, default=10
    log_dir = os.path.join('logs', '20200602')

    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    anchors = get_anchors(anchors_path)

    logging = TensorBoard(log_dir=log_dir, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10,
                                  verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1)
    # terminate_on_nan = TerminateOnNaN()
    callbacks = [logging, checkpoint, reduce_lr, early_stopping, ModertFileToObs(log_dir, args)]
    # callbacks = [logging, checkpoint, reduce_lr]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    dataset = [current_dir + d for d in dataset]
    if val_annotation_file != "":
        val_dataset = get_dataset(val_annotation_file)
        num_train = len(dataset)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val
    # num_val = 100
    # num_train = 200

    # model input shape check
    input_shape = model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required'

    get_train_model = get_yolo3_train_model
    data_generator = yolo3_data_generator_wrapper

    # get train model
    model = get_train_model(model_type, anchors, num_classes, input_shape,
                            weights_path=weights_path, freeze_level=freeze_level,
                            label_smoothing=label_smoothing)
    if load_weights_path:
        model.load_weights(load_weights_path)
        print("reload weights: {}".format(load_weights_path))

    if is_one_stage_train:
        model.compile(optimizer=get_optimizer(learning_rate_1),
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})
        print('One stage Train on {} samples, val on {} samples, with batch size {}, '
              'input_shape {}.'.format(num_train, num_val, batch_size_1, input_shape))
        model.fit_generator(
            data_generator(dataset[:num_train], batch_size_1, input_shape,
                           anchors, num_classes, enhance_augment),
            steps_per_epoch=max(1, num_train // batch_size_1),
            # validate on the held-out split
            validation_data=data_generator(dataset[num_train:], batch_size_1,
                                           input_shape, anchors, num_classes),
            validation_steps=max(1, num_val // batch_size_1),
            epochs=epoch_1,
            initial_epoch=0,
            workers=1,
            use_multiprocessing=False,
            max_queue_size=10,
            callbacks=callbacks)
        model.save_weights(os.path.join(log_dir, 'trained_weights_stage_1.h5'))

    if True:
        print("Unfreeze and continue training, to fine-tune.")
        for i in range(len(model.layers)):
            model.layers[i].trainable = True
        model.compile(optimizer=get_optimizer(learning_rate_2),
                      loss={'yolo_loss': lambda y_true, y_pred: y_pred})
        print('Two stage Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'
              .format(num_train, num_val, batch_size_2, input_shape))
        model.fit_generator(
            data_generator(dataset[:num_train], batch_size_2, input_shape,
                           anchors, num_classes, enhance_augment, rescale_interval),
            steps_per_epoch=max(1, num_train // batch_size_2),
            validation_data=data_generator(dataset[num_train:], batch_size_2,
                                           input_shape, anchors, num_classes),
            validation_steps=max(1, num_val // batch_size_2),
            epochs=epoch_2,
            initial_epoch=epoch_1,
            workers=1,
            use_multiprocessing=False,
            max_queue_size=10,
            callbacks=callbacks)
        model.save_weights(os.path.join(log_dir, 'trained_weights_final.h5'))

    gen_model_dir(log_dir, args, classes_path, anchors_path)
def main(): parser = argparse.ArgumentParser( description='train a simple CNN classifier with PyTorch') log_dir = os.path.join('logs', '000') # Model definition options parser.add_argument( '--model_type', type=str, required=False, default='mobilenetv2', help= 'backbone model type: mobilenetv3/v2/simple_cnn, default=%(default)s') parser.add_argument( '--model_input_shape', type=str, required=False, default='224x224', help="model image input shape as <height>x<width>, default=%(default)s" ) parser.add_argument( '--head_conv_channel', type=int, required=False, default=128, help="channel number for head part convolution, default=%(default)s") parser.add_argument('--weights_path', type=str, required=False, default=None, help="Pretrained model/weights file for fine tune") # Data options parser.add_argument('--train_data_path', type=str, required=True, help='path to train image dataset') parser.add_argument('--val_data_path', type=str, required=True, help='path to validation image dataset') # Training settings parser.add_argument('--batch_size', type=int, required=False, default=64, help="batch size for train, default=%(default)s") parser.add_argument( '--optimizer', type=str, required=False, default='adam', choices=['adam', 'rmsprop', 'sgd'], help="optimizer for training (adam/rmsprop/sgd), default=%(default)s") parser.add_argument('--learning_rate', type=float, required=False, default=1e-3, help="Initial learning rate, default=%(default)s") parser.add_argument( '--decay_type', type=str, required=False, default=None, choices=[None, 'cosine', 'plateau', 'exponential', 'step'], help="Learning rate decay type, default=%(default)s") parser.add_argument('--weight_decay', type=float, required=False, default=5e-4, help="Weight decay for optimizer, default=%(default)s") parser.add_argument( '--init_epoch', type=int, required=False, default=0, help= "Initial training epochs for fine tune training, default=%(default)s") parser.add_argument( '--transfer_epoch', type=int, required=False, default=5, help= "Transfer training (from Imagenet) stage epochs, default=%(default)s") parser.add_argument('--total_epoch', type=int, required=False, default=100, help="Total training epochs, default=%(default)s") #parser.add_argument('--gpu_num', type=int, required=False, default=1, #help='Number of GPU to use, default=%(default)s') parser.add_argument('--no_cuda', action='store_true', default=False, help='disables CUDA training') args = parser.parse_args() height, width = args.model_input_shape.split('x') args.model_input_shape = (int(height), int(width)) use_cuda = not args.no_cuda and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") torch.manual_seed(1) # prepare train&val dataset loader train_loader = get_dataloader(args.train_data_path, args.model_input_shape, args.batch_size, use_cuda=use_cuda, mode='train') val_loader = get_dataloader(args.val_data_path, args.model_input_shape, args.batch_size, use_cuda=use_cuda, mode='val') # get tensorboard summary writer summary_writer = SummaryWriter(os.path.join(log_dir, 'tensorboard')) # check if classes match on train & val dataset assert train_loader.dataset.classes == val_loader.dataset.classes, 'class mismatch between train & val dataset' num_classes = len(train_loader.dataset.classes) print('Classes:', train_loader.dataset.classes) # get train model model = Classifier(args.model_type, num_classes, args.head_conv_channel).to(device) summary(model, input_size=(3, ) + args.model_input_shape) if args.weights_path: model.load_state_dict( 
torch.load(args.weights_path, map_location=device)) print('Load weights {}.'.format(args.weights_path)) optimizer = get_optimizer(args.optimizer, model, args.learning_rate, args.weight_decay) # Freeze feature extractor part for transfer learning print('Freeze feature extractor part.') for child in model.features.children(): for param in child.parameters(): param.requires_grad = False # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = args.init_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' .format(len(train_loader.dataset), len(val_loader.dataset), args.batch_size, args.model_input_shape)) # Transfer train loop for epoch in range(initial_epoch, epochs): print('Epoch %d/%d' % (epoch, epochs)) train(args, epoch, model, device, train_loader, optimizer, None, summary_writer) validate(args, epoch, epoch * len(train_loader), model, device, val_loader, log_dir, summary_writer) checkpoint_clean(log_dir, max_keep=5) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") for child in model.children(): for param in child.parameters(): param.requires_grad = True # apply learning rate decay only after unfreeze all layers # NOTE: PyTorch apply learning rate scheduler for every epoch, not batch #steps_per_epoch = max(1, len(train_loader.dataset)//args.batch_size) #decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) decay_steps = args.total_epoch - args.init_epoch - args.transfer_epoch lr_scheduler = get_lr_scheduler(args.decay_type, optimizer, decay_steps) # Fine tune train loop for epoch in range(epochs, args.total_epoch): print('Epoch %d/%d' % (epoch, args.total_epoch)) train(args, epoch, model, device, train_loader, optimizer, lr_scheduler, summary_writer) validate(args, epoch, epoch * len(train_loader), model, device, val_loader, log_dir, summary_writer) checkpoint_clean(log_dir, max_keep=5) # Finally store model torch.save(model, os.path.join(log_dir, 'trained_final.pth'))
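# Illustrative follow-up (not part of the original script): because torch.save() above
# is given the whole module rather than a state_dict, the saved file can be reloaded
# directly for inference, roughly like this. The Classifier class definition must be
# importable at load time; the path below matches the log_dir used in the script.
import os
import torch

model = torch.load(os.path.join('logs', '000', 'trained_final.pth'), map_location='cpu')
model.eval()  # switch to inference mode before evaluating new samples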
def main(args): log_dir = 'logs/000/' # get class info, add background class to match model & GT class_names = get_classes(args.classes_path) assert len(class_names) < 254, 'PNG image label only support less than 254 classes.' class_names = ['background'] + class_names num_classes = len(class_names) # callbacks for training process monitor = 'Jaccard' tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-Jaccard{Jaccard:.3f}-val_loss{val_loss:.3f}-val_Jaccard{val_Jaccard:.3f}.h5'), monitor='val_{}'.format(monitor), mode='max', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_{}'.format(monitor), factor=0.5, mode='max', patience=5, verbose=1, cooldown=0, min_lr=1e-6) early_stopping = EarlyStopping(monitor='val_{}'.format(monitor), min_delta=0, patience=100, verbose=1, mode='max') terminate_on_nan = TerminateOnNaN() callbacks=[tensorboard, checkpoint, reduce_lr, early_stopping, terminate_on_nan] # get train&val dataset dataset = get_data_list(args.dataset_file) if args.val_dataset_file: val_dataset = get_data_list(args.val_dataset_file) num_train = len(dataset) num_val = len(val_dataset) dataset.extend(val_dataset) else: val_split = args.val_split num_val = int(len(dataset)*val_split) num_train = len(dataset) - num_val # prepare train&val data generator train_generator = SegmentationGenerator(args.dataset_path, dataset[:num_train], args.batch_size, num_classes, resize_shape=args.model_input_shape[::-1], crop_shape=None, weighted_type=args.weighted_type, augment=True, do_ahisteq=False) valid_generator = SegmentationGenerator(args.dataset_path, dataset[num_train:], args.batch_size, num_classes, resize_shape=args.model_input_shape[::-1], crop_shape=None, weighted_type=args.weighted_type, augment=False, do_ahisteq=False) # prepare online evaluation callback if args.eval_online: eval_callback = EvalCallBack(args.dataset_path, dataset[num_train:], class_names, args.model_input_shape, args.model_pruning, log_dir, eval_epoch_interval=args.eval_epoch_interval, save_eval_checkpoint=args.save_eval_checkpoint) callbacks.append(eval_callback) # prepare optimizer #optimizer = Adam(lr=7e-4, epsilon=1e-8, decay=1e-6) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None) # prepare loss according to loss type & weigted type if args.weighted_type == 'balanced': classes_weights_path = os.path.join(args.dataset_path, 'classes_weights.txt') if os.path.isfile(classes_weights_path): weights = load_class_weights(classes_weights_path) else: weights = calculate_weigths_labels(train_generator, num_classes, save_path=args.dataset_path) losses = WeightedSparseCategoricalCrossEntropy(weights) sample_weight_mode = None elif args.weighted_type == 'adaptive': losses = sparse_crossentropy sample_weight_mode = 'temporal' elif args.weighted_type == None: losses = sparse_crossentropy sample_weight_mode = None else: raise ValueError('invalid weighted_type {}'.format(args.weighted_type)) if args.loss == 'focal': warnings.warn("Focal loss doesn't support weighted class balance, will ignore related config") losses = softmax_focal_loss sample_weight_mode = None elif args.loss == 'crossentropy': # using crossentropy will keep the weigted type setting pass else: raise ValueError('invalid loss type {}'.format(args.loss)) # prepare metric #metrics = {'pred_mask' : [Jaccard, 
sparse_accuracy_ignoring_last_label]} metrics = {'pred_mask' : Jaccard} # support multi-gpu training if args.gpu_num >= 2: # devices_list=["/gpu:0", "/gpu:1"] devices_list=["/gpu:{}".format(n) for n in range(args.gpu_num)] strategy = tf.distribute.MirroredStrategy(devices=devices_list) print ('Number of devices: {}'.format(strategy.num_replicas_in_sync)) with strategy.scope(): # get multi-gpu train model model = get_deeplabv3p_model(args.model_type, num_classes, args.model_input_shape, args.output_stride, args.freeze_level, weights_path=args.weights_path) # compile model model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode, loss = losses, metrics = metrics) else: # get normal train model model = get_deeplabv3p_model(args.model_type, num_classes, args.model_input_shape, args.output_stride, args.freeze_level, weights_path=args.weights_path) # compile model model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode, loss = losses, metrics = metrics) model.summary() # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, args.model_input_shape)) model.fit_generator(generator=train_generator, steps_per_epoch=len(train_generator), validation_data=valid_generator, validation_steps=len(valid_generator), epochs=epochs, initial_epoch=initial_epoch, verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks = callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, len(train_generator)) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body print("Unfreeze and continue training, to fine-tune.") if args.gpu_num >= 2: with strategy.scope(): for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode, loss = losses, metrics = metrics) # recompile to apply the change else: for i in range(len(model.layers)): model.layers[i].trainable = True model.compile(optimizer=optimizer, sample_weight_mode=sample_weight_mode, loss = losses, metrics = metrics) # recompile to apply the change print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, args.model_input_shape)) model.fit_generator(generator=train_generator, steps_per_epoch=len(train_generator), validation_data=valid_generator, validation_steps=len(valid_generator), epochs=args.total_epoch, initial_epoch=epochs, verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks = callbacks) # Finally store model model.save(os.path.join(log_dir, 'trained_final.h5'))
def main(args): log_dir = 'logs/000' # get class info if args.classes_path: class_names = get_classes(args.classes_path) else: class_names = None # callbacks for training process logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch') checkpoint = ModelCheckpoint(os.path.join( log_dir, 'ep{epoch:03d}-loss{loss:.3f}-acc{acc:.3f}-val_loss{val_loss:.3f}-val_acc{val_acc:.3f}.h5' ), monitor='val_acc', mode='max', verbose=1, save_weights_only=False, save_best_only=True, period=1) reduce_lr = ReduceLROnPlateau(monitor='val_acc', mode='max', factor=0.5, patience=10, verbose=1, cooldown=0, min_lr=1e-10) early_stopping = EarlyStopping(monitor='val_acc', mode='max', min_delta=0, patience=50, verbose=1) terminate_on_nan = TerminateOnNaN() checkpoint_clean = CheckpointCleanCallBack(log_dir, max_keep=5) #learn_rates = [0.05, 0.01, 0.005, 0.001, 0.0005] #lr_scheduler = LearningRateScheduler(lambda epoch: learn_rates[epoch // 30]) callbacks = [ logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan, checkpoint_clean ] # prepare train&val data generator train_generator = get_data_generator(args.train_data_path, args.model_input_shape, args.batch_size, class_names, mode='train') val_generator = get_data_generator(args.val_data_path, args.model_input_shape, args.batch_size, class_names, mode='val') # check if classes match on train & val dataset assert train_generator.class_indices == val_generator.class_indices, 'class mismatch between train & val dataset' if not class_names: class_names = list(train_generator.class_indices.keys()) print('Classes:', class_names) # prepare optimizer optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=None, decay_type=None) # get train model model, backbone_len = get_model(args.model_type, len(class_names), args.model_input_shape, args.head_conv_channel, args.weights_path) model.summary() # Freeze backbone part for transfer learning for i in range(backbone_len): model.layers[i].trainable = False print('Freeze the first {} layers of total {} layers.'.format( backbone_len, len(model.layers))) model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) # Transfer training some epochs with frozen layers first if needed, to get a stable loss. initial_epoch = args.init_epoch epochs = initial_epoch + args.transfer_epoch print("Transfer training stage") print( 'Train on {} samples, val on {} samples, with batch size {}, input_shape {}.' 
.format(train_generator.samples, val_generator.samples, args.batch_size, args.model_input_shape)) model.fit_generator( train_generator, steps_per_epoch=train_generator.samples // args.batch_size, validation_data=val_generator, validation_steps=val_generator.samples // args.batch_size, epochs=epochs, initial_epoch=initial_epoch, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Wait 2 seconds for next stage time.sleep(2) if args.decay_type: # rebuild optimizer to apply learning rate decay, only after # unfreeze all layers callbacks.remove(reduce_lr) steps_per_epoch = max(1, train_generator.samples // args.batch_size) decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch) optimizer = get_optimizer(args.optimizer, args.learning_rate, average_type=None, decay_type=args.decay_type, decay_steps=decay_steps) # Unfreeze the whole network for further tuning # NOTE: more GPU memory is required after unfreezing the body for i in range(len(model.layers)): model.layers[i].trainable = True print("Unfreeze and continue training, to fine-tune.") model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) model.fit_generator( train_generator, steps_per_epoch=train_generator.samples // args.batch_size, validation_data=val_generator, validation_steps=val_generator.samples // args.batch_size, epochs=args.total_epoch, initial_epoch=epochs, #verbose=1, workers=1, use_multiprocessing=False, max_queue_size=10, callbacks=callbacks) # Finally store model model.save(os.path.join(log_dir, 'trained_final.h5'))
num_epochs = args.num_epochs
batch_size = args.batch_size
decay_epochs = args.decay_epochs
initial_learning_rate = args.initial_learning_rate
end_learning_rate = args.end_learning_rate
weight_decay = args.weight_decay
warmup_proportion = args.warmup_proportion
# training samples per fold are the total rows scaled by (1 - 1/num_folds)
num_train_steps = int(train_df.shape[0] * (1 - 1 / num_folds) / batch_size * decay_epochs)
num_warmup_steps = int(num_train_steps * warmup_proportion)
dropout_rate = args.dropout_rate
rnn_units = args.rnn_units
num_hidden_states = args.num_hidden_states

optimizer = model_utils.get_optimizer(initial_learning_rate, end_learning_rate,
                                      weight_decay, num_train_steps, num_warmup_steps)
loss_fn = model_utils.get_loss_function(from_logits=False)

transformer.Model.NUM_HIDDEN_STATES = num_hidden_states
transformer.Model.DROPOUT_RATE = dropout_rate
transformer.Model.RNN_UNITS = rnn_units
config = dataset.Config.from_pretrained(dataset.PATH, output_hidden_states=True)
model = transformer.Model.from_pretrained(dataset.PATH, config=config)

kfold = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)
for fold_num, (train_idx, valid_idx) in enumerate(