model = SSD300(input_shape, NUM_CLASS)
model.load_weights("model_data/ssd_weights.h5", by_name=True, skip_mismatch=True)

log = TensorBoard(log_dir=log_path)
checkpoint = ModelCheckpoint(log_path + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                             monitor='val_loss', save_weights_only=True, save_best_only=True, period=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=6, verbose=1)

batch_size = 16
priors = pickle.load(open('model_data/prior_boxes_ssd300.pkl', 'rb'))
bbox_util = BBoxUtility(NUM_CLASS, priors)
gen = Generator(bbox_util, batch_size, lines[:num_train], lines[num_train:],
                (input_shape[0], input_shape[1]), NUM_CLASS, do_crop=True)

if True:
    model.compile(optimizer=Adam(1e-5),
                  loss=MultiboxLoss(NUM_CLASS, neg_pos_ratio=2.0).compute_loss)
    # steps_per_epoch must be an integer, so use floor division
    # rather than the original num_train / batch_size.
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // batch_size,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // batch_size,
                        epochs=30,
                        initial_epoch=0,
                        callbacks=[log, checkpoint, reduce_lr, early_stopping])
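#------------------------------------------------------#
#   The snippets here use `lines`, `num_train` and
#   `num_val` without defining them. A minimal sketch of
#   the usual preparation: read one annotation txt,
#   shuffle it, and split it into train/val. The file
#   name `2007_train.txt`, the fixed seed, and the 0.1
#   split ratio are assumptions, not taken from this
#   snippet.
#------------------------------------------------------#
import numpy as np

with open('2007_train.txt', encoding='utf-8') as f:
    lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
np.random.seed(None)

num_val = int(len(lines) * 0.1)
num_train = len(lines) - num_val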
model = SSD300(input_shape, num_classes=NUM_CLASSES)
model.load_weights('logs/ep052-loss0.561-val_loss0.968.h5', by_name=True, skip_mismatch=True)

# Training parameter setup
logging = tf.keras.callbacks.TensorBoard()
checkpoint = tf.keras.callbacks.ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                                                monitor='loss', save_weights_only=True, save_best_only=True, period=1)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, verbose=1)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0, patience=16, verbose=1)

BATCH_SIZE = 12
gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                (input_shape[0], input_shape[1]), NUM_CLASSES, do_crop=True)

if False:
    model.compile(optimizer=tf.keras.optimizers.Adam(lr=1e-4),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=5.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=20,
                        initial_epoch=0,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])

if True:
    model.compile(optimizer=tf.keras.optimizers.Adamax(lr=1e-5),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=250,
                        # The source breaks off here; the remaining arguments
                        # presumably mirror the call above (assumed values).
                        initial_epoch=20,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])
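#------------------------------------------------------#
#   `neg_pos_ratio` controls hard negative mining inside
#   MultiboxLoss: every positive prior contributes to the
#   classification loss, but only the hardest
#   neg_pos_ratio * num_positives negatives do, which
#   keeps the huge number of background priors from
#   drowning out the positives. A minimal numpy sketch of
#   that selection (illustration only; the real loss also
#   includes the localization term):
#------------------------------------------------------#
import numpy as np

def select_hard_negatives(conf_loss, positive_mask, neg_pos_ratio=3.0):
    """conf_loss: per-prior classification loss, shape (num_priors,).
    positive_mask: boolean, True where a prior matched a ground-truth box."""
    num_pos = int(positive_mask.sum())
    num_neg = int(neg_pos_ratio * num_pos)

    # Rank only the negatives by loss, highest first.
    neg_losses = np.where(positive_mask, -np.inf, conf_loss)
    hard_neg_idx = np.argsort(neg_losses)[::-1][:num_neg]

    keep = positive_mask.copy()
    keep[hard_neg_idx] = True
    return keep  # mask of priors whose classification loss is kept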
Freeze_epoch = 50
BATCH_SIZE = 16
learning_rate_base = 5e-4
gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                (input_shape[0], input_shape[1]), NUM_CLASSES)

epoch_size = num_train // BATCH_SIZE
epoch_size_val = num_val // BATCH_SIZE
if epoch_size == 0 or epoch_size_val == 0:
    raise ValueError("The dataset is too small to train on; please add more data.")

model.compile(optimizer=Adam(lr=learning_rate_base),
              loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
model.fit_generator(gen.generate(True),
                    steps_per_epoch=epoch_size,
                    validation_data=gen.generate(False),
                    validation_steps=epoch_size_val,
                    epochs=Freeze_epoch,
                    initial_epoch=Init_epoch,
                    callbacks=[logging, checkpoint, reduce_lr, early_stopping, loss_history])

# Unfreeze the backbone for the second training stage.
for i in range(80):
    model.layers[i].trainable = True

if True:
    Freeze_epoch = 50
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=6, verbose=1)

BATCH_SIZE = 4
gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                (input_shape[0], input_shape[1]), NUM_CLASSES)

#------------------------------------------------------#
#   The backbone's features are generic, so freezing it
#   speeds up training and keeps the pretrained weights
#   from being destroyed early on.
#   If you hit OOM / run out of GPU memory, reduce Batch_size.
#------------------------------------------------------#
for i in range(21):
    model.layers[i].trainable = False

if True:
    model.compile(optimizer=Adam(lr=5e-4),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=15,
                        initial_epoch=0,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])

if True:
    model.compile(optimizer=Adam(lr=2e-4),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=30,
                        initial_epoch=15,
                        # Truncated in the source; the callbacks argument
                        # presumably matches the call above (assumed).
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])
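#------------------------------------------------------#
#   The freeze boundary differs between variants (21
#   layers here, 81 in another script below) because it
#   depends on how many layers the backbone occupies in
#   the built model. A quick sketch for inspecting the
#   indices before picking the boundary; not part of the
#   original scripts:
#------------------------------------------------------#
for i, layer in enumerate(model.layers):
    print(i, layer.name, layer.trainable)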
#   Freeze_Epoch is the number of epochs of frozen training
#   Epoch is the total number of training epochs
#   If you hit OOM / run out of GPU memory, reduce Batch_size
#------------------------------------------------------#
for i in range(21):
    model.layers[i].trainable = False

if True:
    Init_epoch = 0
    Freeze_epoch = 50
    BATCH_SIZE = 16
    learning_rate_base = 5e-4
    gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                    (input_shape[0], input_shape[1]), NUM_CLASSES)
    model.compile(optimizer=Adam(lr=learning_rate_base),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=Freeze_epoch,
                        initial_epoch=Init_epoch,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])

for i in range(21):
    model.layers[i].trainable = True

if True:
    Freeze_epoch = 50
    Epoch = 100
    BATCH_SIZE = 8
    learning_rate_base = 1e-4
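    #------------------------------------------------------#
    #   The snippet breaks off after setting the unfreeze
    #   hyperparameters. A sketch of how this stage
    #   presumably continues, mirroring the frozen stage
    #   above (assumed, not from the source). Note that a
    #   change to layer.trainable in Keras only takes
    #   effect once the model is compiled again.
    #------------------------------------------------------#
    gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                    (input_shape[0], input_shape[1]), NUM_CLASSES)
    model.compile(optimizer=Adam(lr=learning_rate_base),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=Epoch,
                        initial_epoch=Freeze_epoch,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])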
        else:
            no_load_key.append(k)
    model_dict.update(temp_dict)
    model.load_state_dict(model_dict)
    #------------------------------------------------------#
    #   Show which keys matched and which did not
    #------------------------------------------------------#
    if local_rank == 0:
        print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key))
        print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key))
        print("\n\033[1;33;44mNote: it is normal for the head weights not to load; it is an error if the backbone weights fail to load.\033[0m")

#----------------------#
#   Build the loss function
#----------------------#
criterion = MultiboxLoss(num_classes, neg_pos_ratio=3.0)

#----------------------#
#   Record the loss
#----------------------#
if local_rank == 0:
    time_str = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S')
    log_dir = os.path.join(save_dir, "loss_" + str(time_str))
    loss_history = LossHistory(log_dir, model, input_shape=input_shape)
else:
    loss_history = None

#------------------------------------------------------------------#
#   torch 1.2 does not support amp; use torch 1.7.1 or later for
#   correct fp16 training. Under torch 1.2 the import below
#   therefore shows "could not be resolved".
#------------------------------------------------------------------#
if fp16:
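    #--------------------------------------------------------------#
    #   Sketch of the elided fp16 branch: with torch >= 1.7.1,
    #   mixed-precision training uses a GradScaler from
    #   torch.cuda.amp. The exact wiring below is an assumption
    #   about the truncated code, not taken from this snippet.
    #--------------------------------------------------------------#
    from torch.cuda.amp import GradScaler
    scaler = GradScaler()
else:
    scaler = None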
BATCH_SIZE = 16
gen = Generator(bbox_util, BATCH_SIZE, lines[:num_train], lines[num_train:],
                (input_shape[0], input_shape[1]), NUM_CLASSES)

#------------------------------------------------------#
#   The backbone's features are generic, so freezing it
#   speeds up training and keeps the pretrained weights
#   from being destroyed early on.
#   If you hit OOM / run out of GPU memory, reduce Batch_size.
#------------------------------------------------------#
for i in range(81):
    model.layers[i].trainable = False

if True:
    model.compile(optimizer=Adam(lr=5e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=50,
                        initial_epoch=0,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])

for i in range(81):
    model.layers[i].trainable = True

if True:
    model.compile(optimizer=Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0),
                  loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    # model.compile(optimizer=SGD(lr=1e-4, momentum=0.9, decay=5e-4),
    #               loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=3.0).compute_loss)
    model.fit_generator(gen.generate(True),
                        steps_per_epoch=num_train // BATCH_SIZE,
                        # Truncated in the source; the remaining arguments
                        # presumably mirror the frozen-stage call above, with
                        # training resuming from epoch 50 (assumed values).
                        validation_data=gen.generate(False),
                        validation_steps=num_val // BATCH_SIZE,
                        epochs=100,
                        initial_epoch=50,
                        callbacks=[logging, checkpoint, reduce_lr, early_stopping])
    print('Load weights {}.'.format(model_path))
    model.load_weights(model_path, by_name=True, skip_mismatch=True)
else:
    model = SSD300((input_shape[0], input_shape[1], 3), num_classes, weight_decay=weight_decay)
    if model_path != '':
        #------------------------------------------------------#
        #   Load the pretrained weights
        #------------------------------------------------------#
        print('Load weights {}.'.format(model_path))
        model.load_weights(model_path, by_name=True, skip_mismatch=True)

multiloss = MultiboxLoss(num_classes, neg_pos_ratio=3.0).compute_loss

#---------------------------#
#   Read the dataset's annotation txt files
#---------------------------#
with open(train_annotation_path, encoding='utf-8') as f:
    train_lines = f.readlines()
with open(val_annotation_path, encoding='utf-8') as f:
    val_lines = f.readlines()
num_train = len(train_lines)
num_val = len(val_lines)

show_config(
    classes_path=classes_path, model_path=model_path, input_shape=input_shape,
    Init_Epoch=Init_Epoch, Freeze_Epoch=Freeze_Epoch, UnFreeze_Epoch=UnFreeze_Epoch,
    Freeze_batch_size=Freeze_batch_size, Unfreeze_batch_size=Unfreeze_batch_size, Freeze_Train=Freeze_Train,
    Init_lr=Init_lr, Min_lr=Min_lr, optimizer_type=optimizer_type, momentum=momentum,
    lr_decay_type=lr_decay_type,
    # Truncated in the source; the remaining keyword arguments are not shown.
)
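#------------------------------------------------------#
#   Each line in the annotation txts is assumed to follow
#   the common VOC-style format used with this Generator:
#       image_path x1,y1,x2,y2,class x1,y1,x2,y2,class ...
#   A sketch of parsing one such line (the format is an
#   assumption; it is not shown in these snippets):
#------------------------------------------------------#
import numpy as np

def parse_annotation_line(line):
    parts = line.strip().split()
    image_path = parts[0]
    boxes = np.array([list(map(int, box.split(','))) for box in parts[1:]]).reshape(-1, 5)
    return image_path, boxes  # boxes: (num_boxes, 5) -> x1, y1, x2, y2, class_id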