Example #1
        val_dataloader = PSPnetDataset(val_lines, input_shape, batch_size,
                                       num_classes, aux_branch, False,
                                       VOCdevkit_path)

        #-------------------------------------------------------------------------------#
        #   Training parameter settings
        #   logging         sets the save directory for TensorBoard
        #   checkpoint      controls how weights are saved; period sets how many
        #                   epochs pass between saves
        #   lr_scheduler    sets how the learning rate decays
        #   early_stopping  enables early stopping: training ends automatically once
        #                   val_loss stops improving, meaning the model has converged
        #-------------------------------------------------------------------------------#
        time_str = datetime.datetime.strftime(datetime.datetime.now(),
                                              '%Y_%m_%d_%H_%M_%S')
        log_dir = os.path.join(save_dir, "loss_" + str(time_str))
        logging = TensorBoard(log_dir)
        loss_history = LossHistory(log_dir)
        checkpoint = ModelCheckpoint(os.path.join(
            save_dir,
            "ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5"),
                                     monitor='val_loss',
                                     save_weights_only=True,
                                     save_best_only=False,
                                     period=save_period)
        early_stopping = EarlyStopping(monitor='val_loss',
                                       min_delta=0,
                                       patience=10,
                                       verbose=1)
        lr_scheduler = LearningRateScheduler(lr_scheduler_func, verbose=1)
        callbacks = [logging, loss_history, checkpoint, lr_scheduler]

        if start_epoch < end_epoch:
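The snippet breaks off at the training condition; a plausible body for it (a sketch only: model, train_dataloader, num_train, and num_val are assumed from the surrounding script, not shown by the source) would pass the assembled callbacks to fit:

            model.fit(train_dataloader,
                      steps_per_epoch=num_train // batch_size,
                      validation_data=val_dataloader,
                      validation_steps=num_val // batch_size,
                      initial_epoch=start_epoch,
                      epochs=end_epoch,
                      callbacks=callbacks)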
Example #2
    #-------------------------------------------------------------------------------#
    #   Training parameter settings
    #   logging is the save directory for TensorBoard
    #   checkpoint controls how weights are saved; period sets how many epochs between saves
    #   reduce_lr sets how the learning rate decays
    #   early_stopping enables early stopping: training ends automatically once val_loss
    #   stops improving, meaning the model has essentially converged
    #-------------------------------------------------------------------------------#
    logging         = TensorBoard(log_dir = 'logs/')
    checkpoint      = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
                            monitor = 'val_loss', save_weights_only = True, save_best_only = False, period = 1)
    if Cosine_scheduler:
        reduce_lr   = WarmUpCosineDecayScheduler(T_max = 5, eta_min = 1e-5, verbose = 1)
    else:
        reduce_lr   = ExponentDecayScheduler(decay_rate = 0.94, verbose = 1)
    early_stopping  = EarlyStopping(monitor='val_loss', min_delta = 0, patience = 10, verbose = 1)
    loss_history    = LossHistory('logs/')

    #---------------------------#
    #   Read the dataset txt files
    #---------------------------#
    with open(train_annotation_path) as f:
        train_lines = f.readlines()
    with open(val_annotation_path) as f:
        val_lines   = f.readlines()
    num_train   = len(train_lines)
    num_val     = len(val_lines)

    if Freeze_Train:
        freeze_layers = 249
        for i in range(freeze_layers): model_body.layers[i].trainable = False
        print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
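The two branches above rely on the repository's custom scheduler callbacks. As a rough stand-in for the exponential branch, Keras' built-in LearningRateScheduler can express the same per-epoch decay (a sketch, not the repo's implementation; the 0.94 factor mirrors the call above):

    from tensorflow.keras.callbacks import LearningRateScheduler

    def exponent_decay(epoch, lr):
        # Keep the starting rate for the first epoch, then decay by 0.94 per epoch.
        return lr if epoch == 0 else lr * 0.94

    reduce_lr = LearningRateScheduler(exponent_decay, verbose=1)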
Example #3
            k: v
            for k, v in pretrained_dict.items()
            if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    model_train = model.train()
    if Cuda:
        model_train = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        model_train = model_train.cuda()

    yolo_loss = YOLOLoss(anchors, num_classes, input_shape, Cuda, anchors_mask,
                         label_smoothing)
    loss_history = LossHistory("logs/")

    #---------------------------#
    #   Read the dataset txt files
    #---------------------------#
    with open(train_annotation_path) as f:
        train_lines = f.readlines()
    with open(val_annotation_path) as f:
        val_lines = f.readlines()
    num_train = len(train_lines)
    num_val = len(val_lines)

Example #4

    #-------------------------------------------------------------------------------#
    #   The backbone feature extraction network's features are generic; freezing it
    #   speeds up training and keeps the weights from being damaged early in training.
    #   Init_Epoch is the starting epoch.
    #   checkpoint controls how weights are saved; period sets how many epochs between saves
    #   reduce_lr sets how the learning rate decays
    #   early_stopping enables early stopping: training ends automatically once val_loss
    #   stops improving, meaning the model has essentially converged
    #-------------------------------------------------------------------------------#
    logging = TensorBoard(log_dir='logs/')
    checkpoint = ModelCheckpoint('logs/ep{epoch:03d}-loss{loss:.3f}.h5',
                                 monitor='loss',
                                 save_weights_only=True,
                                 save_best_only=False,
                                 period=1)
    reduce_lr = ExponentDecayScheduler(decay_rate=0.96, verbose=1)
    early_stopping = EarlyStopping(monitor='loss',
                                   min_delta=0,
                                   patience=10,
                                   verbose=1)
    loss_history = LossHistory('logs/', val_loss_flag=False)

    if focal_loss:
        if dice_loss:
            loss = dice_loss_with_Focal_Loss(cls_weights)
        else:
            loss = Focal_Loss(cls_weights)
    else:
        if dice_loss:
            loss = dice_loss_with_CE(cls_weights)
        else:
            loss = CE(cls_weights)

    #------------------------------------#
    #   Freeze part of the network for training
    #------------------------------------#
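dice_loss_with_CE and the focal variants above are the repository's own helpers. As an illustration of what a combined CE-plus-Dice objective computes for one-hot masks shaped (batch, h, w, classes) (a sketch under those assumptions, not the repo's implementation):

    import tensorflow.keras.backend as K

    def ce_plus_dice(y_true, y_pred, smooth=1e-5):
        # Standard categorical cross-entropy term, averaged over all pixels.
        ce = K.mean(K.categorical_crossentropy(y_true, y_pred))
        # Dice term: per-class overlap between prediction and ground truth.
        intersection = K.sum(y_true * y_pred, axis=[0, 1, 2])
        union = K.sum(y_true, axis=[0, 1, 2]) + K.sum(y_pred, axis=[0, 1, 2])
        dice = K.mean((2. * intersection + smooth) / (union + smooth))
        return ce + (1 - dice)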
Example #5
    if not pretrained:
        weights_init(model)
    if model_path != '':
        #------------------------------------------------------#
        #   See the README for the weight file; it can be downloaded from Baidu Netdisk
        #------------------------------------------------------#
        print('Load weights {}.'.format(model_path))
        device          = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_dict      = model.state_dict()
        pretrained_dict = torch.load(model_path, map_location = device)
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
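        #   Sketch (not part of the original): report which pretrained keys the
        #   shape check above skipped, by reloading the raw checkpoint.
        raw_keys = torch.load(model_path, map_location = device).keys()
        skipped  = [k for k in raw_keys if k not in pretrained_dict]
        print('Skipped {} incompatible keys.'.format(len(skipped)))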

    yolo_loss    = YOLOLoss(anchors, num_classes, input_shape, Cuda, anchors_mask, label_smoothing)
    loss_history = LossHistory("logs/", model, input_shape=input_shape)
    
    model_train = model.train()
    if Cuda:
        model_train = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        model_train = model_train.cuda()

    #---------------------------#
    #   Read the dataset txt files
    #---------------------------#
    with open(train_annotation_path, encoding='utf-8') as f:
        train_lines = f.readlines()
    with open(val_annotation_path, encoding='utf-8') as f:
        val_lines   = f.readlines()
    num_train   = len(train_lines)
Example #6
        if local_rank == 0:
            print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key))
            print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key))
            print("\n\033[1;33;44m温馨提示,head部分没有载入是正常现象,Backbone部分没有载入是错误的。\033[0m")

    #----------------------#
    #   Build the loss function
    #----------------------#
    criterion       = MultiboxLoss(num_classes, neg_pos_ratio=3.0)
    #----------------------#
    #   Record the loss
    #----------------------#
    if local_rank == 0:
        time_str        = datetime.datetime.strftime(datetime.datetime.now(),'%Y_%m_%d_%H_%M_%S')
        log_dir         = os.path.join(save_dir, "loss_" + str(time_str))
        loss_history    = LossHistory(log_dir, model, input_shape=input_shape)
    else:
        loss_history    = None
        
    #------------------------------------------------------------------#
    #   torch 1.2 does not support amp; use torch 1.7.1 or above to use
    #   fp16 correctly. On torch 1.2 this import therefore shows
    #   "could not be resolved".
    #------------------------------------------------------------------#
    if fp16:
        from torch.cuda.amp import GradScaler
        scaler = GradScaler()
    else:
        scaler = None
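    #------------------------------------------------------------------#
    #   Sketch (not part of the original): with fp16 on, a training step
    #   would use the scaler roughly like this, assuming an `optimizer`
    #   and a computed `loss`:
    #       with torch.cuda.amp.autocast():
    #           loss = compute_loss(...)
    #       scaler.scale(loss).backward()
    #       scaler.step(optimizer)
    #       scaler.update()
    #------------------------------------------------------------------#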

    model_train     = model.train()
    #----------------------------#

Example #7
        pretrained_dict = torch.load(model_path, map_location=device)
        pretrained_dict = {
            k: v
            for k, v in pretrained_dict.items()
            if k in model_dict and np.shape(model_dict[k]) == np.shape(v)
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

    model_train = model.train()
    if Cuda:
        model_train = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        model_train = model_train.cuda()

    loss_history = LossHistory(os.path.join("logs", backbone), model_train,
                               input_shape)
    #----------------------------------------------------#
    #   The validation set split is performed inside the train.py code
    #----------------------------------------------------#
    with open(annotation_path, "r") as f:
        lines = f.readlines()
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val

    #------------------------------------------------------#
    #   Training is split into two phases: a frozen phase and an
    #   unfrozen phase.
    #   Running out of GPU memory has nothing to do with dataset size;
    #   if you see an out-of-memory error, reduce batch_size.
    #   Because of the BatchNorm layers, batch_size must be at least 2,
    #   not 1.
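The frozen phase described in the comment above is usually implemented by switching off gradients on the backbone; a minimal PyTorch sketch (assuming the model exposes a `backbone` attribute, which this snippet does not show):

    for param in model.backbone.parameters():
        param.requires_grad = False   # frozen phase: train only the head
    # ... after the frozen epochs, unfreeze for the second phase:
    for param in model.backbone.parameters():
        param.requires_grad = True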
Example #8
    def fit_model(model,
                  Lr,
                  Batch_size,
                  Init_Epoch,
                  run_Epoch,
                  warmup_proportion=0.1,
                  min_scale=1e-2,
                  max_objects=100):
        # -------------------------------------------------------------------------------#
        #   Training parameter settings
        #   logging is the save directory for TensorBoard
        #   checkpoint controls how weights are saved; period sets how many epochs between saves
        #   reduce_lr sets how the learning rate decays
        #   early_stopping enables early stopping: training ends automatically once val_loss
        #   stops improving, meaning the model has essentially converged
        # -------------------------------------------------------------------------------#
        logs = path + '/' + datetime.now().strftime("%Y%m%d-%H%M%S")
        logging = TensorBoard(log_dir=logs, profile_batch=(2, 5))
        loss_history = LossHistory(logs)
        checkpoint = ModelCheckpoint(
            path + '/ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
            monitor='val_loss',
            save_weights_only=True,
            save_best_only=False,
            period=1)
        Epoch = Init_Epoch + run_Epoch
        train_dataloader = OneNetDatasets(lines[:num_train],
                                          input_shape,
                                          Batch_size,
                                          num_classes,
                                          train=True,
                                          max_objects=max_objects)
        val_dataloader = OneNetDatasets(lines[num_train:],
                                        input_shape,
                                        Batch_size,
                                        num_classes,
                                        train=False,
                                        max_objects=max_objects)

        print('Train on {} samples, val on {} samples, with batch size {}.'.
              format(num_train, num_val, Batch_size))
        # gen = Generator(Batch_size, lines[:num_train], lines[num_train:], input_shape, num_classes, max_objects=max_objects)
        optimizer = tfa.optimizers.RectifiedAdam(
            learning_rate=Lr,
            total_steps=num_train // Batch_size * (Epoch - Init_Epoch),
            warmup_proportion=warmup_proportion,
            weight_decay=1e-4,
            min_lr=Lr * min_scale)
        loss_list = {
            'cls': lambda y_true, y_pred: y_pred,
            'loc': lambda y_true, y_pred: y_pred,
            'giou': lambda y_true, y_pred: y_pred
        }
        loss_weights = [2, 5, 2]

        model.compile(loss=loss_list,
                      loss_weights=loss_weights,
                      optimizer=optimizer)

        histogram = model.fit(train_dataloader,
                              steps_per_epoch=num_train // Batch_size,
                              validation_data=val_dataloader,
                              validation_steps=num_val // Batch_size,
                              epochs=Epoch,
                              verbose=1,
                              initial_epoch=Init_Epoch,
                              callbacks=[logging, checkpoint, loss_history])
        return histogram
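A call to this helper might look as follows (hypothetical values; model, path, lines, num_train, and num_val must already exist in the enclosing scope):

    history = fit_model(model, Lr=1e-3, Batch_size=8, Init_Epoch=0, run_Epoch=50)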