    def Val_Dataset(self, root_dir, coco_dir, img_dir, set_dir):
        """Register the COCO-format validation split and build its DataLoader.

        Reuses the batch_size and num_workers stored in system_dict["params"],
        so Train_Dataset() should be called first.
        """
        self.system_dict["dataset"]["val"]["status"] = True
        self.system_dict["dataset"]["val"]["root_dir"] = root_dir
        self.system_dict["dataset"]["val"]["coco_dir"] = coco_dir
        self.system_dict["dataset"]["val"]["img_dir"] = img_dir
        self.system_dict["dataset"]["val"]["set_dir"] = set_dir

        self.system_dict["local"]["val_params"] = {
            "batch_size": self.system_dict["params"]["batch_size"],
            "shuffle": False,
            "drop_last": False,
            "collate_fn": collater,
            "num_workers": self.system_dict["params"]["num_workers"]
        }

        self.system_dict["local"]["val_set"] = CocoDataset(
            root_dir=self.system_dict["dataset"]["val"]["root_dir"] + "/" +
            self.system_dict["dataset"]["val"]["coco_dir"],
            img_dir=self.system_dict["dataset"]["val"]["img_dir"],
            set_dir=self.system_dict["dataset"]["val"]["set_dir"],
            transform=transforms.Compose([Normalizer(),
                                          Resizer()]))

        self.system_dict["local"]["test_generator"] = DataLoader(
            self.system_dict["local"]["val_set"],
            **self.system_dict["local"]["val_params"])

    def Train_Dataset(self,
                      root_dir,
                      coco_dir,
                      img_dir,
                      set_dir,
                      batch_size=8,
                      image_size=512,
                      use_gpu=True,
                      num_workers=3):
        """Register the COCO-format training split, store the loader
        parameters, and build the training DataLoader."""
        self.system_dict["dataset"]["train"]["root_dir"] = root_dir
        self.system_dict["dataset"]["train"]["coco_dir"] = coco_dir
        self.system_dict["dataset"]["train"]["img_dir"] = img_dir
        self.system_dict["dataset"]["train"]["set_dir"] = set_dir

        self.system_dict["params"]["batch_size"] = batch_size
        self.system_dict["params"]["image_size"] = image_size
        self.system_dict["params"]["use_gpu"] = use_gpu
        self.system_dict["params"]["num_workers"] = num_workers

        # Default to one GPU so the batch-size multiplication below never hits
        # a missing "num_gpus" entry when use_gpu is False or CUDA is unavailable.
        self.system_dict["local"]["num_gpus"] = 1
        if self.system_dict["params"]["use_gpu"] and torch.cuda.is_available():
            self.system_dict["local"]["num_gpus"] = torch.cuda.device_count()
            torch.cuda.manual_seed(123)
        else:
            torch.manual_seed(123)

        self.system_dict["local"]["training_params"] = {
            "batch_size":
            self.system_dict["params"]["batch_size"] *
            self.system_dict["local"]["num_gpus"],
            "shuffle":
            True,
            "drop_last":
            True,
            "collate_fn":
            collater,
            "num_workers":
            self.system_dict["params"]["num_workers"]
        }

        self.system_dict["local"]["training_set"] = CocoDataset(
            root_dir=self.system_dict["dataset"]["train"]["root_dir"] + "/" +
            self.system_dict["dataset"]["train"]["coco_dir"],
            img_dir=self.system_dict["dataset"]["train"]["img_dir"],
            set_dir=self.system_dict["dataset"]["train"]["set_dir"],
            transform=transforms.Compose(
                [Normalizer(), Augmenter(),
                 Resizer()]))

        self.system_dict["local"]["training_generator"] = DataLoader(
            self.system_dict["local"]["training_set"],
            **self.system_dict["local"]["training_params"])
Example #3
def train():
    if config.num_gpus == 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    else:
        torch.manual_seed(42)

    config.saved_path = config.saved_path + '/{0}/'.format(config.dataset_name)
    config.log_path = config.log_path + '/{0}/'.format(config.dataset_name)
    os.makedirs(config.log_path, exist_ok=True)
    os.makedirs(config.saved_path, exist_ok=True)

    training_params = {
        'batch_size': config.batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': config.num_workers
    }

    val_params = {
        'batch_size': config.batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': config.num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    if ("coco" in config.dataset_name):
        DS = CocoDataset
    else:
        DS = PascalVocDataset
    training_set = DS(root_dir=os.path.join(config.data_path,
                                            config.dataset_name),
                      set=config.train_set,
                      img_size=input_sizes[config.compound_coef],
                      anchor_free_mode=config.anchor_free_mode,
                      transform=transforms.Compose([
                          Normalizer(mean=config.mean, std=config.std),
                          Augmenter(),
                          Resizer(input_sizes[config.compound_coef])
                      ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = DS(root_dir=os.path.join(config.data_path, config.dataset_name),
                 set=config.val_set,
                 img_size=input_sizes[config.compound_coef],
                 anchor_free_mode=config.anchor_free_mode,
                 transform=transforms.Compose([
                     Normalizer(mean=config.mean, std=config.std),
                     Resizer(input_sizes[config.compound_coef])
                 ]))
    val_generator = DataLoader(val_set, **val_params)

    model = EfficientDetBackbone(num_classes=len(config.obj_list),
                                 compound_coef=config.compound_coef,
                                 load_weights=False,
                                 anchor_free_mode=config.anchor_free_mode,
                                 ratios=eval(config.anchors_ratios),
                                 scales=eval(config.anchors_scales))

    init_weights(model)
    last_step = 0
    # load last weights
    if config.load_weights:
        # First initialize the network with init_weights, then restore the
        # checkpoint, so that parameters not covered by the restore are still
        # properly initialized.

        if config.pret_weight_path.endswith('.pth'):
            weights_path = config.pret_weight_path
        else:
            # Fail loudly instead of hitting a NameError on weights_path below.
            raise ValueError(
                'config.pret_weight_path must point to a .pth checkpoint, '
                'got {0}'.format(config.pret_weight_path))
        try:
            model_dict = torch.load(weights_path)
            new_dict = {}
            for k, v in model_dict.items():
                if 'header' not in k:
                    new_dict[k] = v
            ret = model.load_state_dict(new_dict, strict=False)
        except RuntimeError as e:
            print('[Warning] Ignoring {0}'.format(e))
            print(
                '[Warning] Don\'t panic if you see this, this might be because you load a pretrained weights with different number of classes. The rest of the weights should be loaded already.'
            )

        print('[Info] loaded pretrained weights: {0},'.format(weights_path))

    if config.head_only:

        def freeze_backbone(m):
            classname = m.__class__.__name__
            for ntl in ['EfficientNet']:
                if ntl in classname:
                    for param in m.parameters():
                        param.requires_grad = False

        model.apply(freeze_backbone)
        print('[Info] froze backbone')

    # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
    # Apply sync_bn when using multiple GPUs and the per-GPU batch size is below 4;
    # this is useful when GPU memory is limited, because with such small batches
    # plain BatchNorm makes training unstable or slow to converge. sync_bn fixes
    # this by normalizing one combined mini-batch gathered from all GPUs and
    # sending the result back, at a small cost in training speed.
    if config.num_gpus > 1 and config.batch_size // config.num_gpus < 4:
        model.apply(replace_w_sync_bn)
        use_sync_bn = True
    else:
        use_sync_bn = False

    writer = SummaryWriter(
        config.log_path +
        '/{0}/'.format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")))

    # wrap the model with the loss function to reduce memory usage on gpu0 and speed up
    model = ModelWithLoss(model, debug=config.debug)

    if config.num_gpus > 0:
        model = model.cuda()
        if config.num_gpus > 1:
            model = CustomDataParallel(model, config.num_gpus)
            if use_sync_bn:
                patch_replication_callback(model)

    if config.optim == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), config.lr)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    config.lr,
                                    momentum=0.9,
                                    nesterov=True)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=config.patience,
        verbose=True,
        factor=config.factor,
        min_lr=config.min_lr)

    epoch = 0
    step = max(0, last_step)
    model.train()

    num_iter_per_epoch = len(training_generator)

    try:
        for epoch in range(config.num_epochs):
            last_epoch = step // num_iter_per_epoch
            if epoch < last_epoch:
                continue

            epoch_loss = []
            for iter, data in enumerate(training_generator):
                try:
                    imgs = data['img']
                    annot = data['annot']

                    if config.num_gpus == 1:
                        # if only one gpu, just send it to cuda:0
                        # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
                        imgs = imgs.cuda()
                        annot = annot.cuda()

                    optimizer.zero_grad()
                    cls_loss, reg_loss = model(imgs,
                                               annot,
                                               obj_list=config.obj_list)
                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss = cls_loss + reg_loss
                    if loss == 0 or not torch.isfinite(loss):
                        continue

                    loss.backward()
                    # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()

                    epoch_loss.append(float(loss))

                    print(
                        'Step: {}. Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Total loss: {:.5f}'
                        .format(step, epoch, config.num_epochs, iter + 1,
                                num_iter_per_epoch, cls_loss.item(),
                                reg_loss.item(), loss.item()))
                    writer.add_scalars('Loss', {'train': loss}, step)
                    writer.add_scalars('Regression_loss', {'train': reg_loss},
                                       step)
                    writer.add_scalars('Classification_loss',
                                       {'train': cls_loss}, step)

                    # log learning_rate
                    current_lr = optimizer.param_groups[0]['lr']
                    writer.add_scalar('learning_rate', current_lr, step)

                    step += 1

                except Exception as e:
                    print('[Error]', traceback.format_exc())
                    print(e)
                    continue
            scheduler.step(np.mean(epoch_loss))

            if epoch % config.val_interval == 0 and epoch > config.start_interval:

                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for iter, data in enumerate(val_generator):
                    with torch.no_grad():
                        imgs = data['img']
                        annot = data['annot']

                        if config.num_gpus == 1:
                            imgs = imgs.cuda()
                            annot = annot.cuda()

                        cls_loss, reg_loss = model(imgs,
                                                   annot,
                                                   obj_list=config.obj_list)
                        cls_loss = cls_loss.mean()
                        reg_loss = reg_loss.mean()

                        loss = cls_loss + reg_loss
                        if loss == 0 or not torch.isfinite(loss):
                            continue

                        loss_classification_ls.append(cls_loss.item())
                        loss_regression_ls.append(reg_loss.item())

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Val. Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch, config.num_epochs, cls_loss, reg_loss,
                            loss))
                writer.add_scalars('Loss', {'val': loss}, step)
                writer.add_scalars('Regression_loss', {'val': reg_loss}, step)
                writer.add_scalars('Classification_loss', {'val': cls_loss},
                                   step)

                save_checkpoint(
                    model, 'efficientdet-d{0}_{1}_{2}.pth'.format(
                        config.compound_coef, epoch, step))

                model.train()

    except KeyboardInterrupt:
        save_checkpoint(
            model,
            'efficientdet-d{0}_{1}_{2}.pth'.format(config.compound_coef, epoch,
                                                   step))
        writer.close()
    writer.close()
Example #4
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
    else:
        raise Exception('no GPU')

    cudnn.benchmark = True

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": opt.worker
    }

    training_set = TrainDataset(root_dir=opt.data_path,
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    opt.num_classes = training_set.num_classes

    model = EfficientDet(opt)
    if opt.resume:
        print('Loading model...')
        model.load_state_dict(
            torch.load(os.path.join(opt.saved_path, opt.network + '.pth')))

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    model = model.cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = np.inf
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        print('Epoch: {}/{}:'.format(epoch + 1, opt.num_epochs))
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            optimizer.zero_grad()
            cls_loss, reg_loss = model(
                [data['img'].cuda().float(), data['annot'].cuda()])
            cls_loss = cls_loss.mean()
            reg_loss = reg_loss.mean()
            loss = cls_loss + reg_loss
            if loss == 0:
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            epoch_loss.append(float(loss))
            total_loss = np.mean(epoch_loss)

            progress_bar.set_description(
                'Epoch: {}/{}. Iteration: {}/{}'.format(
                    epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch))

            progress_bar.write(
                'Cls loss: {:.5f}\tReg loss: {:.5f}\tBatch loss: {:.5f}\tTotal loss: {:.5f}'
                .format(cls_loss, reg_loss, loss, total_loss))

        loss = np.mean(epoch_loss)
        scheduler.step(loss)

        if loss + opt.es_min_delta < best_loss:
            print('Saving model...')
            best_loss = loss
            torch.save(model.module.state_dict(),
                       os.path.join(opt.saved_path, opt.network + '.pth'))
Example #5
    # (Truncated excerpt: the opening of this training_params dict was cut off
    # on the source page; restored here to mirror the val_params block below.)
    training_params = {
        'batch_size': batch_size,
        'shuffle': True,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': num_workers
    }
    val_params = {
        'batch_size': batch_size,
        'shuffle': False,
        'drop_last': True,
        'collate_fn': collater,
        'num_workers': num_workers
    }

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]

    training_set = CocoDataset(root_dir=os.path.join(data_path,
                                                     params.project_name),
                               set=params.train_set,
                               transform=transforms.Compose([
                                   Normalizer(mean=params.mean,
                                              std=params.std),
                                   Augmenter(),
                                   Resizer(input_sizes[compound_coef])
                               ]))
    training_generator = DataLoader(training_set, **training_params)

    val_set = CocoDataset(root_dir=os.path.join(data_path,
                                                params.project_name),
                          set=params.val_set,
                          transform=transforms.Compose([
                              Normalizer(mean=params.mean, std=params.std),
                              Resizer(input_sizes[compound_coef])
                          ]))
    val_generator = DataLoader(val_set, **val_params)

    print("OK")
Example #6
import os

import cv2
import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm

# NOTE: FlirDataset, Normalizer, Augmenter, Resizer and collater come from the
# project's own dataset/transform modules; the exact import path is project-specific.

# Sanity-check the image-reading data pipeline.
print(os.getcwd())
# os.chdir(os.path.join('..','REMOTE','datasets','FLIR_pt_efficientDet'))
# category={0:'person',1:'bicycle',2:'car'}

root_dir = ''
set_name = 'train2017'
mspath = 'mean_std.txt'
batch_size = 1
# First compute the dataset mean and std.
training_set = FlirDataset(root_dir, set_name, mean_std_path=mspath,
                           cal_mean_std=True,
                           transform=transforms.Compose([
                               Normalizer(mspath),
                               Augmenter(),
                               Resizer(1280)]))
# a = training_set[0]  # grab one sample to inspect
# cv2.imshow('image', a['img'].numpy())
# cv2.waitKey(0)
# cv2.destroyAllWindows()
training_params = {'batch_size': batch_size,
                   'shuffle': True,
                   'drop_last': True,
                   'collate_fn': collater,
                   'num_workers': 0}
training_generator = DataLoader(training_set, **training_params)
progress_bar = tqdm(training_generator)
for iter, data in enumerate(progress_bar):
    imgs = data['img']
    annot = data['annot']
    print(imgs.shape, annot.shape)
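
Every DataLoader in these examples relies on a custom `collater` (or `collate_fn`) to batch detection samples. The project's own implementation is not shown on this page; the sketch below is only an assumption about what such a function typically does: stack the already-resized images and pad the variable-length annotation tensors with -1 so they form one batch tensor.

import torch

def collater_sketch(batch):
    """Hypothetical detection collate function (not the project's collater).

    Each sample is assumed to be a dict with 'img' (image tensor, already
    resized to a fixed size) and 'annot' ([num_boxes, 5] tensor of box
    coordinates plus class id).
    """
    imgs = torch.stack([item['img'] for item in batch], dim=0)
    annots = [item['annot'] for item in batch]
    max_boxes = max(a.shape[0] for a in annots)
    # Pad every annotation tensor to the same length with -1 entries.
    padded = torch.full((len(annots), max(max_boxes, 1), 5), -1.0)
    for i, a in enumerate(annots):
        if a.shape[0] > 0:
            padded[i, :a.shape[0], :] = a
    return {'img': imgs, 'annot': padded}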
Example #7
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {"batch_size": opt.batch_size * num_gpus,
                       "shuffle": True,
                       "drop_last": True,
                       "collate_fn": collate_fn,
                       "num_workers": 12}

    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collate_fn,
                   "num_workers": 12}

    training_set = WaymoDataset(
        cameras=[opt.cam_view], scope='training',
        transform=transforms.Compose([Normalizer(), Resizer()]),
        mod='fast_rcnn')
    training_generator = DataLoader(training_set, **training_params)

    test_set = WaymoDataset(
        cameras=[opt.cam_view], scope='validation',
        transform=transforms.Compose([Normalizer(), Resizer()]),
        mod='fast_rcnn')
    test_generator = DataLoader(test_set, **test_params)
    
    
    print(f'Using pretrained model? {opt.pretrained_model}')
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=opt.pretrained_model)
    # num_classes which is user-defined
    num_classes = training_set.num_classes()
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one, this will really need to be trained!
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    model.load_state_dict(torch.load('trained_models/fasterrcnn_resnet50_waymo.pth'))
    
    # only if we use the pretrained model
    if opt.pretrained_model:
        transfer_learning(model, opt.freeze_layers)

    # Choose the device: GPU if available, otherwise CPU
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)


    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    # writer = SummaryWriter(opt.log_path)
    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    # construct an optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            optimizer.zero_grad()
            images = data[0]
            targets = data[1]
            images = list(image for image in images)
            targets = [{k: v for k, v in t.items()} for t in targets]

            if torch.cuda.is_available():
                # Python lists have no .cuda(); move each image tensor and each
                # tensor value in the target dicts to the GPU individually.
                images = [image.cuda() for image in images]
                targets = [{k: v.cuda() if torch.is_tensor(v) else v
                            for k, v in t.items()} for t in targets]

            losses = model(images, targets)
            cls_loss, reg_loss = losses['loss_classifier'], losses['loss_box_reg']

            cls_loss = cls_loss.mean()
            reg_loss = reg_loss.mean()
            loss = cls_loss + reg_loss
            
            if loss == 0:
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            epoch_loss.append(float(loss))
            total_loss = np.mean(epoch_loss)
            if iter % 5 == 0:
                print(f'Total loss at iteration {iter}: {total_loss}')
            progress_bar.set_description(
                'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'.format(
                    epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch, cls_loss, reg_loss, loss,
                    total_loss))
            # writer.add_scalar('Train/Total_loss', total_loss, epoch * num_iter_per_epoch + iter)
            # writer.add_scalar('Train/Regression_loss', reg_loss, epoch * num_iter_per_epoch + iter)
            # writer.add_scalar('Train/Classfication_loss (focal loss)', cls_loss, epoch * num_iter_per_epoch + iter)
            # Save a checkpoint every 200 iterations
            if iter % 200 == 0:
                print(f"Saving model at :{opt.saved_path}/fasterrcnn_resnet50_waymo.pth")
                torch.save(model.state_dict(), os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))
#                 torch.save(model, os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))

        # StepLR takes no metric; it simply decays the LR every step_size epochs.
        scheduler.step()

        if epoch % opt.test_interval == 0:
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    images = data[0]
                    targets = data[1]
                    images = list(image for image in images)
                    targets = [{k: v for k, v in t.items()} for t in targets]

                    if torch.cuda.is_available():
                        images = [image.cuda() for image in images]
                        targets = [{k: v.cuda() if torch.is_tensor(v) else v
                                    for k, v in t.items()} for t in targets]

                    losses = model(images, targets)
                    cls_loss, reg_loss = losses['loss_classifier'], losses['loss_box_reg']

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'.format(
                    epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss))
            # writer.add_scalar('Test/Total_loss', loss, epoch)
            # writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            # writer.add_scalar('Test/Classfication_loss (focal loss)', cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(model.state_dict(), os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))
#                 torch.save(model, os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.pth"))

                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                # The set_swish() toggling in the original snippet is specific to an
                # EfficientNet backbone and does not exist on torchvision's
                # fasterrcnn_resnet50_fpn, so only the ONNX export itself is kept.
                export_model = model.module if isinstance(model, nn.DataParallel) else model
                torch.onnx.export(export_model, dummy_input,
                                  os.path.join(opt.saved_path, "fasterrcnn_resnet50_waymo.onnx"),
                                  verbose=False)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, loss))
                break
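
The training function above reads all of its options from `opt`. The argparse sketch below lists the fields it actually touches; the default values are assumptions for illustration only, not taken from the original project:

import argparse

def get_args():
    # Sketch of the options consumed by train(opt); defaults are placeholders.
    parser = argparse.ArgumentParser('Faster R-CNN on the Waymo dataset (sketch)')
    parser.add_argument('--batch_size', type=int, default=2)
    parser.add_argument('--num_epochs', type=int, default=10)
    parser.add_argument('--cam_view', type=str, default='FRONT')
    parser.add_argument('--pretrained_model', action='store_true')
    parser.add_argument('--freeze_layers', type=int, default=0)
    parser.add_argument('--test_interval', type=int, default=1)
    parser.add_argument('--es_min_delta', type=float, default=0.0)
    parser.add_argument('--es_patience', type=int, default=0)
    parser.add_argument('--log_path', type=str, default='tensorboard')
    parser.add_argument('--saved_path', type=str, default='trained_models')
    return parser.parse_args()

if __name__ == '__main__':
    train(get_args())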