Example 1

import os
import shutil
from datetime import datetime

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from tqdm import tqdm
from tensorboardX import SummaryWriter  # or: from torch.utils.tensorboard import SummaryWriter

# Project-local modules; the module paths are assumed from the
# signatrix/efficientdet layout and may differ in other checkouts.
from src.dataset import CocoDataset, Normalizer, Augmenter, Resizer, collater
from src.model import EfficientDet


def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

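    # Channel widths of the three backbone feature levels (strides 8/16/32)
    # that feed the detection neck, per EfficientNet variant.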
    channels_map = {
        'efficientnet-b0': [40, 80, 192],
        'efficientnet-b1': [40, 80, 192],
        'efficientnet-b2': [48, 88, 208],
        'efficientnet-b3': [48, 96, 232],
        'efficientnet-b4': [56, 112, 272],
        'efficientnet-b5': [64, 128, 304],
        'efficientnet-b6': [72, 144, 344],
        'efficientnet-b7': [80, 160, 384],
        'efficientnet-b8': [80, 160, 384]
    }

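    # A fresh TensorBoard log directory is created on every run (including
    # resumed ones), so scalars from earlier runs are discarded.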
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)

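    # Resuming loads the whole pickled model; .module unwraps the
    # nn.DataParallel wrapper that the GPU save path below pickles with it.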
    if opt.resume:
        resume_path = os.path.join(opt.saved_path,
                                   'signatrix_efficientdet_coco_latest.pth')
        model = torch.load(resume_path).module
        print("model loaded from {}".format(resume_path))
    else:
        model = EfficientDet(
            num_classes=training_set.num_classes(),
            network=opt.backbone_network,
            remote_loading=opt.remote_loading,
            advprop=opt.advprop,
            conv_in_channels=channels_map[opt.backbone_network])
        print("model created with backbone {}, advprop {}".format(
            opt.backbone_network, opt.advprop))

    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    if opt.resume:
        # Reapply the LR decay accumulated before the resume point; the
        # schedule assumed here is a 10x decay per 100 completed epochs
        # (floor division, rather than round(), matches that intent).
        m = opt.start_epoch // 100
        opt.lr = opt.lr * (0.1 ** m)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)

    start_epoch = 0
    if opt.resume:
        start_epoch = opt.start_epoch
    for epoch in range(start_epoch, opt.num_epochs):
        model.train()
        # if torch.cuda.is_available():
        #     model.module.freeze_bn()
        # else:
        #     model.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
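                # A zero loss typically means the batch produced no usable
                # annotations, so there is nothing to backpropagate.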
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    '{} Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                    .format(datetime.now(), epoch + 1, opt.num_epochs,
                            iter + 1, num_iter_per_epoch, cls_loss, reg_loss,
                            loss, total_loss))
                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                '{} Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                .format(datetime.now(), epoch + 1, opt.num_epochs, cls_loss,
                        reg_loss, loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss,
                              epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model,
                    os.path.join(
                        opt.saved_path,
                        "signatrix_efficientdet_coco_best_epoch{}.pth".format(
                            epoch)))
                ''' 
                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                if isinstance(model, nn.DataParallel):
                    model.module.backbone_net.model.set_swish(memory_efficient=False)
                    
                    torch.onnx.export(model.module, dummy_input,
                                      os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
                                      verbose=False)
                    
                    model.module.backbone_net.model.set_swish(memory_efficient=True)
                else:
                    model.backbone_net.model.set_swish(memory_efficient=False)
                    
                    torch.onnx.export(model, dummy_input,
                                      os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
                                      verbose=False)
                    
                    model.backbone_net.model.set_swish(memory_efficient=True)
                '''
            print("epoch:", epoch, "best_epoch:", best_epoch,
                  "epoch - best_epoch=", epoch - best_epoch)
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, loss))
                break
        if epoch % opt.save_interval == 0:
            torch.save(
                model,
                os.path.join(opt.saved_path,
                             "signatrix_efficientdet_coco_latest.pth"))
    writer.close()
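
The function reads a number of attributes off opt without showing where they come from. A minimal argparse sketch of that interface follows; the flag names mirror the attribute accesses in the function above, while every default value is an illustrative assumption rather than the project's actual setting.

import argparse


def get_args():
    parser = argparse.ArgumentParser(
        description="EfficientDet training options (illustrative sketch)")
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--num_epochs", type=int, default=500)
    parser.add_argument("--test_interval", type=int, default=1)
    parser.add_argument("--save_interval", type=int, default=1)
    parser.add_argument("--es_min_delta", type=float, default=0.0)
    parser.add_argument("--es_patience", type=int, default=0)
    parser.add_argument("--data_path", type=str, default="data/COCO")
    parser.add_argument("--log_path", type=str, default="tensorboard/")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    parser.add_argument("--backbone_network", type=str,
                        default="efficientnet-b0")
    parser.add_argument("--advprop", action="store_true")
    parser.add_argument("--remote_loading", action="store_true")
    parser.add_argument("--resume", action="store_true")
    parser.add_argument("--start_epoch", type=int, default=0)
    return parser.parse_args()


if __name__ == "__main__":
    train(get_args())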
Example 2

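# Imports as in Example 1.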
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        # if torch.cuda.is_available():
        #     model.module.freeze_bn()
        # else:
        #     model.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                    .format(epoch + 1, opt.num_epochs, iter + 1,
                            num_iter_per_epoch, cls_loss, reg_loss, loss,
                            total_loss))
                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                        loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss,
                              epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model,
                    os.path.join(opt.saved_path,
                                 "signatrix_efficientdet_coco.pth"))

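                # Export the best model to ONNX as well. The memory-efficient
                # Swish uses a custom autograd Function that the ONNX tracer
                # cannot handle, so it is swapped out for the export and
                # restored afterwards.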
                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                if isinstance(model, nn.DataParallel):
                    model.module.backbone_net.model.set_swish(
                        memory_efficient=False)

                    torch.onnx.export(model.module,
                                      dummy_input,
                                      os.path.join(
                                          opt.saved_path,
                                          "signatrix_efficientdet_coco.onnx"),
                                      verbose=False,
                                      opset_version=11)
                    model.module.backbone_net.model.set_swish(
                        memory_efficient=True)
                else:
                    model.backbone_net.model.set_swish(memory_efficient=False)

                    torch.onnx.export(model,
                                      dummy_input,
                                      os.path.join(
                                          opt.saved_path,
                                          "signatrix_efficientdet_coco.onnx"),
                                      verbose=False,
                                      opset_version=11)
                    model.backbone_net.model.set_swish(memory_efficient=True)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, loss))
                break
    writer.close()
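
For a quick local smoke test, the function can be driven with a plain namespace instead of argparse. The attribute names below come from the function body; the values, and the COCO-style directory layout behind data_path, are assumptions.

from types import SimpleNamespace

opt = SimpleNamespace(
    batch_size=2,
    lr=1e-4,
    num_epochs=1,
    test_interval=1,
    es_min_delta=0.0,
    es_patience=0,  # 0 disables early stopping: epoch - best_epoch > 0 > 0 is never true
    data_path="data/COCO",         # assumed root containing train2017/ and val2017/
    log_path="tensorboard/smoke",  # deleted and recreated by train()
    saved_path="trained_models",
)
train(opt)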
Example 3

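# Imports as in Example 1, with OpenImagesDataset in place of CocoDataset
# (module path assumed).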
def train(opt):
    if not os.path.isdir(opt.data_path):
        print(f"Data for dataset not found at {opt.data_path}")
        return

    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12
    }

    training_set = OpenImagesDataset(
        root_dir=opt.data_path,
        set_name="train",
        transform=transforms.Compose([Normalizer(),
                                      Augmenter(),
                                      Resizer()]))
    training_loader = DataLoader(training_set, **training_params)

    test_set = OpenImagesDataset(root_dir=opt.data_path,
                                 set_name="val",
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    test_loader = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_loader)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_loader)

        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                    f'Iteration: {iter + 1}/{num_iter_per_epoch} | '
                    f'Cls loss: {cls_loss:.5f} | Reg loss: {reg_loss:.5f} | '
                    f'Batch loss: {loss:.5f} | Total loss: {total_loss:.5f}')

                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_loader):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                f'Classification loss: {cls_loss:1.5f} | '
                f'Regression loss: {reg_loss:1.5f} | Total loss: {loss:1.5f}'
            )

            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)',
                              cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model, os.path.join(opt.saved_path,
                                        f'{opt.model_name}.pth'))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    f"Stop training at epoch {epoch}. The lowest loss achieved is {loss}"
                )
                break

    torch.save(model,
               os.path.join(opt.saved_path, f'{opt.model_name}-final.pth'))
    writer.flush()
    writer.close()
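
The checkpoints written above pickle the entire module rather than a state_dict, so loading them requires the defining classes to be importable. A minimal loading sketch follows; the file name matches the f'{opt.model_name}-final.pth' pattern above, but the concrete path is an assumption, and on PyTorch 2.6+ weights_only=False must be passed because full-module pickles are no longer loaded by default.

import torch
import torch.nn as nn

model = torch.load("trained_models/efficientdet-final.pth", map_location="cpu")
if isinstance(model, nn.DataParallel):
    model = model.module  # unwrap the multi-GPU wrapper saved during training
model.eval()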