def Model(self, gpu_devices=[0]):
        '''
        User function: Set Model parameters

        Args:
            gpu_devices (list): List of GPU Device IDs to be used in training

        Returns:
            None
        '''
        num_classes = self.system_dict["local"]["training_set"].num_classes();
        efficientdet = EfficientDet(num_classes=num_classes)

        if self.system_dict["params"]["use_gpu"]:
            self.system_dict["params"]["gpu_devices"] = gpu_devices
            if len(self.system_dict["params"]["gpu_devices"])==1:
                os.environ["CUDA_VISIBLE_DEVICES"] = str(self.system_dict["params"]["gpu_devices"][0])
            else:
                os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(id) for id in self.system_dict["params"]["gpu_devices"]])
            self.system_dict["local"]["device"] = 'cuda' if torch.cuda.is_available() else 'cpu'
            efficientdet = efficientdet.to(self.system_dict["local"]["device"])
            efficientdet = torch.nn.DataParallel(efficientdet).to(self.system_dict["local"]["device"])

        self.system_dict["local"]["model"] = efficientdet;
        self.system_dict["local"]["model"].train();
def Model(self, model_name="efficientnet-b0", gpu_devices=[0], load_pretrained_model_from=None):
        '''
        User function: Set Model parameters

        Args:
            model_name (str): Name of the EfficientNet backbone, e.g. "efficientnet-b0";
                the trailing digit is used as the compound scaling coefficient
            gpu_devices (list): List of GPU Device IDs to be used in training
            load_pretrained_model_from (str): Optional path to a previously saved model;
                if set, the model is loaded from this file instead of being created from scratch

        Returns:
            None
        '''
        if not load_pretrained_model_from:
            num_classes = self.system_dict["local"]["training_set"].num_classes();
            coeff = int(model_name[-1])
            efficientdet = EfficientDet(num_classes=num_classes, compound_coef=coeff, model_name=model_name);
        else:
            efficientdet = torch.load(load_pretrained_model_from).module

        if self.system_dict["params"]["use_gpu"]:
            self.system_dict["params"]["gpu_devices"] = gpu_devices
            if len(self.system_dict["params"]["gpu_devices"]) == 1:
                os.environ["CUDA_VISIBLE_DEVICES"] = str(self.system_dict["params"]["gpu_devices"][0])
            else:
                os.environ["CUDA_VISIBLE_DEVICES"] = ','.join([str(id) for id in self.system_dict["params"]["gpu_devices"]])
            self.system_dict["local"]["device"] = 'cuda' if torch.cuda.is_available() else 'cpu'
            efficientdet = efficientdet.to(self.system_dict["local"]["device"])
            efficientdet = torch.nn.DataParallel(efficientdet).to(self.system_dict["local"]["device"])

        self.system_dict["local"]["model"] = efficientdet;
        self.system_dict["local"]["model"].train();
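All of the Model() variants above share the same device-selection pattern: restrict CUDA_VISIBLE_DEVICES to the requested IDs, pick 'cuda' or 'cpu', move the network to that device, and wrap it in DataParallel. Below is a minimal, self-contained sketch of that pattern; the helper name to_devices and the stand-in nn.Linear module are illustrative assumptions, not part of the code above.

import os

import torch
import torch.nn as nn


def to_devices(model, gpu_devices=(0,), use_gpu=True):
    # Mirrors the device-selection logic shared by the Model() variants above:
    # restrict the visible GPUs, pick cuda/cpu, then wrap the model in DataParallel.
    if use_gpu:
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(d) for d in gpu_devices)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = torch.nn.DataParallel(model.to(device)).to(device)
    return model


# Usage with a tiny stand-in network (EfficientDet itself is omitted here).
net = to_devices(nn.Linear(8, 2), gpu_devices=(0,), use_gpu=torch.cuda.is_available())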
Example 4
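The train() function in this example relies on names that are presumably imported at module level in the original script. A hedged reconstruction of those imports is shown below; the project-local module paths (src.dataset, src.model) are assumptions inferred from the names used, not confirmed by the excerpt.

import os
import shutil

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from tensorboardX import SummaryWriter  # torch.utils.tensorboard.SummaryWriter would also work
from tqdm import tqdm

# Project-local modules; the exact import paths below are assumptions.
from src.dataset import CocoDataset, Normalizer, Augmenter, Resizer, collater
from src.model import EfficientDet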
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        # if torch.cuda.is_available():
        #     model.module.freeze_bn()
        # else:
        #     model.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                    .format(epoch + 1, opt.num_epochs, iter + 1,
                            num_iter_per_epoch, cls_loss, reg_loss, loss,
                            total_loss))
                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                        np.mean(loss)))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss,
                              epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model,
                    os.path.join(opt.saved_path,
                                 "signatrix_efficientdet_coco.pth"))

                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                if isinstance(model, nn.DataParallel):
                    model.module.backbone_net.model.set_swish(
                        memory_efficient=False)

                    torch.onnx.export(model.module,
                                      dummy_input,
                                      os.path.join(
                                          opt.saved_path,
                                          "signatrix_efficientdet_coco.onnx"),
                                      verbose=False,
                                      opset_version=11)
                    model.module.backbone_net.model.set_swish(
                        memory_efficient=True)
                else:
                    model.backbone_net.model.set_swish(memory_efficient=False)

                    torch.onnx.export(model,
                                      dummy_input,
                                      os.path.join(
                                          opt.saved_path,
                                          "signatrix_efficientdet_coco.onnx"),
                                      verbose=False,
                                      opset_version=11)
                    model.backbone_net.model.set_swish(memory_efficient=True)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, loss))
                break
    writer.close()
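train(opt) reads its configuration from an argparse-style namespace. The sketch below builds such a namespace; only the attribute names are taken from the opt.* accesses in the code above, while the function name get_args, the defaults, and the parser description are assumptions.

import argparse


def get_args():
    # opt namespace consumed by train(opt) above; every default value here is an assumption.
    parser = argparse.ArgumentParser("EfficientDet COCO training (sketch)")
    parser.add_argument("--data_path", type=str, default="data/COCO")        # COCO root directory
    parser.add_argument("--log_path", type=str, default="tensorboard/")      # wiped and recreated on every run
    parser.add_argument("--saved_path", type=str, default="trained_models/")
    parser.add_argument("--batch_size", type=int, default=8)                 # scaled by num_gpus for training
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--num_epochs", type=int, default=500)
    parser.add_argument("--test_interval", type=int, default=1)              # validate every N epochs
    parser.add_argument("--es_min_delta", type=float, default=0.0)           # required improvement margin
    parser.add_argument("--es_patience", type=int, default=0)                # 0 disables early stopping
    return parser.parse_args()


if __name__ == "__main__":
    train(get_args())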
Example 5
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12
    }

    training_set = CocoDataset(root_dir=opt.data_path,
                               set="train2017",
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path,
                           set="val2017",
                           transform=transforms.Compose(
                               [Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    channels_map = {
        'efficientnet-b0': [40, 80, 192],
        'efficientnet-b1': [40, 80, 192],
        'efficientnet-b2': [48, 88, 208],
        'efficientnet-b3': [48, 96, 232],
        'efficientnet-b4': [56, 112, 272],
        'efficientnet-b5': [64, 128, 304],
        'efficientnet-b6': [72, 144, 344],
        'efficientnet-b7': [80, 160, 384],
        'efficientnet-b8': [80, 160, 384]
    }

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)

    if opt.resume:
        resume_path = os.path.join(opt.saved_path,
                                   'signatrix_efficientdet_coco_latest.pth')
        model = torch.load(resume_path).module
        print("model loaded from {}".format(resume_path))
    else:
        model = EfficientDet(
            num_classes=training_set.num_classes(),
            network=opt.backbone_network,
            remote_loading=opt.remote_loading,
            advprop=opt.advprop,
            conv_in_channels=channels_map[opt.backbone_network])
        print("model created with backbone {}, advprop {}".format(
            opt.backbone_network, opt.advprop))

    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    if opt.resume:
        m = round(opt.start_epoch / 100)
        opt.lr = opt.lr * (0.1**m)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)

    start_epoch = 0
    if opt.resume:
        start_epoch = opt.start_epoch
    for epoch in range(start_epoch, opt.num_epochs):
        model.train()
        # if torch.cuda.is_available():
        #     model.module.freeze_bn()
        # else:
        #     model.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    '{} Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                    .format(datetime.now(), epoch + 1, opt.num_epochs,
                            iter + 1, num_iter_per_epoch, cls_loss, reg_loss,
                            loss, total_loss))
                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                '{} Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                .format(datetime.now(), epoch + 1, opt.num_epochs, cls_loss,
                        reg_loss, np.mean(loss)))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss,
                              epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model,
                    os.path.join(
                        opt.saved_path,
                        "signatrix_efficientdet_coco_best_epoch{}.pth".format(
                            epoch)))
                ''' 
                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                if isinstance(model, nn.DataParallel):
                    model.module.backbone_net.model.set_swish(memory_efficient=False)
                    
                    torch.onnx.export(model.module, dummy_input,
                                      os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
                                      verbose=False)
                    
                    model.module.backbone_net.model.set_swish(memory_efficient=True)
                else:
                    model.backbone_net.model.set_swish(memory_efficient=False)
                    
                    torch.onnx.export(model, dummy_input,
                                      os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
                                      verbose=False)
                    
                    model.backbone_net.model.set_swish(memory_efficient=True)
                '''
            print("epoch:", epoch, "best_epoch:", best_epoch,
                  "epoch - best_epoch=", epoch - best_epoch)
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, loss))
                break
        if epoch % opt.save_interval == 0:
            torch.save(
                model,
                os.path.join(opt.saved_path,
                             "signatrix_efficientdet_coco_latest.pth"))
    writer.close()
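This resume-capable variant reads several extra opt.* attributes on top of those in the previous example. The helper below sketches the additional flags; the attribute names come from the code above, while the helper name add_resume_args, the flag types, and the defaults are assumptions.

import argparse


def add_resume_args(parser):
    # Extra flags read by the resume-capable train() above, complementing the
    # earlier get_args() sketch; types and defaults are assumptions.
    parser.add_argument("--backbone_network", type=str, default="efficientnet-b0")  # key into channels_map
    parser.add_argument("--advprop", action="store_true")         # forwarded to EfficientDet (AdvProp-style pretrained weights)
    parser.add_argument("--remote_loading", action="store_true")  # forwarded to EfficientDet; name suggests remote weight download
    parser.add_argument("--resume", action="store_true")          # resume from signatrix_efficientdet_coco_latest.pth in saved_path
    parser.add_argument("--start_epoch", type=int, default=0)     # epoch to resume from; lr is rescaled by 0.1**round(start_epoch/100)
    parser.add_argument("--save_interval", type=int, default=1)   # write the *_latest.pth checkpoint every N epochs
    return parser


# Parsing an empty list keeps the defaults and avoids touching sys.argv; in practice
# these flags would be added to the parser built in the earlier get_args() sketch.
opt = add_resume_args(argparse.ArgumentParser()).parse_args([])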
Example 6
def train(opt):
    if not os.path.isdir(opt.data_path):
        print(f"Data for dataset not found at {opt.data_path}")
        return

    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12
    }

    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12
    }

    training_set = OpenImagesDataset(
        root_dir=opt.data_path,
        set_name="train",
        transform=transforms.Compose([Normalizer(),
                                      Augmenter(),
                                      Resizer()]))
    training_loader = DataLoader(training_set, **training_params)

    test_set = OpenImagesDataset(root_dir=opt.data_path,
                                 set_name="val",
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    test_loader = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_loader)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_loader)

        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                    f'Iteration: {iter + 1}/{num_iter_per_epoch} | '
                    f'Cls loss: {cls_loss:.5f} | Reg loss: {reg_loss:.5f} | '
                    f'Batch loss: {loss:.5f} | Total loss: {total_loss:.5f}')

                writer.add_scalar('Train/Total_loss', total_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_loader):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                f'Classification loss: {cls_loss:1.5f} | '
                f'Regression loss: {reg_loss:1.5f} | Total loss: {np.mean(loss):1.5f}'
            )

            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)',
                              cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model, os.path.join(opt.saved_path,
                                        f'{opt.model_name}.pth'))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print(
                    f"Stop training at epoch {epoch}. The lowest loss achieved is {loss}"
                )
                break

    torch.save(model,
               os.path.join(opt.saved_path, f'{opt.model_name}-final.pth'))
    writer.flush()
    writer.close()