Code Example #1
import time

import torch
import torch.nn as nn
from torch.optim import Adam, lr_scheduler
from torch.utils.data import DataLoader

# conf, Training_Dataset, UNet and save_model are project-local modules


def train():
    device = torch.device(conf.cuda if torch.cuda.is_available() else "cpu")
    dataset = Training_Dataset(conf.data_path_train, conf.gaussian_noise_param,
                               conf.crop_img_size)
    dataset_length = len(dataset)
    train_loader = DataLoader(dataset,
                              batch_size=4,
                              shuffle=True,
                              num_workers=4)
    model = UNet(in_channels=conf.img_channel, out_channels=conf.img_channel)
    criterion = nn.MSELoss()
    model = model.to(device)
    optim = Adam(model.parameters(),
                 lr=conf.learning_rate,
                 betas=(0.9, 0.999),
                 eps=1e-8,
                 weight_decay=0,
                 amsgrad=True)
    scheduler = lr_scheduler.StepLR(optim, step_size=100, gamma=0.5)
    model.train()
    print(model)
    print("Starting Training Loop...")
    since = time.time()
    for epoch in range(conf.max_epoch):
        print('Epoch {}/{}'.format(epoch, conf.max_epoch - 1))
        print('-' * 10)
        running_loss = 0.0
        for batch_idx, (source, target) in enumerate(train_loader):

            source = source.to(device)
            target = target.to(device)
            optim.zero_grad()

            denoised_source = model(source)
            loss = criterion(denoised_source, target)
            loss.backward()
            optim.step()

            running_loss += loss.item() * source.size(0)
            print('Current loss {} and current batch idx {}'.format(
                loss.item(), batch_idx))
        scheduler.step()  # step the schedule once per epoch, after the optimizer updates
        epoch_loss = running_loss / dataset_length
        print('Epoch {} loss: {:.4f}'.format(epoch, epoch_loss))
        if (epoch + 1) % conf.save_per_epoch == 0:
            save_model(model, epoch + 1)
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
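The function above leans on project-local pieces: conf, Training_Dataset, UNet and save_model. A minimal stand-in for the configuration and the save helper, with purely illustrative values (assumptions, not taken from the source):

# hypothetical conf stand-in; every value here is an assumption
class conf:
    cuda = "cuda:0"
    data_path_train = "./data/train"
    gaussian_noise_param = 25
    crop_img_size = 128
    img_channel = 3
    learning_rate = 1e-3
    max_epoch = 1000
    save_per_epoch = 100

def save_model(model, epoch):
    # hypothetical helper: persist weights once per save interval
    torch.save(model.state_dict(), "unet_epoch_{}.pth".format(epoch))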
Code Example #2
File: training_SRRF.py  Project: ahmedmishfaq/PySIM
        for i,p in enumerate(loss_all):
            for j,q in enumerate(p):
                table.write(i,j,q)
        file.save('/home/star/0_code_lhj/DL-SIM-github/Training_codes/UNet/loss_UNet_SRRF_microtubule.xls')

        lr = get_learning_rate(epoch)
        for p in optimizer.param_groups:
            p['lr'] = lr
            print("learning rate = {}".format(p['lr']))
            
        for batch_idx, items in enumerate(train_dataloader):
            
            image = items['image_in']
            gt = items['groundtruth']
            
            model.train()
            
            # NHWC -> NCHW; permute is the tensor-native equivalent of the
            # two swapaxes calls and avoids numpy dispatch on a torch tensor
            image = image.permute(0, 3, 1, 2).contiguous()
            image = image.float()
            image = image.cuda(cuda)    
            
            gt = gt.squeeze()
            gt = gt.float()
            gt = gt.cuda(cuda)
            
            pred = model(image).squeeze()

            # mixed objective: an L1 term plus a 5x-weighted MSE term
            loss = (pred - gt).abs().mean() + 5 * ((pred - gt)**2).mean()
            
            optimizer.zero_grad()
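            # the excerpt stops after zero_grad(); the standard remainder
            # of the update step would follow (hedged completion, not part
            # of the source):
            loss.backward()
            optimizer.step()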
Code Example #3
        'ssim': []
    }
    data, target = get_train_data(X_test, y_test, batch_size)
    test_data = torch.from_numpy(data.astype(int)).float()
    test_target = torch.from_numpy(target.astype(int)).float()

    for epoch in range(1, NUM_EPOCHS + 1):
        running_results = {
            'batch_sizes': 0,
            'd_loss': 0,
            'g_loss': 0,
            'd_score': 0,
            'g_score': 0
        }

        netG.train()
        netD.train()
        data, target = get_train_data(X_train, y_train, batch_size)
        data = torch.from_numpy(data.astype(int)).float()
        target = torch.from_numpy(target.astype(int)).float()
        g_update_first = True
        batch_size = data.size(0)
        running_results['batch_sizes'] += batch_size

        ############################
        # (1) Update D network: maximize D(x)-1-D(G(z))
        ###########################
        real_img = Variable(target)
        if torch.cuda.is_available():
            real_img = real_img.cuda()
        z = Variable(data)
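Variable has been a no-op wrapper since PyTorch 0.4, and the excerpt cuts off before the discriminator update its header announces. A hedged sketch of how that update is commonly written in modern style (optimizerD is an assumed name; minimizing 1 - D(x) + D(G(z)) maximizes the stated objective):

        fake_img = netG(z)
        netD.zero_grad()
        d_loss = 1 - netD(real_img).mean() + netD(fake_img).mean()
        d_loss.backward(retain_graph=True)
        optimizerD.step()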
Code Example #4
class Train(object):
    def __init__(self, configs):
        self.batch_size = configs.get("batch_size", 16)
        self.epochs = configs.get("epochs", 100)
        self.lr = configs.get("lr", 1e-4)

        device_args = configs.get("device", "cuda")
        self.device = torch.device(
            "cpu" if not torch.cuda.is_available() else device_args)

        self.workers = configs.get("workers", 4)

        self.vis_images = configs.get("vis_images", 200)
        self.vis_freq = configs.get("vis_freq", 10)

        self.weights = configs.get("weights", "./weights")
        if not os.path.exists(self.weights):
            os.mkdir(self.weights)

        self.logs = configs.get("logs", "./logs")
        if not os.path.exists(self.logs):
            os.mkdir(self.logs)

        self.images_path = configs.get("images_path", "./data")

        self.is_resize = configs.get("is_resize", False)
        self.image_short_side = configs.get("image_short_side", 256)

        self.is_padding = configs.get("is_padding", False)

        is_multi_gpu = configs.get("DataParallel", False)

        pre_train = configs.get("pre_train", False)
        model_path = configs.get("model_path", './weights/unet_idcard_adam.pth')

        # self.image_size = configs.get("image_size", "256")
        # self.aug_scale = configs.get("aug_scale", "0.05")
        # self.aug_angle = configs.get("aug_angle", "15")

        self.step = 0

        self.dsc_loss = DiceLoss()
        self.model = UNet(in_channels=Dataset.in_channels,
                          out_channels=Dataset.out_channels)
        if pre_train:
            self.model.load_state_dict(torch.load(model_path,
                                                  map_location=self.device),
                                       strict=False)

        if is_multi_gpu:
            self.model = nn.DataParallel(self.model)

        self.model.to(self.device)

        self.best_validation_dsc = 0.0

        self.loader_train, self.loader_valid = self.data_loaders()

        self.params = [p for p in self.model.parameters() if p.requires_grad]

        self.optimizer = optim.Adam(self.params,
                                    lr=self.lr,
                                    weight_decay=0.0005)
        # self.optimizer = torch.optim.SGD(self.params, lr=self.lr, momentum=0.9, weight_decay=0.0005)
        self.scheduler = lr_scheduler.LR_Scheduler_Head(
            'poly', self.lr, self.epochs, len(self.loader_train))

    def datasets(self):
        train_datasets = Dataset(
            images_dir=self.images_path,
            # image_size=self.image_size,
            subset="train",  # train
            transform=get_transforms(train=True),
            is_resize=self.is_resize,
            image_short_side=self.image_short_side,
            is_padding=self.is_padding)
        # valid_datasets = train_datasets

        valid_datasets = Dataset(
            images_dir=self.images_path,
            # image_size=self.image_size,
            subset="validation",  # validation
            transform=get_transforms(train=False),
            is_resize=self.is_resize,
            image_short_side=self.image_short_side,
            is_padding=False)
        return train_datasets, valid_datasets

    def data_loaders(self):
        dataset_train, dataset_valid = self.datasets()

        loader_train = DataLoader(
            dataset_train,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=True,
            num_workers=self.workers,
        )
        loader_valid = DataLoader(
            dataset_valid,
            batch_size=1,
            drop_last=False,
            num_workers=self.workers,
        )

        return loader_train, loader_valid

    @staticmethod
    def dsc_per_volume(validation_pred, validation_true):
        assert len(validation_pred) == len(validation_true)
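        # dsc() (imported elsewhere) conventionally computes the Dice score
        #     2 * |A ∩ B| / (|A| + |B|)
        # per prediction/ground-truth pair; each element here is treated as
        # its own single-entry volume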
        dsc_list = []
        for p in range(len(validation_pred)):
            y_pred = np.array([validation_pred[p]])
            y_true = np.array([validation_true[p]])
            dsc_list.append(dsc(y_pred, y_true))
        return dsc_list

    @staticmethod
    def get_logger(filename, verbosity=1, name=None):
        level_dict = {0: logging.DEBUG, 1: logging.INFO, 2: logging.WARNING}
        formatter = logging.Formatter(
            "[%(asctime)s][%(filename)s][line:%(lineno)d][%(levelname)s] %(message)s"
        )
        logger = logging.getLogger(name)
        logger.setLevel(level_dict[verbosity])

        fh = logging.FileHandler(filename, "w")
        fh.setFormatter(formatter)
        logger.addHandler(fh)

        sh = logging.StreamHandler()
        sh.setFormatter(formatter)
        logger.addHandler(sh)

        return logger

    def train_one_epoch(self, epoch):

        self.model.train()
        loss_train = []
        for i, data in enumerate(self.loader_train):
            self.scheduler(self.optimizer, i, epoch, self.best_validation_dsc)
            x, y_true = data
            x, y_true = x.to(self.device), y_true.to(self.device)

            y_pred = self.model(x)
            # print('1111', y_pred.size())
            # print('2222', y_true.size())
            loss = self.dsc_loss(y_pred, y_true)

            loss_train.append(loss.item())

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # lr_scheduler.step()
            if self.step % 200 == 0:
                print('Epoch:[{}/{}]\t iter:[{}]\t loss={:.5f}\t '.format(
                    epoch, self.epochs, i, loss.item()))

            self.step += 1

    def eval_model(self, patience):
        self.model.eval()
        loss_valid = []

        validation_pred = []
        validation_true = []
        # early_stopping = EarlyStopping(patience=patience, verbose=True)

        for i, data in enumerate(self.loader_valid):
            x, y_true = data
            x, y_true = x.to(self.device), y_true.to(self.device)

            # print(x.size())
            # print(333,x[0][2])
            with torch.no_grad():
                y_pred = self.model(x)
                loss = self.dsc_loss(y_pred, y_true)

            # binarize the first prediction in the batch and dump it for
            # visual inspection (overwritten on every batch)
            mask = (y_pred > 0.5) * 255
            mask = mask.cpu().numpy()[0][0].astype(np.uint8)
            cv2.imwrite('result.png', mask)

            loss_valid.append(loss.item())

            y_pred_np = y_pred.detach().cpu().numpy()

            validation_pred.extend(
                [y_pred_np[s] for s in range(y_pred_np.shape[0])])
            y_true_np = y_true.detach().cpu().numpy()
            validation_true.extend(
                [y_true_np[s] for s in range(y_true_np.shape[0])])

        # early_stopping(loss_valid, self.model)
        # if early_stopping.early_stop:
        #     print('Early stopping')
        #     import sys
        #     sys.exit(1)
        mean_dsc = np.mean(
            self.dsc_per_volume(
                validation_pred,
                validation_true,
            ))
        # print('mean_dsc:', mean_dsc)
        if mean_dsc > self.best_validation_dsc:
            self.best_validation_dsc = mean_dsc
            torch.save(self.model.state_dict(),
                       os.path.join(self.weights, "unet_xia_adam.pth"))
            print("Best validation mean DSC: {:4f}".format(
                self.best_validation_dsc))

    def main(self):
        # print('train is begin.....')
        # print('load data end.....')

        # loaders = {"train": loader_train, "valid": loader_valid}

        for epoch in tqdm(range(self.epochs), total=self.epochs):
            self.train_one_epoch(epoch)
            self.eval_model(patience=10)

        torch.save(self.model.state_dict(),
                   os.path.join(self.weights, "unet_final.pth"))
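For orientation, a hypothetical driver for this class; the keys mirror the ones read in __init__, and the values are illustrative assumptions:

configs = {"batch_size": 16, "epochs": 100, "lr": 1e-4, "device": "cuda",
           "workers": 4, "images_path": "./data", "weights": "./weights",
           "logs": "./logs"}
trainer = Train(configs)
trainer.main()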
Code Example #5
def train(cont=False):

    # for tensorboard tracking
    logger = get_logger()
    logger.info("(1) Initiating Training ... ")
    logger.info("Training on device: {}".format(device))
    writer = SummaryWriter()

    # init model
    aux_layers = None
    if net == "SETR-PUP":
        aux_layers, model = get_SETR_PUP()
    elif net == "SETR-MLA":
        aux_layers, model = get_SETR_MLA()
    elif net == "TransUNet-Base":
        model = get_TransUNet_base()
    elif net == "TransUNet-Large":
        model = get_TransUNet_large()
    elif net == "UNet":
        model = UNet(CLASS_NUM)

    # prepare dataset
    cluster_model = get_clustering_model(logger)
    train_dataset = CityscapeDataset(img_dir=data_dir,
                                     img_dim=IMG_DIM,
                                     mode="train",
                                     cluster_model=cluster_model)
    valid_dataset = CityscapeDataset(img_dir=data_dir,
                                     img_dim=IMG_DIM,
                                     mode="val",
                                     cluster_model=cluster_model)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False)

    logger.info("(2) Dataset Initiated. ")

    # optimizer
    epochs = epoch_num if epoch_num > 0 else iteration_num // len(
        train_loader) + 1
    optim = SGD(model.parameters(),
                lr=lrate,
                momentum=momentum,
                weight_decay=wdecay)
    # optim = Adam(model.parameters(), lr=lrate)
    scheduler = lr_scheduler.MultiStepLR(
        optim, milestones=[int(epochs * fine_tune_ratio)], gamma=0.1)

    cur_epoch = 0
    best_loss = float('inf')
    epochs_since_improvement = 0

    # for continue training
    if cont:
        model, optim, cur_epoch, best_loss = load_ckpt_continue_training(
            best_ckpt_src, model, optim, logger)
        logger.info("Current best loss: {0}".format(best_loss))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for i in range(cur_epoch):
                scheduler.step()
    else:
        model = nn.DataParallel(model)
        model = model.to(device)

    logger.info("(3) Model Initiated ... ")
    logger.info("Training model: {}".format(net) + ". Training Started.")

    # loss
    ce_loss = CrossEntropyLoss()
    if use_dice_loss:
        dice_loss = DiceLoss(CLASS_NUM)

    # loop over epochs
    iter_count = 0
    epoch_bar = tqdm.tqdm(total=epochs,
                          desc="Epoch",
                          position=cur_epoch,
                          leave=True)
    logger.info("Total epochs: {0}. Starting from epoch {1}.".format(
        epochs, cur_epoch + 1))

    for e in range(epochs - cur_epoch):
        epoch = e + cur_epoch

        # Training.
        model.train()
        trainLossMeter = LossMeter()
        train_batch_bar = tqdm.tqdm(total=len(train_loader),
                                    desc="TrainBatch",
                                    position=0,
                                    leave=True)

        for batch_num, (orig_img, mask_img) in enumerate(train_loader):
            orig_img, mask_img = orig_img.float().to(
                device), mask_img.float().to(device)

            if net == "TransUNet-Base" or net == "TransUNet-Large":
                pred = model(orig_img)
            elif net == "SETR-PUP" or net == "SETR-MLA":
                if aux_layers is not None:
                    pred, _ = model(orig_img)
                else:
                    pred = model(orig_img)
            elif net == "UNet":
                pred = model(orig_img)

            loss_ce = ce_loss(pred, mask_img.long())
            if use_dice_loss:
                loss_dice = dice_loss(pred, mask_img, softmax=True)
                loss = 0.5 * (loss_ce + loss_dice)
            else:
                loss = loss_ce

            # Backward Propagation, Update weight and metrics
            optim.zero_grad()
            loss.backward()
            optim.step()

            # per-iteration polynomial decay of the current lr; the factors
            # compound across iterations (see the note after this example)
            for param_group in optim.param_groups:
                param_group['lr'] *= (1.0 - iter_count / iteration_num)**0.9
            iter_count += 1

            # Update loss
            trainLossMeter.update(loss.item())

            # print status
            if (batch_num + 1) % print_freq == 0:
                status = 'Epoch: [{0}][{1}/{2}]\t' \
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch+1, batch_num+1, len(train_loader), loss=trainLossMeter)
                logger.info(status)

            # log loss to tensorboard
            if (batch_num + 1) % tensorboard_freq == 0:
                writer.add_scalar(
                    'Train_Loss_{0}'.format(tensorboard_freq),
                    trainLossMeter.avg,
                    epoch * (len(train_loader) / tensorboard_freq) +
                    (batch_num + 1) / tensorboard_freq)
            train_batch_bar.update(1)

        writer.add_scalar('Train_Loss_epoch', trainLossMeter.avg, epoch)

        # Validation.
        model.eval()
        validLossMeter = LossMeter()
        valid_batch_bar = tqdm.tqdm(total=len(valid_loader),
                                    desc="ValidBatch",
                                    position=0,
                                    leave=True)
        with torch.no_grad():
            for batch_num, (orig_img, mask_img) in enumerate(valid_loader):
                orig_img, mask_img = orig_img.float().to(
                    device), mask_img.float().to(device)

                if net == "TransUNet-Base" or net == "TransUNet-Large":
                    pred = model(orig_img)
                elif net == "SETR-PUP" or net == "SETR-MLA":
                    if aux_layers is not None:
                        pred, _ = model(orig_img)
                    else:
                        pred = model(orig_img)
                elif net == "UNet":
                    pred = model(orig_img)

                loss_ce = ce_loss(pred, mask_img.long())
                if use_dice_loss:
                    loss_dice = dice_loss(pred, mask_img, softmax=True)
                    loss = 0.5 * (loss_ce + loss_dice)
                else:
                    loss = loss_ce

                # Update loss
                validLossMeter.update(loss.item())

                # print status
                if (batch_num + 1) % print_freq == 0:
                    status = 'Validation: [{0}][{1}/{2}]\t' \
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(epoch+1, batch_num+1, len(valid_loader), loss=validLossMeter)
                    logger.info(status)

                # log loss to tensorboard
                if (batch_num + 1) % tensorboard_freq == 0:
                    writer.add_scalar(
                        'Valid_Loss_{0}'.format(tensorboard_freq),
                        validLossMeter.avg,
                        epoch * (len(valid_loader) / tensorboard_freq) +
                        (batch_num + 1) / tensorboard_freq)
                valid_batch_bar.update(1)

        valid_loss = validLossMeter.avg
        writer.add_scalar('Valid_Loss_epoch', valid_loss, epoch)
        logger.info("Validation Loss of epoch [{0}/{1}]: {2}\n".format(
            epoch + 1, epochs, valid_loss))

        # update optim scheduler
        scheduler.step()

        # save checkpoint
        is_best = valid_loss < best_loss
        best_loss_tmp = min(valid_loss, best_loss)
        if not is_best:
            epochs_since_improvement += 1
            logger.info("Epochs since last improvement: %d\n" %
                        (epochs_since_improvement, ))
            if epochs_since_improvement == early_stop_tolerance:
                break  # early stopping.
        else:
            epochs_since_improvement = 0
            state = {
                'epoch': epoch,
                'loss': best_loss_tmp,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optim.state_dict(),
            }
            torch.save(state, ckpt_src)
            logger.info("Checkpoint updated.")
            best_loss = best_loss_tmp
        epoch_bar.update(1)
    writer.close()
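The in-loop decay above rescales the current learning rate every iteration, so the factors compound. The canonical "poly" schedule rescales the base rate instead; for reference, a sketch (not from this source):

def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    # canonical polynomial decay: falls from base_lr to 0 over max_iter
    return base_lr * (1.0 - cur_iter / max_iter) ** power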
Code Example #6
class UNetObjPrior(nn.Module):
    """ 
    Wrapper around UNet that takes object priors (gaussians) and images 
    as input.
    """
    def __init__(self, params, depth=5):
        super(UNetObjPrior, self).__init__()
        self.in_channels = 4
        self.model = UNet(1, self.in_channels, depth, cuda=params['cuda'])
        self.params = params
        self.device = torch.device('cuda' if params['cuda'] else 'cpu')

    def forward(self, im, obj_prior):
        x = torch.cat((im, obj_prior), dim=1)
        return self.model(x)

    def train(self, dataloader_train, dataloader_val):
        # note: this shadows nn.Module.train(); the wrapped UNet's mode is
        # still toggled explicitly via self.model.train() / .eval() below

        since = time.time()
        best_loss = float("inf")

        dataloader_train.mode = 'train'
        dataloader_val.mode = 'val'
        dataloaders = {'train': dataloader_train, 'val': dataloader_val}

        optimizer = optim.SGD(self.model.parameters(),
                              momentum=self.params['momentum'],
                              lr=self.params['lr'],
                              weight_decay=self.params['weight_decay'])

        train_logger = LossLogger('train', self.params['batch_size'],
                                  len(dataloader_train),
                                  self.params['out_dir'])

        val_logger = LossLogger('val', self.params['batch_size'],
                                len(dataloader_val), self.params['out_dir'])

        loggers = {'train': train_logger, 'val': val_logger}

        # self.criterion = WeightedMSE(dataloader_train.get_classes_weights(),
        #                              cuda=self.params['cuda'])
        self.criterion = nn.MSELoss()

        for epoch in range(self.params['num_epochs']):
            print('Epoch {}/{}'.format(epoch, self.params['num_epochs'] - 1))
            print('-' * 10)

            # Each epoch has a training and validation phase
            for phase in ['train', 'val']:
                if phase == 'train':
                    #scheduler.step()
                    self.model.train()
                else:
                    self.model.eval()  # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0

                # Iterate over data.
                samp = 1
                for i, data in enumerate(dataloaders[phase]):
                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        out = self.forward(data.image, data.obj_prior)
                        loss = self.criterion(out, data.truth)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    loggers[phase].update(epoch, samp, loss.item())

                    samp += 1

                loggers[phase].print_epoch(epoch)

                # Generate train prediction for check
                if phase == 'train':
                    path = os.path.join(self.params['out_dir'], 'previews',
                                        'epoch_{:04d}.jpg'.format(epoch))
                    data = dataloaders['val'].sample_uniform()
                    pred = self.forward(data.image, data.obj_prior)
                    im_ = data.image[0]
                    truth_ = data.truth[0]
                    pred_ = pred[0, ...]
                    utls.save_tensors(im_, pred_, truth_, path)

                if phase == 'val' and (loggers['val'].get_loss(epoch) <
                                       best_loss):
                    best_loss = loggers['val'].get_loss(epoch)

                loggers[phase].save('log_{}.csv'.format(phase))

                # save checkpoint
                if phase == 'val':
                    is_best = loggers['val'].get_loss(epoch) <= best_loss
                    path = os.path.join(self.params['out_dir'],
                                        'checkpoint.pth.tar')
                    utls.save_checkpoint(
                        {
                            'epoch': epoch + 1,
                            'state_dict': self.model.state_dict(),
                            'best_loss': best_loss,
                            'optimizer': optimizer.state_dict()
                        },
                        is_best,
                        path=path)

    def load_checkpoint(self, path, device='gpu'):

        if (device != 'gpu'):
            checkpoint = torch.load(path,
                                    map_location=lambda storage, loc: storage)
        else:
            checkpoint = torch.load(path)

        self.model.load_state_dict(checkpoint['state_dict'])
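A hedged sketch of restoring full training state from the checkpoint written above (the keys mirror those passed to utls.save_checkpoint; model and optimizer stand for freshly constructed instances):

ckpt = torch.load('checkpoint.pth.tar', map_location='cpu')
model.load_state_dict(ckpt['state_dict'])
optimizer.load_state_dict(ckpt['optimizer'])
start_epoch = ckpt['epoch']
best_loss = ckpt['best_loss']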
Code Example #7
        return image, label

    def __len__(self):
        # number of samples in the dataset
        return len(self.imgs_path)


if __name__ == "__main__":
    # the custom dataset class above is itself named DataLoader, so the
    # torch loader below is referenced fully qualified to avoid the clash
    dataset = DataLoader("data/train10/")
    train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=1,
                                               shuffle=True)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = UNet(n_channels=1, n_classes=1)
    net.to(device=device)

    # the excerpt never defines these; an Adam optimizer with an assumed
    # learning rate stands in here
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)
    best_loss = float('inf')

    net.train()
    for image, label in train_loader:
        image = image.to(device=device, dtype=torch.float32)
        label = label.to(device=device, dtype=torch.float32)

        optimizer.zero_grad()
        pred = net(image)
        # a single-channel head calls for a binary loss; multi-class
        # cross_entropy over one class would always be zero
        loss = F.binary_cross_entropy_with_logits(pred, label)
        print('Loss/train', loss.item())
        if loss.item() < best_loss:
            best_loss = loss.item()
            torch.save(net.state_dict(), 'best_model.pth')
        loss.backward()
        optimizer.step()

        print(pred.shape, image.shape, label.shape)