Example #1
import logging
import os

import numpy as np
import torch

# package helpers assumed in scope: get_args, setup_log, Learner,
# determine_ext, glob_imgs, split_filename, SynthNNError


def main(args=None):
    args, no_config_file = get_args(args)
    setup_log(args.verbosity)
    logger = logging.getLogger(__name__)
    try:
        # set random seeds for reproducibility
        torch.manual_seed(args.seed)
        np.random.seed(args.seed)

        # since prediction only uses one gpu (at most), make the batch size small enough to fit
        if args.n_gpus > 1: args.batch_size = args.batch_size // args.n_gpus

        learner = Learner.predict_setup(args)

        # determine how many samples we will use in prediction
        nsyn = args.monte_carlo or 1

        # get relevant prediction directories and determine extension
        predict_dir = args.predict_dir or args.valid_source_dir
        output_dir = args.predict_out or os.path.join(os.getcwd(), 'syn_')
        ext = determine_ext(predict_dir[0])

        # setup and start prediction loop
        axis = args.sample_axis or 0
        if not isinstance(axis, int) or not (0 <= axis <= 2):
            raise ValueError(
                'sample_axis must be an integer between 0 and 2 inclusive')
        n_imgs = len(glob_imgs(predict_dir[0], ext))
        if n_imgs == 0:
            raise SynthNNError(
                'Prediction directory does not contain valid images.')
        if any(len(glob_imgs(pd, ext)) != n_imgs for pd in predict_dir):
            raise SynthNNError(
                'Prediction directories must each contain the same number of images '
                '(e.g., so that img_t1_1 aligns with img_t2_1, etc., for multimodal synthesis)'
            )
        predict_fns = zip(*[glob_imgs(pd, ext) for pd in predict_dir])

        if args.net3d and args.patch_size > 0 and args.calc_var:
            raise SynthNNError(
                'Patch-based 3D variance calculation not currently supported.')

        for k, fn in enumerate(predict_fns):
            _, base, ext = split_filename(fn[0])
            logger.info(
                f'Starting synthesis of image: {base} ({k+1}/{n_imgs})')
            out_imgs = learner.predict(fn, nsyn, args.temperature_map,
                                       args.calc_var)
            for i, oin in enumerate(out_imgs):
                out_fn = output_dir + f'{k}_{i}{ext}'
                if hasattr(oin, 'to_filename'):
                    oin.to_filename(out_fn)
                else:
                    oin.save(out_fn)
                logger.info(f'Finished synthesis. Saved as: {out_fn}')

        return 0
    except Exception as e:
        logger.exception(e)
        return 1
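
A minimal invocation sketch for main (hypothetical paths; the '-s' and '-o' flags mirror the ones used in the test setup of Example #2 below, and get_args is assumed to parse an argparse-style argument list):

import sys

# hypothetical usage: synthesize from a source directory to an output prefix
if __name__ == '__main__':
    sys.exit(main(['-s', '/data/predict', '-o', '/tmp/syn_']))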
Example #2
    def setUp(self):
        wd = os.path.dirname(os.path.abspath(__file__))
        self.nii_dir = os.path.join(wd, 'test_data', 'nii')
        self.mask_dir = os.path.join(wd, 'test_data', 'masks')
        self.tif_dir = os.path.join(wd, 'test_data', 'tif')
        self.png_dir = os.path.join(wd, 'test_data', 'png')
        self.out_dir = tempfile.mkdtemp()
        os.mkdir(os.path.join(self.out_dir, 'models'))
        self.train_dir = os.path.join(self.out_dir, 'imgs')
        os.mkdir(self.train_dir)
        os.mkdir(os.path.join(self.train_dir, 'mask'))
        os.mkdir(os.path.join(self.train_dir, 'tif'))
        os.mkdir(os.path.join(self.train_dir, 'png'))
        nii = glob_imgs(self.nii_dir)[0]
        msk = glob_imgs(self.mask_dir)[0]
        tif = os.path.join(self.tif_dir, 'test.tif')
        png = os.path.join(self.png_dir, 'test.png')
        path, base, ext = split_filename(nii)
        # replicate the test images so the loaders see multiple samples
        for i in range(8):
            shutil.copy(nii, os.path.join(self.train_dir, base + str(i) + ext))
            shutil.copy(msk, os.path.join(self.train_dir, 'mask', base + str(i) + ext))
            shutil.copy(tif, os.path.join(self.train_dir, 'tif', base + str(i) + '.tif'))
            shutil.copy(png, os.path.join(self.train_dir, 'png', base + str(i) + '.png'))
        self.train_args = f'-s {self.train_dir} -t {self.train_dir}'.split()
        self.predict_args = f'-s {self.train_dir} -o {self.out_dir}/test'.split()
        self.jsonfn = f'{self.out_dir}/test.json'
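
Both examples lean on a split_filename helper that keeps compound suffixes such as '.nii.gz' together. A minimal sketch of that behavior; the real package utility may differ in edge cases:

import os

def split_filename(filepath):
    """ split '/a/b/img.nii.gz' into ('/a/b', 'img', '.nii.gz') """
    path = os.path.dirname(filepath)
    base, ext = os.path.splitext(os.path.basename(filepath))
    if ext == '.gz':  # keep double extensions like '.nii.gz' intact
        base, first = os.path.splitext(base)
        ext = first + ext
    return path, base, ext

assert split_filename('/a/b/img.nii.gz') == ('/a/b', 'img', '.nii.gz')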
Example #3
    def fit(self,
            n_epochs,
            clip: float = None,
            checkpoint: int = None,
            trained_model: str = None):
        """ training loop for neural network """
        self.model.train()
        use_valid = self.valid_loader is not None
        train_losses, valid_losses = [], []
        for t in range(1, n_epochs + 1):
            # training
            t_losses = []
            if use_valid: self.model.train(True)
            for src, tgt in self.train_loader:
                src, tgt = src.to(self.device), tgt.to(self.device)
                self.optimizer.zero_grad()
                out = self.model(src)
                loss = self._criterion(out, tgt)
                t_losses.append(loss.item())
                if hasattr(self, 'amp_handle'):
                    with self.amp_handle.scale_loss(
                            loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                if clip is not None:
                    nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()
            train_losses.append(t_losses)
            if hasattr(self, 'scheduler'): self.scheduler.step()

            if checkpoint is not None:
                if t % checkpoint == 0:
                    path, base, ext = split_filename(trained_model)
                    fn = os.path.join(path, base + f'_chk_{t}' + ext)
                    self.save(fn, t)

            # validation (only run if a validation loader was provided)
            v_losses = []
            if use_valid:
                self.model.train(False)
                with torch.no_grad():
                    for src, tgt in self.valid_loader:
                        src, tgt = src.to(self.device), tgt.to(self.device)
                        out = self.model(src)
                        loss = self._criterion(out, tgt)
                        v_losses.append(loss.item())
                    valid_losses.append(v_losses)

            if np.any(np.isnan(t_losses)):
                raise SynthNNError(
                    'NaN in training loss, cannot recover. Exiting.')
            if logger is not None:
                log = f'Epoch: {t} - Training Loss: {np.mean(t_losses):.2e}'
                if use_valid:
                    log += f', Validation Loss: {np.mean(v_losses):.2e}'
                if hasattr(self, 'scheduler'):
                    log += f', LR: {self.scheduler.get_lr()[0]:.2e}'
                logger.info(log)
        return train_losses, valid_losses
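
The checkpoint branch above derives its filename from trained_model; with the split_filename sketch from Example #2 and a hypothetical path, the naming works out as follows:

import os

# 'weights.pth' saved at checkpoint epoch 10 becomes 'weights_chk_10.pth'
path, base, ext = split_filename('/models/weights.pth')
fn = os.path.join(path, base + f'_chk_{10}' + ext)
assert fn == '/models/weights_chk_10.pth'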
Example #4
    def fit(self, n_epochs, clip: float = None, checkpoint: int = None, trained_model: str = None):
        """ training loop for neural network """
        self.model.train()
        use_valid = self.valid_loader is not None
        use_scheduler = hasattr(self, 'scheduler')
        use_restarts = self.config.lr_scheduler == 'cosinerestarts'
        fp16 = hasattr(self, 'amp_handle')
        train_losses, valid_losses = [], []
        n_batches = len(self.train_loader)
        for t in range(1, n_epochs + 1):
            # training
            t_losses = []
            if use_valid: self.model.train(True)
            for i, (src, tgt) in enumerate(self.train_loader):
                src, tgt = src.to(self.device), tgt.to(self.device)
                self.optimizer.zero_grad()
                out = self.model(src)
                loss = self._criterion(out, tgt)
                t_losses.append(loss.item())
                if fp16:
                    with self.amp_handle.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                if clip is not None: nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()
                if use_scheduler: self.scheduler.step(((t - 1) + (i / n_batches)) if use_restarts else None)
                del loss  # save memory by removing ref to gradient tree
            train_losses.append(t_losses)

            if checkpoint is not None:
                if t % checkpoint == 0:
                    path, base, ext = split_filename(trained_model)
                    fn = os.path.join(path, base + f'_chk_{t}' + ext)
                    self.save(fn, t)

            # validation (only run if a validation loader was provided)
            v_losses = []
            if use_valid:
                self.model.train(False)
                with torch.no_grad():
                    for src, tgt in self.valid_loader:
                        src, tgt = src.to(self.device), tgt.to(self.device)
                        out = self.model(src)
                        loss = self._criterion(out, tgt)
                        v_losses.append(loss.item())
                    valid_losses.append(v_losses)

            if not np.all(np.isfinite(t_losses)): raise SynthtorchError('NaN or Inf in training loss, cannot recover. Exiting.')
            if logger is not None:
                log = f'Epoch: {t} - Training Loss: {np.mean(t_losses):.2e}'
                if use_valid: log += f', Validation Loss: {np.mean(v_losses):.2e}'
                if use_scheduler: log += f', LR: {self.scheduler.get_lr()[0]:.2e}'
                logger.info(log)

        self.record = Record(train_losses, valid_losses)
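
When use_restarts is set, scheduler.step() receives a fractional epoch, (t - 1) + i / n_batches, which is how PyTorch's CosineAnnealingWarmRestarts anneals the learning rate within an epoch. A standalone sketch of that stepping pattern (toy model and loop sizes for illustration):

import torch
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

model = torch.nn.Linear(2, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
sched = CosineAnnealingWarmRestarts(opt, T_0=2)  # restart every 2 epochs
n_batches = 4
for t in range(1, 3):           # epochs, 1-indexed as in fit()
    for i in range(n_batches):  # batches within the epoch
        opt.step()
        # float epochs 0.0, 0.25, 0.5, ... drive the cosine annealing
        sched.step((t - 1) + i / n_batches)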
Example #5
    def fit(self, n_epochs, clip: float = None, checkpoint: int = None, trained_model: str = None):
        """ training loop for neural network """
        self.model.train()
        use_tb = self.config.tensorboard and SummaryWriter is not None
        if use_tb: writer = SummaryWriter()
        use_valid = self.valid_loader is not None
        use_scheduler = hasattr(self, 'scheduler')
        use_restarts = self.config.lr_scheduler == 'cosinerestarts'
        train_losses, valid_losses = [], []
        n_batches = len(self.train_loader)
        for t in range(1, n_epochs + 1):
            # training
            t_losses = []
            if use_valid: self.model.train(True)
            for i, (src, tgt) in enumerate(self.train_loader):
                src, tgt = src.to(self.device), tgt.to(self.device)
                self.optimizer.zero_grad()
                out = self.model(src)
                loss = self._criterion(out, tgt)
                t_losses.append(loss.item())
                if self.use_fp16:
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                if clip is not None: nn.utils.clip_grad_norm_(self.model.parameters(), clip)
                self.optimizer.step()
                if use_scheduler: self.scheduler.step(((t-1)+(i/n_batches)) if use_restarts else None)
                if use_tb and i % 20 == 0:
                    writer.add_scalar('Loss/train', loss.item(), ((t-1)*n_batches)+i)
                del loss  # save memory by removing ref to gradient tree
            train_losses.append(t_losses)

            if checkpoint is not None:
                if t % checkpoint == 0:
                    path, base, ext = split_filename(trained_model)
                    fn = os.path.join(path, base + f'_chk_{t}' + ext)
                    self.save(fn, t)

            # validation
            v_losses = []
            if use_valid:
                self.model.train(False)
                with torch.no_grad():
                    for i, (src, tgt) in enumerate(self.valid_loader):
                        src, tgt = src.to(self.device), tgt.to(self.device)
                        out = self.model(src)
                        loss = self._criterion(out, tgt)
                        if use_tb:
                            if i % 20 == 0: writer.add_scalar('Loss/valid', loss.item(), ((t-1)*n_batches)+i)
                            do_plot = i == 0 and ((t - 1) % 5) == 0
                            if do_plot and self.model.dim == 2:
                                writer.add_images('source', src[:8], t, dataformats='NCHW')
                                outimg = out[0][:8] if isinstance(out, tuple) else out[:8]
                                if self.config.color: outimg = torch.round(outimg)
                                writer.add_images('target', outimg, t, dataformats='NCHW')
                            if do_plot: self._histogram_weights(writer, t)
                        v_losses.append(loss.item())
                    valid_losses.append(v_losses)

            if not np.all(np.isfinite(t_losses)): raise SynthtorchError('NaN or Inf in training loss, cannot recover. Exiting.')
            if logger is not None:
                log = f'Epoch: {t} - Training Loss: {np.mean(t_losses):.2e}'
                if use_valid: log += f', Validation Loss: {np.mean(v_losses):.2e}'
                if use_scheduler: log += f', LR: {self.scheduler.get_lr()[0]:.2e}'
                logger.info(log)

        self.record = Record(train_losses, valid_losses)
        if use_tb: writer.close()
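
The TensorBoard calls above come from torch.utils.tensorboard. A minimal standalone sketch of the same logging API, with random data standing in for real batches:

import torch
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter()                  # logs to ./runs/ by default
writer.add_scalar('Loss/train', 0.25, 0)  # (tag, scalar_value, global_step)
imgs = torch.rand(8, 3, 32, 32)           # an NCHW image batch in [0, 1]
writer.add_images('source', imgs, 0, dataformats='NCHW')
writer.close()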