Example #1
0
    def __init__(self, model, loss, optimizer, resume, config):
        """Set up a step-based trainer from its components and ``config``.

        The model is moved to the device chosen by ``_prepare_device`` and is
        wrapped in ``DataParallel`` when more than one device id is returned.
        A timestamped checkpoint directory is created, the config is dumped
        there as ``config.json`` and, when ``resume`` is truthy, training
        state is restored through ``_resume_checkpoint``.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        # Device selection and model placement.
        self.device, gpu_ids = self._prepare_device(config['n_gpu'])
        self.model = model.to(self.device)
        if len(gpu_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        self.loss, self.optimizer = loss, optimizer

        # Hyper-parameters from the "trainer" section.
        trainer_cfg = config['trainer']
        self.steps = trainer_cfg['steps']
        self.save_freq = trainer_cfg['save_freq']
        self.verbosity = trainer_cfg['verbosity']
        self.start_step = 0

        # One timestamp is shared by the checkpoint and the log directory.
        run_stamp = datetime.datetime.now().strftime('%m%d_%H%M%S')
        self.checkpoint_dir = os.path.join(
            trainer_cfg['save_dir'], config['name'], run_stamp)

        vis_cfg = config['visualization']
        self.writer = WriterTensorboardX(
            os.path.join(vis_cfg['log_dir'], config['name'], run_stamp),
            self.logger,
            vis_cfg['tensorboardX'])

        # Keep a copy of the configuration next to the checkpoints.
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as fh:
            json.dump(config, fh, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #2
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 optimizer,
                 resume,
                 config,
                 train_logger=None):
        """Prepare an epoch-based trainer from its components and ``config``.

        Selects the device through ``_prepare_device`` (passing the optional
        ``gpu_list`` config entry), moves the model onto it, reads the
        ``trainer`` section, configures best-model monitoring, creates the
        timestamped checkpoint directory and the writer, dumps the config as
        JSON and finally resumes from ``resume`` when it is truthy.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        # Device selection and model placement.
        n_gpu, gpu_list = config['n_gpu'], config.get('gpu_list')
        self.device, gpu_ids = self._prepare_device(n_gpu, gpu_list)
        self.model = model.to(self.device)
        if len(gpu_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        self.loss, self.metrics = loss, metrics
        self.optimizer, self.train_logger = optimizer, train_logger

        trainer_cfg = config['trainer']
        self.epochs = trainer_cfg['epochs']
        self.save_period = trainer_cfg['save_period']
        self.verbosity = trainer_cfg['verbosity']
        self.monitor = trainer_cfg.get('monitor', 'off')

        # "monitor" is either 'off' or '<min|max> <metric name>'.
        if self.monitor != 'off':
            self.mnt_mode, self.mnt_metric = self.monitor.split()
            assert self.mnt_mode in ['min', 'max']

            if self.mnt_mode == 'min':
                self.mnt_best = math.inf
            else:
                self.mnt_best = -math.inf
            self.early_stop = trainer_cfg.get('early_stop', math.inf)
        else:
            self.mnt_mode = 'off'
            self.mnt_best = 0

        self.start_epoch = 1

        # One timestamp is shared by the checkpoint and the log directory.
        run_stamp = datetime.datetime.now().strftime('%m-%d_%H-%M-%S')
        self.checkpoint_dir = os.path.join(
            trainer_cfg['save_dir'], config['name'], run_stamp)
        log_dir = os.path.join(
            trainer_cfg['log_dir'], config['name'], run_stamp)
        self.writer = WriterTensorboardX(
            log_dir, self.logger, trainer_cfg['tensorboardX'])

        # Keep a copy of the configuration next to the checkpoints.
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as fh:
            json.dump(config, fh, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #3
0
def main(args):
    """Train a capsule network on MNIST or CIFAR-10 as described by a YAML file.

    Args:
        args: parsed command-line namespace. Reads ``config`` (path to the
            YAML configuration), ``multi_gpu`` and ``disable_gpu``.

    Raises:
        ValueError: if the configured dataset is neither MNIST nor CIFAR.
    """
    # Read the config inside a context manager so the handle is always
    # closed. safe_load is used because yaml.load() without an explicit
    # Loader is rejected by current PyYAML releases and can build arbitrary
    # objects on older ones.
    with open(args.config) as handle:
        conf = yaml.safe_load(handle)
    # Promote the selected model's sub-section to top-level keys.
    conf.update(conf[conf['model']])

    if args.multi_gpu:
        # Scale the batch size by the number of visible CUDA devices.
        conf['batch_size'] *= torch.cuda.device_count()

    datasets = {
        'MNIST': torchvision.datasets.MNIST,
        'CIFAR': torchvision.datasets.CIFAR10
    }
    dataset_key = conf['dataset'].upper()
    if dataset_key == 'MNIST':
        conf['data_path'] = os.path.join(conf['data_path'], 'MNIST')
        size = 28
        classes = list(range(10))
        mean, std = ((0.1307,), (0.3081,))
    elif dataset_key == 'CIFAR':
        conf['data_path'] = os.path.join(conf['data_path'], 'CIFAR')
        size = 32
        classes = ['plane', 'car', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck']
        mean, std = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    else:
        raise ValueError('Dataset must be either MNIST or CIFAR!')

    # The same transform pipeline is applied to both splits.
    transform = transforms.Compose([
        transforms.RandomCrop(size, padding=2),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
    dataset_cls = datasets[dataset_key]
    trainset = dataset_cls(root=conf['data_path'], train=True,
                           download=True, transform=transform)
    testset = dataset_cls(root=conf['data_path'], train=False,
                          download=True, transform=transform)
    loaders = {
        'train': torch.utils.data.DataLoader(
            trainset, batch_size=conf['batch_size'], shuffle=True,
            num_workers=4),
        'test': torch.utils.data.DataLoader(
            testset, batch_size=conf['batch_size'], shuffle=False,
            num_workers=4),
    }
    print(9*'#', 'Using {} dataset'.format(conf['dataset']), 9*'#')

    # Training
    use_gpu = not args.disable_gpu and torch.cuda.is_available()
    caps_net = CapsNetTrainer(loaders,
                              conf['model'],
                              conf['lr'],
                              conf['lr_decay'],
                              conf['num_classes'],
                              conf['num_routing'],
                              conf['loss'],
                              use_gpu=use_gpu,
                              multi_gpu=args.multi_gpu)

    # Make sure the log and checkpoint directories exist before training.
    ensure_dir('logs')
    logger = {
        'train': Logger('logs/{}-train'.format(conf['dataset'])),
        'test': Logger('logs/{}-test'.format(conf['dataset'])),
    }
    ensure_dir(conf['save_dir'])
    caps_net.train(conf['epochs'], classes, conf['save_dir'], logger)
Example #4
0
    def evaluate_full_image_list(self, file_list, save_dir, save_name):
        """Inpaint full-size images, save the results and label-count stats.

        Each file is read as grayscale; images larger than 3200 pixels on
        either side are skipped. The remaining images are cleaned with
        ``remove_artifacts``, inpainted with ``self.model`` and labelled, and
        the number of labels is recorded for the input, for the binary
        prediction and for the artifact-cleaned prediction.

        :param file_list: iterable of image file paths
        :param save_dir: root directory for the output images and CSV files
        :param save_name: base name of the CSV files written into ``save_dir``
        :return: None
        """

        def label_components(mask):
            # Scale by 1/255, truncate to uint8, then label with
            # 8-neighbourhood; returns (RGB label image, label count).
            labeled, count = label(
                (mask / 255.).astype(np.uint8),
                neighbors=8,
                background=0,
                return_num=True)
            return convert_labels_to_rgb(labeled), count

        # One output directory per example kind, excluding 'target' entries.
        out_dirs = []
        for d in self.get_full_image_example_dir_names():
            if 'target' not in d:
                path = os.path.join(save_dir, 'chickpea-full-image', d)
                ensure_dir(path)
                out_dirs.append(path)

        metric_names = self.get_full_image_metric_names()
        metrics = {n: [] for n in metric_names}

        with torch.no_grad():
            for i, file in enumerate(tqdm(file_list)):
                image = cv2.imread(file, 0)
                if image.shape[0] > 3200 or image.shape[1] > 3200:
                    continue
                image = remove_artifacts(image, 10)

                (resized_img, binary_inpainted, rgb_inpainted,
                 unthresh_inpainted) = inpaint_full_image(
                     image, self.model, 50)

                cleaned_inpainted = remove_artifacts(binary_inpainted, 10)

                labeled_input, num_labels_input = label_components(
                    resized_img)
                labeled_pred, num_labels_pred = label_components(
                    binary_inpainted)
                # Keep the cleaned count in its own variable: reusing
                # num_labels_pred here would overwrite the raw prediction
                # count and make both metric columns identical.
                labeled_pred_rm, num_labels_pred_rm = label_components(
                    cleaned_inpainted)

                metrics["num_labels_input"].append(num_labels_input)
                metrics["num_labels_pred"].append(num_labels_pred)
                metrics["num_labels_pred_rm"].append(num_labels_pred_rm)

                # Paired positionally with out_dirs.
                # NOTE(review): assumes the directory-name order matches this
                # list — confirm against get_full_image_example_dir_names.
                outputs = [
                    resized_img, binary_inpainted, cleaned_inpainted,
                    labeled_input, labeled_pred, unthresh_inpainted,
                    rgb_inpainted, labeled_pred_rm
                ]
                for save_path, output in zip(out_dirs, outputs):
                    cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)),
                                output.astype(np.uint8))

        df = pd.DataFrame(metrics, columns=metrics.keys())
        df.to_csv(save_dir + '/' + save_name + '.csv')
        df.describe().to_csv(save_dir + '/' + save_name + '-stats.csv')
    def __init__(self,
                 models,
                 metrics,
                 optimizers,
                 resume,
                 config,
                 train_logger=None):
        """Prepare a trainer that drives one or several models.

        ``models`` may be a single model or an iterable of models; a single
        model is wrapped in a list. Every model is moved to the device chosen
        by ``_prepare_device`` and wrapped in ``DataParallel`` when more than
        one device id is returned. A timestamped checkpoint directory is
        created, the config is dumped there as ``config.json`` and training
        state is restored from ``resume`` when it is truthy.
        """
        # collections.Iterable was removed in Python 3.10; the ABC lives in
        # collections.abc. Imported locally so this block is self-contained.
        from collections.abc import Iterable

        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        if not isinstance(models, Iterable):
            models = [models]
        else:
            assert len(models) > 0

        # setup GPU device if available, move model into configured device
        self.device, device_ids = self._prepare_device(config['n_gpu'])
        self.models = []

        for model in models:
            placed = model.to(self.device)
            if len(device_ids) > 1:
                placed = torch.nn.DataParallel(placed, device_ids=device_ids)
            self.models.append(placed)

        self.metrics = metrics
        self.optimizers = optimizers

        self.epochs = config['trainer']['epochs']
        self.save_freq = config['trainer']['save_freq']
        self.verbosity = config['trainer']['verbosity']

        self.train_logger = train_logger

        # configuration to monitor model performance and save best
        self.monitor = config['trainer']['monitor']
        self.monitor_mode = config['trainer']['monitor_mode']
        assert self.monitor_mode in ['min', 'max', 'off']
        # Any mode other than 'min' (including 'off') starts from -inf.
        self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf
        self.start_epoch = 1

        # setup directory for checkpoint saving
        start_time = datetime.datetime.now().strftime('%m%d_%H%M%S')
        self.checkpoint_dir = os.path.join(config['trainer']['save_dir'],
                                           config['name'], start_time)
        # setup visualization writer instance
        writer_dir = os.path.join(config['visualization']['log_dir'],
                                  config['name'], start_time)
        self.writer = WriterTensorboardX(
            writer_dir, self.logger, config['visualization']['tensorboardX'])

        # Save configuration file into checkpoint directory:
        ensure_dir(self.checkpoint_dir)
        config_save_path = os.path.join(self.checkpoint_dir, 'config.json')
        with open(config_save_path, 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #6
0
def trainer_paths(config):
    """Return the checkpoint and tensorboard-run paths for this run.

    Both paths are sub-directories of ``arch_datetime_path(config)`` and are
    passed through ``ensure_dir``, e.g.
    saved/Mnist_LeNet/<start time>/checkpoints
    saved/Mnist_LeNet/<start time>/runs
    """
    run_root = arch_datetime_path(config)
    checkpoints_dir = ensure_dir(run_root / 'checkpoints')
    runs_dir = ensure_dir(run_root / 'runs')
    return (checkpoints_dir, runs_dir)
Example #7
0
 def _get_SummaryWriter(self):
     """Create the training summary writer unless debugging or testing.

     Nothing is created when ``args.debug`` or ``args.do_test`` is set.
     Otherwise the experiment's summary directory is ensured and a file
     writer is opened on a timestamped ``train`` sub-directory.
     """
     if self.args.debug or self.args.do_test:
         return
     ensure_dir(os.path.join('./summary/', self.experiment_name))
     stamp = time.strftime("%m%d-%H-%M-%S", time.localtime(time.time()))
     log_dir = './summary/{}/{}/train'.format(self.experiment_name, stamp)
     self.summarywriter = summary.create_file_writer(logdir=log_dir)
Example #8
0
 def _download_taxonomy(self):
     """Download the taxonomy dump archive into ``self.taxonomy_dir``.

     The directory is ensured first, then ``TAX_URL`` is fetched to
     ``taxdump.tar.gz`` inside it.

     Raises:
         SystemExit: with status 1 when ``download_file`` returns ``None``.
     """
     ensure_dir(self.taxonomy_dir)
     self.logger.info("Downloading taxonomic tree...")
     res = download_file(self.TAX_URL, self.taxonomy_dir / 'taxdump.tar.gz')
     if res is None:
         self.logger.error(
             "Could not download taxdump information...Please try again")
         # Raise SystemExit directly: the bare exit() helper is injected by
         # the site module for interactive use and may be absent.
         raise SystemExit(1)
Example #9
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 resume,
                 config,
                 train_logger=None):
        """Prepare a trainer that builds its own optimizer and LR scheduler.

        The model is moved to ``cuda:<config['gpu']>`` only when CUDA is both
        requested (``config['cuda']``) and available. The optimizer is built
        from ``config['optimizer_type']`` over the parameters that require
        gradients; a scheduler is built when ``config['lr_scheduler_type']``
        names an attribute of ``optim.lr_scheduler``. The config is dumped
        into the checkpoint directory and training state is restored from
        ``resume`` when it is truthy.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        self.model = model
        self.loss = loss
        self.metrics = metrics
        self.name = config['name']
        self.epochs = config['trainer']['epochs']
        print('self.epochs ', self.epochs)
        self.save_freq = config['trainer']['save_freq']
        self.verbosity = config['trainer']['verbosity']
        self.with_cuda = config['cuda'] and torch.cuda.is_available()
        if self.with_cuda:
            # Branch on with_cuda rather than on "else": previously a config
            # with cuda disabled still fell through to the CUDA path.
            self.gpu = torch.device('cuda:' + str(config['gpu']))
            self.model = self.model.to(self.gpu)
        elif config['cuda']:
            self.logger.warning(
                'Warning: There\'s no CUDA support on this machine, '
                'training is performed on CPU.')

        self.train_logger = train_logger
        # here we add to the optimizer only those parameters that are not frozen!
        non_frozen_parameters = [
            p for p in model.parameters() if p.requires_grad
        ]
        print('%d non_frozen_parameters ' % len(non_frozen_parameters))
        self.optimizer = getattr(optim, config['optimizer_type'])(
            non_frozen_parameters, **config['optimizer'])
        # An unknown scheduler name yields None, i.e. no scheduler.
        self.lr_scheduler = getattr(optim.lr_scheduler,
                                    config['lr_scheduler_type'], None)
        if self.lr_scheduler:
            self.lr_scheduler = self.lr_scheduler(self.optimizer,
                                                  **config['lr_scheduler'])
            self.lr_scheduler_freq = config['lr_scheduler_freq']
        self.monitor = config['trainer']['monitor']
        self.monitor_mode = config['trainer']['monitor_mode']

        assert self.monitor_mode == 'min' or self.monitor_mode == 'max'
        self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf

        self.start_epoch = 1
        self.checkpoint_dir = os.path.join(config['trainer']['save_dir'],
                                           self.name)
        ensure_dir(self.checkpoint_dir)
        # Use a context manager so the config file is flushed and closed.
        config_path = os.path.join(self.checkpoint_dir, 'config.json')
        with open(config_path, 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)
        if resume:
            self._resume_checkpoint(resume)
            torch.cuda.empty_cache()
        print('self.monitor_best = ', self.monitor_best)
Example #10
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 resume,
                 config,
                 train_logger=None):
        """Prepare a (multi-GPU capable) trainer with its own optimizer.

        When CUDA is both requested (``config['cuda']``) and available, the
        model is wrapped in ``DataParallel`` and moved to ``cuda``; otherwise
        it stays on the CPU. The optimizer and optional LR scheduler are
        built from the config, the config is dumped into the checkpoint
        directory and training state is restored from ``resume`` when it is
        truthy.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        self.model = model
        self.loss = loss
        self.metrics = metrics
        self.name = config['name']
        self.epochs = config['trainer']['epochs']
        self.save_freq = config['trainer']['save_freq']
        self.verbosity = config['trainer']['verbosity']

        self.with_cuda = config['cuda'] and torch.cuda.is_available()
        if self.with_cuda:
            # Branch on with_cuda rather than on "else": previously a config
            # with cuda disabled still selected the CUDA device.
            self.gpus = {i: item for i, item in enumerate(self.config['gpus'])}
            device = 'cuda'
            self.model = torch.nn.DataParallel(self.model)
            torch.cuda.empty_cache()
        else:
            if config['cuda']:
                self.logger.warning(
                    'Warning: There\'s no CUDA support on this machine, '
                    'training is performed on CPU.')
            device = 'cpu'

        self.device = torch.device(device)
        self.model = self.model.to(self.device)

        self.logger.debug('Model is initialized.')
        self._log_memory_useage()

        self.train_logger = train_logger
        self.optimizer = getattr(optim, config['optimizer_type'])(
            model.parameters(), **config['optimizer'])
        # An unknown scheduler name yields None, i.e. no scheduler.
        self.lr_scheduler = getattr(optim.lr_scheduler,
                                    config['lr_scheduler_type'], None)
        if self.lr_scheduler:
            self.lr_scheduler = self.lr_scheduler(self.optimizer,
                                                  **config['lr_scheduler'])
            self.lr_scheduler_freq = config['lr_scheduler_freq']
        self.monitor = config['trainer']['monitor']
        self.monitor_mode = config['trainer']['monitor_mode']
        assert self.monitor_mode == 'min' or self.monitor_mode == 'max'
        self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf
        self.start_epoch = 1
        self.checkpoint_dir = os.path.join(config['trainer']['save_dir'],
                                           self.name)
        ensure_dir(self.checkpoint_dir)
        # Use a context manager so the config file is flushed and closed.
        config_path = os.path.join(self.checkpoint_dir, 'config.json')
        with open(config_path, 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)
        if resume:
            self._resume_checkpoint(resume)
    def __init__(
        self, model, losses, metrics, optimizer_g,
        optimizer_d_s, optimizer_d_t,
        resume, config,
        train_logger=None,
        pretrained_path=None,
    ):
        """Prepare a trainer with one generator and two discriminator optimizers.

        The model is moved to the device chosen by ``_prepare_device``. After
        the config has been dumped into a timestamped checkpoint directory,
        state is restored from ``resume`` if truthy, otherwise from
        ``pretrained_path`` if given. Only then is the model wrapped in
        ``DataParallel`` (when more than one device id was returned).
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        # Device selection and model placement.
        self.device, gpu_ids = self._prepare_device(config['n_gpu'])
        self.model = model.to(self.device)

        self.losses, self.metrics = losses, metrics
        self.optimizer_g = optimizer_g
        self.optimizer_d_s, self.optimizer_d_t = optimizer_d_s, optimizer_d_t

        trainer_cfg = config['trainer']
        self.epochs = trainer_cfg['epochs']
        self.save_freq = trainer_cfg['save_freq']
        self.verbosity = trainer_cfg['verbosity']

        # False lets a checkpoint load without strict state-name matching,
        # e.g. a model pretrained without GAN parts reused with GAN parts.
        self.pretrained_load_strict = trainer_cfg['pretrained_load_strict']

        self.train_logger = train_logger

        # Best-model monitoring.
        self.monitor = trainer_cfg['monitor']
        self.monitor_mode = trainer_cfg['monitor_mode']
        assert self.monitor_mode in ['min', 'max', 'off']
        if self.monitor_mode == 'min':
            self.monitor_best = math.inf
        else:
            self.monitor_best = -math.inf
        self.start_epoch = 1

        # One timestamp is shared by the checkpoint and the log directory.
        run_stamp = datetime.datetime.now().strftime('%m%d_%H%M%S')
        self.checkpoint_dir = os.path.join(
            trainer_cfg['save_dir'], config['name'], run_stamp)
        vis_cfg = config['visualization']
        log_dir = os.path.join(vis_cfg['log_dir'], config['name'], run_stamp)
        self.writer = WriterTensorboardX(
            log_dir, self.logger, vis_cfg['tensorboardX'])

        # Keep a copy of the configuration next to the checkpoints.
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as fh:
            json.dump(config, fh, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
        elif pretrained_path is not None:
            self._load_pretrained(pretrained_path)

        # Wrap in DataParallel only after any checkpoint has been loaded.
        if len(gpu_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=gpu_ids)
Example #12
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 optimizer,
                 resume,
                 config,
                 train_logger=None):
        """Prepare a single-device trainer from its components and ``config``.

        The model runs on ``cuda:<config['gpu']>`` when CUDA is requested and
        available, otherwise on the CPU (with a warning if CUDA was
        requested). A timestamped checkpoint directory is created, the config
        is dumped there and state is restored from ``resume`` when truthy.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        # Device selection and model placement.
        cuda_available = torch.cuda.is_available()
        self.with_cuda = config['cuda'] and cuda_available
        if config['cuda'] and not torch.cuda.is_available():
            self.logger.warning(
                'Warning: There\'s no GPU available on this machine, '
                'training will be performed on CPU.')
        if self.with_cuda:
            device_name = 'cuda:' + str(config['gpu'])
        else:
            device_name = 'cpu'
        self.device = torch.device(device_name)
        self.model = model.to(self.device)

        self.loss, self.metrics, self.optimizer = loss, metrics, optimizer

        trainer_cfg = config['trainer']
        self.epochs = trainer_cfg['epochs']
        self.save_freq = trainer_cfg['save_freq']
        self.verbosity = trainer_cfg['verbosity']

        self.train_logger = train_logger

        # Best-model monitoring.
        self.monitor = trainer_cfg['monitor']
        self.monitor_mode = trainer_cfg['monitor_mode']
        assert self.monitor_mode in ['min', 'max', 'off']
        if self.monitor_mode == 'min':
            self.monitor_best = math.inf
        else:
            self.monitor_best = -math.inf
        self.start_epoch = 1

        # One timestamp is shared by the checkpoint and the log directory.
        run_stamp = datetime.datetime.now().strftime('%m%d_%H%M%S')
        self.checkpoint_dir = os.path.join(
            trainer_cfg['save_dir'], config['name'], run_stamp)
        vis_cfg = config['visualization']
        log_dir = os.path.join(vis_cfg['log_dir'], config['name'], run_stamp)
        self.writer = WriterTensorboardX(
            log_dir, self.logger, vis_cfg['tensorboardX'])

        # Keep a copy of the configuration next to the checkpoints.
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as fh:
            json.dump(config, fh, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #13
0
    def __init__(
        self, model, loss, metrics, optimizer, resume, config, train_logger=None
    ):
        """Prepare an epoch-based trainer from its components and ``config``.

        Moves the model to the device chosen by ``_prepare_device``, reads
        the ``trainer`` section, configures best-model monitoring, creates a
        timestamped checkpoint directory and the writer, dumps the config as
        JSON and resumes from ``resume`` when it is truthy.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        # Device selection and model placement.
        self.device, gpu_ids = self._prepare_device(config["n_gpu"])
        self.model = model.to(self.device)
        if len(gpu_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        self.loss, self.metrics = loss, metrics
        self.optimizer, self.train_logger = optimizer, train_logger

        trainer_cfg = config["trainer"]
        self.epochs = trainer_cfg["epochs"]
        self.save_period = trainer_cfg["save_period"]
        self.verbosity = trainer_cfg["verbosity"]
        self.monitor = trainer_cfg.get("monitor", "off")

        # "monitor" is either "off" or "<min|max> <metric name>".
        if self.monitor != "off":
            self.mnt_mode, self.mnt_metric = self.monitor.split()
            assert self.mnt_mode in ["min", "max"]

            if self.mnt_mode == "min":
                self.mnt_best = math.inf
            else:
                self.mnt_best = -math.inf
            self.early_stop = trainer_cfg.get("early_stop", math.inf)
        else:
            self.mnt_mode = "off"
            self.mnt_best = 0

        self.start_epoch = 1

        # One timestamp is shared by the checkpoint and the log directory.
        run_stamp = datetime.datetime.now().strftime("%m%d_%H%M%S")
        self.checkpoint_dir = os.path.join(
            trainer_cfg["save_dir"], config["name"], run_stamp
        )
        log_dir = os.path.join(trainer_cfg["log_dir"], config["name"], run_stamp)
        self.writer = WriterTensorboardX(
            log_dir, self.logger, trainer_cfg["tensorboardX"]
        )

        # Keep a copy of the configuration next to the checkpoints.
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, "config.json"), "w") as fh:
            json.dump(config, fh, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #14
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 optimizer,
                 resume,
                 config):
        """Prepare a trainer with periodic evaluation and early stopping settings.

        Moves the model to the device chosen by ``_prepare_device``, reads
        the ``trainer`` section, opens a ``SummaryWriter`` on a timestamped
        log directory (handing it to the loss when the loss exposes
        ``set_writer``), dumps the config into the checkpoint directory and
        resumes from ``resume`` when it is truthy.
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        self.data_type = torch.float32

        # Device selection and model placement.
        self.device, gpu_ids = self._prepare_device(config['n_gpu'])
        self.model = model.to(self.device)
        if len(gpu_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        self.loss, self.metrics, self.optimizer = loss, metrics, optimizer

        trainer_cfg = config['trainer']
        self.epochs = trainer_cfg['epochs']
        self.save_freq = trainer_cfg['save_freq']
        self.verbosity = trainer_cfg['verbosity']
        self.eval_freq = trainer_cfg['eval_freq']
        self.metric_freq = trainer_cfg['metric_freq']
        self.early_stopping = trainer_cfg['early_stopping']

        # Best-model monitoring.
        self.monitor = trainer_cfg['monitor']
        self.monitor_mode = trainer_cfg['monitor_mode']
        assert self.monitor_mode in ['min', 'max', 'off']
        if self.monitor_mode == 'min':
            self.monitor_best = math.inf
        else:
            self.monitor_best = -math.inf
        self.monitor_best_se = 0
        self.start_epoch = 1
        self.best_epoch = 0

        # One timestamp is shared by the checkpoint and the log directory.
        run_stamp = datetime.datetime.now().strftime('%m%d_%H%M%S')
        self.checkpoint_dir = os.path.join(
            trainer_cfg['save_dir'], config['name'], run_stamp)
        log_dir = os.path.join(
            config['visualization']['log_dir'], config['name'], run_stamp)
        self.writer = SummaryWriter(log_dir)
        # Losses that can log on their own receive the writer.
        if hasattr(self.loss, "set_writer"):
            self.loss.set_writer(self.writer)

        # Keep a copy of the configuration next to the checkpoints.
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as fh:
            json.dump(config, fh, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #15
0
    def __init__(
        self,
        model,
        config,
        args,
        test_data_loader,
        begin_time,
        resume_file,
        loss_weight,
    ):
        """Prepare a tester: device, loss/optimizer objects and output paths.

        The device is the CPU when ``args.gpu`` is -1, otherwise
        ``cuda:<args.gpu>``. The checkpoint to load is ``resume_file`` when
        given, else ``checkpoint-best.pth`` under the run's save directory.
        The test log and prediction directories are ensured to exist.
        """
        # General settings.
        self.config = config
        self.args = args
        if self.args.gpu == -1:
            self.device = torch.device('cpu')
        else:
            self.device = torch.device('cuda:{}'.format(self.args.gpu))

        # Model, loss and optimisation objects, all on the chosen device.
        self.model = model.to(self.device)
        self.loss_weight = loss_weight.to(self.device)
        criterion = self._loss(loss_function=self.config.loss)
        self.loss = criterion.to(self.device)
        self.optimizer = self._optimizer(lr_algorithm=self.config.lr_algorithm)
        self.lr_scheduler = self._lr_scheduler()

        # Timing and data.
        self.begin_time = begin_time
        self.test_data_loader = test_data_loader

        # Per-evaluation history.
        eval_history = {'loss': [], 'acc': [], 'miou': [], 'time': []}
        self.history = {'eval': eval_history}

        # Output locations: <output>/test/{log,predict}/<model>/<begin_time>.
        run_parts = (self.model.name, self.begin_time)
        self.test_log_path = os.path.join(
            self.args.output, 'test', 'log', *run_parts)
        self.predict_path = os.path.join(
            self.args.output, 'test', 'predict', *run_parts)

        # begin_time here is the same as the time used in BaseTrainer.py, so
        # the fallback points at that run's best checkpoint.
        if resume_file is not None:
            self.resume_ckpt_path = resume_file
        else:
            self.resume_ckpt_path = os.path.join(
                self.config.save_dir, self.model.name, self.begin_time,
                'checkpoint-best.pth')

        ensure_dir(self.test_log_path)
        ensure_dir(self.predict_path)
Example #16
0
    def __init__(self, model, config):
        """Set up the synthetic and chickpea test data and the testing directory."""
        super(UnetEvaluator, self).__init__(model, config)

        # Test loaders for the two named data sets.
        make_loader = module_data.TestRootDataLoader
        self.syn_test_dataloader = make_loader(name='synthetic')
        self.real_test_dataloader = make_loader(name='chickpea')
        self.chickpea_test_file_list = get_files(chickpea_valid_path)

        # Results go to <checkpoint_dir>/testing.
        checkpoint_root = self.config["checkpoint_dir"]
        self.testing_dir = os.path.join(checkpoint_root, 'testing')
        ensure_dir(self.testing_dir)
    def _create_saving_dir(self, args):
        """Create and return this run's checkpoint directory.

        The path is built from the global config's save directory,
        ``args.ckpts_subdir``, the config name and ``self.start_time``. When
        ``args.resume`` is given, a ``resumed_ckpt.pth`` symlink to it is
        placed inside the new directory.
        """
        path_parts = (
            global_config['trainer']['save_dir'],
            args.ckpts_subdir,
            global_config['name'],
            self.start_time,
        )
        saving_dir = os.path.join(*path_parts)
        ensure_dir(saving_dir)

        if args.resume is None:
            return saving_dir

        # Leave a link to the resumed checkpoint as a reference.
        link_path = os.path.join(saving_dir, 'resumed_ckpt.pth')
        os.symlink(os.path.abspath(args.resume), link_path)
        return saving_dir
Example #18
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 resume,
                 config,
                 train_logger=None):
        """Prepare a trainer that builds its own optimizer and LR scheduler.

        The optimizer is built from ``config['optimizer_type']`` over the
        parameters that require gradients; a scheduler is built when
        ``config['lr_scheduler_type']`` names an attribute of
        ``optim.lr_scheduler``. The config is dumped into the checkpoint
        directory and training state is restored from ``resume`` when it is
        truthy. ``train_logger`` is optional; when given, its ``propagate``
        flag is switched off.
        """
        self.config = config
        self.logger = Logger(
            self.__class__.__name__
        ).logger  # logging.getLogger(self.__class__.__name__)
        self.model = model
        self.loss = loss
        self.metrics = metrics
        self.name = config['name']
        self.epochs = config['trainer']['epochs']
        self.device = config['device']

        self.save_freq = config['trainer']['save_freq']
        self.verbosity = config['trainer']['verbosity']

        self.train_logger = train_logger
        # train_logger defaults to None; touching .propagate unconditionally
        # raised AttributeError in that case.
        if self.train_logger is not None:
            self.train_logger.propagate = False
        # here we add to the optimizer only those parameters that are not frozen!
        non_frozen_parameters = [
            p for p in model.parameters() if p.requires_grad
        ]
        self.logger.info('%d non_frozen_parameters ' %
                         len(non_frozen_parameters))
        self.optimizer = getattr(optim, config['optimizer_type'])(
            non_frozen_parameters, **config['optimizer'])

        # An unknown scheduler name yields None, i.e. no scheduler.
        self.lr_scheduler = getattr(optim.lr_scheduler,
                                    config['lr_scheduler_type'], None)
        if self.lr_scheduler:
            self.lr_scheduler = self.lr_scheduler(self.optimizer,
                                                  **config['lr_scheduler'])
            self.lr_scheduler_freq = config['lr_scheduler_freq']
        self.monitor = config['trainer']['monitor']
        self.monitor_mode = config['trainer']['monitor_mode']

        assert self.monitor_mode == 'min' or self.monitor_mode == 'max'
        self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf
        self.start_epoch = 1
        self.checkpoint_dir = os.path.join(config['trainer']['save_dir'],
                                           self.name)
        ensure_dir(self.checkpoint_dir)
        # Use a context manager so the config file is flushed and closed.
        config_path = os.path.join(self.checkpoint_dir, 'config.json')
        with open(config_path, 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)
        if resume:
            self._resume_checkpoint(resume)
Example #19
0
    def __init__(self, model, config):
        """Store the model in eval mode and prepare the output directories.

        :param model: model object; ``eval()`` is called on it
        :param config: mapping that provides ``"checkpoint_dir"``
        """
        self.model = model
        self.config = config
        self.model.eval()

        # validation/ and testing/ sit side by side in the checkpoint directory
        checkpoint_root = self.config["checkpoint_dir"]
        self.validation_dir = os.path.join(checkpoint_root, 'validation')
        self.testing_dir = os.path.join(checkpoint_root, 'testing')
        for output_dir in (self.validation_dir, self.testing_dir):
            ensure_dir(output_dir)
Example #20
0
    def save_patch_examples(self, example_dict, save_dir, save_name):
        """
        Write the patch examples held in ``example_dict`` to disk as PNG files.

        For every example seven images are produced, in this order: input,
        target, prediction, labelled input, labelled target, labelled
        prediction and un-thresholded prediction. They are paired positionally
        with the folder names returned by
        ``self.get_patch_example_dir_names()``; pairing stops at the shorter of
        the two sequences.

        :param example_dict: dictionary with the keys ``"input_images"``,
            ``"target_images"``, ``"pred_images"`` and
            ``"unthresh_pred_images"``
        :param save_dir: saving directory
        :param save_name: saving name, used as a sub folder of ``save_dir``
        :return: None
        """

        def to_rgb_labels(binary_image):
            # label the binary image, then map the labels to colours
            components = label(binary_image.astype(np.uint8),
                               neighbors=8,
                               background=0)
            return convert_labels_to_rgb(components)

        # one output folder per image kind, created up front
        output_dirs = []
        for folder_name in self.get_patch_example_dir_names():
            folder = os.path.join(save_dir, save_name, folder_name)
            ensure_dir(folder)
            output_dirs.append(folder)

        # obtain image lists
        inputs = example_dict["input_images"]
        targets = example_dict["target_images"]
        preds = example_dict["pred_images"]
        unthresh_preds = example_dict["unthresh_pred_images"]

        examples = zip(inputs, targets, unthresh_preds, preds)
        for index, (patch, target, unthresh_pred, pred) in enumerate(examples):
            variants = [
                patch,
                target,
                pred,
                to_rgb_labels(patch),
                to_rgb_labels(target),
                to_rgb_labels(pred),
                unthresh_pred,
            ]

            # save images, scaled by 255 and cast to uint8
            file_name = '{}.png'.format(index)
            for folder, image in zip(output_dirs, variants):
                cv2.imwrite(os.path.join(folder, file_name),
                            (image * 255).astype(np.uint8))
Example #21
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 data_loader,
                 valid_data_loader,
                 optimizer,
                 epochs,
                 batch_size,
                 save_dir,
                 save_freq,
                 resume,
                 verbosity,
                 training_name,
                 device,
                 train_logger=None,
                 writer=None,
                 monitor='loss',
                 monitor_mode='min'):
        """Store the training components and prepare the checkpoint directory.

        Every argument except ``save_dir`` and ``resume`` is stored on the
        instance under the same name.

        :param valid_data_loader: validation loader; ``self.valid`` is True
            exactly when this is not ``None``
        :param save_dir: root directory; checkpoints go to
            ``save_dir/training_name``
        :param resume: when truthy, passed to ``self._resume_checkpoint``
        :param monitor_mode: ``'min'`` or ``'max'``; selects the initial value
            of ``self.monitor_best`` (``inf`` or ``-inf``)
        :raises AssertionError: if ``monitor_mode`` is any other value
        """
        self.logger = logging.getLogger(self.__class__.__name__)

        # model and objective
        self.model, self.loss, self.metrics = model, loss, metrics

        # data
        self.data_loader = data_loader
        self.batch_size = batch_size
        self.valid_data_loader = valid_data_loader
        self.valid = self.valid_data_loader is not None

        # optimisation schedule
        self.optimizer = optimizer
        self.epochs = epochs
        self.save_freq = save_freq
        self.verbosity = verbosity

        # bookkeeping
        self.training_name = training_name
        self.train_logger = train_logger
        self.writer = writer
        self.train_iter = 0
        self.valid_iter = 0

        # monitoring of the best value seen so far
        self.device = device
        self.monitor = monitor
        self.monitor_mode = monitor_mode
        assert monitor_mode in ('min', 'max')
        if monitor_mode == 'min':
            self.monitor_best = math.inf
        else:
            self.monitor_best = -math.inf
        self.start_epoch = 1

        self.checkpoint_dir = os.path.join(save_dir, training_name)
        ensure_dir(self.checkpoint_dir)
        if resume:
            self._resume_checkpoint(resume)
Example #22
0
    def __init__(self, model, loss, metrics, class_wise, optimizer, resume, config, train_logger=None):
        """Set up device, monitoring and the checkpoint directory.

        :param model: model, moved to the device chosen by
            ``self._prepare_device`` and wrapped in ``DataParallel`` when more
            than one device id is returned
        :param loss: stored as ``self.loss``
        :param metrics: stored as ``self.metrics``
        :param class_wise: stored as ``self.class_wise``
        :param optimizer: stored as ``self.optimizer``
        :param resume: when truthy, passed to ``self._resume_checkpoint``
        :param config: configuration; reads ``n_gpu``, ``gpu_id``, ``name``,
            ``arch.type`` and the ``trainer`` section (``epochs``,
            ``save_period``, ``verbosity``, ``save_dir`` and the optional
            ``monitor`` / ``early_stop``)
        :param train_logger: stored as ``self.train_logger``
        :raises AssertionError: if the monitor mode is not ``min`` or ``max``
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)

        # pick the device, move the model there, parallelise over several GPUs
        self.device, device_ids = self._prepare_device(config['n_gpu'], config['gpu_id'])
        self.model = model.to(self.device)
        if len(device_ids) > 1:
            self.model = torch.nn.DataParallel(model, device_ids=device_ids)

        self.loss = loss
        self.metrics = metrics
        self.class_wise = class_wise
        self.optimizer = optimizer
        self.train_logger = train_logger

        cfg_trainer = config['trainer']
        self.epochs = cfg_trainer['epochs']
        self.save_period = cfg_trainer['save_period']
        self.verbosity = cfg_trainer['verbosity']

        # monitoring is disabled unless the trainer section asks for it
        self.monitor = cfg_trainer.get('monitor', 'off')
        if self.monitor != 'off':
            # expected form: "<mode> <metric>", e.g. "min val_loss"
            self.mnt_mode, self.mnt_metric = self.monitor.split()
            assert self.mnt_mode in ('min', 'max')

            if self.mnt_mode == 'min':
                self.mnt_best = math.inf
            else:
                self.mnt_best = -math.inf
            self.early_stop = cfg_trainer.get('early_stop', math.inf)
        else:
            self.mnt_mode = 'off'
            self.mnt_best = 0

        self.start_epoch = 1

        # checkpoint directory: <save_dir>/<name>/<timestamp>_<arch type>
        # NOTE(review): timezone(...) is presumably pytz.timezone; confirm
        timestamp = datetime.now(timezone('US/Pacific')).strftime('%m-%d_%H:%M:%S')
        start_time = str(timestamp) + '_' + str(config['arch']['type'])
        self.checkpoint_dir = os.path.join(cfg_trainer['save_dir'], config['name'], start_time)

        # keep a copy of the configuration next to the checkpoints
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
    def save_images(self,
                    generator_outputs,
                    targets,
                    generator_labels,
                    target_labels,
                    epoch,
                    batch_idx,
                    r=1,
                    c=2):
        """Plot generator outputs next to their targets and save the figure.

        The figure is written to
        ``<checkpoint_dir>/results/epoch_<epoch>/<batch_idx>.jpg``.

        :param generator_outputs: tensor of generator outputs; converted to
            NumPy, transposed with ``(0, 2, 3, 1)`` and channel 1 of the last
            axis is binarised at 0.5 before plotting
        :param targets: tensor of target images, plotted as given
        :param generator_labels: tensor whose ``[i, 0]`` entry is shown in the
            title of the i-th generator image
        :param target_labels: tensor whose ``[i, 0]`` entry is shown in the
            title of the i-th target image
        :param epoch: epoch number, used in the output folder name
        :param batch_idx: batch index, used as the output file name
        :param r: number of subplot rows; with ``r == 1`` only the first
            sample is drawn, otherwise samples ``0 .. r-1``
        :param c: number of subplot columns; only columns 0 and 1 are used
        :return: None
        """
        generator_outputs = generator_outputs.cpu().numpy()
        generator_outputs = generator_outputs.transpose((0, 2, 3, 1))
        # The array above can share memory with the caller's tensor (no copy is
        # made for a tensor that already lives on the CPU), so threshold a
        # private copy instead of overwriting the caller's data in place.
        generator_outputs = generator_outputs[..., 1].copy()
        generator_outputs[generator_outputs >= 0.5] = 1
        generator_outputs[generator_outputs < 0.5] = 0
        targets = targets.cpu().numpy()
        generator_labels = generator_labels.cpu().numpy()
        target_labels = target_labels.cpu().numpy()

        fig, axs = plt.subplots(r, c)

        if r == 1:
            # a single row makes plt.subplots return a 1-D array of axes
            axs[0].set_title('Fake Disc:{:.2f}'.format(generator_labels[0, 0]))
            axs[0].imshow(generator_outputs[0], cmap='gray')
            axs[0].axis('off')

            axs[1].set_title('Target Disc:{:.2f}'.format(target_labels[0, 0]))
            axs[1].imshow(targets[0], cmap='gray')
            axs[1].axis('off')
        else:
            for row in range(r):
                axs[row, 0].set_title('Fake Disc:{:.1f}'.format(
                    generator_labels[row, 0]))
                axs[row, 0].imshow(generator_outputs[row])
                axs[row, 0].axis('off')

                axs[row, 1].set_title('Target Disc:{:.1f}'.format(
                    target_labels[row, 0]))
                axs[row, 1].imshow(targets[row])
                axs[row, 1].axis('off')

        # Format only the last path component: formatting the joined path would
        # misinterpret any braces contained in checkpoint_dir.
        epoch_dir = os.path.join(self.checkpoint_dir, 'results',
                                 'epoch_{}'.format(epoch))
        ensure_dir(epoch_dir)
        try:
            fig.savefig(os.path.join(epoch_dir, '{}.jpg'.format(batch_idx)))
        finally:
            # release the figure even when saving fails
            plt.close(fig)
def main(blurred_dir, sharp_dir, aligned_dir):
    """Concatenate each blurred image with its sharp counterpart.

    For every entry listed in ``blurred_dir`` the blurred and the sharp image
    are loaded as RGB ``uint8`` arrays, joined along axis 1 (blurred on the
    left) and saved under the same name in ``aligned_dir``.

    :param blurred_dir: directory with the blurred images
    :param sharp_dir: directory with the sharp images; file names are assumed
        to match those in ``blurred_dir``
    :param aligned_dir: output directory, created through ``ensure_dir``
    """

    def load_rgb(directory, file_name):
        # PIL image -> numpy array (H, W, C)
        picture = Image.open(os.path.join(directory, file_name))
        return np.array(picture.convert('RGB'), dtype=np.uint8)

    # we assume that blurred and sharp images have the same names
    file_names = os.listdir(blurred_dir)
    ensure_dir(aligned_dir)
    for file_name in tqdm(file_names, ascii=True):
        blurred = load_rgb(blurred_dir, file_name)
        sharp = load_rgb(sharp_dir, file_name)
        # horizontal alignment
        side_by_side = np.concatenate((blurred, sharp), axis=1)
        output_path = os.path.join(aligned_dir, file_name)
        Image.fromarray(side_by_side).save(output_path)
Example #25
0
 def _save_checkpoint(self, epoch, save_best=False):
     """Serialise the current training state into ``self.save_dir``.

     The saved dictionary holds the epoch, the model and optimizer state
     dicts, ``self.cfg`` and ``self.best_score``.

     :param epoch: epoch number stored in the checkpoint and, for regular
         checkpoints, used in the file name
     :param save_best: when true the state is written to ``model_best.pt``,
         otherwise to ``checkpoint_epoch_<epoch>.pt``
     """
     ensure_dir(self.save_dir)
     snapshot = dict(
         epoch=epoch,
         state_dict=self.model.state_dict(),
         optimizer=self.optimizer.state_dict(),
         config=self.cfg,
         best_score=self.best_score,
     )
     # the two modes differ only in the target file and the log message
     if save_best:
         target = str(self.save_dir + '/model_best.pt')
         message = 'Saving current best: {}...'
     else:
         target = str(self.save_dir + '/checkpoint_epoch_{}.pt'.format(epoch))
         message = 'Saving checkpoint: {} ...'
     torch.save(snapshot, target)
     self.logger.debug(message.format(target))
    def __init__(self, dirPath, split, config):
        """Index the text lines of one data split, grouped by author.

        :param dirPath: dataset root; XML files are read from
            ``dirPath/xmls/<name>.xml`` and image paths point to
            ``dirPath/forms/<name>.png``
        :param split: key selecting the list of page names in
            ``data/sets.json``
        :param config: mapping with the required keys ``img_height``,
            ``char_file`` and ``center_pad`` and the optional keys
            ``augmentation``, ``cache_normalized``, ``overfit`` and
            ``add_spaces``
        """
        self.img_height = config['img_height']

        # NOTE: the split file is read from ./data relative to the working
        # directory, not from dirPath.
        with open(os.path.join('data', 'sets.json')) as f:
            set_list = json.load(f)[split]

        # author -> list of (image path, *line fields) tuples
        self.authors = defaultdict(list)
        # flat index over all lines as (author, position in that author's list)
        self.lineIndex = []
        for name in set_list:
            lines, author = parseXML(
                os.path.join(dirPath, 'xmls', name + '.xml'))
            image_path = os.path.join(dirPath, 'forms', name + '.png')

            # new lines are appended after the ones already stored for the author
            first_new = len(self.authors[author])
            self.authors[author] += [(image_path, ) + line for line in lines]
            self.lineIndex += [(author, first_new + i)
                               for i in range(len(lines))]

        with open(config['char_file']) as f:
            char_set = json.load(f)
        self.char_to_idx = char_set['char_to_idx']

        self.augmentation = config.get('augmentation')
        self.normalized_dir = config.get('cache_normalized')
        if self.normalized_dir is not None:
            ensure_dir(self.normalized_dir)

        self.warning = False

        # debugging aid: keep only the first ten lines
        if config.get('overfit'):
            self.lineIndex = self.lineIndex[:10]

        # 'center_pad' is a required key (no default is applied)
        self.center = config['center_pad']

        # The membership test used to look for the misspelled key 'add_spces',
        # so a configured 'add_spaces' value was never honoured.
        self.add_spaces = config.get('add_spaces', False)
Example #27
0
    def __init__(self,
                 model,
                 loss,
                 metrics,
                 resume,
                 config,
                 train_logger=None):
        """Build the trainer state with one optimizer per configured sub-model.

        :param model: model object; every key of ``config['optimizers']``
            names an attribute of it whose ``parameters()`` feed one optimizer
        :param loss: stored as ``self.loss``
        :param metrics: stored as ``self.metrics``
        :param resume: when truthy, passed to ``self._resume_checkpoint``
        :param config: configuration; reads ``name``, ``cuda``,
            ``optimizers`` (name -> ``{'type', 'config'}``) and, under
            ``trainer``, ``epochs``, ``save_freq``, ``verbosity``,
            ``save_best`` and ``save_dir``. ``monitor`` and ``monitor_mode``
            are read only when ``save_best`` is truthy.
        :param train_logger: stored as ``self.train_logger``
        :raises AssertionError: if ``save_best`` is set and ``monitor_mode``
            is neither ``'min'`` nor ``'max'``
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        self.model = model
        self.loss = loss
        self.metrics = metrics
        self.name = config['name']
        self.epochs = config['trainer']['epochs']
        self.save_freq = config['trainer']['save_freq']
        self.verbosity = config['trainer']['verbosity']
        self.with_cuda = config['cuda'] and torch.cuda.is_available()
        if config['cuda'] and not torch.cuda.is_available():
            self.logger.warning(
                'Warning: There\'s no CUDA support on this machine, '
                'training is performed on CPU.')
        self.train_logger = train_logger

        self.optimizers = {}
        for optim_name, optim_config in config['optimizers'].items():
            # NOTE(security): eval() executes whatever expression the config
            # supplies as the optimizer name. Only load trusted configuration
            # files; consider an attribute lookup if names are plain attributes.
            sub_model = eval('model.' + optim_name)
            optimizer_class = getattr(optim, optim_config['type'])
            self.optimizers[optim_name] = optimizer_class(
                sub_model.parameters(), **optim_config['config'])

        self.save_best = config['trainer']['save_best']
        if self.save_best:
            self.monitor = config['trainer']['monitor']
            self.monitor_mode = config['trainer']['monitor_mode']
            assert self.monitor_mode == 'min' or self.monitor_mode == 'max'
            # start from the worst possible value so the first result wins
            self.monitor_best = math.inf if self.monitor_mode == 'min' else -math.inf

        self.start_epoch = 1
        self.checkpoint_dir = os.path.join(config['trainer']['save_dir'],
                                           self.name)
        ensure_dir(self.checkpoint_dir)
        # keep a copy of the configuration next to the checkpoints; the
        # context manager guarantees the file is flushed and closed
        config_save_path = os.path.join(self.checkpoint_dir, 'config.json')
        with open(config_save_path, 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #28
0
 def _create_saving_dir(self, args):
     """Create the saving directory of this run and return its path.

     The path is ``<trainer.save_dir>/<args.outputs_subdir>/<name>``, taken
     from ``global_config``. When it already exists the user is asked for
     confirmation on stdin; any answer other than ``y`` ends the program.
     When ``args.resume`` is given, a symbolic link ``resumed_ckpt.pth``
     pointing at the resumed checkpoint is (re)created inside the directory.

     :param args: object providing ``outputs_subdir`` and ``resume``
     :return: path of the saving directory
     :raises SystemExit: if the user declines to reuse an existing directory
     """
     saving_dir = os.path.join(global_config['trainer']['save_dir'],
                               args.outputs_subdir, global_config['name'])
     if os.path.exists(saving_dir):
         logger.warning(
             f'The saving directory "{saving_dir}" already exists. '
             f'If continued, some files might be overwriten.')
         response = input('Proceed? [y/N] ')
         if response != 'y':
             logger.info('Exit.')
             # exit() is a helper installed by the site module for interactive
             # use; raising SystemExit has the same effect for callers and
             # does not depend on site being loaded.
             raise SystemExit
     ensure_dir(saving_dir)
     if args.resume is not None:
         link = os.path.join(saving_dir, 'resumed_ckpt.pth')
         # lexists() is also true for a dangling link (its target checkpoint
         # was moved or deleted); exists() would miss it and symlink() would
         # then fail because the link name is still taken.
         if os.path.lexists(link):
             os.remove(link)
         # Mark the used resume path by a symbolic link
         os.symlink(os.path.abspath(args.resume), link)
     return saving_dir
Example #29
0
    def __init__(self, model, loss, metrics, resume, config, train_logger=None):
        """Build the trainer state from ``config`` and optionally resume.

        :param model: model, moved to the selected device; only parameters
            with ``requires_grad`` set are handed to the optimizer
        :param loss: stored as ``self.loss``
        :param metrics: stored as ``self.metrics``
        :param resume: when truthy, passed to ``self._resume_checkpoint``
        :param config: configuration; reads ``name``, ``cuda``, ``gpu`` (only
            when CUDA is used), ``optimizer_type``, ``optimizer``,
            ``lr_scheduler_type`` and, under ``trainer``, ``epochs``,
            ``save_freq``, ``verbosity``, ``monitor``, ``monitor_mode`` and
            ``save_dir``. ``lr_scheduler`` and ``lr_scheduler_freq`` are read
            only when the scheduler type resolves to an existing class.
        :param train_logger: stored as ``self.train_logger``
        :raises AssertionError: if ``monitor_mode`` is neither ``'min'`` nor
            ``'max'``
        """
        self.config = config
        self.logger = logging.getLogger(self.__class__.__name__)
        self.model = model
        self.loss = loss
        self.metrics = metrics
        self.name = config['name']

        trainer_cfg = config['trainer']
        self.epochs = trainer_cfg['epochs']
        self.save_freq = trainer_cfg['save_freq']
        self.verbosity = trainer_cfg['verbosity']

        # fall back to the CPU (with a warning) when CUDA was requested but is missing
        cuda_requested = config['cuda']
        self.with_cuda = cuda_requested and torch.cuda.is_available()
        if cuda_requested and not self.with_cuda:
            self.logger.warning('Warning: There\'s no CUDA support on this machine, '
                                'training is performed on CPU.')
        if self.with_cuda:
            device_name = 'cuda:' + str(config['gpu'])
        else:
            device_name = 'cpu'
        self.device = torch.device(device_name)
        self.model = self.model.to(self.device)

        self.train_logger = train_logger
        self.writer = WriterTensorboardX(config)

        # frozen parameters are left out of the optimizer
        optimizer_class = getattr(optim, config['optimizer_type'])
        trainable = (p for p in model.parameters() if p.requires_grad)
        self.optimizer = optimizer_class(trainable, **config['optimizer'])

        # an unknown scheduler type name resolves to None and disables scheduling
        scheduler_class = getattr(optim.lr_scheduler, config['lr_scheduler_type'], None)
        self.lr_scheduler = scheduler_class
        if scheduler_class:
            self.lr_scheduler = scheduler_class(self.optimizer, **config['lr_scheduler'])
            self.lr_scheduler_freq = config['lr_scheduler_freq']

        self.monitor = trainer_cfg['monitor']
        self.monitor_mode = trainer_cfg['monitor_mode']
        assert self.monitor_mode in ('min', 'max')
        if self.monitor_mode == 'min':
            self.monitor_best = math.inf
        else:
            self.monitor_best = -math.inf
        self.start_epoch = 1
        self.checkpoint_dir = os.path.join(trainer_cfg['save_dir'], self.name)

        # keep a copy of the configuration next to the checkpoints
        ensure_dir(self.checkpoint_dir)
        with open(os.path.join(self.checkpoint_dir, 'config.json'), 'w') as handle:
            json.dump(config, handle, indent=4, sort_keys=False)

        if resume:
            self._resume_checkpoint(resume)
Example #30
0
    def __init__(self,
                 model,
                 config,
                 test_data_loader,
                 begin_time,
                 loss_weight,
                 #do_predict,
                 ):

        # for general
        self.config = config
        self.device = torch.device('cuda:{}'.format(self.config.device_id)) if self.config.use_gpu else torch.device('cpu')
        #self.do_predict = do_predict

        # for train
        #self.visdom = visdom
        self.model = model.to(self.device)
        self.loss_weight = loss_weight.to(self.device)
        self.loss = self._loss(loss_function= self.config.loss).to(self.device)
        self.optimizer = self._optimizer(lr_algorithm=self.config.lr_algorithm)
        self.lr_scheduler = self._lr_scheduler()

        # for time
        self.begin_time = begin_time

        # for data
        self.test_data_loader = test_data_loader

        # for resume/save path
        self.history = {
            'eval': {
                'loss': [],
                'acc': [],
                'miou': [],
                'time': [],
            },
        }
        self.test_log_path = os.path.join(self.config.test_log_dir, model.name, self.begin_time)
        self.predict_path = os.path.join(self.config.pred_dir, model.name, self.begin_time)
        self.resume_ckpt_path = os.path.join(self.config.save_dir, model.name, self.begin_time, 'checkpoint-best.pth')
        ensure_dir(self.test_log_path)
        ensure_dir(self.predict_path)