def view_dataset(self, mode='train'):
    """Preview augmented samples from the CANCER dataset.

    With ``mode='train'`` the training pairs are shown under the full
    shape + input augmentation pipeline; any other mode shows the
    validation pairs under the inference-time shape augmentations only.
    """
    train_pairs, valid_pairs = dataset.prepare_data_CANCER()
    if mode == 'train':
        augs = self.train_augmentors()  # [0] = shape augs, [1] = input augs
        ds = dataset.DatasetSerial(train_pairs,
                                   shape_augs=iaa.Sequential(augs[0]),
                                   input_augs=iaa.Sequential(augs[1]))
    else:
        augs = self.infer_augmentors()
        ds = dataset.DatasetSerial(valid_pairs,
                                   shape_augs=iaa.Sequential(augs))
    dataset.visualize(ds, 1)
    return
def view_dataset(self, mode='train'):
    """Preview one batch of PAIP2020/PANDA tiles for this fold.

    ``mode='train'`` visualises the training split with train-mode
    tiling; anything else visualises the validation split.
    """
    pairs = dataset.prepare_PAIP2020_PANDA(self.fold_idx)
    train_pairs, valid_pairs = pairs
    if mode == 'train':
        # augmentors are computed for parity with the other pipelines but
        # DatasetSerial here only takes tiling parameters and a train flag
        _augs = self.train_augmentors()
        ds = dataset.DatasetSerial(train_pairs, self.tile_size,
                                   self.num_tile, True)
    else:
        _augs = self.infer_augmentors()  # HACK
        ds = dataset.DatasetSerial(valid_pairs, self.tile_size,
                                   self.num_tile, False)
    dataset.visualize(ds, 1)
    return
def view_dataset(self, mode='train'):
    """Visualise augmented samples from the configured dataset.

    The prepare function is resolved dynamically as
    ``dataset.prepare_<self.dataset>_data``. ``mode='train'`` shows the
    training split with shape + input augmentations; any other mode
    shows the validation split with inference-time shape augmentations.
    """
    train_pairs, valid_pairs = getattr(dataset, ('prepare_%s_data' % self.dataset))()
    if mode == 'train':
        train_augmentors = self.train_augmentors()
        ds = dataset.DatasetSerial(train_pairs, has_aux=False,
                                   shape_augs=iaa.Sequential(train_augmentors[0]),
                                   input_augs=iaa.Sequential(train_augmentors[1]))
    else:
        infer_augmentors = self.infer_augmentors()  # HACK
        # BUGFIX: was `iaa.Sequential(infer_augmentors)[0]`, which builds a
        # Sequential and then indexes it, handing a bare augmenter (not a
        # pipeline) to DatasetSerial. Wrap the first augmentor list instead,
        # matching the run_once() dataloader construction.
        ds = dataset.DatasetSerial(valid_pairs, has_aux=False,
                                   shape_augs=iaa.Sequential(infer_augmentors[0]))
    dataset.visualize(ds, 4)
    return
def run_once(self):
    """Run one full training session of DenseNet on VIABLE_2048.

    Builds train/valid dataloaders with imgaug pipelines, sets up
    ignite engines (trainer + inferer) with checkpointing, timers,
    EMA running averages, progress bars, TensorBoard/JSON logging
    and a per-epoch validation pass, then trains for self.nr_epochs.
    """
    log_dir = self.log_dir
    misc.check_manual_seed(self.seed)
    train_pairs, valid_pairs = dataset.prepare_data_VIABLE_2048()
    print(len(train_pairs))

    # --------------------------- Dataloader
    train_augmentors = self.train_augmentors()  # [0] = shape augs, [1] = input augs
    train_dataset = dataset.DatasetSerial(train_pairs[:],
                                          shape_augs=iaa.Sequential(train_augmentors[0]),
                                          input_augs=iaa.Sequential(train_augmentors[1]))
    infer_augmentors = self.infer_augmentors()
    infer_dataset = dataset.DatasetSerial(valid_pairs[:],
                                          shape_augs=iaa.Sequential(infer_augmentors))
    train_loader = data.DataLoader(train_dataset,
                                   num_workers=self.nr_procs_train,
                                   batch_size=self.train_batch_size,
                                   shuffle=True, drop_last=True)
    valid_loader = data.DataLoader(infer_dataset,
                                   num_workers=self.nr_procs_valid,
                                   batch_size=self.infer_batch_size,
                                   shuffle=True, drop_last=False)

    # --------------------------- Training Sequence
    if self.logging:
        misc.check_log_dir(log_dir)

    device = 'cuda'

    # networks
    input_chs = 3
    net = DenseNet(input_chs, self.nr_classes)
    net = torch.nn.DataParallel(net).to(device)
    # print(net)

    # optimizers
    optimizer = optim.Adam(net.parameters(), lr=self.init_lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, self.lr_steps)

    # load pre-trained models
    if self.load_network:
        saved_state = torch.load(self.save_net_path)
        net.load_state_dict(saved_state)

    trainer = Engine(lambda engine, batch: self.train_step(net, batch, optimizer, 'cuda'))
    inferer = Engine(lambda engine, batch: self.infer_step(net, batch, 'cuda'))

    train_output = ['loss', 'acc']  # NOTE(review): currently unused
    infer_output = ['prob', 'true']

    if self.logging:
        checkpoint_handler = ModelCheckpoint(log_dir, self.chkpts_prefix,
                                             save_interval=1, n_saved=120,
                                             require_empty=False)
        # adding handlers using `trainer.add_event_handler` method API
        trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                                  handler=checkpoint_handler,
                                  to_save={'net': net})

    timer = Timer(average=True)
    timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)
    timer.attach(inferer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)

    # attach running average metrics computation
    # decay of EMA to 0.95 to match tensorpack default
    RunningAverage(alpha=0.95, output_transform=lambda x: x['loss']).attach(trainer, 'loss')
    RunningAverage(alpha=0.95, output_transform=lambda x: x['acc']).attach(trainer, 'acc')

    # attach progress bar
    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, metric_names=['loss'])
    pbar.attach(inferer)

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EXCEPTION_RAISED)
    def handle_exception(engine, e):
        # save an emergency checkpoint on Ctrl-C; re-raise anything else
        if isinstance(e, KeyboardInterrupt) and (engine.state.iteration > 1):
            engine.terminate()
            warnings.warn('KeyboardInterrupt caught. Exiting gracefully.')
            # NOTE(review): checkpoint_handler only exists when self.logging
            # is True — this line would raise NameError otherwise; confirm.
            checkpoint_handler(engine, {'net_exception': net})
        else:
            raise e

    # writer for tensorboard logging
    if self.logging:
        writer = SummaryWriter(log_dir=log_dir)
        json_log_file = log_dir + '/stats.json'
        with open(json_log_file, 'w') as json_file:
            json.dump({}, json_file)  # create empty file

    @trainer.on(Events.EPOCH_STARTED)
    def log_lrs(engine):
        if self.logging:
            lr = float(optimizer.param_groups[0]['lr'])
            writer.add_scalar("lr", lr, engine.state.epoch)
        # advance scheduler clock (stepped regardless of logging)
        scheduler.step()

    def update_logs(output, epoch, prefix, color):
        """Print metrics to console; mirror them to stats.json and TensorBoard."""
        # print values and convert
        max_length = len(max(output.keys(), key=len))
        for metric in output:
            key = colored(prefix + '-' + metric.ljust(max_length), color)
            print('------%s : ' % key, end='')
            print('%0.7f' % output[metric])
        if 'train' in prefix:
            lr = float(optimizer.param_groups[0]['lr'])
            key = colored(prefix + '-' + 'lr'.ljust(max_length), color)
            print('------%s : %0.7f' % (key, lr))
        if not self.logging:
            return
        # create stat dicts
        stat_dict = {}
        for metric in output:
            metric_value = output[metric]
            stat_dict['%s-%s' % (prefix, metric)] = metric_value
        # json stat log file, update and overwrite
        with open(json_log_file) as json_file:
            json_data = json.load(json_file)
        current_epoch = str(epoch)
        if current_epoch in json_data:
            old_stat_dict = json_data[current_epoch]
            stat_dict.update(old_stat_dict)
        current_epoch_dict = {current_epoch: stat_dict}
        json_data.update(current_epoch_dict)
        with open(json_log_file, 'w') as json_file:
            json.dump(json_data, json_file)
        # log values to tensorboard
        for metric in output:
            writer.add_scalar(prefix + '-' + metric, output[metric], current_epoch)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_train_running_results(engine):
        """ running training measurement """
        training_ema_output = engine.state.metrics
        update_logs(training_ema_output, engine.state.epoch,
                    prefix='train-ema', color='green')

    def get_init_accumulator(output_names):
        # one empty list per metric name, filled by accumulate_outputs
        return {metric: [] for metric in output_names}

    import cv2

    def process_accumulated_output(output):
        """Concatenate accumulated batches and compute acc/dice on class>0 pixels."""
        def uneven_seq_to_np(seq, batch_size=self.infer_batch_size):
            # Stack a list of batches whose last element may be partial.
            if self.infer_batch_size == 1:
                return np.squeeze(seq)
            item_count = batch_size * (len(seq) - 1) + len(seq[-1])
            cat_array = np.zeros((item_count,) + seq[0][0].shape, seq[0].dtype)
            for idx in range(0, len(seq) - 1):
                cat_array[idx * batch_size: (idx + 1) * batch_size] = seq[idx]
            # BUGFIX: compute the tail offset from len(seq) rather than the
            # loop variable `idx`, which is unbound when seq holds a single
            # (partial) batch and batch_size != 1.
            cat_array[(len(seq) - 1) * batch_size:] = seq[-1]
            return cat_array

        prob = uneven_seq_to_np(output['prob'])
        true = uneven_seq_to_np(output['true'])
        # cmap = plt.get_cmap('jet')
        # epi = prob[...,1]
        # epi = (cmap(epi) * 255.0).astype('uint8')
        # cv2.imwrite('sample.png', cv2.cvtColor(epi, cv2.COLOR_RGB2BGR))
        pred = np.argmax(prob, axis=-1)
        true = np.squeeze(true)
        # deal with ignore index: label 0 is ignored, remaining labels shift down by 1
        pred = pred.flatten()
        true = true.flatten()
        pred = pred[true != 0] - 1
        true = true[true != 0] - 1
        acc = np.mean(pred == true)
        inter = (pred * true).sum()
        total = (pred + true).sum()
        dice = 2 * inter / total  # NOTE(review): assumes binary labels after shift
        proc_output = dict(acc=acc, dice=dice)
        return proc_output

    @trainer.on(Events.EPOCH_COMPLETED)
    def infer_valid(engine):
        """ inference measurement """
        inferer.accumulator = get_init_accumulator(infer_output)
        inferer.run(valid_loader)
        output_stat = process_accumulated_output(inferer.accumulator)
        update_logs(output_stat, engine.state.epoch, prefix='valid', color='red')

    @inferer.on(Events.ITERATION_COMPLETED)
    def accumulate_outputs(engine):
        # stash each batch's output dict entries for epoch-end processing
        batch_output = engine.state.output
        for key, item in batch_output.items():
            engine.accumulator[key].extend([item])

    # Setup is done. Now let's run the training
    trainer.run(train_loader, self.nr_epochs)
    return
def run_once(self):
    """Run one EfficientNet-b0 training session on PAIP2020/PANDA tiles.

    Prepares tile datasets/loaders for self.fold_idx, builds ignite
    trainer/valider engines with checkpointing, timers, EMA metrics and
    progress bars, wires epoch-end logging/inference handlers, then
    trains for self.nr_epochs.
    """
    # self.log_path = 'log/%s/' % self.dataset
    # self.model_name = 'efficientnet-b0_MSI_{0}fold_random_tile_patch'.format(self.fold_idx)
    # self.log_dir = self.log_path + self.model_name
    log_dir = self.log_dir
    check_manual_seed(self.seed)

    train_pairs, valid_pairs = dataset.prepare_PAIP2020_PANDA(self.fold_idx)
    print(len(train_pairs))
    print(len(valid_pairs))

    train_augmentors = self.train_augmentors()  # NOTE(review): unused here
    train_dataset = dataset.DatasetSerial(train_pairs[:], self.tile_size,
                                          self.num_tile, train_mode=True)
    infer_augmentors = self.infer_augmentors()  # HACK at has_aux
    infer_dataset = dataset.DatasetSerial(valid_pairs[:], self.tile_size,
                                          self.num_tile, train_mode=False)

    train_loader = data.DataLoader(train_dataset,
                                   num_workers=self.nr_procs_train,
                                   batch_size=self.train_batch_size,
                                   shuffle=True, drop_last=True)
    valid_loader = data.DataLoader(infer_dataset,
                                   num_workers=self.nr_procs_valid,
                                   batch_size=self.infer_batch_size,
                                   shuffle=True, drop_last=False)

    # --------------------------- Training Sequence
    if self.logging:
        check_log_dir(log_dir)

    device = 'cuda'

    # networks
    input_chs = 3  # TODO: dynamic config
    net = EfficientNet.from_pretrained('efficientnet-b0', num_classes=2)
    # net = DenseNet(3, 2)
    net = torch.nn.DataParallel(net).to(device)
    # load pre-trained models
    if self.load_network:
        saved_state = torch.load(self.save_net_path)
        net.load_state_dict(saved_state)

    # optimizers
    optimizer = optim.Adam(net.parameters(), lr=self.init_lr)
    scheduler = StepLR(optimizer, self.lr_steps, gamma=0.1)
    # NOTE(review): this wrapped scheduler is never attached to the trainer,
    # so the learning rate never actually decays — confirm intent.
    scheduler = LRScheduler(scheduler)

    trainer = Engine(lambda engine, batch: self.train_step(net, batch, optimizer, device))
    valider = Engine(lambda engine, batch: self.infer_step(net, batch, device))

    infer_output = ['prob', 'true']

    if self.logging:
        checkpoint_handler = ModelCheckpoint(log_dir, self.chkpts_prefix,
                                             save_interval=1, n_saved=100,
                                             require_empty=False)
        # adding handlers using `trainer.add_event_handler` method API
        trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                                  handler=checkpoint_handler,
                                  to_save={'net': net})

    timer = Timer(average=True)
    timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)
    timer.attach(valider, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)

    # attach running average metrics computation
    # decay of EMA to 0.95 to match tensorpack default
    # TODO: refactor this
    RunningAverage(alpha=0.95, output_transform=lambda x: x['acc']).attach(trainer, 'acc')
    RunningAverage(alpha=0.95, output_transform=lambda x: x['loss']).attach(trainer, 'loss')

    # attach progress bar
    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, metric_names=['loss'])
    pbar.attach(valider)

    # #early Stopping
    # def score_function(engine):
    #     val_acc = engine.state.metrics["valid-acc"]
    #     return val_acc
    # early_stopping_handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EXCEPTION_RAISED)
    def handle_exception(engine, e):
        # save an emergency checkpoint on Ctrl-C; re-raise anything else
        if isinstance(e, KeyboardInterrupt) and (engine.state.iteration > 1):
            engine.terminate()
            warnings.warn('KeyboardInterrupt caught. Exiting gracefully.')
            # NOTE(review): checkpoint_handler only exists when self.logging
            # is True — this line would raise NameError otherwise; confirm.
            checkpoint_handler(engine, {'net_exception': net})
        else:
            raise e

    # writer for tensorboard logging
    tfwriter = None  # HACK temporary
    # BUGFIX: json_log_file was assigned only inside the logging branch but
    # read unconditionally below, raising NameError when logging is off.
    json_log_file = None
    if self.logging:
        tfwriter = SummaryWriter(log_dir)
        json_log_file = log_dir + '/stats.json'
        with open(json_log_file, 'w') as json_file:
            json.dump({}, json_file)  # create empty file

    ### TODO refactor again
    log_info_dict = {
        'logging': self.logging,
        'optimizer': optimizer,
        'tfwriter': tfwriter,
        'json_file': json_log_file,
        'nr_classes': self.nr_classes,
        'metric_names': infer_output,
        'infer_batch_size': self.infer_batch_size  # too cumbersome
    }
    trainer.add_event_handler(Events.EPOCH_COMPLETED, log_train_ema_results, log_info_dict)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, inference,
                              valider, valid_loader, log_info_dict)
    valider.add_event_handler(Events.ITERATION_COMPLETED, accumulate_outputs)

    # Setup is done. Now let's run the training
    trainer.run(train_loader, self.nr_epochs)
    return
def run_once(self, fold_idx):
    """Train one cross-validation fold end-to-end.

    Resolves the prepare function dynamically as
    ``dataset.prepare_<self.dataset>_data(fold_idx)``, builds imgaug-based
    dataloaders, imports the network module according to self.task_type,
    wires ignite trainer/valider engines (checkpointing, timers, EMA
    metrics, progress bars, TB/JSON logging handlers) and trains for
    self.nr_epochs.

    Args:
        fold_idx: index of the cross-validation fold to prepare.
    """
    log_dir = self.log_dir
    check_manual_seed(self.seed)
    train_pairs, valid_pairs = getattr(dataset, ('prepare_%s_data' % self.dataset))(fold_idx)

    # --------------------------- Dataloader
    train_augmentors = self.train_augmentors()  # [0] = shape augs, [1] = input augs
    train_dataset = dataset.DatasetSerial(train_pairs[:], has_aux=False,
                                          shape_augs=iaa.Sequential(train_augmentors[0]),
                                          input_augs=iaa.Sequential(train_augmentors[1]))
    infer_augmentors = self.infer_augmentors()  # HACK at has_aux
    infer_dataset = dataset.DatasetSerial(valid_pairs[:], has_aux=False,
                                          shape_augs=iaa.Sequential(infer_augmentors[0]))
    train_loader = data.DataLoader(train_dataset,
                                   num_workers=self.nr_procs_train,
                                   batch_size=self.train_batch_size,
                                   shuffle=True, drop_last=True)
    valid_loader = data.DataLoader(infer_dataset,
                                   num_workers=self.nr_procs_valid,
                                   batch_size=self.infer_batch_size,
                                   shuffle=True, drop_last=False)

    # --------------------------- Training Sequence
    if self.logging:
        check_log_dir(log_dir)

    device = 'cuda'

    # networks: pick the architecture module from the task type
    # NOTE(review): if task_type matches none of these, net_def is unbound
    if "CLASS" in self.task_type:
        net_def = importlib.import_module('model.class_dense')  # dynamic import
    if "REGRESS" in self.task_type:
        net_def = importlib.import_module('model.regres_dense')  # dynamic import
    if "MULTI" in self.task_type:
        net_def = importlib.import_module('model.multitask_net2')  # dynamic import
    net = net_def.densenet121()

    # load pre-trained models (before DataParallel wrapping, so keys match)
    if self.load_network:
        saved_state = torch.load(self.save_net_path)
        net.load_state_dict(saved_state)
    net = torch.nn.DataParallel(net).to(device)

    # optimizers
    optimizer = optim.Adam(net.parameters(), lr=self.init_lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, self.lr_steps)

    trainer = Engine(lambda engine, batch: self.train_step(net, batch, optimizer, device))
    valider = Engine(lambda engine, batch: self.infer_step(net, batch, device))

    infer_output = ['logit', 'pred', 'true', 'val_loss']

    if self.logging:
        checkpoint_handler = ModelCheckpoint(log_dir, self.chkpts_prefix,
                                             save_interval=1, n_saved=30,
                                             require_empty=False)
        # adding handlers using `trainer.add_event_handler` method API
        trainer.add_event_handler(event_name=Events.EPOCH_COMPLETED,
                                  handler=checkpoint_handler,
                                  to_save={'net': net})

    timer = Timer(average=True)
    timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)
    timer.attach(valider, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)

    # attach running average metrics computation
    # decay of EMA to 0.95 to match tensorpack default
    # TODO: refactor this
    RunningAverage(alpha=0.95, output_transform=lambda x: x['acc']).attach(trainer, 'acc')
    RunningAverage(alpha=0.95, output_transform=lambda x: x['loss']).attach(trainer, 'loss')

    # attach progress bar
    pbar = ProgressBar(persist=True)
    pbar.attach(trainer, metric_names=['loss'])
    pbar.attach(valider)

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EXCEPTION_RAISED)
    def handle_exception(engine, e):
        # save an emergency checkpoint on Ctrl-C; re-raise anything else
        if isinstance(e, KeyboardInterrupt) and (engine.state.iteration > 1):
            engine.terminate()
            warnings.warn('KeyboardInterrupt caught. Exiting gracefully.')
            # NOTE(review): checkpoint_handler only exists when self.logging
            # is True — this line would raise NameError otherwise; confirm.
            checkpoint_handler(engine, {'net_exception': net})
        else:
            raise e

    # writer for tensorboard logging
    tfwriter = None  # HACK temporary
    if self.logging:
        tfwriter = SummaryWriter(logdir=log_dir)
        json_log_file = log_dir + '/stats.json'
        with open(json_log_file, 'w') as json_file:
            json.dump({}, json_file)  # create empty file

    ### TODO refactor again
    log_info_dict = {
        'logging': self.logging,
        'optimizer': optimizer,
        'tfwriter': tfwriter,
        'json_file': json_log_file if self.logging else None,
        'nr_classes': self.nr_classes,
        'metric_names': infer_output,
        'infer_batch_size': self.infer_batch_size  # too cumbersome
    }
    trainer.add_event_handler(Events.EPOCH_STARTED,
                              lambda engine: scheduler.step())  # to change the lr
    trainer.add_event_handler(Events.EPOCH_COMPLETED, log_train_ema_results, log_info_dict)
    trainer.add_event_handler(Events.EPOCH_COMPLETED, inference,
                              valider, valid_loader, log_info_dict)
    valider.add_event_handler(Events.ITERATION_COMPLETED, accumulate_outputs)

    # Setup is done. Now let's run the training
    trainer.run(train_loader, self.nr_epochs)
    return