config = json5.load(f) # build up the data generator with open(config['generator']['data']) as f: data_config = json5.load(f) data_list = data_config['list'] if args.test: data_list = data_list[:1] loader_config = data_config['loader'] loader_name = loader_config.pop('name') data_loader = DataLoader(loader_name, **loader_config) data_loader.set_data_list(data_list) data_gen = DataGenerator(data_loader, config['generator']['struct']) # build up the reverter reverter = Reverter(data_gen) DL = data_gen.struct['DL'] PG = data_gen.struct['PG'] BG = data_gen.struct['BG'] # ensure the order if PG.n_workers > 1: assert PG.ordered assert BG.n_workers == 1 if 'AG' in data_gen.struct: assert data_gen.struct['AG'].n_workers == 1 # - GPUs if 'gpus' in config: if isinstance(config['gpus'], list): gpus = ','.join([str(idx) for idx in config['gpus']]) else:
def run(self, stage):
    """Run one pass of the given stage over its data generator(s).

    For each batch, every task configured for the stage is executed
    (optionally skipped periodically via its ``period``), per-step loss and
    accuracy are logged, and per-task results are accumulated.  After the
    pass, results are summarized per task — optionally reverting patch-wise
    predictions back to whole-data indices and scoring each class.

    Args:
        stage: key into ``self.config['stage']``.

    Returns:
        dict mapping task name -> summary dict (loss/accu arrays, and when
        reversion is enabled: per-data ``scores``, per-class ``cls_scores``
        and their ``cls_mean``).
    """
    stage_config = self.config['stage'][stage]

    # build data flow from the given data generator
    # single data flow
    if isinstance(stage_config['generator'], str):
        data_gen = self.generators[stage_config['generator']]
        class_names = data_gen.struct['DL'].ROIs
        n_steps = len(data_gen)
        gen_tags = None

    # multiple data flows: zip the generators and tag each flow by its key
    elif isinstance(stage_config['generator'], dict):
        gens = [
            self.generators[cfg]
            for cfg in stage_config['generator'].values()
        ]
        data_gen = zip(*gens)
        class_names = gens[0].struct['DL'].ROIs
        # zip stops at the shortest generator
        n_steps = min([len(g) for g in gens])
        gen_tags = list(stage_config['generator'].keys())

    else:
        raise TypeError('generator of type %s is not supported.' %
                        type(stage_config['generator']))

    progress_bar = tqdm(data_gen,
                        total=n_steps,
                        ncols=get_tty_columns(),
                        dynamic_ncols=True,
                        desc='[%s] loss: %.5f, accu: %.5f' %
                        (stage, 0.0, 0.0))

    # step counter is persistent across epochs of the same stage
    if stage not in self.step:
        self.step[stage] = 1

    task_result_list = {
        task_name: []
        for task_name in stage_config['task']
    }
    need_revert = 'revert' in stage_config and stage_config['revert']

    for batch in progress_bar:

        # format the batch into {tag: {key: data}}
        if gen_tags is None:
            assert isinstance(batch, dict)
            formatted_batch = {NO_TAG: dict()}
            for key in batch:
                # move tensors onto GPU when any CUDA device is visible
                if torch.cuda.device_count() >= 1:
                    formatted_batch[NO_TAG][key] = batch[key].cuda()
                else:
                    formatted_batch[NO_TAG][key] = batch[key]
        else:
            formatted_batch = dict()
            for (tag, tag_batch) in zip(gen_tags, batch):
                tag_data = dict()
                for key in tag_batch:
                    if torch.cuda.device_count() >= 1:
                        tag_data[key] = tag_batch[key].cuda()
                    else:
                        tag_data[key] = tag_batch[key]
                formatted_batch[tag] = tag_data

        # execute each task in this stage
        for task_name in stage_config['task']:
            task_config = self.tasks[task_name]

            # skip the task periodically
            if 'period' in task_config \
                    and self.step[stage] % task_config['period'] != 0:
                continue

            # modify the status of modules if the running task changed
            if self.running_task != task_name:
                # toggle trainable parameters of each module
                for (key, toggle) in task_config['toggle'].items():
                    self.handlers[key].model.train(toggle)
                    for param in self.handlers[key].model.parameters():
                        param.requires_grad = toggle
                self.running_task = task_name

            task_result = self.run_task(
                task_config,
                formatted_batch,
                need_revert=need_revert,
            )

            # detach all the results, move them to CPU, and convert them
            # to numpy
            for key in task_result:
                task_result[key] = task_result[key].detach().cpu().numpy()

            # average accuracy if multi-dim
            assert 'accu' in task_result
            if task_result['accu'].ndim == 0:
                # FIX: `x == math.nan` is always False (NaN never compares
                # equal to itself); use math.isnan to detect NaN properly.
                step_accu = math.nan if math.isnan(
                    task_result['accu']) else task_result['accu']
            else:
                assert task_result['accu'].ndim == 1
                # nanmean of an all-NaN array would warn and yield NaN;
                # detect the all-NaN case explicitly first
                empty = True
                for acc in task_result['accu']:
                    if not np.isnan(acc):
                        empty = False
                        break
                step_accu = math.nan if empty else np.nanmean(
                    task_result['accu'])

            assert 'loss' in task_result
            progress_bar.set_description(
                '[%s][%s] loss: %.5f, accu: %.5f' %
                (stage, task_name, task_result['loss'], step_accu))

            if self.logger is not None:
                self.logger.add_scalar(
                    '%s/%s/step/loss' % (stage, task_name),
                    task_result['loss'], self.step[stage])
                # log -1 instead of NaN so the scalar plot stays continuous
                self.logger.add_scalar(
                    '%s/%s/step/accu' % (stage, task_name),
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

            task_result_list[task_name].append(task_result)

        self.step[stage] += 1

    # summarize the result list
    task_summary = dict()
    for (task_name, result_list) in task_result_list.items():
        if len(result_list) == 0:
            continue
        summary = dict()
        if need_revert:
            # NOTE(review): with multiple data flows `data_gen` is an
            # exhausted zip here — reversion presumably only makes sense
            # for the single-generator case; confirm against callers.
            reverter = Reverter(data_gen)
            # revertible keys (e.g. patch predictions) are excluded from
            # the plain averaging below
            result_collection_blacklist = reverter.revertible

            scores = dict()
            progress_bar = tqdm(reverter.on_batches(result_list),
                                total=len(reverter.data_list),
                                dynamic_ncols=True,
                                ncols=get_tty_columns(),
                                desc='[Data index]')
            for reverted in progress_bar:
                data_idx = reverted['idx']
                scores[data_idx] = data_gen.struct['DL'].evaluate(
                    data_idx, reverted['prediction'])
                info = '[%s] mean score: %.3f' % (
                    data_idx, np.mean(list(scores[data_idx].values())))
                progress_bar.set_description(info)

            # summarize score of each class over data indices
            cls_scores = {
                cls: np.mean([scores[data_idx][cls] for data_idx in scores])
                for cls in class_names
            }
            summary['scores'] = scores
            summary['cls_scores'] = cls_scores
            summary['cls_mean'] = np.mean(list(cls_scores.values()))
        else:
            result_collection_blacklist = []

        # collect results except those revertible ones, e.g., collect
        # accu, loss
        summary.update({
            key: np.nanmean(np.vstack([result[key]
                                       for result in result_list]),
                            axis=0)
            for key in result_list[0].keys()
            if key not in result_collection_blacklist
        })

        # process 1D array accu to dictionary of each class score
        if len(summary['accu']) > 1:
            assert len(summary['accu']) == len(class_names), (len(
                summary['accu']), len(class_names))
            summary['cls_accu'] = {
                cls: summary['accu'][i]
                for (i, cls) in enumerate(class_names)
            }
            summary['accu'] = summary['accu'].mean()

        # print summary info
        print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary.items()
            if not isinstance(val, dict)
        ]))
        if 'cls_scores' in summary:
            print('Class score: ' + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary['cls_scores'].items()
            ]))
            print('Class mean: %.3f' % summary['cls_mean'])

        task_summary[task_name] = summary

    return task_summary
# - data pipeline data_gen = dict() loader_name = loader_config.pop('name') ROIs = None for stage in stages: data_loader = DataLoader(loader_name, **loader_config) if data_list[stage] is not None: data_loader.set_data_list(data_list[stage]) data_gen[stage] = DataGenerator(data_loader, generator_config[stage]) if ROIs is None: ROIs = data_loader.ROIs # FIXME reverter = Reverter(data_gen['valid']) # - GPUs os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus']) torch.backends.cudnn.enabled = True # - model model_handlers = { 'seg': ModelHandler( config['models']['seg'], checkpoint=args.checkpoint, ), 'dis': ModelHandler( config['models']['dis'], checkpoint=args.checkpoint_dis, ),
# Round-trip sanity script: feed ground-truth labels through the generator
# as if they were predictions, revert them back to whole-data indices, save
# them, and evaluate — scores should be (near) perfect if reversion works.
timer = time.time()  # NOTE(review): timer not read in this chunk — presumably used below

with open(args.loader_config) as f:
    loader_config = yaml.safe_load(f)
loader_name = loader_config.pop('name')
data_loader = DataLoader(loader_name, **loader_config)

with open(args.generator_config) as f:
    generator_config = yaml.safe_load(f)
data_gen = DataGenerator(data_loader, generator_config)

os.makedirs(args.output_dir, exist_ok=True)
DL = data_gen.struct['DL']
# use each batch's label as its own "prediction" (identity round trip)
batch_list = list({'prediction': data['label']} for data in data_gen)
reverter = Reverter(data_gen)
progress_bar = tqdm(reverter.on_batches(batch_list),
                    total=len(reverter.data_list),
                    dynamic_ncols=True,
                    desc='[Data index]')
scores = dict()
for result in progress_bar:
    data_idx = result['idx']
    DL.save_prediction(data_idx, result['prediction'], args.output_dir)
    scores[data_idx] = DL.evaluate(data_idx, result['prediction'])
    # show per-class scores of the current data index on the progress bar
    info = '[%s] ' % data_idx
    info += ', '.join('%s: %.3f' % (key, val)
                      for key, val in scores[data_idx].items())
    progress_bar.set_description(info)
def run_invertible(self, stage):
    """Run a single-task stage whose batch results are reverted on the fly.

    Unlike ``run``, reversion happens concurrently with inference: the
    Reverter consumes future batches from ``invertible_gen`` and fills a
    mutable ``result_list`` as a side effect while yielding reverted
    whole-data predictions.

    Returns a dict mapping the single task name to its summary.
    """
    stage_config = self.config['stage'][stage]
    # this path supports exactly one task per stage
    assert len(stage_config['task']) == 1

    # step counter is persistent across epochs of the same stage
    if stage not in self.step:
        self.step[stage] = 1

    task_name = stage_config['task'][0]
    task_config = self.tasks[task_name]
    (data_gen, n_steps, gen_tags,
     class_names) = self.get_data_gen(stage)
    inv_gen = self.invertible_gen(stage, data_gen, n_steps, gen_tags,
                                  task_name, task_config)
    reverter = Reverter(data_gen)
    summary = dict()
    # revertible keys (e.g. patch predictions) are excluded from the
    # plain averaging below
    result_collection_blacklist = reverter.revertible

    # We use a mutable result list to collect the results during the
    # validation
    result_list = []
    scores = dict()
    progress_bar = tqdm(reverter.on_future_batches(
        inv_gen,
        mutable_results=result_list,
    ),
                        total=len(reverter.data_list),
                        dynamic_ncols=True,
                        ncols=get_tty_columns(),
                        desc='[Data index]')
    for reverted in progress_bar:
        data_idx = reverted['idx']
        scores[data_idx] = data_gen.struct['DL'].evaluate(
            data_idx, reverted['prediction'])
        info = '[%s] mean score: %.3f' % (
            data_idx, np.mean(list(scores[data_idx].values())))
        progress_bar.set_description(info)

    # summarize score of each class over data indices
    cls_scores = {
        cls: np.mean([scores[data_idx][cls] for data_idx in scores])
        for cls in class_names
    }
    summary['scores'] = scores
    summary['cls_scores'] = cls_scores
    summary['cls_mean'] = np.mean(list(cls_scores.values()))

    # collect results except those revertible ones, e.g., collect accu,
    # loss
    summary.update({
        key: np.nanmean(np.vstack([result[key]
                                   for result in result_list]),
                        axis=0)
        for key in result_list[0].keys()
        if key not in result_collection_blacklist
    })

    # process 1D array accu to dictionary of each class score
    if len(summary['accu']) > 1:
        assert len(summary['accu']) == len(class_names), (len(
            summary['accu']), len(class_names))
        summary['cls_accu'] = {
            cls: summary['accu'][i]
            for (i, cls) in enumerate(class_names)
        }
        summary['accu'] = summary['accu'].mean()

    # print summary info (skip nested dict entries like scores)
    print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
        '%s: %.3f' % (key, val)
        for (key, val) in summary.items()
        if not isinstance(val, dict)
    ]))
    if 'cls_scores' in summary:
        print('Class score: ' + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary['cls_scores'].items()
        ]))
        print('Class mean: %.3f' % summary['cls_mean'])
    return {task_name: summary}
def run(self, stage):
    """Run one pass of the given stage: forward, metric, optional backward.

    Supports a single tagged-less data flow or multiple tagged flows (whose
    forward configs must match the generator tags).  Each batch is moved to
    GPU when available, forwarded through the configured handlers, scored by
    the stage metric, and — when any module is toggled trainable —
    backpropagated and stepped.  With ``revert`` enabled, per-case match/total
    counts are collected for dice scoring after reversion.

    Args:
        stage: key into ``self.config['stage']``.

    Returns:
        summary dict with averaged loss/accu (and, when reverting,
        per-data ``scores`` and per-class ``cls_scores``).
    """
    stage_config = self.config['stage'][stage]

    # build data flow from the given data generator
    # single data flow
    if isinstance(stage_config['generator'], str):
        data_gen = self.generators[stage_config['generator']]
        class_names = data_gen.struct['DL'].ROIs
        n_steps = len(data_gen)
        gen_tags = None

    # multiple data flows: zip the generators and tag each flow by its key
    elif isinstance(stage_config['generator'], dict):
        gens = [
            self.generators[cfg]
            for cfg in stage_config['generator'].values()
        ]
        data_gen = zip(*gens)
        class_names = gens[0].struct['DL'].ROIs
        # zip stops at the shortest generator
        n_steps = min([len(g) for g in gens])
        gen_tags = list(stage_config['generator'].keys())

        # the forward config should match the multiple data flows
        assert isinstance(stage_config['forward'], dict)
        assert gen_tags == list(stage_config['forward'].keys())

    else:
        raise TypeError('generator of type %s is not supported.' %
                        type(stage_config['generator']))

    progress_bar = tqdm(data_gen,
                        total=n_steps,
                        ncols=get_tty_columns(),
                        dynamic_ncols=True,
                        desc='[%s] loss: %.5f, accu: %.5f' %
                        (stage, 0.0, 0.0))

    # step counter is persistent across epochs of the same stage
    if stage not in self.step:
        self.step[stage] = 1

    # toggle trainable parameters of each module; any trainable module
    # means this stage needs backpropagation
    need_backward = False
    for key, toggle in stage_config['toggle'].items():
        self.handlers[key].model.train(toggle)
        for param in self.handlers[key].model.parameters():
            param.requires_grad = toggle
        if toggle:
            need_backward = True

    result_list = []
    need_revert = 'revert' in stage_config and stage_config['revert']

    for batch in progress_bar:
        self.step[stage] += 1

        # single data flow
        if gen_tags is None:
            assert isinstance(batch, dict)

            # insert batch to data, moving tensors onto GPU when available
            data = dict()
            for key in batch:
                if torch.cuda.device_count() >= 1:
                    data[key] = batch[key].cuda()
                else:
                    data[key] = batch[key]

            # forward through each configured handler, accumulating outputs
            for key in stage_config['forward']:
                data.update(self.handlers[key].model(data))

        # multiple data flows
        else:
            assert isinstance(batch, tuple)
            data = dict()
            for (tag, tag_batch) in zip(gen_tags, batch):
                tag_data = dict()

                # insert batch to data
                for key in tag_batch:
                    if torch.cuda.device_count() >= 1:
                        tag_data[key] = tag_batch[key].cuda()
                    else:
                        tag_data[key] = tag_batch[key]

                # forward
                for key in stage_config['forward'][tag]:
                    tag_data.update(self.handlers[key].model(tag_data))

                # insert tag data back to the data, namespaced as key_tag
                data.update({
                    '%s_%s' % (key, tag): tag_data[key]
                    for key in tag_data
                })

        # compute loss and accuracy
        results = self.metrics[stage_config['metric']](data)

        # backpropagation: step and clear only the trainable optimizers
        if need_backward:
            results['loss'].backward()
            for key, toggle in stage_config['toggle'].items():
                if toggle:
                    self.optims[key].step()
                    self.optims[key].zero_grad()

        # compute match for dice score of each case after reversion
        if need_revert:
            assert 'prediction' in data, list(data.keys())
            assert 'label' in data, list(data.keys())
            with torch.set_grad_enabled(False):
                match, total = match_up(
                    data['prediction'],
                    data['label'],
                    needs_softmax=True,
                    batch_wise=True,
                    threshold=-1,
                )
                results.update({'match': match, 'total': total})

        # detach all results, move to CPU, and convert to numpy
        for key in results:
            results[key] = results[key].detach().cpu().numpy()

        # average accuracy if multi-dim
        assert 'accu' in results
        if results['accu'].ndim == 0:
            # FIX: `x == math.nan` is always False (NaN never compares
            # equal to itself); use math.isnan to detect NaN properly.
            step_accu = math.nan if math.isnan(
                results['accu']) else results['accu']
        else:
            assert results['accu'].ndim == 1
            # nanmean of an all-NaN array would warn and yield NaN;
            # detect the all-NaN case explicitly first
            empty = True
            for acc in results['accu']:
                if not np.isnan(acc):
                    empty = False
                    break
            step_accu = math.nan if empty else np.nanmean(results['accu'])

        assert 'loss' in results
        progress_bar.set_description('[%s] loss: %.5f, accu: %.5f' %
                                     (stage, results['loss'], step_accu))

        if self.logger is not None:
            self.logger.add_scalar('%s/step/loss' % stage,
                                   results['loss'], self.step[stage])
            # log -1 instead of NaN so the scalar plot stays continuous
            self.logger.add_scalar(
                '%s/step/accu' % stage,
                -1 if math.isnan(step_accu) else step_accu,
                self.step[stage])

        result_list.append(results)

    summary = dict()
    if need_revert:
        # NOTE(review): with multiple data flows `data_gen` is an exhausted
        # zip here — reversion presumably only applies to the
        # single-generator case; confirm against callers.
        reverter = Reverter(data_gen)
        # revertible keys are excluded from the plain averaging below
        result_collection_blacklist = reverter.revertible

        scores = dict()
        progress_bar = tqdm(reverter.on_batches(result_list),
                            total=len(reverter.data_list),
                            dynamic_ncols=True,
                            ncols=get_tty_columns(),
                            desc='[Data index]')
        for reverted in progress_bar:
            data_idx = reverted['idx']
            scores[data_idx] = reverted['score']
            info = '[%s] mean score: %.3f' % (
                data_idx, np.mean(list(scores[data_idx].values())))
            progress_bar.set_description(info)

        # summarize score of each class over data indices
        cls_scores = {
            cls: np.mean([scores[data_idx][cls] for data_idx in scores])
            for cls in class_names
        }
        cls_scores.update(
            {'mean': np.mean([cls_scores[cls] for cls in class_names])})
        summary['scores'] = scores
        summary['cls_scores'] = cls_scores
    else:
        result_collection_blacklist = []

    # collect results except those revertible ones, e.g., accu, loss
    summary.update({
        key: np.nanmean(np.vstack([result[key]
                                   for result in result_list]),
                        axis=0)
        for key in result_list[0].keys()
        if key not in result_collection_blacklist
    })

    # process 1D array accu to dictionary of each class score
    if len(summary['accu']) > 1:
        assert len(summary['accu']) == len(class_names), (len(
            summary['accu']), len(class_names))
        summary['cls_accu'] = {
            cls: summary['accu'][i]
            for (i, cls) in enumerate(class_names)
        }
        summary['accu'] = summary['accu'].mean()

    # print summary info (skip nested dict entries like scores)
    print('Average: ' + ', '.join([
        '%s: %.3f' % (key, val)
        for (key, val) in summary.items()
        if not isinstance(val, dict)
    ]))
    if 'cls_scores' in summary:
        print('Class score: ' + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary['cls_scores'].items()
        ]))
    return summary
# load config with open(args.config) as f: config = yaml.safe_load(f) generator_config = config['generator'] with open(config['data']) as f: data_config = yaml.safe_load(f) data_list = data_config['list'] loader_config = data_config['loader'] # - data pipeline loader_name = loader_config.pop('name') data_loader = DataLoader(loader_name, **loader_config) if data_list is not None: data_loader.set_data_list(data_list) data_gen = DataGenerator(data_loader, generator_config) reverter = Reverter(data_gen) ROIs = data_loader.ROIs DL = data_gen.struct['DL'] PG = data_gen.struct['PG'] BG = data_gen.struct['BG'] # ensure the order if PG.n_workers > 1: assert PG.ordered assert BG.n_workers == 1 if 'AG' in data_gen.struct: assert data_gen.struct['AG'].n_workers == 1 assert 'output_threshold' in config if args.prediction_dir is not None:
# Build one DataLoader/DataGenerator pair per stage; all stages share the
# same loader class and base config.
loader_name = loader_config.pop('name')
ROIs = None
for stage in stages:
    data_loader = DataLoader(loader_name, **loader_config)
    # the SSL training stage falls back to the 'valid' data list when it
    # has no list of its own
    if stage == 'train_ssl' and stage not in data_list:
        data_loader.set_data_list(data_list['valid'])
    else:
        assert stage in data_list
        data_loader.set_data_list(data_list[stage])
    data_gen[stage] = DataGenerator(data_loader, generator_config[stage])
    # ROIs (class names) are taken from the first constructed loader;
    # presumably identical across stages — TODO confirm
    if ROIs is None:
        ROIs = data_loader.ROIs

# FIXME: reverter is hard-wired to the 'valid' stage generator
reverter = Reverter(data_gen['valid'])

# - GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus'])
torch.backends.cudnn.enabled = True

# - model
model_handler = ModelHandler(**config['model'])

# - checkpoint handler
ckpt_handler = CheckpointHandler(model_handler, **config['ckpt_handler'])

# - optimizer
if 'optimizer' in config:
    optimizer = Optimizer(config['optimizer'])(model_handler.model)
else: