def run(self, data_gen, training=True):
    """Run one epoch over ``data_gen`` and return the averaged metrics.

    Args:
        data_gen: iterable of batches; must support ``len()``.
        training: if True, label the stage 'train' and advance
            ``self.step`` once per batch; otherwise 'valid' and the
            step counter is left untouched.

    Returns:
        dict with keys ``loss`` and ``accu`` holding the epoch averages
        (0.0 for both when the generator yields no batches).
    """
    stage = 'train' if training else 'valid'
    running_loss = running_accu = 0.0
    n_steps = len(data_gen)

    progress_bar = tqdm(
        enumerate(data_gen),
        total=n_steps,
        ncols=get_tty_columns(),
        dynamic_ncols=True,
        desc='[%s] Loss: %.5f, Accu: %.5f' % (stage, 0.0, 0.0)
    )

    for step, batch in progress_bar:
        loss, accu = self.process_batch(batch, training=training)

        # TODO: multiple classes
        accu = accu.mean()

        progress_bar.set_description(
            '[%s] Loss: %.5f, Avg accu: %.5f'
            % (stage, loss.item(), accu.item())
        )

        running_loss += loss.item()
        running_accu += accu.item()

        if self.logger is not None:
            self.logger.add_scalar(
                '%s/metrics/step_loss' % stage,
                loss.item(),
                self.step + 1
            )
            self.logger.add_scalar(
                '%s/metrics/step_accu' % stage,
                accu.item(),
                self.step + 1
            )

        if training:
            self.step += 1

    # FIX: guard the average against an empty generator; the original
    # divided unconditionally and raised ZeroDivisionError for n_steps == 0.
    if n_steps > 0:
        running_loss /= n_steps
        running_accu /= n_steps

    return {
        'loss': running_loss,
        'accu': running_accu,
    }
# Build one ModelHandler per configured module and freeze all of them,
# so this chunk runs inference only.
model_handlers = dict()
for (key, cfg) in config['module'].items():
    # the checkpoint is optional; build the model from config alone
    # when no 'ckpt' entry is given
    if 'ckpt' in cfg:
        ckpt = cfg['ckpt']
    else:
        ckpt = None
    model_handlers[key] = ModelHandler(cfg['config'], checkpoint=ckpt)
    model_handlers[key].model.eval()

    # toggle off all trainable parameters of each module
    for param in model_handlers[key].model.parameters():
        param.requires_grad = False

progress_bar = tqdm(
    data_gen,
    total=len(data_gen),
    ncols=get_tty_columns(),
    dynamic_ncols=True,
    desc='[Inferring]'
)


def batch_gen(source_gen):
    # Copy each batch dict, moving every tensor to the GPU when one is
    # available.
    # NOTE(review): no `yield` is visible in this view — the function
    # body looks truncated here; confirm the remainder of batch_gen
    # against the full file.
    for batch in source_gen:
        assert isinstance(batch, dict)
        data = dict()
        for key in batch:
            if torch.cuda.device_count() >= 1:
                data[key] = batch[key].cuda()
            else:
                data[key] = batch[key]
def run(self, stage):
    """Run every task of a configured stage for one epoch.

    Builds the data flow (single generator or several zipped ones),
    runs each task of the stage on every batch, logs per-step metrics,
    and finally summarizes the collected results (optionally reverting
    predictions to evaluate per-case scores).

    Args:
        stage: key into ``self.config['stage']``.

    Returns:
        dict mapping task name -> summary dict of averaged metrics.

    Raises:
        TypeError: when ``stage_config['generator']`` is neither a str
            nor a dict.
    """
    stage_config = self.config['stage'][stage]

    # build data flow from the given data generator
    # single data flow
    if isinstance(stage_config['generator'], str):
        data_gen = self.generators[stage_config['generator']]
        class_names = data_gen.struct['DL'].ROIs
        n_steps = len(data_gen)
        gen_tags = None

    # multiple data flows: iterate the generators in lockstep
    elif isinstance(stage_config['generator'], dict):
        gens = [
            self.generators[cfg]
            for cfg in stage_config['generator'].values()
        ]
        data_gen = zip(*gens)
        class_names = gens[0].struct['DL'].ROIs
        n_steps = min(len(g) for g in gens)
        gen_tags = list(stage_config['generator'].keys())

    else:
        raise TypeError('generator of type %s is not supported.' %
                        type(stage_config['generator']))

    progress_bar = tqdm(
        data_gen,
        total=n_steps,
        ncols=get_tty_columns(),
        dynamic_ncols=True,
        desc='[%s] loss: %.5f, accu: %.5f' % (stage, 0.0, 0.0)
    )

    if stage not in self.step:
        self.step[stage] = 1

    task_result_list = {
        task_name: []
        for task_name in stage_config['task']
    }
    need_revert = bool(stage_config.get('revert', False))

    for batch in progress_bar:
        # format the batch into {tag: {key: data}}
        if gen_tags is None:
            assert isinstance(batch, dict)
            formatted_batch = {NO_TAG: dict()}
            for key in batch:
                if torch.cuda.device_count() >= 1:
                    formatted_batch[NO_TAG][key] = batch[key].cuda()
                else:
                    formatted_batch[NO_TAG][key] = batch[key]
        else:
            formatted_batch = dict()
            for (tag, tag_batch) in zip(gen_tags, batch):
                tag_data = dict()
                for key in tag_batch:
                    if torch.cuda.device_count() >= 1:
                        tag_data[key] = tag_batch[key].cuda()
                    else:
                        tag_data[key] = tag_batch[key]
                formatted_batch[tag] = tag_data

        # execute each task in this stage
        for task_name in stage_config['task']:
            task_config = self.tasks[task_name]

            # skip the task periodically
            if 'period' in task_config \
                    and self.step[stage] % task_config['period'] != 0:
                continue

            # modify the status of modules if the running task changed
            if self.running_task != task_name:
                # toggle trainable parameters of each module
                for (key, toggle) in task_config['toggle'].items():
                    self.handlers[key].model.train(toggle)
                    for param in self.handlers[key].model.parameters():
                        param.requires_grad = toggle
                self.running_task = task_name

            task_result = self.run_task(
                task_config,
                formatted_batch,
                need_revert=need_revert,
            )

            # detach all the results, move them to CPU, and convert
            # them to numpy
            for key in task_result:
                task_result[key] = task_result[key].detach().cpu().numpy()

            # average accuracy if multi-dim
            assert 'accu' in task_result
            if task_result['accu'].ndim == 0:
                # BUGFIX: the original tested ``== math.nan`` which is
                # always False (NaN never compares equal to itself);
                # use math.isnan for the intended NaN check.
                step_accu = math.nan if math.isnan(task_result['accu']) \
                    else task_result['accu']
            else:
                assert task_result['accu'].ndim == 1
                # collapse to NaN when every class accuracy is NaN,
                # avoiding numpy's all-NaN-slice warning
                empty = np.isnan(task_result['accu']).all()
                step_accu = math.nan if empty \
                    else np.nanmean(task_result['accu'])

            assert 'loss' in task_result
            progress_bar.set_description(
                '[%s][%s] loss: %.5f, accu: %.5f' %
                (stage, task_name, task_result['loss'], step_accu))

            if self.logger is not None:
                self.logger.add_scalar(
                    '%s/%s/step/loss' % (stage, task_name),
                    task_result['loss'], self.step[stage])
                # NaN cannot be plotted; log -1 as its sentinel
                self.logger.add_scalar(
                    '%s/%s/step/accu' % (stage, task_name),
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

            task_result_list[task_name].append(task_result)

        self.step[stage] += 1

    # summarize the result list
    task_summary = dict()
    for (task_name, result_list) in task_result_list.items():
        if len(result_list) == 0:
            continue
        summary = dict()

        if need_revert:
            reverter = Reverter(data_gen)
            result_collection_blacklist = reverter.revertible

            scores = dict()
            progress_bar = tqdm(
                reverter.on_batches(result_list),
                total=len(reverter.data_list),
                dynamic_ncols=True,
                ncols=get_tty_columns(),
                desc='[Data index]')
            for reverted in progress_bar:
                data_idx = reverted['idx']
                scores[data_idx] = data_gen.struct['DL'].evaluate(
                    data_idx, reverted['prediction'])
                info = '[%s] mean score: %.3f' % (
                    data_idx, np.mean(list(scores[data_idx].values())))
                progress_bar.set_description(info)

            # summarize score of each class over data indices
            cls_scores = {
                cls: np.mean([scores[data_idx][cls] for data_idx in scores])
                for cls in class_names
            }
            summary['scores'] = scores
            summary['cls_scores'] = cls_scores
            summary['cls_mean'] = np.mean(list(cls_scores.values()))
        else:
            result_collection_blacklist = []

        # collect results except those revertible ones, e.g., collect
        # accu, loss
        summary.update({
            key: np.nanmean(
                np.vstack([result[key] for result in result_list]), axis=0)
            for key in result_list[0].keys()
            if key not in result_collection_blacklist
        })

        # process 1D array accu to dictionary of each class score
        if len(summary['accu']) > 1:
            assert len(summary['accu']) == len(class_names), (len(
                summary['accu']), len(class_names))
            summary['cls_accu'] = {
                cls: summary['accu'][i]
                for (i, cls) in enumerate(class_names)
            }
        summary['accu'] = summary['accu'].mean()

        # print summary info
        print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
            '%s: %.3f' % (key, val) for (key, val) in summary.items()
            if not isinstance(val, dict)
        ]))
        if 'cls_scores' in summary:
            print('Class score: ' + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary['cls_scores'].items()
            ]))
            print('Class mean: %.3f' % summary['cls_mean'])

        task_summary[task_name] = summary

    return task_summary
def run_invertible(self, stage):
    """Run a single-task stage where results are reverted on the fly.

    Unlike ``run``, the per-batch results are produced lazily by
    ``invertible_gen`` and consumed through ``Reverter.on_future_batches``,
    so prediction reversion and per-case scoring happen while the epoch
    is still running instead of after it.

    Args:
        stage: key into ``self.config['stage']``; the stage must define
            exactly one task.

    Returns:
        dict mapping the single task name to its summary dict.
    """
    stage_config = self.config['stage'][stage]
    # this runner only supports single-task stages
    assert len(stage_config['task']) == 1
    if stage not in self.step:
        self.step[stage] = 1
    task_name = stage_config['task'][0]
    task_config = self.tasks[task_name]

    (data_gen, n_steps, gen_tags, class_names) = self.get_data_gen(stage)
    # lazy generator of per-batch task results (see invertible_gen)
    inv_gen = self.invertible_gen(stage, data_gen, n_steps, gen_tags,
                                  task_name, task_config)
    reverter = Reverter(data_gen)

    summary = dict()
    # revertible keys (e.g. predictions) are excluded from plain averaging
    result_collection_blacklist = reverter.revertible

    # We use a mutable result list to collect the results during the
    # validation: the reverter appends each consumed batch result into
    # this list as a side effect while we iterate.
    result_list = []
    scores = dict()
    progress_bar = tqdm(
        reverter.on_future_batches(
            inv_gen,
            mutable_results=result_list,
        ),
        total=len(reverter.data_list),
        dynamic_ncols=True,
        ncols=get_tty_columns(),
        desc='[Data index]')

    for reverted in progress_bar:
        data_idx = reverted['idx']
        # per-class score dict for this case, computed by the data loader
        scores[data_idx] = data_gen.struct['DL'].evaluate(
            data_idx, reverted['prediction'])
        info = '[%s] mean score: %.3f' % (
            data_idx, np.mean(list(scores[data_idx].values())))
        progress_bar.set_description(info)

    # summarize score of each class over data indices
    cls_scores = {
        cls: np.mean([scores[data_idx][cls] for data_idx in scores])
        for cls in class_names
    }
    summary['scores'] = scores
    summary['cls_scores'] = cls_scores
    summary['cls_mean'] = np.mean(list(cls_scores.values()))

    # collect results except those revertible ones, e.g., collect accu, loss
    # NOTE(review): raises IndexError when result_list is empty — assumes
    # the generator yielded at least one batch; confirm against callers.
    summary.update({
        key: np.nanmean(
            np.vstack([result[key] for result in result_list]), axis=0)
        for key in result_list[0].keys()
        if key not in result_collection_blacklist
    })

    # process 1D array accu to dictionary of each class score
    if len(summary['accu']) > 1:
        assert len(summary['accu']) == len(class_names), (len(
            summary['accu']), len(class_names))
        summary['cls_accu'] = {
            cls: summary['accu'][i]
            for (i, cls) in enumerate(class_names)
        }
    summary['accu'] = summary['accu'].mean()

    # print summary info
    print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
        '%s: %.3f' % (key, val) for (key, val) in summary.items()
        if not isinstance(val, dict)
    ]))
    if 'cls_scores' in summary:
        print('Class score: ' + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary['cls_scores'].items()
        ]))
        print('Class mean: %.3f' % summary['cls_mean'])

    return {task_name: summary}
def invertible_gen(self, stage, data_gen, n_steps, gen_tags, task_name,
                   task_config):
    """Yield per-batch task results for a single task of a stage.

    Toggles the involved modules once up front, then for every batch:
    formats it into ``{tag: {key: tensor}}``, runs the task with
    ``need_revert=True``, converts the results to numpy, logs step
    metrics, advances the stage step counter, and yields the result
    dict. Intended to be consumed by ``Reverter.on_future_batches``.

    Args:
        stage: stage name, used for logging and step bookkeeping.
        data_gen: iterable of batches.
        n_steps: number of batches, for the progress bar total.
        gen_tags: tag list for multi-flow batches, or None for a single
            flow (batches are plain dicts).
        task_name: name of the task being run.
        task_config: the task's configuration dict.

    Yields:
        dict of numpy results for each batch.
    """
    progress_bar = tqdm(
        data_gen,
        total=n_steps,
        ncols=get_tty_columns(),
        dynamic_ncols=True,
        desc='[%s] loss: %.5f, accu: %.5f' % (stage, 0.0, 0.0)
    )

    # modify the status of modules if the running task changed
    if self.running_task != task_name:
        # toggle trainable parameters of each module
        for (key, toggle) in task_config['toggle'].items():
            self.handlers[key].model.train(toggle)
            for param in self.handlers[key].model.parameters():
                param.requires_grad = toggle
        self.running_task = task_name

    for batch in progress_bar:
        # format the batch into {tag: {key: data}}
        if gen_tags is None:
            assert isinstance(batch, dict)
            formatted_batch = {NO_TAG: dict()}
            for key in batch:
                if torch.cuda.device_count() >= 1:
                    formatted_batch[NO_TAG][key] = batch[key].cuda()
                else:
                    formatted_batch[NO_TAG][key] = batch[key]
        else:
            formatted_batch = dict()
            for (tag, tag_batch) in zip(gen_tags, batch):
                tag_data = dict()
                for key in tag_batch:
                    if torch.cuda.device_count() >= 1:
                        tag_data[key] = tag_batch[key].cuda()
                    else:
                        tag_data[key] = tag_batch[key]
                formatted_batch[tag] = tag_data

        task_result = self.run_task(
            task_config,
            formatted_batch,
            need_revert=True,
        )

        # detach all the results, move them to CPU, and convert them
        # to numpy
        for key in task_result:
            task_result[key] = task_result[key].detach().cpu().numpy()

        # average accuracy if multi-dim
        assert 'accu' in task_result
        if task_result['accu'].ndim == 0:
            # BUGFIX: the original tested ``== math.nan`` which is
            # always False (NaN never compares equal to itself); use
            # math.isnan for the intended check.
            step_accu = math.nan if math.isnan(task_result['accu']) \
                else task_result['accu']
        else:
            assert task_result['accu'].ndim == 1
            # collapse to NaN when every class accuracy is NaN
            empty = np.isnan(task_result['accu']).all()
            step_accu = math.nan if empty \
                else np.nanmean(task_result['accu'])

        assert 'loss' in task_result
        progress_bar.set_description(
            '[%s][%s] loss: %.5f, accu: %.5f' %
            (stage, task_name, task_result['loss'], step_accu))

        if self.logger is not None:
            self.logger.add_scalar('%s/%s/step/loss' % (stage, task_name),
                                   task_result['loss'], self.step[stage])
            # NaN cannot be plotted; log -1 as its sentinel
            self.logger.add_scalar(
                '%s/%s/step/accu' % (stage, task_name),
                -1 if math.isnan(step_accu) else step_accu,
                self.step[stage])

        self.step[stage] += 1
        yield task_result
def run(self, stage):
    """Run one epoch of a configured stage: forward, optional backward,
    metric logging, and result summarization.

    Args:
        stage: key into ``self.config['stage']``.

    Returns:
        dict summary of averaged metrics, plus per-case and per-class
        scores when the stage requests reversion.

    Raises:
        TypeError: when ``stage_config['generator']`` is neither a str
            nor a dict.
    """
    stage_config = self.config['stage'][stage]

    # build data flow from the given data generator
    # single data flow
    if isinstance(stage_config['generator'], str):
        data_gen = self.generators[stage_config['generator']]
        class_names = data_gen.struct['DL'].ROIs
        n_steps = len(data_gen)
        gen_tags = None

    # multiple data flows
    elif isinstance(stage_config['generator'], dict):
        gens = [
            self.generators[cfg]
            for cfg in stage_config['generator'].values()
        ]
        data_gen = zip(*gens)
        class_names = gens[0].struct['DL'].ROIs
        n_steps = min(len(g) for g in gens)
        gen_tags = list(stage_config['generator'].keys())

        # the forward config should match the multiple data flows
        assert isinstance(stage_config['forward'], dict)
        assert gen_tags == list(stage_config['forward'].keys())

    else:
        raise TypeError('generator of type %s is not supported.' %
                        type(stage_config['generator']))

    progress_bar = tqdm(
        data_gen,
        total=n_steps,
        ncols=get_tty_columns(),
        dynamic_ncols=True,
        desc='[%s] loss: %.5f, accu: %.5f' % (stage, 0.0, 0.0)
    )

    if stage not in self.step:
        self.step[stage] = 1

    # toggle trainable parameters of each module; backprop is only
    # needed when at least one module is trainable
    need_backward = False
    for key, toggle in stage_config['toggle'].items():
        self.handlers[key].model.train(toggle)
        for param in self.handlers[key].model.parameters():
            param.requires_grad = toggle
        if toggle:
            need_backward = True

    result_list = []
    need_revert = bool(stage_config.get('revert', False))

    for batch in progress_bar:
        self.step[stage] += 1

        # single data flow
        if gen_tags is None:
            assert isinstance(batch, dict)

            # insert batch to data
            data = dict()
            for key in batch:
                if torch.cuda.device_count() >= 1:
                    data[key] = batch[key].cuda()
                else:
                    data[key] = batch[key]

            # forward
            for key in stage_config['forward']:
                data.update(self.handlers[key].model(data))

        # multiple data flows
        else:
            assert isinstance(batch, tuple)
            data = dict()
            for (tag, tag_batch) in zip(gen_tags, batch):
                tag_data = dict()

                # insert batch to data
                for key in tag_batch:
                    if torch.cuda.device_count() >= 1:
                        tag_data[key] = tag_batch[key].cuda()
                    else:
                        tag_data[key] = tag_batch[key]

                # forward
                for key in stage_config['forward'][tag]:
                    tag_data.update(self.handlers[key].model(tag_data))

                # insert tag data back to the data
                data.update({
                    '%s_%s' % (key, tag): tag_data[key]
                    for key in tag_data
                })

        # compute loss and accuracy
        results = self.metrics[stage_config['metric']](data)

        # backpropagation
        if need_backward:
            results['loss'].backward()
            for key, toggle in stage_config['toggle'].items():
                if toggle:
                    self.optims[key].step()
                    self.optims[key].zero_grad()

        # compute match for dice score of each case after reversion
        if need_revert:
            assert 'prediction' in data, list(data.keys())
            assert 'label' in data, list(data.keys())
            with torch.set_grad_enabled(False):
                match, total = match_up(
                    data['prediction'],
                    data['label'],
                    needs_softmax=True,
                    batch_wise=True,
                    threshold=-1,
                )
                results.update({'match': match, 'total': total})

        # detach all results, move to CPU, and convert to numpy
        for key in results:
            results[key] = results[key].detach().cpu().numpy()

        # average accuracy if multi-dim
        assert 'accu' in results
        if results['accu'].ndim == 0:
            # BUGFIX: the original tested ``== math.nan`` which is
            # always False (NaN never compares equal to itself); use
            # math.isnan for the intended check.
            step_accu = math.nan if math.isnan(results['accu']) \
                else results['accu']
        else:
            assert results['accu'].ndim == 1
            # collapse to NaN when every class accuracy is NaN
            empty = np.isnan(results['accu']).all()
            step_accu = math.nan if empty else np.nanmean(results['accu'])

        assert 'loss' in results
        progress_bar.set_description('[%s] loss: %.5f, accu: %.5f' %
                                     (stage, results['loss'], step_accu))

        if self.logger is not None:
            self.logger.add_scalar('%s/step/loss' % stage,
                                   results['loss'], self.step[stage])
            # NaN cannot be plotted; log -1 as its sentinel
            self.logger.add_scalar(
                '%s/step/accu' % stage,
                -1 if math.isnan(step_accu) else step_accu,
                self.step[stage])

        result_list.append(results)

    summary = dict()
    if need_revert:
        reverter = Reverter(data_gen)
        result_collection_blacklist = reverter.revertible

        scores = dict()
        progress_bar = tqdm(
            reverter.on_batches(result_list),
            total=len(reverter.data_list),
            dynamic_ncols=True,
            ncols=get_tty_columns(),
            desc='[Data index]')
        for reverted in progress_bar:
            data_idx = reverted['idx']
            scores[data_idx] = reverted['score']
            info = '[%s] mean score: %.3f' % (
                data_idx, np.mean(list(scores[data_idx].values())))
            progress_bar.set_description(info)

        # summarize score of each class over data indices
        cls_scores = {
            cls: np.mean([scores[data_idx][cls] for data_idx in scores])
            for cls in class_names
        }
        cls_scores.update(
            {'mean': np.mean([cls_scores[cls] for cls in class_names])})

        summary['scores'] = scores
        summary['cls_scores'] = cls_scores
    else:
        result_collection_blacklist = []

    # collect results except those revertible ones, e.g., accu, loss
    summary.update({
        key: np.nanmean(
            np.vstack([result[key] for result in result_list]), axis=0)
        for key in result_list[0].keys()
        if key not in result_collection_blacklist
    })

    # process 1D array accu to dictionary of each class score
    if len(summary['accu']) > 1:
        assert len(summary['accu']) == len(class_names), (len(
            summary['accu']), len(class_names))
        summary['cls_accu'] = {
            cls: summary['accu'][i]
            for (i, cls) in enumerate(class_names)
        }
    summary['accu'] = summary['accu'].mean()

    # print summary info
    print('Average: ' + ', '.join([
        '%s: %.3f' % (key, val) for (key, val) in summary.items()
        if not isinstance(val, dict)
    ]))
    if 'cls_scores' in summary:
        print('Class score: ' + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary['cls_scores'].items()
        ]))

    return summary
def run(
    self,
    data_gen,
    training=True,
    stage=None,
    min_ratio=0.,
    **kwargs,
):
    """Run one epoch of learning or inference over ``data_gen``.

    Args:
        data_gen: iterable of batch dicts; must support ``len()``.
        training: learn when True, infer otherwise.
        stage: logging tag; defaults to 'train'/'valid' from `training`.
        min_ratio: skip batches whose positive-label ratio falls below
            this threshold (only checked when a logger is attached and
            the batch carries a 'label' entry).
        **kwargs: forwarded to ``self.learner.learn`` / ``.infer``.

    Returns:
        list of per-step result dicts with numpy values.
    """
    if stage is None:
        stage = 'train' if training else 'valid'
    n_steps = len(data_gen)

    progress_bar = tqdm(
        data_gen,
        total=n_steps,
        ncols=get_tty_columns(),
        dynamic_ncols=True,
        desc='[%s] Loss: %.5f, Accu: %.5f' % (stage, 0.0, 0.0)
    )

    if stage not in self.step:
        self.step[stage] = 1

    result_list = []
    for batch in progress_bar:
        self.step[stage] += 1

        if self.logger is not None and 'label' in batch:
            ratio = (batch['label'] > 0).float().mean().item()
            self.logger.add_scalar('%s/quality/ratio' % stage, ratio,
                                   self.step[stage])
            # skip low-quality (mostly background) batches
            if ratio < min_ratio:
                continue

        # FIX: only move tensors to GPU when one is available — the
        # original called .cuda() unconditionally and crashed on
        # CPU-only hosts; the sibling runners already guard this way.
        if torch.cuda.device_count() >= 1:
            data = {key: batch[key].cuda() for key in batch}
        else:
            data = {key: batch[key] for key in batch}

        if training:
            results = self.learner.learn(data, **kwargs)
        else:
            results = self.learner.infer(data, **kwargs)

        # detach all, move to CPU, and convert to numpy
        for key in results:
            results[key] = results[key].detach().cpu().numpy()

        if 'accu' in results:
            step_accu = np.nanmean(results['accu'])
        else:
            step_accu = math.nan

        if 'loss' in results:
            progress_bar.set_description(
                '[%s] Loss: %.5f, Avg accu: %.5f' %
                (stage, results['loss'], step_accu))
            if self.logger is not None:
                self.logger.add_scalar('%s/step/loss' % stage,
                                       results['loss'], self.step[stage])
                # NaN cannot be plotted; log -1 as its sentinel
                self.logger.add_scalar(
                    '%s/step/accu' % stage,
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

        result_list.append(results)
    return result_list
def run(
    self,
    data_gen,
    training=True,
    stage=None,
    unlabeled=False,  # FIXME
    train_dis=False,  # FIXME
    min_ratio=0.,
    include_prediction=False,
):
    """Run one epoch of adversarial segmentation training or inference.

    Args:
        data_gen: iterable of batch dicts; must support ``len()``.
        training: train the segmenter (and, after warm-up, the
            discriminator) when True; infer otherwise.
        stage: logging tag; defaults to 'train'/'valid' from `training`.
        unlabeled: use the segmenter's unlabeled learning path (FIXME).
        train_dis: train only the discriminator on segmenter inferences
            (FIXME).
        min_ratio: skip batches whose positive-label ratio falls below
            this threshold (only checked when a logger is attached and
            the batch carries a 'label' entry).
        include_prediction: forward predictions from inference instead
            of match statistics.

    Returns:
        list of per-step result dicts with numpy values.
    """
    if stage is None:
        stage = 'train' if training else 'valid'
    n_steps = len(data_gen)

    progress_bar = tqdm(
        data_gen,
        total=n_steps,
        ncols=get_tty_columns(),
        dynamic_ncols=True,
        desc='[%s] Loss: %.5f, Accu: %.5f' % (stage, 0.0, 0.0)
    )

    if stage not in self.step:
        self.step[stage] = 1

    result_list = []
    for batch in progress_bar:
        self.step[stage] += 1

        if self.logger is not None and 'label' in batch:
            ratio = (batch['label'] > 0).float().mean().item()
            self.logger.add_scalar('%s/quality/ratio' % stage, ratio,
                                   self.step[stage])
            # skip low-quality (mostly background) batches
            if ratio < min_ratio:
                continue

        # FIX: only move tensors to GPU when one is available — the
        # original called .cuda() unconditionally and crashed on
        # CPU-only hosts; the sibling runners already guard this way.
        if torch.cuda.device_count() >= 1:
            data = {key: batch[key].cuda() for key in batch}
        else:
            data = {key: batch[key] for key in batch}

        # FIXME
        if train_dis:
            results = self.learners['seg'].infer(data)
            results.update(self.learners['dis'].learn(data))
        else:
            if training:
                if unlabeled:
                    results = self.learners['seg'].learn_unlabeled(data)
                else:
                    results = self.learners['seg'].learn(data)
                # start adversarial training only after the warm-up steps
                if self.step[stage] >= self.start_adv:
                    results.update(self.learners['dis'].learn(data))
            else:
                results = self.learners['seg'].infer(
                    data,
                    include_prediction=include_prediction,
                    compute_match=(not include_prediction))

        # detach all, move to CPU, and convert to numpy
        for key in results:
            results[key] = results[key].detach().cpu().numpy()

        if 'accu' in results:
            step_accu = np.nanmean(results['accu'])
        else:
            step_accu = math.nan

        progress_bar.set_description('[%s] Loss: %.5f, Avg accu: %.5f' %
                                     (stage, results['loss'], step_accu))

        if self.logger is not None:
            self.logger.add_scalar('%s/step/loss' % stage,
                                   results['loss'], self.step[stage])
            # NaN cannot be plotted; log -1 as its sentinel
            self.logger.add_scalar(
                '%s/step/accu' % stage,
                -1 if math.isnan(step_accu) else step_accu,
                self.step[stage])

        result_list.append(results)
    return result_list