Esempio n. 1
0
    config = json5.load(f)

# build up the data generator from the data config referenced by the main config
with open(config['generator']['data']) as f:
    data_config = json5.load(f)
data_list = data_config['list']
if args.test:
    # smoke-test mode: restrict the run to a single data entry
    data_list = data_list[:1]
loader_config = data_config['loader']
# pop 'name' so the remaining keys can be forwarded as keyword arguments
loader_name = loader_config.pop('name')
data_loader = DataLoader(loader_name, **loader_config)
data_loader.set_data_list(data_list)
data_gen = DataGenerator(data_loader, config['generator']['struct'])

# build up the reverter
reverter = Reverter(data_gen)
DL = data_gen.struct['DL']
PG = data_gen.struct['PG']
BG = data_gen.struct['BG']
# ensure the order: a multi-worker patch generator must be ordered and the
# batch/augmentation generators single-worker — presumably so the Reverter
# can map outputs back to their data indices; confirm against Reverter docs
if PG.n_workers > 1:
    assert PG.ordered
assert BG.n_workers == 1
if 'AG' in data_gen.struct:
    assert data_gen.struct['AG'].n_workers == 1

# - GPUs
if 'gpus' in config:
    if isinstance(config['gpus'], list):
        gpus = ','.join([str(idx) for idx in config['gpus']])
    else:
Esempio n. 2
0
    def run(self, stage):
        """Run every task of ``stage`` over its data generator(s).

        Builds the data flow (a single generator, or several tagged
        generators zipped together), formats each batch as
        ``{tag: {key: tensor}}``, executes each configured task on it,
        logs step-wise loss/accuracy, and finally summarizes the
        collected results per task (optionally reverting predictions
        for per-case scoring).

        Args:
            stage: key into ``self.config['stage']``.

        Returns:
            dict: ``{task_name: summary}`` for every task that produced
            at least one result; tasks skipped on every step by their
            'period' setting are omitted.

        Raises:
            TypeError: if the stage's 'generator' config is neither a
                str nor a dict.
        """
        stage_config = self.config['stage'][stage]

        # build data flow from the given data generator
        # single data flow
        if isinstance(stage_config['generator'], str):
            data_gen = self.generators[stage_config['generator']]
            class_names = data_gen.struct['DL'].ROIs
            n_steps = len(data_gen)
            gen_tags = None

        # multiple data flows: zip the tagged generators together
        elif isinstance(stage_config['generator'], dict):
            gens = [
                self.generators[cfg]
                for cfg in stage_config['generator'].values()
            ]
            data_gen = zip(*gens)
            class_names = gens[0].struct['DL'].ROIs
            # zip stops at the shortest generator
            n_steps = min(len(g) for g in gens)
            gen_tags = list(stage_config['generator'].keys())

        else:
            raise TypeError('generator of type %s is not supported.' %
                            type(stage_config['generator']))

        progress_bar = tqdm(data_gen,
                            total=n_steps,
                            ncols=get_tty_columns(),
                            dynamic_ncols=True,
                            desc='[%s] loss: %.5f, accu: %.5f' %
                            (stage, 0.0, 0.0))

        if stage not in self.step:
            self.step[stage] = 1

        task_result_list = {
            task_name: []
            for task_name in stage_config['task']
        }
        # 'revert' is optional in the stage config and defaults to False
        need_revert = stage_config.get('revert', False)
        # GPU availability cannot change mid-loop, so check it only once
        use_cuda = torch.cuda.device_count() >= 1
        for batch in progress_bar:

            # format the batch into {tag: {key: data}}
            if gen_tags is None:
                assert isinstance(batch, dict)
                formatted_batch = {
                    NO_TAG: {
                        key: batch[key].cuda() if use_cuda else batch[key]
                        for key in batch
                    }
                }
            else:
                formatted_batch = dict()
                for (tag, tag_batch) in zip(gen_tags, batch):
                    formatted_batch[tag] = {
                        key:
                        tag_batch[key].cuda() if use_cuda else tag_batch[key]
                        for key in tag_batch
                    }

            # execute each task in this stage
            for task_name in stage_config['task']:
                task_config = self.tasks[task_name]

                # skip the task periodically
                if 'period' in task_config \
                    and self.step[stage] % task_config['period'] != 0:
                    continue

                # modify the status of modules if the running task changed
                if self.running_task != task_name:

                    # toggle trainable parameters of each module
                    for (key, toggle) in task_config['toggle'].items():
                        self.handlers[key].model.train(toggle)
                        for param in self.handlers[key].model.parameters():
                            param.requires_grad = toggle

                    self.running_task = task_name

                task_result = self.run_task(
                    task_config,
                    formatted_batch,
                    need_revert=need_revert,
                )

                # detach all the results, move them to CPU, and convert them to numpy
                for key in task_result:
                    task_result[key] = task_result[key].detach().cpu().numpy()

                # average accuracy if multi-dim
                assert 'accu' in task_result
                if task_result['accu'].ndim == 0:
                    # Scalar accuracy: NaN propagates by itself. (The previous
                    # `x == math.nan` test was dead code — NaN never compares
                    # equal, so the comparison was always False.)
                    step_accu = float(task_result['accu'])
                else:
                    assert task_result['accu'].ndim == 1
                    # nanmean over an all-NaN vector warns, so guard explicitly
                    if np.isnan(task_result['accu']).all():
                        step_accu = math.nan
                    else:
                        step_accu = np.nanmean(task_result['accu'])

                assert 'loss' in task_result
                progress_bar.set_description(
                    '[%s][%s] loss: %.5f, accu: %.5f' %
                    (stage, task_name, task_result['loss'], step_accu))

                if self.logger is not None:
                    self.logger.add_scalar(
                        '%s/%s/step/loss' % (stage, task_name),
                        task_result['loss'], self.step[stage])
                    # NaN cannot be plotted; log -1 as a sentinel instead
                    self.logger.add_scalar(
                        '%s/%s/step/accu' % (stage, task_name),
                        -1 if math.isnan(step_accu) else step_accu,
                        self.step[stage])

                task_result_list[task_name].append(task_result)

            self.step[stage] += 1

        # summarize the result list
        task_summary = dict()
        for (task_name, result_list) in task_result_list.items():

            # a task may have been skipped on every step by its period
            if len(result_list) == 0:
                continue

            summary = dict()

            if need_revert:
                # NOTE(review): with multiple data flows data_gen is an
                # already-exhausted zip object here — reversion presumably
                # only makes sense with a single generator; confirm.
                reverter = Reverter(data_gen)
                result_collection_blacklist = reverter.revertible

                scores = dict()
                progress_bar = tqdm(reverter.on_batches(result_list),
                                    total=len(reverter.data_list),
                                    dynamic_ncols=True,
                                    ncols=get_tty_columns(),
                                    desc='[Data index]')
                for reverted in progress_bar:
                    data_idx = reverted['idx']
                    scores[data_idx] = data_gen.struct['DL'].evaluate(
                        data_idx, reverted['prediction'])
                    info = '[%s] mean score: %.3f' % (
                        data_idx, np.mean(list(scores[data_idx].values())))
                    progress_bar.set_description(info)

                # summarize score of each class over data indices
                cls_scores = {
                    cls:
                    np.mean([scores[data_idx][cls] for data_idx in scores])
                    for cls in class_names
                }

                summary['scores'] = scores
                summary['cls_scores'] = cls_scores
                summary['cls_mean'] = np.mean(list(cls_scores.values()))

            else:
                result_collection_blacklist = []

            # collect results except those revertible ones, e.g., collect accu, loss
            summary.update({
                key:
                np.nanmean(np.vstack([result[key] for result in result_list]),
                           axis=0)
                for key in result_list[0].keys()
                if key not in result_collection_blacklist
            })

            # process 1D array accu to dictionary of each class score
            if len(summary['accu']) > 1:
                assert len(summary['accu']) == len(class_names), (len(
                    summary['accu']), len(class_names))
                summary['cls_accu'] = {
                    cls: summary['accu'][i]
                    for (i, cls) in enumerate(class_names)
                }
                summary['accu'] = summary['accu'].mean()

            # print summary info
            print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary.items() if not isinstance(val, dict)
            ]))

            if 'cls_scores' in summary:
                print('Class score: ' + ', '.join([
                    '%s: %.3f' % (key, val)
                    for (key, val) in summary['cls_scores'].items()
                ]))
                print('Class mean: %.3f' % summary['cls_mean'])

            task_summary[task_name] = summary

        return task_summary
Esempio n. 3
0
# - data pipeline: one DataLoader/DataGenerator pair per stage
data_gen = dict()
# pop 'name' once; the remaining keys become DataLoader keyword arguments
loader_name = loader_config.pop('name')
ROIs = None
for stage in stages:
    data_loader = DataLoader(loader_name, **loader_config)
    if data_list[stage] is not None:
        data_loader.set_data_list(data_list[stage])
    data_gen[stage] = DataGenerator(data_loader, generator_config[stage])

    # ROIs are taken from the first stage's loader — presumably identical
    # across stages; confirm
    if ROIs is None:
        ROIs = data_loader.ROIs

# FIXME: the reverter is hard-wired to the 'valid' stage generator
reverter = Reverter(data_gen['valid'])

# - GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus'])
torch.backends.cudnn.enabled = True

# - model
model_handlers = {
    'seg': ModelHandler(
        config['models']['seg'],
        checkpoint=args.checkpoint,
    ),
    'dis': ModelHandler(
        config['models']['dis'],
        checkpoint=args.checkpoint_dis,
    ),
Esempio n. 4
0
# record the start time (elapsed-time reporting presumably happens later; not visible here)
timer = time.time()
with open(args.loader_config) as f:
    loader_config = yaml.safe_load(f)
# pop 'name' so the remaining keys can be forwarded as keyword arguments
loader_name = loader_config.pop('name')
data_loader = DataLoader(loader_name, **loader_config)

with open(args.generator_config) as f:
    generator_config = yaml.safe_load(f)
data_gen = DataGenerator(data_loader, generator_config)

os.makedirs(args.output_dir, exist_ok=True)

DL = data_gen.struct['DL']
# feed the ground-truth labels through the reverter as if they were
# predictions — presumably a round-trip sanity check of the revert pipeline
batch_list = list({'prediction': data['label']} for data in data_gen)
reverter = Reverter(data_gen)
progress_bar = tqdm(reverter.on_batches(batch_list),
                    total=len(reverter.data_list),
                    dynamic_ncols=True,
                    desc='[Data index]')

scores = dict()
for result in progress_bar:
    data_idx = result['idx']
    # save each reverted prediction and score it against the ground truth
    DL.save_prediction(data_idx, result['prediction'], args.output_dir)
    scores[data_idx] = DL.evaluate(data_idx, result['prediction'])

    # show the per-class scores of the current data index on the progress bar
    info = '[%s] ' % data_idx
    info += ', '.join('%s: %.3f' % (key, val)
                      for key, val in scores[data_idx].items())
    progress_bar.set_description(info)
Esempio n. 5
0
    def run_invertible(self, stage):
        """Run a single-task stage whose results are reverted on the fly.

        Results are collected into a mutable list that is shared with
        ``reverter.on_future_batches``, so per-case scores become
        available while the stage is still producing batches.

        Args:
            stage: key into ``self.config['stage']``; its 'task' list
                must contain exactly one task.

        Returns:
            dict: ``{task_name: summary}`` where summary holds per-case
            scores, per-class scores, their mean, and aggregated
            non-revertible metrics such as 'accu'.
        """

        stage_config = self.config['stage'][stage]
        assert len(stage_config['task']) == 1

        if stage not in self.step:
            self.step[stage] = 1

        task_name = stage_config['task'][0]
        task_config = self.tasks[task_name]

        (data_gen, n_steps, gen_tags, class_names) = self.get_data_gen(stage)
        inv_gen = self.invertible_gen(stage, data_gen, n_steps, gen_tags,
                                      task_name, task_config)
        reverter = Reverter(data_gen)

        summary = dict()

        # revertible keys are handled per data index below, so they are
        # excluded from the plain averaging at the end
        result_collection_blacklist = reverter.revertible

        # We use a mutable result list to collect the results during the validation
        result_list = []

        scores = dict()
        progress_bar = tqdm(reverter.on_future_batches(
            inv_gen,
            mutable_results=result_list,
        ),
                            total=len(reverter.data_list),
                            dynamic_ncols=True,
                            ncols=get_tty_columns(),
                            desc='[Data index]')
        for reverted in progress_bar:
            data_idx = reverted['idx']
            scores[data_idx] = data_gen.struct['DL'].evaluate(
                data_idx, reverted['prediction'])
            info = '[%s] mean score: %.3f' % (
                data_idx, np.mean(list(scores[data_idx].values())))
            progress_bar.set_description(info)

        # summarize score of each class over data indices
        cls_scores = {
            cls: np.mean([scores[data_idx][cls] for data_idx in scores])
            for cls in class_names
        }

        summary['scores'] = scores
        summary['cls_scores'] = cls_scores
        summary['cls_mean'] = np.mean(list(cls_scores.values()))

        # collect results except those revertible ones, e.g., collect accu, loss
        # NOTE(review): result_list[0] raises IndexError if no batches were
        # produced — confirm generators are always non-empty
        summary.update({
            key: np.nanmean(np.vstack([result[key] for result in result_list]),
                            axis=0)
            for key in result_list[0].keys()
            if key not in result_collection_blacklist
        })

        # process 1D array accu to dictionary of each class score
        if len(summary['accu']) > 1:
            assert len(summary['accu']) == len(class_names), (len(
                summary['accu']), len(class_names))
            summary['cls_accu'] = {
                cls: summary['accu'][i]
                for (i, cls) in enumerate(class_names)
            }
            summary['accu'] = summary['accu'].mean()

        # print summary info
        print('[%s][%s] Average: ' % (stage, task_name) + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary.items() if not isinstance(val, dict)
        ]))

        if 'cls_scores' in summary:
            print('Class score: ' + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary['cls_scores'].items()
            ]))
            print('Class mean: %.3f' % summary['cls_mean'])

        return {task_name: summary}
Esempio n. 6
0
    def run(self, stage):
        """Run one pass of ``stage`` over its data generator(s).

        Builds the data flow described by ``self.config['stage'][stage]``
        (one generator, or several tagged generators zipped together),
        forwards every batch through the configured model handlers,
        computes the stage metric, optionally backpropagates, and finally
        summarizes the collected results (optionally reverting
        predictions for per-case scoring).

        Args:
            stage: key into ``self.config['stage']``.

        Returns:
            dict: aggregated metrics (e.g. 'loss', 'accu') plus, when
            reversion is enabled, 'scores' and 'cls_scores'.

        Raises:
            TypeError: if the stage's 'generator' config is neither a
                str nor a dict.
        """
        stage_config = self.config['stage'][stage]

        # build data flow from the given data generator
        # single data flow
        if isinstance(stage_config['generator'], str):
            data_gen = self.generators[stage_config['generator']]
            class_names = data_gen.struct['DL'].ROIs
            n_steps = len(data_gen)
            gen_tags = None

        # multiple data flows: zip the tagged generators together
        elif isinstance(stage_config['generator'], dict):
            gens = [
                self.generators[cfg]
                for cfg in stage_config['generator'].values()
            ]
            data_gen = zip(*gens)
            class_names = gens[0].struct['DL'].ROIs
            # zip stops at the shortest generator
            n_steps = min(len(g) for g in gens)
            gen_tags = list(stage_config['generator'].keys())

            # the forward config should match the multiple data flows
            assert isinstance(stage_config['forward'], dict)
            assert gen_tags == list(stage_config['forward'].keys())

        else:
            raise TypeError('generator of type %s is not supported.' %
                            type(stage_config['generator']))

        progress_bar = tqdm(data_gen,
                            total=n_steps,
                            ncols=get_tty_columns(),
                            dynamic_ncols=True,
                            desc='[%s] loss: %.5f, accu: %.5f' %
                            (stage, 0.0, 0.0))

        if stage not in self.step:
            self.step[stage] = 1

        # toggle trainable parameters of each module; a backward pass is
        # only needed when at least one module is trainable in this stage
        need_backward = False
        for key, toggle in stage_config['toggle'].items():
            self.handlers[key].model.train(toggle)
            for param in self.handlers[key].model.parameters():
                param.requires_grad = toggle
            if toggle:
                need_backward = True

        result_list = []
        # 'revert' is optional in the stage config and defaults to False
        need_revert = stage_config.get('revert', False)
        # GPU availability cannot change mid-loop, so check it only once
        use_cuda = torch.cuda.device_count() >= 1
        for batch in progress_bar:

            self.step[stage] += 1

            # single data flow
            if gen_tags is None:
                assert isinstance(batch, dict)

                # insert batch to data
                data = {
                    key: batch[key].cuda() if use_cuda else batch[key]
                    for key in batch
                }

                # forward
                for key in stage_config['forward']:
                    data.update(self.handlers[key].model(data))

            # multiple data flows
            else:
                assert isinstance(batch, tuple)
                data = dict()
                for (tag, tag_batch) in zip(gen_tags, batch):

                    # insert batch to data
                    tag_data = {
                        key:
                        tag_batch[key].cuda() if use_cuda else tag_batch[key]
                        for key in tag_batch
                    }

                    # forward through the modules configured for this tag
                    for key in stage_config['forward'][tag]:
                        tag_data.update(self.handlers[key].model(tag_data))

                    # insert tag data back to the data, namespaced by tag
                    data.update({
                        '%s_%s' % (key, tag): tag_data[key]
                        for key in tag_data
                    })

            # compute loss and accuracy
            results = self.metrics[stage_config['metric']](data)

            # backpropagation: step only the optimizers of trainable modules
            if need_backward:
                results['loss'].backward()
                for key, toggle in stage_config['toggle'].items():
                    if toggle:
                        self.optims[key].step()
                        self.optims[key].zero_grad()

            # compute match for dice score of each case after reversion
            if need_revert:
                assert 'prediction' in data, list(data.keys())
                assert 'label' in data, list(data.keys())
                with torch.set_grad_enabled(False):
                    match, total = match_up(
                        data['prediction'],
                        data['label'],
                        needs_softmax=True,
                        batch_wise=True,
                        threshold=-1,
                    )
                    results.update({'match': match, 'total': total})

            # detach all results, move to CPU, and convert to numpy
            for key in results:
                results[key] = results[key].detach().cpu().numpy()

            # average accuracy if multi-dim
            assert 'accu' in results
            if results['accu'].ndim == 0:
                # Scalar accuracy: NaN propagates by itself. (The previous
                # `x == math.nan` test was dead code — NaN never compares
                # equal, so the comparison was always False.)
                step_accu = float(results['accu'])
            else:
                assert results['accu'].ndim == 1
                # nanmean over an all-NaN vector warns, so guard explicitly
                if np.isnan(results['accu']).all():
                    step_accu = math.nan
                else:
                    step_accu = np.nanmean(results['accu'])

            assert 'loss' in results
            progress_bar.set_description('[%s] loss: %.5f, accu: %.5f' %
                                         (stage, results['loss'], step_accu))

            if self.logger is not None:
                self.logger.add_scalar('%s/step/loss' % stage, results['loss'],
                                       self.step[stage])
                # NaN cannot be plotted; log -1 as a sentinel instead
                self.logger.add_scalar(
                    '%s/step/accu' % stage,
                    -1 if math.isnan(step_accu) else step_accu,
                    self.step[stage])

            result_list.append(results)

        summary = dict()
        if need_revert:
            # NOTE(review): with multiple data flows data_gen is an
            # already-exhausted zip object here — reversion presumably
            # only makes sense with a single generator; confirm.
            reverter = Reverter(data_gen)
            result_collection_blacklist = reverter.revertible

            scores = dict()
            progress_bar = tqdm(reverter.on_batches(result_list),
                                total=len(reverter.data_list),
                                dynamic_ncols=True,
                                ncols=get_tty_columns(),
                                desc='[Data index]')
            for reverted in progress_bar:
                data_idx = reverted['idx']
                scores[data_idx] = reverted['score']
                info = '[%s] mean score: %.3f' % (
                    data_idx, np.mean(list(scores[data_idx].values())))
                progress_bar.set_description(info)

            # summarize score of each class over data indices
            cls_scores = {
                cls: np.mean([scores[data_idx][cls] for data_idx in scores])
                for cls in class_names
            }
            cls_scores.update(
                {'mean': np.mean([cls_scores[cls] for cls in class_names])})

            summary['scores'] = scores
            summary['cls_scores'] = cls_scores

        else:
            result_collection_blacklist = []

        # collect results except those revertible ones, e.g., accu, loss
        # NOTE(review): result_list[0] raises IndexError if no batches were
        # produced — confirm generators are always non-empty.
        summary.update({
            key: np.nanmean(np.vstack([result[key] for result in result_list]),
                            axis=0)
            for key in result_list[0].keys()
            if key not in result_collection_blacklist
        })

        # process 1D array accu to dictionary of each class score
        if len(summary['accu']) > 1:
            assert len(summary['accu']) == len(class_names), (len(
                summary['accu']), len(class_names))
            summary['cls_accu'] = {
                cls: summary['accu'][i]
                for (i, cls) in enumerate(class_names)
            }
            summary['accu'] = summary['accu'].mean()

        # print summary info
        print('Average: ' + ', '.join([
            '%s: %.3f' % (key, val)
            for (key, val) in summary.items() if not isinstance(val, dict)
        ]))

        if 'cls_scores' in summary:
            print('Class score: ' + ', '.join([
                '%s: %.3f' % (key, val)
                for (key, val) in summary['cls_scores'].items()
            ]))

        return summary
Esempio n. 7
0
# load config
with open(args.config) as f:
    config = yaml.safe_load(f)
generator_config = config['generator']
# the main config points at a separate data config file
with open(config['data']) as f:
    data_config = yaml.safe_load(f)
data_list = data_config['list']
loader_config = data_config['loader']

# - data pipeline
# pop 'name' so the remaining keys can be forwarded as keyword arguments
loader_name = loader_config.pop('name')
data_loader = DataLoader(loader_name, **loader_config)
if data_list is not None:
    data_loader.set_data_list(data_list)
data_gen = DataGenerator(data_loader, generator_config)
reverter = Reverter(data_gen)

ROIs = data_loader.ROIs
DL = data_gen.struct['DL']
PG = data_gen.struct['PG']
BG = data_gen.struct['BG']
# ensure the order: a multi-worker patch generator must be ordered and the
# batch/augmentation generators single-worker — presumably so the Reverter
# can map outputs back to their data indices; confirm
if PG.n_workers > 1:
    assert PG.ordered
assert BG.n_workers == 1
if 'AG' in data_gen.struct:
    assert data_gen.struct['AG'].n_workers == 1

assert 'output_threshold' in config

if args.prediction_dir is not None:
Esempio n. 8
0
# pop 'name' once; the remaining keys become DataLoader keyword arguments
loader_name = loader_config.pop('name')
ROIs = None
for stage in stages:
    data_loader = DataLoader(loader_name, **loader_config)
    # the SSL training stage falls back to the validation list when it has
    # no data list of its own
    if stage == 'train_ssl' and stage not in data_list:
        data_loader.set_data_list(data_list['valid'])
    else:
        assert stage in data_list
        data_loader.set_data_list(data_list[stage])
    data_gen[stage] = DataGenerator(data_loader, generator_config[stage])

    # ROIs are taken from the first stage's loader — presumably identical
    # across stages; confirm
    if ROIs is None:
        ROIs = data_loader.ROIs

# FIXME: the reverter is hard-wired to the 'valid' stage generator
reverter = Reverter(data_gen['valid'])

# - GPUs
os.environ['CUDA_VISIBLE_DEVICES'] = str(config['gpus'])
torch.backends.cudnn.enabled = True

# - model
model_handler = ModelHandler(**config['model'])

# - checkpoint handler
ckpt_handler = CheckpointHandler(model_handler, **config['ckpt_handler'])

# - optimizer
if 'optimizer' in config:
    optimizer = Optimizer(config['optimizer'])(model_handler.model)
else:
Esempio n. 9
0
# load config
with open(args.config) as f:
    config = yaml.safe_load(f)
generator_config = config['generator']
# the main config points at a separate data config file
with open(config['data']) as f:
    data_config = yaml.safe_load(f)
data_list = data_config['list']
loader_config = data_config['loader']

# - data pipeline
# pop 'name' so the remaining keys can be forwarded as keyword arguments
loader_name = loader_config.pop('name')
data_loader = DataLoader(loader_name, **loader_config)
if data_list is not None:
    data_loader.set_data_list(data_list)
data_gen = DataGenerator(data_loader, generator_config)
reverter = Reverter(data_gen)

ROIs = data_loader.ROIs
DL = data_gen.struct['DL']
PG = data_gen.struct['PG']
BG = data_gen.struct['BG']
# ensure the order: a multi-worker patch generator must be ordered and the
# batch/augmentation generators single-worker — presumably so the Reverter
# can map outputs back to their data indices; confirm
if PG.n_workers > 1:
    assert PG.ordered
assert BG.n_workers == 1
if 'AG' in data_gen.struct:
    assert data_gen.struct['AG'].n_workers == 1

assert 'output_threshold' in config

if args.prediction_dir is not None:
Esempio n. 10
0
# load config
with open(args.config) as f:
    config = yaml.safe_load(f)
generator_config = config['generator']
# the main config points at a separate data config file
with open(config['data']) as f:
    data_config = yaml.safe_load(f)
data_list = data_config['list']
loader_config = data_config['loader']

# - data pipeline
# pop 'name' so the remaining keys can be forwarded as keyword arguments
loader_name = loader_config.pop('name')
data_loader = DataLoader(loader_name, **loader_config)
if data_list is not None:
    data_loader.set_data_list(data_list)
data_gen = DataGenerator(data_loader, generator_config)
reverter = Reverter(data_gen)

ROIs = data_loader.ROIs
DL = data_gen.struct['DL']
PG = data_gen.struct['PG']
BG = data_gen.struct['BG']
# ensure the order: a multi-worker patch generator must be ordered and the
# batch/augmentation generators single-worker — presumably so the Reverter
# can map outputs back to their data indices; confirm
if PG.n_workers > 1:
    assert PG.ordered
assert BG.n_workers == 1
if 'AG' in data_gen.struct:
    assert data_gen.struct['AG'].n_workers == 1

assert 'output_threshold' in config

if args.prediction_dir is not None: