def _eval_from_gen(self, eval_gen, n_test_examples):
        """Evaluate the segmenter model on examples drawn from ``eval_gen``.

        Delegates to ``utils.eval_seg_from_gen`` using this object's
        ``segmenter_model``, ``label_mapping`` and ``logger``. The batch size
        handed to the helper is ``self.batch_size`` capped at 10.

        Args:
            eval_gen: Evaluation generator, forwarded unchanged to
                ``utils.eval_seg_from_gen``.
            n_test_examples: Number of examples to evaluate, forwarded as
                ``n_eval_examples``.

        Returns:
            A 3-tuple ``(cces, dice_per_label, accs)``: the first three of the
            four values returned by ``utils.eval_seg_from_gen``. Per the
            original inline note the test metrics are categorical
            cross-entropy and dice. The fourth value (bound as
            ``all_eval_ids`` in the original code) is discarded.
        """
        # Original note: batches are made manually so that every test example
        # is covered; the batch size never exceeds 10.
        eval_batch_size = min(self.batch_size, 10)

        eval_results = utils.eval_seg_from_gen(
            self.segmenter_model,
            eval_gen=eval_gen,
            label_mapping=self.label_mapping,
            n_eval_examples=n_test_examples,
            batch_size=eval_batch_size,
            logger=self.logger,
        )
        # The helper returns four values; only the three metric arrays are
        # handed back to the caller.
        cces, dice_per_label, accs, _eval_ids = eval_results
        return cces, dice_per_label, accs
        # NOTE(review): the statement below follows the return and is never
        # executed; `eval_ds` is also not bound anywhere in this method. It
        # looks like a stray line from another function -- confirm where it
        # belongs before moving or removing it.
        n_eval_examples = min(eval_ds.params['n_validation'], eval_ds.vols_labeled_valid.shape[0])

    # Run the segmentation evaluation over `eval_gen`, then build the path of
    # the results file for this dataset / mode / model.
    # NOTE(review): this is an excerpt of a larger function. `do_sas`,
    # `do_final_test`, `eval_ds`, `eval_gen`, `label_mapping`, `model_id`,
    # `n_eval_examples`, `voxelmorph_model` and `segmenter_model` are all
    # bound outside the lines shown here.
    if do_sas:
        # assume single atlas: the first labeled training volume and its
        # segmentation are used as the atlas.
        # NOTE(review): presumably these are numpy arrays, in which case
        # indexing with [[0]] keeps a leading axis of length 1 -- TODO confirm.
        source_X = eval_ds.vols_labeled_train[[0]]
        source_Y = eval_ds.segs_labeled_train[[0]]

        # Evaluate through the SAS helper, passing the voxelmorph model
        # together with the atlas volume and labels.
        eval_cces, eval_dice, eval_accs, eval_ids = utils.eval_seg_sas_from_gen(
            sas_model=voxelmorph_model,
            atlas_vol=source_X, atlas_labels=source_Y,
            eval_gen=eval_gen, label_mapping=label_mapping,
            n_eval_examples=n_eval_examples, batch_size=16)
    else:
        # Evaluate the segmenter model directly; the batch size is fixed at
        # 16, same as in the SAS branch.
        eval_cces, eval_dice, eval_accs, eval_ids = utils.eval_seg_from_gen(
            segmenter_model=segmenter_model,
            eval_gen=eval_gen, label_mapping=label_mapping,
            n_eval_examples=n_eval_examples, batch_size=16)
    # Only the evaluated ids are printed; eval_cces, eval_dice and eval_accs
    # are not used anywhere in the visible lines.
    print(eval_ids)

    # save results in a .mat file
    # The results directory is relative to the current working directory and
    # is created on first use.
    results_dir = './segmentation_test_results'
    if not os.path.isdir(results_dir):
        os.mkdir(results_dir)

    # The file name records which split was evaluated: 'test' or 'valid'.
    if do_final_test:
        mode = 'test'
    else:
        mode = 'valid'

    # File name pattern: <dataset display name>_<mode>_<model id>.mat
    results_file = os.path.join(results_dir, '{}_{}_{}.mat'.format(eval_ds.display_name, mode, model_id))
    # NOTE(review): no call that writes `results_file` is visible in this
    # excerpt, yet the message below reports the results as saved -- confirm
    # that the save actually happens elsewhere.
    print('Saved results to {}'.format(results_file))