Python create_subfolders Examples, imsegm.utils.experiments.create_subfolders Python Examples

Example #1

0

Show file

def main(params):
    """ PIPELINE candidate clustering

    Dump the configuration to ``config_clustering.json``, create the
    experiment sub-folders, match images / segmentations / centers across
    the three input folders, run the clustering on every matched triple and
    export the resulting table.

    :param {str: any} params: experiment parameters; the keys read here are
        'path_expt', 'path_images', 'path_segms', 'path_centers', 'nb_jobs'
    """
    path_config = os.path.join(params['path_expt'], 'config_clustering.json')
    with open(path_config, 'w') as fp:
        json.dump(params, fp)

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    list_paths = [params[k] for k in ['path_images', 'path_segms', 'path_centers']]
    df_paths = tl_data.find_files_match_names_across_dirs(list_paths)
    df_paths.columns = ['path_image', 'path_segm', 'path_points']
    # number the triples from 1
    df_paths.index = range(1, len(df_paths) + 1)
    path_cover = os.path.join(params['path_expt'], run_train.NAME_CSV_TRIPLES)
    # first export holds the input triples, it is overwritten by the results
    df_paths.to_csv(path_cover)

    logging.info('run clustering...')
    _wrapper_clustering = partial(cluster_points_draw_export, params=params,
                                  path_out=params['path_expt'])
    rows = (dict(row) for _, row in df_paths.iterrows())
    iterate = tl_expt.WrapExecuteSequence(_wrapper_clustering, rows,
                                          nb_jobs=params['nb_jobs'])
    # collect all results first and build the table once; `DataFrame.append`
    # was removed in pandas 2.0 and copied the whole frame on every call
    df_paths_new = pd.DataFrame(list(iterate))

    # presumably every result record carries an 'image' key - TODO confirm
    df_paths_new.set_index('image', inplace=True)
    df_paths_new.to_csv(path_cover)

Example #2

0

Show file

File: run_segm_slic_model_graphcut.py Project: chelovek21/pyImSegm

def main(params):
    """ the main body containing two approaches:
    1) segment each image independently
    2) estimate model over whole image sequence and segment with it

    Both results are compared afterwards and the comparison is exported
    as CSV into the experiment folder.

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters as returned by `create_experiment_folder`
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running...')

    skip_load = os.path.isfile(params['path_config']) or FORCE_RELOAD
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=EACH_UNIQUE_EXPERIMENT,
                                              skip_load=skip_load)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

    path_train_list = params['path_train_list']
    assert os.path.isfile(path_train_list), 'missing %s' % path_train_list
    paths_img = pd.read_csv(path_train_list, index_col=0)['path_image'].tolist()

    def _in_expt(name):
        # resolve a file/folder name inside the experiment folder
        return os.path.join(params['path_exp'], name)

    # 1) single model per image
    segms_single = experiment_single_gmm(params, paths_img,
                                         _in_expt(FOLDER_SEGM_GMM),
                                         _in_expt(FOLDER_SEGM_GMM_VISU))
    gc.collect()
    time.sleep(1)

    # 2) one model over the whole set of images
    segms_group = experiment_group_gmm(params, paths_img,
                                       _in_expt(FOLDER_SEGM_GROUP),
                                       _in_expt(FOLDER_SEGM_GROUP_VISU))
    gc.collect()
    time.sleep(1)

    # compare both segmentations and export the statistic
    df_ars = compare_segms_metric_ars(segms_single, segms_group,
                                      suffix='_gmm-group')
    df_ars.to_csv(_in_expt(NAME_CSV_ARS_CORES))
    logging.info(df_ars.describe())

    logging.info('DONE')
    return params

Example #3

0

Show file

File: run_center_evaluation.py Project: chelovek21/pyImSegm

def evaluate_detection_stage(df_paths, stage, path_info, path_out, nb_jobs=1):
    """ evaluate center detection for particular list of stages

    The per-stage annotation table is cached as CSV in `path_out`; it is
    recomputed when the file is missing or `FORCE_RELOAD` is set.

    :param df_paths: table iterated row by row (`iterrows`), each item is
        passed to `load_center_evaluate`
    :param [int] stage: stages to evaluate, joined by '-' in file/folder names
    :param str path_info: passed to `seg_annot.load_info_group_by_slices`
    :param str path_out: output folder for CSV files and sub-folders
    :param int nb_jobs: passed as `nb_jobs` to `WrapExecuteSequence`
    :return DF: table of evaluations, or the unchanged `df_paths` when the
        annotation table for the requested stages is empty
    """
    logging.info('evaluate stages: %s', repr(stage))
    str_stage = '-'.join(map(str, stage))

    path_csv = os.path.join(path_out, NAME_CSV_ANNOT_STAGE % str_stage)
    if not os.path.exists(path_csv) or FORCE_RELOAD:
        df_slices_info = seg_annot.load_info_group_by_slices(path_info, stage)
        logging.debug('export slices_info to "%s"', path_csv)
        df_slices_info.to_csv(path_csv)
    else:
        logging.debug('loading slices_info from "%s"', path_csv)
        df_slices_info = pd.read_csv(path_csv, index_col=0)

    # nothing annotated for these stages, so there is nothing to evaluate
    if len(df_slices_info) == 0:
        return df_paths

    # df_paths = pd.merge(df_paths, df_slices_info, how='inner',
    #                     left_index=True, right_index=True)

    path_annot = os.path.join(path_out, FOLDER_ANNOT % str_stage)
    path_visu = os.path.join(path_out, FOLDER_ANNOT_VISUAL % str_stage)
    list_dirs = [os.path.basename(p) for p in [path_annot, path_visu]]
    logging.debug('create sub-dirs: %s', repr(list_dirs))
    tl_expt.create_subfolders(path_out, list_dirs)

    # perform on new images
    stage_prefix = '[stage-%s] ' % str_stage
    logging.info('start section %s - load_center_evaluate ...', stage_prefix)
    wrapper_detection = partial(load_center_evaluate,
                                df_annot=df_slices_info,
                                path_annot=path_annot,
                                path_visu=path_visu,
                                col_prefix=stage_prefix)
    iterate = tl_expt.WrapExecuteSequence(wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_jobs=nb_jobs)
    # `DataFrame.append` was removed in pandas 2.0, so the records are
    # gathered in a list and the table is rebuilt from it
    list_evals = []
    df_eval = pd.DataFrame()
    path_csv_temp = os.path.join(path_out, NAME_CSV_TRIPLES_TEMP)
    for dict_eval in iterate:
        list_evals.append(dict_eval)
        df_eval = pd.DataFrame(list_evals)
        # export the partial results after each processed item
        df_eval.to_csv(path_csv_temp)
    return df_eval

Example #4

0

Show file

File: run_center_prediction.py Project: chelovek21/pyImSegm

def main(params):
    """ PIPELINE for new detections

    Prepare the experiment folder, load the image/segmentation pairs and the
    trained classifier, run the center detection on every pair and export
    the collected statistic as CSV.

    :param {str: str} params: experiment parameters; the keys read here are
        'path_expt' (after the folder preparation), 'path_list',
        'path_images', 'path_segms', 'path_classif' and 'nb_jobs'
    """
    logging.info('running...')
    params = run_train.prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    # run_train.check_pathes_patterns(paths)
    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info('COMPUTER: \n%s', repr(os.uname()))
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBFOLDER)

    path_csv = os.path.join(params['path_expt'], NAME_CSV_TRIPLES)
    df_paths = get_csv_triplets(params['path_list'],
                                path_csv,
                                params['path_images'],
                                params['path_segms'],
                                force_reload=FORCE_RERUN)

    # parameters stored with the classifier, overridden by the current ones
    dict_classif = seg_clf.load_classifier(params['path_classif'])
    params_clf = dict_classif['params']
    params_clf.update(params)
    # NOTE(review): this logs `params`, not the merged `params_clf` - confirm
    # which one the 'UPDATED PARAMETERS' message is meant to show
    logging.info(tl_expt.string_dict(params, desc='UPDATED PARAMETERS'))

    # perform on new images
    wrapper_detection = partial(load_compute_detect_centers,
                                params=params_clf,
                                path_classif=params['path_classif'],
                                path_output=params['path_expt'])
    iterate = tl_expt.WrapExecuteSequence(wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_jobs=params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0, so the records are
    # gathered in a list and the table is rebuilt from it
    list_stat = []
    df_stat = pd.DataFrame()
    path_csv_temp = os.path.join(params['path_expt'], NAME_CSV_TRIPLES_TEMP)
    for dict_center in iterate:
        list_stat.append(dict_center)
        df_stat = pd.DataFrame(list_stat)
        # export the partial results after each processed item
        df_stat.to_csv(path_csv_temp)

    df_stat.set_index(['image'], inplace=True)
    df_stat.to_csv(os.path.join(params['path_expt'], NAME_CSV_TRIPLES))
    logging.info('STATISTIC: \n %s', repr(df_stat.describe()))

    logging.info('DONE')
Example #5

0

Show file

File: run_ovary_egg-segmentation.py Project: sureshkumar0707/pyImSegm

def main(params, debug_export=DEBUG_EXPORT):
    """ the main entry point: prepare the experiment folder with all
    sub-folders and run the segmentation on every listed item

    :param {str: ...} params: segmentation parameters
    :param bool debug_export: whether to create the debug sub-folders
    """
    logging.getLogger().setLevel(logging.DEBUG)

    params = tl_expt.create_experiment_folder(
        params,
        dir_name=NAME_EXPERIMENT,
        stamp_unique=EACH_UNIQUE_EXPERIMENT)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    # tl_expt.create_subfolders(params['path_exp'], [FOLDER_IMAGE])

    df_paths = pd.read_csv(params['path_list'], index_col=0)
    columns = df_paths.columns.tolist()
    logging.info('loaded %i items with columns: %s', len(df_paths),
                 repr(columns))
    # keep only the complete rows
    df_paths.dropna(how='any', inplace=True)

    # create sub-folders if required
    tl_expt.create_subfolders(params['path_exp'], ['input', 'simple'])
    dict_segment = create_dict_segmentation(params, None, None, None, None)
    names = list(dict_segment)
    dirs_centre = [name + DIR_CENTRE_POSIX for name in names]
    dirs_visual = [name + DIR_VISUAL_POSIX for name in names]
    tl_expt.create_subfolders(params['path_exp'],
                              names + dirs_centre + dirs_visual)
    if debug_export:
        dirs_debug = [name + DIR_DEBUG_POSIX
                      for name in names if 'rg2sp' in name]
        tl_expt.create_subfolders(params['path_exp'], dirs_debug)

    _wrapper_segment = partial(image_segmentation, params=params)
    iterate = tl_expt.WrapExecuteSequence(_wrapper_segment,
                                          df_paths.iterrows(),
                                          nb_jobs=params['nb_jobs'])
    # exhaust the sequence, the returned values are not used
    for _ in iterate:
        pass

Example #6

0

Show file

File: run_ovary_segm_evaluation.py Project: chelovek21/pyImSegm

def main(dict_paths, export_visual=EXPORT_VUSIALISATION, nb_jobs=NB_THREADS):
    """ evaluate all segmentations in experiment folder

    Every sub-folder of `dict_paths['results']` is evaluated, except those
    with '___' in the name and those listed in `SKIP_DIRS`; the overall
    statistic is exported as CSV into the results folder.

    :param {str: str} dict_paths: paths to all required directories,
        the key 'results' is read here
    :param bool export_visual: export visualisations
    :param int nb_jobs: number of jobs running in parallel
    """
    logging.info('running in %i jobs...', nb_jobs)
    logging.info(tl_expt.string_dict(dict_paths, desc='PATHS'))

    # single sort is enough, filtering keeps the order
    list_results = sorted(
        p for p in glob.glob(os.path.join(dict_paths['results'], '*'))
        if os.path.isdir(p) and '___' not in os.path.basename(p)
        and os.path.basename(p) not in SKIP_DIRS)

    tl_expt.create_subfolders(
        dict_paths['results'],
        [NAME_DIR_VISUAL_1, NAME_DIR_VISUAL_2, NAME_DIR_VISUAL_3])

    wrapper_eval = partial(evaluate_folder,
                           dict_paths=dict_paths,
                           export_visual=export_visual)
    iterate = tl_expt.WrapExecuteSequence(wrapper_eval,
                                          list_results,
                                          nb_jobs=nb_jobs)
    # collect all results first and build the table once; `DataFrame.append`
    # was removed in pandas 2.0 and copied the whole frame on every call
    df_all = pd.DataFrame(list(iterate))

    df_all.set_index(['method'], inplace=True)
    df_all.sort_index(inplace=True)
    logging.info('STATISTIC: \n %s', repr(df_all))
    df_all.to_csv(
        os.path.join(dict_paths['results'], NAME_CSV_STAT % 'OVERALL'))

    logging.info('Done :]')
Example #7

0

Show file

def main_train(params):
    """ PIPELINE for training
    0) load triplets or create triplets from path to images, annotations
    1) load precomputed data or compute them now
    2) train classifier with hyper-parameters
    3) perform Leave-One-Out experiment (only if `RUN_LEAVE_ONE_OUT`)

    :param {str: any} params: experiment parameters; 'path_classif' is set
        here after the classifier is trained and exported
    """
    logging.info('run TRAINING...')
    params = prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    path_json = os.path.join(params['path_expt'], NAME_JSON_PARAMS)
    with open(path_json, 'w') as fp:
        json.dump(params, fp)

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    df_paths, _ = load_df_paths(params)

    # reuse the dumped training data unless missing or recomputing is forced
    path_dump_data = os.path.join(params['path_expt'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump_data) and not FORCE_RECOMP_DATA:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            load_dump_data(path_dump_data)
    else:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            dataset_load_images_segms_compute_features(params, df_paths,
                                                       params['nb_jobs'])
        assert len(dict_imgs) > 0, 'missing images'
        save_dump_data(path_dump_data, dict_imgs, dict_segms, dict_slics,
                       dict_points, dict_centers, dict_features, dict_labels,
                       feature_names)

    if is_drawing(params['path_expt']) and EXPORT_TRAINING_DATA:
        export_dataset_visual(params['path_expt'], dict_imgs, dict_segms,
                              dict_slics, dict_points, dict_labels,
                              params['nb_jobs'])

    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        drop_labels=[-1], balance_type=params['balance'])
    # remove all bad values from features space: NaN -> 0, +/-inf -> -1
    features[np.isnan(features)] = 0
    features[np.isinf(features)] = -1
    nb_samples = np.sum(sizes)
    assert nb_samples == len(labels), \
        'not equal sizes (%d) and labels (%i)' \
        % (int(nb_samples), len(labels))

    # feature norm & train classification
    nb_holdout = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    cv_search = seg_clf.CrossValidatePSetsOut(sizes, nb_holdout)
    classif, path_classif = seg_clf.create_classif_train_export(
        params['classif'], features, labels,
        cross_val=cv_search,
        params=params,
        feature_names=feature_names,
        nb_search_iter=params['nb_classif_search'],
        pca_coef=params.get('pca_coef', None),
        nb_jobs=params['nb_jobs'],
        path_out=params['path_expt'])
    params['path_classif'] = path_classif

    # evaluate the trained classifier: scores first, then ROC
    nb_holdout = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_EVAL))
    cv_eval = seg_clf.CrossValidatePSetsOut(sizes, nb_holdout)
    seg_clf.eval_classif_cross_val_scores(
        params['classif'], classif, features, labels,
        cross_val=cv_eval, path_out=params['path_expt'])
    seg_clf.eval_classif_cross_val_roc(
        params['classif'], classif, features, labels,
        cross_val=cv_eval, path_out=params['path_expt'])

    if RUN_LEAVE_ONE_OUT:
        experiment_loo(classif, dict_imgs, dict_segms, dict_centers,
                       dict_slics, dict_points, dict_features, feature_names)

    logging.info('DONE')

Example #8

0

Show file

File: run_segm_slic_model_graphcut.py Project: sureshkumar0707/pyImSegm

def main(params):
    """ the main body containing two approaches:
    1) segment each image independently
    2) estimate model over whole image sequence and segment with it
       (only if `params['run_groupGMM']` is set)

    When both segmentations are available they are compared and the
    comparison is exported as CSV into the experiment folder.

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters as returned by `create_experiment_folder`
    """
    logging.getLogger().setLevel(logging.DEBUG)
    # read before `params` is replaced by the experiment configuration
    show_visual = params.get('visual', False)

    skip_load = os.path.isfile(params['path_config']) or FORCE_RELOAD
    stamp_unique = params.get('unique', EACH_UNIQUE_EXPERIMENT)
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=stamp_unique,
                                              skip_load=skip_load)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

    paths_img = load_path_images(params)
    assert len(paths_img) > 0, 'missing images'

    def _in_expt(name):
        # resolve a file/folder name inside the experiment folder
        return os.path.join(params['path_exp'], name)

    # Segment as single model per image
    visu_single = _in_expt(FOLDER_SEGM_GMM_VISU) if show_visual else None
    segms_single = experiment_single_gmm(params, paths_img,
                                         _in_expt(FOLDER_SEGM_GMM),
                                         visu_single,
                                         show_debug_imgs=show_visual)
    gc.collect()
    time.sleep(1)

    # Segment as model over set of images
    if params.get('run_groupGMM', False):
        visu_group = _in_expt(FOLDER_SEGM_GROUP_VISU) if show_visual else None
        segms_group = experiment_group_gmm(params, paths_img,
                                           _in_expt(FOLDER_SEGM_GROUP),
                                           visu_group,
                                           show_debug_imgs=show_visual)
    else:
        write_skip_file(_in_expt(FOLDER_SEGM_GROUP))
        # write_skip_file(_in_expt(FOLDER_SEGM_GROUP_VISU))
        segms_group = None

    # nothing to compare without the group segmentation
    if segms_group is None:
        return params

    df_ars = compare_segms_metric_ars(segms_single, segms_group,
                                      suffix='_gmm-group')
    df_ars.to_csv(_in_expt(NAME_CSV_ARS_CORES))
    logging.info(df_ars.describe())

    return params

Example #9

0

Show file

File: run_segm_slic_classif_graphcut.py Project: chelovek21/pyImSegm

def main_train(params):
    """ the training composed of the following steps:
    1) load already computed data (features and labels) or compute them now
    2) evaluate labeled superpixels against the annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images
       (each only if enabled by its `RUN_CROSS_VAL_*` flag)

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters as returned by `load_train_classifier`
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')

    skip_load = os.path.isfile(params['path_config']) or FORCE_RELOAD
    params = tl_expt.create_experiment_folder(
        params,
        dir_name=NAME_EXPERIMENT,
        stamp_unique=EACH_UNIQUE_EXPERIMENT,
        skip_load=skip_load)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    def _in_expt(name):
        # resolve a file/folder name inside the experiment folder
        return os.path.join(params['path_exp'], name)

    # compute the training data when the dump is missing or recomputing
    # is forced, otherwise load the dump
    path_dump = _in_expt(NAME_DUMP_TRAIN_DATA)
    if not os.path.isfile(path_dump) or FORCE_RECOMP_DATA:
        (dict_imgs, dict_annot, dict_slics, dict_features, dict_labels,
         dict_label_hist, feature_names) = \
            dataset_load_images_annot_compute_features(params)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    else:
        (dict_imgs, dict_annot, dict_slics, dict_features, dict_labels,
         dict_label_hist, feature_names) = load_dump_data(path_dump)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # index the per-superpixel labels by the superpixel map of each image
    dict_annot_slic = {
        name: np.asarray(dict_labels[name])[dict_slics[name]]
        for name in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params['nb_jobs'])
    # NOTE(review): `DataFrame.append` was removed in pandas 2.0; the proper
    # replacement depends on what `get_summary` returns - TODO confirm
    summary = get_summary(df, 'SLIC-annot')
    df_stat = df_stat.append(summary, ignore_index=True)
    path_csv_stat = _in_expt(NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        transitions = seg_gc.count_label_transitions_connected_segments(
            dict_slics, dict_labels)
        params['label_transitions'] = transitions
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # mark labels with purity below the threshold as "do not care" (-1)
    for name, seg_labels in dict_labels.items():
        purity = np.max(dict_label_hist[name], axis=1)
        seg_labels[purity < params['label_purity']] = -1

    logging.info('prepare features...')
    # concentrate features, labels; the -1 labels are passed as `drop_labels`
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        balance_type=params['balance'],
        drop_labels=[-1])
    features = np.nan_to_num(features)

    nb_holdout = max(1, int(round(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH)))
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    # test classif on images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    paths_img = df_paths['path_image'].tolist()
    perform_predictions(params, paths_img, classif)

    # LEAVE ONE OUT
    if RUN_CROSS_VAL_LOO:
        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump)

    # LEAVE P OUT
    if RUN_CROSS_VAL_LPO:
        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump, nb_holdout)

    logging.info('training DONE')
    return params

Example #10

0

Show file

File: run_segm_slic_classif_graphcut.py Project: sureshkumar0707/pyImSegm

def main_train(params):
    """ the training composed of the following steps:
    1) load already computed data (features and labels) or compute them now
    2) evaluate labeled superpixels against the annotation
    3) load or train classifier with hyper-parameters search
    4) predict on the training images (only if `RUN_TRAIN_PREDICT`)
    5) perform Leave-P-Out experiment on images (unless `params['run_LPO']`
       is disabled)

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters as returned by `load_train_classifier`
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    # read before `params` is replaced by the experiment configuration
    show_visual = params.get('visual', False)

    skip_load = os.path.isfile(params.get('path_config', '')) or FORCE_RELOAD
    stamp_unique = params.get('unique', EACH_UNIQUE_EXPERIMENT)
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=stamp_unique,
                                              skip_load=skip_load)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    def _path_expt(n):
        # resolve a file/folder name inside the experiment folder
        return os.path.join(params['path_exp'], n)

    # compute the training data when the dump is missing or recomputing
    # is forced, otherwise load the dump
    path_dump = _path_expt(NAME_DUMP_TRAIN_DATA)
    if not os.path.isfile(path_dump) or FORCE_RECOMP_DATA:
        (dict_imgs, dict_annot, dict_slics, dict_features, dict_labels,
         dict_label_hist, feature_names) = \
            dataset_load_images_annot_compute_features(params, show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    else:
        (dict_imgs, dict_annot, dict_slics, dict_features, dict_labels,
         dict_label_hist, feature_names) = load_dump_data(path_dump)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # index the per-superpixel labels by the superpixel map of each image
    dict_annot_slic = {}
    for name in dict_annot:
        dict_annot_slic[name] = np.asarray(dict_labels[name])[dict_slics[name]]
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels', None),
                                 params['nb_jobs'])
    # NOTE(review): `DataFrame.append` was removed in pandas 2.0; the proper
    # replacement depends on what `get_summary` returns - TODO confirm
    summary = get_summary(df, 'SLIC-annot')
    df_stat = df_stat.append(summary, ignore_index=True)
    path_csv_stat = _path_expt(NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        transitions = seg_gc.count_label_transitions_connected_segments(
            dict_slics, dict_labels)
        params['label_transitions'] = transitions
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # visualisation of the purity filtering is exported only on request
    if show_visual:
        path_purity_visu = _path_expt(FOLDER_SLIC_ANNOT)
    else:
        path_purity_visu = None
    dict_labels = filter_train_with_purity(
        dict_imgs,
        dict_labels,
        dict_label_hist,
        params['label_purity'],
        dict_slics,
        drop_labels=params.get('drop_labels', None),
        path_visu=path_purity_visu,
        nb_jobs=params['nb_jobs'])

    logging.info('prepare features...')
    # concentrate features, labels; -1 and NaN labels are always dropped,
    # together with the user defined ones
    drop_labels = [-1, np.nan] + params.get('drop_labels', [])
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        balance_type=params['balance'],
        drop_labels=drop_labels)
    features = np.nan_to_num(features)

    # hold-out size: given ratio of the sets, at least 1 and at most a half
    ratio_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * ratio_holdout)))
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    # test classif. on training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params, paths_img, classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))

    gc.collect()
    time.sleep(1)

    # LEAVE P OUT
    if params.get('run_LPO', True):
        indexes = df_paths.index.tolist()
        idx_paths_img = list(zip(indexes, df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params, df_stat, dict_annot, idx_paths_img,
                                 path_classif, path_dump, nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))
        # write_skip_file(_path_expt(FOLDER_LPO_VISU))

    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params