コード例 #1
0
def retrain_loo_segment_image(imgs_idx_path, path_classif, path_dump,
                              path_out, path_visu):
    """ leave-one-out: refit the saved classifier without one image
    and segment that image with the refitted classifier

    The dumped training data and the saved classifier are loaded, features
    and labels of the selected image are removed from the training set,
    the classifier is fitted on the remaining images and the held-out image
    is segmented.

    :param imgs_idx_path: index and path of the input image in the form
        accepted by `parse_imgs_idx_path`
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param str path_out: path to segmentation outputs
    :param str path_visu: path forwarded to `segment_image`,
        presumably a folder for visualisations - TODO confirm
    :return (str, ndarray, ndarray): image name and the two segmentations
        as returned by `segment_image`
    :raises ValueError: if removing the image did not shrink the training
        set by exactly one item
    """
    idx, path_img = parse_imgs_idx_path(imgs_idx_path)
    dict_imgs, _, _, dict_features, dict_labels, _, _ = \
        load_dump_data(path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    # exclude the tested image from the training data
    idx_name = get_idx_name(idx, path_img)
    for dict_data in (dict_features, dict_labels):
        dict_data.pop(idx_name, None)
    # explicit check (not `assert`) so it is also enforced when running
    # python with optimisation enabled
    if (len(dict_imgs) - len(dict_features)) != 1:
        raise ValueError('no image was dropped from training set')

    # refit the classifier on the reduced training set
    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1])
    classif.fit(features, labels)

    idx_name, segm, segm_gc = segment_image(imgs_idx_path, params, classif,
                                            path_out, path_visu)
    return idx_name, segm, segm_gc
コード例 #2
0
def retrain_lpo_segment_image(list_imgs_idx_path,
                              path_classif,
                              path_dump,
                              path_out,
                              path_visu,
                              show_debug_imgs=SHOW_DEBUG_IMAGES):
    """ leave-P-out: refit the saved classifier without a subset of images
    and segment these images with the refitted classifier

    The dumped training data and the saved classifier are loaded, features
    and labels of all listed images are removed from the training set,
    the classifier is fitted on the remaining images and each held-out image
    is segmented.

    :param list list_imgs_idx_path: pairs of (index, path to input image)
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param str path_out: path to segmentation outputs
    :param str path_visu: path forwarded to `segment_image`,
        presumably a folder for visualisations - TODO confirm
    :param bool show_debug_imgs: whether show debug images
    :return (dict, dict): two dictionaries indexed by image name holding
        the two segmentations returned by `segment_image`
    :raises ValueError: if the number of removed images does not match
        the number of requested images
    """
    dict_imgs, _, _, dict_features, dict_labels, _, _ = load_dump_data(
        path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    # exclude all tested images from the training data
    for idx, path_img in list_imgs_idx_path:
        idx_name = get_idx_name(idx, path_img)
        dict_features.pop(idx_name, None)
        dict_labels.pop(idx_name, None)
    if (len(dict_imgs) - len(dict_features)) != len(list_imgs_idx_path):
        raise ValueError(
            'subset of %i images was not dropped, training set %i from total %i'
            % (len(list_imgs_idx_path), len(dict_features), len(dict_imgs)))

    # the configured `drop_labels` may be missing, None or any sequence,
    # so normalise it to a list before extending the default ones
    drop_labels = [-1, np.nan] + list(params.get('drop_labels') or [])
    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        balance_type=params['balance'],
        drop_labels=drop_labels)
    classif.fit(features, labels)

    dict_segm, dict_segm_gc = {}, {}
    for imgs_idx_path in list_imgs_idx_path:
        idx_name, segm, segm_gc = segment_image(
            imgs_idx_path,
            params,
            classif,
            path_out,
            path_visu,
            show_debug_imgs=show_debug_imgs)
        dict_segm[idx_name] = segm
        dict_segm_gc[idx_name] = segm_gc
    gc.collect()
    time.sleep(1)
    return dict_segm, dict_segm_gc
コード例 #3
0
def main_train(params):
    """ run the complete training pipeline

    0) prepare the experiment folder, logger and load the table of paths
    1) load the dumped training data or compute and dump them now
    2) train the classifier with hyper-parameters search and evaluate it
       with cross-validation
    3) optionally perform the Leave-One-Out experiment

    :param {str: any} params: experiment configuration
    """
    params = prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    def _in_expt(name):
        # path of an item located inside the experiment folder
        return os.path.join(params['path_expt'], name)

    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.save_config_yaml(_in_expt(NAME_YAML_PARAMS), params)
    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    df_paths, _ = load_df_paths(params)

    # reuse the dump if it exists and recomputing is not forced
    path_dump = _in_expt(NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels,
         feature_names) = load_dump_data(path_dump)
    else:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            dataset_load_images_segms_compute_features(
                params, df_paths, params['nb_workers'])
        assert len(dict_imgs) > 0, 'missing images'
        save_dump_data(path_dump, dict_imgs, dict_segms, dict_slics,
                       dict_points, dict_centers, dict_features,
                       dict_labels, feature_names)

    if is_drawing(params['path_expt']) and EXPORT_TRAINING_DATA:
        export_dataset_visual(params['path_expt'], dict_imgs, dict_segms,
                              dict_slics, dict_points, dict_labels,
                              params['nb_workers'])

    # merge per-image features and labels into a single dataset
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        drop_labels=[-1], balance_type=params['balance'])
    # replace invalid values in the feature space
    features[np.isnan(features)] = 0
    features[np.isinf(features)] = -1
    assert np.sum(sizes) == len(labels), \
        'not equal sizes (%d) and labels (%i)' \
        % (int(np.sum(sizes)), len(labels))

    # hyper-parameters search and training of the classifier
    nb_holdout_search = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    cv_search = seg_clf.CrossValidateGroups(sizes, nb_holdout_search)
    classif, path_classif = seg_clf.create_classif_search_train_export(
        params['classif'],
        features,
        labels,
        cross_val=cv_search,
        params=params,
        feature_names=feature_names,
        nb_search_iter=params['nb_classif_search'],
        pca_coef=params.get('pca_coef'),
        nb_workers=params['nb_workers'],
        path_out=params['path_expt'],
    )
    params['path_classif'] = path_classif

    # cross-validated evaluation of the trained classifier
    nb_holdout_eval = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_EVAL))
    cv_eval = seg_clf.CrossValidateGroups(sizes, nb_holdout_eval)
    seg_clf.eval_classif_cross_val_scores(
        params['classif'], classif, features, labels,
        cross_val=cv_eval, path_out=params['path_expt'])
    seg_clf.eval_classif_cross_val_roc(
        params['classif'], classif, features, labels,
        cross_val=cv_eval, path_out=params['path_expt'])

    if RUN_LEAVE_ONE_OUT:
        experiment_loo(classif, dict_imgs, dict_segms, dict_centers,
                       dict_slics, dict_points, dict_features, feature_names,
                       params)
コード例 #4
0
ファイル: pipelines.py プロジェクト: tauhidstanford/pyImSegm
def train_classif_color2d_slic_features(list_images,
                                        list_annots,
                                        dict_features,
                                        sp_size=30,
                                        sp_regul=0.2,
                                        clf_name=CLASSIF_NAME,
                                        label_purity=0.9,
                                        feature_balance='unique',
                                        pca_coef=None,
                                        nb_classif_search=1,
                                        nb_hold_out=CROSS_VAL_LEAVE_OUT,
                                        nb_workers=1):
    """ train a classifier on a list of annotated images

    For each pair image-annotation the superpixels, their features and labels
    are computed, then all features and labels are merged into one dataset
    and the classifier is trained with a hyper-parameters search.

    :param [ndarray] list_images: input images
    :param [ndarray] list_annots: annotations, one for each image
    :param dict(list(str)) dict_features: list of features to be extracted
    :param int sp_size: initial size of a superpixel (meaning edge length)
    :param float sp_regul: regularisation in range(0;1) where "0" gives
        elastic and "1" nearly square segments
    :param str clf_name: select the used classifier
    :param float label_purity: set the sample-labels purity for training
    :param str feature_balance: set how to balance datasets
    :param float pca_coef: select PCA coef or None
    :param int nb_classif_search: number of tries for hyper-parameters search
    :param int nb_hold_out: cross-val leave out
    :param int nb_workers: parallelism
    :return: trained classifier and lists of superpixels, features and labels
        computed for the particular images
    """
    logging.info('TRAIN Superpixels-Features-Classifier')
    assert len(list_images) == len(list_annots), \
        'size of images (%i) and annotations (%i) should match' \
        % (len(list_images), len(list_annots))

    _compute_single = partial(wrapper_compute_color2d_slic_features_labels,
                              sp_size=sp_size,
                              sp_regul=sp_regul,
                              dict_features=dict_features,
                              label_purity=label_purity)
    results = list(WrapExecuteSequence(
        _compute_single,
        zip(list_images, list_annots),
        desc='compute SLIC & features & labels',
        nb_workers=nb_workers))
    # split the triplets (slic, features, labels) into separate lists
    list_slic = [slic for slic, _, _ in results]
    list_features = [fts for _, fts, _ in results]
    list_labels = [lbs for _, _, lbs in results]

    logging.debug('concentrate features...')
    # merge features and labels, the "do not care" label -1 is dropped
    features, labels, sizes = convert_set_features_labels_2_dataset(
        dict(enumerate(list_features)),
        dict(enumerate(list_labels)),
        balance_type=feature_balance,
        drop_labels=[-1])
    features = np.nan_to_num(features)

    logging.debug('train classifier...')
    # grouped cross-validation does not make sense for a small number
    # of training images, so a plain number of folds is used by default
    cv = 10
    if len(sizes) > (nb_hold_out * 5):
        cv = CrossValidateGroups(sizes, nb_hold_out=nb_hold_out)

    classif, _ = create_classif_search_train_export(
        clf_name,
        features,
        labels,
        pca_coef=pca_coef,
        cross_val=cv,
        nb_search_iter=nb_classif_search,
        nb_workers=nb_workers)

    return classif, list_slic, list_features, list_labels
コード例 #5
0
def main_train(params):
    """ the main training pipeline composed from following steps:
    1) create the experiment folder and load already computed data
       (features and labels) or compute and dump them now
    2) evaluate the superpixel labeling against the annotation and
       filter the training labels by purity
    3) load or train classifier with hyper-parameters search
    4) predict on training images and perform Leave-P-Out experiment

    :param {str: ...} params: experiment configuration
    :return {str: ...}: parameters as updated during the run
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    show_visual = params.get('visual', False)

    reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=params.get(
                                                  'unique',
                                                  EACH_UNIQUE_EXPERIMENT),
                                              skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    # reuse the dump if it exists and recomputing is not forced
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params,
                                                       show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project the superpixel labels back to the image (pixel) domain
    dict_annot_slic = {
        n: np.asarray(dict_labels[n])[dict_slics[n]]
        for n in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels',
                                            None), params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0, `pd.concat` is available
    # in all versions; a single record (dict or Series) becomes one row
    summary = get_summary(df, 'SLIC-annot')
    if isinstance(summary, (dict, pd.Series)):
        summary = [summary]
    df_stat = pd.concat([df_stat, pd.DataFrame(summary)], ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
        if show_visual else None
    dict_labels = filter_train_with_purity(dict_imgs,
                                           dict_labels,
                                           dict_label_hist,
                                           params['label_purity'],
                                           dict_slics,
                                           drop_labels=params.get(
                                               'drop_labels', None),
                                           path_visu=path_purity_visu,
                                           nb_jobs=params['nb_jobs'])

    logging.info('prepare features...')
    # the configured `drop_labels` may be missing, None or any sequence,
    # so normalise it to a list before extending the default ones
    drop_labels = [-1, np.nan] + list(params.get('drop_labels') or [])
    # concentrate features, labels and drop "do not care" labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        balance_type=params['balance'],
        drop_labels=drop_labels)
    features = np.nan_to_num(features)

    nb_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * nb_holdout)))  # minimum is 1
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))  # max is half of the set
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    def _path_expt(n):
        return os.path.join(params['path_exp'], n)

    # test classif. on  training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params,
                                  paths_img,
                                  classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))

    gc.collect()
    time.sleep(1)

    # LEAVE P OUT
    if params.get('run_LPO', True):
        idx_paths_img = list(
            zip(df_paths.index.tolist(), df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params,
                                 df_stat,
                                 dict_annot,
                                 idx_paths_img,
                                 path_classif,
                                 path_dump,
                                 nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))

    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params
コード例 #6
0
def main_train(params):
    """ the main training pipeline composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) evaluate the superpixel labeling against the annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images

    :param {str: ...} params: experiment configuration
    :return {str: ...}: parameters as updated during the run
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')

    reload_dir_config = (os.path.isfile(params['path_config']) or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(params, dir_name=NAME_EXPERIMENT,
                                              stamp_unique=EACH_UNIQUE_EXPERIMENT,
                                              skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    # reuse the dump if it exists and recomputing is not forced
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project the superpixel labels back to the image (pixel) domain
    dict_annot_slic = {n: np.asarray(dict_labels[n])[dict_slics[n]]
                       for n in dict_annot}
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist, NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0, `pd.concat` is available
    # in all versions; a single record (dict or Series) becomes one row
    summary = get_summary(df, 'SLIC-annot')
    if isinstance(summary, (dict, pd.Series)):
        summary = [summary]
    df_stat = pd.concat([df_stat, pd.DataFrame(summary)], ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'], NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # mark samples whose dominant label share is below the required purity
    # as "do not care" (-1), they are dropped from the training set below
    for name in dict_labels:
        weights = np.max(dict_label_hist[name], axis=1)
        dict_labels[name][weights < params['label_purity']] = -1

    logging.info('prepare features...')
    # concentrate features, labels and drop "do not care" labels which are -1
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'], drop_labels=[-1])
    features = np.nan_to_num(features)

    nb_holdout = max(1, int(round(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH)))
    params, classif, path_classif = load_train_classifier(params, features,
                                                          labels,  feature_names,
                                                          sizes, nb_holdout)

    # test classif on images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    paths_img = df_paths['path_image'].tolist()
    perform_predictions(params, paths_img, classif)

    # LEAVE ONE OUT
    if RUN_CROSS_VAL_LOO:
        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump)

    # LEAVE P OUT
    if RUN_CROSS_VAL_LPO:
        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump, nb_holdout)

    logging.info('training DONE')
    return params
コード例 #7
0
def train_classif_color2d_slic_features(list_images,
                                        list_annots,
                                        clr_space='rgb',
                                        sp_size=30,
                                        sp_regul=0.2,
                                        dict_features=FTS_SET_SIMPLE,
                                        clf_name=CLASSIF_NAME,
                                        label_purity=0.9,
                                        feature_balance='unique',
                                        pca_coef=None,
                                        nb_classif_search=1,
                                        nb_jobs=1):
    """ train a classifier on a list of annotated images

    For each pair image-annotation the superpixels, their features and labels
    are computed, then all features and labels are merged into one dataset
    and the classifier is trained.

    :param [ndarray] list_images: input images
    :param [ndarray] list_annots: annotations, one for each image
    :param str clr_space: chose the color space
    :param int sp_size: initial size of a superpixel (meaning edge length)
    :param float sp_regul: regularisation in range(0;1) where "0" gives
        elastic and "1" nearly square segments
    :param {str: [str]} dict_features: list of features to be extracted
    :param str clf_name: select the used classifier
    :param float label_purity: set the sample-labels purity for training
    :param str feature_balance: set how to balance datasets
    :param float pca_coef: select PCA coef or None
    :param int nb_classif_search: number of tries for hyper-parameters search
    :param int nb_jobs: parallelism
    :return: trained classifier and lists of superpixels, features and labels
        computed for the particular images
    """
    logging.info('TRAIN Superpixels-Features-Classifier')
    assert len(list_images) == len(list_annots), \
        'size of images (%i) and annotations (%i) should match' \
        % (len(list_images), len(list_annots))

    _compute_single = partial(wrapper_compute_color2d_slic_features_labels,
                              clr_space=clr_space,
                              sp_size=sp_size,
                              sp_regul=sp_regul,
                              dict_features=dict_features,
                              label_purity=label_purity)
    results = list(tl_expt.WrapExecuteSequence(
        _compute_single, zip(list_images, list_annots), nb_jobs=nb_jobs))
    # split the triplets (slic, features, labels) into separate lists
    list_slic = [slic for slic, _, _ in results]
    list_features = [fts for _, fts, _ in results]
    list_labels = [lbs for _, _, lbs in results]

    logging.debug('concentrate features...')
    # merge features and labels, the "do not care" label -1 is dropped
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict(enumerate(list_features)),
        dict(enumerate(list_labels)),
        balance_type=feature_balance,
        drop_labels=[-1])
    features = np.nan_to_num(features)

    logging.debug('train classifier...')
    # leaving image sets out does not make sense for a small number
    # of training images, so a plain number of folds is used by default
    cv = 10
    if len(sizes) > (CROSS_VAL_LEAVE_OUT * 5):
        cv = seg_clf.CrossValidatePSetsOut(sizes,
                                           nb_hold_out=CROSS_VAL_LEAVE_OUT)

    classif, _ = seg_clf.create_classif_train_export(
        clf_name,
        features,
        labels,
        nb_search_iter=nb_classif_search,
        cross_val=cv,
        nb_jobs=nb_jobs,
        pca_coef=pca_coef)

    return classif, list_slic, list_features, list_labels