def retrain_loo_segment_image(imgs_idx_path, path_classif, path_dump,
                              path_out, path_visu):
    """ leave-one-out step: refit the stored classifier without one image
    and segment that image with the refitted model

    The dumped features and labels are loaded, the entry of the given image
    is removed from both of them, the saved pipeline is fitted again on the
    remaining samples and then handed over to `segment_image`.

    :param imgs_idx_path: index/path item of the held-out image, in the form
        accepted by `parse_imgs_idx_path` and `segment_image`
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param str path_out: path to segmentation outputs,
        forwarded to `segment_image`
    :param str path_visu: visualisation path, forwarded to `segment_image`
    :return tuple: the `(idx_name, segm, segm_gc)` triple
        produced by `segment_image`
    """
    idx, path_img = parse_imgs_idx_path(imgs_idx_path)
    dict_imgs, _, _, dict_features, dict_labels, _, _ = load_dump_data(
        path_dump)
    stored = seg_clf.load_classifier(path_classif)
    classif, params = stored['clf_pipeline'], stored['params']

    # take the held-out image away from the training data
    name_left_out = get_idx_name(idx, path_img)
    dict_features.pop(name_left_out, None)
    dict_labels.pop(name_left_out, None)
    nb_dropped = len(dict_imgs) - len(dict_features)
    assert nb_dropped == 1, 'no image was dropped from training set'

    dataset = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        balance_type=params['balance'], drop_labels=[-1])
    features, labels, _ = dataset
    classif.fit(features, labels)

    idx_name, segm, segm_gc = segment_image(
        imgs_idx_path, params, classif, path_out, path_visu)
    return idx_name, segm, segm_gc
def retrain_lpo_segment_image(list_imgs_idx_path, path_classif, path_dump,
                              path_out, path_visu,
                              show_debug_imgs=SHOW_DEBUG_IMAGES):
    """ load the classifier and dumped data, subtract the held-out images,
    retrain the classifier without them and segment the held-out images

    :param list list_imgs_idx_path: list of `(index, image path)` pairs
        of the images to be left out and segmented
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param str path_out: path to segmentation outputs,
        forwarded to `segment_image`
    :param str path_visu: visualisation path, forwarded to `segment_image`
    :param bool show_debug_imgs: whether show debug images
    :return (dict, dict): two dictionaries keyed by the image name returned
        from `segment_image`; the first holds `segm`, the second `segm_gc`
    :raises ValueError: if the number of entries removed from the training
        features differs from the number of held-out images
    """
    dict_imgs, _, _, dict_features, dict_labels, _, _ = load_dump_data(
        path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    # remove all held-out images from the training data
    for idx, path_img in list_imgs_idx_path:
        idx_name = get_idx_name(idx, path_img)
        dict_features.pop(idx_name, None)
        dict_labels.pop(idx_name, None)
    if (len(dict_imgs) - len(dict_features)) != len(list_imgs_idx_path):
        raise ValueError(
            'subset of %i images was not dropped, training set %i from total %i'
            % (len(list_imgs_idx_path), len(dict_features), len(dict_imgs)))

    # the configuration may hold `drop_labels` explicitly set to None (or as
    # a tuple), plain list concatenation would then fail with a TypeError
    extra_drop = params.get('drop_labels')
    drop_labels = [-1, np.nan]
    if extra_drop is not None:
        drop_labels += list(extra_drop)

    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        balance_type=params['balance'], drop_labels=drop_labels)
    classif.fit(features, labels)

    dict_segm, dict_segm_gc = {}, {}
    for imgs_idx_path in list_imgs_idx_path:
        idx_name, segm, segm_gc = segment_image(
            imgs_idx_path, params, classif, path_out, path_visu,
            show_debug_imgs=show_debug_imgs)
        dict_segm[idx_name] = segm
        dict_segm_gc[idx_name] = segm_gc
    # explicit garbage collection and a short pause after the whole batch
    gc.collect()
    time.sleep(1)
    return dict_segm, dict_segm_gc
def main_train(params):
    """ training PIPELINE

    0) prepare the experiment folder, logger, config export and sub-folders
    1) load the dumped training data or compute (and dump) them now
    2) search & train the classifier and evaluate it with cross-validation
    3) optionally perform the Leave-One-Out experiment

    :param {str: any} params: experiment configuration
    """
    params = prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    path_config = os.path.join(params['path_expt'], NAME_YAML_PARAMS)
    tl_expt.save_config_yaml(path_config, params)
    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    df_paths, _ = load_df_paths(params)

    path_dump_data = os.path.join(params['path_expt'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump_data) and not FORCE_RECOMP_DATA:
        # reuse the data dumped by a previous run
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            load_dump_data(path_dump_data)
    else:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            dataset_load_images_segms_compute_features(
                params, df_paths, params['nb_workers'])
        assert len(dict_imgs) > 0, 'missing images'
        save_dump_data(path_dump_data, dict_imgs, dict_segms, dict_slics,
                       dict_points, dict_centers, dict_features,
                       dict_labels, feature_names)

    if is_drawing(params['path_expt']) and EXPORT_TRAINING_DATA:
        export_dataset_visual(params['path_expt'], dict_imgs, dict_segms,
                              dict_slics, dict_points, dict_labels,
                              params['nb_workers'])

    # merge per-image features and labels into a single dataset
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        drop_labels=[-1], balance_type=params['balance'])
    # replace bad values in feature space: NaN -> 0, infinity -> -1
    mask_nan = np.isnan(features)
    features[mask_nan] = 0
    mask_inf = np.isinf(features)
    features[mask_inf] = -1
    assert np.sum(sizes) == len(labels), \
        'not equal sizes (%d) and labels (%i)' \
        % (int(np.sum(sizes)), len(labels))

    # hyper-parameter search & training of the classifier
    nb_holdout_search = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    cv_search = seg_clf.CrossValidateGroups(sizes, nb_holdout_search)
    classif, path_classif = seg_clf.create_classif_search_train_export(
        params['classif'], features, labels,
        cross_val=cv_search,
        params=params,
        feature_names=feature_names,
        nb_search_iter=params['nb_classif_search'],
        pca_coef=params.get('pca_coef', None),
        nb_workers=params['nb_workers'],
        path_out=params['path_expt'],
    )
    params['path_classif'] = path_classif

    # evaluation of the trained classifier with its own hold-out size
    nb_holdout_eval = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_EVAL))
    cv_eval = seg_clf.CrossValidateGroups(sizes, nb_holdout_eval)
    seg_clf.eval_classif_cross_val_scores(
        params['classif'], classif, features, labels,
        cross_val=cv_eval, path_out=params['path_expt'])
    seg_clf.eval_classif_cross_val_roc(
        params['classif'], classif, features, labels,
        cross_val=cv_eval, path_out=params['path_expt'])

    if RUN_LEAVE_ONE_OUT:
        experiment_loo(classif, dict_imgs, dict_segms, dict_centers,
                       dict_slics, dict_points, dict_features,
                       feature_names, params)
def train_classif_color2d_slic_features(list_images, list_annots,
                                        dict_features, sp_size=30,
                                        sp_regul=0.2, clf_name=CLASSIF_NAME,
                                        label_purity=0.9,
                                        feature_balance='unique',
                                        pca_coef=None, nb_classif_search=1,
                                        nb_hold_out=CROSS_VAL_LEAVE_OUT,
                                        nb_workers=1):
    """ train classifier on list of annotated images

    For each image/annotation pair the superpixels, features and labels are
    computed, all of them are merged into one dataset (samples labeled -1
    are dropped) and a classifier is searched & trained on it.

    :param [ndarray] list_images: input images
    :param [ndarray] list_annots: annotations, one per input image
    :param dict(list(str)) dict_features: list of features to be extracted
    :param int sp_size: initial size of a superpixel (meaning edge length)
    :param float sp_regul: regularisation in range(0;1) where "0" gives
        elastic and "1" nearly square segments
    :param str clf_name: select used classifier
    :param float label_purity: set the sample-labels purity for training
    :param str feature_balance: set how to balance datasets
    :param float pca_coef: select PCA coef or None
    :param int nb_classif_search: number of tries for
        hyper-parameters search
    :param int nb_hold_out: cross-val leave out
    :param int nb_workers: parallelism
    :return tuple: classifier, list of superpixel segmentations,
        list of features, list of labels
    """
    logging.info('TRAIN Superpixels-Features-Classifier')
    assert len(list_images) == len(list_annots), \
        'size of images (%i) and annotations (%i) should match' \
        % (len(list_images), len(list_annots))

    compute_single = partial(wrapper_compute_color2d_slic_features_labels,
                             sp_size=sp_size, sp_regul=sp_regul,
                             dict_features=dict_features,
                             label_purity=label_purity)
    pairs_img_annot = zip(list_images, list_annots)
    results = WrapExecuteSequence(compute_single, pairs_img_annot,
                                  desc='compute SLIC & features & labels',
                                  nb_workers=nb_workers)
    list_slic, list_features, list_labels = [], [], []
    for slic, fts, lbs in results:
        list_slic.append(slic)
        list_features.append(fts)
        list_labels.append(lbs)

    logging.debug('concentrate features...')
    # key the per-image features and labels by the image position
    # and drop the "do not care" labels which are -1
    features, labels, sizes = convert_set_features_labels_2_dataset(
        dict(enumerate(list_features)),
        dict(enumerate(list_labels)),
        balance_type=feature_balance, drop_labels=[-1])
    features = np.nan_to_num(features)

    logging.debug('train classifier...')
    # group-wise cross-validation does not make sense for a small number
    # of training images, use the plain value 10 in such case
    enough_images = len(sizes) > (nb_hold_out * 5)
    cv = CrossValidateGroups(sizes, nb_hold_out=nb_hold_out) \
        if enough_images else 10
    classif, _ = create_classif_search_train_export(
        clf_name, features, labels,
        pca_coef=pca_coef, cross_val=cv,
        nb_search_iter=nb_classif_search, nb_workers=nb_workers)
    return classif, list_slic, list_features, list_labels
def main_train(params):
    """ the main composed from following steps:

    1) load already computed data (features and labels) or compute them now
    2) visualise labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) predict on the training images and perform the Leave-P-Out
       experiment on images (each step can be skipped)

    :param {str: ...} params: experiment configuration
    :return {str: ...}: the configuration as updated during the training
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    show_visual = params.get('visual', False)

    reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(
        params, dir_name=NAME_EXPERIMENT,
        stamp_unique=params.get('unique', EACH_UNIQUE_EXPERIMENT),
        skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params, show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project the superpixel labels back to the pixel grid of each image
    dict_annot_slic = {
        n: np.asarray(dict_labels[n])[dict_slics[n]] for n in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels', None),
                                 params['nb_jobs'])
    # NOTE(review): `DataFrame.append` was removed in pandas 2.0; switching
    # to `pd.concat` needs the return type of `get_summary` - TODO confirm
    df_stat = df_stat.append(get_summary(df, 'SLIC-annot'),
                             ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
        if show_visual else None
    dict_labels = filter_train_with_purity(
        dict_imgs, dict_labels, dict_label_hist, params['label_purity'],
        dict_slics, drop_labels=params.get('drop_labels', None),
        path_visu=path_purity_visu, nb_jobs=params['nb_jobs'])

    logging.info('prepare features...')
    # `drop_labels` is treated as optional above (None allowed), so it must
    # not be concatenated blindly - `list + None` raises a TypeError
    extra_drop = params.get('drop_labels')
    drop_labels = [-1, np.nan]  # "do not care" label -1 and undefined labels
    if extra_drop is not None:
        drop_labels += list(extra_drop)
    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        balance_type=params['balance'], drop_labels=drop_labels)
    features = np.nan_to_num(features)

    # 'cross_val' is a fraction of the training sets used as hold-out
    nb_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * nb_holdout)))  # minimum is 1
    # max is half of the set
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))

    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    def _path_expt(n):
        return os.path.join(params['path_exp'], n)

    # test classif. on training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params, paths_img, classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))

    gc.collect()
    time.sleep(1)

    # LEAVE P OUT
    if params.get('run_LPO', True):
        idx_paths_img = list(
            zip(df_paths.index.tolist(), df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params, df_stat, dict_annot, idx_paths_img,
                                 path_classif, path_dump, nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))

    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params
def main_train(params):
    """ the main training procedure, composed from following steps:

    1) load the dumped data (features and labels) or compute and dump them
    2) evaluate the superpixel labeling against the annotation
    3) load or train classifier with hyper-parameters search
    4) predict on the training images and perform the optional
       Leave-One-Out and Leave-P-Out experiments on images

    :param {str: ...} params: experiment configuration
    :return {str: ...}: the configuration as updated during the training
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')

    reload_dir_config = (os.path.isfile(params['path_config'])
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(
        params,
        dir_name=NAME_EXPERIMENT,
        stamp_unique=EACH_UNIQUE_EXPERIMENT,
        skip_load=reload_dir_config,
    )
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        (dict_imgs, dict_annot, dict_slics, dict_features, dict_labels,
         dict_label_hist, feature_names) = load_dump_data(path_dump)
    else:
        (dict_imgs, dict_annot, dict_slics, dict_features, dict_labels,
         dict_label_hist, feature_names) = \
            dataset_load_images_annot_compute_features(params)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project the superpixel labels back to the pixel grid of each image
    dict_annot_slic = {}
    for n in dict_annot:
        dict_annot_slic[n] = np.asarray(dict_labels[n])[dict_slics[n]]
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params['nb_jobs'])
    summary = get_summary(df, 'SLIC-annot')
    df_stat = df_stat.append(summary, ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # mark superpixels below the required purity as "do not care" (-1)
    for name, lbs in dict_labels.items():
        purity = np.max(dict_label_hist[name], axis=1)
        lbs[purity < params['label_purity']] = -1

    logging.info('prepare features...')
    # concentrate features, labels; drop "do not care" label which are -1
    dataset = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        balance_type=params['balance'], drop_labels=[-1])
    features, labels, sizes = dataset
    features = np.nan_to_num(features)

    # hold out a fraction of the training sets, at least one
    nb_holdout = int(round(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    nb_holdout = max(1, nb_holdout)
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    # test classif on images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    paths_img = df_paths['path_image'].tolist()
    perform_predictions(params, paths_img, classif)

    # LEAVE ONE OUT
    if RUN_CROSS_VAL_LOO:
        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump)

    # LEAVE P OUT
    if RUN_CROSS_VAL_LPO:
        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump, nb_holdout)

    logging.info('training DONE')
    return params
def train_classif_color2d_slic_features(list_images, list_annots,
                                        clr_space='rgb', sp_size=30,
                                        sp_regul=0.2,
                                        dict_features=FTS_SET_SIMPLE,
                                        clf_name=CLASSIF_NAME,
                                        label_purity=0.9,
                                        feature_balance='unique',
                                        pca_coef=None, nb_classif_search=1,
                                        nb_jobs=1):
    """ train classifier on list of annotated images

    For each image/annotation pair the superpixels, features and labels are
    computed, all of them are merged into one dataset (samples labeled -1
    are dropped) and a classifier is trained on it.

    :param [ndarray] list_images: input images
    :param [ndarray] list_annots: annotations, one per input image
    :param str clr_space: chose the color space
    :param int sp_size: initial size of a superpixel (meaning edge length)
    :param float sp_regul: regularisation in range(0;1) where "0" gives
        elastic and "1" nearly square segments
    :param {str: [str]} dict_features: list of features to be extracted
    :param str clf_name: select used classifier
    :param float label_purity: set the sample-labels purity for training
    :param str feature_balance: set how to balance datasets
    :param float pca_coef: select PCA coef or None
    :param int nb_classif_search: number of tries for
        hyper-parameters search
    :param int nb_jobs: parallelism
    :return tuple: classifier, list of superpixel segmentations,
        list of features, list of labels
    """
    logging.info('TRAIN Superpixels-Features-Classifier')
    assert len(list_images) == len(list_annots), \
        'size of images (%i) and annotations (%i) should match' \
        % (len(list_images), len(list_annots))

    compute_single = partial(wrapper_compute_color2d_slic_features_labels,
                             clr_space=clr_space, sp_size=sp_size,
                             sp_regul=sp_regul, dict_features=dict_features,
                             label_purity=label_purity)
    pairs_img_annot = zip(list_images, list_annots)
    results = tl_expt.WrapExecuteSequence(compute_single, pairs_img_annot,
                                          nb_jobs=nb_jobs)
    list_slic, list_features, list_labels = [], [], []
    for slic, fts, lbs in results:
        list_slic.append(slic)
        list_features.append(fts)
        list_labels.append(lbs)

    logging.debug('concentrate features...')
    # key the per-image features and labels by the image position
    # and drop the "do not care" labels which are -1
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict(enumerate(list_features)),
        dict(enumerate(list_labels)),
        balance_type=feature_balance, drop_labels=[-1])
    features = np.nan_to_num(features)

    logging.debug('train classifier...')
    # set-wise cross-validation does not make sense for a small number
    # of training images, use the plain value 10 in such case
    enough_images = len(sizes) > (CROSS_VAL_LEAVE_OUT * 5)
    cv = seg_clf.CrossValidatePSetsOut(sizes,
                                       nb_hold_out=CROSS_VAL_LEAVE_OUT) \
        if enough_images else 10
    classif, _ = seg_clf.create_classif_train_export(
        clf_name, features, labels,
        nb_search_iter=nb_classif_search, cross_val=cv,
        nb_jobs=nb_jobs, pca_coef=pca_coef)
    return classif, list_slic, list_features, list_labels