def main(params):
    """ PIPELINE candidate clustering

    Dump the parameters as JSON into the experiment folder, collect the
    image / segmentation / centre-point paths, export them as CSV, run
    `cluster_points_draw_export` on every row and overwrite the CSV with
    the records returned by the workers (indexed by their 'image' field).

    :param {str: any} params: experiment parameters; the keys read here are
        'path_expt', 'path_images', 'path_segms', 'path_centers', 'nb_jobs'
    """
    path_config = os.path.join(params['path_expt'], 'config_clustering.json')
    with open(path_config, 'w') as fp:
        json.dump(params, fp)

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)

    list_paths = [params[k]
                  for k in ['path_images', 'path_segms', 'path_centers']]
    df_paths = tl_data.find_files_match_names_across_dirs(list_paths)
    df_paths.columns = ['path_image', 'path_segm', 'path_points']
    # use 1-based indexes in the exported table
    df_paths.index = range(1, len(df_paths) + 1)
    path_cover = os.path.join(params['path_expt'], run_train.NAME_CSV_TRIPLES)
    df_paths.to_csv(path_cover)

    logging.info('run clustering...')
    _wrapper_clustering = partial(cluster_points_draw_export, params=params,
                                  path_out=params['path_expt'])
    rows = (dict(row) for _, row in df_paths.iterrows())
    iterate = tl_expt.WrapExecuteSequence(_wrapper_clustering, rows,
                                          nb_jobs=params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0 and copied the whole frame
    # on every call; gather the records first and build the frame only once
    list_centers = list(iterate)
    df_paths_new = pd.DataFrame(list_centers)

    df_paths_new.set_index('image', inplace=True)
    # the final table replaces the preliminary list of paths exported above
    df_paths_new.to_csv(path_cover)
def main(params):
    """ the main body running two segmentation approaches and comparing them:
    1) segment each image independently (single model per image)
    2) estimate one model over the whole image sequence and segment with it

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters as returned by the experiment-folder setup
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    logging.info('running...')

    skip_load = os.path.isfile(params['path_config']) or FORCE_RELOAD
    params = tl_expt.create_experiment_folder(
        params,
        dir_name=NAME_EXPERIMENT,
        stamp_unique=EACH_UNIQUE_EXPERIMENT,
        skip_load=skip_load,
    )
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

    path_list = params['path_train_list']
    assert os.path.isfile(path_list), 'missing %s' % path_list
    paths_img = pd.read_csv(path_list, index_col=0)['path_image'].tolist()

    def _in_expt(name):
        # resolve a name relative to the experiment folder
        return os.path.join(params['path_exp'], name)

    # 1) single model per image
    dict_segms_gmm = experiment_single_gmm(
        params, paths_img,
        _in_expt(FOLDER_SEGM_GMM), _in_expt(FOLDER_SEGM_GMM_VISU))
    gc.collect()
    time.sleep(1)

    # 2) one model over the set of images
    dict_segms_group = experiment_group_gmm(
        params, paths_img,
        _in_expt(FOLDER_SEGM_GROUP), _in_expt(FOLDER_SEGM_GROUP_VISU))
    gc.collect()
    time.sleep(1)

    # compare both segmentations and export the statistic
    df_ars = compare_segms_metric_ars(dict_segms_gmm, dict_segms_group,
                                      suffix='_gmm-group')
    df_ars.to_csv(_in_expt(NAME_CSV_ARS_CORES))
    logging.info(df_ars.describe())

    logging.info('DONE')
    return params
def evaluate_detection_stage(df_paths, stage, path_info, path_out, nb_jobs=1):
    """ evaluate center detection for particular list of stages

    The annotation info for the given stages is loaded from a CSV cached in
    `path_out` when it exists (and `FORCE_RELOAD` is not set), otherwise it is
    loaded via `seg_annot.load_info_group_by_slices` and cached. Every row of
    `df_paths` is then processed by `load_center_evaluate`.

    :param df_paths: table of paths, iterated row by row
    :param [int] stage: list of stages to evaluate
    :param str path_info: path passed to the annotation-info loader
    :param str path_out: output folder for the CSV files and sub-folders
    :param int nb_jobs: number of jobs passed to the execution wrapper
    :return DF: table of evaluation records; the unchanged `df_paths` when
        there is no annotation info for the stages
    """
    logging.info('evaluate stages: %s', repr(stage))
    str_stage = '-'.join(map(str, stage))

    path_csv = os.path.join(path_out, NAME_CSV_ANNOT_STAGE % str_stage)
    if not os.path.exists(path_csv) or FORCE_RELOAD:
        df_slices_info = seg_annot.load_info_group_by_slices(path_info, stage)
        logging.debug('export slices_info to "%s"', path_csv)
        df_slices_info.to_csv(path_csv)
    else:
        logging.debug('loading slices_info from "%s"', path_csv)
        df_slices_info = pd.read_csv(path_csv, index_col=0)

    if len(df_slices_info) == 0:
        # nothing to evaluate against, hand the input back untouched
        return df_paths

    path_annot = os.path.join(path_out, FOLDER_ANNOT % str_stage)
    path_visu = os.path.join(path_out, FOLDER_ANNOT_VISUAL % str_stage)
    list_dirs = [os.path.basename(p) for p in [path_annot, path_visu]]
    logging.debug('create sub-dirs: %s', repr(list_dirs))
    tl_expt.create_subfolders(path_out, list_dirs)

    # perform on new images
    stage_prefix = '[stage-%s] ' % str_stage
    logging.info('start section %s - load_center_evaluate ...', stage_prefix)
    wrapper_detection = partial(load_center_evaluate,
                                df_annot=df_slices_info,
                                path_annot=path_annot,
                                path_visu=path_visu,
                                col_prefix=stage_prefix)
    iterate = tl_expt.WrapExecuteSequence(wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_jobs=nb_jobs)

    # `DataFrame.append` was removed in pandas 2.0; collect the records in
    # a list and rebuild the frame from it instead
    path_csv_temp = os.path.join(path_out, NAME_CSV_TRIPLES_TEMP)
    list_evals = []
    df_eval = pd.DataFrame()
    for dict_eval in iterate:
        list_evals.append(dict_eval)
        df_eval = pd.DataFrame(list_evals)
        # keep the temporary CSV up to date after every finished item
        df_eval.to_csv(path_csv_temp)
    return df_eval
def main(params):
    """ PIPELINE for new detections

    Prepare the experiment folder, load the list of image/segmentation
    paths and the trained classifier, run `load_compute_detect_centers` on
    every row and export the collected records as CSV.

    :param {str: str} params: experiment parameters; the keys read here are
        'path_expt' (after the folder is prepared), 'path_list',
        'path_images', 'path_segms', 'path_classif' and 'nb_jobs'
    """
    # function-scope import: `os.uname` does not exist on Windows,
    # `platform.uname` is available on every platform
    import platform

    logging.info('running...')
    params = run_train.prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info('COMPUTER: \n%s', repr(platform.uname()))
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBFOLDER)

    path_csv = os.path.join(params['path_expt'], NAME_CSV_TRIPLES)
    df_paths = get_csv_triplets(params['path_list'], path_csv,
                                params['path_images'], params['path_segms'],
                                force_reload=FORCE_RERUN)

    dict_classif = seg_clf.load_classifier(params['path_classif'])
    # parameters stored with the classifier, overridden by the current ones
    params_clf = dict_classif['params']
    params_clf.update(params)
    # log the merged dictionary - it is the one handed to the detection
    logging.info(tl_expt.string_dict(params_clf, desc='UPDATED PARAMETERS'))

    # perform on new images
    wrapper_detection = partial(load_compute_detect_centers,
                                params=params_clf,
                                path_classif=params['path_classif'],
                                path_output=params['path_expt'])
    iterate = tl_expt.WrapExecuteSequence(wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_jobs=params['nb_jobs'])

    # `DataFrame.append` was removed in pandas 2.0; collect the records in
    # a list and rebuild the frame from it instead
    path_csv_temp = os.path.join(params['path_expt'], NAME_CSV_TRIPLES_TEMP)
    list_stats = []
    df_stat = pd.DataFrame()
    for dict_center in iterate:
        list_stats.append(dict_center)
        df_stat = pd.DataFrame(list_stats)
        # keep the temporary CSV up to date after every finished item
        df_stat.to_csv(path_csv_temp)

    df_stat.set_index(['image'], inplace=True)
    df_stat.to_csv(os.path.join(params['path_expt'], NAME_CSV_TRIPLES))
    logging.info('STATISTIC: \n %s', repr(df_stat.describe()))

    logging.info('DONE')
def main(params, debug_export=DEBUG_EXPORT):
    """ the main entry point

    Set up the experiment folder and its sub-folders, load the list of
    inputs and run `image_segmentation` on every row of that list.

    :param {str: ...} params: segmentation parameters
    :param bool debug_export: whether export visualisations
    """
    logging.getLogger().setLevel(logging.DEBUG)

    params = tl_expt.create_experiment_folder(
        params,
        dir_name=NAME_EXPERIMENT,
        stamp_unique=EACH_UNIQUE_EXPERIMENT,
    )
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    df_paths = pd.read_csv(params['path_list'], index_col=0)
    column_names = df_paths.columns.tolist()
    logging.info('loaded %i items with columns: %s',
                 len(df_paths), repr(column_names))
    # drop every row with a missing value
    df_paths = df_paths.dropna(how='any')

    # create sub-folders if required
    tl_expt.create_subfolders(params['path_exp'], ['input', 'simple'])
    # only the names (keys) of the segmentation methods are needed here
    dict_segment = create_dict_segmentation(params, None, None, None, None)
    segm_names = list(dict_segment)
    dirs_segm = segm_names \
        + [name + DIR_CENTRE_POSIX for name in segm_names] \
        + [name + DIR_VISUAL_POSIX for name in segm_names]
    tl_expt.create_subfolders(params['path_exp'], dirs_segm)

    if debug_export:
        dirs_debug = [name + DIR_DEBUG_POSIX
                      for name in segm_names if 'rg2sp' in name]
        tl_expt.create_subfolders(params['path_exp'], dirs_debug)

    _wrapper_segment = partial(image_segmentation, params=params)
    iterate = tl_expt.WrapExecuteSequence(_wrapper_segment,
                                          df_paths.iterrows(),
                                          nb_jobs=params['nb_jobs'])
    # exhaust the iterator, the returned values are not used
    for _ in iterate:
        pass
def main(dict_paths, export_visual=EXPORT_VUSIALISATION, nb_jobs=NB_THREADS):
    """ evaluate all segmentations in experiment folder

    Every sub-folder of `dict_paths['results']` is evaluated by
    `evaluate_folder`, except those with '___' in their name or listed in
    `SKIP_DIRS`. The collected records are exported as an overall CSV.

    :param {str: str} dict_paths: path to all required directories
    :param bool export_visual: export visualisations
    :param int nb_jobs: number threads in parallel
    """
    logging.info('running in %i jobs...', nb_jobs)
    logging.info(tl_expt.string_dict(dict_paths, desc='PATHS'))

    # a single sort after the filtering gives the same order as sorting twice
    list_results = sorted(
        p for p in glob.glob(os.path.join(dict_paths['results'], '*'))
        if os.path.isdir(p)
        and '___' not in os.path.basename(p)
        and os.path.basename(p) not in SKIP_DIRS
    )

    tl_expt.create_subfolders(
        dict_paths['results'],
        [NAME_DIR_VISUAL_1, NAME_DIR_VISUAL_2, NAME_DIR_VISUAL_3])

    wrapper_eval = partial(evaluate_folder, dict_paths=dict_paths,
                           export_visual=export_visual)
    iterate = tl_expt.WrapExecuteSequence(wrapper_eval, list_results,
                                          nb_jobs=nb_jobs)
    # `DataFrame.append` was removed in pandas 2.0 and copied the whole frame
    # on every call; gather the records first and build the frame only once
    list_evals = list(iterate)
    df_all = pd.DataFrame(list_evals)

    df_all.set_index(['method'], inplace=True)
    df_all.sort_index(inplace=True)
    logging.info('STATISTIC: \n %s', repr(df_all))
    df_all.to_csv(
        os.path.join(dict_paths['results'], NAME_CSV_STAT % 'OVERALL'))

    logging.info('Done :]')
def main_train(params):
    """ PIPELINE for training
    0) load triplets or create triplets from path to images, annotations
    1) load precomputed data or compute them now
    2) train classifier with hyper-parameters
    3) perform Leave-One-Out experiment

    :param {str: any} params: experiment parameters
    """
    logging.info('run TRAINING...')
    params = prepare_experiment_folder(params, FOLDER_EXPERIMENT)
    path_expt = params['path_expt']

    tl_expt.set_experiment_logger(path_expt)
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    # keep a copy of the used parameters next to the results
    with open(os.path.join(path_expt, NAME_JSON_PARAMS), 'w') as fp:
        json.dump(params, fp)

    tl_expt.create_subfolders(path_expt, LIST_SUBDIRS)

    df_paths, _ = load_df_paths(params)

    path_dump = os.path.join(path_expt, NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        # reuse the data dumped by a previous run
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = load_dump_data(path_dump)
    else:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            dataset_load_images_segms_compute_features(params, df_paths,
                                                       params['nb_jobs'])
        assert len(dict_imgs) > 0, 'missing images'
        save_dump_data(path_dump, dict_imgs, dict_segms, dict_slics,
                       dict_points, dict_centers, dict_features,
                       dict_labels, feature_names)

    if is_drawing(params['path_expt']) and EXPORT_TRAINING_DATA:
        export_dataset_visual(params['path_expt'], dict_imgs, dict_segms,
                              dict_slics, dict_points, dict_labels,
                              params['nb_jobs'])

    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels,
        drop_labels=[-1], balance_type=params['balance'])
    # remove all bad values from features space
    features[np.isnan(features)] = 0
    features[np.isinf(features)] = -1
    nb_samples = np.sum(sizes)
    assert nb_samples == len(labels), \
        'not equal sizes (%d) and labels (%i)' % (int(nb_samples), len(labels))

    # feature norm & train classification
    nb_holdout_search = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    cv_search = seg_clf.CrossValidatePSetsOut(sizes, nb_holdout_search)
    classif, path_classif = seg_clf.create_classif_train_export(
        params['classif'], features, labels,
        cross_val=cv_search,
        params=params,
        feature_names=feature_names,
        nb_search_iter=params['nb_classif_search'],
        pca_coef=params.get('pca_coef', None),
        nb_jobs=params['nb_jobs'],
        path_out=params['path_expt'])
    params['path_classif'] = path_classif

    # evaluate the trained classifier with its own hold-out size
    nb_holdout_eval = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_EVAL))
    cv_eval = seg_clf.CrossValidatePSetsOut(sizes, nb_holdout_eval)
    seg_clf.eval_classif_cross_val_scores(params['classif'], classif,
                                          features, labels,
                                          cross_val=cv_eval,
                                          path_out=params['path_expt'])
    seg_clf.eval_classif_cross_val_roc(params['classif'], classif,
                                       features, labels,
                                       cross_val=cv_eval,
                                       path_out=params['path_expt'])

    if RUN_LEAVE_ONE_OUT:
        experiment_loo(classif, dict_imgs, dict_segms, dict_centers,
                       dict_slics, dict_points, dict_features, feature_names)

    logging.info('DONE')
def main(params):
    """ the main body running up to two segmentation approaches:
    1) segment each image independently (single model per image)
    2) optionally (key 'run_groupGMM') estimate one model over the whole
       image set, segment with it and compare it with the first approach

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters as returned by the experiment-folder setup
    """
    logging.getLogger().setLevel(logging.DEBUG)
    # taken from the passed parameters, before the experiment folder is set up
    show_visual = params.get('visual', False)

    skip_load = os.path.isfile(params['path_config']) or FORCE_RELOAD
    stamp_unique = params.get('unique', EACH_UNIQUE_EXPERIMENT)
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=stamp_unique,
                                              skip_load=skip_load)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

    paths_img = load_path_images(params)
    assert len(paths_img) > 0, 'missing images'

    def _in_expt(name):
        # resolve a name relative to the experiment folder
        return os.path.join(params['path_exp'], name)

    # Segment as single model per image
    if show_visual:
        path_visu = _in_expt(FOLDER_SEGM_GMM_VISU)
    else:
        path_visu = None
    dict_segms_gmm = experiment_single_gmm(params, paths_img,
                                           _in_expt(FOLDER_SEGM_GMM),
                                           path_visu,
                                           show_debug_imgs=show_visual)
    gc.collect()
    time.sleep(1)

    # Segment as model over set of images
    if not params.get('run_groupGMM', False):
        # group model not requested, mark its folder as skipped
        write_skip_file(_in_expt(FOLDER_SEGM_GROUP))
        return params

    if show_visual:
        path_visu = _in_expt(FOLDER_SEGM_GROUP_VISU)
    else:
        path_visu = None
    dict_segms_group = experiment_group_gmm(params, paths_img,
                                            _in_expt(FOLDER_SEGM_GROUP),
                                            path_visu,
                                            show_debug_imgs=show_visual)
    if dict_segms_group is None:
        return params

    # compare both segmentations and export the statistic
    df_ars = compare_segms_metric_ars(dict_segms_gmm, dict_segms_group,
                                      suffix='_gmm-group')
    df_ars.to_csv(_in_expt(NAME_CSV_ARS_CORES))
    logging.info(df_ars.describe())

    return params
def main_train(params):
    """ the main composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) visualise labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters, updated during the training
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')

    reload_dir_config = (os.path.isfile(params['path_config']) or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(
        params, dir_name=NAME_EXPERIMENT,
        stamp_unique=EACH_UNIQUE_EXPERIMENT, skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # spread the per-superpixel labels over the superpixel maps
    dict_annot_slic = {n: np.asarray(dict_labels[n])[dict_slics[n]]
                       for n in dict_annot}
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0; the statistic table is
    # still empty at this point, so it is created directly from the summary
    summary = get_summary(df, 'SLIC-annot')
    if isinstance(summary, pd.DataFrame):
        df_stat = summary.reset_index(drop=True)
    else:
        # a mapping / Series makes a single row
        df_stat = pd.DataFrame([dict(summary)])
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # mark superpixels with low label purity as "do not care" (-1)
    for name in dict_labels:
        weights = np.max(dict_label_hist[name], axis=1)
        dict_labels[name][weights < params['label_purity']] = -1

    logging.info('prepare features...')
    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1])
    # drop "do not care" label which are -1
    features = np.nan_to_num(features)

    nb_holdout = max(1, int(round(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH)))
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    # test classif on images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    paths_img = df_paths['path_image'].tolist()
    perform_predictions(params, paths_img, classif)

    # LEAVE ONE OUT
    if RUN_CROSS_VAL_LOO:
        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump)

    # LEAVE P OUT
    if RUN_CROSS_VAL_LPO:
        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump, nb_holdout)

    logging.info('training DONE')
    return params
def main_train(params):
    """ the main composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) visualise labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-P-Out experiment on images (key 'run_LPO')

    :param {str: ...} params: experiment parameters
    :return {str: ...}: parameters, updated during the training
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    # taken from the passed parameters, before the experiment folder is set up
    show_visual = params.get('visual', False)

    reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(
        params, dir_name=NAME_EXPERIMENT,
        stamp_unique=params.get('unique', EACH_UNIQUE_EXPERIMENT),
        skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)

    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params, show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # spread the per-superpixel labels over the superpixel maps
    dict_annot_slic = {
        n: np.asarray(dict_labels[n])[dict_slics[n]] for n in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels', None),
                                 params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0; the statistic table is
    # still empty at this point, so it is created directly from the summary
    summary = get_summary(df, 'SLIC-annot')
    if isinstance(summary, pd.DataFrame):
        df_stat = summary.reset_index(drop=True)
    else:
        # a mapping / Series makes a single row
        df_stat = pd.DataFrame([dict(summary)])
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
        if show_visual else None
    dict_labels = filter_train_with_purity(
        dict_imgs, dict_labels, dict_label_hist, params['label_purity'],
        dict_slics, drop_labels=params.get('drop_labels', None),
        path_visu=path_purity_visu, nb_jobs=params['nb_jobs'])

    logging.info('prepare features...')
    # 'drop_labels' may be missing or explicitly None (None is also what the
    # calls above pass on); `list + None` would raise a TypeError
    drop_labels = [-1, np.nan] + list(params.get('drop_labels') or [])
    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=drop_labels)
    # drop "do not care" label which are -1
    features = np.nan_to_num(features)

    nb_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * nb_holdout)))  # minimum is 1
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))  # max is half of the set
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    def _path_expt(n):
        return os.path.join(params['path_exp'], n)

    # test classif. on training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params, paths_img, classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))

    gc.collect()
    time.sleep(1)

    # LEAVE P OUT
    if params.get('run_LPO', True):
        idx_paths_img = list(
            zip(df_paths.index.tolist(), df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params, df_stat, dict_annot, idx_paths_img,
                                 path_classif, path_dump, nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))

    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params