def parse_params(default_params, methods):
    """ Assemble experiment parameters from defaults, CLI args and optional YAML.

    :param dict default_params: baseline configuration
    :param list(str) methods: list of possible methods
    :return dict: merged configuration
    """
    parser = create_args_parser(default_params, methods)
    params = copy.deepcopy(default_params)
    cli_args = parse_arg_params(parser)
    params.update(cli_args)
    # if YAML config exists update configuration
    path_cfg = cli_args.get('path_config', None)
    if path_cfg is not None and os.path.isfile(path_cfg):
        logging.info('loading config: %s', path_cfg)
        config = load_config_yaml(path_cfg)
        logging.debug(string_dict(config, desc='LOADED CONFIG:'))
        # skip all keys with path or passed from arg params
        updates = {
            key: config[key]
            for key in config
            if key not in cli_args or cli_args[key] is None
        }
        logging.debug(string_dict(updates, desc='TO BE UPDATED:'))
        params.update(updates)
    return params
def main_predict(path_classif, path_pattern_imgs, path_out, name='SEGMENT___', params_local=None):
    """ given trained classifier segment new images

    :param str path_classif: path to the trained classifier dump
    :param str path_pattern_imgs: glob pattern of images to segment
    :param str path_out: output directory
    :param str name: name of the experiment folder
    :param dict params_local: optional overrides for path_* / gc_* parameters
    """
    logging.getLogger().setLevel(logging.INFO)
    logging.info('running PREDICTION...')
    assert path_pattern_imgs is not None
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']
    # only path-like and graph-cut parameters may be overridden locally
    if params_local is not None:
        overrides = {
            key: params_local[key]
            for key in params_local
            if key.startswith('path_') or key.startswith('gc_')
        }
        params.update(overrides)
    show_debug_imgs = params.get('visual', False)
    path_out, path_visu = prepare_output_dir(path_pattern_imgs, path_out, name,
                                             visual=show_debug_imgs)
    tl_expt.set_experiment_logger(path_out)
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    paths_img = sorted(glob.glob(path_pattern_imgs))
    logging.info('found %i images on path "%s"', len(paths_img), path_pattern_imgs)
    logging.debug('run prediction...')
    _wrapper_segment = partial(
        try_segment_image,
        params=params,
        classif=classif,
        path_out=path_out,
        path_visu=path_visu,
        show_debug_imgs=show_debug_imgs,
    )
    # the wrapped task expects (index, path) tuples; indexes are unused here
    list_img_path = list(zip([None] * len(paths_img), paths_img))
    iterate = tl_expt.WrapExecuteSequence(
        _wrapper_segment,
        list_img_path,
        nb_workers=params['nb_workers'],
        desc='segmenting images',
    )
    for _ in iterate:
        gc.collect()
        time.sleep(1)
    logging.info('prediction DONE')
def _estimate_atlas_weights(self, images, params):
    """ set all params and run the atlas estimation in try mode

    :param ndarray images: np.array<w, h>
    :param dict params: experiment parameters
    :return tuple(ndarray,ndarray,dict): atlas, weights and extra outputs
    """
    logging.debug(' -> estimate atlas...')
    logging.debug(string_dict(params, desc='PARAMETERS'))
    # one label is reserved for background, hence nb_labels - 1 patterns
    init_atlas = self._init_atlas(params['nb_labels'] - 1, params['init_tp'], self._images)
    path_out = os.path.join(params['path_exp'], 'debug' + params['name_suffix'])
    # forward only the parameters the BPDL pipeline understands
    bpdl_params = {key: params[key] for key in params if key in LIST_BPDL_PARAMS}
    bpdl_params['deform_coef'] = params.get('deform_coef', None)
    atlas, weights, deforms = bpdl_pipeline(images, init_atlas=init_atlas,
                                            out_dir=path_out, **bpdl_params)
    assert atlas.max() == weights.shape[1], \
        'atlas max=%i and dict=%i' % (int(atlas.max()), weights.shape[1])
    return atlas, weights, {'deforms': deforms}
def parse_experiments(params):
    """ with specific input parameters wal over result folder and parse it

    :param dict params: at least 'path' and 'name_results'
    :return: DF<nb_experiments, nb_info>
    """
    logging.info('running parse Experiments results')
    logging.info(string_dict(params, desc='ARGUMENTS:'))
    assert os.path.isdir(params['path']), 'missing "%s"' % params['path']
    nb_workers = params.get('nb_workers', NB_WORKERS)
    df_all = pd.DataFrame()
    path_dirs = [
        path for path in glob.glob(os.path.join(params['path'], '*'))
        if os.path.isdir(path)
    ]
    logging.info('found experiments: %i', len(path_dirs))
    _wrapper_parse_folder = partial(parse_experiment_folder, params=params)
    for df_folder in WrapExecuteSequence(_wrapper_parse_folder, path_dirs, nb_workers):
        df_all = append_df_folder(df_all, df_folder)
    # the overall CSV name is derived from the (possibly multiple) result names
    name_results = params['name_results']
    if isinstance(name_results, list):
        name_results = '_'.join(os.path.splitext(n)[0] for n in name_results)
    else:
        name_results = os.path.splitext(name_results)[0]
    df_all.reset_index(inplace=True)
    path_csv = os.path.join(params['path'], TEMPLATE_NAME_OVERALL_RESULT % name_results)
    logging.info('export results as %s', path_csv)
    df_all.to_csv(path_csv, index=False)
    return df_all
def __init__(self, params, time_stamp=True):
    """ initialise class and set the experiment parameters

    :param dict params: experiment configuration
    :param bool time_stamp: mark if you want an unique folder per experiment
    """
    assert all(name in params for name in self.REQUIRED_PARAMS), \
        'missing some required parameters'
    params = simplify_params(params)
    # derive a default experiment name from type, input folder and dataset
    if 'name' not in params:
        dataset_name = params['dataset']
        if isinstance(dataset_name, list):
            dataset_name = dataset_name[0]
        last_dir = os.path.basename(params['path_in'])
        params['name'] = '{}_{}_{}'.format(params.get('type', ''),
                                           last_dir, dataset_name)
    # NOTE(review): repr() of a str wraps it in quotes ("'Name'") — confirm intended
    params['method'] = repr(self.__class__.__name__)
    if not os.path.exists(params['path_out']):
        os.mkdir(params['path_out'])
    # use constructor of parent class
    super(Experiment, self).__init__(params, time_stamp)
    self.df_results = pd.DataFrame()
    self._path_stat = os.path.join(self.params.get('path_exp'), RESULTS_TXT)
    self._list_img_paths = None
    # dump the effective parameters next to the experiment results
    with open(self._path_stat, 'w') as fp:
        fp.write(string_dict(self.params, desc='PARAMETERS:'))
def main(params):
    """ PIPELINE for new detections

    :param dict params: pipeline configuration
    """
    params['path_expt'] = os.path.join(
        params['path_output'], run_detect.FOLDER_EXPERIMENT % params['name'])
    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    path_csv = os.path.join(params['path_expt'], NAME_CSV_TRIPLES)
    df_paths = run_detect.get_csv_triplets(params['path_list'], path_csv,
                                           params['path_images'],
                                           params['path_segms'],
                                           params['path_centers'], FORCE_RELOAD)
    # evaluate every configured stage, feeding results forward
    df_eval = df_paths.copy(deep=True)
    for stage in params['stages']:
        df_eval = evaluate_detection_stage(df_eval, stage,
                                           params['path_infofile'],
                                           params['path_expt'],
                                           params['nb_workers'])
    if not df_eval.empty and 'image' in df_eval.columns:
        df_eval.set_index('image', inplace=True)
    df_eval.to_csv(os.path.join(params['path_expt'], NAME_CSV_TRIPLES_STAT))
    gc.collect()
    time.sleep(1)
    if not df_eval.empty:
        df_stat = df_eval.describe().transpose()
        logging.info('STATISTIC: \n %r', df_stat)
        df_stat.to_csv(os.path.join(params['path_expt'], NAME_CSV_STATISTIC))
def main(params):
    """ the main body containgn two approches:
    1) segment each image indecently
    2) estimate model over whole image sequence and estimate

    :param dict params: experiment configuration
    :return dict: updated parameters (with experiment paths)
    """
    logging.getLogger().setLevel(logging.DEBUG)
    show_visual = params.get('visual', False)
    # fix: 'path_config' may be absent — use .get() like the sibling main_train
    reload_dir_config = os.path.isfile(params.get('path_config', '')) or FORCE_RELOAD
    stamp_unique = params.get('unique', EACH_UNIQUE_EXPERIMENT)
    params = tl_expt.create_experiment_folder(params, dir_name=NAME_EXPERIMENT,
                                              stamp_unique=stamp_unique,
                                              skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    paths_img = load_path_images(params)
    assert paths_img, 'missing images'

    def _path_expt(n):
        return os.path.join(params['path_exp'], n)

    # Segment as single model per image
    path_visu = _path_expt(FOLDER_SEGM_GMM_VISU) if show_visual else None
    dict_segms_gmm = experiment_single_gmm(params, paths_img,
                                           _path_expt(FOLDER_SEGM_GMM),
                                           path_visu,
                                           show_debug_imgs=show_visual)
    gc.collect()
    time.sleep(1)
    # Segment as model ober set of images
    if params.get('run_groupGMM', False):
        path_visu = _path_expt(FOLDER_SEGM_GROUP_VISU) if show_visual else None
        dict_segms_group = experiment_group_gmm(params, paths_img,
                                                _path_expt(FOLDER_SEGM_GROUP),
                                                path_visu,
                                                show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_SEGM_GROUP))
        dict_segms_group = None
    # compare single-image vs group segmentations only if both were computed
    if dict_segms_group is not None:
        df_ars = compare_segms_metric_ars(dict_segms_gmm, dict_segms_group,
                                          suffix='_gmm-group')
        df_ars.to_csv(_path_expt(NAME_CSV_ARS_CORES))
        logging.info(df_ars.describe())
    return params
def main(params):
    """ PIPELINE for matching

    :param {str: str} params: pipeline configuration
    """
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    df_info = pd.read_csv(params['path_infofile'], sep='\t', index_col=0)
    df_info = filter_table(df_info, params['path_ellipses'])
    logging.info('filtered %i item in table', len(df_info))
    path_csv = os.path.join(params['path_output'], NAME_CSV_RESULTS)
    # get the folder
    path_dir_csv = os.path.dirname(params['path_ellipses'])
    _wrapper_match = partial(select_optimal_ellipse, path_dir_csv=path_dir_csv)
    iterate = tl_expt.WrapExecuteSequence(_wrapper_match, df_info.iterrows(),
                                          nb_workers=params['nb_workers'])
    list_evals = []
    for idx, dict_row in enumerate(iterate):
        list_evals.append(dict_row)
        # every hundreds iteration do export
        if idx % 100 == 0:
            pd.DataFrame(list_evals).to_csv(path_csv)
    # final export with all collected evaluations
    pd.DataFrame(list_evals).to_csv(path_csv)
def main(paths, nb_workers=NB_WORKERS, segm_alpha=MIDDLE_ALPHA_OVERLAP):
    """ Create image/segmentation overlap visualisations for a folder of images.

    :param dict paths: 'images', 'segms' and 'output' paths
    :param int nb_workers: number of parallel jobs
    :param float segm_alpha: transparency of the segmentation overlay
    """
    logging.info('running...')
    # refuse configurations that would overwrite the source folders
    if paths['segms'] == paths['output']:
        raise RuntimeError('overwriting segmentation dir')
    if os.path.basename(paths['images']) == paths['output']:
        raise RuntimeError('overwriting image dir')
    logging.info(tl_expt.string_dict(paths, desc='PATHS'))
    if not os.path.exists(paths['output']):
        dir_out = os.path.dirname(paths['output'])
        if not os.path.isdir(dir_out):
            raise FileNotFoundError('missing folder: %s' % dir_out)
        os.mkdir(paths['output'])
    paths_imgs = glob.glob(paths['images'])
    logging.info('found %i images in dir "%s"', len(paths_imgs), paths['images'])
    _warped_overlap = partial(perform_visu_overlap, paths=paths,
                              segm_alpha=segm_alpha)
    iterate = tl_expt.WrapExecuteSequence(_warped_overlap, paths_imgs,
                                          nb_workers=nb_workers,
                                          desc='overlapping')
    created = [result for result in iterate]
    logging.info('matched and created %i overlaps', np.sum(created))
    logging.info('DONE')
def parse_arg_params():
    """ create simple arg parser with default values (input, results, dataset)

    :return obj: argparse dictionary of parsed arguments
    :raises FileNotFoundError: when the images directory does not exist
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-imgs', '--path_images', type=str, required=True,
                        help='path to dir with annot', default=PATH_IMAGES)
    parser.add_argument('--label', type=int, required=False, nargs='+',
                        help='labels to be replaced', default=[-1])
    parser.add_argument('--nb_workers', type=int, required=False,
                        help='number of jobs in parallel', default=NB_WORKERS)
    args = vars(parser.parse_args())
    p_dir = tl_data.update_path(os.path.dirname(args['path_images']))
    # fix: validate with an exception instead of assert (stripped under -O);
    # consistent with the other parse_arg_params in this project
    if not os.path.isdir(p_dir):
        raise FileNotFoundError('missing folder: %s' % args['path_images'])
    args['path_images'] = os.path.join(p_dir,
                                       os.path.basename(args['path_images']))
    logging.info(tl_expt.string_dict(args, desc='ARG PARAMETERS'))
    return args
def main(params):
    """ PIPELINE for rotation

    :param {str: str} params: pipeline configuration
    """
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    list_img_paths = sorted(p for p in glob.glob(params['path_images'])
                            if os.path.isfile(p))
    logging.info('found images: %i', len(list_img_paths))
    if not os.path.isdir(params['path_output']):
        os.mkdir(params['path_output'])
    # the mean image serves as the orientation template for all inputs
    img_mean = compute_mean_image(list_img_paths)
    _wrapper_object = partial(perform_orientation_swap,
                              path_out=params['path_output'],
                              img_template=img_mean)
    dir_name = os.path.dirname(params['path_images'])
    # NOTE(review): keyword here is nb_jobs while most call sites use
    # nb_workers — confirm against WrapExecuteSequence's signature
    iterate = tl_expt.WrapExecuteSequence(_wrapper_object, list_img_paths,
                                          nb_jobs=params['nb_jobs'],
                                          desc=dir_name)
    list(iterate)
def main(params, debug_export=DEBUG_EXPORT):
    """ the main entry point

    :param dict params: segmentation parameters
    :param bool debug_export: whether export visualisations
    """
    logging.getLogger().setLevel(logging.DEBUG)
    params = tl_expt.create_experiment_folder(
        params, dir_name=NAME_EXPERIMENT, stamp_unique=EACH_UNIQUE_EXPERIMENT)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    df_paths = pd.read_csv(params['path_list'], index_col=0)
    logging.info('loaded %i items with columns: %r',
                 len(df_paths), df_paths.columns.tolist())
    df_paths.dropna(how='any', inplace=True)
    # create sub-folders if required
    tl_expt.create_subfolders(params['path_exp'], ['input', 'simple'])
    dict_segment = create_dict_segmentation(params, None, None, None, None)
    # one centre + one visual folder per segmentation method
    dirs_center = [name + DIR_CENTRE_POSIX for name in dict_segment]
    dirs_visu = [name + DIR_VISUAL_POSIX for name in dict_segment]
    tl_expt.create_subfolders(params['path_exp'],
                              list(dict_segment) + dirs_center + dirs_visu)
    if debug_export:
        list_dirs = [name + DIR_DEBUG_POSIX for name in dict_segment
                     if 'rg2sp' in name]
        tl_expt.create_subfolders(params['path_exp'], list_dirs)
    _wrapper_segment = partial(image_segmentation, params=params)
    iterate = tl_expt.WrapExecuteSequence(_wrapper_segment,
                                          df_paths.iterrows(),
                                          nb_workers=params['nb_workers'])
    list(iterate)
def main(paths, nb_jobs=NB_THREADS, segm_alpha=MIDDLE_ALPHA_OVERLAP):
    """ Create image/segmentation overlap visualisations for a folder of images.

    :param dict paths: 'images', 'segms' and 'output' paths
    :param int nb_jobs: number of parallel jobs
    :param float segm_alpha: transparency of the segmentation overlay
    :raises RuntimeError: when output would overwrite a source folder
    :raises FileNotFoundError: when the output's parent folder is missing
    """
    logging.info('running...')
    # fix: validation via exceptions instead of assert (stripped under -O);
    # consistent with the raise-based twin of this function in the project
    if paths['segms'] == paths['output']:
        raise RuntimeError('overwriting segmentation dir')
    if os.path.basename(paths['images']) == paths['output']:
        raise RuntimeError('overwriting image dir')
    logging.info(tl_expt.string_dict(paths, desc='PATHS'))
    if not os.path.exists(paths['output']):
        dir_out = os.path.dirname(paths['output'])
        if not os.path.isdir(dir_out):
            raise FileNotFoundError('missing folder: %s' % dir_out)
        os.mkdir(paths['output'])
    paths_imgs = glob.glob(paths['images'])
    logging.info('found %i images in dir "%s"', len(paths_imgs), paths['images'])
    _warped_overlap = partial(perform_visu_overlap, paths=paths,
                              segm_alpha=segm_alpha)
    created = []
    iterate = tl_expt.WrapExecuteSequence(_warped_overlap, paths_imgs,
                                          nb_jobs=nb_jobs,
                                          desc='overlapping')
    for r in iterate:
        created.append(r)
    logging.info('matched and created %i overlaps', np.sum(created))
    logging.info('DONE')
def parse_arg_params():
    """ create simple arg parser with default values (input, results, dataset)

    :return obj: argparse dictionary of parsed arguments
    :raises FileNotFoundError: when an input/output directory does not exist
    :raises ValueError: when old and new label lists differ in length
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-imgs', '--path_images', type=str, required=True,
                        help='path to images', default=PATH_IMAGES)
    parser.add_argument('-out', '--path_output', type=str, required=True,
                        help='path to output dir', default=PATH_OUTPUT)
    parser.add_argument('--label_old', type=int, required=False, nargs='+',
                        help='labels to be replaced', default=[0])
    parser.add_argument('--label_new', type=int, required=False, nargs='+',
                        help='new labels after replacing', default=[0])
    parser.add_argument('--nb_workers', type=int, required=False,
                        help='number of jobs in parallel', default=NB_WORKERS)
    args = vars(parser.parse_args())
    for k in ['path_images', 'path_output']:
        p_dir = tl_data.update_path(os.path.dirname(args[k]))
        # fix: validate with exceptions instead of assert (stripped under -O)
        if not os.path.isdir(p_dir):
            raise FileNotFoundError('missing folder: %s' % args[k])
        args[k] = os.path.join(p_dir, os.path.basename(args[k]))
    if len(args['label_old']) != len(args['label_new']):
        raise ValueError('length of old (%i) and new (%i) labels should be same'
                         % (len(args['label_old']), len(args['label_new'])))
    logging.info(tl_expt.string_dict(args, desc='ARG PARAMETERS'))
    return args
def parse_arg_params():
    """ create simple arg parser with default values (input, results, dataset)

    :return obj: argparse dictionary of parsed arguments
    :raises FileNotFoundError: when the images directory does not exist
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-imgs', '--path_images', type=str, required=True,
                        help='path to dir with images', default=PATH_IMAGES)
    parser.add_argument('-m', '--method', type=str, required=False,
                        help='method for quantisation color/position',
                        default='color', choices=['color', 'position'])
    parser.add_argument('-thr', '--px_threshold', type=float, required=False,
                        help='percentage of pixels of a color to be removed',
                        default=THRESHOLD_INVALID_PIXELS)
    parser.add_argument('--nb_workers', type=int, required=False,
                        help='number of jobs in parallel', default=NB_WORKERS)
    args = vars(parser.parse_args())
    # normalise the images path relative to the project root
    p_dir = tl_data.update_path(os.path.dirname(args['path_images']))
    if not os.path.isdir(p_dir):
        raise FileNotFoundError('missing folder: %s' % args['path_images'])
    args['path_images'] = os.path.join(p_dir,
                                       os.path.basename(args['path_images']))
    logging.info(tl_expt.string_dict(args, desc='ARG PARAMETERS'))
    return args
def main(dict_paths, export_visual=EXPORT_VUSIALISATION, nb_workers=NB_THREADS):
    """ evaluate all segmentations in experiment folder

    :param dict dict_paths: path to all required directories
    :param bool export_visual: export visualisations
    :param int nb_workers: number threads in parralel
    """
    logging.info('running in %i jobs...', nb_workers)
    logging.info(tl_expt.string_dict(dict_paths, desc='PATHS'))
    list_results = sorted(glob.glob(os.path.join(dict_paths['results'], '*')))

    def _if_path(p):
        # keep only plain result folders (not helper '___' dirs or skip list)
        return all((os.path.isdir(p),
                    '___' not in os.path.basename(p),
                    os.path.basename(p) not in SKIP_DIRS))

    list_results = sorted(p for p in list_results if _if_path(p))
    tl_expt.create_subfolders(dict_paths['results'],
                              [NAME_DIR_VISUAL_1, NAME_DIR_VISUAL_2,
                               NAME_DIR_VISUAL_3])
    _wrapper_eval = partial(evaluate_folder, dict_paths=dict_paths,
                            export_visual=export_visual)
    iterate = tl_expt.WrapExecuteSequence(_wrapper_eval, list_results,
                                          nb_workers=nb_workers)
    # fix: DataFrame.append was removed in pandas 2.0 and is O(n^2) in a loop;
    # collect rows first and build the frame once
    rows = list(iterate)
    df_all = pd.DataFrame(rows)
    df_all.set_index(['method'], inplace=True)
    df_all.sort_index(inplace=True)
    logging.info('STATISTIC: \n %r', df_all)
    df_all.to_csv(os.path.join(dict_paths['results'],
                               NAME_CSV_STAT % 'OVERALL'))
def main(params):
    """ PIPELINE for new detections

    :param dict params: pipeline configuration
    """
    params = run_train.prepare_experiment_folder(params, FOLDER_EXPERIMENT)
    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info('COMPUTER: \n%r', platform.uname())
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_expt'], LIST_SUBFOLDER)
    path_csv = os.path.join(params['path_expt'], NAME_CSV_TRIPLES)
    df_paths = get_csv_triplets(params['path_list'], path_csv,
                                params['path_images'], params['path_segms'],
                                force_reload=FORCE_RERUN)
    # classifier parameters are overridden by the current run's parameters
    dict_classif = seg_clf.load_classifier(params['path_classif'])
    params_clf = dict_classif['params']
    params_clf.update(params)
    logging.info(tl_expt.string_dict(params, desc='UPDATED PARAMETERS'))
    # perform on new images
    _wrapper_detection = partial(
        load_compute_detect_centers,
        params=params_clf,
        path_classif=params['path_classif'],
        path_output=params['path_expt'],
    )
    iterate = tl_expt.WrapExecuteSequence(_wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_workers=params['nb_workers'])
    # fix: DataFrame.append was removed in pandas 2.0 and is O(n^2) in a loop;
    # accumulate rows and rebuild the frame for the periodic temp export
    rows = []
    for dict_center in iterate:
        rows.append(dict_center)
        pd.DataFrame(rows).to_csv(os.path.join(params['path_expt'],
                                               NAME_CSV_TRIPLES_TEMP))
    df_stat = pd.DataFrame(rows)
    df_stat.set_index(['image'], inplace=True)
    df_stat.to_csv(os.path.join(params['path_expt'], NAME_CSV_TRIPLES))
    logging.info('STATISTIC: \n %r', df_stat.describe())
def main(params):
    """ process complete list of experiments

    :param dict params: parsing configuration
    """
    logging.info(string_dict(params, desc='PARAMETERS:'))
    list_expt = list_experiments(params['path_expt'], params['name_expt'])
    assert len(list_expt) > 0, 'No experiments found!'
    params['path_dataset'] = get_path_dataset(params['path_expt'])
    # reconstruct every experiment sequentially, cleaning memory in between
    for name_expt in tqdm.tqdm(list_expt, desc='Experiments'):
        process_expt_reconstruction(name_expt,
                                    path_expt=params['path_expt'],
                                    path_dataset=params['path_dataset'],
                                    path_imgs=params['path_images'],
                                    nb_workers=params['nb_workers'],
                                    visual=params['visual'])
        gc.collect()
        time.sleep(1)
def experiments_real(params=REAL_PARAMS):
    """ run all experiments

    :param dict params: default parameters (deep-copied by parse_params,
        so the module-level REAL_PARAMS default is never mutated)
    """
    params = parse_params(params, LIST_METHODS)
    logging.info(string_dict(params, desc='PARAMETERS'))
    # expand only the parameters meant to be iterated over
    iter_params = filter_iterable_params(params)
    experiment_iterate(params, iter_params, user_gt=False)
def parse_experiments(params):
    """ with specific input parameters wal over result folder and parse it

    :param dict params: at least 'path' with the results root folder
    """
    logging.info('running recompute Experiments results')
    logging.info(string_dict(params, desc='ARGUMENTS:'))
    assert os.path.exists(params['path']), 'missing "%s"' % params['path']
    nb_workers = params.get('nb_workers', NB_WORKERS)
    path_dirs = [
        path for path in glob.glob(os.path.join(params['path'], '*'))
        if os.path.isdir(path)
    ]
    logging.info('found experiments: %i', len(path_dirs))
    _wrapper_parse_folder = partial(parse_experiment_folder, params=params)
    # consume the lazy executor; results are written by the wrapped task
    list(WrapExecuteSequence(_wrapper_parse_folder, path_dirs, nb_workers))
def main(params):
    """ PIPELINE for matching

    :param {str: str} params: pipeline configuration
    """
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    if not os.path.isdir(params['path_output']):
        os.mkdir(params['path_output'])
    df_info = pd.read_csv(params['path_infofile'], index_col=0)
    df_info = r_match.filter_table(df_info, params['path_images'])
    df_info.dropna(inplace=True)
    # keep only sufficiently well matched ellipses
    df_info = df_info[df_info['ellipse_Jaccard'] >= OVERLAP_THRESHOLD]
    logging.info('filtered %i item in table', len(df_info))
    # execute over groups per stage
    path_dir_imgs = os.path.dirname(params['path_images'])
    for stage, df_stage in df_info.groupby('stage'):
        perform_stage(df_stage, stage, path_dir_imgs, params['path_output'])
def run(self, gt=False, iter_params=None):
    """ the main procedure that load, perform and evaluate experiment

    :param bool gt: search for the Ground Truth using standard names
    :param list|dict iter_params: list of configurations, or a dict of
        parameter-name -> candidate values to be expanded
    """
    logging.info('perform the complete experiment')
    # fix: check dict BEFORE is_list_like — pandas' is_list_like() returns
    # True for dicts, which made the expand_params branch unreachable
    if isinstance(iter_params, dict):
        logging.info(string_dict(iter_params, desc='ITERATE PARAMETERS:'))
        self.iter_params = expand_params(iter_params)
    elif is_list_like(iter_params):
        self.iter_params = copy.deepcopy(iter_params)
    else:
        self.iter_params = None
    self._load_data(gt)
    self._perform()
    self._summarise()
    logging.info('FINISHED >]')
    # drop handlers so the per-experiment log file is released
    logging.getLogger().handlers = []
def parse_arg_params():
    """ create simple arg parser with default values (input, results, dataset)

    :return obj: argparse dictionary of parsed arguments
    :raises FileNotFoundError: when an input/output directory does not exist
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-imgs', '--path_images', type=str, required=True,
                        help='path to dir with images', default=PATH_INPUT)
    parser.add_argument('-out', '--path_out', type=str, required=True,
                        help='path to output dir', default=PATH_OUTPUT)
    parser.add_argument('-clrs', '--path_colors', type=str, required=False,
                        help='json with colour-label dict', default=None)
    parser.add_argument('--nb_workers', type=int, required=False,
                        help='number of jobs in parallel', default=NB_WORKERS)
    args = vars(parser.parse_args())
    # normalise in/out paths relative to the project root
    for key in ['path_images', 'path_out']:
        parent = tl_data.update_path(os.path.dirname(args[key]))
        if not os.path.isdir(parent):
            raise FileNotFoundError('missing: %s' % args[key])
        args[key] = os.path.join(parent, os.path.basename(args[key]))
    if args['path_colors'] is not None:
        args['path_colors'] = tl_data.update_path(args['path_colors'])
    logging.info(tl_expt.string_dict(args, desc='ARG PARAMETERS'))
    return args
def main_train(params):
    """ PIPELINE for training
    0) load triplets or create triplets from path to images, annotations
    1) load precomputed data or compute them now
    2) train classifier with hyper-parameters
    3) perform Leave-One-Out experiment

    :param {str: any} params: training configuration
    """
    params = prepare_experiment_folder(params, FOLDER_EXPERIMENT)
    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.save_config_yaml(
        os.path.join(params['path_expt'], NAME_YAML_PARAMS), params)
    tl_expt.create_subfolders(params['path_expt'], LIST_SUBDIRS)
    df_paths, _ = load_df_paths(params)
    path_dump_data = os.path.join(params['path_expt'], NAME_DUMP_TRAIN_DATA)
    # load precomputed training data if available, otherwise compute & dump it
    if not os.path.isfile(path_dump_data) or FORCE_RECOMP_DATA:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels, feature_names) = \
            dataset_load_images_segms_compute_features(params, df_paths,
                                                       params['nb_workers'])
        assert len(dict_imgs) > 0, 'missing images'
        save_dump_data(path_dump_data, dict_imgs, dict_segms, dict_slics,
                       dict_points, dict_centers, dict_features, dict_labels,
                       feature_names)
    else:
        (dict_imgs, dict_segms, dict_slics, dict_points, dict_centers,
         dict_features, dict_labels,
         feature_names) = load_dump_data(path_dump_data)
    if is_drawing(params['path_expt']) and EXPORT_TRAINING_DATA:
        export_dataset_visual(params['path_expt'], dict_imgs, dict_segms,
                              dict_slics, dict_points, dict_labels,
                              params['nb_workers'])
    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, drop_labels=[-1],
        balance_type=params['balance'])
    # remove all bad values from features space
    features[np.isnan(features)] = 0
    features[np.isinf(features)] = -1
    assert np.sum(sizes) == len(labels), \
        'not equal sizes (%d) and labels (%i)' \
        % (int(np.sum(sizes)), len(labels))
    # feature norm & train classification
    nb_holdout = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH))
    cv = seg_clf.CrossValidateGroups(sizes, nb_holdout)
    classif, params['path_classif'] = \
        seg_clf.create_classif_search_train_export(
            params['classif'], features, labels, cross_val=cv, params=params,
            feature_names=feature_names,
            nb_search_iter=params['nb_classif_search'],
            pca_coef=params.get('pca_coef', None),
            nb_workers=params['nb_workers'],
            path_out=params['path_expt'],
        )
    # evaluate the trained classifier with a (larger) hold-out split
    nb_holdout = int(np.ceil(len(sizes) * CROSS_VAL_LEAVE_OUT_EVAL))
    cv = seg_clf.CrossValidateGroups(sizes, nb_holdout)
    seg_clf.eval_classif_cross_val_scores(params['classif'], classif,
                                          features, labels, cross_val=cv,
                                          path_out=params['path_expt'])
    seg_clf.eval_classif_cross_val_roc(params['classif'], classif,
                                       features, labels, cross_val=cv,
                                       path_out=params['path_expt'])
    if RUN_LEAVE_ONE_OUT:
        experiment_loo(classif, dict_imgs, dict_segms, dict_centers,
                       dict_slics, dict_points, dict_features,
                       feature_names, params)
def main_train(params):
    """ the main composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) visualise labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images

    :param {str: ...} params: training configuration
    :return {str: ...}: updated parameters (with experiment paths)
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    show_visual = params.get('visual', False)
    reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(
        params, dir_name=NAME_EXPERIMENT,
        stamp_unique=params.get('unique', EACH_UNIQUE_EXPERIMENT),
        skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    # load precomputed training data if available, otherwise compute & dump it
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params, show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'
    # project per-superpixel labels back onto pixels via the SLIC maps
    dict_annot_slic = {
        n: np.asarray(dict_labels[n])[dict_slics[n]] for n in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels', None),
                                 params['nb_jobs'])
    # fix: DataFrame.append was removed in pandas 2.0 — use pd.concat
    df_stat = pd.concat([df_stat, pd.DataFrame([get_summary(df, 'SLIC-annot')])],
                        ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)
    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))
    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
        if show_visual else None
    dict_labels = filter_train_with_purity(
        dict_imgs, dict_labels, dict_label_hist, params['label_purity'],
        dict_slics, drop_labels=params.get('drop_labels', None),
        path_visu=path_purity_visu, nb_jobs=params['nb_jobs'])
    logging.info('prepare features...')
    # concentrate features, labels
    # fix: guard against drop_labels being present but None (other call sites
    # use a None default), which would break list concatenation
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1, np.nan] + (params.get('drop_labels', None) or []))
    # drop "do not care" label which are -1
    features = np.nan_to_num(features)
    nb_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * nb_holdout)))  # minimum is 1
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))  # max is half of the set
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    def _path_expt(n):
        return os.path.join(params['path_exp'], n)

    # test classif. on training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params, paths_img, classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))
    gc.collect()
    time.sleep(1)
    # LEAVE P OUT
    if params.get('run_LPO', True):
        idx_paths_img = list(zip(df_paths.index.tolist(),
                                 df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params, df_stat, dict_annot, idx_paths_img,
                                 path_classif, path_dump, nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))
    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params