def main(path_experiment, path_dataset, visual=False, nb_workers=NB_WORKERS):
    """ main entry point

    :param str path_experiment: path to the experiment folder
    :param str path_dataset: path to the dataset with all landmarks
    :param bool visual: whether to visualise the registration results
    :param int nb_workers: number of parallel jobs
    """
    path_results = os.path.join(path_experiment, ImRegBenchmark.NAME_CSV_REGISTRATION_PAIRS)
    assert os.path.isfile(path_results), 'missing results table: %s' % path_results
    df_experiments = pd.read_csv(path_results)
    df_results = df_experiments.copy()
    _compute_lnds_stat = partial(ImRegBenchmark.compute_registration_statistic,
                                 df_experiments=df_results,
                                 path_dataset=path_dataset,
                                 path_experiment=path_experiment)
    # NOTE: this has to run in a SINGLE thread so there is a SINGLE table instance
    list(iterate_mproc_map(_compute_lnds_stat, df_experiments.iterrows(),
                           desc='Statistic', nb_workers=1))

    path_csv = os.path.join(path_experiment, NAME_CSV_RESULTS)
    logging.debug('exporting CSV results: %s', path_csv)
    df_results.to_csv(path_csv, index=None)
    export_summary_results(df_results, path_experiment, None,
                           name_csv=NAME_CSV_SUMMARY, name_txt=NAME_TXT_SUMMARY)

    if visual:
        _visualise_regist = partial(ImRegBenchmark.visualise_registration,
                                    path_dataset=path_dataset,
                                    path_experiment=path_experiment)
        list(iterate_mproc_map(_visualise_regist, df_experiments.iterrows(),
                               desc='Visualisation', nb_workers=nb_workers))

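# A minimal usage sketch for the evaluation entry point above; the folder
# paths are hypothetical examples and NB_WORKERS is the module-level default.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main(path_experiment='results/my-registration-experiment',
         path_dataset='data-images/landmarks',
         visual=True,
         nb_workers=NB_WORKERS)
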
def __execute_method(self, method, input_table, path_csv=None, desc='',
                     aggr_experiments=False, nb_workers=None):
    """ execute a method in sequence or parallel

    :param func method: used method
    :param DF input_table: table to iterate over
    :param str path_csv: path to the output temporary CSV
    :param str desc: name of the running process
    :param bool aggr_experiments: append output to the experiment DF
    :param int|None nb_workers: number of jobs, by default using the class setting
    """
    # mark that the following runs outside the main thread
    self._main_thread = False
    # run the experiment in parallel or in a single thread
    nb_workers = self.nb_workers if nb_workers is None else nb_workers
    iter_table = ((idx, dict(row)) for idx, row in input_table.iterrows())
    for res in iterate_mproc_map(method, iter_table, nb_workers=nb_workers, desc=desc):
        if res is not None and aggr_experiments:
            # note: DataFrame.append is deprecated in recent pandas (removed in 2.0);
            #   pd.concat([df, pd.DataFrame([res])]) is the modern equivalent
            self._df_experiments = self._df_experiments.append(res, ignore_index=True)
            self.__export_df_experiments(path_csv)
    self._main_thread = True

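# A self-contained sketch of the same sequence-or-parallel dispatch pattern
# using only the standard library; `iterate_mproc_map` in this code base plays
# an analogous role (a lazy map over a worker pool, falling back to a plain
# loop for a single worker). This is an illustration, not the real helper.
from multiprocessing import Pool

def _mproc_map_sketch(method, iterable, nb_workers=1):
    if nb_workers > 1:
        with Pool(nb_workers) as pool:
            for res in pool.imap(method, iterable):
                yield res
    else:
        for item in iterable:
            yield method(item)
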
def export_summary_json(df_experiments, path_experiments, path_output,
                        min_landmarks=1., details=True):
    """ summarize results in a particular JSON format

    :param DF df_experiments: experiment DataFrame
    :param str path_experiments: path to the experiment folder
    :param str path_output: path to the generated results
    :param float min_landmarks: required ratio of submitted landmarks in range (0, 1),
        match values in COL_PAIRED_LANDMARKS
    :param bool details: whether to export case details
    :return str: path to the exported results
    """
    if COL_NORM_TIME not in df_experiments.columns:
        df_experiments[COL_NORM_TIME] = np.nan

    # note: we expect that the path starts with the tissue name
    #   and that the Unix separator "/" is used
    def _get_tissue(cell):
        tissue = cell.split(os.sep)[0]
        return tissue[:tissue.index('_')] if '_' in tissue else tissue

    df_experiments[COL_TISSUE] = \
        df_experiments[ImRegBenchmark.COL_POINTS_REF].apply(_get_tissue)

    # export partial, per-case results
    cases = list(iterate_mproc_map(parse_landmarks, df_experiments.iterrows(),
                                   desc='Parsing landmarks', nb_workers=1))

    # where the final measure is missing, copy over the initial one
    for col, col2 in zip(*_filter_tre_measure_columns(df_experiments)):
        mask = df_experiments[col].isnull()
        df_experiments.loc[mask, col] = df_experiments.loc[mask, col2]

    # parse the final metrics
    scores = compute_scores(df_experiments, min_landmarks)

    path_comp_bm_expt = os.path.join(path_experiments, NAME_JSON_COMPUTER)
    if os.path.isfile(path_comp_bm_expt):
        with open(path_comp_bm_expt, 'r') as fp:
            comp_exp = json.load(fp)
    else:
        comp_exp = None

    results = {
        'aggregates': scores,
        'cases': dict(cases) if details else 'not exported',
        'computer': comp_exp,
        'submission-time': time.strftime(FORMAT_DATE_TIME, time.gmtime()),
        'required-landmarks': min_landmarks,
    }
    path_json = os.path.join(path_output, NAME_JSON_RESULTS)
    logging.info('exporting JSON results: %s', path_json)
    with open(path_json, 'w') as fp:
        json.dump(results, fp)
    return path_json

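# Toy demonstration of the mask-and-fill step above; the column names are
# hypothetical, not the benchmark's real TRE columns, and the imports are
# local so the snippet stays self-contained.
def _demo_backfill_missing_tre():
    import numpy as np
    import pandas as pd
    df = pd.DataFrame({'rTRE-final': [0.1, np.nan], 'rTRE-init': [0.5, 0.4]})
    mask = df['rTRE-final'].isnull()
    df.loc[mask, 'rTRE-final'] = df.loc[mask, 'rTRE-init']
    # only the missing final value is replaced by its initial counterpart
    assert df['rTRE-final'].tolist() == [0.1, 0.4]
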
def main(path_images, scales, image_extension, overwrite, nb_workers):
    """ main entry point

    :param str path_images: path to the input images
    :param list(float) scales: define scales in percentage, range (0, 100)
    :param str image_extension: image extension used on output
    :param bool overwrite: whether to overwrite existing images on output
    :param int nb_workers: number of jobs running in parallel
    """
    image_paths = sorted(glob.glob(path_images))
    if not image_paths:
        logging.info('No images found on "%s"', path_images)
        return
    image_path_scales = [(im_path, sc) for im_path in image_paths
                         for sc in scales]

    _wrap_scale = partial(wrap_scale_image,
                          image_ext=image_extension, overwrite=overwrite)
    list(iterate_mproc_map(_wrap_scale, image_path_scales,
                           desc='Scaling images', nb_workers=nb_workers))

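# Usage sketch for this entry point; the same glob-expand / partial-bind /
# parallel-map pattern recurs in the split, crop and convert mains below.
# The glob pattern and scales here are hypothetical examples.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main(path_images='data-images/*/scale-100pc/*.png',
         scales=[5, 10, 25, 50],
         image_extension='.jpg',
         overwrite=False,
         nb_workers=2)
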
def dataset_expand_landmarks(path_annots, path_dataset, nb_selected=None,
                             nb_total=None, nb_workers=NB_WORKERS):
    """ select and expand landmarks over the whole dataset

    :param str path_annots: root path to the original dataset
    :param str path_dataset: root path to the generated dataset
    :param float|int|None nb_selected: portion of selected points
    :param int|None nb_total: add extra points up to the total number
    :param int nb_workers: number of jobs running in parallel
    :return list(int): counts of landmarks per set
    """
    list_sets = list_sub_folders(path_annots)
    logging.info('Found sets: %i', len(list_sets))
    _wrap_extend = partial(extend_landmarks, path_dataset=path_dataset,
                           nb_selected=nb_selected, nb_total=nb_total)
    counts = list(iterate_mproc_map(_wrap_extend, sorted(list_sets),
                                    nb_workers=nb_workers, desc='expand landmarks'))
    return counts

def __images_preprocessing(self, item):
    """ create some pre-processed images: grayscale conversion and histogram matching

    :param dict item: the input record
    :return dict: updated item with optionally added pre-processed images
    """
    path_dir = self._get_path_reg_dir(item)

    def __path_img(path_img, pproc):
        img_name, img_ext = os.path.splitext(os.path.basename(path_img))
        return os.path.join(path_dir, img_name + '_' + pproc + img_ext)

    def __save_img(col, path_img_new, img):
        col_temp = col + self.COL_IMAGE_EXT_TEMP
        # remove the previous temporary image if one exists
        if isinstance(item.get(col_temp), str):
            path_img = self._absolute_path(item[col_temp], destination='expt')
            os.remove(path_img)
        save_image(path_img_new, img)
        return self._relativize_path(path_img_new, destination='path_exp'), col

    def __convert_gray(path_img_col):
        path_img, col = path_img_col
        path_img_new = __path_img(path_img, 'gray')
        __save_img(col, path_img_new, rgb2gray(load_image(path_img)))
        return self._relativize_path(path_img_new, destination='path_exp'), col

    for pproc in self.params.get('preprocessing', []):
        path_img_ref, path_img_move, _, _ = self._get_paths(item, prefer_pproc=True)
        if pproc.startswith('match'):
            color_space = pproc.split('-')[-1]
            path_img_new = __path_img(path_img_move, pproc)
            img = image_histogram_matching(
                load_image(path_img_move),
                load_image(path_img_ref),
                use_color=color_space,
            )
            path_img_new, col = __save_img(self.COL_IMAGE_MOVE, path_img_new, img)
            item[col + self.COL_IMAGE_EXT_TEMP] = path_img_new
        elif pproc in ('gray', 'grey'):
            argv_params = [(path_img_ref, self.COL_IMAGE_REF),
                           (path_img_move, self.COL_IMAGE_MOVE)]
            # IDEA: find a way to convert images in parallel inside the mproc pool;
            #   the problem is calling a class method inside the pool, which is not static
            for path_img, col in iterate_mproc_map(__convert_gray, argv_params,
                                                   nb_workers=1, desc=None):
                item[col + self.COL_IMAGE_EXT_TEMP] = path_img
        else:
            logging.warning('unrecognized pre-processing: %s', pproc)
    return item

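# A standalone sketch of the two pre-processing variants above using plain
# scikit-image calls; it only approximates what load_image/save_image together
# with rgb2gray/image_histogram_matching do in this class, assumes RGB inputs,
# and is an illustration, not the benchmark's actual implementation.
def _demo_preprocess(path_ref, path_move, path_out):
    from skimage.color import rgb2gray
    from skimage.exposure import match_histograms
    from skimage.io import imread, imsave

    img_ref, img_move = imread(path_ref), imread(path_move)
    # histogram matching: align the moving image's intensities to the reference
    img_matched = match_histograms(img_move, img_ref, channel_axis=-1)
    # cast back to the source dtype so the image can be saved as-is
    imsave(path_out, img_matched.astype(img_move.dtype))
    # the grayscale variant would call rgb2gray(...) on each image analogously
    return rgb2gray(img_move)
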
def main(path_images, dimension, overwrite, nb_workers):
    """ main entry point

    :param str path_images: path to images
    :param int dimension: for 2D images it is 0 or 1
    :param bool overwrite: whether to overwrite existing images on output
    :param int nb_workers: number of jobs running in parallel
    """
    image_paths = sorted(glob.glob(path_images))
    if not image_paths:
        logging.info('No images found on "%s"', path_images)
        return

    _wrap_split = partial(split_image, cut_dim=dimension, overwrite=overwrite)
    list(iterate_mproc_map(_wrap_split, image_paths,
                           desc='Cut image tissues', nb_workers=nb_workers))

def main(path_images, padding, nb_workers):
    """ main entry point

    :param str path_images: path to the images
    :param float padding: fraction of the image size to be used as padding
        around the detected tissue in the scan image, in range (0, 1)
    :param int nb_workers: number of jobs running in parallel
    """
    image_paths = sorted(glob.glob(path_images))
    if not image_paths:
        logging.info('No images found on "%s"', path_images)
        return

    _wrap_crop = partial(crop_image, padding=padding)
    list(iterate_mproc_map(_wrap_crop, image_paths,
                           desc='Crop image tissue', nb_workers=nb_workers))

def main(path_annots, path_dataset, path_output, consensus='mean',
         visual=False, nb_jobs=NB_THREADS):
    """ evaluate each user's annotations against the consensus landmarks
    and export per-user and overview statistics """
    coll_dirs, _ = collect_triple_dir([path_annots], '', '', with_user=True)
    logging.info('Collected sub-folders: %i', len(coll_dirs))
    user_names = sorted({parse_path_user_scale(d['landmarks'])[0]
                         for d in coll_dirs})
    logging.info('Found users: %r', user_names)
    if len(user_names) < 2:
        logging.info('Not enough user annotations.')

    _evaluate_user = partial(evaluate_user, path_annots=path_annots,
                             path_dataset=path_dataset, path_out=path_output,
                             tp_consensus=consensus, visual=visual)
    dfs = list(iterate_mproc_map(_evaluate_user, user_names,
                                 nb_workers=nb_jobs, desc='evaluate'))

    # aggregate results
    df_all = pd.concat(dfs, sort=False)
    df_all.to_csv(os.path.join(path_output, 'STATISTIC__partial.csv'))
    df_short = pd.DataFrame()
    for user, dfg in df_all.groupby('user'):
        stat = dict(dfg['rTRE median'].describe().T[['mean', 'std', 'max']])
        stat = {'%s [median rTRE]' % k: stat[k] for k in stat if k != 'count'}
        stat.update({'user': user, 'count': len(dfg)})
        df_short = df_short.append(stat, ignore_index=True)
    df_short.set_index('user', inplace=True)
    logging.info('OVERALL \n%s \n %s', '=' * 10, df_short)
    df_short.to_csv(os.path.join(path_output, 'STATISTIC__overview.csv'))

    logging.info('Created %i statistics.', len(df_all))
    return len(df_all)

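# A toy demonstration of the per-user aggregation above; the values are
# invented and the import is local so the snippet stays self-contained.
# It collects rows in a list instead of using DataFrame.append, which is
# deprecated in recent pandas.
def _demo_user_overview():
    import pandas as pd
    df_all = pd.DataFrame({'user': ['anna', 'anna', 'ben'],
                           'rTRE median': [0.02, 0.04, 0.05]})
    rows = []
    for user, dfg in df_all.groupby('user'):
        stat = dfg['rTRE median'].describe()[['mean', 'std', 'max']].to_dict()
        stat = {'%s [median rTRE]' % k: v for k, v in stat.items()}
        stat.update({'user': user, 'count': len(dfg)})
        rows.append(stat)
    return pd.DataFrame(rows).set_index('user')
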
def main(path_images, level=DEFAULT_LEVEL, overwrite=False, nb_workers=1):
    """ main entry point

    :param str path_images: path to images
    :param int level: selected level of the internal pyramid representation;
        level 0 means full scale, and higher levels mean smaller images in the pyramid
    :param bool overwrite: whether to overwrite existing images on output
    :param int nb_workers: number of jobs running in parallel
    """
    paths_img = sorted(glob.glob(path_images))

    _wrap_convert = partial(convert_image, level=level, overwrite=overwrite)
    list(iterate_mproc_map(_wrap_convert, paths_img,
                           desc='Converting images', nb_workers=nb_workers))

def dataset_scale_landmarks(path_dataset, scales=SCALES, nb_jobs=NB_THREADS):
    """ scale the whole dataset

    :param str path_dataset: path to the dataset
    :param [int] scales: selected output scales
    :param int nb_jobs: number of parallel jobs
    :return [int]: counts of scaled sets
    """
    list_sets = list_sub_folders(path_dataset)
    logging.info('Found sets: %i', len(list_sets))
    _wrap_scale = partial(scale_set_landmarks, scales=scales)
    counts = list(iterate_mproc_map(_wrap_scale, sorted(list_sets),
                                    nb_workers=nb_jobs, desc='scaling sets'))
    return counts

def dataset_scale_landmarks(path_dataset, scales=DEFAULT_SCALES,
                            nb_workers=NB_WORKERS):
    """ generate several scales within the same dataset

    :param str path_dataset: path to the source/generated dataset
    :param [int] scales: created scales
    :param int nb_workers: number of jobs running in parallel
    :return [int]: counts of scaled sets
    """
    list_sets = list_sub_folders(path_dataset)
    logging.info('Found sets: %i', len(list_sets))
    _wrap_scale = partial(scale_set_landmarks, scales=scales)
    counts = list(iterate_mproc_map(_wrap_scale, sorted(list_sets),
                                    nb_workers=nb_workers, desc='scaling sets'))
    return counts

def main(path_landmarks, path_dataset, path_output, scales, nb_jobs=NB_THREADS):
    """ visualise landmarks at the given scales over all collected sub-folders """
    assert path_landmarks != path_output, \
        'the folder "%s" cannot be used as output' % path_output
    assert path_dataset != path_output, \
        'the folder "%s" cannot be used as output' % path_output
    coll_dirs, _ = collect_triple_dir([path_landmarks], path_dataset,
                                      path_output, scales=scales)
    # keep only the folders which actually exist
    coll_dirs = [d for d in coll_dirs
                 if os.path.isdir(d['images']) and os.path.isdir(d['landmarks'])]
    if not coll_dirs:
        logging.info('No sub-folders collected.')
        return 0
    lnds_dirs = sorted(cd['landmarks'] for cd in coll_dirs)
    logging.info('Collected %i sub-folders: \n%s', len(coll_dirs), '\n'.join(lnds_dirs))
    counts = list(iterate_mproc_map(export_visual_set_scale, coll_dirs,
                                    nb_workers=nb_jobs, desc='visualise'))
    logging.info('Performed %i visualisations', sum(counts))
    return counts

def dataset_generate_landmarks(path_annots, path_dataset,
                               tp_consensus='mean', nb_jobs=NB_THREADS):
    """ generate consensus landmarks in full scale (100%)

    :param str path_annots: path to the folder with annotations
    :param str path_dataset: output dataset path
    :param str tp_consensus: type of consensus landmarks
    :param int nb_jobs: number of parallel jobs
    :return [int]: counts of generated landmark sets
    """
    list_sets = list_sub_folders(path_annots)
    logging.info('Found sets: %i', len(list_sets))
    _wrap_lnds = partial(generate_consensus_landmarks,
                         path_dataset=path_dataset, tp_consensus=tp_consensus)
    counts = list(iterate_mproc_map(_wrap_lnds, sorted(list_sets),
                                    nb_workers=nb_jobs, desc='consensus landmarks'))
    return counts

def main(path_experiment, path_table, path_dataset, path_output,
         path_reference=None, path_comp_bm=None, min_landmarks=1.,
         details=True, allow_inverse=False):
    """ main entry point

    :param str path_experiment: path to the experiment folder
    :param str path_table: path to the assignment file (requested registration pairs)
    :param str path_dataset: path to the provided landmarks
    :param str path_output: path to the generated results
    :param str|None path_reference: path to the complete landmark collection,
        if None use the dataset folder
    :param str|None path_comp_bm: path to the reference computer benchmark
    :param float min_landmarks: required ratio of submitted landmarks in range (0, 1),
        match values in COL_PAIRED_LANDMARKS
    :param bool details: whether to export case details
    :param bool allow_inverse: also allow evaluating the inverse transformation,
        i.e. landmarks warped from the reference to the moving image
    """
    path_results = os.path.join(path_experiment, ImRegBenchmark.NAME_CSV_REGISTRATION_PAIRS)
    if not os.path.isfile(path_results):
        raise AttributeError('Missing experiments results: %s' % path_results)
    path_reference = path_dataset if not path_reference else path_reference

    # drop the time column from the assignment (cover) table, which should be empty
    df_overview = pd.read_csv(path_table).drop([ImRegBenchmark.COL_TIME],
                                               axis=1, errors='ignore')
    df_overview = _df_drop_unnamed(df_overview)
    # drop the Warped* columns from the assignment table, which should be empty
    df_overview = df_overview.drop([col for col in df_overview.columns
                                    if 'warped' in col.lower()],
                                   axis=1, errors='ignore')
    df_results = pd.read_csv(path_results)
    df_results = _df_drop_unnamed(df_results)
    cols_ = list(ImRegBenchmark.COVER_COLUMNS_WRAP) + [ImRegBenchmark.COL_TIME]
    df_results = df_results[[col for col in cols_ if col in df_results.columns]]

    df_experiments = pd.merge(df_overview, df_results, how='left',
                              on=ImRegBenchmark.COVER_COLUMNS)
    df_experiments = swap_inverse_experiment(df_experiments, allow_inverse)
    # drop any remaining unnamed columns from the merged table
    df_experiments.drop(filter(lambda c: 'Unnamed' in c, df_experiments.columns),
                        axis=1, inplace=True)
    df_experiments = replicate_missing_warped_landmarks(
        df_experiments, path_dataset, path_experiment)

    normalize_exec_time(df_experiments, path_experiment, path_comp_bm)

    # disabled filtering of used landmarks; it would populate COL_PAIRED_LANDMARKS
    # logging.info('Filter used landmarks.')
    # path_filtered = os.path.join(path_output, FOLDER_FILTER_DATASET)
    # create_folder(path_filtered, ok_existing=True)
    # _filter_lnds = partial(filter_export_landmarks, path_output=path_filtered,
    #                        path_dataset=path_dataset, path_reference=path_reference)
    # for idx, ratio in iterate_mproc_map(_filter_lnds, df_experiments.iterrows(),
    #                                     desc='Filtering', nb_workers=nb_workers):
    #     df_experiments.loc[idx, COL_PAIRED_LANDMARKS] = np.round(ratio, 2)

    logging.info('Compute landmarks statistic.')
    _compute_lnds_stat = partial(ImRegBenchmark.compute_registration_statistic,
                                 df_experiments=df_experiments,
                                 path_dataset=path_dataset,
                                 path_experiment=path_experiment,
                                 path_reference=path_reference)
    # NOTE: this has to run in a SINGLE thread so there is a SINGLE table instance
    list(iterate_mproc_map(_compute_lnds_stat, df_experiments.iterrows(),
                           desc='Statistic', nb_workers=1))

    name_results, _ = os.path.splitext(os.path.basename(path_results))
    path_results = os.path.join(path_output, name_results + '_NEW.csv')
    logging.debug('exporting CSV results: %s', path_results)
    df_experiments.to_csv(path_results)

    path_json = export_summary_json(df_experiments, path_experiment, path_output,
                                    min_landmarks, details)
    return path_json

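# Usage sketch (all folder and file names are hypothetical examples) mirroring
# how this evaluation entry point would typically be invoked.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    path_json = main(path_experiment='submission/',
                     path_table='dataset_cover.csv',
                     path_dataset='landmarks_user/',
                     path_output='output/',
                     min_landmarks=0.8,
                     details=True)
    logging.info('Exported summary: %s', path_json)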