def save_poly_coco(annotations: list, base_filepath: str):
    """
    Save aggregated polygon COCO annotations as JSON.

    @param annotations: Either [[annotation1 of im1, annotation2 of im1, ...], ...]
        or [{"method1": [annotation1 of im1, ...]}, ...]. The layout is detected from the
        first element only. An empty list is saved as an empty list of annotations.
    @param base_filepath: Output filepath without its extension. In the dict layout one file
        named base_filepath + "." + key + ".json" is written per (flattened) key. In the list
        layout the filepath is built by get_save_filepath(base_filepath, None, ".json").
    @return: None
    @raise TypeError: If the first element of annotations is neither a dict nor a list.
    """
    # annotations is either a list of annotation lists or a list of dictionaries, one entry
    # for each method (and sub-methods) used. An empty input has no element to inspect, so it
    # is handled like an empty list of annotation lists.
    first_entry = annotations[0] if len(annotations) > 0 else []
    if isinstance(first_entry, dict):
        # Means several methods/settings were used
        # Transform list of dicts to a dict of lists:
        dictionary = local_utils.list_of_dicts_to_dict_of_lists(annotations)
        dictionary = local_utils.flatten_dict(dictionary)
        for key, _annotations in dictionary.items():
            out_filepath = base_filepath + "." + key + ".json"
            python_utils.save_json(out_filepath, _annotations)
    elif isinstance(first_entry, list):
        # Concatenate all lists
        flattened_annotations = list(itertools.chain.from_iterable(annotations))
        out_filepath = get_save_filepath(base_filepath, None, ".json")
        python_utils.save_json(out_filepath, flattened_annotations)
    else:
        # Report the type of the offending element: the container itself is always a list.
        raise TypeError("annotations has unrecognized type {}".format(
            type(first_entry)))
def eval_shapefile(gt_polygons, pred_info):
    """
    Return the contour metrics of a predicted shapefile, computing them only if needed.

    The metrics are read from "<shapefile path without extension>.metrics.json". When that
    load yields a falsy value, they are computed against gt_polygons and saved to that file.

    @param gt_polygons: Ground truth polygons the prediction is compared to.
    @param pred_info: Mapping with a "shapefile_filepath" entry pointing to the prediction.
    @return: Dict with key "max_angle_diffs" (values in degrees when freshly computed).
    """
    shapefile_filepath = pred_info["shapefile_filepath"]
    stem, _ = os.path.splitext(shapefile_filepath)
    metrics_filepath = stem + ".metrics.json"

    metrics = python_utils.load_json(metrics_filepath)
    if not metrics:
        # Nothing cached: load the prediction and repair its geometries first
        fixed_dt_polygons = polygon_utils.fix_polygons(
            load_shapefile(shapefile_filepath))
        print(f"Loaded {len(fixed_dt_polygons)} pred polygons")

        raw_measures = polygon_utils.compute_polygon_contour_measures(
            fixed_dt_polygons,
            gt_polygons,
            sampling_spacing=1.0,
            min_precision=0.5,
            max_stretch=2)
        # None entries carry no measure and are dropped
        angles = np.array(
            [measure for measure in raw_measures if measure is not None])
        angles = angles * 180 / np.pi  # Convert to degrees

        metrics = {"max_angle_diffs": list(angles)}
        python_utils.save_json(metrics_filepath, metrics)

    print(f"Got {len(metrics['max_angle_diffs'])} max_angle_diff values")
    return metrics
def main():
    """
    Evaluate every matched (image, ground truth, prediction) triplet and save the aggregates.

    Each triplet is evaluated by eval_one in a process pool. The per-file "max_angle_diffs"
    lists are concatenated and the intersections/unions are summed. Both aggregates are
    written as "aggr_metrics.json" and "aggr_iou.json" in the directory of the first
    prediction filepath.
    """
    args = get_args()
    print_utils.print_info(
        f"INFO: evaluating {len(args.pred_filepath)} predictions.")

    # Match files together
    im_gt_pred_filepaths = match_im_gt_pred(args.im_filepath, args.gt_filepath,
                                            args.pred_filepath)

    # The context manager shuts the worker processes down once all results are collected
    # (or if a worker raised), so they are not leaked.
    with Pool() as pool:
        metrics_iou_list = list(
            tqdm(pool.imap(partial(eval_one, overwrite=args.overwrite),
                           im_gt_pred_filepaths),
                 desc="Compute eval metrics",
                 total=len(im_gt_pred_filepaths)))

    # Aggregate metrics and IoU
    aggr_metrics = {"max_angle_diffs": []}
    aggr_iou = {"intersection": 0, "union": 0}
    for metrics_iou in metrics_iou_list:
        if metrics_iou["metrics"]:
            aggr_metrics["max_angle_diffs"] += metrics_iou["metrics"][
                "max_angle_diffs"]
        if metrics_iou["iou"]:
            aggr_iou["intersection"] += metrics_iou["iou"]["intersection"]
            aggr_iou["union"] += metrics_iou["iou"]["union"]

    if aggr_iou["union"]:
        aggr_iou["iou"] = aggr_iou["intersection"] / aggr_iou["union"]
    else:
        # No area at all (e.g. no IoU result available): the ratio is undefined. Store None
        # instead of raising ZeroDivisionError, so that both aggregate files still get saved.
        aggr_iou["iou"] = None

    output_dirpath = os.path.dirname(args.pred_filepath[0])
    aggr_metrics_filepath = os.path.join(output_dirpath, "aggr_metrics.json")
    aggr_iou_filepath = os.path.join(output_dirpath, "aggr_iou.json")
    python_utils.save_json(aggr_metrics_filepath, aggr_metrics)
    python_utils.save_json(aggr_iou_filepath, aggr_iou)
def save_geojson(polygons, base_filepath, name=None, image_filepath=None):
    """
    Save polygons as a GeoJSON geometry collection.

    @param polygons: Geometries to gather into one shapely GeometryCollection.
    @param base_filepath: Output filepath without extension, passed to get_save_filepath.
    @param name: Optional name passed to get_save_filepath.
    @param image_filepath: Currently unused.
    @return: None
    """
    # TODO: add georef and features
    out_filepath = get_save_filepath(base_filepath, name, ".geojson")
    collection = shapely.geometry.collection.GeometryCollection(polygons)
    python_utils.save_json(out_filepath, shapely.geometry.mapping(collection))
def save_geojson(polygons, base_filepath, name=None, image_filepath=None):
    """
    Save polygons as a GeoJSON geometry collection.

    The output filepath is base_filepath + "." + name + ".geojson", or
    base_filepath + ".geojson" when name is None.

    @param polygons: Geometries to gather into one shapely GeometryCollection.
    @param base_filepath: Output filepath without extension.
    @param name: Optional name inserted before the extension.
    @param image_filepath: Currently unused.
    @return: None
    """
    # TODO: add georef and features
    if name is None:
        filepath = base_filepath + ".geojson"
    else:
        filepath = base_filepath + "." + name + ".geojson"
    collection = shapely.geometry.collection.GeometryCollection(polygons)
    python_utils.save_json(filepath, shapely.geometry.mapping(collection))
def load_image_ids(self):
    """
    Return the list of image ids, reading it from the processed directory when available.

    The list is stored in "image_id_list-small.json" (small dataset) or "image_id_list.json"
    inside self.processed_dir. If that file does not exist, the ids are queried from the
    COCO object and written to it.

    @return: The image id list.
    """
    filename = "image_id_list-small.json" if self.small else "image_id_list.json"
    image_id_list_filepath = os.path.join(self.processed_dir, filename)

    if not os.path.exists(image_id_list_filepath):
        coco = self.get_coco()
        image_id_list = coco.getImgIds(catIds=coco.getCatIds())
        # Save for later so that the whole coco object doesn't have to be instantiated when
        # just reading processed samples with multiple workers:
        python_utils.save_json(image_id_list_filepath, image_id_list)
        return image_id_list

    return python_utils.load_json(image_id_list_filepath)
def eval_one(im_gt_pred_filepath, overwrite=False):
    """
    Compute (or re-use) the contour metrics and the IoU terms of one prediction file.

    Results are stored next to the prediction as "<pred>.metrics.json" and "<pred>.iou.json".
    Unless overwrite is set, both files are read first and only the missing (falsy) results
    are computed and saved.

    @param im_gt_pred_filepath: Tuple (image filepath, ground truth filepath, prediction filepath).
    @param overwrite: If True, ignore any saved result and recompute both.
    @return: Dict {"metrics": {"max_angle_diffs": [...]}, "iou": {"intersection": ..., "union": ...}}
    """
    im_filepath, gt_filepath, pred_filepath = im_gt_pred_filepath
    pred_stem = os.path.splitext(pred_filepath)[0]
    metrics_filepath = pred_stem + ".metrics.json"
    iou_filepath = pred_stem + ".iou.json"

    if overwrite:
        metrics = False
        iou = False
    else:
        # Try reading metrics and iou json
        metrics = python_utils.load_json(metrics_filepath)
        iou = python_utils.load_json(iou_filepath)

    if metrics and iou:
        # Both results were already available: nothing to compute
        return {"metrics": metrics, "iou": iou}

    # Have to compute at least one so load geometries
    gt_polygons = load_geom(gt_filepath, im_filepath)
    # Buffer adds vertices but is needed to repair some geometries
    fixed_gt_polygons = polygon_utils.fix_polygons(gt_polygons, buffer=0.0001)
    pred_polygons = load_geom(pred_filepath, im_filepath)
    fixed_pred_polygons = polygon_utils.fix_polygons(pred_polygons)

    if not metrics:
        # Compute and save metrics
        raw_measures = polygon_utils.compute_polygon_contour_measures(
            fixed_pred_polygons,
            fixed_gt_polygons,
            sampling_spacing=1.0,
            min_precision=0.5,
            max_stretch=2,
            progressbar=True)
        angles = np.array(
            [measure for measure in raw_measures if measure is not None])
        angles = angles * 180 / np.pi  # Convert to degrees
        metrics = {"max_angle_diffs": list(angles)}
        python_utils.save_json(metrics_filepath, metrics)

    if not iou:
        # Compute and save the intersection and union areas of the two polygon sets
        gt_collection = shapely.geometry.collection.GeometryCollection(
            fixed_gt_polygons)
        pred_collection = shapely.geometry.collection.GeometryCollection(
            fixed_pred_polygons)
        intersection = gt_collection.intersection(pred_collection).area
        union = gt_collection.union(pred_collection).area
        iou = {"intersection": intersection, "union": union}
        python_utils.save_json(iou_filepath, iou)

    return {"metrics": metrics, "iou": iou}
def setup_run(config):
    """
    Set up the run directory and record the run's config, arguments and commit hash in it.

    @param config: Dict read for the keys "run_name", "new_run", "init_run_name",
        "runs_dirpath", "optim_params" -> "batch_size" and, optionally, "samples".
    @return: Tuple (run_dirpath, init_checkpoints_dirpath). init_checkpoints_dirpath is None
        when config["init_run_name"] is None.
    """
    run_name = config["run_name"]
    new_run = config["new_run"]
    init_run_name = config["init_run_name"]

    # "runs_dirpath" is resolved relative to the directory of this file
    working_dir = os.path.dirname(os.path.abspath(__file__))
    runs_dir = os.path.join(working_dir, config["runs_dirpath"])

    # setup init checkpoints directory path if one is specified:
    if init_run_name is not None:
        init_run_dirpath = run_utils.setup_run_dir(runs_dir, init_run_name)
        _, init_checkpoints_dirpath = run_utils.setup_run_subdirs(
            init_run_dirpath)
    else:
        init_checkpoints_dirpath = None

    # setup run directory:
    run_dirpath = run_utils.setup_run_dir(runs_dir, run_name, new_run)

    # save config in logs directory
    run_utils.save_config(config, run_dirpath)

    # save args
    args_filepath = os.path.join(run_dirpath, "args.json")
    args_to_save = {
        "run_name": run_name,
        "new_run": new_run,
        "init_run_name": init_run_name,
        "batch_size": config["optim_params"]["batch_size"],
    }
    if "samples" in config:
        args_to_save["samples"] = config["samples"]
    python_utils.save_json(args_filepath, args_to_save)

    # save current commit hash
    commit_hash = get_git_revision_hash()
    if commit_hash is not None:
        commit_hash_filepath = os.path.join(run_dirpath, "commit_history.json")
        commit_hashes = []
        if os.path.exists(commit_hash_filepath):
            # An empty history (or a falsy result from load_json) is treated as "no history
            # yet" instead of crashing on commit_hashes[-1] below.
            # NOTE(review): assumes load_json returns a falsy value on failure - confirm.
            commit_hashes = python_utils.load_json(commit_hash_filepath) or []
        # Only write the file when the current commit is not already the latest entry
        if not commit_hashes or commit_hashes[-1] != commit_hash:
            commit_hashes.append(commit_hash)
            python_utils.save_json(commit_hash_filepath, commit_hashes)

    return run_dirpath, init_checkpoints_dirpath
def evaluate(self, split_name: str, ds: torch.utils.data.DataLoader):
    """
    Run inference over ds, optionally polygonize, and save individual/aggregated outputs.

    Batches are accumulated until config["eval_params"]["batch_size_mult"] of them are
    available (or the last batch is reached), then concatenated and processed together.
    Individual outputs are handed to an asynchronous saver. Aggregated outputs are collected
    in self.shared_dict and written by the process with self.gpu == 0 once all processes
    have reached self.barrier.

    @param split_name: Name of the evaluated split, used in output file names.
    @param ds: Data loader yielding batches as dicts of tensors and/or lists.
    @return: None
    @raise TypeError: If a batch value is neither a list nor a torch.Tensor.
    @raise ValueError: If stats are requested and the batch has neither "gt_mask" nor
        "gt_polygons_image".
    """
    # Prepare data saving:
    flag_filepath_format = os.path.join(self.eval_dirpath, split_name,
                                        "{}.flag")

    # Loading model
    self.load_checkpoint()
    self.model.eval()

    # Create pool for multiprocessing
    pool = None
    if not self.config["eval_params"]["patch_size"]:
        # If single image is not being split up, then a pool to process each sample in the
        # batch makes sense
        pool = Pool(processes=self.config["num_workers"])

    compute_polygonization = self.config["eval_params"]["save_individual_outputs"]["poly_shapefile"] or \
        self.config["eval_params"]["save_individual_outputs"]["poly_geojson"] or \
        self.config["eval_params"]["save_individual_outputs"]["poly_viz"] or \
        self.config["eval_params"]["save_aggregated_outputs"]["poly_coco"]

    # Saving individual outputs to disk:
    save_individual_outputs = True in self.config["eval_params"][
        "save_individual_outputs"].values()
    saver_async = None
    if save_individual_outputs:
        save_outputs_partial = partial(
            save_utils.save_outputs,
            config=self.config,
            eval_dirpath=self.eval_dirpath,
            split_name=split_name,
            flag_filepath_format=flag_filepath_format)
        saver_async = async_utils.Async(save_outputs_partial)
        saver_async.start()

    # Saving aggregated outputs
    save_aggregated_outputs = True in self.config["eval_params"][
        "save_aggregated_outputs"].values()

    tile_data_list = []

    # Only the process with gpu == 0 displays a progress bar
    if self.gpu == 0:
        tile_iterator = tqdm(ds,
                             desc="Eval {}: ".format(split_name),
                             leave=True)
    else:
        tile_iterator = ds
    for tile_i, tile_data in enumerate(tile_iterator):
        # --- Inference, add result to tile_data_list
        if self.config["eval_params"]["patch_size"] is not None:
            # Cut image into patches for inference
            inference.inference_with_patching(self.config, self.model,
                                              tile_data)
        else:
            # Feed images as-is to the model
            inference.inference_no_patching(self.config, self.model,
                                            tile_data)

        tile_data_list.append(tile_data)

        # --- Accumulate batches into tile_data_list until capacity is reached (or this is
        # the last batch)
        if self.config["eval_params"]["batch_size_mult"] <= len(tile_data_list)\
                or tile_i == len(tile_iterator) - 1:
            # Concat tensors of tile_data_list
            accumulated_tile_data = {}
            for key in tile_data_list[0].keys():
                if isinstance(tile_data_list[0][key], list):
                    accumulated_tile_data[key] = [
                        item for _tile_data in tile_data_list
                        for item in _tile_data[key]
                    ]
                elif isinstance(tile_data_list[0][key], torch.Tensor):
                    accumulated_tile_data[key] = torch.cat(
                        [_tile_data[key] for _tile_data in tile_data_list],
                        dim=0)
                else:
                    raise TypeError(
                        f"Type {type(tile_data_list[0][key])} is not handled!"
                    )
            tile_data_list = []  # Empty tile_data_list
        else:
            # tile_data_list is not full yet, continue running inference...
            continue

        # --- Polygonize
        if compute_polygonization:
            crossfield = accumulated_tile_data[
                "crossfield"] if "crossfield" in accumulated_tile_data else None
            accumulated_tile_data["polygons"], accumulated_tile_data[
                "polygon_probs"] = polygonize.polygonize(
                    self.config["polygonize_params"],
                    accumulated_tile_data["seg"],
                    crossfield_batch=crossfield,
                    pool=pool)

        # --- Save output
        if self.config["eval_params"]["save_individual_outputs"]["seg_mask"] or \
                self.config["eval_params"]["save_aggregated_outputs"]["seg_coco"]:
            # Take seg_interior:
            seg_pred_mask = self.config["eval_params"][
                "seg_threshold"] < accumulated_tile_data["seg"][:, 0, ...]
            accumulated_tile_data["seg_mask"] = seg_pred_mask

        accumulated_tile_data = local_utils.batch_to_cpu(
            accumulated_tile_data)
        sample_list = local_utils.split_batch(accumulated_tile_data)

        # Save individual outputs:
        if save_individual_outputs:
            for sample in sample_list:
                saver_async.add_work(sample)

        # Store aggregated outputs:
        if save_aggregated_outputs:
            self.shared_dict["name_list"].extend(
                accumulated_tile_data["name"])
            if self.config["eval_params"]["save_aggregated_outputs"][
                    "stats"]:
                y_pred = accumulated_tile_data["seg"][:, 0, ...].cpu()
                if "gt_mask" in accumulated_tile_data:
                    y_true = accumulated_tile_data["gt_mask"][:, 0, ...]
                elif "gt_polygons_image" in accumulated_tile_data:
                    y_true = accumulated_tile_data[
                        "gt_polygons_image"][:, 0, ...]
                else:
                    raise ValueError(
                        "Either gt_mask or gt_polygons_image should be in accumulated_tile_data"
                    )
                # One IoU value per sample: each sample is flattened before the comparison
                iou = measures.iou(
                    y_pred.reshape(y_pred.shape[0], -1),
                    y_true.reshape(y_true.shape[0], -1),
                    threshold=self.config["eval_params"]["seg_threshold"])
                self.shared_dict["iou_list"].extend(iou.cpu().numpy())
            if self.config["eval_params"]["save_aggregated_outputs"][
                    "seg_coco"]:
                for sample in sample_list:
                    annotations = save_utils.seg_coco(sample)
                    self.shared_dict["seg_coco_list"].extend(annotations)
            if self.config["eval_params"]["save_aggregated_outputs"][
                    "poly_coco"]:
                for sample in sample_list:
                    annotations = save_utils.poly_coco(
                        sample["polygons"], sample["polygon_probs"],
                        sample["image_id"].item())
                    # annotations could be a dict, or a list
                    self.shared_dict["poly_coco_list"].append(annotations)
    # END of loop over samples

    # The pool is only used for polygonization inside the loop above: release its worker
    # processes now instead of leaking them.
    if pool is not None:
        pool.close()
        pool.join()

    # Save aggregated results
    if save_aggregated_outputs:
        # Wait on all processes so that shared_dict is synchronized.
        self.barrier.wait()
        if self.gpu == 0:
            if self.config["eval_params"]["save_aggregated_outputs"][
                    "stats"]:
                print("Start saving stats:")
                # Save sample_stats in CSV:
                t1 = time.time()
                stats_filepath = os.path.join(
                    self.eval_dirpath, "{}.stats.csv".format(split_name))
                # newline="" lets the csv module control line endings. The context manager
                # closes the file even if writing a row fails.
                with open(stats_filepath, "w", newline="") as stats_file:
                    fnames = ["name", "iou"]
                    writer = csv.DictWriter(stats_file, fieldnames=fnames)
                    writer.writeheader()
                    for name, iou in sorted(zip(self.shared_dict["name_list"],
                                                self.shared_dict["iou_list"]),
                                            key=lambda pair: pair[0]):
                        writer.writerow({"name": name, "iou": iou})
                print(f"Finished in {time.time() - t1:02}s")

            if self.config["eval_params"]["save_aggregated_outputs"][
                    "seg_coco"]:
                print("Start saving seg_coco:")
                t1 = time.time()
                seg_coco_filepath = os.path.join(
                    self.eval_dirpath,
                    "{}.annotation.seg.json".format(split_name))
                python_utils.save_json(
                    seg_coco_filepath,
                    list(self.shared_dict["seg_coco_list"]))
                print(f"Finished in {time.time() - t1:02}s")

            if self.config["eval_params"]["save_aggregated_outputs"][
                    "poly_coco"]:
                print("Start saving poly_coco:")
                poly_coco_base_filepath = os.path.join(
                    self.eval_dirpath, f"{split_name}.annotation.poly")
                t1 = time.time()
                save_utils.save_poly_coco(
                    self.shared_dict["poly_coco_list"],
                    poly_coco_base_filepath)
                print(f"Finished in {time.time() - t1:02}s")

    # Sync point of individual outputs
    if save_individual_outputs:
        print_utils.print_info(
            f"GPU {self.gpu} -> INFO: Finishing saving individual outputs."
        )
        saver_async.join()
        # Wait on all processes so that all saver_asyncs are finished
        self.barrier.wait()
def __init__(self,
             root: str,
             fold: str = "train",
             pre_process: bool = True,
             patch_size: int = None,
             pre_transform=None,
             transform=None,
             small: bool = False,
             pool_size: int = 1,
             raw_dirname: str = "raw",
             processed_dirname: str = "processed"):
    """
    Store the dataset settings and either load/launch pre-processing or list the tiles.

    @param root: Root directory of the dataset.
    @param fold: Name of the fold. Also the name of the sub-directory of the processed
        directory used for this fold.
    @param pre_process: If True, the dataset will be pre-processed first, saving training
        patches on disk. If False, data will be served on-the-fly without any patching.
    @param patch_size: Stored as self.patch_size.
    @param pre_transform: Stored as self.pre_transform.
    @param transform: Stored as self.transform.
    @param small: If True, use a small subset of the dataset (for testing)
    @param pool_size: Stored as self.pool_size.
    @param raw_dirname: Stored as self.raw_dirname.
    @param processed_dirname: Name of the directory (under root) holding processed data.
    """
    self.root = root
    self.fold = fold
    self.pre_process = pre_process
    self.patch_size = patch_size
    self.pre_transform = pre_transform
    self.transform = transform
    self.small = small
    if self.small:
        print_utils.print_info(
            "INFO: Using small version of the xView2 xBD dataset.")
    self.pool_size = pool_size
    self.raw_dirname = raw_dirname

    if not self.pre_process:
        # Setup data sample list
        self.tile_info_list = self.get_tile_info_list()
        return

    # Setup of pre-process
    self.processed_dirpath = os.path.join(self.root, processed_dirname,
                                          self.fold)
    stats_filename = "stats-small.pt" if self.small else "stats.pt"
    paths_filename = "processed_paths-small.json" if self.small else "processed_paths.json"
    stats_filepath = os.path.join(self.processed_dirpath, stats_filename)
    processed_relative_paths_filepath = os.path.join(self.processed_dirpath,
                                                     paths_filename)

    # The paths file is written last, so its existence marks a finished pre-processing
    if os.path.exists(processed_relative_paths_filepath):
        # Process done, load stats and processed_relative_paths
        self.stats = torch.load(stats_filepath)
        self.processed_relative_paths = python_utils.load_json(
            processed_relative_paths_filepath)
        return

    # Pre-process not finished, launch it:
    tile_info_list = self.get_tile_info_list()
    self.stats = self.process(tile_info_list)
    # Save stats
    torch.save(self.stats, stats_filepath)
    # Save processed_relative_paths
    relative_paths = []
    for tile_info in tile_info_list:
        relative_paths.append(tile_info["processed_relative_filepath"])
    self.processed_relative_paths = relative_paths
    python_utils.save_json(processed_relative_paths_filepath,
                           self.processed_relative_paths)
def save_seg_coco(sample, base_filepath, name):
    """
    Save the segmentation COCO annotations of one sample as JSON.

    @param sample: Sample passed to seg_coco to build the annotations.
    @param base_filepath: Output filepath without extension, passed to get_save_filepath.
    @param name: Name passed to get_save_filepath.
    @return: None
    """
    out_filepath = get_save_filepath(base_filepath, name, ".json")
    python_utils.save_json(out_filepath, seg_coco(sample))
def save_seg_coco(sample, base_filepath, name):
    """
    Save the segmentation COCO annotations of one sample as JSON.

    The output filepath is base_filepath + "." + name + ".json".

    @param sample: Sample passed to seg_coco to build the annotations.
    @param base_filepath: Output filepath without extension.
    @param name: Name inserted before the extension.
    @return: None
    """
    out_filepath = "".join((base_filepath, ".", name, ".json"))
    python_utils.save_json(out_filepath, seg_coco(sample))
def eval_one(annotation_filename, run_results_dirpath, cocoGt, config, annType, pool=None):
    """
    Evaluate one COCO result file against the ground truth and save stats and contour metrics.

    COCO stats are written to "test.stats.<annotation name>[.<samples>].json" and contour
    metrics to "test.metrics.<annotation name>[.<samples>].json" in run_results_dirpath.
    Each is skipped when its file already exists. Contour metrics are only computed when the
    detections hold polygonal (list) segmentations.

    @param annotation_filename: Filename of the result file inside run_results_dirpath.
    @param run_results_dirpath: Directory holding the result file and receiving the outputs.
    @param cocoGt: Ground truth COCO object (its loadRes and getImgIds methods are used).
    @param config: Dict. If it has a "samples" key, its value is added to the output filenames.
    @param annType: Annotation type passed to COCOeval.
    @param pool: Optional pool passed to ContourEval.evaluate.
    @return: None
    """
    print("---eval_one")
    annotation_name = os.path.splitext(annotation_filename)[0]
    if "samples" in config:
        stats_filepath = os.path.join(run_results_dirpath,
                                      "{}.stats.{}.{}.json".format("test", annotation_name, config["samples"]))
        metrics_filepath = os.path.join(run_results_dirpath,
                                        "{}.metrics.{}.{}.json".format("test", annotation_name, config["samples"]))
    else:
        stats_filepath = os.path.join(run_results_dirpath, "{}.stats.{}.json".format("test", annotation_name))
        metrics_filepath = os.path.join(run_results_dirpath, "{}.metrics.{}.json".format("test", annotation_name))

    res_filepath = os.path.join(run_results_dirpath, annotation_filename)
    if not os.path.exists(res_filepath):
        print_utils.print_warning("WARNING: result not found at filepath {}".format(res_filepath))
        return
    print_utils.print_info("Evaluate {} annotations:".format(annotation_filename))

    try:
        cocoDt = cocoGt.loadRes(res_filepath)
    except AssertionError as e:
        print_utils.print_error("ERROR: {}".format(e))
        print_utils.print_info("INFO: continuing by removing unrecognised images")
        # Context manager so that the result file handle is closed right after parsing
        with open(res_filepath) as res_file:
            res = json.load(res_file)
        print("Initial res length:", len(res))
        annsImgIds = [ann["image_id"] for ann in res]
        # Image ids present in the results but unknown to the ground truth
        image_id_rm = set(annsImgIds) - set(cocoGt.getImgIds())
        print_utils.print_warning("Remove {} image ids!".format(len(image_id_rm)))
        new_res = [ann for ann in res if ann["image_id"] not in image_id_rm]
        print("New res length:", len(new_res))
        cocoDt = cocoGt.loadRes(new_res)

    if not os.path.exists(stats_filepath):
        # Run COCOeval
        cocoEval = COCOeval(cocoGt, cocoDt, annType)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

        # Save stats
        stat_names = ["AP", "AP_50", "AP_75", "AP_S", "AP_M", "AP_L", "AR", "AR_50", "AR_75", "AR_S", "AR_M", "AR_L"]
        assert len(stat_names) == cocoEval.stats.shape[0]
        stats = dict(zip(stat_names, cocoEval.stats))
        python_utils.save_json(stats_filepath, stats)
    else:
        print("COCO stats already computed, skipping...")

    if not os.path.exists(metrics_filepath):
        # Verify that cocoDt has polygonal segmentation masks and not raster masks.
        # Inspect the first detection overall rather than the first detection of the first
        # image: that image may have no detection at all, which used to raise IndexError.
        sample_annotations = cocoDt.loadAnns(cocoDt.getAnnIds()[:1])
        if sample_annotations and isinstance(sample_annotations[0]["segmentation"], list):
            metrics = {}
            # Run additional metrics
            print_utils.print_info("INFO: Running contour metrics")
            contour_eval = ContourEval(cocoGt, cocoDt)
            max_angle_diffs = contour_eval.evaluate(pool=pool)
            metrics["max_angle_diffs"] = list(max_angle_diffs)
            python_utils.save_json(metrics_filepath, metrics)
    else:
        print("Contour metrics already computed, skipping...")