def evaluate(self): """ Evaluates standard semantic segmentation metrics (http://cocodataset.org/#stuff-eval): * Mean intersection-over-union averaged across classes (mIoU) * Frequency Weighted IoU (fwIoU) * Mean pixel accuracy averaged across classes (mACC) * Pixel Accuracy (pACC) """ if self._distributed: synchronize() conf_matrix_list = all_gather(self._conf_matrix) self._predictions = all_gather(self._predictions) self._predictions = list(itertools.chain(*self._predictions)) if not is_main_process(): return self._conf_matrix = np.zeros_like(self._conf_matrix) for conf_matrix in conf_matrix_list: self._conf_matrix += conf_matrix if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "sem_seg_predictions.json") with PathManager.open(file_path, "w") as f: f.write(json.dumps(self._predictions)) acc = np.full(self._num_classes, np.nan, dtype=np.float) iou = np.full(self._num_classes, np.nan, dtype=np.float) tp = self._conf_matrix.diagonal()[:-1].astype(np.float) pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(np.float) class_weights = pos_gt / np.sum(pos_gt) pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(np.float) acc_valid = pos_gt > 0 acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid] iou_valid = (pos_gt + pos_pred) > 0 union = pos_gt + pos_pred - tp iou[acc_valid] = tp[acc_valid] / union[acc_valid] macc = np.sum(acc[acc_valid]) / np.sum(acc_valid) miou = np.sum(iou[acc_valid]) / np.sum(iou_valid) fiou = np.sum(iou[acc_valid] * class_weights[acc_valid]) pacc = np.sum(tp) / np.sum(pos_gt) res = {} res["mIoU"] = 100 * miou res["fwIoU"] = 100 * fiou for i, name in enumerate(self._class_names): res["IoU-{}".format(name)] = 100 * iou[i] res["mACC"] = 100 * macc res["pACC"] = 100 * pacc for i, name in enumerate(self._class_names): res["ACC-{}".format(name)] = 100 * acc[i] if self._output_dir: file_path = os.path.join(self._output_dir, "sem_seg_evaluation.pth") with PathManager.open(file_path, "wb") as f: torch.save(res, f) results = OrderedDict({"sem_seg": res}) self._logger.info(results) return results
def evaluate(self):
    if self._distributed:
        synchronize()
        endpoint_errors = all_gather(self._endpoint_errors)
        endpoint_errors = [per_image for per_gpu in endpoint_errors for per_image in per_gpu]
        self._predictions = all_gather(self._predictions)
        if not is_main_process():
            return

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(self._output_dir, "flow_predictions.json")
        with PathManager.open(file_path, "w") as f:
            f.write(json.dumps(self._predictions))

    ave_epe = sum(endpoint_errors) / len(endpoint_errors)
    res = {"ave_epe": ave_epe}

    if self._output_dir:
        file_path = os.path.join(self._output_dir, "flow_evaluation.pth")
        with PathManager.open(file_path, "wb") as f:
            torch.save(res, f)

    results = OrderedDict({"flow": res})
    small_table = create_small_table(res)
    self._logger.info("Evaluation results for flow: \n" + small_table)
    dump_info_one_task = {
        "task": "flow",
        "tables": [small_table],
    }
    _dump_to_markdown([dump_info_one_task])
    return results
def evaluate(self):
    if self._distributed:
        synchronize()
        results_per_image = all_gather(self.results_per_image)
        self.results_per_image = list(itertools.chain(*results_per_image))
        playback_heap = all_gather(self.playback_heap)
        playback_heap = list(itertools.chain(*playback_heap))
        # each GPU has its local N minimums; sort and take the global minimums
        playback_heap = sorted(playback_heap, key=lambda x: x[0])
        self.playback_heap = playback_heap[: self.playback_limit]
    self.callback(self)
    return {}
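# Single-process sketch of the "local minimums -> global minimums" merge in
# evaluate() above: each worker keeps its N smallest (key, payload) entries,
# the gathered lists are concatenated and re-sorted, and only the globally
# smallest survive (hypothetical data and limit).
per_gpu_heaps = [
    [(0.1, "a"), (0.4, "b")],  # worker 0's local minimums
    [(0.2, "c"), (0.3, "d")],  # worker 1's local minimums
]
merged = sorted(
    (item for heap in per_gpu_heaps for item in heap), key=lambda x: x[0]
)
playback_limit = 3
print(merged[:playback_limit])  # [(0.1, 'a'), (0.2, 'c'), (0.3, 'd')]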
def load(self, path, *args, **kwargs):
    need_sync = False

    if path and isinstance(self.model, DistributedDataParallel):
        logger = logging.getLogger(__name__)
        path = self.path_manager.get_local_path(path)
        has_file = os.path.isfile(path)
        all_has_file = comm.all_gather(has_file)
        if not all_has_file[0]:
            raise OSError(f"File {path} not found on main worker.")
        if not all(all_has_file):
            logger.warning(
                f"Not all workers can read checkpoint {path}. "
                "Training may fail to fully resume."
            )
            # TODO: broadcast the checkpoint file contents from main
            # worker, and load from it instead.
            need_sync = True
        if not has_file:
            path = None  # don't load if not readable

    ret = super().load(path, *args, **kwargs)

    if need_sync:
        logger.info("Broadcasting model states from main worker ...")
        if TORCH_VERSION >= (1, 7):
            self.model._sync_params_and_buffers()
    return ret
def get_avg_losses(self):
    if self._distributed:
        synchronize()
        self._losses = all_gather(self._losses)
        if not is_main_process():
            return
        all_losses = {}
        for p in self._losses:
            all_losses.update(p)
    else:
        all_losses = self._losses

    image_unique_ids = list(all_losses.keys())
    loss_keys = list(all_losses[image_unique_ids[0]].keys())
    losses_global_avg = {}
    for key in loss_keys:
        losses_global_avg[key] = []
    for img_spec_id in image_unique_ids:
        loss_sig = all_losses[img_spec_id]
        for key in loss_keys:
            losses_global_avg[key].append(loss_sig[key])
    for key in loss_keys:
        losses_global_avg[key] = np.array(losses_global_avg[key]).mean()
    global_loss = OrderedDict(losses_global_avg)
    return global_loss
def resume_or_load(self, resume=True):
    """
    If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by
    a `last_checkpoint` file), resume from the file. Resuming means loading all
    available states (e.g. optimizer and scheduler) and updating the iteration
    counter from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used.

    Otherwise, this is considered an independent training. The method will load
    model weights from the file `cfg.MODEL.WEIGHTS` (but will not load other
    states) and start from iteration 0.

    Args:
        resume (bool): whether to do resume or not
    """
    checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume)
    if resume and self.checkpointer.has_checkpoint():
        self.start_iter = checkpoint.get("iteration", -1) + 1
        # The checkpoint stores the training iteration that just finished, thus we
        # start at the next iteration (or iter zero if there's no checkpoint).
    if isinstance(self.model, DistributedDataParallel):
        # broadcast loaded data/model from the first rank, because other
        # machines may not have access to the checkpoint file
        if TORCH_VERSION >= (1, 7):
            self.model._sync_params_and_buffers()
        self.start_iter = comm.all_gather(self.start_iter)[0]
def compute_kmeans_anchors(cfg, data_loader, sort_by_area=True, _stride=0, _legacy_plus_one=False):
    assert (
        cfg.MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG > 0
    ), "Please provide positive MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG"

    num_training_img = cfg.MODEL.KMEANS_ANCHORS.NUM_TRAINING_IMG
    div_i, mod_i = divmod(num_training_img, comm.get_world_size())
    num_training_img_i = div_i + (comm.get_rank() < mod_i)

    box_sizes_i = collect_boxes_size_stats(
        data_loader,
        num_training_img_i,
        _legacy_plus_one=_legacy_plus_one,
    )

    all_box_sizes = comm.all_gather(box_sizes_i)
    box_sizes = np.concatenate(all_box_sizes)
    logger.info("Collected {} boxes from all gpus".format(len(box_sizes)))

    assert (
        cfg.MODEL.KMEANS_ANCHORS.NUM_CLUSTERS > 0
    ), "Please provide positive MODEL.KMEANS_ANCHORS.NUM_CLUSTERS"

    from sklearn.cluster import KMeans  # delayed import

    default_anchors = (
        KMeans(
            n_clusters=cfg.MODEL.KMEANS_ANCHORS.NUM_CLUSTERS,
            random_state=cfg.MODEL.KMEANS_ANCHORS.RNG_SEED,
        )
        .fit(box_sizes)
        .cluster_centers_
    )

    anchors = []
    for anchor in default_anchors:
        w, h = anchor
        # center anchor boxes at (stride/2, stride/2)
        new_anchors = np.hstack((
            _stride / 2 - 0.5 * w,
            _stride / 2 - 0.5 * h,
            _stride / 2 + 0.5 * w,
            _stride / 2 + 0.5 * h,
        ))
        anchors.append(new_anchors)
    anchors = np.array(anchors)

    # sort anchors by area
    areas = (anchors[:, 2] - anchors[:, 0]) * (anchors[:, 3] - anchors[:, 1])
    sqrt_areas = np.sqrt(areas)
    if sort_by_area:
        indices = np.argsort(sqrt_areas)
        anchors = anchors[indices]
        sqrt_areas = sqrt_areas[indices].tolist()

    display_str = "\n".join([
        s + "\t sqrt area: {:.2f}".format(a)
        for s, a in zip(str(anchors).split("\n"), sqrt_areas)
    ])
    logger.info("Computed kmeans anchors (sorted by area: {}):\n{}".format(
        sort_by_area, display_str))

    return anchors
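# Worked sketch of the cluster-center-to-anchor conversion above: a k-means
# center (w, h) becomes an (x1, y1, x2, y2) box centered at
# (stride/2, stride/2). Hypothetical values; _stride defaults to 0 as in the
# function signature.
import numpy as np

stride = 0
w, h = 32.0, 64.0
anchor = np.hstack((
    stride / 2 - 0.5 * w,
    stride / 2 - 0.5 * h,
    stride / 2 + 0.5 * w,
    stride / 2 + 0.5 * h,
))
print(anchor)  # [-16. -32.  16.  32.]
print((anchor[2] - anchor[0]) * (anchor[3] - anchor[1]))  # 2048.0 == w * h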
def split_epoch_end(self, outputs, split='val'):
    outputs = d2comm.gather(outputs)
    # master node
    if d2comm.is_main_process():
        assert self.trainer.node_rank == 0 and self.trainer.local_rank == 0
        outputs = sum(outputs, [])

        opt = self.opt
        loss_mean = sum([_['loss'].item() for _ in outputs]) / len(outputs)

        predictions = sum([_['predictions'] for _ in outputs], [])
        if len(outputs[0]['n_predictions']) != 0:
            n_predictions = sum([_['n_predictions'] for _ in outputs], [])
        else:
            n_predictions = []

        lang_stats = None
        if len(n_predictions) > 0 and 'perplexity' in n_predictions[0]:
            n_predictions = sorted(n_predictions, key=lambda x: x['perplexity'])

        if not os.path.isdir('eval_results'):
            os.mkdir('eval_results')
        torch.save(
            (predictions, n_predictions),
            os.path.join('eval_results/', '.saved_pred_' + opt.id + '_' + split + '.pth'))

        if opt.language_eval:
            lang_stats = eval_utils.language_eval(
                opt.input_json, predictions, n_predictions, vars(opt), split)

        if opt.reduce_on_plateau:
            optimizer = self.trainer.optimizers[0]
            # lang_stats stays None when opt.language_eval is off; fall back to loss
            if lang_stats is not None and 'CIDEr' in lang_stats:
                optimizer.scheduler_step(-lang_stats['CIDEr'])
            else:
                optimizer.scheduler_step(loss_mean)

        out = {'loss': loss_mean}
        if lang_stats is not None:  # dict.update(None) would raise a TypeError
            out.update(lang_stats)
        out['to_monitor'] = lang_stats['CIDEr'] if lang_stats is not None else -loss_mean
    else:
        out = {}

    out = d2comm.all_gather(out)[0]  # Only the one from master node
    assert len(out) > 0  # make sure the head has index 0

    # must all be tensors
    out = {
        k: torch.tensor(v) if not torch.is_tensor(v) else v
        for k, v in out.items()
    }
    return out
def evaluate(self):
    if self._distributed:
        synchronize()
        self._predictions = all_gather(self._predictions)
        self._predictions = list(itertools.chain(*self._predictions))
        if not is_main_process():
            return
    return copy.deepcopy(self._eval_predictions())
def evaluate(self, img_ids=None):
    if self._distributed:
        synchronize()
        predictions = all_gather(self._predictions)
        predictions = list(itertools.chain(*predictions))
        if not is_main_process():
            return
    else:
        predictions = self._predictions
    return copy.deepcopy(self._eval_predictions(predictions, img_ids))
def local_master_get_detection_dataset_dicts(*args, **kwargs):
    logger.info("Only load dataset dicts on local master process ...")
    dataset_dicts = (
        d2_get_detection_dataset_dicts(*args, **kwargs)
        if comm.get_local_rank() == 0
        else []
    )
    comm.synchronize()
    dataset_size = comm.all_gather(len(dataset_dicts))[0]
    if comm.get_local_rank() != 0:
        dataset_dicts = _FakeListObj(dataset_size)
    return dataset_dicts
def concat_all_gather(input):
    bs_int = input.shape[0]
    size_list = comm.all_gather(bs_int)
    max_size = max(size_list)
    max_shape = (max_size,) + input.shape[1:]

    padded_input = input.new_zeros(max_shape)
    padded_input[:bs_int] = input
    all_inputs = differentiable_all_gather(padded_input)
    inputs = [x[:sz] for sz, x in zip(size_list, all_inputs)]

    return inputs, size_list
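# Single-process sketch of the pad-then-slice trick used by concat_all_gather
# above: ranks can hold different batch sizes, so each rank pads its tensor to
# the global max before gathering and the padding is sliced off afterwards.
# The gather itself is simulated here with a plain list (hypothetical shapes).
import torch

per_rank = [torch.randn(2, 4), torch.randn(3, 4)]  # batches from two "ranks"
size_list = [t.shape[0] for t in per_rank]
max_size = max(size_list)

padded = []
for t in per_rank:
    p = t.new_zeros((max_size,) + tuple(t.shape[1:]))
    p[: t.shape[0]] = t
    padded.append(p)

# after the (simulated) all_gather, trim each tensor back to its true size
trimmed = [p[:sz] for sz, p in zip(size_list, padded)]
assert all(torch.equal(a, b) for a, b in zip(per_rank, trimmed))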
def reset(self):
    self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_")
    self._temp_dir = self._working_dir.name
    # All workers will write to the same results directory
    # TODO this does not work in distributed training
    self._temp_dir = comm.all_gather(self._temp_dir)[0]
    if self._temp_dir != self._working_dir.name:
        self._working_dir.cleanup()
    self._logger.info(
        "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir)
    )
def _log_time(self, msg, avg, all_times, distributed=False):
    percentiles = [
        np.percentile(all_times, k, interpolation="nearest") for k in [1, 5, 95, 99]
    ]
    if not distributed:
        logger.info(
            f"{msg}: avg={1.0 / avg:.1f} it/s, "
            f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
            f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
        )
        return
    avg_per_gpu = comm.all_gather(avg)
    percentiles_per_gpu = comm.all_gather(percentiles)
    if comm.get_rank() > 0:
        return
    for idx, avg, percentiles in zip(count(), avg_per_gpu, percentiles_per_gpu):
        logger.info(
            f"GPU{idx} {msg}: avg={1.0 / avg:.1f} it/s, "
            f"p1={percentiles[0]:.2g}s, p5={percentiles[1]:.2g}s, "
            f"p95={percentiles[2]:.2g}s, p99={percentiles[3]:.2g}s."
        )
def reset(self):
    self._working_dir = tempfile.TemporaryDirectory(prefix="cityscapes_eval_")
    self._temp_dir = self._working_dir.name
    # All workers will write to the same results directory
    # TODO this does not work in distributed training
    assert (
        comm.get_local_size() == comm.get_world_size()
    ), "CityscapesEvaluator currently does not work with multiple machines."
    self._temp_dir = comm.all_gather(self._temp_dir)[0]
    if self._temp_dir != self._working_dir.name:
        self._working_dir.cleanup()
    self._logger.info(
        "Writing cityscapes results to temporary directory {} ...".format(self._temp_dir)
    )
def _local_master_gather(func, check_equal=False):
    if comm.get_local_rank() == 0:
        x = func()
        assert x is not None
    else:
        x = None
    x_all = comm.all_gather(x)
    x_local_master = [x for x in x_all if x is not None]

    if check_equal:
        master = x_local_master[0]
        assert all(x == master for x in x_local_master), x_local_master
    return x_local_master
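# Single-process simulation of the filtering step in _local_master_gather:
# after all_gather, non-local-master ranks contributed None, so the filtered
# list keeps exactly one value per machine (hypothetical 2 machines x 2 GPUs).
x_all = [42, None, 42, None]
x_local_master = [x for x in x_all if x is not None]
master = x_local_master[0]
assert all(x == master for x in x_local_master)  # the check_equal=True path
print(x_local_master)  # [42, 42]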
def gather_scores(process_scores):
    # List of scores per process
    scores_list = comm.all_gather(process_scores)
    gathered = defaultdict(list)
    labels = {x for scores in scores_list for x in scores.keys()}
    for label in labels:
        # Sort in descending order.
        sorted_generator = heapq.merge(
            *[sorted(x[label], reverse=True) for x in scores_list], reverse=True
        )
        top_k = itertools.islice(sorted_generator, topk_loose[label])
        top_k_ascending = list(reversed(list(top_k)))  # Return to ascending order
        heapq.heapify(top_k_ascending)
        gathered[label] = top_k_ascending
    return gathered
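# Single-process sketch of the per-label top-k merge in gather_scores: each
# process holds ascending scores, the lists are merged in descending order,
# only the k best are kept, and the result is re-heapified in ascending order
# (hypothetical scores, k=3).
import heapq
import itertools

scores_list = [[0.2, 0.5, 0.9], [0.1, 0.4, 0.8]]  # ascending, one per process
merged_desc = heapq.merge(
    *[sorted(s, reverse=True) for s in scores_list], reverse=True
)
top_k = list(itertools.islice(merged_desc, 3))  # [0.9, 0.8, 0.5]
top_k_ascending = list(reversed(top_k))
heapq.heapify(top_k_ascending)  # a valid min-heap, ready for further pushes
print(top_k_ascending)  # [0.5, 0.8, 0.9]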
def evaluate(self): """ Evaluates Referring Segmentation IoU: """ if self._distributed: synchronize() self._predictions = all_gather(self._predictions) if not is_main_process(): return all_prediction = {} for p in self._predictions: all_prediction.update(p) else: all_prediction = self._predictions image_unique_ids = list(all_prediction.keys()) all_mIoU = [] all_inter = [] all_union = [] all_mIoU_bg = [] for img_sent_id in image_unique_ids: result = all_prediction[img_sent_id] all_mIoU.append(result[0]) all_mIoU_bg.append(result[1]) all_inter.append(result[2]) all_union.append(result[3]) MIoU = np.array(all_mIoU).mean() MIoU_bg = np.array(all_mIoU_bg).mean() OverIoU = np.array(all_inter).sum() / np.array(all_union).sum() if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join(self._output_dir, "prediction.pkl") with PathManager.open(file_path, "wb") as f: pickle.dump(all_prediction, f) self._logger.info('evaluation on {} expression instances'.format( len(image_unique_ids))) results = OrderedDict({ "MeanIoU": MIoU, "OverIoU": OverIoU, "MeanIoU_bg": MIoU_bg }) return results
def evaluate(self):
    if self._distributed:
        synchronize()
        conf_matrix_list = all_gather(self._conf_matrix)
        if not is_main_process():
            return
        self._conf_matrix = np.zeros_like(self._conf_matrix)
        for conf_matrix in conf_matrix_list:
            self._conf_matrix += conf_matrix

    # for fast auto augmentation
    loss = self.ave_loss.average()

    # calc iou and acc (np.float was removed from modern NumPy; use float)
    acc = np.zeros(self._N - 1, dtype=float)
    iou = np.zeros(self._N - 1, dtype=float)
    tp = self._conf_matrix.diagonal()[:-1].astype(float)
    pos_gt = np.sum(self._conf_matrix[:-1, :-1], axis=0).astype(float)
    class_weights = pos_gt / np.sum(pos_gt)
    pos_pred = np.sum(self._conf_matrix[:-1, :-1], axis=1).astype(float)
    acc_valid = pos_gt > 0
    acc[acc_valid] = tp[acc_valid] / pos_gt[acc_valid]
    iou_valid = (pos_gt + pos_pred) > 0
    union = pos_gt + pos_pred - tp
    iou[acc_valid] = tp[acc_valid] / union[acc_valid]
    macc = np.sum(acc) / np.sum(acc_valid)
    miou = np.sum(iou) / np.sum(iou_valid)
    fiou = np.sum(iou * class_weights)
    pacc = np.sum(tp) / np.sum(pos_gt)

    # f = open("output/autoaug/miou", "a")
    # import json
    # f.write(json.dumps({"loss": loss, "miou": miou * 100, "fiou": fiou * 100,
    #                     "macc": macc * 100, "pacc": pacc * 100}))
    # f.write("\n")
    return OrderedDict({
        "loss": loss,
        "miou": miou * 100,
        "fiou": fiou * 100,
        "macc": macc * 100,
        "pacc": pacc * 100
    })
def evaluate(self):
    results = {}
    # The evaluation will sometimes get stuck if the following code is not used.
    # `SemSegEvaluator` does synchronization between processes when computing the
    # metrics. In some cases the number of self.evaluators will not be the same
    # across processes, and the code will get stuck in synchronization. For
    # example, when evaluating 10 images on 8 GPUs, only 5 GPUs will be used for
    # evaluation, each with 2 images; the remaining 3 GPUs will have zero
    # self.evaluators, as they are constructed on-the-fly when calling
    # self.process(). We create additional evaluators so that all processes have
    # the same number of evaluators and the synchronization will not get stuck.
    evaluator_size = len(self.evaluators)
    synchronize()
    evaluator_size_list = all_gather(evaluator_size)
    max_evaluator_size = max(evaluator_size_list)
    if evaluator_size < max_evaluator_size:
        # create additional evaluators so that all processes have the same
        # number of evaluators
        metadata = MetadataCatalog.get(self.dataset_name)
        mcs_metadata = metadata.get("mcs_metadata")
        for idx in range(max_evaluator_size - evaluator_size):
            dummy_key = f"{self._DUMMY_KEY_PREFIX}_{idx}"
            assert dummy_key not in self.evaluators
            if mcs_metadata:
                for k in mcs_metadata:
                    self._get_evaluator(dummy_key, superclass_name=k).reset()
            else:
                self._get_evaluator(dummy_key).reset()

    for name, evaluator in self.evaluators.items():
        result = evaluator.evaluate()
        # NOTE: .evaluate() returns None for non-main process
        if result is not None:
            results[name] = result["sem_seg"]
    return results
def _initialize_diskcache(self):
    from mobile_cv.common.misc.local_cache import LocalCache

    cache_dir = "{}/{}".format(ROOT_CACHE_DIR, uuid.uuid4().hex[:8])
    cache_dir = comm.all_gather(cache_dir)[0]  # use same cache_dir
    logger.info("Creating diskcache database in: {}".format(cache_dir))
    self._cache = LocalCache(cache_dir=cache_dir, num_shards=8)
    # self._cache.cache.clear(retry=True)  # seems faster if index exists

    if comm.get_local_rank() == 0:
        if self._diskcache_strategy == "naive":
            for i, item in enumerate(self._lst):
                ret = self._write_to_local_db((i, item))
                assert ret, "Error writing index {} to local db".format(i)
                pct = 100.0 * i / len(self._lst)
                self._log_progress(pct)

        # NOTE: each item might be small in size (hundreds of bytes), and
        # writing millions of them can take a pretty long time (hours)
        # because of frequent disk access. One solution is grouping a batch
        # of items into a larger blob.
        elif self._diskcache_strategy == "batched_static":
            TARGET_BYTES = 50 * 1024
            average_bytes = np.average([
                self._lst[int(x)].size
                for x in np.linspace(0, len(self._lst) - 1, 1000)
            ])
            # NOTE: the attribute keeps its original `_chuck_size` spelling
            # since other methods may reference it
            self._chuck_size = max(1, int(TARGET_BYTES / average_bytes))
            logger.info(
                "Average data size: {} bytes; target chunk data size {} KiB;"
                " {} items per chunk; {} chunks in total".format(
                    average_bytes,
                    TARGET_BYTES / 1024,
                    self._chuck_size,
                    int(len(self._lst) / self._chuck_size),
                ))
            for i in range(0, len(self._lst), self._chuck_size):
                chunk = self._lst[i:i + self._chuck_size]
                chunk_i = int(i / self._chuck_size)
                ret = self._write_to_local_db((chunk_i, chunk))
                assert ret, "Error writing index {} to local db".format(chunk_i)
                pct = 100.0 * i / len(self._lst)
                self._log_progress(pct)

        # NOTE: instead of using a fixed chunk size, items can be grouped dynamically
        elif self._diskcache_strategy == "batched_dynamic":
            raise NotImplementedError()

        else:
            raise NotImplementedError(self._diskcache_strategy)

    comm.synchronize()
    logger.info("Finished writing to local disk, db size: {:.2f} MiB".format(
        self._cache.cache.volume() / 1024**2))

    # Optional sync for some strategies
    if self._diskcache_strategy == "batched_static":
        # propagate chunk size and make sure all local rank 0 processes use
        # the same value
        self._chuck_size = _local_master_gather(
            lambda: self._chuck_size, check_equal=True)[0]
        logger.info("Gathered chunk size: {}".format(self._chuck_size))

    # free the memory of self._lst
    self._size = _local_master_gather(lambda: len(self._lst), check_equal=True)[0]
    logger.info("Gathered list size: {}".format(self._size))
    del self._lst
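# Worked sketch of the batched_static chunk-size arithmetic above: sample the
# average serialized item size, then pick how many items fit in a ~50 KiB
# blob (hypothetical sizes).
TARGET_BYTES = 50 * 1024
average_bytes = 128.0  # pretend each item serializes to ~128 bytes
chunk_size = max(1, int(TARGET_BYTES / average_bytes))
print(chunk_size)  # 400 items per chunk
num_items = 1_000_000
print(num_items // chunk_size)  # 2500 chunks in total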
def all_gather(data, group=None):
    return comm.all_gather(data, group=group)
def evaluate(self): """ Evaluates Referring Segmentation IoU: """ if self._distributed: synchronize() self._predictions = all_gather(self._predictions) if not is_main_process(): return all_prediction = {} for p in self._predictions: all_prediction.update(p) else: all_prediction = self._predictions image_unique_ids = list(all_prediction.keys()) total_num = 0 recall_num = 0 recall_t2_num = 0 recall_t2_fusion_num = 0 num_type = {} recall_type = {} acc_type = {} recall_topk_num = {5: 0, 10: 0} point_recall_num = 0 point_recall_t2_num = 0 point_recall_fusion_t2_num = 0 for img_sent_id in image_unique_ids: result = all_prediction[img_sent_id] phrase_ids = result[0] phrase_types = result[1] pred_boxes = result[2] pred_similarity = result[3] targets = result[4] precomp_boxes = result[5] topk_pred_boxes = result[6] topk_fusion_pred_boxes = result[8] pred_boxes.clip() ious = pairwise_iou( targets, pred_boxes ) # this function will change the target_boxes into cuda mode iou = ious.numpy().diagonal() total_num += iou.shape[0] recall_num += int((iou >= cfg.MODEL.VG.EVAL_THRESH).sum()) # 0.5 pred_boxes_tensor = pred_boxes.tensor pred_center = (pred_boxes_tensor[:, :2] + pred_boxes_tensor[:, 2:]) / 2.0 pred_center = pred_center.repeat(1, 2) ## x_c, y_c, x_c, y_c targets_tensor = targets.tensor fall_tensor = targets_tensor - pred_center fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + ( fall_tensor[:, 2:] >= 0).float().sum(1) point_recall_num += (fall_tensor == 4).float().numpy().sum() topk_pred_boxes.clip() ious_topk = pairwise_iou(targets, topk_pred_boxes) recall_t2_num += int((ious_topk.numpy().diagonal() > cfg.MODEL.VG.EVAL_THRESH).sum()) topk_boxes_tensor = topk_pred_boxes.tensor pred_center = (topk_boxes_tensor[:, :2] + topk_boxes_tensor[:, 2:]) / 2.0 pred_center = pred_center.repeat(1, 2) ## x_c, y_c, x_c, y_c fall_tensor = targets_tensor - pred_center fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + ( fall_tensor[:, 2:] >= 0).float().sum(1) point_recall_t2_num += (fall_tensor == 4).float().numpy().sum() topk_fusion_pred_boxes.clip() ious_fusion_topk = pairwise_iou(targets, topk_fusion_pred_boxes) recall_t2_fusion_num += int((ious_fusion_topk.numpy().diagonal() > cfg.MODEL.VG.EVAL_THRESH).sum()) topk_fusion_boxes_tensor = topk_fusion_pred_boxes.tensor pred_center = (topk_fusion_boxes_tensor[:, :2] + topk_fusion_boxes_tensor[:, 2:]) / 2.0 pred_center = pred_center.repeat(1, 2) ## x_c, y_c, x_c, y_c fall_tensor = targets_tensor - pred_center fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + ( fall_tensor[:, 2:] >= 0).float().sum(1) point_recall_fusion_t2_num += ( fall_tensor == 4).float().numpy().sum() for pid, p_type in enumerate(phrase_types): p_type = p_type[0] num_type[p_type] = num_type.setdefault(p_type, 0) + 1 recall_type[p_type] = recall_type.setdefault( p_type, 0) + (iou[pid] >= cfg.MODEL.VG.EVAL_THRESH) precomp_boxes.clip() ious_top = pairwise_iou(targets, precomp_boxes).cpu() for k in [5, 10]: top_k = torch.topk(pred_similarity, k=k, dim=1)[0][:, [-1]] pred_similarity_topk = (pred_similarity >= top_k).float() ious_top_k = (ious_top * pred_similarity_topk).numpy() recall_topk_num[k] += int( ((ious_top_k >= cfg.MODEL.VG.EVAL_THRESH).sum(1) > 0).sum()) acc = recall_num / total_num acc_top5 = recall_topk_num[5] / total_num acc_top10 = recall_topk_num[10] / total_num acc_s2 = recall_t2_num / total_num acc_s2_fusion = recall_t2_fusion_num / total_num point_acc = point_recall_num / total_num point_acc_s2 = point_recall_t2_num / total_num point_acc_s2_fusion = 
point_recall_fusion_t2_num / total_num for type, type_num in num_type.items(): acc_type[type] = recall_type[type] / type_num if self._output_dir: PathManager.mkdirs(self._output_dir) file_path = os.path.join( self._output_dir, "prediction_{}.pkl".format(str(acc).replace('.', '_')[:6])) with PathManager.open(file_path, "wb") as f: pickle.dump(all_prediction, f) del all_prediction self._logger.info( 'evaluation on {} expression instances, detailed_iou: {}'.format( len(image_unique_ids), acc_type)) self._logger.info( 'Evaluate Pointing Accuracy: PointAcc:{}, PointAccS2:{}, PointAccS2Fusion:{}' .format(point_acc, point_acc_s2, point_acc_s2_fusion)) results = OrderedDict({ "acc": acc, "acc_top5": acc_top5, "acc_top10": acc_top10, 'acc_s2': acc_s2, 'acc_s2_fusion': acc_s2_fusion }) return results
def evaluate(self):
    all_corr_total = comm.all_gather([self.corr, self.total])
    corr = sum(x[0] for x in all_corr_total)
    total = sum(x[1] for x in all_corr_total)
    return {"accuracy": corr / total}
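# Sketch of the counter reduction in evaluate() above, with the all_gather
# replaced by a hand-built per-rank list (hypothetical counts).
all_corr_total = [[45, 50], [38, 50], [47, 50]]  # [correct, total] per rank
corr = sum(x[0] for x in all_corr_total)   # 130
total = sum(x[1] for x in all_corr_total)  # 150
print({"accuracy": corr / total})          # {'accuracy': 0.8666...}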
def _get_tensor_of_main_processing(self, tensor):
    tensor_list = comm.all_gather(tensor)
    tensor = tensor_list[0].to(self.device)
    return tensor
def main(
    cfg,
    output_dir,
    runner=None,
    is_train=True,
):
    setup_after_launch(cfg, output_dir, runner)

    if is_train:
        data_loader = runner.build_detection_train_loader(cfg)
    else:
        assert len(cfg.DATASETS.TEST) > 0, cfg.DATASETS.TEST
        data_loader = runner.build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])

    TOTAL_BENCHMARK_TIME = (
        100 if get_launch_environment() == "local" else 600
    )  # run benchmark for 10 min (100 s locally)
    LOGGING_METER_WINDOW_SIZE = 20
    LOGGING_METER_TIME_INTERVAL = 5
    WARMUP_ITERS = 5

    # initialize
    time_per_iter = HistoryBuffer(max_length=10000)
    total_time = 0

    start = time.time()
    for no, batch in enumerate(data_loader):
        data_time = time.time() - start
        time_per_iter.update(data_time)
        total_time += data_time

        if no == 0:
            logger.info("Show the first batch as example:\n{}".format(batch))

        # Assume batch size is constant
        batch_size = cfg.SOLVER.IMS_PER_BATCH // comm.get_world_size()
        assert len(batch) == batch_size

        median = time_per_iter.median(window_size=LOGGING_METER_WINDOW_SIZE)
        avg = time_per_iter.avg(window_size=LOGGING_METER_WINDOW_SIZE)
        log_every_n_seconds(
            logging.INFO,
            "iter: {};"
            " recent per-iter seconds: {:.4f} (avg) {:.4f} (median);"
            " recent per-image seconds: {:.4f} (avg) {:.4f} (median).".format(
                no,
                avg,
                median,
                avg / batch_size,
                median / batch_size,
            ),
            n=LOGGING_METER_TIME_INTERVAL,
        )

        # Synchronize between processes; exit when all processes have been
        # running for long enough. This mimics the loss.backward(); the logged
        # time doesn't include the time spent synchronizing.
        finished = comm.all_gather(total_time >= TOTAL_BENCHMARK_TIME)
        if all(x for x in finished):
            logger.info("Benchmarking finished after {} seconds".format(total_time))
            break

        start = time.time()

    dataset_name = ":".join(cfg.DATASETS.TRAIN) if is_train else cfg.DATASETS.TEST[0]
    time_per_iter = [x[0] for x in time_per_iter.values()]
    time_per_iter = time_per_iter[
        min(WARMUP_ITERS, max(len(time_per_iter) - WARMUP_ITERS, 0)):
    ]

    results = {
        "environment": {
            "num_workers": cfg.DATALOADER.NUM_WORKERS,
            "world_size": comm.get_world_size(),
            "processes_per_machine": get_num_processes_per_machine(),
        },
        "main_processes_stats": {
            "batch_size_per_process": batch_size,
            "per_iter_avg": np.average(time_per_iter),
            "per_iter_p1": np.percentile(time_per_iter, 1, interpolation="nearest"),
            "per_iter_p10": np.percentile(time_per_iter, 10, interpolation="nearest"),
            "per_iter_p50": np.percentile(time_per_iter, 50, interpolation="nearest"),
            "per_iter_p90": np.percentile(time_per_iter, 90, interpolation="nearest"),
            "per_iter_p99": np.percentile(time_per_iter, 99, interpolation="nearest"),
            "per_image_avg": np.average(time_per_iter) / batch_size,
            "per_image_p1": np.percentile(time_per_iter, 1, interpolation="nearest") / batch_size,
            "per_image_p10": np.percentile(time_per_iter, 10, interpolation="nearest") / batch_size,
            "per_image_p50": np.percentile(time_per_iter, 50, interpolation="nearest") / batch_size,
            "per_image_p90": np.percentile(time_per_iter, 90, interpolation="nearest") / batch_size,
            "per_image_p99": np.percentile(time_per_iter, 99, interpolation="nearest") / batch_size,
        },
        "data_processes_stats": {},  # TODO: add worker stats
    }

    # Metrics follow the hierarchy of: name -> dataset -> task -> metrics -> number
    metrics = {"_name_": {dataset_name: results}}
    print_metrics_table(metrics)

    return {
        "accuracy": metrics,
        "metrics": metrics,
    }
def all_gather(data, group=None):
    global _USE_HVD
    if _USE_HVD:
        return all_gather_hvd(data, group=group)
    return comm.all_gather(data, group=group)