def process(self, inputs, outputs):
    """Convert per-image model outputs to COCO-format prediction records.

    Args:
        inputs: list of dicts, one per image; each must contain "image_id".
        outputs: list of dicts, one per image, each with an "instances"
            key holding an :class:`Instances` object.
    """
    # `inp`/`out` instead of `input`/`output` to avoid shadowing the
    # `input` builtin.
    for inp, out in zip(inputs, outputs):
        instances = out["instances"].to(self._cpu_device)
        self._predictions.append({
            "image_id": inp["image_id"],
            "instances": instances_to_coco_json(instances, inp["image_id"]),
        })
def process(self, inputs, outputs):
    """
    Args:
        inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
            It is a list of dict. Each dict corresponds to an image and
            contains keys like "height", "width", "file_name", "image_id".
        outputs: the outputs of a COCO model. It is a list of dicts with
            key "instances" that contains :class:`Instances`.
    """
    # `inp`/`out` avoid shadowing the `input` builtin.
    for inp, out in zip(inputs, outputs):
        prediction = {
            "image_id": inp["image_id"],
            "file_name": inp["file_name"],
        }
        instances = out["instances"].to(self._cpu_device)
        prediction["instances"] = instances_to_coco_json(
            instances, inp["image_id"])
        # Tag every COCO record with its source image so a detection can
        # be traced back to the file it came from.
        for record in prediction["instances"]:
            record["file_name"] = inp["file_name"]
        self._predictions.append(prediction)
def process(self, inputs, outputs):
    """Accumulate COCO-format instance predictions (and raw proposals,
    when present) for each processed image."""
    for inp, out in zip(inputs, outputs):
        record = {"image_id": inp["image_id"]}
        # TODO this is ugly
        if "instances" in out:
            cpu_instances = out["instances"].to(self._cpu_device)
            record["instances"] = instances_to_coco_json(
                cpu_instances, inp["image_id"])
        if "proposals" in out:
            record["proposals"] = out["proposals"].to(self._cpu_device)
        self._predictions.append(record)
def create_new_dataset(self, fused_results: List[Dict]):
    """Select top-scoring images from ``fused_results`` until this
    round's labeling budget is spent, then build and evaluate the
    merged dataset.

    Args:
        fused_results: the predictions results on the oracle dataset;
            each dict carries 'image_id', 'image_score', 'instances'
            and the labeling-cost bookkeeping fields used below.
    """
    super().create_new_dataset()
    image_scores = [fs['image_score'] for fs in fused_results]
    selected_image_ids = []
    selected_annotations = []
    allocated_budget = self.budget.allocate(self._round)
    used_budget = 0
    labeling_history = []
    if self.sampling_method == 'top':
        # argsort is ascending, so popping from the end yields the
        # highest-scoring image first.
        sorted_image_scores = np.argsort(image_scores).tolist()
        while used_budget < allocated_budget and sorted_image_scores:
            idx = sorted_image_scores.pop()
            result = fused_results[idx]
            image_id = result['image_id']
            annotations = instances_to_coco_json(result['instances'],
                                                 image_id)
            selected_image_ids.append(image_id)
            selected_annotations.extend(annotations)
            # Currently, there will be an 'score' field in each of the
            # annotations, and it will be saved in the JSON. The existence
            # of this field won't affect the coco loading, and will make
            # it easier to compute the score.
            cur_cost = result['labeled_inst_from_gt'] + \
                self.budget.eta * result['recovered_inst']
            used_budget += round(cur_cost)
            labeling_history.append({
                "image_id": result['image_id'],
                "labeled_inst_from_gt": result['labeled_inst_from_gt'],
                "used_inst_from_pred": result['dropped_inst_from_pred'],
                "recovered_inst": result['recovered_inst'],
            })
    else:
        raise NotImplementedError
    self.create_dataset_with_annotations(selected_annotations,
                                         selected_image_ids,
                                         labeling_history,
                                         num_objects=round(used_budget))
    dataset_eval = self.evaluate_merged_dataset(self._round)
    # NOTE(review): this produces '<stem>eval.csv' with no separator
    # before 'eval' — confirm that is the intended output filename.
    pd.Series(dataset_eval).to_csv(
        self.cur_dataset_jsonpath.replace('.json', 'eval.csv'))
def process(self, output):
    """Assemble a CPU-side prediction dict for a two-view demo output.

    Returns a dict keyed by view ("0"/"1") plus optional "corrs" and
    "camera" entries, with all tensors moved to the CPU device.
    """
    prediction = {"0": {}, "1": {}}
    tmp_instances = {"0": {}, "1": {}}
    for key in ("0", "1"):
        view_out = output[key]
        pred = prediction[key]
        if "instances" in view_out:
            instances = view_out["instances"].to(self._cpu_device)
            pred["instances"] = instances_to_coco_json(instances, "demo")
            pred["pred_plane"] = view_out["instances"].pred_plane.to(
                self._cpu_device)
            tmp_instances[key]["embeddingbox"] = {
                "pred_boxes": instances.pred_boxes,
                "scores": instances.scores,
            }
        if "proposals" in view_out:
            pred["proposals"] = view_out["proposals"].to(self._cpu_device)
        depth = output["depth"][key]
        if depth is not None:
            pred["pred_depth"] = depth.to(self._cpu_device)
            xyz = self.depth2XYZ(depth)
            prediction[key] = self.override_depth(xyz, pred)
    if self._embedding_on:
        # Copy whichever pairwise affinity matrices the model produced.
        for aff_key in ("pred_aff", "geo_aff", "emb_aff"):
            if aff_key in output:
                tmp_instances[aff_key] = output[aff_key].to(self._cpu_device)
        prediction["corrs"] = tmp_instances
    if self._camera_on:
        tran_logits = output["camera"]["tran"].to(self._cpu_device)
        rot_logits = output["camera"]["rot"].to(self._cpu_device)
        prediction["camera"] = {
            "logits": {"tran": tran_logits, "rot": rot_logits},
            "logits_sms": {
                "tran": softmax(tran_logits),
                "rot": softmax(rot_logits),
            },
        }
    return prediction
def main(args):
    """Run a detector over the sessions listed in ``args.input_file`` and
    write COCO-format JSON (and optional visualizations) per session."""
    logger = setup_logger()
    logger.info("Arguments: " + str(args))
    cfg = setup_cfg(args)
    predictor = DefaultPredictor(cfg)
    cpu_device = torch.device("cpu")
    test_sets = cfg.DATASETS.TEST
    metadata = MetadataCatalog.get(
        test_sets[0] if len(test_sets) else "__unused")
    if args.input_file:
        with Path(args.input_file).open() as handle:
            sessions = [line.strip() for line in handle]
        image_names = [
            str(Path(session) / "lri_1refl" / "image_COMBINED.png")
            for session in sessions
            if session
        ]
    output_folder = Path(args.output)
    output_folder.mkdir(exist_ok=True, parents=True)
    for path in tqdm.tqdm(image_names, disable=not args.output):
        img = read_image(path, format="BGR")
        start_time = time.time()
        predictions = predictor(img)
        num_predictions = len(predictions["instances"])
        time_spent = time.time() - start_time
        logger.info(
            f"{path}: detected {num_predictions} instances in {time_spent:.2f}s"
        )
        instances = predictions["instances"].to(cpu_device)
        # One result folder per session (the grandparent directory name).
        out_i_folder = output_folder / Path(path).parents[1].name
        out_i_folder.mkdir(exist_ok=True, parents=True)
        results = instances_to_coco_json(instances, -1)
        with (out_i_folder / "result.json").open("w") as f:
            json.dump(results, f)
        if args.plot_output:
            visualizer = Visualizer(img, metadata)
            vis_output = visualizer.draw_instance_predictions(
                predictions=instances)
            vis_output.save(str(out_i_folder / "predicted.png"))
def process(self, inputs, outputs):
    """
    Override the base process method. This provides the same processing
    with the addition of the filter step.
    """
    for inp, out in zip(inputs, outputs):
        record = {"image_id": inp["image_id"]}
        # TODO this is ugly
        if "instances" in out:
            coco_instances = instances_to_coco_json(
                out["instances"].to(self._cpu_device), inp["image_id"])
            # Drop predictions that fall outside the image-specific filter.
            record["instances"] = self.filter_instances(
                inp["image_id"],
                (inp['width'], inp['height']),
                coco_instances,
            )
        if "proposals" in out:
            record["proposals"] = out["proposals"].to(self._cpu_device)
        self._predictions.append(record)
# NOTE(review): fragment of a per-image loop; the loop header and the
# definitions of w_img/h_img/image_path/img_id/reverse_id_mapping live
# outside this view — do not modify without seeing the enclosing scope.
if w_img < 1 or h_img < 1:
    # Skip images with a degenerate (zero-sized) dimension.
    continue
# use PIL, to be consistent with evaluation
cv_img = cv2.imread(image_path)  # NOTE(review): read but unused in this span — verify downstream use
img = read_image(image_path, format="BGR")
start_time = time.time()
# predictions, _ = demo.run_on_image(img)
predictions = predictor(img)
logger.info("{}: detected {} instances in {:.2f}s".format(
    image_path, len(predictions["instances"]),
    time.time() - start_time))
if "instances" in predictions:
    # Move to CPU before converting to COCO-format dicts.
    instances = predictions["instances"].to(_cpu_device)
    predictions["instances"] = instances_to_coco_json(
        instances, img_id)
else:
    raise NotImplementedError
for result in predictions["instances"]:
    category_id = result["category_id"]
    # assert category_id < num_classes, (
    #     f"A prediction has class={category_id}, "
    #     f"but the dataset only has {num_classes} classes and "
    #     f"predicted class id should be in [0, {num_classes - 1}]."
    # )
    # Map contiguous training ids back to the dataset's original ids.
    result["category_id"] = reverse_id_mapping[category_id]
# apply grabcut algorithm to refine the masks
fgModel = np.zeros((1, MODEL_MEM_ALLOT), dtype="float")
bgModel = np.zeros((1, MODEL_MEM_ALLOT), dtype="float")
def process(self, inputs, outputs):
    """
    Args:
        inputs: the inputs to a model (e.g., GeneralizedRCNN).
            It is a list of dict. Each dict corresponds to an image and
            contains keys like "height", "width", "file_name", "image_id".
        outputs: the outputs of a model. It is a list of dicts with key
            "instances" that contains :class:`Instances`.
    """
    for input, output in zip(inputs, outputs):
        # Per-view ("0"/"1") prediction records plus scratch data
        # (boxes/affinities) consumed by the evaluation helpers below.
        prediction = {"0": {}, "1": {}}
        tmp_instances = {"0": {}, "1": {}}
        for i in range(2):
            # TODO this is ugly
            prediction[str(i)]["image_id"] = input[str(i)]["image_id"]
            prediction[str(i)]["file_name"] = input[str(i)]["file_name"]
            if "instances" in output[str(i)]:
                # Move instances to CPU before COCO-json conversion.
                instances = output[str(i)]["instances"].to(
                    self._cpu_device)
                prediction[str(i)]["instances"] = instances_to_coco_json(
                    instances, input[str(i)]["image_id"])
                tmp_instances[str(i)]["embeddingbox"] = {
                    "pred_boxes": instances.pred_boxes,
                    "scores": instances.scores,
                }
            if "proposals" in output[str(i)]:
                prediction[str(i)]["proposals"] = output[str(
                    i)]["proposals"].to(self._cpu_device)
            if "annotations" in input[str(i)]:
                tmp_instances[str(i)]["gt_bbox"] = [
                    ann["bbox"] for ann in input[str(i)]["annotations"]
                ]
                if len(input[str(i)]["annotations"]) > 0:
                    tmp_instances[str(i)]["gt_bbox"] = np.array(
                        tmp_instances[str(i)]["gt_bbox"]).reshape(
                            -1, 4)  # xywh from coco
                    # All annotations of one image are assumed to share
                    # one bbox_mode, so the first entry's mode is used.
                    original_mode = input[str(
                        i)]["annotations"][0]["bbox_mode"]
                    # Convert GT boxes to absolute XYXY so they match the
                    # predicted boxes' format.
                    tmp_instances[str(i)]["gt_bbox"] = BoxMode.convert(
                        tmp_instances[str(i)]["gt_bbox"],
                        BoxMode(original_mode),
                        BoxMode.XYXY_ABS,
                    )
            if hasattr(output[str(i)]["instances"], "pred_plane"):
                prediction[str(i)]["pred_plane"] = output[str(
                    i)]["instances"].pred_plane.to(
                        self._cpu_device)
            if output["depth"][str(i)] is not None:
                prediction[str(i)]["pred_depth"] = output["depth"][str(
                    i)].to(self._cpu_device)
                # Back-project predicted depth to XYZ and use it to
                # override the plane offsets.
                xyz = self.depth2XYZ(output["depth"][str(i)])
                prediction[str(i)] = self.override_offset(
                    xyz, prediction[str(i)], output[str(i)])
                depth_rst = get_depth_err(
                    output["depth"][str(i)],
                    input[str(i)]["depth"].to(self._device))
                prediction[str(i)]["depth_l1_dist"] = depth_rst.to(
                    self._cpu_device)
        # Pairwise (cross-view) affinity predictions and GT
        # correspondences feed the embedding evaluation below.
        if "pred_aff" in output:
            tmp_instances["pred_aff"] = output["pred_aff"].to(
                self._cpu_device)
        if "geo_aff" in output:
            tmp_instances["geo_aff"] = output["geo_aff"].to(
                self._cpu_device)
        if "emb_aff" in output:
            tmp_instances["emb_aff"] = output["emb_aff"].to(
                self._cpu_device)
        if "gt_corrs" in input:
            tmp_instances["gt_corrs"] = input["gt_corrs"]
        prediction["corrs"] = tmp_instances
        if "embedding" in self._plane_tasks:
            if self._eval_gt_box:
                aff_rst = get_affinity_label_score(
                    tmp_instances,
                    filter_iou=self._filter_iou,
                    filter_score=self._filter_score,
                    device=self._device,
                )
            else:
                # NOTE(review): hungarian_threshold=[] presumably disables
                # thresholded Hungarian matching — confirm against
                # get_affinity_label_score.
                aff_rst = get_affinity_label_score(
                    tmp_instances,
                    hungarian_threshold=[],
                    filter_iou=self._filter_iou,
                    filter_score=self._filter_score,
                    device=self._device,
                )
            prediction.update(aff_rst)
        if "camera" in self._plane_tasks:
            # Raw camera logits (CPU) alongside the ground-truth relative
            # pose for later metric computation.
            camera_dict = {
                "logits": {
                    "tran": output["camera"]["tran"].to(self._cpu_device),
                    "rot": output["camera"]["rot"].to(self._cpu_device),
                },
                "gts": {
                    "tran": input["rel_pose"]["position"],
                    "rot": input["rel_pose"]["rotation"],
                    "tran_cls": input["rel_pose"]["tran_cls"],
                    "rot_cls": input["rel_pose"]["rot_cls"],
                },
            }
            prediction["camera"] = camera_dict
        self._predictions.append(prediction)