def create_panoptic_segmentation(img, cls_boxes, cls_segms, cls_keyps, thres=0.7):
    """Paint the detections of one image into a 3-channel uint8 label map.

    Args:
        img: Image array; only ``img.shape[:2]`` is used, to size the output.
        cls_boxes, cls_segms, cls_keyps: Per-class detector output, passed
            straight to ``vis_utils.convert_from_cls_format``.
        thres: Detections whose score (last box column) is below this value
            are skipped.

    Returns:
        Array of shape ``img.shape[:2] + (3,)``. Channel 0 holds the value
        returned by ``ade20k_utils.category_to_idx`` (written only when that
        lookup is not None), channel 1 the detection's index in the converted
        arrays, channel 2 a counter that starts at 1 and grows with every
        detection that passed the threshold. All channels stay zero when
        there are no segmentations.
    """
    boxes, segms, _keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    dataset = dummy_datasets.get_coco_dataset()

    plane_shape = img.shape[:2]
    ade_out, coco_out, inst_out = (
        np.zeros(plane_shape, dtype="uint8") for _ in range(3))

    if segms is not None:
        masks = mask_util.decode(segms)
        widths = boxes[:, 2] - boxes[:, 0]
        heights = boxes[:, 3] - boxes[:, 1]
        # Largest boxes are painted first so smaller ones end up on top.
        paint_order = np.argsort(-(widths * heights))

        instance_id = 1
        for det in paint_order:
            if boxes[det, -1] < thres:
                # Score too low
                continue
            pixels = np.nonzero(masks[..., det])
            ade_idx = ade20k_utils.category_to_idx(
                dataset.classes[classes[det]])
            if ade_idx is not None:
                ade_out[pixels] = ade_idx
            # NOTE(review): this stores the detection index, not a class id,
            # and all three planes are uint8 -- confirm both are intended.
            coco_out[pixels] = det
            inst_out[pixels] = instance_id
            instance_id += 1

    return np.stack([ade_out, coco_out, inst_out], axis=-1)
def detect(self, im, id):
    """Run the detector on one image and package confident boxes.

    Args:
        im: Image handed to ``infer_engine.im_detect_all`` and to the
            OpenCV visualiser.
        id: Value passed to ``c2_utils.NamedCudaScope`` for inference.

    Returns:
        Tuple ``(imsw, bboxes)``: the image returned by
        ``vis_utils.vis_one_image_opencv`` and a ``BBoxDetArray`` holding one
        ``BBoxDet`` per detection whose score is at least ``self.confidence``.
    """
    timers = defaultdict(Timer)
    started = time.time()
    with c2_utils.NamedCudaScope(id):
        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
            self.model, im, None, timers=timers)
    self.logger.info('Inference time: {:.3f}s'.format(time.time() - started))
    for timer_name, timer in timers.items():
        self.logger.info(
            ' | {}: {:.3f}s'.format(timer_name, timer.average_time))

    imsw = vis_utils.vis_one_image_opencv(
        im, cls_boxes, cls_segms, cls_keyps, self.confidence, 2,
        show_box=True, dataset=self.dummy_coco_dataset, show_class=True)

    boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)

    bboxes = BBoxDetArray()
    bboxes.header = std_msgs.msg.Header()
    if boxes is None:
        return imsw, bboxes

    for det_idx, row in enumerate(boxes):
        corners = row[0:4]
        score = row[4]
        # The label is looked up before the score test, as in the original.
        label = self.dummy_coco_dataset.classes[classes[det_idx]]
        if score >= self.confidence:
            rect = BBox(corners[0], corners[1], corners[2], corners[3])
            bboxes.bboxes.append(BBoxDet(rect, score, label))
    return imsw, bboxes
def run_model_cfg(args, im, check_blobs):
    """Run the cfg-built model on ``im`` and return the requested blobs.

    The workspace is reset, the model is loaded from ``args``, detection is
    run on ``im``, the sorted results are fed back into the workspace under
    the ``result_*`` blob names, and ``_get_result_blobs(check_blobs)`` is
    returned.
    """
    workspace.ResetWorkspace()
    model, _ = load_model(args)
    with c2_utils.NamedCudaScope(0):
        cls_boxes, cls_segms, cls_keyps = test_engine.im_detect_all(
            model, im, None, None)

    converted = vis_utils.convert_from_cls_format(
        cls_boxes, cls_segms, cls_keyps)
    # Sort the results by score so that they can be compared.
    boxes, segms, keypoints, classes = _sort_results(*converted)

    def _ornone(res):
        # A missing result is stored as an empty float32 array.
        if res is None:
            return np.array([], dtype=np.float32)
        return np.array(res)

    # Write the final results back to the workspace.
    result_blobs = (
        ('result_boxes', boxes),
        ('result_segms', segms),
        ('result_keypoints', keypoints),
        ('result_classids', classes),
    )
    with c2_utils.NamedCudaScope(0):
        for blob_name, value in result_blobs:
            workspace.FeedBlob(core.ScopedName(blob_name), _ornone(value))

    # Read the requested blobs back.
    with c2_utils.NamedCudaScope(0):
        return _get_result_blobs(check_blobs)
def main(args):
    """Detect objects in every image sequence and write one text file each.

    Every sub-directory found while walking ``args.im_or_folder`` is treated
    as a sequence. Its images are processed in sorted file-name order and
    detections of class index 3 or 6 are written to
    ``<args.output_dir>/<sequence>_Det_ffasta.txt`` as
    ``frame,object,left,top,width,height,score`` lines (frame and object
    numbers are 1-based).

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting of ``out_file.close()``, the progress ``print``
    and the final ``break`` is a best-effort reconstruction -- confirm
    against the original file. For a flat two-level directory layout the
    placement makes no difference.
    """
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.NUM_GPUS = 1
    args.weights = cache_url(args.weights, cfg.DOWNLOAD_CACHE)
    assert_and_infer_cfg(cache_urls=False)
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    for root_dir_path_1, sub_dir_path_list_1, sub_file_path_list_1 in os.walk(
            args.im_or_folder):
        sub_dir_path_list_1 = sorted(sub_dir_path_list_1)
        for i, sub_dir_path_1 in enumerate(sub_dir_path_list_1):
            for root_dir_path_2, sub_dir_path_list_2, sub_file_path_list_2 in os.walk(
                    os.path.join(root_dir_path_1, sub_dir_path_1)):
                sub_file_path_list_2 = sorted(sub_file_path_list_2)
                # NOTE(review): opened in binary mode but written with str
                # below; that only works on Python 2 -- confirm the target
                # interpreter. The handle is also not closed on error.
                out_file = open(
                    os.path.join(args.output_dir,
                                 sub_dir_path_1 + "_Det_ffasta.txt"), "wb")
                for img_idx, sub_file_path_2 in enumerate(
                        sub_file_path_list_2):
                    im = cv2.imread(
                        os.path.join(root_dir_path_2, sub_file_path_2))
                    timers = defaultdict(Timer)
                    t = time.time()
                    # Progress message every 1000 images.
                    if (img_idx + 1) % 1000 == 0:
                        sys.stdout.write(
                            "\rFinish {} images\n".format(img_idx + 1))
                        sys.stdout.flush()
                    with c2_utils.NamedCudaScope(0):
                        cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                            model, im, None, timers=timers)
                    # ``classes`` is only bound inside this branch.
                    if isinstance(cls_boxes, list):
                        cls_boxes, cls_segms, cls_keyps, classes = vis_utils.convert_from_cls_format(
                            cls_boxes, cls_segms, cls_keyps)
                    if cls_boxes is None or cls_boxes.shape[0] == 0:
                        continue
                    obj_idx = 0
                    for cls_box, cls in zip(cls_boxes, classes):
                        # Keep class indices 3 and 6 only (presumably car and
                        # bus in the dummy COCO ordering -- TODO confirm).
                        if int(cls) != 3 and int(cls) != 6:
                            continue
                        # Boxes arrive as x1, y1, x2, y2, score; the file
                        # stores left, top, width, height, score.
                        out_file.write("{},{},{},{},{},{},{}\n".format(
                            img_idx + 1, obj_idx + 1, cls_box[0], cls_box[1],
                            cls_box[2] - cls_box[0], cls_box[3] - cls_box[1],
                            cls_box[4]))
                        obj_idx += 1
                out_file.close()
                print("Finish {} / {} of video sequences".format(
                    i + 1, len(sub_dir_path_list_1)))
                # Stop after the first os.walk level of this sequence.
                break
def mask_non_bbox(config, frame: FrameType, cls_segms: bytes) -> FrameType:
    """Black out every pixel of ``frame`` not covered by an instance mask.

    Args:
        config: Unused by this function.
        frame: Image array indexed as ``[row, col, channel]``.
        cls_segms: Pickled per-class segmentation list, as accepted by
            ``vis_utils.convert_from_cls_format``.

    Returns:
        A copy of ``frame`` in which pixels outside the union of all decoded
        masks are zero. When there are no segmentations the union is empty,
        so a fully zeroed frame is returned (previously this path produced
        no usable frame).
    """
    # NOTE(review): pickle.loads can execute arbitrary code; only feed this
    # function data produced by a trusted pipeline stage.
    cls_segms = pickle.loads(cls_segms)
    _, segms, _, _ = vis_utils.convert_from_cls_format([], cls_segms, None)
    if segms is None or len(segms) == 0:
        return np.zeros_like(frame)

    # decode() stacks one binary mask per instance along the last axis;
    # a pixel is kept if any instance covers it.
    covered = mask_util.decode(segms).any(axis=2)
    img = frame.copy()
    img[~covered] = 0
    return img
def get_result_json(boxes, segms, keypoints, thresh=0.7, dataset=None):
    """Collect the detections scoring at least ``thresh`` into a plain dict.

    Args:
        boxes: Per-class list of box arrays (converted with
            ``convert_from_cls_format``) or an already stacked array whose
            rows are ``x1, y1, x2, y2, score``.
        segms: Matching segmentations, or None.
        keypoints: Matching keypoints; only forwarded to the converter.
        thresh: Minimum score for a detection to be reported.
        dataset: Object exposing ``classes`` (index -> name). When None the
            class is reported as ``'id<index>'``.

    Returns:
        None when there is no detection with a score of at least ``thresh``;
        otherwise ``{'mask_rle': segms, 'objects': [...]}`` with the objects
        ordered by descending score. Each object carries ``box``, ``class``,
        ``mask_idx``, ``contours`` and ``score``.

    NOTE(review): class indices are only available when ``boxes`` is given
    in per-class list form; passing a stacked array still fails at the class
    lookup, exactly as before.
    """
    if isinstance(boxes, list):
        boxes, segms, keypoints, classes = convert_from_cls_format(
            boxes, segms, keypoints)
    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return None

    masks = mask_util.decode(segms) if segms is not None else None

    results = {'mask_rle': segms, 'objects': []}
    # Highest score first. (An earlier area-based ordering was computed and
    # immediately overwritten, so it has been dropped.)
    for i in np.argsort(-boxes[:, 4]):
        score = boxes[i, -1]
        if score < thresh:
            continue
        class_idx = classes[i]
        if dataset is not None:
            class_text = dataset.classes[class_idx]
        else:
            class_text = 'id{:d}'.format(class_idx)

        contours = []
        if masks is not None:
            # OpenCV 3 returns (image, contours, hierarchy), OpenCV 4
            # returns (contours, hierarchy); the contours are always the
            # second-to-last element.
            found = cv2.findContours(masks[:, :, i].copy(), cv2.RETR_CCOMP,
                                     cv2.CHAIN_APPROX_NONE)
            contours = [c.reshape((-1, 2)).tolist() for c in found[-2]]

        results['objects'].append({
            'box': boxes[i, :4].tolist(),
            'class': class_text,
            # Plain int so that the result can be serialised with json.
            'mask_idx': int(i),
            'contours': contours,
            'score': float(score)
        })
    return results
def _main(video_folder, download_path, output, redownload, model_config, write_videos, outputdir):
    """Run Detectron on extracted video frames and record what was found.

    Each sub-directory of ``video_folder`` is treated as one video whose
    ``*jpg`` files are its frames. For every frame a list of detections
    (label, mask area, area fraction, score, bbox) is collected; frames with
    at least one accepted detection are also written, annotated, to
    ``outputdir/<video>/`` together with a JSON file. After each video the
    accumulated dictionary is dumped to ``output`` so an interrupted run can
    resume (videos already present in ``output`` are skipped).

    ``download_path``, ``redownload`` and ``write_videos`` are not used in
    the visible code.

    NOTE(review): the source this was recovered from had lost its
    indentation; nesting below is a best-effort reconstruction.
    """
    # Resume from a previous run if the output file already exists.
    if os.path.exists(output):
        with open(output, 'r') as fi:
            videos_information = json.load(fi)
    else:
        videos_information = {}
    if os.path.isdir(video_folder):
        detectron = Detectron(model_config)
        for file in os.listdir(video_folder):
            print('Bearbeite Video:',file)
            # Only directories count as videos; one hard-coded name and
            # everything already recorded in the output file are skipped.
            if os.path.isdir(os.path.join(video_folder,file)) and (file!='a5arrD39XjY.mp4') and (file not in videos_information):
                video_dimensions=None
                video_boxes=[]
                video_segments=[]
                video_information=[]
                frame_count=0
                diff = 0
                for filename in sorted(os.listdir(os.path.join(video_folder,file))):
                    if filename.endswith('jpg'):
                        found=False
                        class_names = []
                        frame=cv2.imread(os.path.join(video_folder, file,filename))
                        # (width, height) of the first frame is used for the
                        # whole video.
                        if video_dimensions is None:
                            video_dimensions = frame.shape[1], frame.shape[0]
                        frame_boxes, frame_segments = detectron.infer_image(frame)
                        frame_information = []
                        print('Nach infer:', type(frame_boxes),type(frame_segments))
                        # ``classes`` is only bound inside this branch.
                        if isinstance(frame_boxes, list):
                            frame_boxes, frame_segments, _, classes = vis_utils.convert_from_cls_format(frame_boxes,frame_segments, None)
                            print('Nach convert:', type(frame_boxes),type(frame_segments), classes)
                        if frame_boxes is not None and frame_boxes.shape[0] != 0:
                            video_area = video_dimensions[0] * video_dimensions[1]
                            box_areas = (frame_boxes[:, 2] - frame_boxes[:, 0]) * (frame_boxes[:, 3] - frame_boxes[:, 1])
                            # Largest boxes first.
                            sorted_inds = np.argsort(-box_areas)
                            print(box_areas, sorted_inds)
                            for i in sorted_inds:
                                try:
                                    class_name = detectron.get_class_name(classes[i])
                                    if class_name != '__background__':
                                        class_names.append(class_name)
                                except IndexError as e:
                                    # NOTE(review): after this handler
                                    # ``class_name`` keeps its previous value
                                    # (or is unbound on the first detection).
                                    log.error("Cannot get_class_name: %s", e)
                                    log.debug("sorted_inds: %s", sorted_inds)
                                    log.debug("box_areas: %s", box_areas)
                                    log.debug("frame_boxes: %s", frame_boxes)
                                    log.debug("frame_segments: %s", frame_segments)
                                score = float(frame_boxes[i, -1])
                                # Accept detections at or above THRESHOLD that
                                # are not background.
                                if not(score < THRESHOLD or class_name == '__background__'):
                                    found=True
                                    log.debug("Frame %s: found class '%s' with score '%s'", frame_count, class_name, score)
                                    segment_area = int(mask_utils.area(frame_segments[i]))
                                    # NOTE(review): ``np.int`` was removed in
                                    # NumPy 1.24.
                                    frame_information.append({
                                        'label': class_name,
                                        'total_area': segment_area,
                                        'percentage': float(segment_area) / float(video_area),
                                        'score': score,
                                        'bbox': frame_boxes[i, :4].astype(np.int).tolist()
                                    })
                                    # The frame is replaced by its annotated
                                    # version, so annotations accumulate.
                                    frame = detectron.vis_one_image_opencv(im=frame, boxes=frame_boxes[i], segms=frame_segments[i], class_str=class_name)
                        else:
                            log.debug("Found nothing in frame %s", frame_count)
                        # Persist the annotated frame and its detections.
                        if found:
                            img = cv2.resize(frame, video_dimensions, cv2.INTER_NEAREST)
                            if not os.path.exists(os.path.join(outputdir, file)):
                                os.makedirs(os.path.join(outputdir, file), 0o755)
                            cv2.imwrite(os.path.join(outputdir, file, filename), img)
                            with open(os.path.join(outputdir, file, (filename.split('.')[0]+'.json')), 'w') as fo:
                                json.dump(frame_information, fo, indent=2)
                        video_information.append(frame_information)
                        frame_count+=1
                print('video',file,'bearbeitet. Ergebisse:',video_dimensions,video_information)
                log.info("Write intermediate file")
                videos_information[file] = video_information
                with open(output, 'w') as fo:
                    json.dump(videos_information, fo, indent=2)
    # NOTE(review): the only assignment to ``videos`` that is visible here
    # (a call to _download_videos) is commented out, so the loop below
    # raises NameError unless ``videos`` is a module-level name -- confirm.
    #videos = _download_videos(video_text_file=video_text_file, download_path=download_path, redownload=redownload)
    for idx, (video_id, video_file) in enumerate(videos, start=1):
        log.info("Video %s/%s: Start inference for video_id '%s' on file '%s'", idx, len(videos), video_id, video_file)
def _main(video_text_file, download_path, output, redownload, model_config, write_videos, save_res):
    """Extract person keypoints from every image in a directory.

    For each file in the directory ``video_text_file`` the detector is run,
    and for every detection scoring above ``thresh`` two rows (x and y
    coordinates) are appended to a DataFrame with one column per keypoint.
    When ``save_res`` equals the string ``'True'`` a skeleton plot of the
    cropped detection is also saved under ``<outputname>_out/``. The
    DataFrame is pickled to ``<outputname>_dfkps.p``; an (always empty)
    per-image dictionary is written to ``output`` as JSON.

    ``download_path``, ``redownload`` and ``write_videos`` are not used.

    NOTE(review): ``thresh``, ``kp_thresh``, ``dpi`` and ``cv`` are not
    defined in this function -- presumably module-level names; confirm.
    NOTE(review): the source this was recovered from had lost its
    indentation; nesting below is a best-effort reconstruction.
    """
    # Output prefix: "<last path component of input>_<model config stem>".
    outputname = video_text_file.split('/')[-1] + '_' + model_config.split(
        '/')[-1].split('.')[0]
    print(model_config)
    detectron = Detectron(model_config)
    if not os.path.exists(outputname + '_out/'):
        os.makedirs(outputname + '_out/', 0o755)
    # NOTE(review): this repeats the condition above, which is now false,
    # so the 'cropped' directory is never created.
    if not os.path.exists(outputname + '_out/'):
        os.makedirs(outputname + '_out/cropped/', 0o755)
    imgs_information = {}
    # (Loading a previous ``output`` file is disabled.)
    imgs_kp = {}
    # Two rows per detected person ('Achse' is 'x' or 'y'), one column per
    # keypoint index.
    dfkps = pd.DataFrame(columns=[
        'Bild', 'Achse', 'Person', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
    ])
    for img in os.listdir(video_text_file):
        dataset_keypoints, _ = get_keypoints()
        kp_lines = kp_connections(dataset_keypoints)
        cmap = plt.get_cmap('rainbow')
        # One colour per skeleton line plus two for the synthetic
        # shoulder-nose and shoulder-hip lines drawn further down.
        colors = [cmap(i) for i in np.linspace(0, 1, len(kp_lines) + 2)]
        log.info(img)
        frame = cv2.imread(video_text_file + "/" + img, 1)
        print('Bilder:', img)
        frame_boxes, frame_segments, frame_keypoints, frame_dimensions = detectron.infer_img(
            frame)
        frame_area = frame_dimensions[0] * frame_dimensions[1]
        frame_information = []
        log.debug("frame_boxes: %s", frame_boxes)
        log.debug("frame_segments: %s", frame_segments)
        log.debug("frame_keypoints: %s", frame_keypoints)
        if isinstance(frame_boxes, list):
            frame_boxes, frame_segments, frame_keypoints, _ = vis_utils.convert_from_cls_format(
                frame_boxes, frame_segments, frame_keypoints)
        if frame_boxes is not None and frame_boxes.shape[0] != 0:
            print('lenframeboxes', len(frame_boxes))
            sorted_inds = range(len(frame_boxes))
            # A per-detection loop that filled ``frame_information`` (label,
            # score, bbox) and ``imgs_kp`` is disabled here, so both stay
            # empty for every image.
            keypoints = frame_keypoints
            # NOTE(review): len() is taken before the None check inside the
            # loop, so a None ``frame_keypoints`` raises TypeError here.
            for i in range(len(frame_keypoints)):
                if (keypoints is not None and len(keypoints) > i) and (frame_boxes[i, -1] > thresh):
                    print('Boundingbox', str(img), frame_boxes[i, 1], ':',
                          frame_boxes[i, 3], ',', frame_boxes[i, 0], ':',
                          frame_boxes[i, 2], frame.shape[0], frame.shape[1])
                    # Crop the detection: rows y1:y2, columns x1:x2.
                    framecropped = frame[
                        int(frame_boxes[i, 1]):int(frame_boxes[i, 3]),
                        int(frame_boxes[i, 0]):int(frame_boxes[i, 2])]
                    # Crop origin, subtracted from keypoint coordinates when
                    # plotting on the crop.
                    framex = int(frame_boxes[i, 0])
                    framey = int(frame_boxes[i, 1])
                    # Figure sized so that the crop fills it at ``dpi``.
                    fig = plt.figure(frameon=False)
                    fig.set_size_inches(
                        float(framecropped.shape[1]) / dpi,
                        float(framecropped.shape[0]) / dpi)
                    print('framecropped:', framecropped.shape,
                          float(framecropped.shape[1]) / dpi,
                          float(framecropped.shape[0]) / dpi)
                    ax = plt.Axes(fig, [0., 0., 1., 1.])
                    ax.axis('off')
                    fig.add_axes(ax)
                    im2 = cv.cvtColor(framecropped, cv.COLOR_BGR2RGB)
                    ax.imshow(im2)
                    # Indexed below as kps[0] = x, kps[1] = y, kps[2] = score.
                    kps = keypoints[i]
                    # Append two new rows (x and y) for this person.
                    # NOTE(review): DataFrame.set_value was removed in
                    # pandas 1.0.
                    ind = len(dfkps)
                    dfkps.set_value(ind, 'Achse', 'x')
                    dfkps.set_value(ind, 'Bild', img.split('/')[0])
                    dfkps.set_value(ind, 'Person', i)
                    dfkps.set_value(ind + 1, 'Achse', 'y')
                    dfkps.set_value(ind + 1, 'Bild', img.split('/')[0])
                    dfkps.set_value(ind + 1, 'Person', i)
                    # Only keypoints with a score above the literal 2 are
                    # recorded.
                    for z in range(len(kps[1])):
                        if 2 < kps[2][z]:
                            dfkps.set_value(ind, z, kps[0][z])
                            dfkps.set_value(ind + 1, z, kps[1][z])
                    # ``save_res`` is compared against the string 'True'.
                    if save_res == 'True':
                        plt.autoscale(False)
                        for l in range(len(kp_lines)):
                            i1 = kp_lines[l][0]
                            i2 = kp_lines[l][1]
                            # Draw the limb only if both ends are confident.
                            if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh:
                                x = [kps[0, i1] - framex, kps[0, i2] - framex]
                                y = [kps[1, i1] - framey, kps[1, i2] - framey]
                                print('keypoint', l, ':', x, y)
                                line = plt.plot(x, y)
                                plt.setp(line,
                                         color=colors[l],
                                         linewidth=3.0,
                                         alpha=0.7)
                            if kps[2, i1] > kp_thresh:
                                plt.plot(kps[0, i1] - framex,
                                         kps[1, i1] - framey,
                                         '.',
                                         color=colors[l],
                                         markersize=3.0,
                                         alpha=0.7)
                            if kps[2, i2] > kp_thresh:
                                plt.plot(kps[0, i2] - framex,
                                         kps[1, i2] - framey,
                                         '.',
                                         color=colors[l],
                                         markersize=3.0,
                                         alpha=0.7)
                        # add mid shoulder / mid hip for better visualization
                        mid_shoulder = (
                            kps[:2, dataset_keypoints.index('right_shoulder')]
                            + kps[:2, dataset_keypoints.index('left_shoulder')]
                        ) / 2.0
                        sc_mid_shoulder = np.minimum(
                            kps[2, dataset_keypoints.index('right_shoulder')],
                            kps[2, dataset_keypoints.index('left_shoulder')])
                        mid_hip = (
                            kps[:2, dataset_keypoints.index('right_hip')] +
                            kps[:2, dataset_keypoints.index('left_hip')]) / 2.0
                        sc_mid_hip = np.minimum(
                            kps[2, dataset_keypoints.index('right_hip')],
                            kps[2, dataset_keypoints.index('left_hip')])
                        # Mid-shoulder to nose.
                        if (sc_mid_shoulder > kp_thresh and
                                kps[2, dataset_keypoints.index('nose')] > kp_thresh):
                            x = [
                                mid_shoulder[0] - framex,
                                kps[0, dataset_keypoints.index('nose')] - framex
                            ]
                            y = [
                                mid_shoulder[1] - framey,
                                kps[1, dataset_keypoints.index('nose')] - framey
                            ]
                            line = plt.plot(x, y)
                            print(x, y)
                            plt.setp(line,
                                     color=colors[len(kp_lines)],
                                     linewidth=3.0,
                                     alpha=0.7)
                        # Mid-shoulder to mid-hip.
                        if sc_mid_shoulder > kp_thresh and sc_mid_hip > kp_thresh:
                            x = [mid_shoulder[0] - framex, mid_hip[0] - framex]
                            y = [mid_shoulder[1] - framey, mid_hip[1] - framey]
                            print(x, y)
                            line = plt.plot(x, y)
                            plt.setp(line,
                                     color=colors[len(kp_lines) + 1],
                                     linewidth=3.0,
                                     alpha=0.7)
                        output_name = os.path.basename(img) + str(
                            i) + '_kp.jpg'
                        size = fig.get_size_inches() * fig.dpi
                        print(size)
                        fig.savefig(os.path.join(outputname + '_out',
                                                 '{}'.format(output_name)),
                                    dpi=dpi)
                    # Release the figure whether or not it was saved.
                    plt.close('all')
            # The keypoint table is re-pickled after every image with
            # detections.
            dfkps.to_pickle(outputname + '_dfkps.p')
        else:
            log.debug("Found nothing in picture %s", img)
        imgs_information[img] = frame_information
    log.info("Write intermediate file")
    with open(output, 'w') as fo:
        json.dump(imgs_information, fo, indent=2)
    # NOTE(review): the file handle opened here is never closed explicitly.
    pickle.dump(imgs_kp, open(outputname + "_kps.p", "wb"))
def main():
    """Convert per-frame Detectron pickles into one detection file per sequence.

    Every directory below ``--detectron-dir`` that contains ``*.pickle``
    files is a sequence; the stem of each pickle is its integer frame
    number. For each sequence the 'person' detections of all frames are
    written, ordered by frame, to
    ``<output-dir>/<relative sequence path>.txt`` using
    ``DETECTION_FORMAT``.
    """
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--detectron-dir', required=True)
    parser.add_argument('--output-dir', required=True)
    # NOTE(review): this flag is parsed but never read; the search below is
    # always recursive.
    parser.add_argument(
        '--recursive',
        action='store_true',
        help=('Search recursively in detectron-dir for pickle files. '
              'Any subdirectory containing a pickle file is considered to '
              'be a sequence.'))
    parser.add_argument(
        '--detectron-dataset', default='coco', choices=['coco'])

    args = parser.parse_args()
    detectron_dir = Path(args.detectron_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(exist_ok=True, parents=True)
    setup_logging(str(output_dir / (Path(__file__).stem + '.log')))

    input_sequences = set(x.parent for x in detectron_dir.rglob('*.pickle'))
    # map() is lazy on Python 3; materialise it so the log shows the paths
    # instead of a map object.
    logging.info('Input sequences: %s',
                 pformat(sorted(map(str, input_sequences))))

    label_list = get_classes(args.detectron_dataset)

    for sequence_path in tqdm(input_sequences):
        output_path = output_dir / (
            sequence_path.relative_to(detectron_dir)).with_suffix('.txt')
        detections = {}
        for pickle_path in sequence_path.glob('*.pickle'):
            timestamp = int(pickle_path.stem)
            # NOTE(review): pickle.load can execute arbitrary code; only run
            # this on detection dumps from a trusted source.
            with open(pickle_path, 'rb') as f:
                data = pickle.load(f)
            boxes, _, _, labels = convert_from_cls_format(
                data['boxes'], data['segmentations'], data['keypoints'])
            if boxes is None:
                # A frame without detections; zip(None, ...) would raise.
                detections[timestamp] = []
                continue
            detections[timestamp] = [
                Detection(box[:4], box[4], label, timestamp)
                for box, label in zip(boxes, labels)
                if label_list[label] == 'person'
            ]

        lines = []
        for frame, frame_detections in sorted(
                detections.items(), key=lambda x: x[0]):
            for detection in frame_detections:
                x0, y0, x1, y1 = detection.box
                lines.append(DETECTION_FORMAT.format(
                    frame=frame,
                    track_id=-1,
                    left=x0,
                    top=y0,
                    width=x1 - x0,
                    height=y1 - y0,
                    conf=detection.score,
                    x=-1,
                    y=-1,
                    z=-1))
        # Sequences can be nested several levels deep; make sure the
        # matching output directory exists before writing.
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            f.write(''.join(lines))
def main(args):
    """Run inference on images and export thresholded boxes to a CSV file.

    The class count in ``args.cfg`` must match the dummy dataset, and
    ``/detectron/tools/infer_list.yaml`` must provide one threshold per
    non-background class under its ``thresholds`` key (looked up by class
    name). Detections whose score reaches their class threshold are written
    to ``<args.output_dir>/output_<first class name>.csv`` with the columns
    ``pic_name, xmin, ymin, xmax, ymax, class, score``.
    """
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]

    # safe_load: these are plain-data configs, and yaml.load without an
    # explicit Loader is deprecated and can construct arbitrary objects.
    with open(args.cfg, 'r') as cfg_file:
        num_classes = yaml.safe_load(cfg_file)['MODEL']['NUM_CLASSES']
    with open('/detectron/tools/infer_list.yaml', 'r') as infer_file:
        infer_list = yaml.safe_load(infer_file)['thresholds']

    class_names = dummy_coco_dataset.classes
    if not (num_classes == len(class_names)
            and len(class_names) == len(infer_list) + 1):
        logger.info('!!!!!!!wrong categories num')
        return

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    csv_path = os.path.join(args.output_dir,
                            "output_%s.csv" % class_names[1])
    with open(csv_path, "w") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(
            ["pic_name", "xmin", "ymin", "xmax", "ymax", "class", "score"])
        for i, im_name in enumerate(im_list):
            logger.info('No.{} pic ({})starts predict'.format(i, im_name))
            im = cv2.imread(im_name)
            if im is None:
                # cv2.imread returns None for unreadable files; skip them
                # instead of failing inside the detector.
                logger.warning('Cannot read image %s, skipping', im_name)
                continue
            timers = defaultdict(Timer)
            t = time.time()
            with c2_utils.NamedCudaScope(0):
                cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                    model, im, None, timers=timers)
            logger.info('Inference time: {:.3f}s'.format(time.time() - t))
            for k, v in timers.items():
                logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
            if i == 0:
                # '\\ ' spells the same backslash-space the message always
                # contained, without relying on an invalid escape sequence.
                logger.info(
                    ' \\ Note: inference on the first image will be slower than the '
                    'rest (caches and auto-tuning need to warm up)')

            if isinstance(cls_boxes, list):
                boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
                    cls_boxes, cls_segms, cls_keyps)
            # Nothing to write if even the best score is below the lowest
            # per-class threshold.
            if boxes is None or boxes.shape[0] == 0 or max(
                    boxes[:, 4]) < min(infer_list.values()):
                continue

            areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            pic_name = os.path.basename(im_name).split('.')[0]
            # Rows are emitted from the largest to the smallest box.
            for j in np.argsort(-areas):
                bbox = boxes[j, :4]
                score = boxes[j, -1]
                class_text = class_names[classes[j]]
                if score < infer_list[class_text]:
                    continue
                writer.writerow([
                    pic_name, bbox[0], bbox[1], bbox[2], bbox[3], class_text,
                    score
                ])
def main(args):
    """Run inference on the input images and dump all boxes to boxes.json.

    Images are taken from ``args.im_or_folder`` (a single file, or every
    ``*.<args.image_ext>`` file of a directory) in sorted order. The result
    file ``<args.output_dir>/boxes.json`` holds one
    ``{'frame': index, 'boxes': [...]}`` entry per image; ``boxes`` is empty
    when nothing was detected. No visualisation is written.
    """
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg()
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()

    if os.path.isdir(args.im_or_folder):
        candidates = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        candidates = [args.im_or_folder]
    # The sorted position of a file is its frame number.
    image_paths = sorted(candidates)

    frames = []
    for frame_idx, image_path in enumerate(image_paths):
        pdf_path = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(image_path) + '.pdf'))
        logger.info('Processing {} -> {}'.format(image_path, pdf_path))

        image = cv2.imread(image_path)
        timers = defaultdict(Timer)
        started = time.time()
        with c2_utils.NamedCudaScope(0):
            cls_boxes, cls_segms, cls_keyps = infer_engine.im_detect_all(
                model, image, None, timers=timers)
        logger.info('Inference time: {:.3f}s'.format(time.time() - started))
        for timer_name, timer in timers.items():
            logger.info(
                ' | {}: {:.3f}s'.format(timer_name, timer.average_time))
        if frame_idx == 0:
            logger.info(
                ' \\ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')

        boxes, _segms, _keypoints, _classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        frames.append({
            'frame': frame_idx,
            'boxes': [] if boxes is None else boxes.tolist(),
        })

    with open(args.output_dir + '/boxes.json', 'w') as outfile:
        json.dump(frames, outfile, indent=4)
def main(args):
    """Run tiled inference on each image and save merged boxes and masks.

    Every image is cut into a 3x3 grid of overlapping windows, each half the
    image height and width, placed at quarter-size steps. The detector runs
    on every window; the resulting boxes and masks are shifted back into
    full-image coordinates and merged. Per image, a binary mask PNG and a
    YAML file with ``boxes``, ``segms`` and ``classes`` are written to
    ``args.output_dir``.

    NOTE(review): the source this was recovered from had lost its
    indentation; nesting below is a best-effort reconstruction.
    """
    logger = logging.getLogger(__name__)
    merge_cfg_from_file(args.cfg)
    cfg.TEST.WEIGHTS = args.weights
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    model = infer_engine.initialize_model_from_cfg(args.weights)
    dummy_coco_dataset = dummy_datasets.get_coco_dataset()
    if os.path.isdir(args.im_or_folder):
        im_list = glob.iglob(args.im_or_folder + '/*.' + args.image_ext)
    else:
        im_list = [args.im_or_folder]
    for i, im_name in enumerate(im_list):
        out_name = os.path.join(
            args.output_dir, '{}'.format(os.path.basename(im_name) + '.pdf'))
        logger.info('Processing {} -> {}'.format(im_name, out_name))
        im = cv2.imread(im_name)
        h, w = im.shape[:2]
        # Window corners. Note the naming: x1/x2 index rows (scaled by h)
        # and y1/y2 index columns (scaled by w).
        subimages = []
        for x in range(3):
            for y in range(3):
                x1, y1 = x * h // 4, y * w // 4
                x2, y2 = (x + 2) * h // 4, (y + 2) * w // 4
                subimages.append([x1, y1, x2, y2])
        timers = defaultdict(Timer)
        t = time.time()
        with c2_utils.NamedCudaScope(0):
            # One entry per window, in ``subimages`` order.
            cls_boxes = []
            cls_segms = []
            cls_keyps = []
            for index in range(len(subimages)):
                x1, y1, x2, y2 = subimages[index]
                _cls_boxes, _cls_segms, _cls_keyps = infer_engine.im_detect_all(
                    model, im[x1:x2, y1:y2, :], None, timers=timers)
                cls_boxes.append(_cls_boxes)
                cls_segms.append(_cls_segms)
                cls_keyps.append(_cls_keyps)
        logger.info('Inference time: {:.3f}s'.format(time.time() - t))
        if i == 0:
            logger.info(
                ' \ Note: inference on the first image will be slower than the '
                'rest (caches and auto-tuning need to warm up)')
        t = time.time()
        # NOTE: [:-4] assumes a three-character file extension.
        out_name_yml = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-4] + '.yml'))
        # Full-image accumulators.
        _mask = np.zeros((h, w), dtype=np.uint8)
        all_boxes = np.zeros((0, 5))
        all_classes = []
        all_segs = []
        for index in range(len(subimages)):
            x1, y1, x2, y2 = subimages[index]
            boxes, segms, keyps, classes = vis_utils.convert_from_cls_format(
                cls_boxes[index], cls_segms[index], cls_keyps[index])
            if boxes is None:
                continue
            # NOTE: this inner ``i`` rebinds the image index of the outer
            # loop; enumerate() assigns it again on the next image.
            for i in range(boxes.shape[0]):
                # Paste the window-sized mask into a full-size canvas and
                # re-encode it so the stored RLE is in image coordinates.
                _tmp = np.zeros((h, w), dtype=np.uint8, order='F')
                __segm = mask_util.decode(segms[i])
                _tmp[x1:x2, y1:y2] = __segm
                __tmp = mask_util.encode(_tmp)
                all_segs.append(__tmp)
                _mask[x1:x2, y1:y2] += __segm
                all_classes.append(classes[i])
            # Shift boxes into image coordinates: box columns 0 and 2 get the
            # column offset (y1), columns 1 and 3 the row offset (x1).
            boxes[:, 0] += y1
            boxes[:, 2] += y1
            boxes[:, 1] += x1
            boxes[:, 3] += x1
            all_boxes = np.vstack((all_boxes, boxes))
        # Collapse the per-pixel hit count to 0/1.
        _mask = _mask.astype(bool).astype(int)
        out_name_mask = os.path.join(
            args.output_dir,
            '{}'.format(os.path.basename(im_name)[:-4] + '.png'))
        cv2.imwrite(out_name_mask, _mask * 255)
        with open(out_name_yml, 'w') as outfile:
            yaml.dump(
                {
                    'boxes': all_boxes,
                    'segms': all_segs,
                    'classes': all_classes
                },
                outfile,
                default_flow_style=False)
        logger.info('Saving time: {:.3f}s'.format(time.time() - t))
        for k, v in timers.items():
            logger.info(' | {}: {:.3f}s'.format(k, v.average_time))
def main():
    """Run Mask R-CNN on the input images and write a submission CSV.

    Arguments come from ``parse_args()``. Exactly one of ``image_dir`` /
    ``images`` and exactly one of ``load_ckpt`` / ``load_detectron`` must be
    given. Every detection with a score of at least 0.99 contributes one row
    ``<image file name>, "xmin ymin xmax ymax"`` to ``submit/submit1.csv``
    (no header, no index). The ``submit`` directory is created if missing.
    """
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    elif args.dataset.startswith("gangjin"):
        dataset = datasets.get_gangjin_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    # Don't need to load imagenet pretrained weights
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    # only support single GPU
    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])
    maskRCNN.eval()

    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    score_threshold = 0.99
    img_ids = []
    rects = []
    for i, img_path in enumerate(imglist):
        print('img', i)
        im = cv2.imread(img_path)
        assert im is not None

        timers = defaultdict(Timer)
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im,
                                                        timers=timers)
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is None:
            continue

        for box in boxes:
            # Keep confident detections only.
            if float(box[-1]) < score_threshold:
                continue
            xmin, ymin, xmax, ymax = (float(v) for v in box[:4])
            img_ids.append(os.path.basename(img_path))
            rects.append(
                " ".join(str(v) for v in (xmin, ymin, xmax, ymax)))

    import pandas as pd
    result = pd.DataFrame.from_dict({"ID": img_ids, "rects": rects})
    # The result path is fixed; make sure its directory exists so that
    # to_csv does not fail after all images have been processed.
    submit_path = 'submit/submit1.csv'
    submit_dir = os.path.dirname(submit_path)
    if not os.path.exists(submit_dir):
        os.makedirs(submit_dir)
    result.to_csv(submit_path, header=None, index=False)
def main():
    """Run Mask R-CNN on the input images and export BDD-style detections.

    Arguments come from ``parse_args()``. The first image must have the size
    given by ``args.height`` / ``args.width``; boxes are rescaled to a
    1280x720 frame. COCO class names are mapped to their BDD spelling and
    detections whose class is not in ``bdd_category`` are dropped. The
    result list is written to ``<args.output_dir>/<args.name>.json``.
    """
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    prefix_path = args.output_dir
    os.makedirs(prefix_path, exist_ok=True)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    # Don't need to load imagenet pretrained weights
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()
    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    # only support single GPU
    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])
    maskRCNN.eval()

    imglist = (misc_utils.get_imagelist_from_dir(args.image_dir)
               if args.image_dir else args.images)
    num_images = len(imglist)
    writen_results = []

    # Validate the declared input size against the first image.
    demo_im = cv2.imread(imglist[0])
    print(np.shape(demo_im))
    h, w, _ = np.shape(demo_im)
    assert h == args.height
    assert w == args.width
    h_scale = 720 / args.height
    w_scale = 1280 / args.width

    # COCO class name -> BDD class name; other names pass through.
    bdd_names = {
        'motorcycle': 'motor',
        'stop sign': 'traffic sign',
        'bicycle': 'bike',
    }
    column_scales = ((0, w_scale), (2, w_scale), (1, h_scale), (3, h_scale))

    for img_idx in tqdm(range(num_images)):
        img_path = imglist[img_idx]
        im = cv2.imread(img_path)
        assert im is not None

        timers = defaultdict(Timer)
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im,
                                                        timers=timers)
        im_name, _ = os.path.splitext(os.path.basename(img_path))

        # Rows of boxes are x1, y1, x2, y2, score.
        boxes, _, _, classes = convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)
        if boxes is None:
            continue

        # Rescale x columns by the width factor, y columns by the height
        # factor.
        for column, factor in column_scales:
            boxes[:, column] = boxes[:, column] * factor

        if classes == []:
            continue

        for instance_idx, cls_idx in enumerate(classes):
            coco_name = dataset.classes[cls_idx]
            cls_name = bdd_names.get(coco_name, coco_name)
            if cls_name not in bdd_category:
                continue
            writen_results.append({
                "name": img_path.split('/')[-1],
                "timestamp": 1000,
                "category": cls_name,
                "bbox": boxes[instance_idx, :4],
                "score": boxes[instance_idx, -1]
            })

    result_path = os.path.join(prefix_path, args.name + '.json')
    with open(result_path, 'w') as outputfile:
        json.dump(writen_results, outputfile, cls=MyEncoder)
def infere(self, image, imageId=None, thresh=0.5, debug=False, pixel_size=(0.3, 0.3)):
    """Detect objects in ``image`` and describe each one by its mask outline.

    Args:
        image: Image passed to ``im_detect_all``; must not be None.
        imageId: Optional identifier; when given each result also carries
            ``objId = "<imageId>_obj-<detection index>"``.
        thresh: Minimum score for a detection to be reported.
        debug: Unused.
        pixel_size: Forwarded to ``self.getMaskInfo``.

    Returns:
        List of dicts with the keys ``classId``, ``score``, ``label``,
        ``area``, ``area_m2``, ``perimetr`` and ``verts``, ordered from the
        largest to the smallest bounding box. The list is empty when nothing
        scores at least ``thresh``, when the detector output cannot be
        converted, or when there are no segmentations (the last two cases
        used to raise NameError).
    """
    assert image is not None
    timers = defaultdict(Timer)
    cls_boxes, cls_segms, cls_keyps = im_detect_all(self.maskRCNN, image,
                                                    timers=timers)

    boxes, segms, classes = None, None, []
    if isinstance(cls_boxes, list):
        boxes, segms, keypoints, classes = vis_utils.convert_from_cls_format(
            cls_boxes, cls_segms, cls_keyps)

    if boxes is None or boxes.shape[0] == 0 or max(boxes[:, 4]) < thresh:
        return []
    if segms is None:
        # Every result field below is derived from a mask.
        return []
    masks = mask_util.decode(segms)

    result = []
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    for i in np.argsort(-areas):
        score = boxes[i, -1]
        if score < thresh:
            continue

        # NOTE(review): 'classId' is the detection index, not classes[i];
        # kept as is because callers may rely on it -- confirm intent.
        class_id = i
        label = self.dataset.classes[classes[i]]
        area, area_m2, perimeter, cv2Poly = self.getMaskInfo(
            masks[:, :, i], image.shape, pixel_size=pixel_size)
        if cv2Poly is None:
            # Object recognised, but its contour is empty.
            continue

        r = {
            'classId': class_id,
            'score': score,
            'label': label,
            'area': area,
            'area_m2': area_m2,
            'perimetr': perimeter,
            'verts': cv2Poly[:, 0, :]
        }
        if imageId is not None:
            r['objId'] = "{}_obj-{}".format(imageId, i)
        result.append(r)
    return result
def track(frame_paths, frame_detections, tracking_params, progress=True, filter_label=None):
    """Link per-frame detections into tracks.

    Args:
        frame_paths (list): Paths to the frames. ``str(path)`` is passed to
            ``cv2.imread`` and ``path.name`` is used for logging.
        frame_detections (list): Detection results, one dict per frame, with
            keys 'boxes', 'segmentations' and 'keypoints', and optionally
            'features' (used only when the appearance feature is 'mask').
        tracking_params (dict): See add_tracking_arguments() for details.
            Keys read here: 'appearance_feature', 'score_continue_min',
            'score_init_min', 'frames_skip_max' and 'bidirectional'.
        progress (bool): Show a progress bar while matching.
        filter_label: If not None, detections whose label differs from this
            value are dropped.

    Returns:
        list: All tracks created, each with ``friendly_id`` set to its index
        in the list.
    """
    all_tracks = []

    # detections[i] contains list of Detections for frame_paths[i]
    detections = []
    dummy_image = None
    for t, (frame_path, image_results) in enumerate(
            zip(frame_paths, frame_detections)):
        if tracking_params['appearance_feature'] == 'none':
            # Pixels are not needed; share one blank image across frames.
            if dummy_image is None:
                dummy_image = np.zeros_like(cv2.imread(str(frame_path)))
            image = dummy_image
        else:
            image = cv2.imread(str(frame_path))[:, :, ::-1]  # BGR -> RGB

        boxes, masks, _, labels = vis.convert_from_cls_format(
            image_results['boxes'], image_results['segmentations'],
            image_results['keypoints'])
        if boxes is None:
            logging.info('No predictions for image %s', frame_path.name)
            boxes, masks = [], []

        if ('features' in image_results
                and tracking_params['appearance_feature'] == 'mask'):
            # features are of shape (num_segments, d)
            features = list(image_results['features'])
        else:
            features = [None for _ in masks]

        current_detections = []
        for box, mask, label, feature in zip(boxes, masks, labels, features):
            low_scoring = box[4] <= tracking_params['score_continue_min']
            label_mismatch = (filter_label is not None
                              and label != filter_label)
            if low_scoring or label_mismatch:
                continue
            current_detections.append(
                Detection(box[:4], box[4], label, t, image, mask, feature))
        detections.append(current_detections)

    if tracking_params['bidirectional']:
        directions = ['forward', 'backward']
    else:
        directions = ['forward']

    for direction in directions:
        forward = direction == 'forward'
        timestamps = range(len(frame_paths))
        if not forward:
            timestamps = reversed(timestamps)

        for t in tqdm(timestamps,
                      disable=not progress,
                      total=len(frame_paths),
                      desc='track ' + direction):
            untracked = [d for d in detections[t] if not d.tracked()]
            if not untracked:
                continue

            for existing in all_tracks:
                for detection in existing.detections:
                    detection.clear_cache()

            if forward:
                active_tracks = [
                    existing for existing in all_tracks
                    if ((t - existing.detections[-1].timestamp)
                        <= tracking_params['frames_skip_max'])
                ]
            else:
                active_tracks = []
                for existing in all_tracks:
                    # Keep tracks that
                    # (1) end at or after t,
                    # (2) start before t + frames_skip_max
                    # (3) don't have a detection at time t
                    ends_after = existing.detections[-1].timestamp > t
                    starts_before_skip = (
                        existing.detections[0].timestamp <
                        t + tracking_params['frames_skip_max'])
                    needs_detection = t not in existing.detections_by_time
                    if ends_after and starts_before_skip and needs_detection:
                        active_tracks.append(existing)
                # Only the backward pass skips frames without active tracks;
                # the forward pass must continue so that the first tracks
                # can be created.
                if not active_tracks:
                    continue

            matched_tracks = match_detections(active_tracks,
                                              untracked,
                                              tracking_params,
                                              backward=not forward)

            # Tracks that were assigned a detection in this frame.
            for detection in untracked:
                matched = matched_tracks[detection.id]
                if matched is None:
                    # Unmatched detections start a new track only when they
                    # score above the initialisation threshold.
                    if detection.score > tracking_params['score_init_min']:
                        matched = Track()
                        all_tracks.append(matched)
                    else:
                        continue
                matched.add_detection(detection)

    for index, created in enumerate(all_tracks):
        created.friendly_id = index
    return all_tracks
def _save_palette_png(mask, palette, path):
    """Write a uint8 label ``mask`` to ``path`` as a PNG using ``palette``."""
    output = Image.fromarray(mask)
    output.putpalette(palette.ravel())
    output.save(path, format='png')


def main():
    """Write, per DAVIS 2016 frame, the detectron masks matched to groundtruth.

    For every 2016 sequence of the selected set, each predicted mask is
    assigned to a groundtruth object by minimising ``1 - IoU`` with
    ``linear_sum_assignment``; the matched masks are painted with the
    groundtruth object ids and saved as ``<output-dir>/<sequence>/%05d.png``.
    Frames without predictions or without groundtruth objects get an
    all-zero mask.
    """
    # Use first line of file docstring as description if it exists.
    parser = argparse.ArgumentParser(
        description=__doc__.split('\n')[0] if __doc__ else '',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--davis-data-root',
        required=True,
        help='Directory containing a subdirectory for each sequence')
    parser.add_argument(
        '--davis-eval-root',
        required=True,
        help='DAVIS evaluation code root directory.')
    parser.add_argument(
        '--detectron-root',
        required=True,
        help=('Contains subdirectory for each sequence, containing pickle '
              'files of detectron outputs for each frame.'))
    parser.add_argument(
        '--set', choices=['train', 'val'], default='val')
    parser.add_argument('--output-dir', required=True)

    args = parser.parse_args()

    davis_eval_root = pathlib.Path(args.davis_eval_root)
    davis_root = pathlib.Path(args.davis_data_root)
    detectron_root = pathlib.Path(args.detectron_root)
    output_root = pathlib.Path(args.output_dir)
    output_root.mkdir(exist_ok=True)

    db_info_path = davis_eval_root / 'data' / 'db_info.yaml'
    assert db_info_path.exists(), (
        'DB info file (%s) does not exist' % db_info_path)
    with open(db_info_path, 'r') as f:
        # safe_load: plain data is all that is needed, and yaml.load()
        # without an explicit Loader is rejected by current PyYAML.
        davis_info = yaml.safe_load(f)

    palette_path = davis_eval_root / 'data' / 'palette.txt'
    assert palette_path.exists(), (
        'DAVIS palette file (%s) does not exist' % palette_path)
    palette = np.loadtxt(palette_path, dtype=np.uint8).reshape(-1, 3)

    for sequence_info in davis_info['sequences']:
        if sequence_info['set'] != args.set:
            continue
        if sequence_info['year'] != 2016:
            continue

        sequence = sequence_info['name']
        output_sequence = output_root / sequence
        output_sequence.mkdir(exist_ok=True)
        detectron_sequence = detectron_root / sequence
        davis_sequence = davis_root / sequence
        assert detectron_sequence.exists(), (
            'Detectron path %s does not exist' % detectron_sequence)
        assert davis_sequence.exists(), (
            'DAVIS path %s does not exist' % davis_sequence)

        detectron_frames = sorted(
            detectron_sequence.glob('*.pickle'), key=lambda x: int(x.stem))
        davis_frames = sorted(
            davis_sequence.glob('*.png'), key=lambda x: int(x.stem))
        num_frames = sequence_info['num_frames']
        assert len(detectron_frames) == len(davis_frames) == num_frames

        for frame, (detectron_path, davis_path) in enumerate(
                zip(detectron_frames, davis_frames)):
            output_frame = output_sequence / ('%05d.png' % frame)
            groundtruth = np.array(Image.open(davis_path))
            # 255 is used as an 'unknown' object in 2017, but it is used as
            # the single object in 2016. Re-map it to '1', so that the rest
            # of the code works as with 2017, pretending we have a single
            # known object.
            groundtruth[groundtruth == 255] = 1
            object_ids = get_unique_objects(groundtruth)
            groundtruth_masks = [groundtruth == i for i in object_ids]

            # NOTE: pickle can execute arbitrary code; only point
            # --detectron-root at trusted files.
            with open(detectron_path, 'rb') as f:
                data = pickle.load(f)
            _, predicted_masks, _, _ = vis.convert_from_cls_format(
                data['boxes'], data['segmentations'], data['keypoints'])

            # Nothing to match when either side is empty. The shape comes
            # from the groundtruth image itself so that a frame without
            # objects no longer fails on groundtruth_masks[0].
            if not predicted_masks or not groundtruth_masks:
                _save_palette_png(
                    np.zeros(groundtruth.shape, dtype=np.uint8), palette,
                    output_frame)
                continue

            # Can threshold scores if necessary
            # scores = predicted_boxes[:, -1]
            decoded = mask_util.decode(predicted_masks)
            predicted_masks = [
                decoded[:, :, i] for i in range(decoded.shape[2])
            ]

            mask_iou = mask_util.iou(
                [mask_util.encode(p) for p in predicted_masks],
                [mask_util.encode(np.asfortranarray(g.astype('uint8')))
                 for g in groundtruth_masks],
                pyiscrowd=np.zeros(len(groundtruth_masks)))
            mask_distance = 1 - mask_iou

            # Array of length num_matches, containing tuples of
            # (predicted_mask_index, groundtruth_mask_index)
            assignments = list(zip(*linear_sum_assignment(mask_distance)))

            final_mask = np.zeros(groundtruth.shape, dtype=np.uint8)
            for predicted_mask_index, groundtruth_id in assignments:
                predicted_mask = predicted_masks[predicted_mask_index]
                final_mask[predicted_mask != 0] = object_ids[groundtruth_id]

            _save_palette_png(final_mask, palette, output_frame)
def main():
    """Run Mask R-CNN over the given images and write per-class box files.

    Results go to ``<output_dir>_results/<class name>.txt`` (the directory
    is recreated on every run). Each detection is appended as one line:
    the image name followed by the score and the box coordinates shifted
    by +1, each formatted with ``%f``.
    """
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    # Always start from an empty results directory.
    prefix_path = args.output_dir + '_results'
    if os.path.exists(prefix_path):
        shutil.rmtree(prefix_path)
    os.mkdir(prefix_path)

    if args.dataset.startswith("coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = len(dataset.classes)
    elif args.dataset.startswith("keypoints_coco"):
        dataset = datasets.get_coco_dataset()
        cfg.MODEL.NUM_CLASSES = 2
    else:
        raise ValueError('Unexpected dataset name: {}'.format(args.dataset))

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)

    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # Don't need to load imagenet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN, cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True, device_ids=[0])  # only support single GPU
    maskRCNN.eval()

    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images

    for image_path in tqdm(imglist):
        im = cv2.imread(image_path)
        assert im is not None

        timers = defaultdict(Timer)
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im, timers=timers)

        im_name, _ = os.path.splitext(os.path.basename(image_path))
        boxes, _, _, classes = convert_from_cls_format(cls_boxes, cls_segms, cls_keyps)
        if boxes is None or len(classes) == 0:
            continue

        # Reorder the columns to (score, first two coords + 1,
        # last two coords + 1).
        voc_boxes = np.zeros_like(boxes)
        voc_boxes[:, 0:1] = boxes[:, 4:5]
        voc_boxes[:, 1:3] = boxes[:, 0:2] + 1
        voc_boxes[:, 3:5] = boxes[:, 2:4] + 1

        for instance_idx, cls_idx in enumerate(classes):
            cls_name = dataset.classes[cls_idx]
            if cls_name == 'motorcycle':
                cls_name = 'motorbike'
            line = "%s " % im_name + "".join(
                "%f " % item for item in voc_boxes[instance_idx]) + "\n"
            # `with` guarantees the handle is closed even if a write fails.
            with open(os.path.join(prefix_path, cls_name + ".txt"), "a") as f:
                f.write(line)
def process_sequences(fbms_dir, detectron_dir, output_dir, save_images, detectron_threshold, iou_threshold):
    """Match detectron masks to FBMS groundtruth and write track files.

    For each sequence directory in ``fbms_dir`` and each labelled frame,
    predictions scoring above ``detectron_threshold`` (or the single most
    confident one if none do) are kept when their IoU with some groundtruth
    region reaches ``iou_threshold``, then assigned to regions by minimising
    ``1 - IoU``. The resulting label masks are converted to tracks and
    written to ``<output_dir>/<sequence>.dat``. ``all_tracks.txt`` and
    ``all_shots.txt`` list the written track files and the groundtruth
    ``*Def.dat`` files.

    Args:
        fbms_dir: Path object of the directory with one sub-directory per
            sequence, each containing a 'GroundTruth' directory.
        detectron_dir: Path object of the directory with one sub-directory
            of '*.pickle' detectron outputs per sequence.
        output_dir: Path object of the output directory (created if missing).
        save_images: If true, also write a groundtruth/prediction comparison
            image to ``<output_dir>/<sequence>-images/final.jpg``.
        detectron_threshold: Predictions must score strictly above this.
        iou_threshold: Minimum IoU with any groundtruth region.
    """
    assert fbms_dir.exists()
    assert detectron_dir.exists()
    output_dir.mkdir(exist_ok=True)

    sequence_paths = list(fbms_dir.iterdir())
    sequence_names = [x.name for x in sequence_paths]
    output_paths = []

    for sequence, sequence_path in zip(tqdm(sequence_names), sequence_paths):
        groundtruth_path = sequence_path / 'GroundTruth'
        assert groundtruth_path.exists(), ('Path %s does not exists' %
                                           groundtruth_path)
        groundtruth = FbmsGroundtruth(groundtruth_path)
        frame_number_to_labels = groundtruth.frame_labels()

        detectron_paths = sorted(
            (detectron_dir / sequence).glob('*.pickle'),
            key=lambda x: get_framenumber(x.stem))

        final_masks = {}
        for frame_number, frame_labels in frame_number_to_labels.items():
            groundtruth_masks = []
            for color, region_id in groundtruth.color_to_region.items():
                if region_id == 0:
                    # ppms have full white (255 * 256**2 + 255 * 256 + 255)
                    # as background, pgms have 0 as background.
                    assert color == 16777215 or color == 0
                    continue  # Ignore background
                groundtruth_masks.append(frame_labels == region_id)

            # Last frame may not have predictions, use second to last frame.
            # A separate index keeps `frame_number` intact so the mask is
            # stored under the groundtruth frame it belongs to.
            prediction_index = frame_number
            if frame_number == len(detectron_paths):
                logging.info(
                    "No predictions found for frame %s in sequence %s, "
                    "using previous frame (%s) instead.",
                    frame_number, sequence, frame_number - 1)
                prediction_index = frame_number - 1
            detectron_path = detectron_paths[prediction_index]
            assert detectron_path.exists(), ('%s does not exist.' %
                                             detectron_path)

            # NOTE: pickle can execute arbitrary code; only read trusted
            # detectron outputs.
            with open(detectron_path, 'rb') as f:
                data = pickle.load(f)
            predicted_boxes, predicted_masks, _, _ = (
                vis.convert_from_cls_format(data['boxes'],
                                            data['segmentations'],
                                            data['keypoints']))

            # Nothing to match: emit an empty mask. frame_labels.shape equals
            # the shape of every groundtruth mask and is available even when
            # there are no foreground regions.
            if predicted_boxes is None or not groundtruth_masks:
                final_masks[frame_number] = np.zeros(
                    frame_labels.shape, dtype=np.uint8)
                continue

            scores = predicted_boxes[:, -1]
            if np.all(scores <= detectron_threshold):
                logging.info(
                    'No masks above threshold (%s) Using most '
                    'confident mask only.', detectron_threshold)
                predicted_masks = [predicted_masks[np.argmax(scores)]]
            else:
                predicted_masks = [
                    m for i, m in enumerate(predicted_masks)
                    if scores[i] > detectron_threshold
                ]

            decoded = mask_util.decode(predicted_masks)
            predicted_masks = [
                decoded[:, :, i] for i in range(decoded.shape[2])
            ]

            mask_iou = mask_util.iou(
                [mask_util.encode(p) for p in predicted_masks],
                [
                    mask_util.encode(np.asfortranarray(g.astype('uint8')))
                    for g in groundtruth_masks
                ],
                pyiscrowd=np.zeros(len(groundtruth_masks)))
            assert isinstance(mask_iou, np.ndarray), (
                'Unknown type of mask_iou (%s) for sequence %s, frame %s' %
                (type(mask_iou), sequence, frame_number))

            # Drop predictions that do not overlap any region well enough.
            filtered_prediction_indices = np.where(
                np.any(mask_iou >= iou_threshold, axis=1))[0]
            mask_iou = mask_iou[filtered_prediction_indices]
            filtered_predictions = [
                predicted_masks[x] for x in filtered_prediction_indices
            ]
            mask_distance = 1 - mask_iou

            # Array of length num_matches, containing tuples of
            # (predicted_mask_index, groundtruth_mask_index)
            assignments = list(zip(*linear_sum_assignment(mask_distance)))

            final_mask = np.zeros(frame_labels.shape, dtype=np.uint8)
            for predicted_mask_index, groundtruth_id in assignments:
                predicted_mask = filtered_predictions[predicted_mask_index]
                final_mask[predicted_mask != 0] = groundtruth_id + 1
            final_masks[frame_number] = final_mask

        tracks = masks_to_tracks(final_masks)
        tracks_str = get_tracks_text(tracks, groundtruth.num_frames)
        output_file = output_dir / (sequence + '.dat')
        output_paths.append(output_file)
        with open(output_file, 'w') as f:
            f.write(tracks_str)

        if save_images:
            output_images = output_dir / (sequence + '-images')
            output_images.mkdir(exist_ok=True)
            colors = colormap()
            rows = []
            for frame_number, frame_labels in frame_number_to_labels.items():
                canvas_shape = (frame_labels.shape[0], frame_labels.shape[1],
                                3)
                groundtruth_output = np.zeros(canvas_shape)
                predictions_output = np.zeros(canvas_shape)
                for region_id in groundtruth.color_to_region.values():
                    if region_id == 0:
                        region_color = (255, 255, 255)
                    else:
                        region_color = colors[region_id - 1]
                    groundtruth_output[frame_labels == region_id] = (
                        region_color)
                    predictions_output[
                        final_masks[frame_number] == region_id] = region_color
                # Groundtruth on the left, predictions on the right.
                rows.append(
                    np.hstack((groundtruth_output, predictions_output)))
            # Stack once instead of re-copying the image for every frame.
            if rows:
                imsave(output_images / 'final.jpg', np.vstack(rows))

    with open(output_dir / 'all_tracks.txt', 'w') as f:
        for output_path in output_paths:
            f.write(str(output_path.resolve()) + '\n')

    with open(output_dir / 'all_shots.txt', 'w') as f:
        f.write(str(len(sequence_paths)) + '\n')
        for sequence, sequence_path in zip(sequence_names, sequence_paths):
            groundtruth_path = (
                sequence_path / 'GroundTruth' / (sequence + 'Def.dat'))
            f.write(str(groundtruth_path.resolve()) + '\n')