def load_all_gt(video_list, det_dir):
    '''Helper function. Load all detections (txt) and merge into a single dict.'''
    det_dict_all = {}
    for video_name in video_list:
        dets_file_name = osp.join(det_dir, video_name + '.txt')
        det_dict = face_utils.parse_wider_gt(dets_file_name)
        det_dict_all.update(det_dict)
    return det_dict_all
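# Usage sketch for load_all_gt (the video names and detections directory are
# hypothetical placeholders, not from this repo). parse_wider_gt is assumed to
# return a dict mapping frame names to lists of detection rows.
def _demo_load_all_gt():
    all_dets = load_all_gt(['video_0001', 'video_0002'], 'data/CS6/detections')
    print('Frames with detections: %d' % len(all_dets))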
def convert_wider_annots(data_dir, out_dir, data_set='WIDER', conf_thresh=0.5):
    """Convert from WIDER FDDB-style format to COCO bounding box format."""
    # http://cocodataset.org/#format-data: bbox is [x, y, width, height]
    json_name = 'wider_face_train_annot_coco_style.json'
    img_id = 0
    ann_id = 0
    cat_id = 1

    print('Starting %s' % data_set)
    ann_dict = {}
    categories = [{"id": 1, "name": 'face'}]
    images = []
    annotations = []
    ann_file = os.path.join(data_dir, 'wider_face_train_annot.txt')
    wider_annot_dict = face_util.parse_wider_gt(ann_file)  # [im-file] = [[x,y,w,h], ...]

    for filename in wider_annot_dict.keys():
        if len(images) % 50 == 0:
            print("Processed %s images, %s annotations" % (
                len(images), len(annotations)))
        image = {}
        image['id'] = img_id
        img_id += 1
        im = Image.open(os.path.join(data_dir, filename))
        # PIL reports size as (width, height); do not swap the two.
        image['width'] = im.width
        image['height'] = im.height
        image['file_name'] = filename
        images.append(image)

        for gt_bbox in wider_annot_dict[filename]:
            ann = {}
            ann['id'] = ann_id
            ann_id += 1
            ann['image_id'] = image['id']
            ann['segmentation'] = []
            ann['category_id'] = cat_id  # 1:"face" for WIDER
            ann['iscrowd'] = 0
            ann['area'] = gt_bbox[2] * gt_bbox[3]
            ann['bbox'] = gt_bbox
            annotations.append(ann)

    ann_dict['images'] = images
    ann_dict['categories'] = categories
    ann_dict['annotations'] = annotations
    print("Num categories: %s" % len(categories))
    print("Num images: %s" % len(images))
    print("Num annotations: %s" % len(annotations))
    with open(os.path.join(out_dir, json_name), 'w', encoding='utf8') as outfile:
        outfile.write(json.dumps(ann_dict))
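# For reference, the WIDER FDDB-style annot file consumed by parse_wider_gt
# above is assumed to look like the following: an image path, a face count,
# then one bbox row per face. The detection / hard-positive variants used
# later are assumed to append extra columns (a score, and a source flag):
#
#   0--Parade/0_Parade_marchingband_1_799.jpg
#   2
#   78 221 7 8
#   78 238 14 17
#
# parse_wider_gt is assumed to turn this into:
#   {'0--Parade/0_Parade_marchingband_1_799.jpg': [[78, 221, 7, 8], [78, 238, 14, 17]]}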
def convert_to_json(fddb_file, format='eval'):
    # NOTE: num_vid, num_samples, conf_thresh, json_file, det_vid_dir and
    # save_det_dir are module-level settings in the original file.
    ds_json = {
        'images': [],
        'categories': [{'id': 1, 'name': 'person'}],
        'annotations': []
    }

    # If a dir, concatenate text files in it: all videos
    if os.path.isdir(fddb_file):
        txt_files = [t for t in sorted(os.listdir(fddb_file))
                     if t.endswith('.txt')]
        txt_files = txt_files[:num_vid]
        all_det = ''
        for t in txt_files:
            with open(os.path.join(fddb_file, t), 'r') as f:
                all_det += f.read()
        # Set temp dir to scratch1 in case no space is left here
        tempfile.tempdir = '/mnt/nfs/scratch1/pchakrabarty/'
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as tmp:
            tmp.write(all_det)
            tmp.flush()  # make sure the detections hit disk before parsing
            ann = parse_wider_gt(tmp.name)
    else:
        # Parse fddb file to a dict
        ann = parse_wider_gt(fddb_file)

    bbox_id = 0
    all_keys = list(ann.keys())

    def has_dets(ann, fname, conf_thresh):
        if fname.endswith('.jpg'):
            frame_id = int(fname[(fname.index('_') + 1):-4])
        else:
            frame_id = int(fname[(fname.index('_') + 1):])
        #if int(fname[(fname.index('_')+1):]) > 1100:  # BAD hack: to handle incomplete videos
        if frame_id > 1000:  # BAD hack: to handle incomplete videos
            return False
        if len(ann[fname]) == 0:
            return False
        scores = np.array([b[4] for b in ann[fname]])
        if np.sum(scores >= conf_thresh) == 0:
            return False
        return True

    # randomly sample 100k frames
    #all_keys = [k for k in all_keys if len(ann[k]) > 0]  # keep only frames with detections
    all_keys = [k for k in all_keys if has_dets(ann, k, conf_thresh)]  # keep only frames with detections
    """
    # HACK to use the same frames as in a given HP json
    with open('data/bdd_jsons/bdd_HP18k.json', 'r') as f:
        d = json.load(f)
    all_keys = [os.path.split(img['file_name'])[-1] for img in d['images']]
    """
    random.shuffle(all_keys)
    all_keys = all_keys[:num_samples]
    all_keys.sort()
    # ---- end of sampling ----

    all_files = []
    for fid, filename in enumerate(all_keys):
        all_files.append(filename)
        w, h, c = (1024, 2048, 3)  # (720, 1280, 3)
        if filename.endswith('.jpg'):
            im_file_name = filename
        else:
            im_file_name = filename + '.jpg'
        if len(os.path.split(im_file_name)) > 1:
            im_file_name = os.path.split(im_file_name)[-1]
        """
        filename = 'frames/' + im_file_name.split('_')[0].strip() + '/' + im_file_name  # BAD hack
        if not filename in ann:
            continue
        """
        im_prop = {
            'width': w,
            'height': h,
            'id': fid,
            'file_name': im_file_name
        }
        ds_json['images'].append(im_prop)
        bboxes = ann[filename]
        print('Reading:', filename)
        for bbox in bboxes:
            score = bbox[4]
            source = bbox[5]
            bbox[:4] = [int(x) for x in bbox[:4]]
            if format == 'coco':
                bbox = bbox[:4]  # throw away score for COCO format; preserve it for eval
            bbox_prop = {
                'id': bbox_id,
                'image_id': fid,
                'segmentation': [],
                'category_id': 1,
                'iscrowd': 0,
                'bbox': bbox,
                'area': bbox[2] * bbox[3],
                'score': score,
                'source': source,
            }
            bbox_id += 1
            if score < conf_thresh:
                continue
            ds_json['annotations'].append(bbox_prop)
        del ann[filename]

    #ds_json['annotations'] = ds_json['annotations'][:num_samples]
    print('Number of annotations:', len(ds_json['annotations']))
    print('Total:', len(all_files), 'files saved in JSON format')

    # saving final json
    with open(json_file, 'w') as f:
        json.dump(ds_json, f)

    if len(det_vid_dir) == 0:
        return

    # bin frames to corresponding videos
    print('Grouping video frames...')
    video_frames = bin_video_files(all_files)
    vid_list = list(video_frames.keys())
    print('Total of ' + str(len(video_frames)) + ' videos with '
          + str(np.sum([len(t) for _, t in video_frames.items()])) + ' frames')

    # get list of rotations
    vid_path_list = [os.path.join(det_vid_dir, v + '.mov') for v in vid_list]
    print('Making list of rotations for video files...')
    rot_dict = dict(zip(vid_list, map(_ffmpeg_extract_rotation, vid_path_list)))
    print('...Done. ' + str(len(rot_dict)) + ' rotations for '
          + str(len(vid_list)) + ' videos')

    # save metadata
    print('Saving video metadata...')
    metadata_file = os.path.join(save_det_dir, 'metadata.pkl')
    with open(metadata_file, 'wb') as f:
        pickle.dump([vid_list, video_frames, rot_dict], f)
    print('...Done. Saved to:', metadata_file)
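# bin_video_files is called above but not defined in this excerpt. A minimal
# sketch of what it is assumed to do: group frame names of the form
# '<video>_<frame-id>' by their video part (the grouping rule is an
# assumption, for illustration only).
def bin_video_files_sketch(all_files):
    video_frames = {}
    for fname in all_files:
        vid_name = fname.rsplit('_', 1)[0]  # e.g. 'vid_1101_0042' -> 'vid_1101'
        video_frames.setdefault(vid_name, []).append(fname)
    return video_frames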
        # (Excerpt: these lines run inside the per-detection loop of
        # draw_detection_list.)
        bbox = dets[i, :4]
        conf_score = dets[i, 4]
        x0, y0, x1, y1 = [int(x) for x in bbox]
        line_color = _GREEN
        cv2.rectangle(im_det, (x0, y0), (x1, y1), line_color, thickness=2)
        disp_str = '%d: %.2f' % (i, conf_score)
        face_utils._draw_string(im_det, (x0, y0), disp_str)
    return im_det


if __name__ == '__main__':
    args = parse_args()
    det_dict = face_utils.parse_wider_gt(args.det_file)
    out_dir = osp.join(args.output_dir,
                       osp.splitext(osp.basename(args.det_file))[0])
    if not osp.exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    i = 0
    for (image_name, dets) in det_dict.items():
        if len(dets) == 0:
            continue
        print(image_name)
        im = cv2.imread(osp.join(args.imdir, image_name))
        assert im is not None and im.size > 0  # cv2.imread returns None on failure
        im_det = draw_detection_list(im, np.array(dets))
        out_path = osp.join(out_dir, image_name.replace('/', '_'))
        cv2.imwrite(out_path, im_det)  # save visualization (assumed; the excerpt was truncated here)
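# Note that draw_detection_list reads the first four columns as corner
# coordinates (x0, y0, x1, y1), whereas the COCO converters above store
# [x, y, w, h]. A small bridging helper (an illustration, not part of the
# original code):
def xywh_to_xyxy(box):
    x, y, w, h = box[:4]
    return [x, y, x + w, y + h]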
# Earlier variant of convert_to_json that also extracts and saves video frames.
def convert_to_json(fddb_file, format='eval'):
    # NOTE: conf_thresh, n_samples, json_file, det_vid_dir and save_det_dir
    # are module-level settings in the original file.
    ds_json = {
        'images': [],
        'categories': [{'id': 1, 'name': 'person'}],
        'annotations': []
    }

    # If a dir, concatenate text files in it: all videos
    if os.path.isdir(fddb_file):
        txt_files = [t for t in os.listdir(fddb_file)
                     if t.endswith('.txt')][:15000]
        all_det = ''
        for t in txt_files:
            with open(os.path.join(fddb_file, t), 'r') as f:
                all_det += f.read()
        with tempfile.NamedTemporaryFile(mode='w', delete=True) as tmp:
            tmp.write(all_det)
            tmp.flush()  # make sure the detections hit disk before parsing
            ann = parse_wider_gt(tmp.name)
    else:
        # Parse fddb file to a dict
        ann = parse_wider_gt(fddb_file)

    bbox_id = 0
    all_keys = list(ann.keys())

    # randomly sample 100k frames
    def has_dets(ann, fname, conf_thresh):
        if int(fname[(fname.index('_') + 1):]) > 1100:  # BAD hack: to handle incomplete videos
            return False
        if len(ann[fname]) == 0:
            return False
        scores = np.array([b[4] for b in ann[fname]])
        if np.sum(scores >= conf_thresh) == 0:
            return False
        return True

    #all_keys = [k for k in all_keys if len(ann[k]) > 0]  # keep only frames with detections
    all_keys = [k for k in all_keys if has_dets(ann, k, conf_thresh)]  # keep only frames with detections
    random.shuffle(all_keys)
    all_keys = all_keys[:n_samples]
    all_keys.sort()
    # ---- end of sampling ----

    all_files = []
    for fid, filename in enumerate(all_keys):
        all_files.append(filename)
        w, h, c = (1024, 2048, 3)  # (720, 1280, 3)
        im_prop = {
            'width': w,
            'height': h,
            'id': fid,
            'file_name': filename + '.jpg'
        }
        ds_json['images'].append(im_prop)
        bboxes = ann[filename]
        print('Reading:', filename)
        for bbox in bboxes:
            score = bbox[4]
            bbox[:4] = [int(x) for x in bbox[:4]]
            if format == 'coco':
                bbox = bbox[:4]  # throw away score for COCO format; preserve it for eval
            bbox_prop = {
                'id': bbox_id,
                'image_id': fid,
                'segmentation': [],
                'category_id': 1,
                'iscrowd': 0,
                'bbox': bbox,
                'area': bbox[2] * bbox[3],
                'score': score,
                'source': 'detection',
            }
            bbox_id += 1
            if score < conf_thresh:
                continue
            ds_json['annotations'].append(bbox_prop)
        del ann[filename]

    print('Number of annotations:', len(ds_json['annotations']))
    print('Total:', len(all_files), 'files saved in JSON format')

    # saving final json
    with open(json_file, 'w') as f:
        json.dump(ds_json, f)

    if len(det_vid_dir) == 0:
        return

    # bin frames to corresponding videos
    print('Grouping video frames...')
    video_frames = bin_video_files(all_files)
    vid_list = list(video_frames.keys())
    print('Total of ' + str(len(video_frames)) + ' videos with '
          + str(np.sum([len(t) for _, t in video_frames.items()])) + ' frames')

    # get list of rotations
    print('Making list of rotations for video files...')
    vid_path_list = [os.path.join(det_vid_dir, v + '.mov') for v in vid_list]
    rot_list = list(map(_ffmpeg_extract_rotation, vid_path_list))
    print('...Done. ' + str(len(rot_list)) + ' rotations for '
          + str(len(vid_list)) + ' videos')

    # save metadata
    #print('Saving metadata...')
    #metadata = zip(vid_list, vid_path_list, rot_list)
    #with open('metadata.pkl')
    #print('...Done. Metadata saved to:',)

    # saving video frames
    for v, vid_name in enumerate(vid_list):
        vid_file_path = os.path.join(det_vid_dir, vid_name + '.mov')
        vid_sel_frames = video_frames[vid_name]
        save_vid_frames(vid_name, vid_file_path, vid_sel_frames,
                        save_det_dir, rotation=rot_list[v])
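# _ffmpeg_extract_rotation is used above but not defined in this excerpt.
# A minimal sketch of one way to implement it with ffprobe (the behaviour of
# the original helper is an assumption):
import subprocess

def _ffmpeg_extract_rotation_sketch(vid_path):
    """Return the rotation tag of the first video stream, or 0 if absent."""
    cmd = ['ffprobe', '-loglevel', 'error', '-select_streams', 'v:0',
           '-show_entries', 'stream_tags=rotate',
           '-of', 'default=nw=1:nk=1', vid_path]
    out = subprocess.check_output(cmd).decode().strip()
    return int(out) if out else 0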
gt_imlist_file = osp.join(gt_out_dir,
                          'cs6_annot_eval_imlist_%s.txt' % args.split)

# -----------------------------------------------------------------------------------
# Eval-format ground-truth annots for CS6
# -----------------------------------------------------------------------------------
with open(gt_out_file, 'w') as fid_gt:
    with open(gt_imlist_file, 'w') as fid_imlist:
        for video_name in vid_list:
            # Load ground-truth annots for that video
            gt_file = osp.join(args.gt_dir, video_name.split('.')[0] + '.txt')
            gt_annots = face_utils.parse_wider_gt(gt_file)
            if len(gt_annots) == 0:
                continue  # no gt faces in this video
            image_list = np.array(list(gt_annots.keys()))

            # # Select a subset of frames, or use all frames (much slower)
            # if args.subset:
            #     assert len(image_list) != 0
            #     subset_size = min((NUM_IM_VID, len(image_list)))
            #     sel = np.random.randint(len(image_list), size=NUM_IM_VID)
            #     image_list = image_list[sel]

            print('Video annot: %s' % gt_file)
            # Output bboxes lists for evaluation
            for i, im_name in enumerate(image_list):
if args.load_detectron:
    print("loading detectron weights %s" % args.load_detectron)
    load_detectron_weight(net, args.load_detectron)

net = mynn.DataParallel(net, cpu_keywords=['im_info', 'roidb'],
                        minibatch=True, device_ids=[0])  # only support single GPU
net.eval()

# -----------------------------------------------------------------------------------
# Data setup
# -----------------------------------------------------------------------------------
annot_file = args.annot_file
annot_dict = face_utils.parse_wider_gt(annot_file)
image_subset = np.array(list(annot_dict.keys()))
np.random.shuffle(image_subset)
image_subset = image_subset[:args.num_im]

# output folders
img_output_dir = osp.join(args.output_dir, args.det_name + '_' + args.dataset)
if not osp.exists(img_output_dir):
    os.makedirs(img_output_dir)
with open(osp.join(args.output_dir, 'config_args.json'), 'w') as fp:
    json.dump(vars(args), fp, indent=4, sort_keys=True)

# -----------------------------------------------------------------------------------
# Detect faces on CS6 frames
args = parse_args()
if not args.output_dir:
    args.output_dir = osp.abspath(
        osp.join(args.det_dir, '..', 'mining-detections'))
print('Called with args:')
print(args)

# List of video files
with open(osp.join('data/CS6', args.video_list_file), 'r') as f:
    video_list = [y.split('.')[0] for y in [x.strip() for x in f]]

# List of detector conf. thresholds
thresh_list = [float(x.strip()) for x in args.thresh_list.split(',')]

for conf_thresh in thresh_list:
    det_output_dir = osp.join(args.output_dir,
                              '%s_%.2f' % (args.split, conf_thresh))
    if not osp.exists(det_output_dir):
        os.makedirs(det_output_dir)
    for video_name in video_list:
        print('CONF_THRESH: %.2f, VIDEO: %s' % (conf_thresh, video_name))
        dets_file_name = osp.join(args.det_dir, video_name + '.txt')
        det_dict = face_utils.parse_wider_gt(dets_file_name)
        out_file_name = osp.join(det_output_dir, video_name + '.txt')
        write_formatted_dets(video_name, out_file_name, det_dict, conf_thresh)
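# write_formatted_dets is called above but not shown. A sketch of a writer
# that mirrors the FDDB-style layout parse_wider_gt consumes (the exact output
# format of the original helper is an assumption):
def write_formatted_dets_sketch(video_name, out_file_name, det_dict, conf_thresh):
    with open(out_file_name, 'w') as f:
        for im_name, dets in det_dict.items():
            kept = [d for d in dets if d[4] >= conf_thresh]
            f.write('%s\n%d\n' % (im_name, len(kept)))
            for d in kept:
                f.write('%d %d %d %d %f\n' % (d[0], d[1], d[2], d[3], d[4]))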
def convert_cs6_annots(ann_file, im_dir, out_dir,
                       data_set='CS6-subset', conf_thresh=0.5):
    """Convert from WIDER FDDB-style format to COCO bounding box format."""

    # cs6 subsets
    if data_set == 'CS6-subset':
        json_name = 'cs6-subset_face_train_annot_coco_style.json'
    elif data_set == 'CS6-subset-score':
        # include "scores" as soft-labels
        json_name = 'cs6-subset_face_train_score-annot_coco_style.json'
    elif data_set == 'CS6-subset-gt':
        json_name = 'cs6-subset-gt_face_train_annot_coco_style.json'
    elif data_set == 'CS6-train-gt':
        # full train set of CS6 (86 videos)
        json_name = 'cs6-train-gt.json'
    elif data_set == 'CS6-train-det-score':
        # soft-labels used in distillation
        json_name = 'cs6-train-det-score_face_train_annot_coco_style.json'
    elif data_set == 'CS6-train-det-score-0.5':
        # soft-labels used in distillation, keeping dets with score > 0.5
        json_name = 'cs6-train-det-score-0.5_face_train_annot_coco_style.json'
        conf_thresh = 0.5
    elif data_set == 'CS6-train-det':
        json_name = 'cs6-train-det_face_train_annot_coco_style.json'
    elif data_set == 'CS6-train-det-0.5':
        json_name = 'cs6-train-det-0.5_face_train_annot_coco_style.json'
    elif data_set == 'CS6-train-easy-hp':
        json_name = 'cs6-train-easy-hp.json'
    elif data_set == 'CS6-train-easy-gt':
        json_name = 'cs6-train-easy-gt.json'
    elif data_set == 'CS6-train-easy-det':
        json_name = 'cs6-train-easy-det.json'
    elif data_set == 'CS6-train-hp':
        json_name = 'cs6-train-hp.json'
    else:
        raise NotImplementedError

    img_id = 0
    ann_id = 0
    cat_id = 1

    print('Starting %s' % data_set)
    ann_dict = {}
    categories = [{"id": 1, "name": 'face'}]
    images = []
    annotations = []
    wider_annot_dict = face_util.parse_wider_gt(ann_file)  # [im-file] = [[x,y,w,h], ...]

    for filename in wider_annot_dict.keys():
        if len(images) % 50 == 0:
            print("Processed %s images, %s annotations" % (
                len(images), len(annotations)))

        if 'score' in data_set:
            dets = np.array(wider_annot_dict[filename])
            if not any(dets[:, 4] > conf_thresh):
                continue

        image = {}
        image['id'] = img_id
        img_id += 1
        im = Image.open(os.path.join(im_dir, filename))
        # PIL reports size as (width, height); do not swap the two.
        image['width'] = im.width
        image['height'] = im.height
        image['file_name'] = filename
        images.append(image)

        for gt_bbox in wider_annot_dict[filename]:
            ann = {}
            ann['id'] = ann_id
            ann_id += 1
            ann['image_id'] = image['id']
            ann['segmentation'] = []
            ann['category_id'] = cat_id  # 1:"face" for WIDER
            ann['iscrowd'] = 0
            ann['area'] = gt_bbox[2] * gt_bbox[3]
            ann['bbox'] = gt_bbox[:4]
            ann['dataset'] = data_set
            score = gt_bbox[4]
            if score < conf_thresh:
                continue
            if 'hp' in data_set:
                ann['score'] = score  # for soft-label distillation
                ann['source'] = gt_bbox[5]  # annot source: {1: detection, 2: tracker}
            if data_set == 'CS6-train-easy-det':
                if gt_bbox[5] != 1:
                    continue  # ignore if annot source is not detection (i.e. skip HP)
            annotations.append(ann)

    ann_dict['images'] = images
    ann_dict['categories'] = categories
    ann_dict['annotations'] = annotations
    print("Num categories: %s" % len(categories))
    print("Num images: %s" % len(images))
    print("Num annotations: %s" % len(annotations))
    with open(os.path.join(out_dir, json_name), 'w', encoding='utf8') as outfile:
        outfile.write(json.dumps(ann_dict, indent=2))
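# Usage sketch for convert_cs6_annots (all paths below are hypothetical
# placeholders, not from this repo):
def _demo_convert_cs6_annots():
    convert_cs6_annots('data/CS6_annot/cs6_gt_annot_train.txt',
                       'data/CS6_annot', 'data/CS6_annot',
                       data_set='CS6-train-gt')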