def worker(output_dir, video_dir):
    """Crop one instance patch per annotated frame of a single video.

    Frames are read from ``<video_dir>/img/*.jpg`` and boxes from
    ``<video_dir>/groundtruth_rect.txt`` (one ``x, y, w, h`` row per frame,
    comma- or whitespace-separated). Each patch produced by
    ``get_instance_image`` is written to ``<output_dir>/<video_name>/``.

    Args:
        output_dir: Directory under which the per-video folder is created.
        video_dir: Directory of one video sequence.

    Returns:
        ``(video_name, trajs)`` where ``trajs`` maps the single track id ``0``
        to the list of frame names (file name without extension) that were
        cropped, in frame order.

    Raises:
        AssertionError: If the number of annotation rows differs from the
            number of images.
    """
    # os.path already understands the platform's separators, so the frame
    # index and video name are derived the same way as the frame file name
    # below. normpath drops a trailing separator, which would otherwise
    # produce an empty video name.
    image_names = sorted(
        glob(os.path.join(video_dir, 'img/*.jpg')),
        key=lambda path: int(os.path.basename(path).split('.')[0]))
    video_name = os.path.basename(os.path.normpath(video_dir))

    save_folder = os.path.join(output_dir, video_name)
    # exist_ok avoids a crash when the folder already exists or is created
    # concurrently; missing parents are created as well.
    os.makedirs(save_folder, exist_ok=True)

    trajs = {0: []}
    trkid = 0
    # TODO: the "Jogging" video has tracking boxes for two people
    # ("groundtruth_rect.1.txt" holds one of them); only the default
    # annotation file is used for now.
    anno_str = "groundtruth_rect.txt"
    vid_anno_path = os.path.join(video_dir, anno_str)
    with open(vid_anno_path, 'r') as f:
        # Ignore blank lines (e.g. a trailing newline at the end of file).
        rows = [line for line in f if line.strip()]

    # Some files are comma-separated, others use spaces or tabs; turning
    # commas into spaces lets one split handle every row.
    # The ground-truth x, y are shifted by -1 (presumably converting
    # 1-based annotations to 0-based pixel coordinates).
    bboxs = [
        np.array(list(map(int, row.replace(',', ' ').split()))) - [1, 1, 0, 0]
        for row in rows
    ]
    # Raised explicitly (instead of `assert`) so the check survives
    # `python -O`; the exception type is kept for backward compatibility.
    if len(bboxs) != len(image_names):
        raise AssertionError('bboxs的数量必须要和image_names的一致')

    # Loop-invariant: side length of the crop written to disk.
    instance_crop_size = int(
        np.ceil((config.instance_size + config.max_translate * 2) *
                (1 + config.scale_resize)))

    for image_name, box in zip(image_names, bboxs):
        # Here box is x, y, w, h with (x, y) the top-left corner.
        # TODO: two videos (e.g. Board) end with a row of four zeros, which
        # becomes [-1, -1, 0, 0] after the shift above; skip such frames.
        # Checked before reading the image so skipped frames cost no I/O.
        if (box == [-1, -1, 0, 0]).all():
            continue

        img = cv2.imread(image_name)
        img_mean = tuple(map(int, img.mean(axis=(0, 1))))

        filename = os.path.basename(image_name).split('.')[0]
        trajs[0].append(filename)

        # Convert to cx, cy, w, h.
        bbox = np.array([
            box[0] + box[2] / 2 - 1 / 2,
            box[1] + box[3] / 2 - 1 / 2,
            box[2],
            box[3],
        ])

        instance_img, w, h, _ = get_instance_image(
            img, bbox, config.exemplar_size, instance_crop_size,
            config.context_amount, img_mean)
        instance_img_name = os.path.join(
            save_folder,
            filename + ".{:02d}.x_{:.2f}_{:.2f}.jpg".format(trkid, w, h))
        cv2.imwrite(instance_img_name, instance_img)

    return video_name, trajs
def update(self, frame):
    """Locate the target in ``frame`` and refresh the tracker state.

    A search patch is cropped around the previous ``self.bbox``, pushed
    through ``self.model.track`` and the resulting proposals are re-ranked
    with a scale/ratio penalty and ``self.window`` before the best one is
    blended into the current position and size.

    Args:
        frame: Image array; documented by the original author as RGB. Only
            ``frame.shape[0]`` and ``frame.shape[1]`` are read directly here.

    Returns:
        ``(bbox, score)``: ``bbox`` is a 4-tuple of ``np.float64`` values
        ``(x, y, w, h)`` clipped to the frame (x, y presumably the box
        centre, matching what ``get_instance_image`` receives -- confirm);
        ``score`` is the softmax score (class index 1) of the chosen
        proposal, without penalty or window weighting.
    """

    def symmetric_ratio(r):
        # Elementwise max(r, 1/r): deviations in either direction count alike.
        return np.maximum(r, 1. / r)

    def padded_size(w, h):
        # Geometric mean of the sides after adding half the perimeter/2 pad.
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))

    search_patch, _, _, scale_x = get_instance_image(
        frame, self.bbox, config.exemplar_size, config.instance_size,
        config.context_amount, self.img_mean)
    batch = self.transforms(search_patch)[None, :, :, :]
    pred_score, pred_regression = self.model.track(batch.cuda())

    # Flatten the anchor / spatial dimensions into one proposal axis.
    pred_conf = pred_score.reshape(
        -1, 2,
        config.anchor_num * config.score_size * config.score_size
    ).permute(0, 2, 1)
    pred_offset = pred_regression.reshape(
        -1, 4,
        config.anchor_num * config.score_size * config.score_size
    ).permute(0, 2, 1)

    delta = pred_offset[0].cpu().detach().numpy()
    box_pred = box_transform_inv(self.anchors, delta)
    score_pred = F.softmax(pred_conf, dim=2)[0, :, 1].cpu().detach().numpy()

    # Scale penalty: proposal size versus the current target size in patch
    # coordinates.
    scaled_target = self.target_sz * scale_x
    s_c = symmetric_ratio(
        padded_size(box_pred[:, 2], box_pred[:, 3]) /
        (padded_size(scaled_target[0], scaled_target[1])))
    # Ratio penalty: current aspect ratio versus the proposal aspect ratio.
    r_c = symmetric_ratio(
        (self.target_sz[0] / self.target_sz[1]) /
        (box_pred[:, 2] / box_pred[:, 3]))
    penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)

    pscore = penalty * score_pred
    pscore = pscore * (1 - config.window_influence) + \
        self.window * config.window_influence
    winner = np.argmax(pscore)

    # Back to frame scale; the update rate follows the winner's confidence.
    target = box_pred[winner, :] / scale_x
    lr = penalty[winner] * score_pred[winner] * config.lr_box

    frame_h = frame.shape[0]
    frame_w = frame.shape[1]
    res_x = np.clip(target[0] + self.pos[0], 0, frame_w)
    res_y = np.clip(target[1] + self.pos[1], 0, frame_h)
    res_w, res_h = [
        np.clip(self.target_sz[k] * (1 - lr) + target[2 + k] * lr,
                config.min_scale * self.origin_target_sz[k],
                config.max_scale * self.origin_target_sz[k])
        for k in (0, 1)
    ]

    self.pos = np.array([res_x, res_y])
    self.target_sz = np.array([res_w, res_h])

    state = np.array([res_x, res_y, res_w, res_h])
    limits = ((0, frame_w), (0, frame_h), (10, frame_w), (10, frame_h))
    self.bbox = tuple(
        np.clip(value, low, high).astype(np.float64)
        for value, (low, high) in zip(state, limits))
    return self.bbox, score_pred[winner]
def worker(output_dir, video_dir):
    """Crop and save an instance patch for every annotated object of a video.

    Two layouts are supported, selected by whether ``'YT-BB'`` occurs in
    ``video_dir``:

    * YT-BB: frames are ``<video_dir>/*.jpg`` and annotations come from a
      pickled dict ``/dataset_ssd/std_xml_ytb/<video_name>.pkl`` indexed as
      ``dict[frame][class_id][track_id]``. Each record holds box coordinates
      that are multiplied by the image width/height (presumably normalised
      to [0, 1] -- confirm against the pickle producer).
    * otherwise: frames are ``<video_dir>/*.JPEG`` and each frame has an XML
      file at the same path with ``Data`` -> ``Annotations`` and
      ``JPEG`` -> ``xml`` substituted.

    Args:
        output_dir: Directory under which the per-video folder is created.
        video_dir: Directory containing the frames of one video.

    Returns:
        ``(video_name, trajs)`` where ``trajs`` maps a track id to the list
        of frame names in which that track was cropped.
    """
    instance_crop_size = 500
    trajs = {}

    if 'YT-BB' in video_dir:
        image_names = sorted(
            glob(os.path.join(video_dir, '*.jpg')),
            key=lambda x: int(x.split('/')[-1].split('_')[1]))
        video_name = '_'.join(os.path.basename(video_dir).split('_')[:-1])
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # acceptable because the file is a locally generated annotation cache.
        with open('/dataset_ssd/std_xml_ytb/' + video_name + '.pkl',
                  'rb') as f:
            std_xml_dict = pickle.load(f)
        save_folder = os.path.join(output_dir, video_name)
        os.makedirs(save_folder, exist_ok=True)

        for image_name in image_names:
            img = cv2.imread(image_name)
            # Kept under dedicated names: the crop helper below also returns
            # a (w, h) pair, and reusing `w`/`h` for both made every object
            # after the first in a frame be scaled by the wrong size.
            img_h, img_w, _ = img.shape
            img_mean = tuple(map(int, img.mean(axis=(0, 1))))

            frame = image_name.split('_')[-2]
            # Frame zero is stored under the key '0' regardless of padding.
            anno = std_xml_dict['0' if int(frame) == 0 else frame]
            filename = '_'.join(image_name.split('/')[-1].split('_')[:-1])

            for class_id, tracks in anno.items():
                for track_id, record in tracks.items():
                    (class_name, present, xmin_scale, xmax_scale,
                     ymin_scale, ymax_scale) = record
                    if present != 'present':
                        continue

                    new_track_id = class_id.zfill(3) + track_id.zfill(3)
                    trajs.setdefault(new_track_id, []).append(filename)

                    x_lo, x_hi, y_lo, y_hi = np.array(
                        list(map(float, [xmin_scale, xmax_scale,
                                         ymin_scale, ymax_scale]))
                    ) * np.array([img_w, img_w, img_h, img_h])
                    # Corner form -> cx, cy, w, h.
                    bbox = np.array([(x_hi + x_lo) / 2,
                                     (y_hi + y_lo) / 2,
                                     x_hi - x_lo + 1,
                                     y_hi - y_lo + 1])

                    instance_img, crop_w, crop_h, _ = get_instance_image(
                        img, bbox, config.exemplar_size, instance_crop_size,
                        config.context_amount, img_mean)
                    instance_img_name = os.path.join(
                        save_folder,
                        filename + ".{}.x_{:.2f}_{:.2f}.jpg".format(
                            new_track_id, crop_w, crop_h))
                    cv2.imwrite(instance_img_name, instance_img)
    else:
        image_names = sorted(
            glob(os.path.join(video_dir, '*.JPEG')),
            key=lambda x: int(x.split('/')[-1].split('.')[0]))
        video_name = video_dir.split('/')[-1]
        save_folder = os.path.join(output_dir, video_name)
        os.makedirs(save_folder, exist_ok=True)

        for image_name in image_names:
            img = cv2.imread(image_name)
            img_mean = tuple(map(int, img.mean(axis=(0, 1))))

            anno_name = image_name.replace('Data', 'Annotations')
            anno_name = anno_name.replace('JPEG', 'xml')
            root = ET.parse(anno_name).getroot()
            filename = root.find('filename').text

            for obj in root.iter('object'):
                bndbox = obj.find('bndbox')
                xmin, ymin, xmax, ymax = map(int, [
                    bndbox.find('xmin').text,
                    bndbox.find('ymin').text,
                    bndbox.find('xmax').text,
                    bndbox.find('ymax').text,
                ])
                trkid = int(obj.find('trackid').text)
                trajs.setdefault(trkid, []).append(filename)

                # Corner form -> cx, cy, w, h.
                bbox = np.array([(xmax + xmin) / 2,
                                 (ymax + ymin) / 2,
                                 xmax - xmin + 1,
                                 ymax - ymin + 1])

                instance_img, crop_w, crop_h, _ = get_instance_image(
                    img, bbox, config.exemplar_size, instance_crop_size,
                    config.context_amount, img_mean)
                instance_img_name = os.path.join(
                    save_folder,
                    filename + ".{:02d}.x_{:.2f}_{:.2f}.jpg".format(
                        trkid, crop_w, crop_h))
                cv2.imwrite(instance_img_name, instance_img)

    return video_name, trajs
def update(self, frame):
    """Locate the target in ``frame`` and refresh the tracker state.

    A search patch is cropped around the previous ``self.bbox`` and passed to
    ``self.model.track``, which returns one score map and one regression map
    per feature level. For each evaluated level the proposals are re-ranked
    with a scale/ratio penalty and that level's window, and the best proposal
    is kept; the best result across levels becomes the new state.

    Args:
        frame: Image array; documented by the original author as RGB. Only
            ``frame.shape[0]`` (height) and ``frame.shape[1]`` (width) are
            read directly here.

    Returns:
        ``(bbox, score)``: ``bbox`` is a 4-tuple of ``np.float64`` values
        ``(x, y, w, h)`` clipped to the frame (x, y presumably the box
        centre, matching what ``get_instance_image`` receives -- confirm);
        ``score`` is the penalised, window-blended score of the chosen
        proposal.
    """
    # TODO (original author): check what the extra returned values are.
    instance_img, _, _, scale_x = get_instance_image(
        frame, self.bbox, config.exemplar_size, config.instance_size,
        config.context_amount, self.img_mean)
    instance_img = self.transforms(instance_img)[None, :, :, :]
    if config.CUDA:
        instance_img = instance_img.cuda()
    pred_scores, pred_regressions = self.model.track(instance_img)

    def change(r):
        # Elementwise max(r, 1/r): deviations in either direction count alike.
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        sz2 = (w + pad) * (h + pad)
        return np.sqrt(sz2)

    def sz_wh(wh):
        return sz(wh[0], wh[1])

    # Image arrays are indexed (row, column): shape[0] is the height that
    # bounds y, shape[1] is the width that bounds x.
    frame_h, frame_w = frame.shape[0], frame.shape[1]

    # Temporarily fixed at 3 anchors per location, i.e. ratios [0.5, 1, 2].
    anchor_num = 3

    # Predict per level, keep the best match of each level together with its
    # score, then return the prediction with the highest score overall.
    # (Original note: count how often a level other than the 19*19 one wins,
    # to analyse the effect of the FPN.)
    results_bboxs = []
    results_scores = []
    for i in range(len(pred_scores)):
        # Only level 1 is evaluated at the moment; the other levels are
        # deliberately skipped.
        if i != 1:
            continue

        pred_score = pred_scores[i]  # e.g. [1, 6, 37, 37] per original notes
        pred_regression = pred_regressions[i]  # e.g. [1, 12, 37, 37]
        score_size = config.FEATURE_MAP_SIZE[i]
        num_proposals = anchor_num * score_size * score_size

        pred_conf = pred_score.reshape(-1, 2, num_proposals).permute(0, 2, 1)
        pred_offset = pred_regression.reshape(
            -1, 4, num_proposals).permute(0, 2, 1)
        delta = pred_offset[0].cpu().detach().numpy()
        box_pred = box_transform_inv(self.anchors[i], delta)
        score_pred = F.softmax(
            pred_conf, dim=2)[0, :, 1].cpu().detach().numpy()

        # Scale / ratio penalties. (Original note: it is unclear how the
        # related hyper-parameters should be chosen.)
        s_c = change(sz(box_pred[:, 2], box_pred[:, 3]) /
                     (sz_wh(self.target_sz * scale_x)))  # scale penalty
        r_c = change((self.target_sz[0] / self.target_sz[1]) /
                     (box_pred[:, 2] / box_pred[:, 3]))  # ratio penalty
        penalty = np.exp(-(r_c * s_c - 1.) * config.penalty_k)
        pscore = penalty * score_pred
        pscore = pscore * (1 - config.window_influence) + \
            self.windows[i] * config.window_influence

        best_pscore_id = np.argmax(pscore)
        target = box_pred[best_pscore_id, :] / scale_x
        lr = penalty[best_pscore_id] * score_pred[best_pscore_id] * \
            config.lr_box

        # Fix: x was clipped against the height and y against the width,
        # which disagreed with the final self.bbox clip below.
        res_x = np.clip(target[0] + self.pos[0], 0, frame_w)
        res_y = np.clip(target[1] + self.pos[1], 0, frame_h)
        # Keep the size within [min_scale, max_scale] of the initial size.
        res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr,
                        config.min_scale * self.origin_target_sz[0],
                        config.max_scale * self.origin_target_sz[0])
        res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr,
                        config.min_scale * self.origin_target_sz[1],
                        config.max_scale * self.origin_target_sz[1])

        results_bboxs.append(np.array([res_x, res_y, res_w, res_h]))
        results_scores.append(pscore[best_pscore_id])

    max_score_id = np.argmax(results_scores)
    best_box = results_bboxs[max_score_id]
    best_score = results_scores[max_score_id]

    x, y, w, h = best_box
    self.pos = np.array([x, y])
    self.target_sz = np.array([w, h])
    self.bbox = (
        np.clip(best_box[0], 0, frame_w).astype(np.float64),
        np.clip(best_box[1], 0, frame_h).astype(np.float64),
        np.clip(best_box[2], 10, frame_w).astype(np.float64),
        np.clip(best_box[3], 10, frame_h).astype(np.float64))
    return self.bbox, best_score
def worker(output_dir, video_dir):
    """Crop and save an instance patch for every annotated object of a video.

    Both supported layouts use one XML annotation file per frame and differ
    only in where frames and annotations live, selected by whether
    ``'YT-BB'`` occurs in ``video_dir``:

    * YT-BB: frames are ``<video_dir>/*.jpg`` ordered by the second
      ``_``-separated field of the file name; annotations are looked up under
      a fixed annotation root as ``<root>/<video_name>/<frame>.xml``.
    * otherwise: frames are ``<video_dir>/*.JPEG`` ordered by their numeric
      stem; the annotation path is the image path with ``Data`` ->
      ``Annotations`` and ``JPEG`` -> ``xml`` substituted.

    Args:
        output_dir: Directory under which the per-video folder is created.
        video_dir: Directory containing the frames of one video.

    Returns:
        ``(video_name, trajs)`` where ``trajs`` maps an integer track id to
        the list of frame names in which that track was cropped.
    """
    is_ytbb = 'YT-BB' in video_dir
    if is_ytbb:
        pattern = '*.jpg'
        anno_path = '/mnt/diska1/YT-BB/xml/youtube_dection_frame_xml_temp'

        def frame_order(path):
            return int(path.split('/')[-1].split('_')[1])
    else:
        pattern = '*.JPEG'

        def frame_order(path):
            return int(path.split('/')[-1].split('.')[0])

    image_names = sorted(glob(os.path.join(video_dir, pattern)),
                         key=frame_order)
    video_name = video_dir.split('/')[-1]
    save_folder = os.path.join(output_dir, video_name)
    # exist_ok avoids a crash when the folder already exists or is created
    # concurrently; missing parents are created as well.
    os.makedirs(save_folder, exist_ok=True)

    # Loop-invariant: side length of the crop written to disk.
    instance_crop_size = int(
        np.ceil((config.instance_size + config.max_translate * 2) *
                (1 + config.scale_resize)))

    trajs = {}
    for image_name in image_names:
        img = cv2.imread(image_name)
        img_mean = tuple(map(int, img.mean(axis=(0, 1))))

        if is_ytbb:
            anno_name = os.path.join(
                anno_path, video_name,
                image_name.split('/')[-1]).replace('.jpg', '.xml')
        else:
            anno_name = image_name.replace('Data', 'Annotations')
            anno_name = anno_name.replace('JPEG', 'xml')

        root = ET.parse(anno_name).getroot()
        filename = root.find('filename').text

        for obj in root.iter('object'):
            bndbox = obj.find('bndbox')
            xmin, ymin, xmax, ymax = map(int, [
                bndbox.find('xmin').text,
                bndbox.find('ymin').text,
                bndbox.find('xmax').text,
                bndbox.find('ymax').text,
            ])
            trkid = int(obj.find('trackid').text)
            trajs.setdefault(trkid, []).append(filename)

            # Corner form -> cx, cy, w, h.
            bbox = np.array([(xmax + xmin) / 2,
                             (ymax + ymin) / 2,
                             xmax - xmin + 1,
                             ymax - ymin + 1])

            instance_img, w, h, _ = get_instance_image(
                img, bbox, config.exemplar_size, instance_crop_size,
                config.context_amount, img_mean)
            instance_img_name = os.path.join(
                save_folder,
                filename + ".{:02d}.x_{:.2f}_{:.2f}.jpg".format(trkid, w, h))
            cv2.imwrite(instance_img_name, instance_img)

    return video_name, trajs