def __init__(self):
    self.device = torch_utils.select_device(force_cpu=False)
    torch.backends.cudnn.benchmark = False

    # Initialize the reid model
    reidCfg.DATASETS.ROOT_DIR = './query/'
    query_loader, num_query = make_data_loader(reidCfg)
    self.reidModel = build_model(reidCfg, num_classes=10126)
    self.reidModel.load_param(reidCfg.TEST.WEIGHT)
    self.reidModel.to(self.device).eval()

    self.query_feats = []
    self.query_pids = []
    self.query_camid = []

    for i, batch in enumerate(query_loader):
        with torch.no_grad():
            img, pid, camid = batch
            img = img.to(self.device)
            feat = self.reidModel(img)
            self.query_feats.append(feat)
            self.query_pids.extend(np.asarray(pid))
            self.query_camid.extend(np.asarray(camid))

    self.query_feats = torch.cat(self.query_feats, dim=0)
    self.query_feats = torch.nn.functional.normalize(self.query_feats, dim=1, p=2)
    print(self.query_feats)
    print("The query feature is normalized")
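# A minimal, self-contained sketch (not part of the original pipeline) of why the query features
# are L2-normalized above: once every row has unit norm, the squared Euclidean distance used later
# for matching reduces to 2 - 2 * cosine_similarity, so thresholding the distance is equivalent to
# thresholding the cosine similarity. The tensor shapes below are made up for the demonstration.
import torch
import torch.nn.functional as F

q = F.normalize(torch.randn(2, 2048), dim=1, p=2)   # two fake query features, unit length
g = F.normalize(torch.randn(7, 2048), dim=1, p=2)   # seven fake gallery features, unit length

sq_dist = torch.cdist(q, g, p=2) ** 2                # squared Euclidean distances, shape (2, 7)
cos_sim = q @ g.t()                                  # cosine similarities, shape (2, 7)
assert torch.allclose(sq_dist, 2 - 2 * cos_sim, atol=1e-4)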
def detect(cfg,
           data,
           weights,
           images='data/samples',  # input folder
           output='output',        # output folder
           fourcc='mp4v',          # video codec
           img_size=416,
           conf_thres=0.25,
           nms_thres=0.4,
           dist_thres=1.3,
           save_txt=False,
           save_images=True):

    # Initialize
    device = torch_utils.select_device(force_cpu=False)
    torch.backends.cudnn.benchmark = False  # set False for reproducible results
    if os.path.exists(output):
        shutil.rmtree(output)  # delete output folder
    os.makedirs(output)        # make new output folder

    ############# Pedestrian re-identification (ReID) model initialization #############
    query_loader, num_query = make_data_loader(reidCfg)
    reidModel = build_model(reidCfg, num_classes=10126)
    reidModel.load_param(reidCfg.TEST.WEIGHT)
    reidModel.to(device).eval()
    print('num query %d' % num_query)

    query_feats = defaultdict(list)
    query_pids = []

    for i, batch in enumerate(query_loader):
        with torch.no_grad():
            img, pid, camid = batch
            img = img.to(device)
            feat = reidModel(img)  # e.g. 2 query images, one 2048-dim feature each: torch.Size([2, 2048])
            for j, f in enumerate(feat):
                if pid[j] not in query_pids:
                    query_pids.append(pid[j])
                print(f.cpu().numpy())
                query_feats[pid[j]].append(f.cpu().numpy())

    for pid in query_pids:
        temp = np.array(query_feats[pid])
        print(temp)
        query_feats[pid] = torch.from_numpy(temp).float().to(device)  # torch.Size([2, 2048])
        print(query_feats[pid])
        query_feats[pid] = torch.nn.functional.normalize(query_feats[pid], dim=1, p=2)  # normalize the query features
        print(query_feats[pid])
    print("The query feature is normalized")

    ############# Pedestrian detection model initialization #############
    model = Darknet(cfg, img_size)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()

    # Half precision
    opt.half = opt.half and device.type != 'cpu'  # half precision only supported on CUDA
    if opt.half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.webcam:
        save_images = False
        dataloader = LoadWebcam(img_size=img_size, half=opt.half)
    else:
        dataloader = LoadImages(images, img_size=img_size, half=opt.half)

    # Get classes and colors
    # parse_data_cfg(data)['names'] gives the path to the class-name file, e.g. names=data/coco.names
    classes = load_classes(parse_data_cfg(data)['names'])  # list of class names: ['person', 'bicycle', ...]
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # one random box color per class

    # Run inference
    t0 = time.time()
    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        t = time.time()
        # if i < 500 or i % 5 == 0:
        #     continue
        save_path = str(Path(output) / Path(path).name)  # output path for this image

        # Get detections, shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(device)  # torch.Size([1, 3, 416, 320])
        pred, _ = model(img)  # processed network prediction, plus the raw one
        det = non_max_suppression(pred.float(), conf_thres, nms_thres)[0]  # torch.Size([5, 7])

        if det is not None and len(det) > 0:
            # Rescale boxes from 416 to true image size (map back onto the original image)
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Print results to screen, e.g. "image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)"
            print('%gx%g ' % img.shape[2:], end='')  # print image size, e.g. '288x416'
            for c in det[:, -1].unique():  # iterate over every class present in this image
                n = (det[:, -1] == c).sum()  # number of detections of this class
                if classes[int(c)] == 'person':
                    print('%g %ss' % (n, classes[int(c)]), end=', ')  # e.g. '5 persons'

            # Draw bounding boxes and labels of detections
            # (x1y1x2y2, obj_conf, class_conf, class_pred)
            count = 0
            gallery_img = []
            gallery_loc = []
            for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
                # *xyxy: top-left and bottom-right corners in original-image coordinates,
                # e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                if save_txt:  # Write to file
                    with open(save_path + '.txt', 'a') as file:
                        file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                # Add bbox to the image
                label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
                if classes[int(cls)] == 'person':
                    # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    w = xmax - xmin  # 233
                    h = ymax - ymin  # 602
                    # Very small detections are usually not worth matching;
                    # tune this area threshold for your own data.
                    if w * h > 500:
                        gallery_loc.append((xmin, ymin, xmax, ymax))
                        crop_img = im0[ymin:ymax, xmin:xmax]  # HWC (602, 233, 3)
                        crop_img = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                        crop_img = build_transforms(reidCfg)(crop_img).unsqueeze(0)  # torch.Size([1, 3, 256, 128])
                        gallery_img.append(crop_img)

            if gallery_img:
                gallery_img = torch.cat(gallery_img, dim=0)  # torch.Size([7, 3, 256, 128])
                gallery_img = gallery_img.to(device)
                gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
                print("The gallery feature is normalized")
                gallery_feats = torch.nn.functional.normalize(gallery_feats, dim=1, p=2)  # normalize the gallery features

                for pid in query_pids:
                    m, n = query_feats[pid].shape[0], gallery_feats.shape[0]
                    distmat = torch.pow(query_feats[pid], 2).sum(dim=1, keepdim=True).expand(m, n) + \
                              torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
                    distmat.addmm_(1, -2, query_feats[pid], gallery_feats.t())
                    # distmat = (qf - gf)^2
                    # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
                    #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
                    distmat = distmat.cpu().numpy()  # move to host memory as a numpy array
                    distmat = distmat.sum(axis=0) / len(query_feats[pid])  # average over the multiple query images of the same person
                    index = distmat.argmin()
                    if distmat[index] < dist_thres:
                        print('distance: %s' % distmat[index])
                        plot_one_box(gallery_loc[index], im0, label=str(pid), color=colors[int(cls)])
                        # cv2.imshow('person search', im0)
                        # cv2.waitKey()

        print('Done. (%.3fs)' % (time.time() - t))

        if opt.webcam:  # Show live webcam
            cv2.imshow(weights, im0)

        if save_images:  # Save image with detections
            if dataloader.mode == 'images':
                cv2.imwrite(save_path, im0)
            else:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height))
                vid_writer.write(im0)

    if save_images:
        print('Results saved to %s' % os.getcwd() + os.sep + output)
        if platform == 'darwin':  # macOS
            os.system('open ' + output + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
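# A standalone sketch (assumed shapes, not project code) of the pairwise squared-distance
# computation used in detect() above: distmat = |q|^2 + |g|^2 - 2 * q @ g.T. It uses the
# non-deprecated keyword form of addmm_ (the positional addmm_(1, -2, ...) call above triggers a
# deprecation warning on recent PyTorch) and cross-checks the result against torch.cdist.
import torch

qf = torch.randn(2, 2048)    # query features (m x d)
gf = torch.randn(7, 2048)    # gallery features (n x d)
m, n = qf.shape[0], gf.shape[0]

distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
          torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
distmat.addmm_(qf, gf.t(), beta=1, alpha=-2)      # distmat = 1*distmat + (-2) * qf @ gf.t()

reference = torch.cdist(qf, gf, p=2) ** 2         # the same quantity computed directly
assert torch.allclose(distmat, reference, rtol=1e-3, atol=1e-3)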
def train(cfg):
    # prepare dataset
    num_classes = cfg.DATASETS.NUM_CLASSES
    train_loader, val_loader, num_query = make_data_loader(cfg)

    # prepare model
    model = build_model(cfg, num_classes)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()

    if cfg.MODEL.IF_WITH_CENTER == 'no':
        print('Train without center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        optimizer = make_optimizer(cfg, model)
        loss_func = make_loss(cfg, num_classes)

        # Added for resuming from a self-trained model
        if cfg.MODEL.PRETRAIN_CHOICE == 'self':
            start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
            print('Start epoch:', start_epoch)
            path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
            print('Path to the checkpoint of optimizer:', path_to_optimizer)
            model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH).state_dict())
            optimizer.load_state_dict(torch.load(path_to_optimizer).state_dict())
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD, start_epoch)
        elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
            start_epoch = 0
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD)
        else:
            print('Only pretrain_choice values "imagenet" and "self" are supported here, but got {}'.format(cfg.MODEL.PRETRAIN_CHOICE))

        do_train(
            cfg,
            model,
            train_loader,
            val_loader,
            optimizer,
            scheduler,   # modified for resuming from a self-trained model
            loss_func,
            num_query,
            start_epoch  # added for resuming from a self-trained model
        )
    elif cfg.MODEL.IF_WITH_CENTER == 'yes':
        print('Train with center loss, the loss type is', cfg.MODEL.METRIC_LOSS_TYPE)
        cosface = None
        if cfg.MODEL.METRIC_LOSS_TYPE == "triplet_cosface_center":
            loss_func, center_criterion, cosface = make_loss_with_center(cfg, num_classes)
            optimizer, optimizer_center = make_optimizer_with_cosface_center(cfg, model, cosface, center_criterion)
        else:
            loss_func, center_criterion = make_loss_with_center(cfg, num_classes)
            optimizer, optimizer_center = make_optimizer_with_center(cfg, model, center_criterion)

        # Added for resuming from a self-trained model
        if cfg.MODEL.PRETRAIN_CHOICE == 'self':
            start_epoch = eval(cfg.MODEL.PRETRAIN_PATH.split('/')[-1].split('.')[0].split('_')[-1])
            print('Start epoch:', start_epoch)
            path_to_optimizer = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer')
            print('Path to the checkpoint of optimizer:', path_to_optimizer)
            path_to_center_param = cfg.MODEL.PRETRAIN_PATH.replace('model', 'center_param')
            print('Path to the checkpoint of center_param:', path_to_center_param)
            if cfg.MODEL.METRIC_LOSS_TYPE == "triplet_cosface_center":
                path_to_cosface_param = cfg.MODEL.PRETRAIN_PATH.replace('model', 'cosface_param')
                print('Path to the checkpoint of cosface_param:', path_to_cosface_param)
                cosface.load_state_dict(torch.load(path_to_cosface_param).state_dict())
            path_to_optimizer_center = cfg.MODEL.PRETRAIN_PATH.replace('model', 'optimizer_center')
            print('Path to the checkpoint of optimizer_center:', path_to_optimizer_center)
            model.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_PATH).state_dict())
            optimizer.load_state_dict(torch.load(path_to_optimizer).state_dict())
            center_criterion.load_state_dict(torch.load(path_to_center_param).state_dict())
            optimizer_center.load_state_dict(torch.load(path_to_optimizer_center).state_dict())
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD, start_epoch)
        elif cfg.MODEL.PRETRAIN_CHOICE == 'imagenet':
            start_epoch = 0
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD)
        elif cfg.MODEL.PRETRAIN_CHOICE == "weight":
            checkpoint = torch.load(cfg.MODEL.PRETRAIN_PATH)
            state_dict = checkpoint["state_dict"] if "state_dict" in checkpoint else checkpoint
            new_state_dict = OrderedDict()
            for key in state_dict.keys():
                new_key = key.replace("module", "module.base")
                new_state_dict[new_key] = state_dict[key]
            missing_keys, unexpected_keys = model.load_state_dict(new_state_dict, strict=False)
            print("loading model weights, missing_keys: {}, unexpected_keys: {}".format(missing_keys, unexpected_keys))
            start_epoch = 0
            scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                          cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_ITERS,
                                          cfg.SOLVER.WARMUP_METHOD)
        else:
            print('Only pretrain_choice values "imagenet", "self" and "weight" are supported here, but got {}'.format(cfg.MODEL.PRETRAIN_CHOICE))

        do_train_with_center(
            cfg,
            model,
            cosface,
            center_criterion,
            train_loader,
            val_loader,
            optimizer,
            optimizer_center,
            scheduler,   # modified for resuming from a self-trained model
            loss_func,
            num_query,
            start_epoch  # added for resuming from a self-trained model
        )
    else:
        print("Unsupported value for cfg.MODEL.IF_WITH_CENTER {}, only 'yes' or 'no' is supported!\n".format(cfg.MODEL.IF_WITH_CENTER))
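# A small sketch (with a hypothetical file name, not taken from any config) of the start-epoch
# recovery used by the PRETRAIN_CHOICE == 'self' branches above: the epoch is read from the last
# "_"-separated token of the checkpoint file name, and the optimizer checkpoint path is derived by
# substituting 'model' with 'optimizer'. int() is used here instead of eval(), which is safer and
# gives the same result for purely numeric tokens.
checkpoint_path = '/logs/market1501/resnet50_model_120.pth'          # hypothetical path
stem = checkpoint_path.split('/')[-1].split('.')[0]                  # 'resnet50_model_120'
start_epoch = int(stem.split('_')[-1])                               # 120
path_to_optimizer = checkpoint_path.replace('model', 'optimizer')    # '.../resnet50_optimizer_120.pth'
print(start_epoch, path_to_optimizer)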
def detect(cfg,
           data,
           weights,
           images='data/samples',  # input folder
           output='output',        # output folder
           fourcc='mp4v',          # video codec
           img_size=416,
           conf_thres=0.5,
           nms_thres=0.5,
           dist_thres=1.0,
           save_txt=False,
           save_images=True):

    # Initialize
    device = torch_utils.select_device(force_cpu=False)  # do not force CPU
    print("using device:", type(device), device)
    torch.backends.cudnn.benchmark = False  # set False for reproducible results
    if os.path.exists(output):
        shutil.rmtree(output)  # delete output folder
    os.makedirs(output)        # make new output folder

    ############# Load the <pid, display name (Chinese), color> list #############
    pidNameInfo = make_pidNames_loader(reidCfg)
    print("pidNameInfo:", pidNameInfo)

    ############# Pedestrian re-identification (ReID) model initialization #############
    query_loader, num_query = make_data_loader(reidCfg)
    print("query_loader:", type(query_loader), query_loader)
    reidModel = build_model(reidCfg, num_classes=10126)
    reidModel.load_param(reidCfg.TEST.WEIGHT)
    reidModel.to(device).eval()

    query_feats = []
    query_pids = []

    for i, batch in enumerate(query_loader):
        with torch.no_grad():
            img, pid, camid = batch  # image, person ID, camera ID
            img = img.to(device)
            feat = reidModel(img)  # e.g. 2 query images, one 2048-dim feature each: torch.Size([2, 2048])
            print("feat:", type(feat), feat.shape, feat)
            query_feats.append(feat)
            query_pids.extend(np.asarray(pid))  # extend() appends every element of the sequence to the list

    query_feats = torch.cat(query_feats, dim=0)  # torch.Size([2, 2048])
    print("The query feature is normalized")
    query_feats = torch.nn.functional.normalize(query_feats, dim=1, p=2)  # normalize the query features

    ############# Pedestrian detection model initialization #############
    model = Darknet(cfg, img_size)

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
        # model.load_state_dict(torch.load(weights, map_location='cpu')['model'])  # CPU-only variant
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()

    # Half precision
    opt.half = opt.half and device.type != 'cpu'  # half precision only supported on CUDA
    if opt.half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.webcam:
        save_images = False
        dataloader = LoadWebcam(img_size=img_size, half=opt.half)
    else:
        dataloader = LoadImages(images, img_size=img_size, half=opt.half)

    # Get classes and colors
    # parse_data_cfg(data)['names'] gives the path to the class-name file, e.g. names=data/coco.names
    classes = load_classes(parse_data_cfg(data)['names'])  # list of class names: ['person', 'bicycle', ...]
    # colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # one random box color per class
    # colors = [[40, 92, 230] for _ in range(len(classes))]  # only persons are detected, so a single color would do

    # Run inference
    t0 = time.time()
    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        t = time.time()
        # if i < 500 or i % 5 == 0:
        #     continue
        save_path = str(Path(output) / Path(path).name)  # output path for this image

        # Get detections, shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(device)  # torch.Size([1, 3, 416, 320])
        pred, _ = model(img)  # processed network prediction, plus the raw one
        det = non_max_suppression(pred.float(), conf_thres, nms_thres)[0]  # torch.Size([5, 7])

        if det is not None and len(det) > 0:
            # Rescale boxes from 416 to true image size (map back onto the original image)
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Print results to screen, e.g. "image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)"
            print('%gx%g ' % img.shape[2:], end='')  # print image size, e.g. '288x416'
            for c in det[:, -1].unique():  # iterate over every class present in this image
                n = (det[:, -1] == c).sum()  # number of detections of this class
                if classes[int(c)] == 'person':
                    print('%g %ss' % (n, classes[int(c)]), end=', ')  # e.g. '5 persons'

            # Draw bounding boxes and labels of detections
            # (x1y1x2y2, obj_conf, class_conf, class_pred)
            count = 0
            gallery_img = []
            gallery_loc = []
            for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
                # *xyxy: top-left and bottom-right corners in original-image coordinates,
                # e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                if save_txt:  # Write to file
                    with open(save_path + '.txt', 'a') as file:
                        file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

                # Add bbox to the image
                label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
                if classes[int(cls)] == 'person':
                    # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    w = xmax - xmin  # 233
                    h = ymax - ymin  # 602
                    # Very small detections are usually not worth matching;
                    # tune this area threshold for your own data.
                    if w * h > 500:
                        gallery_loc.append((xmin, ymin, xmax, ymax))
                        crop_img = im0[ymin:ymax, xmin:xmax]  # HWC (602, 233, 3)
                        crop_img = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                        crop_img = build_transforms(reidCfg)(crop_img).unsqueeze(0)  # torch.Size([1, 3, 256, 128])
                        gallery_img.append(crop_img)

            if gallery_img:
                gallery_img = torch.cat(gallery_img, dim=0)  # torch.Size([7, 3, 256, 128])
                gallery_img = gallery_img.to(device)
                gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
                print("The gallery feature is normalized")
                gallery_feats = torch.nn.functional.normalize(gallery_feats, dim=1, p=2)  # normalize the gallery features

                # m: 2, number of query images
                # n: 7, number of persons detected by YOLO
                m, n = query_feats.shape[0], gallery_feats.shape[0]
                # print("query_feats.shape:", query_feats.shape, "gallery_feats.shape:", gallery_feats.shape)
                distmat = torch.pow(query_feats, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                          torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()  # .t() transposes the matrix
                # addmm_: out = (beta * M) + (alpha * mat1 @ mat2)
                # here: distmat = qf^2 + gf^2 - 2 * qf @ gf.t()
                # qf: torch.Size([2, 2048])
                # gf: torch.Size([7, 2048])
                distmat.addmm_(1, -2, query_feats, gallery_feats.t())
                # distmat: one row per query image, one column per detected person
                # distmat = (qf - gf)^2
                # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
                #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
                distmat = distmat.cpu().numpy()  # move from GPU to host memory
                print("distmat:", type(distmat), m, n, distmat.shape)

                pidMatDict = splitDistmat(query_pids, distmat)  # split the matrix row-wise by pid: <pid, mat>
                pid_freq = Counter(query_pids)
                for k in pidMatDict.keys():  # k is a pid
                    tmp_mat = pidMatDict[k]  # distance rows belonging to this person
                    tmp_mat = tmp_mat.sum(axis=0) / pid_freq[k]  # average over the multiple query images of the same person
                    # tmp_mat = tmp_mat.sum(axis=0)
                    index = tmp_mat.argmin()  # index of the smallest distance (the detection most similar to this query person)
                    # print("tmp_mat:", type(tmp_mat), m, n, tmp_mat.shape, tmp_mat)
                    print('distance: %s' % tmp_mat[index], index, dist_thres, n)
                    # print("gallery_loc:", gallery_loc)
                    if tmp_mat[index] < dist_thres:
                        print('distance: %s' % tmp_mat[index])
                        # OpenCV garbles Chinese text, so the frame is converted to PIL, the label is drawn,
                        # and the result is converted back. Doing this inside the helper leaves only the
                        # filled color bar with no text, so the box drawing and labelling are inlined here
                        # instead of calling plot_one_box(). (English labels can use the helper directly.)
                        # plot_one_box(gallery_loc[index], im0, label='%s:%s' % (pidNameInfo[str(k)][1], tmp_mat[index]), color=getColorArr(pidNameInfo[str(k)][2]))
                        # Prepare the arguments plot_one_box() would normally receive
                        x = gallery_loc[index]  # box corners (top-left and bottom-right)
                        label = '%s:%s' % (pidNameInfo[str(k)][1], tmp_mat[index])  # label text (Chinese name)
                        # label = '%s' % pidNameInfo[str(k)][0]
                        color = getColorArr(pidNameInfo[str(k)][2])  # box color
                        tl = round(0.002 * (im0.shape[0] + im0.shape[1]) / 2) + 1  # line thickness
                        color = color or [random.randint(0, 255) for _ in range(3)]  # random color if none was given
                        c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))  # top-left xy, bottom-right xy
                        cv2.rectangle(im0, c1, c2, color, thickness=tl)
                        if label:
                            # For the label background box, c1 is the bottom-left and c2 the top-right corner
                            tf = max(tl - 1, 1)  # font thickness
                            # t_size = cv2.getTextSize(label, fontFace=0, fontScale=tl / 3, thickness=tf)[0]  # fontFace=0: ((310, 22), 10)
                            # t_size = cv2.getTextSize(label, fontFace=1, fontScale=tl / 3, thickness=tf)[0]  # fontFace=1: ((156, 10), 6)
                            zh_cn_nums = get_zhcn_number(label)  # number of Chinese characters (a Chinese char is ~20 px wide, an ASCII char ~10 px)
                            t_size = (20 * zh_cn_nums + 10 * (len(label) - zh_cn_nums), 22)
                            c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # subtract 3 extra pixels to leave some space above the text
                            cv2.rectangle(im0, c1, c2, color, -1)  # filled
                            print("t_size:", t_size, " c1:", c1, " c2:", c2)
                            # Draw a label with a name below the face
                            # cv2.rectangle(im0, c1, c2, (0, 0, 255), cv2.FILLED)
                            font = cv2.FONT_HERSHEY_DUPLEX
                            # Convert from OpenCV to PIL, draw the Chinese label, then convert back
                            pil_img = Image.fromarray(cv2.cvtColor(im0, cv2.COLOR_BGR2RGB))
                            draw = ImageDraw.Draw(pil_img)
                            font = ImageFont.truetype('simhei.ttf', 20, encoding='utf-8')
                            draw.text((c1[0], c1[1] - 20), label, (255, 255, 255), font=font)
                            im0 = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)  # PIL back to OpenCV
                        # cv2.imshow('person search', im0)
                        # cv2.waitKey()

        print('Done. (%.3fs)' % (time.time() - t))

        if opt.webcam:  # Show live webcam
            cv2.imshow(weights, im0)

        if save_images:  # Save image with detections
            if dataloader.mode == 'images':
                cv2.imwrite(save_path, im0)
            else:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height))
                vid_writer.write(im0)

    if save_images:
        print('Results saved to %s' % os.getcwd() + os.sep + output)
        if platform == 'darwin':  # macOS
            os.system('open ' + output + ' ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
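# A self-contained sketch of the cv2 -> PIL -> cv2 round trip used above to draw Chinese labels
# (OpenCV's putText cannot render CJK glyphs). The font path 'simhei.ttf' is an assumption carried
# over from the code above; substitute any TrueType font with CJK glyphs installed on your system.
# The frame, box, and label below are dummies for illustration.
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont

frame = np.zeros((240, 320, 3), dtype=np.uint8)                     # dummy BGR frame
box, label = (40, 60, 200, 220), '张三:0.87'                         # hypothetical match
cv2.rectangle(frame, box[:2], box[2:], (40, 92, 230), 2)             # box drawn with OpenCV

pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))    # BGR -> RGB -> PIL
draw = ImageDraw.Draw(pil_img)
font = ImageFont.truetype('simhei.ttf', 20)                          # assumes the font file is available
draw.text((box[0], box[1] - 20), label, (255, 255, 255), font=font)  # draw the CJK label
frame = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)           # back to OpenCV's BGR layout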
def yolo_frames(unique_name):
    cam_id = unique_name[1]

    device = torch_utils.select_device(force_cpu=False)
    torch.backends.cudnn.benchmark = False  # set False for reproducible results

    cfg = 'cfg/yolov3.cfg'
    data = 'data/coco.data'
    weights = 'weights/yolov3.weights'
    half = False
    img_size = 416
    conf_thres = 0.25
    nms_thres = 0.4
    dist_thres = 1.4

    # Create the ReID model
    query_loader, num_query = make_data_loader(reidCfg)
    reidModel = build_model(reidCfg, num_classes=10126)
    reidModel.load_param(reidCfg.TEST.WEIGHT)
    reidModel.to(device).eval()

    # Query information used for ReID
    query_feats = defaultdict(list)
    query_pids = []

    # Fetch the query features
    for i, batch in enumerate(query_loader):
        with torch.no_grad():
            img, pid, camid = batch
            img = img.to(device)
            feat = reidModel(img)
            for j, f in enumerate(feat):
                if pid[j] not in query_pids:
                    query_pids.append(pid[j])
                query_feats[pid[j]].append(f.cpu().numpy())

    # Convert the query features to torch tensors and normalize them
    for pid in query_pids:
        temp = np.array(query_feats[pid])
        query_feats[pid] = torch.from_numpy(temp).float().to(device)
        query_feats[pid] = torch.nn.functional.normalize(query_feats[pid], dim=1, p=2)
    print("The query feature is normalized")

    model = Darknet(cfg, img_size)  # build the detection model from the config

    # Load weights
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Eval mode
    model.to(device).eval()

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    dataloader = LoadWebcam(cam_id, img_size=img_size, half=half)

    # Get classes and colors
    # parse_data_cfg(data)['names'] gives names=data/coco.names
    classes = load_classes(parse_data_cfg(data)['names'])  # all class names from the COCO names file: ['person', 'bicycle', ...]
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(classes))]  # one random box color per class

    # count = 0  # counter used when saving cropped images

    # Run inference
    t0 = time.time()
    for i, (path, img, im0, vid_cap) in enumerate(dataloader):
        patient_map = map.Map
        now = time.localtime()

        # If the saved minute is not the same as the current minute, this camera is the first one
        # to access the map information in this minute. Since the seconds array has to be cleared
        # every minute, update the minute and clear the seconds array.
        if now.tm_min != patient_map.minute:
            patient_map.minute = now.tm_min
            patient_map.sec_array.clear()

        # If there is no entry for the current second yet, this is the first access to the map in
        # this second, so the per-second map information has to be re-initialized.
        if now.tm_sec not in patient_map.sec_array:
            patient_map.sec_array.append(now.tm_sec)
            patient_map.exist_id = []
            patient_map.camera_map = {0: [], 1: [], 2: []}
            patient_map.total_count = {0: 0, 1: 0, 2: 0}

        if i % 5 != 0:  # skip frames to reduce the processing load
            continue

        # Get detections, shape: (3, 416, 320)
        img = torch.from_numpy(img).unsqueeze(0).to(device)  # torch.Size([1, 3, 416, 320]); convert the frame to a torch tensor
        pred, _ = model(img)  # run detection on the frame
        det = non_max_suppression(pred.float(), conf_thres, nms_thres)[0]  # torch.Size([5, 7]); keep detections above the thresholds

        if det is not None and len(det) > 0:
            # Rescale boxes from 416 to true image size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

            # Print results to screen, e.g. "image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)"
            # print('%gx%g ' % img.shape[2:], end='')  # print image size, e.g. '288x416'
            # for c in det[:, -1].unique():            # iterate over every class present in this frame
            #     n = (det[:, -1] == c).sum()          # number of detections of this class
            #     if classes[int(c)] == 'person':
            #         print('%g %ss' % (n, classes[int(c)]), end=', ')  # e.g. '5 persons'
            # print(" ")

            # Draw bounding boxes and labels of detections
            # (x1y1x2y2, obj_conf, class_conf, class_pred)
            gallery_img = []  # holds only the cropped person images
            gallery_loc = []  # holds the coordinates of those crops
            for *xyxy, conf, cls_conf, cls in det:
                # Each det row holds the box coordinates, the objectness confidence and the class confidence.
                # *xyxy: coordinates as tensors, e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
                # Add bbox to the image
                # label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
                if classes[int(cls)] == 'person':  # only keep detections of the person class
                    xmin = int(xyxy[0])
                    ymin = int(xyxy[1])
                    xmax = int(xyxy[2])
                    ymax = int(xyxy[3])
                    w = xmax - xmin  # 233
                    h = ymax - ymin  # 602
                    if w * h > 500:
                        gallery_loc.append((xmin, ymin, xmax, ymax))
                        crop_img = im0[ymin:ymax, xmin:xmax]  # HWC (602, 233, 3)
                        # cv2.imwrite('./temp/' + str(count) + '.jpg', crop_img)  # save the crop to ./temp to reuse as a query image
                        # count = count + 1
                        crop_img = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                        crop_img = build_transforms(reidCfg)(crop_img).unsqueeze(0)  # torch.Size([1, 3, 256, 128])
                        gallery_img.append(crop_img)
                        plot_one_box(xyxy, im0, color=[128, 128, 128])  # draw a gray box around every detected person

            map.Map.total_count[int(cam_id)] = len(gallery_img)

            if gallery_img:  # if at least one person crop was collected
                gallery_img = torch.cat(gallery_img, dim=0)  # torch.Size([7, 3, 256, 128])
                gallery_img = gallery_img.to(device)
                gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
                # print("The gallery feature is normalized")
                gallery_feats = torch.nn.functional.normalize(gallery_feats, dim=1, p=2)  # normalize the gallery (crop) features

                # m: 2
                # n: 7
                for pid in query_pids:
                    m, n = query_feats[pid].shape[0], gallery_feats.shape[0]
                    distmat = torch.pow(query_feats[pid], 2).sum(dim=1, keepdim=True).expand(m, n) + \
                              torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
                    # addmm_: out = (beta * M) + (alpha * mat1 @ mat2)
                    # here: distmat = qf^2 + gf^2 - 2 * qf @ gf.t()
                    # qf: torch.Size([2, 2048])
                    # gf: torch.Size([7, 2048])
                    distmat.addmm_(1, -2, query_feats[pid], gallery_feats.t())
                    # distmat = (qf - gf)^2
                    # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
                    #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
                    # example log output: "Found target 2 on camera 2: 1.2738347"
                    distmat = distmat.cpu().detach().numpy()
                    distmat = distmat.sum(axis=0) / len(query_feats[pid])  # average distance between the query features and the current crops
                    index = distmat.argmin()
                    if distmat[index] < dist_thres:  # a distance below the threshold counts as a match
                        print('Found target %i on camera %s: %s' % (pid, cam_id, distmat[index]))
                        plot_one_box(gallery_loc[index], im0, label=str(pid), color=[0, 0, 255])
                        if pid not in patient_map.exist_id:
                            patient_map.exist_id.append(pid)
                        if pid not in patient_map.camera_map[int(cam_id)]:
                            patient_map.camera_map[int(cam_id)].append(pid)
                        # print("exist id : ", patient_map.exist_id)
                        # print(patient_map.camera_map)
                        print("total : ", len(gallery_img))
                        filename = time.strftime("%Y%m%d", time.localtime(time.time())) + '_person' + str(pid) + '.txt'
                        f = open(filename, 'a')
                        f.write('\n' + cam_id + ' - ' + time.strftime('%H : %M : %S'))
                        f.close()

        # If the map of this camera ID is still false, there was no identified query in this second.
        # if patient_map.camera_map[int(cam_id)] == False:

        yield cam_id, im0
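# A hedged sketch of how the yolo_frames() generator above might be consumed. The structure of
# unique_name is an assumption (only unique_name[1] is read, as the camera id), and the simple
# cv2 display loop is illustrative wiring only, not part of the original project.
import cv2

def show_camera(unique_name=('feed', '0')):       # hypothetical: second element is the camera id
    for cam_id, frame in yolo_frames(unique_name):
        cv2.imshow('camera %s' % cam_id, frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):     # press 'q' to stop
            break
    cv2.destroyAllWindows()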
def search_detect(dataloader_item, model, reidModel, device, classes, colors, weights):
    global query_time_last
    global query_time_now
    global query_feats

    conf_thres = 0.1
    nms_thres = 0.4
    dist_thres = 3.0
    output = 'output'
    fourcc = 'mp4v'

    t = time.time()
    path, img, im0, vid_cap = dataloader_item
    # print(path, img.shape, im0.shape, vid_cap)
    # print(aaa)
    # data/samples/c1s1_001051.jpg (3, 320, 416) (480, 640, 3) None
    vid_path, vid_writer = None, None

    ############# Query initialization #############
    if len(os.listdir('query')) < 1:
        print('not enough query')
        return
    else:
        if query_time_now == query_time_last or query_time_now - query_time_last >= 1:
            query_loader, num_query = make_data_loader(reidCfg)
            query_feats = []
            for i, batch in enumerate(query_loader):
                with torch.no_grad():
                    img_q, pid, camid = batch
                    img_q = img_q.to(device)
                    feat = reidModel(img_q)  # e.g. 2 query images, one 2048-dim feature each: torch.Size([2, 2048])
                    query_feats.append(feat)
            query_feats = torch.cat(query_feats, dim=0)  # torch.Size([2, 2048])
            query_feats = torch.nn.functional.normalize(query_feats, dim=1, p=2)  # normalize the query features
            print("The query feature is normalized")
            query_time_last = query_time_now
        elif len(query_feats) == 0:
            print('no query_feats')
            return
    query_time_now = time.time()
    ############# Query initialization END #############

    if not os.path.exists(output):
        os.makedirs(output)
    save_path = str(Path(output) / Path(path).name)  # output path for this image

    # Get detections, shape: (3, 416, 320)
    img = torch.from_numpy(img).unsqueeze(0).to(device)  # torch.Size([1, 3, 416, 320])
    pred, _ = model(img)  # processed network prediction, plus the raw one
    det = non_max_suppression(pred.float(), conf_thres, nms_thres)[0]  # torch.Size([5, 7])

    if det is not None and len(det) > 0:
        # Rescale boxes from 416 to true image size (map back onto the original image)
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

        # Print results to screen, e.g. "image 1/3 data\samples\000493.jpg: 288x416 5 persons, Done. (0.869s)"
        print('%gx%g ' % img.shape[2:], end='')  # print image size, e.g. '288x416'
        for c in det[:, -1].unique():  # iterate over every class present in this image
            n = (det[:, -1] == c).sum()  # number of detections of this class
            if classes[int(c)] == 'person':
                print('%g %ss' % (n, classes[int(c)]), end=', ')  # e.g. '5 persons'

        # Draw bounding boxes and labels of detections
        # (x1y1x2y2, obj_conf, class_conf, class_pred)
        count = 0
        gallery_img = []
        gallery_loc = []
        for *xyxy, conf, cls_conf, cls in det:  # iterate over the final predicted boxes
            # *xyxy: top-left and bottom-right corners in original-image coordinates,
            # e.g. [tensor(349.), tensor(26.), tensor(468.), tensor(341.)]
            '''Write to file'''
            # with open(save_path + '.txt', 'a') as file:
            #     file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf))

            # Add bbox to the image
            label = '%s %.2f' % (classes[int(cls)], conf)  # 'person 1.00'
            if classes[int(cls)] == 'person':
                # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
                xmin = int(xyxy[0])
                ymin = int(xyxy[1])
                xmax = int(xyxy[2])
                ymax = int(xyxy[3])
                w = xmax - xmin  # 233
                h = ymax - ymin  # 602
                # Very small detections are usually not worth matching;
                # tune these size thresholds for your own data.
                # if h > 2 * w and h * w > 100 * 50:
                if h > 100 and w > 50:
                    gallery_loc.append((xmin, ymin, xmax, ymax))
                    crop_img = im0[ymin:ymax, xmin:xmax]  # HWC (602, 233, 3)
                    crop_img = Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))  # PIL: (233, 602)
                    crop_img = build_transforms(reidCfg)(crop_img).unsqueeze(0)  # torch.Size([1, 3, 256, 128])
                    gallery_img.append(crop_img)

        '''flip image and box'''
        im0 = cv2.flip(im0, 1)

        if gallery_img:
            gallery_img = torch.cat(gallery_img, dim=0)  # torch.Size([7, 3, 256, 128])
            gallery_img = gallery_img.to(device)
            gallery_feats = reidModel(gallery_img)  # torch.Size([7, 2048])
            gallery_feats = torch.nn.functional.normalize(gallery_feats, dim=1, p=2)  # normalize the gallery features
            print("The gallery feature is normalized")

            # m: 2
            # n: 7
            m, n = query_feats.shape[0], gallery_feats.shape[0]
            distmat = torch.pow(query_feats, 2).sum(dim=1, keepdim=True).expand(m, n) + \
                      torch.pow(gallery_feats, 2).sum(dim=1, keepdim=True).expand(n, m).t()
            # addmm_: out = (beta * M) + (alpha * mat1 @ mat2)
            # here: distmat = qf^2 + gf^2 - 2 * qf @ gf.t()
            # qf: torch.Size([2, 2048])
            # gf: torch.Size([7, 2048])
            distmat.addmm_(1, -2, query_feats, gallery_feats.t())
            # distmat = (qf - gf)^2
            # distmat = np.array([[1.79536, 2.00926, 0.52790, 1.98851, 2.15138, 1.75929, 1.99410],
            #                     [1.78843, 1.96036, 0.53674, 1.98929, 1.99490, 1.84878, 1.98575]])
            distmat = distmat.cpu().numpy()
            distmat = distmat.sum(axis=0) / len(query_feats)  # average over the multiple query images of the same person
            index = distmat.argmin()
            if distmat[index] < dist_thres:
                print('distance: %s' % distmat[index])
                # print(gallery_loc[index])
                # Mirror the matched box into the flipped frame's coordinates
                xmin = im0.shape[1] - gallery_loc[index][2]
                ymin = im0.shape[0] - gallery_loc[index][3]
                xmax = im0.shape[1] - gallery_loc[index][0]
                ymax = im0.shape[0] - gallery_loc[index][1]
                # plot_one_box(gallery_loc[index], im0, label='find!', color=colors[int(cls)])
                plot_one_box((xmin, ymin, xmax, ymax), im0, label='find!', color=colors[int(cls)])
                # cv2.imshow('person search', im0)
                # cv2.waitKey()

    print('Done. (%.3fs)' % (time.time() - t))

    '''show image'''
    # cv2.imshow(weights, im0)

    '''save image'''
    # cv2.imwrite(save_path, im0)

    '''save webcam'''
    # if vid_path != save_path:  # new video
    #     vid_path = save_path
    #     if isinstance(vid_writer, cv2.VideoWriter):
    #         vid_writer.release()  # release previous video writer
    #     fps = vid_cap.get(cv2.CAP_PROP_FPS)
    #     width = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    #     height = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    #     vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (width, height))
    #     vid_writer.write(im0)

    return im0
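# A tiny standalone sketch (synthetic numbers, not project data) of how a bounding box maps into a
# flipped frame. For a horizontal flip (cv2.flip(img, 1)) only the x coordinates mirror; mirroring
# both x and y, as the code above does, corresponds to a flip around both axes (cv2.flip(img, -1)).
H, W = 480, 640                                    # frame height and width
xmin, ymin, xmax, ymax = 100, 50, 200, 300         # box in the original frame

# horizontal flip (flipCode=1): x mirrors, y is unchanged
h_box = (W - xmax, ymin, W - xmin, ymax)           # (440, 50, 540, 300)

# flip around both axes (flipCode=-1): x and y both mirror
hv_box = (W - xmax, H - ymax, W - xmin, H - ymin)  # (440, 180, 540, 430)
print(h_box, hv_box)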
from utils.utils import *
from reid.data import make_data_loader
from reid.data.transforms import build_transforms
from reid.modeling import build_model
from reid.config import cfg as reidCfg
import numpy as np
from PIL import Image

device = torch_utils.select_device(force_cpu=False)
torch.backends.cudnn.benchmark = False

reidCfg.DATASETS.ROOT_DIR = './query/'
query_loader, num_query = make_data_loader(reidCfg)
reidModel = build_model(reidCfg, num_classes=10126)
reidModel.load_param(reidCfg.TEST.WEIGHT)
reidModel.to(device).eval()

img = Image.open('./query/0001_c1s1_0_146.jpg')
crop_img = build_transforms(reidCfg)(img).unsqueeze(0)

gallery_img = []
gallery_img.append(crop_img)
gallery_img = torch.cat(gallery_img, dim=0)
gallery_img = gallery_img.to(device)

gallery_feats = reidModel(gallery_img)
print("The gallery feature is normalized")
gallery_feats = torch.nn.functional.normalize(gallery_feats, dim=1, p=2)
print(gallery_feats)
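# A short sketch (not part of the script above) relating the dist_thres values used by the detect()
# functions to a cosine-similarity threshold. Because both query and gallery features are
# L2-normalized, the squared distance stored in distmat equals 2 - 2*cos, so a threshold t on
# distmat accepts matches whose cosine similarity is at least 1 - t/2. The threshold values below
# mirror the defaults used above and are for illustration only.
for dist_thres in (1.0, 1.3, 1.4):
    min_cos_sim = 1 - dist_thres / 2   # distmat holds squared distances: d^2 = 2 - 2*cos
    print('dist_thres=%.1f  ->  cosine similarity >= %.2f' % (dist_thres, min_cos_sim))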