import math
from itertools import product

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from scipy.ndimage import gaussian_filter

# Project-level globals such as `util`, `posenet`, `config`, `limbSeq`, `draw_list`,
# `colors`, `dt_gt_mapping`, `show_color_vector`, `flip_heat_ord` and `flip_paf_ord`
# are assumed to be defined elsewhere in the repository.


def Net_Prediction(model, image, device, backbone='Mobilenet'):
    scale_search = [1]
    stride = 8
    padValue = 128
    heatmap_avg = np.zeros((image.shape[0], image.shape[1], 19))
    paf_avg = np.zeros((image.shape[0], image.shape[1], 38))

    for m in range(len(scale_search)):
        scale = scale_search[m]
        imageToTest = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        # pad the right and bottom edges so the image size is divisible by 8
        imageToTest_padded, pad = padRightDownCorner(imageToTest, stride, padValue)
        im = np.transpose(np.float32(imageToTest_padded), (2, 0, 1)) / 256 - 0.5
        im = np.ascontiguousarray(im)
        data = torch.from_numpy(im).float().unsqueeze(0).to(device)

        with torch.no_grad():
            if backbone == 'CMU':
                Mconv7_stage6_L1, Mconv7_stage6_L2 = model(data)
                _paf = Mconv7_stage6_L1.cpu().numpy()
                _heatmap = Mconv7_stage6_L2.cpu().numpy()
            elif backbone == 'Mobilenet':
                stages_output = model(data)
                _paf = stages_output[-1].cpu().numpy()
                _heatmap = stages_output[-2].cpu().numpy()

        # extract outputs, resize, and remove padding
        heatmap = np.transpose(np.squeeze(_heatmap), (1, 2, 0))  # output 1 is the heatmaps
        heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)

        paf = np.transpose(np.squeeze(_paf), (1, 2, 0))  # output 0 is the PAFs
        paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        paf = cv2.resize(paf, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)

        heatmap_avg += heatmap / len(scale_search)
        paf_avg += paf / len(scale_search)

    return heatmap_avg, paf_avg
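# --- Illustrative helper (not part of the original file) --------------------------
# `padRightDownCorner` is called throughout (unqualified above, as `util.padRightDownCorner`
# below) but never defined here. A minimal sketch consistent with its call sites: pad only
# the bottom/right edges so both sides divide by `stride`, and return pad as
# [up, left, down, right]. The project's actual implementation may differ.
def padRightDownCorner(img, stride, padValue):
    h, w = img.shape[:2]
    pad = [0, 0,
           (stride - h % stride) % stride,   # down
           (stride - w % stride) % stride]   # right
    img_padded = np.pad(img, ((pad[0], pad[2]), (pad[1], pad[3]), (0, 0)),
                        mode='constant', constant_values=padValue)
    return img_padded, pad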
def process(input_image, params, model_params, heat_layers, paf_layers):
    oriImg = cv2.imread(input_image)  # B,G,R order; training data is also read with OpenCV, hence BGR
    # oriImg = cv2.resize(oriImg, (768, 768))
    # oriImg = cv2.flip(oriImg, 1)  # training already used flip augmentation, so flipping here does not help
    multiplier = [x * model_params['boxsize'] / oriImg.shape[0] for x in params['scale_search']]
    # scales are relative to image height: first rescale the height to boxsize (368), then apply scale_search,
    # e.g. multiplier = [0.2175, 0.4350, 0.6525, 0.8700]
    heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], heat_layers))  # fixme if you change the number of keypoints
    paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], paf_layers))

    for m in range(len(multiplier)):
        scale = multiplier[m]
        if scale * oriImg.shape[0] > 2300 or scale * oriImg.shape[1] > 3200:
            scale = min(2300 / oriImg.shape[0], 3200 / oriImg.shape[1])
            print("Input image is too big, shrink it!")
        imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest,
                                                          model_params['max_downsample'],
                                                          model_params['padValue'])

        # Important! We use OpenCV to read images (BGR) all the time.
        # Input tensor: a batch of images within [0, 1], required shape in this project: (1, height, width, channels)
        input_img = np.float32(imageToTest_padded / 255)
        # input_img -= np.array(config.img_mean[::-1])  # notice: OpenCV uses BGR, so reverse the last axis
        # input_img /= np.array(config.img_std[::-1])

        # add the flipped image for the flip ensemble
        swap_image = input_img[:, ::-1, :].copy()
        # plt.imshow(swap_image[:, :, [2, 1, 0]])  # OpenCV image format: BGR
        input_img = np.concatenate((input_img[None, ...], swap_image[None, ...]),
                                   axis=0)  # (2, height, width, channels)
        input_img = torch.from_numpy(input_img).cuda()

        output_tuple = posenet(input_img)  # output tensor dtype: float16
        output = output_tuple[-1][0].cpu().numpy()  # the different scales can be inspected here

        output_blob = output[0].transpose((1, 2, 0))
        output_blob0 = output_blob[:, :, :config.paf_layers]  # PAF layers
        output_blob1 = output_blob[:, :, config.paf_layers:config.num_layers]  # keypoint layers

        output_blob_flip = output[1].transpose((1, 2, 0))
        output_blob0_flip = output_blob_flip[:, :, :config.paf_layers]  # PAF layers
        output_blob1_flip = output_blob_flip[:, :, config.paf_layers:config.num_layers]  # keypoint layers

        # flip ensemble: un-flip the mirrored prediction, swap left/right channels, then average
        output_blob0_avg = (output_blob0 + output_blob0_flip[:, ::-1, :][:, :, flip_paf_ord]) / 2
        output_blob1_avg = (output_blob1 + output_blob1_flip[:, ::-1, :][:, :, flip_heat_ord]) / 2

        # extract outputs, resize, and remove padding
        heatmap = cv2.resize(output_blob1_avg, (0, 0),
                             fx=model_params['stride'], fy=model_params['stride'],
                             interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

        # output_blob0 is PAFs
        paf = cv2.resize(output_blob0_avg, (0, 0),
                         fx=model_params['stride'], fy=model_params['stride'],
                         interpolation=cv2.INTER_CUBIC)
        paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
        paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)

        # to keep the averaged maps from getting too blurry?
        # heatmap[heatmap < params['thre1']] = 0
        # paf[paf < params['thre2']] = 0

        heatmap_avg = heatmap_avg + heatmap / len(multiplier)
        paf_avg = paf_avg + paf / len(multiplier)

    heatmap_avg[np.isnan(heatmap_avg)] = 0
    paf_avg[np.isnan(paf_avg)] = 0
    # heatmap_avg = np.maximum(heatmap_avg, heatmap)
    # paf_avg = np.maximum(paf_avg, paf)  # taking the max instead of the mean degrades results: many false positives

    all_peaks = []
    peak_counter = 0

    # show the limb and foreground channels
    show_color_vector(oriImg, paf_avg, heatmap_avg)

    # ------------------------------- find keypoints -------------------------------
    heatmap_avg = heatmap_avg.astype(np.float32)
    filter_map = heatmap_avg[:, :, :18].copy().transpose((2, 0, 1))[None, ...]
    filter_map = torch.from_numpy(filter_map).cuda()
    # optional Gaussian smoothing before NMS:
    # smoothing = util.GaussianSmoothing(18, 7, 1)
    # filter_map = F.pad(filter_map, (3, 3, 3, 3), mode='reflect')
    # filter_map = smoothing(filter_map)
    filter_map = util.keypoint_heatmap_nms(filter_map, kernel=3, thre=params['thre1'])
    filter_map = filter_map.cpu().numpy().squeeze().transpose((1, 2, 0))

    for part in range(18):  # no NMS on the background channel (index 18)
        map_ori = heatmap_avg[:, :, part]
        # map = gaussian_filter(map_ori, sigma=3)  # seems to work better without Gaussian filtering?
        peaks_binary = filter_map[:, :, part]
        peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))
        # note the reversal: np.nonzero returns (row, col) = (y, x), while peaks are stored as (x, y)

        # weighted-centroid refinement; the radius is chosen per keypoint type, since
        # different keypoint types spread differently
        refined_peaks_with_score = [util.refine_centroid(map_ori, anchor, params['offset_radius'])
                                    for anchor in peaks]
        # peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]  # the old, unrefined version
        # refined_peaks_with_score, e.g. [(205, 484, 0.9319), (595, 484, 0.7778), ...]
        id = range(peak_counter, peak_counter + len(refined_peaks_with_score))
        peaks_with_score_and_id = [refined_peaks_with_score[i] + (id[i],) for i in range(len(id))]
        # every peak (part) gets a running id; if a part type has no detections, an empty list
        # is appended, e.g. all_peaks[k]: [(205, 484, 0.9319, 25), (595, 484, 0.7778, 26), ...]
        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # ------------------------------- find connections -------------------------------
    connection_all = []
    special_k = []
    # one connection list (and one PAF channel) per limb
    for k in range(len(limbSeq)):  # outer loop over limb types
        score_mid = paf_avg[:, :, k]  # this limb's response map, already resized to the input image size
        # score_mid = gaussian_filter(score_mid, sigma=3)  # fixme: use Gaussian blur?
        candA = all_peaks[limbSeq[k][0]]  # all_peaks is a list of lists, one per part type;
        candB = all_peaks[limbSeq[k][1]]  # mind whether part indices start at 0 or 1
        nA = len(candA)
        nB = len(candB)
        indexA, indexB = limbSeq[k]
        if (nA != 0 and nB != 0):
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                    mid_num = min(int(round(norm + 1)), params['mid_num'])
                    if norm == 0:
                        # failure case when two body parts overlap at the same position; skipping
                        # avoids division by zero (some add a small epsilon to norm instead), see
                        # https://github.com/ZheC/Realtime_Multi-Person_Pose_Estimation/issues/54
                        continue
                    startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                        np.linspace(candA[i][1], candB[j][1], num=mid_num)))
                    limb_response = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0]))]
                                              for I in range(len(startend))])
                    score_midpts = limb_response
                    score_with_dist_prior = sum(score_midpts) / len(score_midpts) + \
                        min(0.5 * oriImg.shape[0] / norm - 1, 0)
                    # the second term penalizes overly long connections, and only kicks in when the
                    # limb is longer than half the image height; on sum(score_midpts)/len(score_midpts), see
                    # https://github.com/michalfaber/keras_Realtime_Multi-Person_Pose_Estimation/issues/48
                    criterion1 = len(np.nonzero(score_midpts > params['thre2'])[0]) > \
                        params['connect_ration'] * len(score_midpts)  # fixme: tune; originally > 0.8 * len
                    # criterion1 enforces a consistent PAF orientation along the limb (params['thre2'] = 0.05)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([
                            i, j, score_with_dist_prior, norm,
                            0.5 * score_with_dist_prior + 0.25 * candA[i][2] + 0.25 * candB[j][2]])
                        # todo: simply adding the two kinds of probabilities may not be reasonable
            # candidates are ranked by the distance-prior score plus both endpoints' heatmap scores
            connection_candidate = sorted(connection_candidate, key=lambda x: x[4], reverse=True)
            # sort in descending order so the most limb-like candidates are kept first, while
            # endpoints competing with an already-kept limb are dropped
            connection = np.zeros((0, 6))
            for c in range(len(connection_candidate)):  # pick connections in confidence order
                i, j, s, limb_len = connection_candidate[c][0:4]
                if (i not in connection[:, 3] and j not in connection[:, 4]):
                    # make sure no point in set A or B is connected to two points of the other set
                    connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j, limb_len]])
                    # candA[i][3], candB[j][3] are the part ids (used later)
                    if (len(connection) >= min(nA, nB)):  # we may run out of parts to connect
                        break
            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])
            # appending an empty list is required: each row of connection_all stands for one limb type

    # ------------------------------- find people -------------------------------
    # the last number in each row is the total part count of that person;
    # the second-to-last number is the score of the overall configuration
    subset = -1 * np.ones((0, 20, 2))
    candidate = np.array([item for sublist in all_peaks for item in sublist])
    # candidate[:, 2] *= 0.5  # FIXME: change it? part confidence * 0.5
    # candidate.shape = (N, 4); the nested comprehension flattens the two-level all_peaks list

    for k in range(len(limbSeq)):
        # outer loop over limbs: each limb is its own sub-problem, handled greedily;
        # special_k marks limbs for which no keypoint pair was matched
        if k not in special_k:
            partAs = connection_all[k][:, 0]  # candidate ids of the limb's A endpoints
            partBs = connection_all[k][:, 1]  # candidate ids of the limb's B endpoints
            # each row of connection_all[k] is [idA, idB, score, i, j, limb_len]
            indexA, indexB = np.array(limbSeq[k])  # part-type indices of limb k's two endpoints
            # follow limbSeq in order, from one joint type to the next
            for i in range(len(connection_all[k])):
                # assign connection (partAs[i], partBs[i]) of limb type k to some person in subset;
                # roughly, each instance of a given limb type corresponds to one person
                found = 0
                subset_idx = [-1, -1]  # each iteration resolves two parts, so two flags suffice
                for j in range(len(subset)):
                    # loop over all people found so far; if subset.shape = (5, 20, 2), there are 5 people,
                    # and each row holds one person's 18 keypoints plus the count and score entries
                    if subset[j][indexA][0].astype(int) == (partAs[i]).astype(int) or \
                            subset[j][indexB][0].astype(int) == partBs[i].astype(int):
                        # one of this limb's endpoints was already assigned to person j in an earlier round
                        subset_idx[found] = j
                        found += 1
                if found == 1:
                    j = subset_idx[0]
                    if subset[j][indexB][0].astype(int) == -1 and \
                            params['len_rate'] * subset[j][-1][1] > connection_all[k][i][-1]:
                        # discard a limb much longer than the ones already assembled; otherwise assign
                        # the part to this person if it is still missing. This check matters: limbs 18
                        # and 19 (2->16, 5->17) revisit parts already found by earlier limbs. If both
                        # predictions agree, the assignment is left unchanged; blindly re-assigning would
                        # re-increment the part count and push a person's total above 18. If the two
                        # predictions disagree, overcounting (and double detection) can still occur.
                        # FIXME: the redundant connection info is overwritten instead of aggregated;
                        # the overwrite is kept to compensate for missed detections.
                        subset[j][indexB][0] = partBs[i]  # partBs[i] is the id of one limb endpoint
                        subset[j][indexB][1] = connection_all[k][i][2]  # keep this point's connection confidence
                        subset[j][-1][0] += 1  # last entry: total part count of this person
                        # subset[j][-2][1] could record the configuration score before this part type is
                        # added, to avoid double-adding when the same part type shows up again later
                        subset[j][-2][0] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                        # candidate rows look like (343, 490, 0.8145, 27), ...
                        subset[j][-1][1] = max(connection_all[k][i][-1], subset[j][-1][1])
                        # track the longest limb seen for this person (used by the len_rate check)
                    elif subset[j][indexB][0].astype(int) != partBs[i].astype(int):
                        if subset[j][indexB][1] >= connection_all[k][i][2]:
                            # the existing connection is more confident than this one, skip it
                            pass
                        else:
                            # otherwise overwrite the existing endpoint with the current one, first
                            # subtracting the old part's confidence and the confidence of its limb
                            if params['len_rate'] * subset[j][-1][1] <= connection_all[k][i][-1]:
                                continue
                            # remove the previous part and limb confidences
                            subset[j][-2][0] -= candidate[subset[j][indexB][0].astype(int), 2] + subset[j][indexB][1]
                            # add the current part
                            subset[j][indexB][0] = partBs[i]
                            subset[j][indexB][1] = connection_all[k][i][2]
                            subset[j][-2][0] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                            subset[j][-1][1] = max(connection_all[k][i][-1], subset[j][-1][1])
                    elif subset[j][indexB][0].astype(int) == partBs[i].astype(int) and \
                            subset[j][indexB][1] <= connection_all[k][i][2]:
                        # overlap the reassigned keypoint: a redundant connection to the same part with
                        # a higher probability replaces the existing one (this change has little effect)
                        if params['len_rate'] * subset[j][-1][1] <= connection_all[k][i][-1]:
                            continue
                        # remove the previous part and limb confidences
                        subset[j][-2][0] -= candidate[subset[j][indexB][0].astype(int), 2] + subset[j][indexB][1]
                        # add the current part
                        subset[j][indexB][0] = partBs[i]
                        subset[j][indexB][1] = connection_all[k][i][2]
                        subset[j][-2][0] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
                        subset[j][-1][1] = max(connection_all[k][i][-1], subset[j][-1][1])

                elif found == 2:  # if found 2 and disjoint, merge them
                    # the limb's endpoints A and B landed on two different people, so those two people
                    # must really be one person whose assembly broke somewhere: a missing connection
                    # opened a second sub-person earlier (with strictly in-order stitching this would
                    # not happen). If humans H1 and H2 share a part index with the same coordinates,
                    # they are the same person, so merge both sets into H1 and remove H2:
                    # https://arvrjourney.com/human-pose-estimation-using-openpose-with-tensorflow-part-2-e78ab9104fc8
                    # (this code differs from the link; the linked approach seems more reasonable)
                    j1, j2 = subset_idx
                    membership1 = ((subset[j1][..., 0] >= 0).astype(int))[:-2]  # [:, 0] works as well
                    membership2 = ((subset[j2][..., 0] >= 0).astype(int))[:-2]
                    membership = membership1 + membership2
                    # [:-2] excludes the count and score entries; counts each part type in either person
                    if len(np.nonzero(membership == 2)[0]) == 0:  # disjoint: merge them
                        min_limb1 = np.min(subset[j1, :-2, 1][membership1 == 1])
                        min_limb2 = np.min(subset[j2, :-2, 1][membership2 == 1])
                        min_tolerance = min(min_limb1, min_limb2)  # the confidence floor for merging
                        if connection_all[k][i][2] < params['connection_tole'] * min_tolerance or \
                                params['len_rate'] * subset[j1][-1][1] <= connection_all[k][i][-1]:
                            # skip if the merge confidence is too low, or the current limb is clearly
                            # longer than the ones already present
                            # todo: finetune the tolerance of connection
                            continue
                        subset[j1][:-2][...] += (subset[j2][:-2][...] + 1)
                        # adding the ids of the two disjoint rows merges them; +1 because unset entries
                        # default to -1, so positions empty in both rows stay -1 after the merge
                        subset[j1][-2:][:, 0] += subset[j2][-2:][:, 0]  # add part counts and total scores
                        subset[j1][-2][0] += connection_all[k][i][2]
                        subset[j1][-1][1] = max(connection_all[k][i][-1], subset[j1][-1][1])
                        # note: since the two rows are disjoint, the confidences of pre-existing parts
                        # were already added; only the current limb's confidence is added here
                        subset = np.delete(subset, j2, 0)
                    else:
                        # two different people compete for one limb: compare the confidences of the
                        # connections containing it to decide who gets the endpoint, and remove the
                        # conflicting part (partAs[i]) from the loser's part set
                        if connection_all[k][i][0] in subset[j1, :-2, 0]:
                            c1 = np.where(subset[j1, :-2, 0] == connection_all[k][i][0])
                            c2 = np.where(subset[j2, :-2, 0] == connection_all[k][i][1])
                        else:
                            c1 = np.where(subset[j1, :-2, 0] == connection_all[k][i][1])
                            c2 = np.where(subset[j2, :-2, 0] == connection_all[k][i][0])
                        # c1, c2: the slots where the current limb attaches to person j1 and person j2
                        c1 = int(c1[0])
                        c2 = int(c2[0])
                        assert c1 != c2, "a candidate keypoint is used twice, shared by two people"
                        # skip if the current limb is less confident than both existing connections;
                        # otherwise delete the less credible existing connection
                        if connection_all[k][i][2] < subset[j1][c1][1] and \
                                connection_all[k][i][2] < subset[j2][c2][1]:
                            continue  # the trick here is useful
                        small_j = j1
                        big_j = j2
                        remove_c = c1
                        if subset[j1][c1][1] > subset[j2][c2][1]:
                            small_j = j2
                            big_j = j1
                            remove_c = c2
                        # delete the lower-confidence part connected to the current limb
                        if params['remove_recon'] > 0:
                            subset[small_j][-2][0] -= candidate[subset[small_j][remove_c][0].astype(int), 2] + \
                                subset[small_j][remove_c][1]
                            subset[small_j][remove_c][0] = -1
                            subset[small_j][remove_c][1] = -1
                            subset[small_j][-1][0] -= 1

                elif not found and k < len(limbSeq):
                    # neither endpoint A nor B belongs to an existing person: create a new person.
                    # 1. Sort each possible connection by its score.
                    # 2. The connection with the highest score is indeed a final connection.
                    # 3. Move to the next possible connection. If no parts of this connection have
                    #    been assigned to a final connection before, this is a final connection.
                    # 4. Repeat step 3 until we are done.
                    # See https://arvrjourney.com/human-pose-estimation-using-openpose-with-tensorflow-part-2-e78ab9104fc8
                    # Fixme: check this bound. Originally k < 18; limbs were added so the real limbs
                    # are 0~16 and the last two (17, 18) are auxiliary. The nose/eye/ear links are not
                    # drawn yet when rendering, which needs improving.
                    row = -1 * np.ones((20, 2))
                    row[indexA][0] = partAs[i]
                    row[indexA][1] = connection_all[k][i][2]
                    row[indexB][0] = partBs[i]
                    row[indexB][1] = connection_all[k][i][2]
                    row[-1][0] = 2
                    row[-1][1] = connection_all[k][i][-1]
                    # record this limb's length, used as a prior when connecting the next round
                    row[-2][0] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
                    # both endpoint confidences + the limb connection confidence
                    row = row[np.newaxis, :, :]  # insert an axis so we can concatenate
                    subset = np.concatenate((subset, row), axis=0)

    # delete the rows of subset in which too few parts occur
    deleteIdx = []
    for i in range(len(subset)):
        if subset[i][-1][0] < 4 or subset[i][-2][0] / subset[i][-1][0] < 0.45:
            # original threshold: (params['thre1'] + params['thre2']) / 2  # todo: tune, it matters much!
            deleteIdx.append(i)
    subset = np.delete(subset, deleteIdx, axis=0)

    canvas = cv2.imread(input_image)  # B,G,R order
    keypoints = []
    for s in subset[..., 0]:
        keypoint_indexes = s[:18]  # 18 keypoints are defined in total
        person_keypoint_coordinates = []
        for index in keypoint_indexes:
            if index == -1:
                # no candidate for this keypoint: parts flagged -1 were not detected
                X, Y = 0, 0
            else:
                X, Y = candidate[index.astype(int)][:2]
            person_keypoint_coordinates.append((X, Y))
        person_keypoint_coordinates_coco = [None] * 17
        for dt_index, gt_index in dt_gt_mapping.items():
            if gt_index is None:
                continue
            person_keypoint_coordinates_coco[gt_index] = person_keypoint_coordinates[dt_index]
        keypoints.append((person_keypoint_coordinates_coco, 1 - 1.0 / s[-2]))  # s[-2] is the configuration score

    for i in range(len(keypoints)):
        print('the {}th keypoint detection result is : '.format(i), keypoints[i])

    # draw all the peaks:
    # for i in range(18):
    #     for j in range(len(all_peaks[i])):  # all_peaks stores coordinates, score and id
    #         # mind which of x and y comes first; it is a bit inconsistent in this project
    #         cv2.circle(canvas, all_peaks[i][j][0:2], 3, colors[i], thickness=-1)

    # draw all the skeletons
    color_board = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
    color_idx = 0
    for i in draw_list:  # draw the limbs; fixme: 25 limbs are designed, so the drawing order and color count need adjusting
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])][..., 0]
            if -1 in index:
                # -1 means a part of this limb type is missing, so the limb cannot be drawn
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), 3), int(angle), 0, 360, 1)
            cv2.circle(cur_canvas, (int(Y[0]), int(X[0])), 4, color=[0, 0, 0], thickness=2)
            cv2.circle(cur_canvas, (int(Y[1]), int(X[1])), 4, color=[0, 0, 0], thickness=2)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[color_board[color_idx]])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
        color_idx += 1
    return canvas
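# --- Illustrative helpers (not part of the original file) --------------------------
# `process` leans on two undefined `util` helpers for the keypoint-finding step.
# Minimal sketches consistent with their call sites above; the project's real
# implementations may differ.

def keypoint_heatmap_nms_sketch(heat, kernel=3, thre=0.1):
    # Max-pooling NMS on GPU: keep a response only where it equals the local maximum
    # of its kernel x kernel neighbourhood and exceeds the threshold, so np.nonzero
    # on the result yields exactly the peak positions.
    hmax = F.max_pool2d(heat, kernel, stride=1, padding=(kernel - 1) // 2)
    keep = (hmax == heat).float() * (heat > thre).float()
    return heat * keep

def refine_centroid_sketch(scoremap, anchor, radius):
    # Sub-pixel refinement: response-weighted centroid of a (2*radius+1)^2 window
    # around the integer peak; returns an (x, y, score) tuple so the running id can
    # be appended by tuple concatenation, as done in `process`.
    x, y = int(anchor[0]), int(anchor[1])
    h, w = scoremap.shape
    x0, x1 = max(x - radius, 0), min(x + radius + 1, w)
    y0, y1 = max(y - radius, 0), min(y + radius + 1, h)
    patch = scoremap[y0:y1, x0:x1]
    xs, ys = np.meshgrid(np.arange(x0, x1), np.arange(y0, y1))
    total = patch.sum()
    if total <= 0:
        return (float(x), float(y), float(scoremap[y, x]))
    return (float((xs * patch).sum() / total),
            float((ys * patch).sum() / total),
            float(scoremap[y, x]))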
def apply_model(oriImg, model, multiplier, numPoints, roi_str):
    stride = 8
    roiPoint = roi_str.split('_')  # ROI encoded as "y0_x0_y1_x1"
    newImg = oriImg[int(roiPoint[0]):int(roiPoint[2]), int(roiPoint[1]):int(roiPoint[3])]
    height, width, _ = newImg.shape
    oriImg = newImg

    # convert the image to a float32 array
    normed_img = np.array(oriImg, dtype=np.float32)
    # allocate an all-zero map of the same size; its channel count is the number of points to predict
    heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], numPoints), dtype=np.float32)

    # iterate over the scale factors
    for m in range(len(multiplier)):
        scale = multiplier[m]
        imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        # pad the input up to a standard size (an earlier variant used stride=8 instead of 32):
        # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128)
        imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 32, 128)
        input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]),
                                 (3, 2, 0, 1)) / 255 - 0.5  # required shape (1, c, h, w)
        input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda())

        # get the heatmap; for a model that returns one feature map per stage, this would be
        # heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var)
        heat = model(input_var)
        heatmap = heat.data.cpu().numpy()
        heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0))  # (h, w, c)
        heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
        heatmap_avg = heatmap_avg + heatmap / len(multiplier)

    all_peaks = []  # all of the possible points, by class
    peak_counter = 0
    thre1 = 0.1
    for part in range(numPoints - 1):
        map_ori = heatmap_avg[:, :, part]
        map = gaussian_filter(map_ori, sigma=3)

        map_left = np.zeros(map.shape)
        map_left[1:, :] = map[:-1, :]
        map_right = np.zeros(map.shape)
        map_right[:-1, :] = map[1:, :]
        map_up = np.zeros(map.shape)
        map_up[:, 1:] = map[:, :-1]
        map_down = np.zeros(map.shape)
        map_down[:, :-1] = map[:, 1:]

        # a peak must dominate its 4-neighbourhood and exceed the threshold
        peaks_binary = np.logical_and.reduce(
            (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1))
        peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse: (x, y)
        peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
        id = range(peak_counter, peak_counter + len(peaks))
        peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # sort by score and keep the single best peak per class
    for i in range(numPoints - 1):
        all_peaks[i] = sorted(all_peaks[i], key=lambda ele: ele[2], reverse=True)
    keypoints = -1 * np.ones((numPoints - 1, 3))
    for i in range(numPoints - 1):
        if len(all_peaks[i]) == 0:
            continue
        keypoints[i, 0], keypoints[i, 1], keypoints[i, 2] = all_peaks[i][0][0], all_peaks[i][0][1], 1

    # map the keypoints back from ROI coordinates to the full image
    keypoints[:, 0] = keypoints[:, 0] + int(roiPoint[1])
    keypoints[:, 1] = keypoints[:, 1] + int(roiPoint[0])
    return keypoints
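# --- Hypothetical usage of apply_model (names are placeholders) --------------------
# Draw the single best candidate per keypoint class onto the frame. `net` is assumed
# to be a loaded single-branch heatmap model; column 2 of the returned array is 1
# where a peak was found and -1 otherwise.
def draw_roi_keypoints_sketch(frame, net):
    kps = apply_model(frame, net, multiplier=[0.5, 1.0], numPoints=19,
                      roi_str='0_0_368_368')  # hypothetical ROI "y0_x0_y1_x1"
    for x, y, found in kps:
        if found > 0:
            cv2.circle(frame, (int(x), int(y)), 3, (0, 255, 0), thickness=-1)
    return frame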
def predict(image, params, model, model_params, heat_layers, paf_layers, input_image_path):
    heatmap_avg = np.zeros((image.shape[0], image.shape[1], heat_layers))
    paf_avg = np.zeros((image.shape[0], image.shape[1], paf_layers))
    multiplier = [x * model_params['boxsize'] / image.shape[0] for x in params['scale_search']]
    # dropping the 368 boxsize normalization made results slightly worse
    # multiplier = [1]  # fixme: add this line
    rotate_angle = params['rotation_search']

    for item in product(multiplier, rotate_angle):
        scale, angle = item
        # if scale * image.shape[0] > 2300 or scale * image.shape[1] > 3400:
        #     scale = min(2300 / image.shape[0], 3400 / image.shape[1])
        if scale * image.shape[0] > 2600 or scale * image.shape[1] > 3800:  # we changed these bounds
            scale = min(2600 / image.shape[0], 3800 / image.shape[1])
            print("Input image: '{}' is too big, shrink it!".format(input_image_path))

        imageToTest = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest,
                                                          model_params['max_downsample'],
                                                          model_params['padValue'])

        # Important! We use OpenCV to read images (BGR) all the time.
        # Input tensor: a batch of images within [0, 1], required shape in this project: (1, height, width, channels)
        input_img = np.float32(imageToTest_padded / 255)

        # rotate the input image; the same center is passed to both the forward and the
        # reverse warp, so the two transforms cancel exactly
        if angle != 0:
            rotate_matrix = cv2.getRotationMatrix2D(
                (input_img.shape[0] / 2, input_img.shape[1] / 2), angle, 1)
            rotate_matrix_reverse = cv2.getRotationMatrix2D(
                (input_img.shape[0] / 2, input_img.shape[1] / 2), -angle, 1)
            input_img = cv2.warpAffine(input_img, rotate_matrix, (0, 0))

        # input_img -= np.array(config.img_mean[::-1])  # notice: OpenCV uses BGR, so reverse the last axis
        # input_img /= np.array(config.img_std[::-1])

        # add the flipped image for the flip ensemble
        swap_image = input_img[:, ::-1, :].copy()
        # plt.imshow(swap_image[:, :, [2, 1, 0]])  # OpenCV image format: BGR
        input_img = np.concatenate((input_img[None, ...], swap_image[None, ...]),
                                   axis=0)  # (2, height, width, channels)
        input_img = torch.from_numpy(input_img).cuda()

        output_tuple = posenet(input_img)  # output tensor dtype: float16
        output = output_tuple[-1][0].cpu().numpy()  # the different scales can be inspected here

        output_blob = output[0].transpose((1, 2, 0))
        output_blob0 = output_blob[:, :, :config.paf_layers]  # PAF layers
        output_blob1 = output_blob[:, :, config.paf_layers:config.num_layers]  # keypoint layers

        output_blob_flip = output[1].transpose((1, 2, 0))
        output_blob0_flip = output_blob_flip[:, :, :config.paf_layers]  # PAF layers
        output_blob1_flip = output_blob_flip[:, :, config.paf_layers:config.num_layers]  # keypoint layers

        # flip ensemble
        output_blob0_avg = (output_blob0 + output_blob0_flip[:, ::-1, :][:, :, flip_paf_ord]) / 2
        output_blob1_avg = (output_blob1 + output_blob1_flip[:, ::-1, :][:, :, flip_heat_ord]) / 2

        # extract outputs, resize, and remove padding
        heatmap = cv2.resize(output_blob1_avg, (0, 0),
                             fx=model_params['stride'], fy=model_params['stride'],
                             interpolation=cv2.INTER_CUBIC)
        if angle != 0:
            heatmap = cv2.warpAffine(heatmap, rotate_matrix_reverse, (0, 0))
        heatmap = heatmap[pad[0]:imageToTest_padded.shape[0] - pad[2],
                          pad[1]:imageToTest_padded.shape[1] - pad[3], :]
        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)

        # output_blob0 is PAFs
        paf = cv2.resize(output_blob0_avg, (0, 0),
                         fx=model_params['stride'], fy=model_params['stride'],
                         interpolation=cv2.INTER_CUBIC)
        if angle != 0:
            paf = cv2.warpAffine(paf, rotate_matrix_reverse, (0, 0))
        paf = paf[pad[0]:imageToTest_padded.shape[0] - pad[2],
                  pad[1]:imageToTest_padded.shape[1] - pad[3], :]
        paf = cv2.resize(paf, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_CUBIC)

        heatmap_avg = heatmap_avg + heatmap / (len(multiplier) * len(rotate_angle))
        paf_avg = paf_avg + paf / (len(multiplier) * len(rotate_angle))
        # heatmap_avg = np.maximum(heatmap_avg, heatmap)
        # paf_avg = np.maximum(paf_avg, paf)  # taking the max instead degrades results: many false positives

    return heatmap_avg, paf_avg
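# --- Hypothetical driver for predict (names are placeholders) ----------------------
# Average heatmaps/PAFs over the scale x rotation ensemble for one image. `posenet`
# and `config` are the module-level globals used inside predict (the `model` argument
# is not used by its body); heat_layers + paf_layers are assumed to add up to
# config.num_layers, matching the channel slicing above.
def run_predict_sketch(image_path, params, model_params):
    img = cv2.imread(image_path)  # BGR, as everywhere in this project
    heatmap, paf = predict(img, params, posenet, model_params,
                           heat_layers=config.num_layers - config.paf_layers,
                           paf_layers=config.paf_layers,
                           input_image_path=image_path)
    return heatmap, paf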
def predict_refactor(image, model, test_cfg, model_cfg, input_image_path, flip_avg=True, config=None):
    # > feature maps are returned at network resolution; scaling up to image size is left to the caller
    img_h, img_w, _ = image.shape
    heatmap_avg, paf_avg = None, None
    # > [1] scale search
    multiplier = [x * model_cfg['boxsize'] / img_h for x in test_cfg['scale_search']]
    # > [2] fix scale
    multiplier = [1.]  # > overrides [0.5, 1., 1.5, 2., 3.]
    rotate_angle = test_cfg['rotation_search']  # > 0.0
    max_downsample = model_cfg['max_downsample']
    pad_value = model_cfg['padValue']
    stride = model_cfg['stride']
    flip_heat_ord = config.flip_heat_ord
    flip_paf_ord = config.flip_paf_ord

    for item in product(multiplier, rotate_angle):  # > #scales
        scale, angle = item
        img_max_h, img_max_w = (2600, 3800)  # CHANGED: (2300, 3200) -> (2600, 3800)
        if scale * img_h > img_max_h or scale * img_w > img_max_w:
            scale = min(img_max_h / img_h, img_max_w / img_w)
            print("Input image: '{}' is too big, shrink it!".format(input_image_path))

        # > `imageToTest`: (scaleH, scaleW, 3)
        imageToTest = cv2.resize(image, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        # > `imageToTest_padded`: (scale_padH, scale_padW, 3)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest,
                                                          max_downsample,  # > 64
                                                          pad_value)  # > 128
        scale_padH, scale_padW, _ = imageToTest_padded.shape

        # > WARN `[1-1]`: we use OpenCV to read images (BGR) all the time
        input_img = np.float32(imageToTest_padded / 255)

        # > `[1-2a]`: add rotated image
        if angle != 0:  # ADDED
            rotate_matrix = cv2.getRotationMatrix2D(
                (input_img.shape[0] / 2, input_img.shape[1] / 2), angle, 1)
            rotate_matrix_reverse = cv2.getRotationMatrix2D(
                (input_img.shape[0] / 2, input_img.shape[1] / 2), -angle, 1)
            input_img = cv2.warpAffine(input_img, rotate_matrix, (0, 0))

        # > `[1-2b]`: add flipped image
        swap_image = input_img[:, ::-1, :].copy()
        # plt.imshow(swap_image[:, :, [2, 1, 0]])  # OpenCV image format: BGR
        input_img = np.concatenate((input_img[None, ...], swap_image[None, ...]), axis=0)  # (2, H, W, C)
        input_img = torch.from_numpy(input_img).cuda()

        # > `[1-3]-model`: (4,) = (2, 50, featH, featW) x 4, `dtype=float16`
        output_tuple = model(input_img)  # > NOTE: feed img here -> (#stage, #scales, #img, 50, H, W)
        # > `[1-4]`: scales vary according to input image size;
        # > `-1`: last stage, `0`: high-res feature maps
        output = output_tuple[-1][0].cpu().numpy()  # -> (2, 50, featH, featW)

        output_blob = output[0].transpose((1, 2, 0))  # > (featH, featW, 50)
        output_paf = output_blob[:, :, :config.paf_layers]  # > `PAF`: (featH, featW, 30)
        output_heatmap = output_blob[:, :, config.paf_layers:config.num_layers]  # > `KP`: (featH, featW, 20)

        # > flipped image output
        output_blob_flip = output[1].transpose((1, 2, 0))  # > (featH, featW, 50)
        output_paf_flip = output_blob_flip[:, :, :config.paf_layers]  # > `PAF`: (featH, featW, 30)
        output_heatmap_flip = output_blob_flip[:, :, config.paf_layers:config.num_layers]  # > `KP`: (featH, featW, 20)

        # > `[1-5]`: flip ensemble & average
        if flip_avg:
            output_paf_avg = (output_paf +
                              output_paf_flip[:, ::-1, :][:, :, flip_paf_ord]) / 2  # > (featH, featW, 30)
            output_heatmap_avg = (output_heatmap +
                                  output_heatmap_flip[:, ::-1, :][:, :, flip_heat_ord]) / 2  # > (featH, featW, 20)
        else:
            output_paf_avg = output_paf  # > (featH, featW, 30)
            output_heatmap_avg = output_heatmap  # > (featH, featW, 20)

        if angle != 0:  # ADDED
            output_heatmap_avg = cv2.warpAffine(output_heatmap_avg, rotate_matrix_reverse, (0, 0))
            output_paf_avg = cv2.warpAffine(output_paf_avg, rotate_matrix_reverse, (0, 0))

        heatmap_avg = output_heatmap_avg
        paf_avg = output_paf_avg

    return heatmap_avg.astype(np.float32), paf_avg.astype(np.float32)
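# --- Hypothetical usage of predict_refactor (names are placeholders) ---------------
# Unlike `predict` above, this variant returns maps at feature resolution, so the
# caller upsamples them back to the input size. The direct resize below ignores the
# pad removal that `predict` performs, so it is only an approximate sketch.
def run_predict_refactor_sketch(image_path, test_cfg, model_cfg):
    img = cv2.imread(image_path)  # BGR, as everywhere in this project
    heatmap, paf = predict_refactor(img, posenet, test_cfg, model_cfg,
                                    image_path, flip_avg=True, config=config)
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
    paf = cv2.resize(paf, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC)
    return heatmap, paf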