def reset(self):
    """Start a new episode and return its first preprocessed frame.

    Zeroes the step counter, clears the done flag, asks the wrapped
    environment for a new episode, then reads the current screen buffer.
    If ``self.transpose`` is truthy it is used as the axis permutation for
    ``np.transpose``. The frame is cast to ``uint8`` and passed through
    ``resize`` before being returned.

    Returns:
        Whatever ``resize`` produces for the initial ``uint8`` frame.
    """
    self.time = 0
    self.done = False
    self.env.new_episode()

    frame = self.env.get_state().screen_buffer
    # A falsy ``self.transpose`` leaves the buffer in its native axis order.
    frame = np.transpose(frame, self.transpose) if self.transpose else frame
    return resize(frame.astype(np.uint8))
def step(self, action, train=False):
    """Apply one action to the environment and return the transition.

    Args:
        action: Action identifier; converted by ``self.one_hot`` before being
            passed to ``self.env.make_action``.
        train: Unused by this method.

    Returns:
        A 4-tuple ``(observation, reward, done, None)``. ``observation`` is
        the ``uint8`` frame after ``resize``; once the episode is done the
        frame is an all-zero array of shape ``self.size`` instead of a
        screen buffer.
    """
    self.time += 1
    reward = self.env.make_action(self.one_hot(action))

    # The flag is sticky: a previously set ``self.done`` is kept even if the
    # environment does not report the episode as finished.
    self.done = self.env.is_episode_finished() or self.done

    if self.done:
        # The environment state is not queried after the episode has ended.
        frame = np.zeros(self.size)
    else:
        frame = self.env.get_state().screen_buffer

    if self.transpose:
        frame = np.transpose(frame, self.transpose)

    observation = resize(frame.astype(np.uint8))
    return observation, reward, self.done, None
def multiperson(img, func, mode):
    """Run multi-scale, flip-augmented inference and group keypoints by person.

    1. Warp the image to one or more square resolutions and pass each warped
       image, plus its horizontal mirror, through ``func``.
    2. Sum the detection maps across scales and let ``parser`` group them
       into people.
    3. Run ``refine`` on every person, then map the coordinates back through
       the inverse of the warp used for the first accepted scale.

    Args:
        img: Image array; ``img.shape[0:2]`` is read as (height, width) and
            the array is warped with ``cv2.warpAffine``.
        func: Callable invoked as ``func([inp])``. The first element of its
            result is sliced as ``[:, :, :17]`` (detections) and
            ``[:, -1, 17:34]`` (tags).
        mode: ``'multi'`` evaluates scales 2, 1 and 0.5; any other value
            evaluates scale 1 only.

    Returns:
        ``(grouped, scores)`` where ``grouped`` is the refined output of
        ``parser.parse`` with its first two coordinate columns mapped by
        ``kpt_affine``, and ``scores`` holds the mean of column 2 for each
        person, taken before refinement. Returns ``([], [])`` when no scale
        produced both detections and tags.
    """
    scales = [2, 1., 0.5] if mode == 'multi' else [1]

    height, width = img.shape[0:2]
    center = (width / 2, height / 2)
    scale = max(height, width) / 200

    def split_outputs(raw):
        # Detections keep the full second axis; tags keep only its last entry.
        head = raw[0]
        return {'det': head[:, :, :17], 'tag': head[:, -1, 17:34]}

    dets, tags = None, []
    for factor in scales:
        # Round the network input side up to a multiple of 64.
        side = int((factor * 512 + 63) // 64 * 64)
        net_size = (side, side)

        warp = get_transform(center, scale, net_size)[:2]
        inp = cv2.warpAffine(img, warp, net_size) / 255

        plain = split_outputs(func([inp]))
        mirrored = split_outputs(func([inp[:, ::-1]]))
        merged = {
            key: np.concatenate((plain[key], mirrored[key]), axis=0)
            for key in plain
        }

        # Un-mirror the flipped prediction along its last axis and reorder its
        # channels with ``flipRef`` (presumably a left/right joint swap -
        # TODO confirm), then add it to the unflipped prediction.
        det = merged['det'][0, -1] + merged['det'][1, -1, :, :, ::-1][flipRef]
        if det.max() > 10:
            # Scales whose summed map exceeds 10 are dropped entirely; the
            # reason is not evident from this code.
            continue

        if dets is None:
            dets = det
            # Inverse of the 2x3 warp, via the pseudo-inverse of its 3x3
            # homogeneous form.
            mat = np.linalg.pinv(np.array(warp).tolist() + [[0, 0, 1]])[:2]
        else:
            dets = dets + resize(det, dets.shape[1:3])

        # Of the scales used here, only factor 1 satisfies this test.
        if abs(factor - 1) < 0.5:
            map_size = dets.shape[1:3]
            tags += [
                resize(merged['tag'][0], map_size),
                resize(merged['tag'][1, :, :, ::-1][flipRef], map_size),
            ]

    if dets is None or len(tags) == 0:
        return [], []

    tags = np.concatenate([t[:, :, :, None] for t in tags], axis=3)
    # Divide by the number of configured scales and by 2 for the flip pair,
    # then cap at 1.
    dets = np.minimum(dets / len(scales) / 2, 1)

    grouped = parser.parse(np.float32([dets]), np.float32([tags]))[0]
    scores = [person[:, 2].mean() for person in grouped]

    for k, person in enumerate(grouped):
        grouped[k] = refine(dets, tags, person)

    if len(grouped) > 0:
        # The factor 4 presumably converts heatmap to network-input
        # coordinates - TODO confirm against the network stride.
        grouped[:, :, :2] = kpt_affine(grouped[:, :, :2] * 4, mat)
    return grouped, scores
def multiperson(img, func, mode):
    """Detect people in ``img`` and return their keypoints in three forms.

    1. Resize the image to different scales and pass each scale (and its
       horizontal mirror) through the network.
    2. Merge the outputs across scales and find people with ``H_parser``.
    3. Find the missing joints of the people with a second pass over the
       heatmaps (``refine``), then map coordinates back to the input image.

    Args:
        img: Image array; ``img.shape[0:2]`` is read as (height, width) and
            the array is warped with ``cv2.warpAffine``.
        func: Callable invoked as ``func([inp])``; this is the network
            forward pass. The first element of its result is sliced as
            ``[:, :, :17]`` (detections) and ``[:, -1, 17:34]`` (tags).
        mode: ``'multi'`` evaluates scales 2, 1 and 0.5; any other value
            evaluates scale 1 only.

    Returns:
        A 3-tuple ``(persons, grouped, scores)``:

        * ``persons`` - list of dicts with a flat ``"keypoints"`` list
          (x, y, confidence per joint, zeros for joints whose confidence is
          not positive) and a ``"score"`` (mean confidence after refinement).
          People whose maximum confidence is not positive are omitted.
        * ``grouped`` - the refined ``H_parser.parse`` output with its first
          two coordinate columns mapped by ``kpt_affine``.
        * ``scores`` - mean of column 2 per person, taken before refinement.

        When no scale yields both detections and tags, ``([], [], [])`` is
        returned so the result always unpacks into three values.
    """
    scales = [2, 1., 0.5] if mode == 'multi' else [1]

    height, width = img.shape[0:2]
    center = (width / 2, height / 2)
    scale = max(height, width) / 200

    def array2dict(tmp):
        # Per the original author's notes: tmp[0] has shape
        # [1, 4, 68, 128, 128]. Each of the 4 stacked hourglass modules emits
        # 68 maps; the first 17 are keypoint detection scores and maps 17-34
        # are the tag embeddings.
        return {
            'det': tmp[0][:, :, :17],  # all stages, first 17 channels
            'tag': tmp[0][:, -1, 17:34],  # last stage only, channels 17-34
        }

    dets, tags = None, []
    for i in scales:
        # Network input side for this scale, rounded up to a multiple of 64.
        inp_res = int((i * 512 + 63) // 64 * 64)
        res = (inp_res, inp_res)
        mat_ = get_transform(center, scale, res)[:2]
        inp = cv2.warpAffine(img, mat_, res) / 255

        # Network forward pass on the image and on its left-right mirror.
        tmp1 = array2dict(func([inp]))
        tmp2 = array2dict(func([inp[:, ::-1]]))

        # Stack the plain and mirrored results along the first axis.
        tmp = {
            key: np.concatenate((tmp1[key], tmp2[key]), axis=0)
            for key in tmp1
        }

        # The mirrored prediction is flipped back along its last (column)
        # axis and its channels are reordered with ``flipRef`` before being
        # added to the plain prediction. Only the last stage is used.
        det = tmp['det'][0, -1] + tmp['det'][1, -1, :, :, ::-1][flipRef]
        if det.max() > 10:
            # NOTE(review): the original author also questioned why scales
            # with a maximum above 10 are skipped; kept as-is.
            continue

        if dets is None:
            dets = det
            # Invert the affine warp via the pseudo-inverse of its 3x3
            # homogeneous form.
            mat = np.linalg.pinv(np.array(mat_).tolist() + [[0, 0, 1]])[:2]
        else:
            dets = dets + resize(det, dets.shape[1:3])

        if abs(i - 1) < 0.5:
            res = dets.shape[1:3]
            tags += [
                resize(tmp['tag'][0], res),
                resize(tmp['tag'][1, :, :, ::-1][flipRef], res),
            ]

    if dets is None or len(tags) == 0:
        # Same arity as the normal return below; the original returned only
        # two values here, which broke three-way unpacking in callers.
        return [], [], []

    # Stack the plain and mirrored tag maps along a new trailing axis.
    tags = np.concatenate([t[:, :, :, None] for t in tags], axis=3)
    # Average over the flip pair (the /2) and the configured scales.
    # NOTE(review): the divisor counts scales skipped by the check above too.
    dets = dets / len(scales) / 2
    # Cap detection scores at 1 so they can be read as probabilities.
    dets = np.minimum(dets, 1)

    grouped = H_parser.parse(np.float32([dets]), np.float32([tags]))[0]
    scores = [person[:, 2].mean() for person in grouped]

    for k, person in enumerate(grouped):
        grouped[k] = refine(dets, tags, person)

    if len(grouped) > 0:
        grouped[:, :, :2] = kpt_affine(grouped[:, :, :2] * 4, mat)

    # Filter and flatten the per-person keypoints (adapted from pose-ae-demo
    # according to the original comment).
    persons = []
    for person in grouped:
        confidences = person[:, 2]
        # Keep a person only if at least one joint has positive confidence.
        if confidences.max() > 0:
            keypoints = []
            for joint in person:
                if joint[2] > 0.:
                    keypoints += [
                        float(joint[0]), float(joint[1]), float(joint[2])
                    ]
                else:
                    # Non-positive confidence is treated as a missed joint.
                    keypoints += [0, 0, 0]
            persons.append({
                "keypoints": keypoints,
                "score": float(confidences.mean()),
            })

    return persons, grouped, scores
def step(self, env_action, next_state, reward, done, number=None):
    """Append one transition to the rollout buffers; save when the episode ends.

    Args:
        env_action: Value appended to ``self.a_rollout``.
        next_state: Observation; passed through ``misc.resize`` and appended
            to ``self.s_rollout``.
        reward: Value appended to ``self.r_rollout``.
        done: Value appended to ``self.d_rollout``; when truthy the rollout
            is saved.
        number: Forwarded to ``self.save_rollout`` as the ``number`` keyword.
    """
    self.a_rollout.append(env_action)
    self.s_rollout.append(misc.resize(next_state))
    self.r_rollout.append(reward)
    self.d_rollout.append(done)

    if not done:
        return
    self.save_rollout(number=number)
def multiperson(img, func, mode):
    """Multi-scale, flip-augmented pose inference followed by person grouping.

    1. Resize the image to different scales and pass each scale through the
       network, together with its horizontal mirror.
    2. Merge the outputs across scales and find people by HeatmapParser
       (``parser``).
    3. Find the missing joints of the people with a second pass of the
       heatmaps (``refine``), then project the result onto the input image.

    Shape annotations in the comments below are carried over from the
    original author's notes and are not verified by this code.

    Args:
        img: Image array; ``img.shape[0:2]`` is read as (height, width).
        func: Callable invoked as ``func([inp])``; the network inference.
        mode: ``'multi'`` evaluates scales 2, 1 and 0.5; any other value
            evaluates scale 1 only.

    Returns:
        ``(grouped, scores)``, or ``([], [])`` when no scale produced both
        detections and tags.
    """
    scales = [2, 1., 0.5] if mode == 'multi' else [1]

    height, width = img.shape[0:2]
    center = (width / 2, height / 2)
    scale = max(height, width) / 200

    def unpack(raw):
        # raw[0]: [bs, 4, 68, 128, 128]
        stages = raw[0]
        return {
            # First 17 channels of every stage: [1, 4, 17, 128, 128].
            'det': stages[:, :, :17],
            # Tag channels of the last stage only: [1, 17, 128, 128].
            'tag': stages[:, -1, 17:34],
        }

    # Accumulators for the detection results of the different scales.
    dets, tags = None, []
    for ratio in scales:
        side = int((ratio * 512 + 63) // 64 * 64)
        target = (side, side)
        mat_ = get_transform(center, scale, target)[:2]
        inp = cv2.warpAffine(img, mat_, target) / 255  # [512, 512, 3] at ratio 1

        # Network inference on the image, then on its left-right mirror.
        # (To inspect the mirrored input, display ``inp[:, ::-1]`` with
        # matplotlib.)
        forward = unpack(func([inp]))
        flipped = unpack(func([inp[:, ::-1]]))

        # det: [2, 4, 17, 128, 128], tag: [2, 17, 128, 128]
        both = {}
        for name in forward:
            both[name] = np.concatenate((forward[name], flipped[name]), axis=0)

        # Combine the mirrored prediction with the plain one. The joint
        # channels are reordered with ``flipRef`` after un-mirroring, and
        # only the last stage's output is used. Result: [17, 128, 128].
        det = both['det'][0, -1] + both['det'][1, -1, :, :, ::-1][flipRef]
        if det.max() > 10:
            continue

        if dets is None:
            dets = det  # [17, 128, 128]
            # Inverse of the image transform applied above.
            mat = np.linalg.pinv(np.array(mat_).tolist() + [[0, 0, 1]])[:2]
        else:
            dets = dets + resize(det, dets.shape[1:3])

        if abs(ratio - 1) < 0.5:
            # Resize the tag predictions to the pixel size of the accumulated
            # detections; the mirrored tags also need the ``flipRef``
            # channel reordering.
            hm_size = dets.shape[1:3]
            tags += [
                resize(both['tag'][0], hm_size),
                resize(both['tag'][1, :, :, ::-1][flipRef], hm_size),
            ]

    if dets is None or len(tags) == 0:
        return [], []

    # Concatenate along a new last axis: [17, 128, 128, 2].
    tags = np.concatenate([t[:, :, :, None] for t in tags], axis=3)
    # Average the detections over scales and the flip pair, capped at 1.
    dets = np.minimum(dets / len(scales) / 2, 1)  # [17, 128, 128]

    # Grouping yields one entry per detected individual: [num_person, 17, 5].
    grouped = parser.parse(np.float32([dets]), np.float32([tags]))[0]
    # One heatmap-based score per instance: [num_person].
    scores = [inst[:, 2].mean() for inst in grouped]

    # For every detected person, try to recover joints that were missed.
    for n, inst in enumerate(grouped):
        grouped[n] = refine(dets, tags, inst)

    if len(grouped) > 0:
        # Project the result onto the original image; 4 is the downsampling
        # factor (512 -> 128).
        grouped[:, :, :2] = kpt_affine(grouped[:, :, :2] * 4, mat)
    return grouped, scores