class PoseDetector(object):

    def __init__(self, model_path: str):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.net = PoseEstimationWithMobileNet()
        self.net.to(self.device)
        checkpoint = torch.load(model_path)
        load_state(self.net, checkpoint)
        self.net.eval()
        self.image = None
        self.avg_heatmap = None
        self.avg_paf = None
        self.track = True
        self.stride = 8
        self.upsample_ratio = 4
        self.height_size = 256
        self.smooth = 1
        self.num_keypoints = Pose.num_kpts

    def __inference(self, image, multiscale=False):
        img = image.copy()
        base_height = 368
        scales = [1]
        if multiscale:
            scales = [0.5, 1.0, 1.5, 2.0]
        stride = 8

        normed_img = self.__normalize(img)
        height, width, _ = normed_img.shape
        scales_ratios = [scale * base_height / float(height) for scale in scales]
        avg_heatmap = np.zeros((height, width, 19), dtype=np.float32)
        avg_paf = np.zeros((height, width, 38), dtype=np.float32)

        for ratio in scales_ratios:
            scaled_img = cv2.resize(normed_img, (0, 0), fx=ratio, fy=ratio,
                                    interpolation=cv2.INTER_CUBIC)
            min_dims = [base_height, max(scaled_img.shape[1], base_height)]
            padded_img, pad = self.__pad_width(scaled_img, stride, min_dims)

            tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float().to(self.device)
            stages_output = self.net(tensor_img)

            stage2_heatmap = stages_output[-2]
            heatmap = np.transpose(stage2_heatmap.squeeze().cpu().data.numpy(), (1, 2, 0))
            heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride,
                                 interpolation=cv2.INTER_CUBIC)
            heatmap = heatmap[pad[0]:heatmap.shape[0] - pad[2],
                              pad[1]:heatmap.shape[1] - pad[3], :]
            heatmap = cv2.resize(heatmap, (width, height),
                                 interpolation=cv2.INTER_CUBIC)
            # Accumulate locally over scales; the original assigned
            # `avg_heatmap + heatmap / len(...)` to self.avg_heatmap inside the
            # loop, so only the last scale survived.
            avg_heatmap = avg_heatmap + heatmap / len(scales_ratios)

            stage2_paf = stages_output[-1]
            paf = np.transpose(stage2_paf.squeeze().cpu().data.numpy(), (1, 2, 0))
            paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride,
                             interpolation=cv2.INTER_CUBIC)
            paf = paf[pad[0]:paf.shape[0] - pad[2],
                      pad[1]:paf.shape[1] - pad[3], :]
            paf = cv2.resize(paf, (width, height),
                             interpolation=cv2.INTER_CUBIC)
            avg_paf = avg_paf + paf / len(scales_ratios)

        self.avg_heatmap = avg_heatmap
        self.avg_paf = avg_paf

    def __inference_fast(self, img, net_input_height_size, stride, upsample_ratio):
        height, width, _ = img.shape
        self.scale = net_input_height_size / height

        scaled_img = cv2.resize(img, (0, 0), fx=self.scale, fy=self.scale,
                                interpolation=cv2.INTER_LINEAR)
        scaled_img = self.__normalize(scaled_img)
        min_dims = [net_input_height_size,
                    max(scaled_img.shape[1], net_input_height_size)]
        padded_img, self.pad = self.__pad_width(scaled_img, stride, min_dims)

        tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float()
        tensor_img = tensor_img.to(self.device)

        stages_output = self.net(tensor_img)

        stage2_heatmaps = stages_output[-2]
        heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0))
        self.avg_heatmap = cv2.resize(heatmaps, (0, 0),
                                      fx=upsample_ratio, fy=upsample_ratio,
                                      interpolation=cv2.INTER_CUBIC)

        stage2_pafs = stages_output[-1]
        pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0))
        self.avg_paf = cv2.resize(pafs, (0, 0),
                                  fx=upsample_ratio, fy=upsample_ratio,
                                  interpolation=cv2.INTER_CUBIC)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(self.num_keypoints):  # the 19th channel is background
            total_keypoints_num += extract_keypoints(self.avg_heatmap[:, :, kpt_idx],
                                                     all_keypoints_by_type,
                                                     total_keypoints_num)

        self.pose_entries, self.all_keypoints = group_keypoints(all_keypoints_by_type,
                                                                self.avg_paf)
        for kpt_id in range(self.all_keypoints.shape[0]):
            self.all_keypoints[kpt_id, 0] = \
                int((self.all_keypoints[kpt_id, 0] * self.stride / self.upsample_ratio
                     - self.pad[1]) / self.scale)
            self.all_keypoints[kpt_id, 1] = \
                int((self.all_keypoints[kpt_id, 1] * self.stride / self.upsample_ratio
                     - self.pad[0]) / self.scale)

    def visualize_prediction(self, image):
        orig_img = image.copy()
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference_fast(self.image, self.height_size,
                                  self.stride, self.upsample_ratio)
        current_poses = []
        for n in range(len(self.pose_entries)):
            if len(self.pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((self.num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(self.num_keypoints):
                if self.pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(
                        self.all_keypoints[int(self.pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(
                        self.all_keypoints[int(self.pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, self.pose_entries[n][18])
            current_poses.append(pose)

        # if self.track:
        #     previous_poses = []
        #     track_poses(previous_poses, current_poses, smooth=smooth)
        #     previous_poses = current_poses

        for pose in current_poses:
            pose.draw(image)
        image = cv2.addWeighted(orig_img, 0.6, image, 0.4, 0)
        # plt.imshow(image)
        # plt.show()
        return image

    def __get_auto_grading_outputs(self, image):
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference_fast(self.image, self.height_size,
                                  self.stride, self.upsample_ratio)
        return self.all_keypoints, self.pose_entries

    def __visualize_prediction_slow(self, image):
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference(self.image)
        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(18):  # the 19th channel is background
            total_keypoints_num += extract_keypoints(self.avg_heatmap[:, :, kpt_idx],
                                                     all_keypoints_by_type,
                                                     total_keypoints_num)
        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type,
                                                      self.avg_paf)
        coco_keypoints, scores = convert_to_coco_format(pose_entries, all_keypoints)

        for keypoints in coco_keypoints:
            for idx in range(len(keypoints) // 3):
                cv2.circle(image,
                           (int(keypoints[idx * 3]), int(keypoints[idx * 3 + 1])),
                           3, (255, 0, 255), -1)
        plt.imshow(image)
        plt.show()

    def __get_auto_grading_outputs_slow(self, image):
        if not np.array_equal(self.image, image):
            self.image = image
            self.__inference(self.image)
        all_peaks = []
        peak_counter = 0
        thre1 = 0.1
        thre2 = 0.05
        for part in range(18):
            map_ori = self.avg_heatmap[:, :, part]
            one_heatmap = gaussian_filter(map_ori, sigma=3)

            map_left = np.zeros(one_heatmap.shape)
            map_left[1:, :] = one_heatmap[:-1, :]
            map_right = np.zeros(one_heatmap.shape)
            map_right[:-1, :] = one_heatmap[1:, :]
            map_up = np.zeros(one_heatmap.shape)
            map_up[:, 1:] = one_heatmap[:, :-1]
            map_down = np.zeros(one_heatmap.shape)
            map_down[:, :-1] = one_heatmap[:, 1:]

            peaks_binary = np.logical_and.reduce(
                (one_heatmap >= map_left, one_heatmap >= map_right,
                 one_heatmap >= map_up, one_heatmap >= map_down,
                 one_heatmap > thre1))
            peaks = list(zip(np.nonzero(peaks_binary)[1],
                             np.nonzero(peaks_binary)[0]))  # note reverse
            peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
            peak_id = range(peak_counter, peak_counter + len(peaks))
            peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],)
                                       for i in range(len(peak_id))]

            all_peaks.append(peaks_with_score_and_id)
            peak_counter += len(peaks)

        # find connection in the specified sequence, center 29 is in the position 15
        limbSeq = [[2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9],
                   [9, 10], [10, 11], [2, 12], [12, 13], [13, 14], [2, 1],
                   [1, 15], [15, 17], [1, 16], [16, 18], [3, 17], [6, 18]]
        # the middle joints heatmap correspondence
        mapIdx = [[31, 32], [39, 40], [33, 34], [35, 36], [41, 42], [43, 44],
                  [19, 20], [21, 22], [23, 24], [25, 26], [27, 28], [29, 30],
                  [47, 48], [49, 50], [53, 54], [51, 52], [55, 56], [37, 38],
                  [45, 46]]

        connection_all = []
        special_k = []
        mid_num = 10

        for k in range(len(mapIdx)):
            score_mid = self.avg_paf[:, :, [x - 19 for x in mapIdx[k]]]
            candA = all_peaks[limbSeq[k][0] - 1]
            candB = all_peaks[limbSeq[k][1] - 1]
            nA = len(candA)
            nB = len(candB)
            if nA != 0 and nB != 0:
                connection_candidate = []
                for i in range(nA):
                    for j in range(nB):
                        vec = np.subtract(candB[j][:2], candA[i][:2])
                        norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                        vec = np.divide(vec, norm)

                        startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
                                            np.linspace(candA[i][1], candB[j][1], num=mid_num)))

                        vec_x = np.array([score_mid[int(round(startend[I][1])),
                                                    int(round(startend[I][0])), 0]
                                          for I in range(len(startend))])
                        vec_y = np.array([score_mid[int(round(startend[I][1])),
                                                    int(round(startend[I][0])), 1]
                                          for I in range(len(startend))])

                        score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
                        score_with_dist_prior = sum(score_midpts) / len(score_midpts) \
                            + min(0.5 * image.shape[0] / norm - 1, 0)
                        criterion1 = len(np.nonzero(score_midpts > thre2)[0]) \
                            > 0.8 * len(score_midpts)
                        criterion2 = score_with_dist_prior > 0
                        if criterion1 and criterion2:
                            connection_candidate.append(
                                [i, j, score_with_dist_prior,
                                 score_with_dist_prior + candA[i][2] + candB[j][2]])

                connection_candidate = sorted(connection_candidate,
                                              key=lambda x: x[2], reverse=True)
                connection = np.zeros((0, 5))
                for c in range(len(connection_candidate)):
                    i, j, s = connection_candidate[c][0:3]
                    if i not in connection[:, 3] and j not in connection[:, 4]:
                        connection = np.vstack(
                            [connection, [candA[i][3], candB[j][3], s, i, j]])
                        if len(connection) >= min(nA, nB):
                            break
                connection_all.append(connection)
            else:
                special_k.append(k)
                connection_all.append([])

        # last number in each row is the total parts number of that person
        # the second last number in each row is the score of the overall configuration
        subset = -1 * np.ones((0, 20))
        candidate = np.array([item for sublist in all_peaks for item in sublist])

        for k in range(len(mapIdx)):
            if k not in special_k:
                partAs = connection_all[k][:, 0]
                partBs = connection_all[k][:, 1]
                indexA, indexB = np.array(limbSeq[k]) - 1

                for i in range(len(connection_all[k])):  # = 1:size(temp,1)
                    found = 0
                    subset_idx = [-1, -1]
                    for j in range(len(subset)):  # 1:size(subset,1):
                        if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
                            subset_idx[found] = j
                            found += 1

                    if found == 1:
                        j = subset_idx[0]
                        if subset[j][indexB] != partBs[i]:
                            subset[j][indexB] = partBs[i]
                            subset[j][-1] += 1
                            subset[j][-2] += candidate[partBs[i].astype(int), 2] \
                                + connection_all[k][i][2]
                    elif found == 2:  # if found 2 and disjoint, merge them
                        j1, j2 = subset_idx
                        membership = ((subset[j1] >= 0).astype(int)
                                      + (subset[j2] >= 0).astype(int))[:-2]
                        if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                            subset[j1][:-2] += (subset[j2][:-2] + 1)
                            subset[j1][-2:] += subset[j2][-2:]
                            subset[j1][-2] += connection_all[k][i][2]
                            subset = np.delete(subset, j2, 0)
                        else:  # as like found == 1
                            subset[j1][indexB] = partBs[i]
                            subset[j1][-1] += 1
                            subset[j1][-2] += candidate[partBs[i].astype(int), 2] \
                                + connection_all[k][i][2]
                    # if find no partA in the subset, create a new subset
                    elif not found and k < 17:
                        row = -1 * np.ones(20)
                        row[indexA] = partAs[i]
                        row[indexB] = partBs[i]
                        row[-1] = 2
                        row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) \
                            + connection_all[k][i][2]
                        subset = np.vstack([subset, row])

        # delete some rows of subset which have few parts
        deleteIdx = []
        for i in range(len(subset)):
            if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
                deleteIdx.append(i)
        subset = np.delete(subset, deleteIdx, axis=0)

        # subset: n*20 array, 0-17 is the index in candidate,
        #         18 is the total score, 19 is the total parts
        # candidate: x, y, score, id
        return candidate, subset

    @staticmethod
    def __normalize(img, img_mean=(128, 128, 128), img_scale=1 / 256):
        img = np.array(img, dtype=np.float32)
        img = (img - img_mean) * img_scale
        return img

    @staticmethod
    def __pad_width(img, stride, min_dims, pad_value=(0, 0, 0)):
        h, w, _ = img.shape
        h = min(min_dims[0], h)
        min_dims[0] = math.ceil(min_dims[0] / float(stride)) * stride
        min_dims[1] = max(min_dims[1], w)
        min_dims[1] = math.ceil(min_dims[1] / float(stride)) * stride
        pad = []
        pad.append(int(math.floor((min_dims[0] - h) / 2.0)))
        pad.append(int(math.floor((min_dims[1] - w) / 2.0)))
        pad.append(int(min_dims[0] - h - pad[0]))
        pad.append(int(min_dims[1] - w - pad[1]))
        padded_img = cv2.copyMakeBorder(img, pad[0], pad[2], pad[1], pad[3],
                                        cv2.BORDER_CONSTANT, value=pad_value)
        return padded_img, pad

    def __call__(self, image):
        return self.__get_auto_grading_outputs(image)
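# Minimal usage sketch (illustrative, not part of the original file): assumes a
# MobileNet checkpoint such as 'checkpoint_iter_370000.pth' is available locally.
# detector = PoseDetector('checkpoint_iter_370000.pth')
# frame = cv2.imread('input.jpg')
# all_keypoints, pose_entries = detector(frame)           # fast path via __call__
# overlay = detector.visualize_prediction(frame.copy())   # draws the detected poses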
if __name__ == '__main__':
    # parser = argparse.ArgumentParser(
    #     description='''Lightweight human pose estimation python demo.
    #                    This is just for quick results preview.
    #                    Please, consider c++ demo for the best performance.''')
    # parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
    # parser.add_argument('--video', type=str, default='', help='path to video file or camera id')
    # args = parser.parse_args()
    # if args.video == '':
    #     raise ValueError('--video has to be provided')
    # net = PoseEstimationWithMobileNet(num_heatmaps=26, num_pafs=52)
    # checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    # load_state(net, checkpoint)
    # frame_provider = VideoReader(args.video)
    # run_demo(net, frame_provider, 256, False, True, True)

    net = PoseEstimationWithMobileNet(num_heatmaps=26, num_pafs=52,
                                      num_refinement_stages=1)
    checkpoint = torch.load('body25_checkpoints/checkpoint_iter_465000.pth',
                            map_location='cpu')
    load_state(net, checkpoint)
    frame_provider = VideoReader('D:/projects/MotioNet/video/beyonce.mp4')
    run_demo(net, frame_provider, 256, False, True, True)
from modules.load_state import load_state


def convert_to_onnx(net, output_name):
    # Despite the name, this currently performs a torch2trt (TensorRT)
    # conversion; the original ONNX export is kept below, commented out.
    input = torch.zeros(1, 3, 256, 448)
    input_names = ['data']
    output_names = ['features', 'heatmaps', 'pafs']

    model_trt = torch2trt(net, [input], fp16_mode=True)
    torch.save(model_trt.state_dict(), output_name)
    # torch.onnx.export(net, input, output_name, verbose=True,
    #                   input_names=input_names, output_names=output_names)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', type=str,
                        default='models/human-pose-estimation-3d.pth',
                        help='path to the checkpoint')
    parser.add_argument('--output-name', type=str,
                        default='human-pose-estimation-3d-trt.pth',
                        help='name of the converted output model')
    args = parser.parse_args()

    net = PoseEstimationWithMobileNet(is_convertible_by_mo=True)
    checkpoint = torch.load(args.checkpoint_path)
    load_state(net, checkpoint)
    convert_to_onnx(net, args.output_name)
    print('=====================done=====================')
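# Illustrative follow-up (assumes torch2trt is installed): the saved state dict
# can be restored into a TRTModule for inference, as the deployment snippet
# later in this collection does.
# from torch2trt import TRTModule
# model_trt = TRTModule()
# model_trt.load_state_dict(torch.load('human-pose-estimation-3d-trt.pth'))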
# Optimizes a model.
# Relies on the platform => must be compiled for every device.
# The first command line parameter must be the path to the old model.
import sys

import torch
# import torch2trt
from models.with_mobilenet import PoseEstimationWithMobileNet
from torch2trt.torch2trt import *

MODEL_NAME = sys.argv[1]
NEW_MODEL_NAME = MODEL_NAME.replace(".pth", "_opt.pth")
WIDTH = 224
HEIGHT = 224

net = PoseEstimationWithMobileNet()
net.load_state_dict(torch.load(sys.argv[1]), strict=False)
net.eval().cuda()

data = torch.ones((1, 3, HEIGHT, WIDTH)).cuda()
model_trt = torch2trt(net, [data], fp16_mode=True, max_workspace_size=1 << 25)
torch.save(model_trt.state_dict(), NEW_MODEL_NAME)
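# Example invocation (the script name is illustrative):
#   python optimize_model.py checkpoints/body.pth
# writes the TensorRT-compiled weights to checkpoints/body_opt.pth.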
                        default='',
                        help='path to video file or camera id')
    parser.add_argument('--images', nargs='+', default='',
                        help='path to input image(s)')
    parser.add_argument('--cpu', action='store_true',
                        help='run network inference on cpu')
    parser.add_argument('--track-ids', default=True, help='track poses ids')
    args = parser.parse_args()

    if args.video == '' and args.images == '':
        raise ValueError('Either --video or --images has to be provided')

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    frame_provider = ImageReader(args.images)
    if args.video != '':
        frame_provider = VideoReader(args.video)

    # torch.cuda.synchronize()
    t = time.time()
    net = net.eval()
    if not args.cpu:
        net = net.cuda()
    ta = time.time()
    run_demo(net, frame_provider, args.height_size, args.cpu,
             args.track_ids)  # , frame_provider.file_names)
    tb = time.time()
    def callback(self, data):
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)

        ## Rescale image size
        rescale_factor = 1
        width = int(cv_image.shape[1] * rescale_factor)
        height = int(cv_image.shape[0] * rescale_factor)
        dim = (width, height)
        resized_img = cv2.resize(cv_image, dim)

        # Note: building the network and loading the checkpoint on every
        # callback is expensive; ideally this would be done once in __init__.
        net = PoseEstimationWithMobileNet()
        checkpoint = torch.load(
            "/home/zheng/lightweight-human-pose-estimation.pytorch/checkpoint_iter_370000.pth",
            map_location='cpu')
        load_state(net, checkpoint)
        height_size = 256
        net = net.eval()
        net = net.cuda()
        stride = 8
        upsample_ratio = 4
        num_keypoints = Pose.num_kpts
        previous_poses = []
        delay = 33

        # img = cv2.imread("/home/zheng/lightweight-human-pose-estimation.pytorch/data/image_1400.jpg")
        img = asarray(cv_image)
        orig_img = img
        # cpu must be False here: the network lives on the GPU, so the input
        # tensor has to be moved there as well (the original passed the truthy
        # string "store_true", which would keep the input on the CPU).
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride,
                                                upsample_ratio, cpu=False)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(num_keypoints):  # 19th for bg
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx],
                                                     all_keypoints_by_type,
                                                     total_keypoints_num)

        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type,
                                                      pafs, demo=True)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride /
                                        upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride /
                                        upsample_ratio - pad[0]) / scale

        current_poses = []
        for n in range(len(pose_entries)):
            if len(pose_entries[n]) == 0:
                continue
            pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1
            for kpt_id in range(num_keypoints):
                if pose_entries[n][kpt_id] != -1.0:  # keypoint was found
                    pose_keypoints[kpt_id, 0] = int(
                        all_keypoints[int(pose_entries[n][kpt_id]), 0])
                    pose_keypoints[kpt_id, 1] = int(
                        all_keypoints[int(pose_entries[n][kpt_id]), 1])
            pose = Pose(pose_keypoints, pose_entries[n][18])
            current_poses.append(pose)

        for pose in current_poses:
            pose.draw(img)
        img = cv2.addWeighted(orig_img, 0.6, img, 0.4, 0)
        for pose in current_poses:
            cv2.rectangle(
                img, (pose.bbox[0], pose.bbox[1]),
                (pose.bbox[0] + pose.bbox[2], pose.bbox[1] + pose.bbox[3]),
                (0, 255, 0))
        # cv2.imshow('Lightweight Human Pose Estimation Python Demo', img)
        self.image_pub.publish(self.bridge.cv2_to_imgmsg(img, "bgr8"))
        # cv2.imwrite('/home/zheng/Bureau/image_1400_key.jpg', img)
        cv2.waitKey(2)
        # if leg is missing, use pelvis to get cropping
        # (np.int was removed from newer NumPy releases; plain int is equivalent)
        center = (0.5 * (pose_keypoints[8] + pose_keypoints[11])).astype(int)
        radius = int(1.45 * np.sqrt(((center[None, :] - valid_keypoints)**2).sum(1)).max(0))
        center[1] += int(0.05 * radius)
    else:
        center = np.array([img.shape[1] // 2, img.shape[0] // 2])
        radius = max(img.shape[1] // 2, img.shape[0] // 2)

    x1 = center[0] - radius
    y1 = center[1] - radius

    rects.append([x1, y1, 2 * radius, 2 * radius])

    np.savetxt(rect_path, np.array(rects), fmt='%d')

net = PoseEstimationWithMobileNet()
checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
load_state(net, checkpoint)

get_rect(net.cuda(), [image_path], 512)

"""## Download the Pretrained Model"""

cd /content/pifuhd
!sh ./scripts/download_trained_model.sh

"""## Run PIFuHD!
"""
def convert_to_skelets(in_, out_, cpu=False, height_size=256):
    # height_size - network input layer height size
    # cpu - True if we would like to run in CPU
    print('start convert to skelets')

    # mask that shows - this is bed
    mask = cv2.imread(os.path.join('mask', 'mask.jpg'), 0)
    mask = cv2.normalize(mask, None, alpha=0, beta=1,
                         norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    net = PoseEstimationWithMobileNet()
    # The original snippet used `checkpoint` without defining it; the path
    # below is an assumption, borrowed from the other scripts in this
    # collection.
    checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
    load_state(net, checkpoint)
    net = net.eval()
    if not cpu:
        net = net.cuda()

    stride = 8
    upsample_ratio = 4
    max_number = 963
    num_img = 0
    stream = cv2.VideoCapture("rtsp://*****:*****@62.140.233.76:554")
    # for num in range(0, max_number + 1):
    while True:
        # frame = 'frame' + str(num) + '.jpg'
        # img = cv2.imread(os.path.join(in_, frame), cv2.IMREAD_COLOR)
        r, img = stream.read()
        # cv2.destroyAllWindows()
        # find the place of the bed - and add border to it, so we can cut the
        # unnecessary part; apply object detection and find bed; output is an
        # image with black pixels of not bed, and white pixels of bed
        heatmaps, pafs, scale, pad = infer_fast(net, img, height_size, stride,
                                                upsample_ratio, cpu)

        total_keypoints_num = 0
        all_keypoints_by_type = []
        for kpt_idx in range(18):
            total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx],
                                                     all_keypoints_by_type,
                                                     total_keypoints_num)
        pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs)
        for kpt_id in range(all_keypoints.shape[0]):
            all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride /
                                        upsample_ratio - pad[1]) / scale
            all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride /
                                        upsample_ratio - pad[0]) / scale

        # how many persons in image
        num_persons = len(pose_entries)
        # num_img more than time_period - we delete first second and add the last second
        bones_detected = np.zeros(len(bones_to_detect))
        bones_xa = np.zeros(len(bones_to_detect))
        bones_ya = np.zeros(len(bones_to_detect))
        bones_xb = np.zeros(len(bones_to_detect))
        bones_yb = np.zeros(len(bones_to_detect))
        bones_in_bed = np.zeros(len(bones_to_detect))

        for n in range(num_persons):
            count_person_not_in_bed = 1
            for id_x in range(len(bones_to_detect)):
                bones_detected[id_x] = 0
                bones_xa[id_x] = 0
                bones_ya[id_x] = 0
                bones_xb[id_x] = 0
                bones_yb[id_x] = 0
                bones_in_bed[id_x] = 0
            if len(pose_entries[n]) == 0:
                continue
            for id_, part_id in enumerate(bones_to_detect):
                kpt_a_id = BODY_PARTS_KPT_IDS[part_id][0]
                global_kpt_a_id = pose_entries[n][kpt_a_id]
                kpt_b_id = BODY_PARTS_KPT_IDS[part_id][1]
                global_kpt_b_id = pose_entries[n][kpt_b_id]
                # if both points are detected
                if global_kpt_a_id != -1 and global_kpt_b_id != -1:
                    bones_xa[id_], bones_ya[id_] = all_keypoints[int(global_kpt_a_id), 0:2]
                    bones_xb[id_], bones_yb[id_] = all_keypoints[int(global_kpt_b_id), 0:2]
                    if mask[int(bones_ya[id_])][int(bones_xa[id_])] == 1 and \
                            mask[int(bones_yb[id_])][int(bones_xb[id_])] == 1:
                        bones_in_bed[id_] = 1
                    bones_detected[id_] = 1

            sum_bones = 0
            for id_, val in enumerate(bones_in_bed):
                sum_bones += val
            if sum_bones == len(bones_in_bed):
                # anomaly
                # we take mean vector of 2 vectors of bones 6 and 9
                bone_xa = (bones_xa[0] + bones_xa[2]) / 2
                bone_ya = (bones_ya[0] + bones_ya[2]) / 2
                bone_xb = (bones_xb[0] + bones_xb[2]) / 2
                bone_yb = (bones_yb[0] + bones_yb[2]) / 2
                x1 = bone_xb - bone_xa
                y1 = bone_yb - bone_ya
                x2 = 100
                y2 = 0
                global anomaly_checker
                alfa = math.acos(
                    (x1 * x2 + y1 * y2) /
                    (math.sqrt(x1**2 + y1**2) * math.sqrt(x2**2 + y2**2)))
                # if alfa is close to 90 degree - anomaly
                if min(abs(alfa - rad_90), abs(alfa - rad_270)) <= threshold:
                    print('num_persons', num_persons)
                    if num_persons == 1:
                        anomaly_checker = np.delete(anomaly_checker, 0)
                        anomaly_checker = np.append(anomaly_checker, 1)
                        # `frame` was only defined in the commented-out
                        # file-reading branch above; a name is assumed here
                        # so the saved image has a valid filename.
                        frame = 'frame' + str(num_img) + '.jpg'
                        cv2.imwrite(os.path.join('out_out', frame), img)
                    if np.sum(anomaly_checker) >= SEC_WITHOUT_HELP:
                        print('ALARM!')

        num_img += 1
        if not os.path.exists(out_):
            os.mkdir(out_)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    print('done convert to skelets')
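# Standalone sketch of the perpendicularity test used above (illustrative;
# mirrors the rad_90/rad_270 globals and compares the mean bone vector against
# the horizontal reference vector (100, 0)):
# def is_perpendicular(x1, y1, threshold=0.3):
#     alfa = math.acos((x1 * 100) / (math.sqrt(x1**2 + y1**2) * 100.0))
#     return min(abs(alfa - math.pi / 2), abs(alfa - 3 * math.pi / 2)) <= threshold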
    def __init__(self, model_path):
        self.model = PoseEstimationWithMobileNet()
        checkpoint = torch.load(model_path, map_location='cpu')
        load_state(self.model, checkpoint)
        self.model = self.model.eval()
        self.model = self.model.cuda()
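# Illustrative usage of this wrapper (the enclosing class name and input shape
# are assumptions; the input is expected to be normalized and padded as in the
# demo code elsewhere in this collection):
# estimator = PoseEstimator('checkpoint_iter_370000.pth')
# tensor_img = torch.randn(1, 3, 256, 456).cuda()
# stages_output = estimator.model(tensor_img)  # last two entries: heatmaps, PAFs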
def gen():
    """Video streaming generator function."""
    HOST = ''
    PORT = 8088

    emptyPoses = []

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    print('Socket created')

    s.bind((HOST, PORT))
    print('Socket bind complete')
    s.listen(10)
    print('Socket now listening')

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
    load_state(net, checkpoint)

    conn, addr = s.accept()
    print('ACCEPTED')

    data = b''  ### CHANGED
    payload_size = struct.calcsize("=L")  ### CHANGED

    stepA = False
    stepB = False
    global count
    count = 0
    sitAngle = 0
    stdupAngle = 0
    font = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (50, 400)
    topLeft = (150, 400)
    fontScale = 3
    fontColor = (255, 0, 0)
    lineType = 2
    emptyPoses = []

    while True:
        while len(data) < payload_size:
            data += conn.recv(4096)
        print('MESSAGESIZE')
        print(payload_size)
        packed_msg_size = data[:payload_size]
        data = data[payload_size:]
        msg_size = struct.unpack("=L", packed_msg_size)[0]  ### CHANGED
        print('unpack')

        # Retrieve all data based on message size
        while len(data) < msg_size:
            data += conn.recv(4096)
            print(len(data))
            print(msg_size)
        print('RECEIVED')

        frame_data = data[:msg_size]
        data = data[msg_size:]

        # Extract frame
        frame = pickle.loads(frame_data)
        # read_return_code, frame = vc.read()
        pose = run_demo(net, frame, 256, 0, 0, 1)
        if pose is not None:
            pose.draw(frame)
            # cv2.imshow('test', frame)
            if cv2.waitKey(1) == ord('q'):
                break
            # right hip (8), knee (9) and ankle (10) keypoints
            A = np.array([pose.keypoints[8][0], pose.keypoints[8][1]])
            B = np.array([pose.keypoints[9][0], pose.keypoints[9][1]])
            C = np.array([pose.keypoints[10][0], pose.keypoints[10][1]])
            BA = A - B
            BC = C - B
            cosine_angle = np.dot(BA, BC) / (np.linalg.norm(BA) * np.linalg.norm(BC))
            angle = np.arccos(cosine_angle)
            angle = np.degrees(angle)
            # print(angle)

            if angle > 140:
                stepA = True
                if stdupAngle == 0:  # the original `is 0` compares identity, not value
                    stdupAngle = angle
                if angle > stdupAngle:
                    stdupAngle = angle
            if angle < 70:
                stepB = True
                if sitAngle == 0:
                    sitAngle = angle
                if angle < sitAngle:
                    sitAngle = angle

            if stepA and stepB:
                if angle > 140:
                    stepA = False
                    stepB = False
                    count += 1
                    if sitAngle > 60:
                        cv2.putText(frame, "Bend your knee more", topLeft,
                                    font, fontScale, fontColor, lineType)
                    stdupDiff = abs(140 - stdupAngle)
                    sitDiff = abs(70 - sitAngle)
                    correctness = 280 - (stdupDiff + sitDiff)
                    cv2.putText(frame, "correctness" + str(correctness),
                                bottomLeftCornerOfText, font, fontScale,
                                fontColor, lineType)
                    sitAngle = 0
                    stdupAngle = 0
            if stepA and not stepB:
                if angle > 140:
                    stepA = False
                    stepB = False
                    sitAngle = 0
                    stdupAngle = 0

        cv2.putText(frame, "count" + str(count), bottomLeftCornerOfText,
                    font, fontScale, fontColor, lineType)
        # cv2.imshow("img", frame)
        print(count)

        encode_return_code, image_buffer = cv2.imencode('.jpg', frame)
        io_buf = io.BytesIO(image_buffer)
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + io_buf.read() + b'\r\n')
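# The knee angle above is the angle at B (knee) between BA (knee->hip) and
# BC (knee->ankle). A standalone sketch of that computation:
# def joint_angle(a, b, c):
#     """Angle at b, in degrees, formed by points a-b-c."""
#     ba, bc = np.asarray(a) - np.asarray(b), np.asarray(c) - np.asarray(b)
#     cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
#     return np.degrees(np.arccos(cos_angle))
# e.g. joint_angle((0, 0), (0, 1), (1, 1)) -> 90.0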
    if len(coco_result) > 0:
        with open(output_name, 'w') as f:
            json.dump(coco_result, f, indent=4)

    run_coco_eval(labels, output_name)


if __name__ == '__main__':
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--labels', type=str, required=True, help='path to json with keypoints val labels')
    # parser.add_argument('--output-name', type=str, default='detections.json',
    #                     help='name of output json file with detected keypoints')
    # parser.add_argument('--images-folder', type=str, required=True, help='path to COCO val images folder')
    # parser.add_argument('--checkpoint-path', type=str, required=True, help='path to the checkpoint')
    # parser.add_argument('--multiscale', action='store_true',
    #                     help='average inference results over multiple scales')
    # parser.add_argument('--visualize', action='store_true', help='show keypoints')
    # args = parser.parse_args()
    # net = PoseEstimationWithMobileNet(num_heatmaps=26, num_pafs=52)
    # checkpoint = torch.load(args.checkpoint_path)
    # load_state(net, checkpoint)
    # evaluate(args.labels, args.output_name, args.images_folder, net, args.multiscale, args.visualize)

    net = PoseEstimationWithMobileNet(num_heatmaps=26, num_pafs=52)
    checkpoint = torch.load('body25_checkpoints/checkpoint_iter_5000.pth')
    load_state(net, checkpoint)
    evaluate('data/val_subset_1.json', 'data/detections.json', 'coco/val2017/',
             net, True, False)
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet, checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name, checkpoint_after, val_after): net = PoseEstimationWithMobileNet(num_refinement_stages) stride = 8 sigma = 7 path_thickness = 1 dataset = CocoTrainDataset(prepared_train_labels, train_images_folder, stride, sigma, path_thickness, transform=transforms.Compose([ ConvertKeypoints(), Scale(), Rotate(pad=(128, 128, 128)), CropPad(pad=(128, 128, 128)), Flip() ])) train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) optimizer = optim.Adam([ { 'params': get_parameters_conv(net.model, 'weight') }, { 'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, { 'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr }, { 'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, { 'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr }, { 'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, { 'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4 }, { 'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0 }, { 'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, ], lr=base_lr, weight_decay=5e-4) num_iter = 0 current_epoch = 0 drop_after_epoch = [100, 200, 260] scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333) if checkpoint_path: checkpoint = torch.load(checkpoint_path) if from_mobilenet: load_from_mobilenet(net, checkpoint) else: load_state(net, checkpoint) if not weights_only: optimizer.load_state_dict(checkpoint['optimizer']) scheduler.load_state_dict(checkpoint['scheduler']) num_iter = checkpoint['iter'] current_epoch = checkpoint['current_epoch'] net = DataParallel(net).cuda() net.train() for epochId in range(current_epoch, 280): scheduler.step() total_losses = [0, 0] * (num_refinement_stages + 1 ) # heatmaps loss, paf loss per stage batch_per_iter_idx = 0 for batch_data in train_loader: if batch_per_iter_idx == 0: optimizer.zero_grad() # print("show imgs" # , batch_data['keypoint_maps'].shape, batch_data['paf_maps'].shape # , batch_data['keypoint_mask'].shape, batch_data['paf_mask'].shape # , batch_data['mask'].shape, batch_data['image'].shape # ) # print("seg", batch_data['label']['segmentations']) print("batched images size", batch_data['image'].shape) vis.images(batch_data['image'][:, [2, 1, 0], ...] 
+ 0.5, 4, 2, "1", opts=dict(title="img")) vis.images(batch_data['keypoint_mask'].permute(1, 0, 2, 3), 4, 2, "2", opts=dict(title="kp_mask")) vis.images(batch_data['paf_mask'].permute(1, 0, 2, 3), 4, 2, "3", opts=dict(title="paf_mask")) vis.images(batch_data['keypoint_maps'].permute(1, 0, 2, 3), 4, 2, "4", opts=dict(title="keypoint_maps")) vis.images(batch_data['paf_maps'].permute(1, 0, 2, 3), 4, 2, "5", opts=dict(title="paf_maps")) vis.images(batch_data['mask'].unsqueeze(0), 4, 2, "6", opts=dict(title="MASK")) images = batch_data['image'].cuda() keypoint_masks = batch_data['keypoint_mask'].cuda() paf_masks = batch_data['paf_mask'].cuda() keypoint_maps = batch_data['keypoint_maps'].cuda() paf_maps = batch_data['paf_maps'].cuda() pafs = batch_data['paf_maps'][0].permute(1, 2, 0).numpy() scale = 4 img_p = np.zeros((pafs.shape[1] * 8, pafs.shape[0] * 8, 3), dtype=np.uint8) # pafs[pafs < 0.07] = 0 for idx in range(len(BODY_PARTS_PAF_IDS)): # print(pp, pafs.shape) pp = BODY_PARTS_PAF_IDS[idx] k_idx = BODY_PARTS_KPT_IDS[idx] cc = BODY_CONN_COLOR[idx] vx = pafs[:, :, pp[0]] vy = pafs[:, :, pp[1]] for i in range(pafs.shape[1]): for j in range(pafs.shape[0]): a = (i * 2 * scale, j * 2 * scale) b = (2 * int((i + vx[j, i] * 3) * scale), 2 * int( (j + vy[j, i] * 3) * scale)) if a[0] == b[0] and a[1] == b[1]: continue cv2.line(img_p, a, b, cc, 1) # break cv2.imshow("paf", img_p) key = cv2.waitKey(0) if key == 27: # esc exit(0) stages_output = net(images) losses = [] for loss_idx in range(len(total_losses) // 2): losses.append( l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0])) losses.append( l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0])) total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter loss = losses[0] for loss_idx in range(1, len(losses)): loss += losses[loss_idx] loss /= batches_per_iter loss.backward() batch_per_iter_idx += 1 if batch_per_iter_idx == batches_per_iter: optimizer.step() batch_per_iter_idx = 0 num_iter += 1 else: continue if num_iter % log_after == 0: print('Iter: {}'.format(num_iter)) for loss_idx in range(len(total_losses) // 2): print('\n'.join([ 'stage{}_pafs_loss: {}', 'stage{}_heatmaps_loss: {}' ]).format(loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after, loss_idx + 1, total_losses[loss_idx * 2] / log_after)) for loss_idx in range(len(total_losses)): total_losses[loss_idx] = 0 if num_iter % checkpoint_after == 0: snapshot_name = '{}/checkpoint_iter_{}.pth'.format( checkpoints_folder, num_iter) torch.save( { 'state_dict': net.module.state_dict(), 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'iter': num_iter, 'current_epoch': epochId }, snapshot_name) if num_iter % val_after == 0: print('Validation...') evaluate(val_labels, val_output_name, val_images_folder, net) net.train()
import torch from models.with_mobilenet import PoseEstimationWithMobileNet #my particular net architecture from modules.load_state import load_state from torch2trt import torch2trt #import library import time #HERE IT IS HOW COMPILE AND SAVE A MODEL checkpoint_path = '/home/nvidia/Documents/poseFINAL/checkpoints/body.pth' #your trained weights path net = PoseEstimationWithMobileNet() #my particular net istance checkpoint = torch.load(checkpoint_path, map_location='cuda') load_state(net, checkpoint) #load your trained weights path net.cuda().eval() data = torch.rand(( 1, 3, 256, 344)).cuda() #initialize a random tensor with the shape of your input data #model_trt = torch2trt(net, [data]) #IT CREATES THE COMPILED VERSION OF YOUR MODEL, IT TAKES A WHILE #torch.save(model_trt.state_dict(), 'net_trt.pth') #TO SAVE THE WEIGHTS OF THE COMPILED MODEL WICH ARE DIFFERENT FROM THE PREVIOUS ONES #HERE IT IS HOW TO UPLOAD THE MODEL ONCE YOU HAVE COMPILED IT LIKE IN MY CASE THAT I HAVE ALREADY COMPILED IT from torch2trt import TRTModule #import a class model_trt = TRTModule() #the compiled model istance model_trt.load_state_dict(torch.load( 'net_trt.pth')) #load the compiled weights in the compiled model
    if not os.path.isdir("log_data"):
        os.mkdir("log_data")

    if args.record:
        if os.path.isfile("log_data/out_no_vis.avi") or os.path.isfile(
                "log_data/out_with_vis.avi"):
            print("video exists, quitting")
            sys.exit()
        fourcc = cv2.VideoWriter_fourcc(*'X264')
        out_raw = cv2.VideoWriter("log_data/out_no_vis.avi", fourcc, 5.0, (640, 480))
        out_pose = cv2.VideoWriter("log_data/out_with_vis.avi", fourcc, 5.0, (640, 480))

    if args.camera == "webcam":
        from threaded_cam import ThreadedCamera
        cap = ThreadedCamera()
    elif args.camera == "jetson":
        from threaded_cam import jetson_csi_camera
        camSet = ("nvarguscamerasrc sensor-id=0 ! video/x-raw(memory:NVMM), "
                  "width=3280, height=2464, framerate=21/1, format=NV12 ! "
                  "nvvidconv flip-method=0 ! video/x-raw, format=BGRx ! "
                  "videoconvert ! video/x-raw, format=BGR ! appsink")
        cap = jetson_csi_camera(camSet)

    net = PoseEstimationWithMobileNet()
    checkpoint = torch.load(args.checkpoint_path, map_location='cpu')
    load_state(net, checkpoint)

    run_demo(net, args.height_size, args.track, args.smooth, args.record,
             args.camera)
        x1 = center[0] - radius
        y1 = center[1] - radius

        rects.append([x1, y1, 2 * radius, 2 * radius])

        np.savetxt(rect_path, np.array(rects), fmt='%d')


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='''Lightweight human pose estimation python demo.
                       This is just for quick results preview.
                       Please, consider c++ demo for the best performance.''')
    parser.add_argument('--checkpoint-path', type=str, required=True,
                        help='path to the checkpoint')
    parser.add_argument('--images', type=str, default='',
                        help='path to input image(s)')
    args = parser.parse_args()

    files = glob.glob(args.images)
    files2 = []
    for f in files:
        if f.split('.')[-1] in ['png', 'jpeg', 'jpg', 'PNG', 'JPG', 'JPEG']:
            files2.append(f)

    net = PoseEstimationWithMobileNet()
    # Note: loads a hardcoded checkpoint rather than args.checkpoint_path.
    checkpoint = torch.load('checkpoint_iter_370000.pth', map_location='cpu')
    load_state(net, checkpoint)

    get_rect(net.cuda(), files2, 512)

    # for img in args.images:
    #     print(img)
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet, checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name, checkpoint_after, val_after): net = PoseEstimationWithMobileNet(num_refinement_stages) stride = 8 sigma = 7 path_thickness = 1 dataset = CocoTrainDataset(prepared_train_labels, train_images_folder, stride, sigma, path_thickness, transform=transforms.Compose([ ConvertKeypoints(), Scale(), Rotate(pad=(128, 128, 128)), CropPad(pad=(128, 128, 128)), Flip()])) train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) optimizer = optim.Adam([ {'params': get_parameters_conv(net.model, 'weight')}, {'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0}, {'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0}, {'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, {'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr}, {'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, {'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0}, {'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr}, {'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, {'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4}, {'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0}, {'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0}, {'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0}, ], lr=base_lr, weight_decay=5e-4) num_iter = 0 current_epoch = 0 drop_after_epoch = [100, 200, 260] scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333) if checkpoint_path: checkpoint = torch.load(checkpoint_path) if from_mobilenet: load_from_mobilenet(net, checkpoint) else: load_state(net, checkpoint) if not weights_only: optimizer.load_state_dict(checkpoint['optimizer']) scheduler.load_state_dict(checkpoint['scheduler']) num_iter = checkpoint['iter'] current_epoch = checkpoint['current_epoch'] net = DataParallel(net).cuda() net.train() for epochId in range(current_epoch, 280): scheduler.step() total_losses = [0, 0] * (num_refinement_stages + 1) # heatmaps loss, paf loss per stage batch_per_iter_idx = 0 for batch_data in train_loader: if batch_per_iter_idx == 0: optimizer.zero_grad() images = batch_data['image'].cuda() keypoint_masks = batch_data['keypoint_mask'].cuda() paf_masks = batch_data['paf_mask'].cuda() keypoint_maps = batch_data['keypoint_maps'].cuda() paf_maps = batch_data['paf_maps'].cuda() stages_output = net(images) losses = [] for loss_idx in range(len(total_losses) // 2): losses.append(l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0])) losses.append(l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0])) total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter loss = losses[0] for loss_idx in range(1, len(losses)): loss += losses[loss_idx] loss /= batches_per_iter loss.backward() batch_per_iter_idx += 1 if batch_per_iter_idx == batches_per_iter: optimizer.step() batch_per_iter_idx = 0 num_iter += 1 else: continue if 
num_iter % log_after == 0: print('Iter: {}'.format(num_iter)) for loss_idx in range(len(total_losses) // 2): print('\n'.join(['stage{}_pafs_loss: {}', 'stage{}_heatmaps_loss: {}']).format( loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after, loss_idx + 1, total_losses[loss_idx * 2] / log_after)) for loss_idx in range(len(total_losses)): total_losses[loss_idx] = 0 if num_iter % checkpoint_after == 0: snapshot_name = '{}/checkpoint_iter_{}.pth'.format(checkpoints_folder, num_iter) torch.save({'state_dict': net.module.state_dict(), 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'iter': num_iter, 'current_epoch': epochId}, snapshot_name) if num_iter % val_after == 0: print('Validation...') evaluate(val_labels, val_output_name, val_images_folder, net) net.train()
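# The training loop above accumulates gradients over `batches_per_iter` batches
# before each optimizer step. A minimal sketch of that pattern (names
# illustrative):
# for step, batch in enumerate(loader):
#     loss = compute_loss(net(batch)) / batches_per_iter
#     loss.backward()                      # gradients accumulate in .grad
#     if (step + 1) % batches_per_iter == 0:
#         optimizer.step()                 # one update per effective batch
#         optimizer.zero_grad()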
def train(prepared_train_labels, train_images_folder, num_refinement_stages, base_lr, batch_size, batches_per_iter, num_workers, checkpoint_path, weights_only, from_mobilenet, checkpoints_folder, log_after, val_labels, val_images_folder, val_output_name, checkpoint_after, val_after): net = PoseEstimationWithMobileNet(num_refinement_stages) stride = 8 sigma = 7 path_thickness = 1 dataset = CocoTrainDataset(prepared_train_labels, train_images_folder, stride, sigma, path_thickness, transform=transforms.Compose([ ConvertKeypoints(), Scale(), Rotate(pad=(128, 128, 128)), CropPad(pad=(128, 128, 128)), Flip() ])) train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers) optimizer = optim.Adam([ { 'params': get_parameters_conv(net.model, 'weight') }, { 'params': get_parameters_conv_depthwise(net.model, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_bn(net.model, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_bn(net.model, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, { 'params': get_parameters_conv(net.cpm, 'weight'), 'lr': base_lr }, { 'params': get_parameters_conv(net.cpm, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, { 'params': get_parameters_conv_depthwise(net.cpm, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_conv(net.initial_stage, 'weight'), 'lr': base_lr }, { 'params': get_parameters_conv(net.initial_stage, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, { 'params': get_parameters_conv(net.refinement_stages, 'weight'), 'lr': base_lr * 4 }, { 'params': get_parameters_conv(net.refinement_stages, 'bias'), 'lr': base_lr * 8, 'weight_decay': 0 }, { 'params': get_parameters_bn(net.refinement_stages, 'weight'), 'weight_decay': 0 }, { 'params': get_parameters_bn(net.refinement_stages, 'bias'), 'lr': base_lr * 2, 'weight_decay': 0 }, ], lr=base_lr, weight_decay=5e-4) num_iter = 0 current_epoch = 0 drop_after_epoch = [100, 200, 260] scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.333) if checkpoint_path: checkpoint = torch.load(checkpoint_path) if from_mobilenet: load_from_mobilenet(net, checkpoint) else: load_state(net, checkpoint) if not weights_only: optimizer.load_state_dict(checkpoint['optimizer']) scheduler.load_state_dict(checkpoint['scheduler']) num_iter = checkpoint['iter'] current_epoch = checkpoint['current_epoch'] print("optimizer LR") for param_group in optimizer.param_groups: print(param_group['lr']) for state in optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() net = DataParallel(net).cuda() net.train() from DGPT.Visualize.Viz import Viz viz = Viz(dict(env="refine")) for epochId in range(current_epoch, 280): # scheduler.step() total_losses = [0, 0] * (num_refinement_stages + 1 ) # heatmaps loss, paf loss per stage batch_per_iter_idx = 0 for batch_data in train_loader: if batch_per_iter_idx == 0: optimizer.zero_grad() images = batch_data['image'].cuda() keypoint_masks = batch_data['keypoint_mask'].cuda() paf_masks = batch_data['paf_mask'].cuda() keypoint_maps = batch_data['keypoint_maps'].cuda() paf_maps = batch_data['paf_maps'].cuda() images = preprocess(images) stages_output = net(images) losses = [] for loss_idx in range(len(total_losses) // 2): losses.append( l2_loss(stages_output[loss_idx * 2], keypoint_maps, keypoint_masks, images.shape[0])) losses.append( l2_loss(stages_output[loss_idx * 2 + 1], paf_maps, paf_masks, images.shape[0])) total_losses[loss_idx * 2] += losses[-2].item() / batches_per_iter 
total_losses[loss_idx * 2 + 1] += losses[-1].item() / batches_per_iter loss = losses[0] for loss_idx in range(1, len(losses)): loss += losses[loss_idx] loss /= batches_per_iter loss.backward() viz.draw_line(num_iter, loss.item(), "Loss") batch_per_iter_idx += 1 if batch_per_iter_idx == batches_per_iter: optimizer.step() batch_per_iter_idx = 0 num_iter += 1 scheduler.step() else: continue if num_iter % log_after == 0: print('Iter: {}'.format(num_iter)) for loss_idx in range(len(total_losses) // 2): print('\n'.join([ 'stage{}_pafs_loss: {}', 'stage{}_heatmaps_loss: {}' ]).format(loss_idx + 1, total_losses[loss_idx * 2 + 1] / log_after, loss_idx + 1, total_losses[loss_idx * 2] / log_after)) for loss_idx in range(len(total_losses)): total_losses[loss_idx] = 0 xx = images[:1, ...].detach() #.clone() hh = keypoint_maps[:1, ...].detach() #.clone() mm = keypoint_masks[:1, ...].detach() #.clone() print(xx.shape, hh.shape, mm.shape) hh = hh.squeeze(0).reshape(19, 1, hh.shape[2], hh.shape[3]) mm = mm.squeeze(0).reshape(19, 1, hh.shape[2], hh.shape[3]) viz.draw_images(xx, "input1") viz.draw_images(hh, "input1_heatmap") viz.draw_images(mm, "input1_mask") oh = stages_output[-2].detach()[:1, :-1, ...] oh = oh.reshape(oh.shape[1], 1, oh.shape[2], oh.shape[3]) viz.draw_images(oh, "output1_heatmap") if num_iter % checkpoint_after == 0: snapshot_name = '{}/checkpoint_iter_{}.pth'.format( checkpoints_folder, num_iter) torch.save( { 'state_dict': net.module.state_dict(), 'optimizer': optimizer.state_dict(), 'scheduler': scheduler.state_dict(), 'iter': num_iter, 'current_epoch': epochId }, snapshot_name) if num_iter % val_after == 0: print('Validation...') evaluate(val_labels, val_output_name, val_images_folder, net) net.train()