def train(func_train_one_batch, param_dict, path, log_dir_path):
    dis_loss = []
    pos_gen_loss = []
    neg_gen_loss = []
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    p = Logger(log_dir_path, **param_dict)

    # load dataset
    if p.dataset == 'car196':
        data = Car196(root=path)
    else:
        print('DATASET is', p.dataset)
        data = CUB_200_2011(root=path)

    sampler = BalancedBatchSampler(data.train.label_to_indices,
                                   n_samples=p.n_samples, n_classes=p.n_classes)
    kwargs = {'num_workers': 6, 'pin_memory': True}
    train_loader = DataLoader(data.train, batch_sampler=sampler, **kwargs)  # (5 * 98, 3, 224, 224)
    # train_iter = iter(train_loader)
    # batch = next(train_iter)
    # generate_random_triplets_from_batch(batch, p.n_samples, p.n_classes)
    test_loader = DataLoader(data.test, batch_size=p.batch_size)

    # construct the models
    model = ModifiedGoogLeNet(p.out_dim, p.normalize_hidden).to(device)
    model_pos_gen = Generator(p.out_dim, p.normalize_hidden).to(device)
    model_neg_gen = Generator(p.out_dim, p.normalize_output).to(device)
    model_dis = Discriminator(p.out_dim, p.out_dim).to(device)
    model_optimizer = optim.Adam(model.parameters(), lr=p.learning_rate)
    pos_gen_optimizer = optim.Adam(model_pos_gen.parameters(), lr=p.learning_rate)
    neg_gen_optimizer = optim.Adam(model_neg_gen.parameters(), lr=p.learning_rate)
    dis_optimizer = optim.Adam(model_dis.parameters(), lr=p.learning_rate)
    model_feat_optimizer = optim.Adam(model.parameters(), lr=p.learning_rate)

    time_origin = time.time()
    best_nmi_1 = 0.
    best_f1_1 = 0.
    best_nmi_2 = 0.
    best_f1_2 = 0.

    for epoch in range(p.num_epochs):
        time_begin = time.time()
        epoch_loss_neg_gen = 0
        epoch_loss_pos_gen = 0
        epoch_loss_dis = 0
        total = 0

        for batch in tqdm(train_loader, desc='# {}'.format(epoch)):
            triplet_batch = generate_random_triplets_from_batch(
                batch, n_samples=p.n_samples, n_class=p.n_classes)
            loss_pos_gen, loss_neg_gen, loss_dis = func_train_one_batch(
                device, model, model_pos_gen, model_neg_gen, model_dis,
                model_optimizer, model_feat_optimizer, pos_gen_optimizer,
                neg_gen_optimizer, dis_optimizer, p, triplet_batch, epoch)
            '''
            loss_dis = func_train_one_batch(device, model, model_dis, model_pos_gen,
                                            model_optimizer, dis_optimizer, p, triplet_batch)
            '''
            epoch_loss_neg_gen += loss_neg_gen
            epoch_loss_pos_gen += loss_pos_gen
            epoch_loss_dis += loss_dis
            total += triplet_batch[0].size(0)

        loss_average_neg_gen = epoch_loss_neg_gen / total
        loss_average_pos_gen = epoch_loss_pos_gen / total
        loss_average_dis = epoch_loss_dis / total
        dis_loss.append(loss_average_dis)
        pos_gen_loss.append(loss_average_pos_gen)
        neg_gen_loss.append(loss_average_neg_gen)

        nmi, f1 = evaluate(device, model, model_dis, test_loader, epoch,
                           n_classes=p.n_classes, distance=p.distance_type,
                           normalize=p.normalize_output, neg_gen_epoch=p.neg_gen_epoch)
        # keep the checkpoints that achieve the best NMI so far
        if nmi > best_nmi_1:
            best_nmi_1 = nmi
            best_f1_1 = f1
            torch.save(model, os.path.join(p.model_save_path, "model.pt"))
            torch.save(model_pos_gen, os.path.join(p.model_save_path, "model_pos_gen.pt"))
            torch.save(model_neg_gen, os.path.join(p.model_save_path, "model_neg_gen.pt"))
            torch.save(model_dis, os.path.join(p.model_save_path, "model_dis.pt"))
        # separately track the best F1 score
        if f1 > best_f1_2:
            best_nmi_2 = nmi
            best_f1_2 = f1

        time_end = time.time()
        epoch_time = time_end - time_begin
        total_time = time_end - time_origin

        print("#", epoch)
        print("time: {} ({})".format(epoch_time, total_time))
        print("[train] loss NEG gen:", loss_average_neg_gen)
        print("[train] loss POS gen:", loss_average_pos_gen)
        print("[train] loss dis:", loss_average_dis)
        print("[test] nmi:", nmi)
        print("[test] f1:", f1)
        print("[test] nmi:", best_nmi_1, " f1:", best_f1_1, "for max nmi")
        print("[test] nmi:", best_nmi_2, " f1:", best_f1_2, "for max f1")
        print(p)

        # refresh the loss curves after every epoch
        plt.plot(dis_loss)
        plt.ylabel("dis_loss")
        plt.savefig(log_dir_path + '/dis_loss.png')
        plt.close()
        plt.plot(pos_gen_loss)
        plt.ylabel("pos_gen_loss")
        plt.savefig(log_dir_path + '/pos_gen_loss.png')
        plt.close()
        plt.plot(neg_gen_loss)
        plt.ylabel("neg_gen_loss")
        plt.savefig(log_dir_path + '/neg_gen_loss.png')
        plt.close()
def main(args):
    #! TCP server
    MESSAGE = [None]
    t = Thread(target=tcpThread, args=(MESSAGE, event))
    t.start()

    #! read images from the camera
    from joints_detectors.Alphapose.video2d import DetectionLoader
    det_loader = DetectionLoader(size=args.viz_size, device=0)

    #! visualization
    from common.visualization import Sequencial_animation, RealtimePlot
    pose3d_animation = Sequencial_animation(skeleton=Skeleton(), i=8,
                                            size=args.viz_size,
                                            azim=np.array(70., dtype=np.float32))
    fig_angle = plt.figure()
    ax1 = fig_angle.add_subplot(411)
    ax2 = fig_angle.add_subplot(412)
    ax3 = fig_angle.add_subplot(413)
    ax4 = fig_angle.add_subplot(414)
    # ani1_r = RealtimePlot(fig_angle, ax1, "squatting", "r")
    # ani1_y = RealtimePlot(fig_angle, ax1, "squatting", "y")
    # ani2_r = RealtimePlot(fig_angle, ax2, "turning", "r")
    # ani2_y = RealtimePlot(fig_angle, ax2, "turning", "y")
    # ani3_r = RealtimePlot(fig_angle, ax3, "moving", "r")
    # ani3_y = RealtimePlot(fig_angle, ax3, "moving", "y")
    # ani4_r = RealtimePlot(fig_angle, ax4, "walking", "r")
    # ani4_y = RealtimePlot(fig_angle, ax4, "walking", "y")
    ani1_r = RealtimePlot(fig_angle, ax1, "LElbowRoll_filtered", "r")
    ani1_y = RealtimePlot(fig_angle, ax1, "LElbowRoll", "y")
    ani2_r = RealtimePlot(fig_angle, ax2, "LElbowYaw_filtered", "r")
    ani2_y = RealtimePlot(fig_angle, ax2, "LElbowYaw", "y")
    ani3_r = RealtimePlot(fig_angle, ax3, "LShoulderRoll_filtered", "r")
    ani3_y = RealtimePlot(fig_angle, ax3, "LShoulderRoll", "y")
    ani4_r = RealtimePlot(fig_angle, ax4, "LShoulderPitch_filtered", "r")
    ani4_y = RealtimePlot(fig_angle, ax4, "LShoulderPitch", "y")
    thismanager = get_current_fig_manager()
    thismanager.window.wm_geometry("+1000+0")
    plt.ion()  # continuously plot

    #! load the 3d pose estimation model
    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3],
                              causal=args.causal, dropout=args.dropout,
                              channels=args.channels, dense=args.dense)
    if torch.cuda.is_available():
        model_pos = model_pos.cuda()
    ckpt, time1 = ckpt_time(time0)
    print('-------------- load 3d pose estimation model spends {:.2f} seconds'.format(ckpt))

    #! load trained weights
    chk_filename = os.path.join(args.checkpoint, args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
    model_pos.load_state_dict(checkpoint['model_pos'])

    # Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0
    ckpt, time2 = ckpt_time(time1)
    print('-------------- load trained weights for 3D model spends {:.2f} seconds'.format(ckpt))

    #! Initialize keypoint metadata
    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list([4, 5, 6, 11, 12, 13]), list([1, 2, 3, 14, 15, 16])
    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    kp_deque = deque(maxlen=9)
    compute_walking = Compute_walking()

    #! loop over frames (currently a fake frame counter)
    ckpt, time3 = ckpt_time(time2)

    #! Prepare deques for the 8 arm joints
    q_LShoulderPitch, q_RShoulderPitch, q_LShoulderRoll, q_RShoulderRoll = [deque(maxlen=7) for i in range(4)]
    q_LElbowYaw, q_RElbowYaw, q_LElbowRoll, q_RElbowRoll = [deque(maxlen=7) for i in range(4)]

    try:
        while not event.is_set():
            frame = -1
            while frame < 65501:  # keep the counter within range of the float16 payload below
                ckpt, time3 = ckpt_time(time3)
                frame += 1
                print("------------------- frame ", frame,
                      '-------- generate reconstruction 3D data spends {:.2f} seconds'.format(ckpt))

                #! get 2d key points
                kp = det_loader.update()
                if kp is None:
                    continue
                kp_deque.append(kp.numpy())
                if len(kp_deque) < 9:
                    continue

                #! estimate the 3d pose
                # normalize keypoints, assuming the camera parameters below
                input_keypoints = normalize_screen_coordinates(np.asarray(kp_deque)[..., :2], w=1000, h=1002)
                prediction = evaluate(input_keypoints, pad, model_pos, return_predictions=True)
                # rotate from the camera to the world perspective
                prediction = camera_to_world(prediction, R=rot, t=0)
                # We don't have the trajectory, but at least we can rebase the height
                prediction[:, :, 2] -= np.min(prediction[:, :, 2])
                # input_keypoints = image_coordinates(input_keypoints[..., :2], w=1000, h=1002)

                #! Visualize 3d
                prediction = prediction[8]
                pose3d_animation.call(prediction)

                #! Motion retargeting
                MHip, RHip, RKnee, RAnkle, LHip, LKnee, LAnkle, Waist, Neck, Face, Top, \
                    LShoulder, LElbow, LWrist, RShoulder, RElbow, RWrist = prediction
                """
                Top: 10, Face: 9, Neck: 8, Waist: 7, MHip: 0
                LShoulder: 11, LElbow: 12, LWrist: 13, LHip: 4, LKnee: 5, LAnkle: 6
                RShoulder: 14, RElbow: 15, RWrist: 16, RHip: 1, RKnee: 2, RAnkle: 3
                """
                left_upperarm = LElbow - LShoulder
                right_upperarm = RElbow - RShoulder
                left_lowerarm = LWrist - LElbow
                right_lowerarm = RWrist - RElbow
                left_upperleg = LHip - LKnee
                left_lowerleg = LAnkle - LKnee
                right_upperleg = RHip - RKnee
                right_lowerleg = RAnkle - RKnee

                #### Remapping ####
                coord = compute_torso_coord(Neck, LHip, RHip)
                LShoulderRoll, LShoulderPitch, left_upperarm_t = compute_shoulder_rotation(left_upperarm, coord)
                RShoulderRoll, RShoulderPitch, right_upperarm_t = compute_shoulder_rotation(right_upperarm, coord)
                LElbowYaw, LElbowRoll, left_upperarm_t, left_lowerarm_t = compute_elbow_rotation(
                    left_upperarm, left_lowerarm, coord, -1)
                RElbowYaw, RElbowRoll, right_upperarm_t, right_lowerarm_t = compute_elbow_rotation(
                    right_upperarm, right_lowerarm, coord, 1)
                Turning, turningangle = get_turning(coord[2])
                walking, walkingangle = compute_walking(LAnkle[1] - RAnkle[1])
                squatting, squattingangle_left, squattingangle_right = get_squatting(
                    left_upperleg, left_lowerleg, right_upperleg, right_lowerleg)
                moving, movingangle = get_moving(Top[0] - (RAnkle[0] + LAnkle[0]) / 2)

                #### Filtering ####
                LShoulderPitch_n = filter_data(q_LShoulderPitch, LShoulderPitch, median_filter)
                LShoulderRoll_n = filter_data(q_LShoulderRoll, LShoulderRoll, median_filter)
                RShoulderPitch_n = filter_data(q_RShoulderPitch, RShoulderPitch, median_filter)
                RShoulderRoll_n = filter_data(q_RShoulderRoll, RShoulderRoll, median_filter)
                LElbowYaw_n = filter_data(q_LElbowYaw, LElbowYaw, median_filter)
                LElbowRoll_n = filter_data(q_LElbowRoll, LElbowRoll, median_filter)
                RElbowYaw_n = filter_data(q_RElbowYaw, RElbowYaw, median_filter)
                RElbowRoll_n = filter_data(q_RElbowRoll, RElbowRoll, median_filter)

                #! Plot angles
                # ani1_r.call(frame, math.degrees(LElbowRoll_n))
                # ani1_y.call(frame, math.degrees(LElbowRoll))
                # ani2_r.call(frame, math.degrees(LElbowYaw_n))
                # ani2_y.call(frame, math.degrees(LElbowYaw))
                # ani3_r.call(frame, math.degrees(LShoulderRoll_n))
                # ani3_y.call(frame, math.degrees(LShoulderRoll))
                # ani4_r.call(frame, math.degrees(LShoulderPitch_n))
                # ani4_y.call(frame, math.degrees(LShoulderPitch))

                # ani1_r.call(frame, LShoulderRoll_n)
                # ani2_r.call(frame, LShoulderPitch_n)
                # ani1_y.call(frame, LShoulderRoll)
                # ani2_y.call(frame, LShoulderPitch)

                # ani1_y.call(frame, squattingangle)
                # ani1_r.call(frame, 180 * squatting)
                # ani2_y.call(frame, turningangle)
                # ani2_r.call(frame, 60 * Turning)
                # ani3_y.call(frame, 3 * 180 * movingangle)
                # ani3_r.call(frame, 60 * moving)
                # ani4_y.call(frame, walkingangle)
                # ani4_r.call(frame, 180 * walking)

                #! send udp
                message = np.array((frame,
                                    LShoulderRoll_n, LShoulderPitch_n, RShoulderRoll_n, RShoulderPitch_n,
                                    LElbowYaw_n, LElbowRoll_n, RElbowYaw_n, RElbowRoll_n,
                                    Turning, squatting, walking, moving))
                MESSAGE[0] = message.astype(np.float16).tobytes()
                # print(sys.getsizeof(MESSAGE))
                # print(Turning, squatting, walking)
                # print(message)
    except KeyboardInterrupt:
        plt.ioff()
        cv2.destroyAllWindows()
        return
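# The joint angles above are smoothed with filter_data() over short per-joint
# deques before being packed into the UDP payload. The repository's
# implementation is not shown here, so the following is only a plausible sketch
# of such a deque-based median filter; the signature is taken from the call
# sites above, the bodies are assumptions.
import numpy as np
from collections import deque


def filter_data(history, new_value, filter_fn):
    """Sketch (assumed): push the newest angle into its per-joint deque and
    return the filtered value over the current window."""
    history.append(new_value)
    return filter_fn(history)


def median_filter(window):
    # Median over the window; robust to single-frame pose-estimation glitches.
    return float(np.median(np.asarray(window)))


# Usage mirroring the loop above (values are hypothetical):
q_example = deque(maxlen=7)
for raw_angle in [0.31, 0.33, 1.90, 0.32]:   # 1.90 is a one-frame outlier
    smoothed = filter_data(q_example, raw_angle, median_filter)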
def main(args):
    detector_2d = get_detector_2d(args.detector_2d)
    assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'

    # 2D kpts: load or generate
    if not args.input_npz:
        video_name = args.viz_video
        keypoints = detector_2d(video_name)
    else:
        npz = np.load(args.input_npz)
        keypoints = npz['kpts']  # (N, 17, 2)

    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list([4, 5, 6, 11, 12, 13]), list([1, 2, 3, 14, 15, 16])

    # normalize keypoints, assuming the camera parameters below
    keypoints = normalize_screen_coordinates(keypoints[..., :2], w=1000, h=1002)

    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=args.causal,
                              dropout=args.dropout, channels=args.channels, dense=args.dense)
    if torch.cuda.is_available():
        model_pos = model_pos.cuda()
    ckpt, time1 = ckpt_time(time0)
    print('-------------- load data spends {:.2f} seconds'.format(ckpt))

    # load the trained model
    chk_filename = os.path.join(args.checkpoint, args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(chk_filename,
                            map_location=lambda storage, loc: storage)  # map each storage to itself, i.e. load on CPU
    model_pos.load_state_dict(checkpoint['model_pos'])
    ckpt, time2 = ckpt_time(time1)
    print('-------------- load 3D model spends {:.2f} seconds'.format(ckpt))

    # Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Rendering...')
    input_keypoints = keypoints.copy()
    gen = UnchunkedGenerator(None, None, [input_keypoints], pad=pad, causal_shift=causal_shift,
                             augment=args.test_time_augmentation, kps_left=kps_left,
                             kps_right=kps_right, joints_left=joints_left, joints_right=joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)

    # save the 3D joint points
    np.save('outputs/test_3d_output.npy', prediction, allow_pickle=True)

    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    # We don't have the trajectory, but at least we can rebase the height
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])

    anim_output = {'Reconstruction': prediction}
    input_keypoints = image_coordinates(input_keypoints[..., :2], w=1000, h=1002)
    ckpt, time3 = ckpt_time(time2)
    print('-------------- generate reconstruction 3D data spends {:.2f} seconds'.format(ckpt))

    if not args.viz_output:
        args.viz_output = 'outputs/alpha_result.mp4'

    from common.visualization import render_animation
    render_animation(input_keypoints, anim_output, Skeleton(), 25, args.viz_bitrate,
                     np.array(70., dtype=np.float32), args.viz_output, limit=args.viz_limit,
                     downsample=args.viz_downsample, size=args.viz_size,
                     input_video_path=args.viz_video, viewport=(1000, 1002),
                     input_video_skip=args.viz_skip)

    ckpt, time4 = ckpt_time(time3)
    print('total spends {:.2f} seconds'.format(ckpt))
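# Every stage above is timed with ckpt_time(prev). Judging from the call sites
# (e.g. `ckpt, time1 = ckpt_time(time0)`), it returns the elapsed interval plus
# a fresh timestamp. A minimal sketch of that helper, assuming exactly this
# behavior (the repository's own definition is not shown in this section):
import time


def ckpt_time(prev_time):
    """Assumed behavior: return (seconds elapsed since prev_time, current time)."""
    now = time.time()
    return now - prev_time, now


# Usage: thread the returned timestamp into the next call.
time0 = time.time()
# ... load the model ...
ckpt, time1 = ckpt_time(time0)
print('-------------- load 3d pose estimation model spends {:.2f} seconds'.format(ckpt))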
def main(args):
    # Step 1: detect 2D keypoints
    detector_2d = get_detector_2d(args.detector_2d)
    assert detector_2d, 'detector_2d should be in ({alpha, hr, open}_pose)'

    # 2D kpts: load or generate
    if not args.input_npz:
        video_name = args.viz_video
        keypoints = detector_2d(video_name)
    else:
        npz = np.load(args.input_npz)
        keypoints = npz['kpts']  # (N, 17, 2)

    # Step 2: lift the 2D keypoints to 3D keypoints
    keypoints_symmetry = metadata['keypoints_symmetry']
    kps_left, kps_right = list(keypoints_symmetry[0]), list(keypoints_symmetry[1])
    joints_left, joints_right = list([4, 5, 6, 11, 12, 13]), list([1, 2, 3, 14, 15, 16])

    # normalize keypoints, assuming the camera parameters below
    keypoints = normalize_screen_coordinates(keypoints[..., :2], w=1000, h=1002)

    model_pos = TemporalModel(17, 2, 17, filter_widths=[3, 3, 3, 3, 3], causal=args.causal,
                              dropout=args.dropout, channels=args.channels, dense=args.dense)
    if torch.cuda.is_available():
        model_pos = model_pos.cuda()
    ckpt, time1 = ckpt_time(time0)
    print('-------------- load data spends {:.2f} seconds'.format(ckpt))

    # load the trained model
    chk_filename = os.path.join(args.checkpoint, args.resume if args.resume else args.evaluate)
    print('Loading checkpoint', chk_filename)
    checkpoint = torch.load(chk_filename,
                            map_location=lambda storage, loc: storage)  # map each storage to itself, i.e. load on CPU
    model_pos.load_state_dict(checkpoint['model_pos'])
    ckpt, time2 = ckpt_time(time1)
    print('-------------- load 3D model spends {:.2f} seconds'.format(ckpt))

    # Receptive field: 243 frames for args.arc [3, 3, 3, 3, 3]
    receptive_field = model_pos.receptive_field()
    pad = (receptive_field - 1) // 2  # Padding on each side
    causal_shift = 0

    print('Rendering...')
    input_keypoints = keypoints.copy()
    gen = UnchunkedGenerator(None, None, [input_keypoints], pad=pad, causal_shift=causal_shift,
                             augment=args.test_time_augmentation, kps_left=kps_left,
                             kps_right=kps_right, joints_left=joints_left, joints_right=joints_right)
    prediction = evaluate(gen, model_pos, return_predictions=True)

    # save the 3D joint points
    np.save('outputs/test_3d_output.npy', prediction, allow_pickle=True)

    # Step 3: transform the predicted 3D points from camera coordinates to world coordinates
    # (1) first conversion method
    rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
    prediction = camera_to_world(prediction, R=rot, t=0)
    # We don't have the trajectory, but at least we can rebase the height:
    # subtract the minimum Z value from the predictions so that all Z values are positive
    prediction[:, :, 2] -= np.min(prediction[:, :, 2])

    # (2) second conversion method
    # subject = 'S1'
    # cam_id = '55011271'
    # cam_params = load_camera_params('./camera/cameras.h5')[subject][cam_id]
    # R = cam_params['R']
    # T = 0
    # azimuth = cam_params['azimuth']
    #
    # prediction = camera2world(pose=prediction, R=R, T=T)
    # prediction[:, :, 2] -= np.min(prediction[:, :, 2])  # rebase the height

    # Step 4: export the 3D keypoints and convert the predicted 3D points to bvh skeletons
    # export the 3D prediction points
    write_3d_point(args.viz_output, prediction)

    # convert the predicted 3D skeleton points to bvh skeletons
    prediction_copy = np.copy(prediction)
    write_standard_bvh(args.viz_output, prediction_copy)   # convert to a standard bvh skeleton
    write_smartbody_bvh(args.viz_output, prediction_copy)  # convert to the bvh skeleton required by SmartBody

    anim_output = {'Reconstruction': prediction}
    input_keypoints = image_coordinates(input_keypoints[..., :2], w=1000, h=1002)
    ckpt, time3 = ckpt_time(time2)
    print('-------------- generate reconstruction 3D data spends {:.2f} seconds'.format(ckpt))

    if not args.viz_output:
        args.viz_output = 'outputs/outputvideo/alpha_result.mp4'

    # Step 5: render the output video
    # from common.visualization import render_animation
    # render_animation(input_keypoints, anim_output,
    #                  Skeleton(), 25, args.viz_bitrate, np.array(70., dtype=np.float32), args.viz_output,
    #                  limit=args.viz_limit, downsample=args.viz_downsample, size=args.viz_size,
    #                  input_video_path=args.viz_video, viewport=(1000, 1002),
    #                  input_video_skip=args.viz_skip)

    ckpt, time4 = ckpt_time(time3)
    print('total spends {:.2f} seconds'.format(ckpt))
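# The camera-to-world step in Step 3 applies the unit quaternion `rot` to every
# predicted joint; VideoPose3D-style code does this with a batched quaternion
# rotation (qrot). Below is a minimal NumPy sketch of that idea, assuming the
# (w, x, y, z) component order and zero translation. It is illustrative only,
# not the repository's camera_to_world implementation.
import numpy as np


def qrot_np(q, v):
    """Rotate vectors v (..., 3) by unit quaternions q (..., 4) in (w, x, y, z) order:
    v' = v + 2 * (w * (u x v) + u x (u x v)), with u the vector part of q."""
    qvec = q[..., 1:]
    uv = np.cross(qvec, v)
    uuv = np.cross(qvec, uv)
    return v + 2.0 * (q[..., :1] * uv + uuv)


def camera_to_world_sketch(pose, R, t=0):
    """Illustrative stand-in for the camera_to_world call above:
    rotate every joint by R and add the (here zero) translation."""
    q = np.broadcast_to(R, pose.shape[:-1] + (4,))
    return qrot_np(q, pose) + t


rot = np.array([0.14070565, -0.15007018, -0.7552408, 0.62232804], dtype=np.float32)
fake_prediction = np.random.randn(243, 17, 3).astype(np.float32)   # (frames, joints, xyz), dummy data
world = camera_to_world_sketch(fake_prediction, rot)
world[:, :, 2] -= world[:, :, 2].min()                              # rebase the height, as in Step 3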