def calculate_and_write_pose_maps(image_paths, output_directory, mode):
    cfg = load_config("demo/pose_cfg.yaml")

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    for i, path in enumerate(image_paths):
        sys.stdout.write('\r >> Evaluating path %d of %d' % (i + 1, len(image_paths)))
        sys.stdout.flush()

        # Read image from file
        image = imread(path, mode='RGB')
        target_width = image.shape[1] * TARGET_HEIGHT / image.shape[0]
        scaled_image = imresize(image, (int(TARGET_HEIGHT), int(target_width)), 'cubic')
        image_batch = data_to_input(scaled_image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        write_scmap(output_directory, ntpath.basename(path), scmap, mode, image.shape)

    print('\nFinished generating pose maps...')
def main():
    # paths to setup
    annotation_path = '/home/babybrain/Escritorio/300145_via.json'
    frames_path = '/home/babybrain/Escritorio/300145'

    # get the x-y annotations for each frame
    annotations = load_annotations(annotation_path)

    # get x, y positions for a certain part
    part_id_index = 4

    # we'll get elbows, need left and right (algorithm doesn't discriminate)
    file_anno, x_anno_r, y_anno_r = get_xy_for('r-elbow', annotations)
    _, x_anno_l, y_anno_l = get_xy_for('l-elbow', annotations)

    # get the x, y model prediction for each frame annotated
    cfg = load_config(
        "/home/babybrain/PycharmProjects/pose-tensorflow/demo/pose_cfg_babybrain.yaml"
    )

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # run session for each frame image annotated
    x_model = np.empty(len(file_anno))
    y_model = np.empty(len(file_anno))
    for index, an_image in enumerate(file_anno):
        infile = "{path}/{name}".format(path=frames_path, name=an_image)
        image = imread(infile, mode='RGB')
        image_batch = data_to_input(image)

        # Compute prediction with CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        x_model[index] = pose[part_id_index, 0]
        y_model[index] = pose[part_id_index, 1]

    # now calculate distances
    distances_r = calculate_distances(x_model, y_model, x_anno_r, y_anno_r)
    distances_l = calculate_distances(x_model, y_model, x_anno_l, y_anno_l)

    # merge the best distance results
    distances = [min(xr, xl) for xr, xl in zip(distances_r, distances_l)]
    distances = np.array(distances)

    distance_steps, rates = detection_rate(distances, nsteps=50)
    rates = rates * 100

    # finally plot the graph
    fig, ax = plt.subplots()
    ax.plot(distance_steps, rates)
    ax.set_xlabel('Normalized Distance')
    ax.set_ylabel('Detection %')
    ax.set_title('Distance threshold vs Detection Ratio')
    ax.set_xlim([0, 0.5])
    plt.show()
def dlc_setupTF(options):
    dlc_config_settings = load_yaml(options['cfg_dlc'])
    cfg = load_config(dlc_config_settings['dlc_network_posecfg'])
    cfg['init_weights'] = dlc_config_settings['dlc_network_snapshot']
    scorer = dlc_config_settings['scorer']
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    return {'scorer': scorer,
            'sess': sess,
            'inputs': inputs,
            'outputs': outputs,
            'cfg': cfg}
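# A minimal usage sketch for dlc_setupTF: `dlc_predict_frame` is a
# hypothetical helper, `frame` is assumed to be an RGB numpy array, and the
# inference calls mirror the single-person pattern used by the other
# snippets here (extract_cnn_output may return two or three values
# depending on the pose-tensorflow version; the three-value form is assumed).
def dlc_predict_frame(tf_settings, frame):
    image_batch = data_to_input(frame)
    outputs_np = tf_settings['sess'].run(
        tf_settings['outputs'],
        feed_dict={tf_settings['inputs']: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, tf_settings['cfg'])
    return predict.argmax_pose_predict(scmap, locref, tf_settings['cfg'].stride)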
def preprocess(video_name, duration):
    source_path = f'./data/video/{video_name}.mp4'

    csv_base_path = './data/poses/'
    if not os.path.exists(csv_base_path):
        os.makedirs(csv_base_path)
    csv_path = f'{csv_base_path}{video_name}_poses.csv'

    audio_base_path = './data/audio/'
    if not os.path.exists(audio_base_path):
        os.makedirs(audio_base_path)
    audio_path = f'{audio_base_path}{video_name}.mp3'

    start_time = datetime.now()

    video = mpe.VideoFileClip(source_path)
    if duration < 0:
        duration = video.duration
    frame_count = int(video.fps * duration)
    frame_length = 1 / video.fps
    print(f'video length: {video.duration}s fps: {video.fps} frame count: {frame_count}')

    # Load and setup CNN part detector
    cfg = load_config('./pose_cfg.yaml')
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    print('pose model loaded')

    poses = []
    times = []
    for i in range(frame_count):
        t = i * frame_length
        frame = video.get_frame(t)
        image_batch = data_to_input(frame)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        poses.append(pose)
        times.append(t)

        if i % 100 == 0:
            print(f'processed frame: {i}/{frame_count} '
                  f'elapsed time: {datetime.now() - start_time}', end='\r')

    sess.close()

    print(f'saving poses at {csv_path}')
    save_poses(np.array(poses), times, cfg, csv_path)

    print(f'saving audio at {audio_path}')
    video.audio.write_audiofile(audio_path)

    print(f'total time: {datetime.now() - start_time}')
def run_inference(cfg, input_data, tf_var=None):
    if tf_var is None:
        sess, batch_inputs, outputs = setup_pose_prediction(cfg)
    else:
        sess, batch_inputs, outputs = tf_var
    pose = run_pose_prediction(cfg, input_data, sess, batch_inputs, outputs)
    return pose
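# A minimal usage sketch for run_inference, assuming a hypothetical config
# path and an iterable `frames` of RGB numpy arrays; caching tf_var avoids
# rebuilding the TensorFlow graph on every call.
cfg = load_config("demo/pose_cfg.yaml")  # assumed config location
tf_var = list(setup_pose_prediction(cfg))  # [sess, batch_inputs, outputs]
for frame in frames:
    pose = run_inference(cfg, frame, tf_var=tf_var)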
def main(option):
    start_time = time.time()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    fps_time = 0
    slopes = {}
    k = 0

    # Open the reference and user video streams; the webcam capture
    # deliberately replaces the IP-camera stream below
    cap = cv2.VideoCapture("http://192.168.43.31:8081")
    cap_user = cv2.VideoCapture('/dev/video0')
    cap = cap_user

    i = 0
    while True:
        ret, orig_frame = cap.read()
        ret2, orig_frame_user = cap_user.read()
        if i % 25 == 0:
            frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
            user_frame = cv2.resize(orig_frame_user, (0, 0), fx=0.50, fy=0.50)
            co1 = run_predict(frame, sess, outputs, inputs, cfg, dataset, sm, draw_multi)
            print("CO1 ", co1)
            user_co1 = run_predict(user_frame, sess, outputs, inputs, cfg, dataset, sm, draw_multi)
            print("USER_CO1 ", user_co1)

            k = None
            try:
                slope_reqd, slope_user = slope_calc(co1, user_co1)
                k, s = compare_images(slope_reqd, slope_user, 0.75)
            except IndexError:
                # raised when co1 and user_co1 have different lengths
                print("Except condition")
            vibrate(k)

            frame = cv2.resize(frame, (0, 0), fx=2.0, fy=2.0)
            user_frame = cv2.resize(user_frame, (0, 0), fx=2.0, fy=2.0)
            cv2.putText(user_frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('user_frame', user_frame)
            cv2.imshow('frame', frame)
            fps_time = time.time()
        i += 1  # advance the frame counter so only every 25th frame is processed
        if cv2.waitKey(10) == ord('q'):
            break

    elapsed = time.time() - start_time
    cap.release()
    cap_user.release()
    cv2.destroyAllWindows()
def test_net(visualise, cache_scoremaps):
    logging.basicConfig(level=logging.INFO)

    cfg = load_config()
    dataset = create_dataset(cfg)
    dataset.set_shuffle(False)
    dataset.set_test_mode(True)

    sess, inputs, outputs = setup_pose_prediction(cfg)

    if cache_scoremaps:
        out_dir = cfg.scoremap_dir
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    num_images = dataset.num_images
    predictions = np.zeros((num_images,), dtype=np.object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        batch = dataset.next_batch()

        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        scmap, locref, pairwise_diff = extract_cnn_output(outputs_np, cfg)

        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            visualize.show_heatmaps(cfg, img, scmap, pose)
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn, mdict={'scoremaps': scmap.astype('float32')})

            out_fn = os.path.join(out_dir, raw_name + '_locreg.mat')
            if cfg.location_refinement:
                scipy.io.savemat(out_fn, mdict={'locreg_pred': locref.astype('float32')})

    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()
def run_dataset():
    cfg = deepcopy(load_config())
    if not os.path.exists(cfg.dir_json_pred):
        os.makedirs(cfg.dir_json_pred)

    sess, batch_inputs, outputs = setup_pose_prediction(cfg)
    tf_var = [sess, batch_inputs, outputs]

    if cfg.dataset_type == "posetrack_v1":
        from dataset.posetrack_v1 import run_posetrack_v1
        run_posetrack_v1(cfg, tf_var)
    elif cfg.dataset_type == "posetrack_v2":
        from dataset.posetrack_v2 import run_posetrack_v2
        run_posetrack_v2(cfg, tf_var)
def main(option):
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    tf.reset_default_graph()
    draw_multi = PersonDraw()
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    fps_time = 0

    # Open the reference animation and the user's recording
    cap = cv2.VideoCapture('msgifs/icon4.gif')
    cap_user = cv2.VideoCapture('user.mp4')

    i = 0
    while True:
        ret, orig_frame = cap.read()
        ret2, orig_frame_user = cap_user.read()
        if i % 25 == 0:
            frame = cv2.resize(orig_frame, (0, 0), fx=0.50, fy=0.50)
            user_frame = cv2.resize(orig_frame_user, (0, 0), fx=0.50, fy=0.50)
            co1 = run_predict(frame, sess, inputs, outputs, cfg, dataset, sm, draw_multi)
            user_co1 = run_predict(user_frame, sess, inputs, outputs, cfg, dataset, sm, draw_multi)
            try:
                slope_reqd = slope_calc(co1)
                slope_user = slope_calc(user_co1)
                compare_images(slope_reqd, slope_user, 0.1)
            except IndexError:
                # raised when co1 and user_co1 have different lengths, e.g.
                # when not all of the user's keypoints are in frame
                pass
            cv2.putText(user_frame, "FPS: %f" % (1.0 / (time.time() - fps_time)),
                        (10, 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv2.imshow('user_frame', user_frame)
            cv2.imshow('frame', frame)
            fps_time = time.time()
        i += 1  # advance the frame counter so only every 25th frame is processed
        if cv2.waitKey(10) == ord('q'):
            break

    cap.release()
    cap_user.release()
    cv2.destroyAllWindows()
def predict_frame(video, t):
    frame_count = int(video.fps * video.duration)
    frame_length = 1 / video.fps

    # Load and setup CNN part detector
    cfg = load_config('./pose_cfg.yaml')
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    frame = video.get_frame(t)
    image_batch = data_to_input(frame)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
    return pose
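# A minimal usage sketch for predict_frame, assuming a hypothetical video
# path; moviepy is already used by the surrounding snippets. Note that
# predict_frame rebuilds the TF session on every call, so it suits one-off
# queries rather than per-frame loops.
import moviepy.editor as mpe
clip = mpe.VideoFileClip('./data/video/example.mp4')  # hypothetical path
pose_at_2s = predict_frame(clip, 2.0)  # joint (x, y) coordinates and scores at t = 2 s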
def main():
    start_time = time.time()
    print("main hai")
    tf.reset_default_graph()

    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # Reference stick-figure frames on disk
    dir = os.listdir("stick")

    k = 0
    cap = cv2.VideoCapture(0)
    i = 0
    while cap.isOpened():
        if i % 20 == 0:
            ret, orig_frame = cap.read()
            if ret:
                frame = cv2.resize(orig_frame, (0, 0), fx=0.30, fy=0.30)
                image = frame
                sse = 0
                mse = 0
                image_batch = data_to_input(frame)

                # Compute prediction with the CNN
                outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
                scmap, locref, pairwise_diff = predict.extract_cnn_output(
                    outputs_np, cfg, dataset.pairwise_stats)
                detections = extract_detections(cfg, scmap, locref, pairwise_diff)
                unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
                person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

                img = np.copy(image)
                visim_multi = img.copy()
                co1 = draw_multi.draw(visim_multi, dataset, person_conf_multi)

                plt.imshow(visim_multi)
                plt.show()
                visualize.waitforbuttonpress()

                if k == 1:
                    # Draw the detected limbs as a stick figure
                    qwr = np.zeros((1920, 1080, 3), np.uint8)
                    cv2.line(qwr, co1[5][0], co1[5][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[7][0], co1[7][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[6][0], co1[6][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[4][0], co1[4][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[9][0], co1[9][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[11][0], co1[11][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[8][0], co1[8][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[10][0], co1[10][1], (255, 0, 0), 3)
                    cv2.imshow('r', qwr)

                    # Compare against the reference stick-figure frame on disk
                    qwr2 = "stick/frame" + str(k) + ".jpg"
                    qw1 = cv2.cvtColor(qwr, cv2.COLOR_BGR2GRAY)
                    qw2 = cv2.cvtColor(cv2.imread(qwr2), cv2.COLOR_BGR2GRAY)

                    fig = plt.figure("Images")
                    images = ("Original", qw1), ("Contrast", qw2)
                    for j, (name, image_j) in enumerate(images):
                        ax = fig.add_subplot(1, 3, j + 1)
                        ax.set_title(name)
                        plt.imshow(image_j, cmap='gray')

                    # compare the images
                    s, m = compare_images(qw1, qw2, "Image1 vs Image2")
                    k += 1
                    sse = s
                    mse = m
            else:
                break
        i += 1  # advance the frame counter so only every 20th frame is processed

    elapsed = time.time() - start_time
    print("Elapsed time (s): ", elapsed)

    cap.release()
    cv2.destroyAllWindows()
def video2posevideo(video_name):
    time_start = time.perf_counter()  # time.clock() was removed in Python 3.8

    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw, ImageFont
    font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 24)

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps)  ## duration: second / fps: frame per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  ## ex. 720 -> 3

    pose_frame_list = []

    point_r = 3  # radius of points
    point_min = 10  # threshold of points - If there are more than point_min points in person, we define he/she is REAL PERSON
    part_min = 3  # threshold of parts - If there are more than part_min parts in person, we define he/she is REAL PERSON / part means head, arm and leg
    point_num = 17  # There are 17 points in 1 person

    def ellipse_set(person_conf_multi, people_i, point_i):
        return (person_conf_multi[people_i][point_i][0] - point_r,
                person_conf_multi[people_i][point_i][1] - point_r,
                person_conf_multi[people_i][point_i][0] + point_r,
                person_conf_multi[people_i][point_i][1] + point_r)

    def line_set(person_conf_multi, people_i, point_i, point_j):
        return (person_conf_multi[people_i][point_i][0],
                person_conf_multi[people_i][point_i][1],
                person_conf_multi[people_i][point_j][0],
                person_conf_multi[people_i][point_j][1])

    def draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color):
        draw.ellipse(ellipse_set(person_conf_multi, people_i, a), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, b), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, c), fill=point_color)
        draw.line(line_set(person_conf_multi, people_i, a, b), fill=point_color, width=5)
        draw.line(line_set(person_conf_multi, people_i, b, c), fill=point_color, width=5)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        # Convert the frame so we can draw the pose points on it
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        people_real_num = 0
        people_part_num = 0

        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ' + str(people_num))

        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_list = []
            point_count = 0
            point_i = 0  # index of points
            part_count = 0  # count of parts in THAT person

            # To find rectangle which includes that person - list of points x, y coordinates
            people_x = []
            people_y = []

            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:
                    # If coordinates of point is (0, 0) == meaningless data
                    point_count = point_count + 1
                    point_list.append(point_i)

            # Draw each part
            if (5 in point_list) and (7 in point_list) and (9 in point_list):  # Draw left arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 5, 7, 9, point_color)
                part_count = part_count + 1
            if (6 in point_list) and (8 in point_list) and (10 in point_list):  # Draw right arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 6, 8, 10, point_color)
                part_count = part_count + 1
            if (11 in point_list) and (13 in point_list) and (15 in point_list):  # Draw left leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 11, 13, 15, point_color)
                part_count = part_count + 1
            if (12 in point_list) and (14 in point_list) and (16 in point_list):  # Draw right leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 12, 14, 16, point_color)
                part_count = part_count + 1

            if point_count >= point_min:
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:
                        # If coordinates of point is (0, 0) == meaningless data
                        draw.ellipse(ellipse_set(person_conf_multi, people_i, point_i), fill=point_color)
                        people_x.append(person_conf_multi[people_i][point_i][0])
                        people_y.append(person_conf_multi[people_i][point_i][1])
                # Draw rectangle which includes that person (outline only,
                # so the person stays visible)
                draw.rectangle([min(people_x), min(people_y), max(people_x), max(people_y)],
                               outline=point_color)

            if part_count >= part_min:
                people_part_num = people_part_num + 1

        draw.text((0, 0), 'People(by point): ' + str(people_real_num) + ' (threshold = ' + str(point_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 32), 'People(by line): ' + str(people_part_num) + ' (threshold = ' + str(part_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 64), 'Frame: ' + str(i) + '/' + str(video_frame_number), (0, 0, 0), font=font)
        draw.text((0, 96), 'Total time required: ' + str(round(time.perf_counter() - time_start, 1)) + 'sec', (0, 0, 0))

        print('people_real_num: ' + str(people_real_num))
        print('people_part_num: ' + str(people_part_num))
        print('frame: ' + str(i))

        image_img_numpy = np.asarray(image_img)
        pose_frame_list.append(image_img_numpy)

    video_pose = ImageSequenceClip(pose_frame_list, fps=video.fps)
    video_pose.write_videofile("testset/" + video_name + "_pose.mp4", fps=video.fps)

    print("Time(s): " + str(time.perf_counter() - time_start))
import sys, os

# Disable cuDNN autotuning; the original assigned a bare Python variable,
# which has no effect, so this assumes the environment variable was intended.
os.environ['TF_CUDNN_USE_AUTOTUNE'] = '0'

import numpy as np
from PIL import Image

sys.path.insert(1, 'pose_tensorflow')
from util.config import load_config
from nnet import predict
from util import visualize
from dataset.pose_dataset import data_to_input

os.chdir("pose_tensorflow")
cfg = {}
cfg['cfg'] = load_config("demo/pose_cfg.yaml")
cfg['sess'], cfg['inputs'], cfg['outputs'] = predict.setup_pose_prediction(cfg['cfg'])
os.chdir("..")


def resize_image(img: Image):
    basewidth = 300
    wpercent = basewidth / img.size[0]
    hsize = int(img.size[1] * wpercent)
    img = img.resize((basewidth, hsize), Image.ANTIALIAS)
    return img


def get_pose(image, d=cfg):
    image = resize_image(image)
    # The original excerpt ends here; the remainder is reconstructed to
    # follow the single-person inference pattern used throughout this
    # collection.
    image_batch = data_to_input(np.array(image))
    outputs_np = d['sess'].run(d['outputs'], feed_dict={d['inputs']: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, d['cfg'])
    pose = predict.argmax_pose_predict(scmap, locref, d['cfg'].stride)
    return pose
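# A minimal usage sketch for get_pose, assuming a hypothetical image file;
# resize_image expects a PIL Image, so the file is opened with PIL first.
img = Image.open('example.jpg')  # hypothetical file
pose = get_pose(img)  # joint (x, y) coordinates plus confidence scores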
def video2poseframe(video_name):
    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw

    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps)  ## duration: second / fps: frame per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  ## ex. 720 -> 3

    if not os.path.exists('testset/' + video_name):
        os.makedirs('testset/' + video_name)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        print('person_conf_multi: ')
        print(type(person_conf_multi))
        print(person_conf_multi)

        # Convert the frame so we can draw the pose points on it
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        point_num = 17

        print('person_conf_multi.size: ')
        print(person_conf_multi.size)

        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ')
        print(people_num)

        point_i = 0  # index of points
        point_r = 5  # radius of points

        people_real_num = 0

        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_count = 0
            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:
                    # If coordinates of point is (0, 0) == meaningless data
                    point_count = point_count + 1
            if point_count > 5:
                # If there are more than 5 points in person, we define he/she is REAL PERSON
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    draw.ellipse((person_conf_multi[people_i][point_i][0] - point_r,
                                  person_conf_multi[people_i][point_i][1] - point_r,
                                  person_conf_multi[people_i][point_i][0] + point_r,
                                  person_conf_multi[people_i][point_i][1] + point_r),
                                 fill=point_color)

        print('people_real_num: ')
        print(people_real_num)

        video_name_result = ('testset/' + video_name + '/frame_pose_' +
                             str(i).zfill(video_frame_ciphers) + '.jpg')
        image_img.save(video_name_result, "JPEG")  # PIL expects "JPEG", not "JPG"
def poser():
    global state
    global points
    global reps

    import os
    import sys
    import cv2
    import time
    import numpy as np
    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread
    from config import load_config
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    cfg = load_config("demo/pose_cfg.yaml")

    # Load and setup CNN part detector
    sess2, inputs, outputs = predict.setup_pose_prediction(cfg)

    camera = cv2.VideoCapture(0)

    # Read images from the camera; `self`, `sess`, `prediction`, `X`,
    # `processPose` and `QtGui` are expected to come from the enclosing
    # GUI class/module scope
    prevPoints = -1
    while self.running:
        r, image = camera.read()
        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess2.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

        # Visualise
        data = visualize.visualize_joints(image, pose)
        frame = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)  # code 4 in the original
        img = QtGui.QImage(frame, frame.shape[1], frame.shape[0],
                           QtGui.QImage.Format_RGB888)
        pix = QtGui.QPixmap.fromImage(img)
        try:
            self.lblVideo.setPixmap(pix)
        except Exception:
            return

        # Flatten the first 14 joints into [x0, y0, x1, y1, ...] for the
        # pose-classification network
        arr = []
        for i in range(14):
            arr += pose[i].tolist()[0:2]
        predictedPose = sess.run(prediction, feed_dict={X: [arr]})
        processPose(predictedPose)
        if prevPoints != points:
            print("Current points: " + str(points))
        prevPoints = points
from multiperson.visualize import PersonDraw, visualize_detections

import matplotlib.pyplot as plt

cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

# Read image from file
file_name_ext = sys.argv[1]  ## example: test_single_03.png
file_name = file_name_ext.split('.')[0]  ## example: test_single_03
file_name_input = 'testset/' + file_name_ext

image = imread(file_name_input, mode='RGB')

image_batch = data_to_input(image)

# Compute prediction with the CNN
outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)

detections = extract_detections(cfg, scmap, locref, pairwise_diff)
unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
def AnalyzeVideosTrial(video_file):
    """
    DeepLabCut Toolbox
    https://github.com/AlexEMG/DeepLabCut
    A Mathis, [email protected]
    M Mathis, [email protected]

    This script analyzes videos based on a trained network (as specified in myconfig_analysis.py)

    You need tensorflow for evaluation. Run by:
    python3 AnalyzeVideosTrial.py video_file

    Functionalized by Adam S. Lowet, 10/25/19
    """

    ####################################################
    # Dependencies
    ####################################################
    import os.path
    import sys
    subfolder = os.getcwd().split('analysis-tools')[0]
    sys.path.append(subfolder)
    # add parent directory: (where nnet & config are!)
    sys.path.append(os.path.join(subfolder, "pose-tensorflow"))
    sys.path.append(os.path.join(subfolder, "config"))

    from myconfig_analysis import cropping, Task, date, \
        trainingsFraction, resnet, snapshotindex, shuffle, x1, x2, y1, y2, videotype, storedata_as_csv

    # Deep-cut dependencies
    from config import load_config
    from nnet import predict
    from dataset.pose_dataset import data_to_input

    # Dependencies for video:
    import pickle
    # import matplotlib.pyplot as plt
    import imageio
    from skimage.util import img_as_ubyte
    from moviepy.editor import VideoFileClip
    import skimage
    import skimage.color
    import time
    import pandas as pd
    import numpy as np
    import os
    from tqdm import tqdm

    def getpose(image, cfg, outputs, outall=False):
        '''Adapted from DeeperCut, see pose-tensorflow folder'''
        image_batch = data_to_input(skimage.color.gray2rgb(image))
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref = predict.extract_cnn_output(outputs_np, cfg)
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        if outall:
            return scmap, locref, pose
        else:
            return pose

    ####################################################
    # Loading data, and defining model folder
    ####################################################

    basefolder = os.path.join('..', '..', 'pose-tensorflow', 'models')
    modelfolder = os.path.join(basefolder, Task + str(date) + '-trainset' +
                               str(int(trainingsFraction * 100)) + 'shuffle' + str(shuffle))

    cfg = load_config(os.path.join(modelfolder, 'test', "pose_cfg.yaml"))

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split('.')[0]
        for fn in os.listdir(os.path.join(modelfolder, 'train'))
        if "index" in fn
    ])
    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print(modelfolder)
    print(Snapshots)

    ##################################################
    # Compute predictions over images
    ##################################################

    # Check if data already was generated:
    cfg['init_weights'] = os.path.join(modelfolder, 'train', Snapshots[snapshotindex])

    # Name for scorer:
    trainingsiterations = (cfg['init_weights'].split('/')[-1]).split('-')[-1]

    # Name for scorer:
    scorer = 'DeepCut' + "_resnet" + str(resnet) + "_" + Task + str(
        date) + 'shuffle' + str(shuffle) + '_' + str(trainingsiterations)
    cfg['init_weights'] = os.path.join(modelfolder, 'train', Snapshots[snapshotindex])

    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    pdindex = pd.MultiIndex.from_product(
        [[scorer], cfg['all_joints_names'], ['x', 'y', 'likelihood']],
        names=['scorer', 'bodyparts', 'coords'])

    ##################################################
    # Datafolder
    ##################################################

    # video_dir='../videos/'  # where your folder with videos is.
    # os.chdir(video_dir)
    # videos = np.sort([fn for fn in os.listdir(os.curdir) if (videotype in fn)])
    # print("Starting ", video_dir, videos)
    # for video in videos:
    frame_buffer = 10
    video = video_file
    dataname = video.split('.')[0] + scorer + '.h5'
    try:
        # Attempt to load data...
        pd.read_hdf(dataname)
        print("Video already analyzed!", dataname)
    except FileNotFoundError:
        print("Loading ", video)
        clip = VideoFileClip(video)
        ny, nx = clip.size  # dimensions of frame (height, width)
        fps = clip.fps
        # nframes = np.sum(1 for j in clip.iter_frames())  # this is slow (but accurate)
        nframes_approx = int(np.ceil(clip.duration * clip.fps) + frame_buffer)
        # this will overestimate the number of frames (see https://github.com/AlexEMG/DeepLabCut/issues/9).
        # This is especially a problem for high frame rates and long durations due to
        # rounding errors (as Rich Warren found). Later we crop the result (line 187)

        if cropping:
            clip = clip.crop(y1=y1, y2=y2, x1=x1, x2=x2)  # one might want to adjust

        print("Duration of video [s]: ", clip.duration, ", recorded with ", fps, "fps!")
        print("Overall # of frames: ", nframes_approx, "with cropped frame dimensions: ", clip.size)

        start = time.time()
        PredicteData = np.zeros((nframes_approx, 3 * len(cfg['all_joints_names'])))
        nframes = nframes_approx  # fallback in case the end-of-video check below never fires
        clip.reader.initialize()
        print("Starting to extract posture")

        for index in tqdm(range(nframes_approx)):
            # image = img_as_ubyte(clip.get_frame(index * 1. / fps))
            image = img_as_ubyte(clip.reader.read_frame())
            # Thanks to Rick Warren for the following snippet:
            # if close to end of video, start checking whether two adjacent frames are identical
            # this should only happen when moviepy has reached the final frame
            # if two adjacent frames are identical, terminate the loop
            if index == int(nframes_approx - frame_buffer * 2):
                last_image = image
            elif index > int(nframes_approx - frame_buffer * 2):
                if (image == last_image).all():
                    nframes = index
                    print("Detected frames: ", nframes)
                    break
                else:
                    last_image = image
            pose = getpose(image, cfg, outputs)
            PredicteData[index, :] = pose.flatten()
            # NOTE: thereby cfg['all_joints_names'] should be same order as bodyparts!

        stop = time.time()

        dictionary = {
            "start": start,
            "stop": stop,
            "run_duration": stop - start,
            "Scorer": scorer,
            "config file": cfg,
            "fps": fps,
            "frame_dimensions": (ny, nx),
            "nframes": nframes
        }
        metadata = {'data': dictionary}

        print("Saving results...")
        # slice pose data to have the same length as the number of frames
        DataMachine = pd.DataFrame(PredicteData[:nframes, :], columns=pdindex,
                                   index=range(nframes))
        DataMachine.to_hdf(dataname, 'df_with_missing', format='table', mode='w')

        if storedata_as_csv:
            DataMachine.to_csv(video.split('.')[0] + scorer + '.csv')

        with open(dataname.split('.')[0] + 'includingmetadata.pickle', 'wb') as f:
            pickle.dump(metadata, f, pickle.HIGHEST_PROTOCOL)
def analyse():
    basefolder = '../pose-tensorflow/models/'  # for cfg file & ckpt!
    modelfolder = (basefolder + Task + str(date) + '-trainset' +
                   str(int(trainingsFraction * 100)) + 'shuffle' + str(shuffle))
    cfg = load_config(modelfolder + '/test/' + "pose_cfg.yaml")

    ##################################################
    # Load and setup CNN part detector
    ##################################################

    # Check which snapshots are available and sort them by # iterations
    Snapshots = np.array([
        fn.split('.')[0]
        for fn in os.listdir(modelfolder + '/train/')
        if "index" in fn
    ])
    increasing_indices = np.argsort([int(m.split('-')[1]) for m in Snapshots])
    Snapshots = Snapshots[increasing_indices]

    print(modelfolder)
    print(Snapshots)

    ##################################################
    # Compute predictions over images
    ##################################################

    # Check if data already was generated:
    cfg['init_weights'] = modelfolder + '/train/' + Snapshots[snapshotindex]

    # Name for scorer:
    trainingsiterations = (cfg['init_weights'].split('/')[-1]).split('-')[-1]

    # Name for scorer:
    scorer = 'DeepCut' + "_resnet" + str(resnet) + "_" + Task + str(
        date) + 'shuffle' + str(shuffle) + '_' + str(trainingsiterations)
    cfg['init_weights'] = modelfolder + '/train/' + Snapshots[snapshotindex]

    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    pdindex = pd.MultiIndex.from_product(
        [[scorer], cfg['all_joints_names'], ['x', 'y', 'likelihood']],
        names=['scorer', 'bodyparts', 'coords'])

    ##################################################
    # Datafolder
    ##################################################

    # Folder where your tiffstacks are:
    os.chdir(videofolder)
    videos = np.sort([fn for fn in os.listdir(os.curdir) if (".tif" in fn)])

    print("Starting ", videofolder, videos)
    for tiffstack in videos:
        dataname = tiffstack.split('.')[0] + scorer + '.h5'
        try:
            # Attempt to load data...
            pd.read_hdf(dataname)
            print("tiffstack already analyzed!", dataname)
        except:
            print("Loading ", tiffstack)
            im = io.imread(tiffstack)
            nframes = np.shape(im)[0]
            # Assuming: numframes x width x height [otherwise consider changing this!]

            start = time.time()
            PredicteData = np.zeros((nframes, 3 * len(cfg['all_joints_names'])))

            print("Starting to extract posture")
            for index in tqdm(range(nframes)):
                image = img_as_ubyte(im[index])
                pose = getpose(image, cfg, outputs)
                PredicteData[index, :] = pose.flatten()
                # NOTE: thereby cfg['all_joints_names'] should be same order as bodyparts!

            stop = time.time()

            dictionary = {
                "start": start,
                "stop": stop,
                "run_duration": stop - start,
                "Scorer": scorer,
                "config file": cfg,
                "nframes": nframes
            }
            metadata = {'data': dictionary}

            print("Saving results...")
            DataMachine = pd.DataFrame(PredicteData, columns=pdindex, index=range(nframes))
            DataMachine.to_hdf(dataname, 'df_with_missing', format='table', mode='w')
            if storedata_as_csv:
                DataMachine.to_csv(tiffstack.split('.')[0] + scorer + '.csv')
            with open(dataname.split('.')[0] + 'includingmetadata.pickle', 'wb') as f:
                pickle.dump(metadata, f, pickle.HIGHEST_PROTOCOL)
# import video_pose

####################

cfg = load_config("demo/pose_cfg_multi.yaml")

dataset = create_dataset(cfg)

sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

##########
## Get the source of video
parser = ap.ArgumentParser()
parser.add_argument('-f', "--videoFile", help="Path to Video File")
parser.add_argument('-w', "--videoWidth", help="Width of Output Video")
parser.add_argument('-o', "--videoType", help="Extension of Output Video")
args = vars(parser.parse_args())

if args["videoFile"] is not None:
    video_name = args["videoFile"]
else:
    print("You must provide a videoFile name")
def test_net(visualise, cache_scoremaps):
    # Turn on Python logging
    logging.basicConfig(level=logging.INFO)

    # Load the configuration file
    cfg = load_config()
    # Create a dataset-reader instance from the information in the config
    dataset = create_dataset(cfg)
    # No need to shuffle the data
    dataset.set_shuffle(False)
    # Tell the dataset reader there are no labels, i.e. we are in test mode
    dataset.set_test_mode(True)

    # This function returns the session plus the input and output ops
    sess, inputs, outputs = setup_pose_prediction(cfg)

    # Whether to cache the heatmaps produced during testing
    if cache_scoremaps:
        # Directory where heatmaps are saved
        out_dir = cfg.scoremap_dir
        # Create the directory if it does not exist
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

    # Number of images
    num_images = dataset.num_images
    # All predicted joint coordinates are stored here
    predictions = np.zeros((num_images,), dtype=np.object)

    for k in range(num_images):
        print('processing image {}/{}'.format(k, num_images - 1))

        # Fetch a batch of data
        batch = dataset.next_batch()

        # Run the prediction
        outputs_np = sess.run(outputs, feed_dict={inputs: batch[Batch.inputs]})

        # Get the heatmap and the location-refinement map
        scmap, locref = extract_cnn_output(outputs_np, cfg)

        # Get the final joint coordinates
        '''
        pose = [
            [pos_f8[::-1], [scmap[maxloc][joint_idx]]]
            ...
        ]
        In other words, the structure is:
        pose = [
            [joint coordinates, confidence of the joint coordinates]
            ...
        ]
        '''
        pose = argmax_pose_predict(scmap, locref, cfg.stride)

        pose_refscale = np.copy(pose)
        # Dividing by the scale maps the coordinates back to the unscaled image.
        # Note that 0:2 is a half-open interval, so only columns 0 and 1 are taken
        pose_refscale[:, 0:2] /= cfg.global_scale
        predictions[k] = pose_refscale

        if visualise:
            # Get the image
            img = np.squeeze(batch[Batch.inputs]).astype('uint8')
            # Show the heatmaps
            visualize.show_heatmaps(cfg, img, scmap, pose)
            # Wait for a key press
            visualize.waitforbuttonpress()

        if cache_scoremaps:
            # Save the heatmaps
            base = os.path.basename(batch[Batch.data_item].im_path)
            raw_name = os.path.splitext(base)[0]
            out_fn = os.path.join(out_dir, raw_name + '.mat')
            scipy.io.savemat(out_fn, mdict={'scoremaps': scmap.astype('float32')})

            # Save the location-refinement heatmap
            out_fn = os.path.join(out_dir, raw_name + '_locreg.mat')
            if cfg.location_refinement:
                scipy.io.savemat(out_fn, mdict={'locreg_pred': locref.astype('float32')})

    # Save the final predicted joint coordinates
    scipy.io.savemat('predictions.mat', mdict={'joints': predictions})

    sess.close()