def im_process(sess, cfg, inputs, outputs, image, out_port, fig="preview"):
    image_batch = data_to_input(image)
    # image = image[:, :, (2, 1, 0)]  # Uncomment to get a correctly coloured (RGB -> BGR) image in OpenCV

    timer = Timer()
    timer.tic()

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

    timer.toc()
    print('Detection took {:.3f}s'.format(timer.total_time))

    # Visualise
    # visualize.show_heatmaps(cfg, image, scmap, pose)
    # plt.figure()
    # plt.imshow(visualize.visualize_joints(image, pose))

    CONF_THRES = 0.8
    stream_parts(out_port, pose)
    image = draw_links(image, pose)
    image = visualize.visualize_joints(image, pose, threshold=CONF_THRES)
    if args.cv_show:
        cv2.imshow(fig, image)
    return image
def get_test_data():
    '''
    Takes in the data for GAIT and reshapes the array into the form
    most suitable for training
    '''
    Final_Array = np.zeros(shape=(20, 9, 2))
    trainer_vid("test/pic")
    for j in range(20):
        image = imread("test/pic" + str(j) + ".PNG", mode='RGB')
        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

        # Keep the (x, y) coordinates of the nine joints used for gait analysis
        temp = np.zeros(shape=(9, 2))
        list_indexes = [0, 1, 4, 5, 6, 7, 10, 11, 13]
        for k in range(9):
            temp[k] = pose[list_indexes[k]][0:2]
        Final_Array[j] = temp
    print(Final_Array.shape)

    # Rearrange (frames, joints, coords) -> (joints, coords, frames)
    temp2 = np.zeros(shape=(2, 20, 9))
    temp2[0] = Final_Array[:, :, 0]
    temp2[1] = Final_Array[:, :, 1]
    Answer = np.zeros(shape=(9, 2, 20))
    for i in range(9):
        Answer[i] = temp2[:, :, i]
    print(Answer.shape)
    return Answer
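# Note: the two axis-shuffling loops in get_test_data() amount to a single
# transpose. A minimal equivalent sketch; the helper name is hypothetical,
# but the semantics match the loops above:
def reshape_gait_array(final_array):
    # (frames, joints, coords) -> (joints, coords, frames)
    return np.transpose(final_array, (1, 2, 0))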
def calculate_and_write_pose_maps(image_paths, output_directory, mode):
    cfg = load_config("demo/pose_cfg.yaml")

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    for i, path in enumerate(image_paths):
        sys.stdout.write('\r >> Evaluating path %d of %d' % (i + 1, len(image_paths)))
        sys.stdout.flush()

        # Read image from file and scale it to the target height
        image = imread(path, mode='RGB')
        target_width = image.shape[1] * TARGET_HEIGHT / image.shape[0]
        scaled_image = imresize(image, (int(TARGET_HEIGHT), int(target_width)), 'cubic')
        image_batch = data_to_input(scaled_image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)
        write_scmap(output_directory, ntpath.basename(path), scmap, mode, image.shape)

    print('\nFinished generating pose maps...')
def main():
    # Paths to set up
    annotation_path = '/home/babybrain/Escritorio/300145_via.json'
    frames_path = '/home/babybrain/Escritorio/300145'

    # Get the x-y annotations for each frame
    annotations = load_annotations(annotation_path)

    # Get x, y positions for a certain part
    part_id_index = 4
    # We'll get elbows; we need both left and right (the model doesn't discriminate)
    file_anno, x_anno_r, y_anno_r = get_xy_for('r-elbow', annotations)
    _, x_anno_l, y_anno_l = get_xy_for('l-elbow', annotations)

    # Get the x, y model prediction for each annotated frame
    cfg = load_config(
        "/home/babybrain/PycharmProjects/pose-tensorflow/demo/pose_cfg_babybrain.yaml"
    )

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # Run the session for each annotated frame image
    x_model = np.empty(len(file_anno))
    y_model = np.empty(len(file_anno))
    for index, an_image in enumerate(file_anno):
        infile = "{path}/{name}".format(path=frames_path, name=an_image)
        image = imread(infile, mode='RGB')
        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        x_model[index] = pose[part_id_index, 0]
        y_model[index] = pose[part_id_index, 1]

    # Now calculate distances
    distances_r = calculate_distances(x_model, y_model, x_anno_r, y_anno_r)
    distances_l = calculate_distances(x_model, y_model, x_anno_l, y_anno_l)

    # Merge the best (smallest) distance results
    distances = [min(xr, xl) for xr, xl in zip(distances_r, distances_l)]
    distances = np.array(distances)

    distance_steps, rates = detection_rate(distances, nsteps=50)
    rates = rates * 100

    # Finally plot the graph
    fig, ax = plt.subplots()
    ax.plot(distance_steps, rates)
    ax.set_xlabel('Normalized Distance')
    ax.set_ylabel('Detection %')
    ax.set_title('Distance threshold vs Detection Ratio')
    ax.set_xlim([0, 0.5])
    plt.show()
def preprocess(video_name, duration):
    source_path = f'./data/video/{video_name}.mp4'

    csv_base_path = './data/poses/'
    if not os.path.exists(csv_base_path):
        os.makedirs(csv_base_path)
    csv_path = f'{csv_base_path}{video_name}_poses.csv'

    audio_base_path = './data/audio/'
    if not os.path.exists(audio_base_path):
        os.makedirs(audio_base_path)
    audio_path = f'{audio_base_path}{video_name}.mp3'

    start_time = datetime.now()

    video = mpe.VideoFileClip(source_path)
    if duration < 0:
        duration = video.duration
    frame_count = int(video.fps * duration)
    frame_length = 1 / video.fps
    print(f'video length: {video.duration}s fps: {video.fps} frame count: {frame_count}')

    # Load and setup CNN part detector
    cfg = load_config('./pose_cfg.yaml')
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)
    print('pose model loaded')

    poses = []
    times = []
    for i in range(frame_count):
        t = i * frame_length
        frame = video.get_frame(t)
        image_batch = data_to_input(frame)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        poses.append(pose)
        times.append(t)

        if i % 100 == 0:
            print(f'processed frame: {i}/{frame_count} '
                  f'elapsed time: {datetime.now() - start_time}', end='\r')

    sess.close()

    print(f'saving poses at {csv_path}')
    save_poses(np.array(poses), times, cfg, csv_path)
    print(f'saving audio at {audio_path}')
    video.audio.write_audiofile(audio_path)
    print(f'total time: {datetime.now() - start_time}')
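# A minimal usage sketch for preprocess(); the video name 'demo' is a
# hypothetical example and assumes ./data/video/demo.mp4 exists:
#
#     preprocess('demo', duration=-1)   # a negative duration processes the whole clip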
def getpose(image, cfg, outputs, outall=False):
    ''' Adapted from DeeperCut, see pose-tensorflow folder '''
    # Note: sess and inputs are module-level globals created by
    # predict.setup_pose_prediction(cfg); they are not parameters here.
    image_batch = data_to_input(skimage.color.gray2rgb(image))
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref = predict.extract_cnn_output(outputs_np, cfg)
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
    if outall:
        return scmap, locref, pose
    else:
        return pose
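# A minimal usage sketch for getpose(), assuming sess, inputs, cfg and outputs
# were created beforehand with predict.setup_pose_prediction(cfg), and that
# 'frame.png' is a hypothetical grayscale test image:
#
#     from scipy.misc import imread
#     frame = imread('frame.png')
#     pose = getpose(frame, cfg, outputs)               # (num_joints, 3): x, y, confidence
#     scmap, locref, pose = getpose(frame, cfg, outputs, outall=True)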
def get_pose(image, d=cfg):
    image = resize_image(image)
    image_batch = data_to_input(image)
    outputs_np = d['sess'].run(d['outputs'], feed_dict={d['inputs']: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, d['cfg'])
    pose = predict.argmax_pose_predict(scmap, locref, d['cfg'].stride)
    return pose
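# A minimal sketch of building the context dict get_pose() expects; the helper
# name and config path are hypothetical, but the keys match the lookups above:
def make_pose_context(config_path='./pose_cfg.yaml'):
    ctx_cfg = load_config(config_path)
    sess, inputs, outputs = predict.setup_pose_prediction(ctx_cfg)
    return {'cfg': ctx_cfg, 'sess': sess, 'inputs': inputs, 'outputs': outputs}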
def detection(image):
    time_start = time.time()
    image_batch = data_to_input(image)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, pairwise_diff = predict.extract_cnn_output(
        outputs_np, cfg, dataset.pairwise_stats)
    detections = extract_detections(cfg, scmap, locref, pairwise_diff)
    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

    time_end = time.time()
    print('total time:', time_end - time_start)
    return person_conf_multi
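# A minimal sketch of consuming detection()'s output. The shape interpretation
# (people, 17 COCO keypoints, x/y) follows the multiperson snippets below;
# 'frame' is a hypothetical RGB numpy array:
#
#     person_conf_multi = detection(frame)
#     for person in person_conf_multi:          # person: (17, 2) array
#         visible = person[(person[:, 0] > 0) & (person[:, 1] > 0)]
#         print('joints detected:', len(visible))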
def run_predict(frame, sess, inputs, outputs, cfg, dataset, sm, draw_multi):
    tf.reset_default_graph()
    image = frame
    image_batch = data_to_input(frame)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, pairwise_diff = predict.extract_cnn_output(
        outputs_np, cfg, dataset.pairwise_stats)
    detections = extract_detections(cfg, scmap, locref, pairwise_diff)
    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
    m = time.time()
    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

    img = np.copy(image)
    visim_multi = img.copy()
    draw_multi.draw(visim_multi, dataset, person_conf_multi, image)
    return pos_array.round().astype(int)
def get_position(image, prev_pos):
    x = 0
    y = 0
    cropped_image = image
    if prev_pos[0] > 0 and prev_pos[1] > 0:
        # Crop a box around the previous position to narrow the search
        BOX_SIZE = 120
        x = int(max(0, prev_pos[0] - BOX_SIZE / 2))  # int() fixes float slice indices
        y = int(max(0, prev_pos[1] - BOX_SIZE / 2))
        cropped_image = image[y:(y + BOX_SIZE), x:(x + BOX_SIZE)]

    ''' Adapted from DeeperCut, see pose-tensorflow folder '''
    image_batch = data_to_input(skimage.color.gray2rgb(cropped_image))
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref = predict.extract_cnn_output(outputs_np, cfg)
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

    # Map the crop-local coordinates back to the full image
    pose[0][0] += x
    pose[0][1] += y
    return pose[0][0], pose[0][1], pose[0][2]
def predict_frame(video, t):
    frame_count = int(video.fps * video.duration)
    frame_length = 1 / video.fps

    # Load and setup CNN part detector
    cfg = load_config('./pose_cfg.yaml')
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    frame = video.get_frame(t)
    image_batch = data_to_input(frame)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
    return pose
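# predict_frame() reloads the detector on every call, which is expensive when
# sampling many frames. A minimal sketch of a cached variant; the helper name
# and module-level cache are hypothetical, and it assumes the same load_config
# / predict / data_to_input imports used in the snippets above:
_POSE_MODEL = None

def predict_frame_cached(video, t):
    global _POSE_MODEL
    if _POSE_MODEL is None:
        cfg = load_config('./pose_cfg.yaml')
        # setup_pose_prediction returns (sess, inputs, outputs)
        _POSE_MODEL = (cfg,) + predict.setup_pose_prediction(cfg)
    cfg, sess, inputs, outputs = _POSE_MODEL
    image_batch = data_to_input(video.get_frame(t))
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)
    return predict.argmax_pose_predict(scmap, locref, cfg.stride)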
def disp_pic(new=False, where="your_file4"):
    ''' Displays an image with its pose heatmaps '''
    if new:
        new_pic("QWERTY12345")
        where = "QWERTY12345"
    image = imread(where + ".PNG", mode='RGB')
    image_batch = data_to_input(image)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

    visualize.show_heatmaps(cfg, image, scmap, pose)
    visualize.waitforbuttonpress()
def arms(new=False, where="your_file"):
    ''' Computes and prints arm_span and arm_span_est '''
    scale = 0  # Number of inches per pixel
    if new:
        new_pic(where + ".PNG")
    image = imread(where + ".PNG", mode='RGB')
    image_batch = data_to_input(image)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

    scale = set_scale(pose)
    print(arm_span(pose, scale))
    print(arm_span_est(pose, scale))
    disp_pic(where=where)
def get_person_data(person):
    '''
    Takes in the data for GAIT and reshapes the array into the form
    most suitable for training
    '''
    Final_Array = np.zeros(shape=(15, 20, 9, 2))
    for i in range(15):
        trainer_vid("walk_data/" + person + "/vid" + str(i) + "/pic")
        if (input("Video index: " + str(i) +
                  " has been taken. Type yeet to leave:\n") == "yeet"):
            return
    for i in range(15):
        for j in range(20):
            image = imread("walk_data/" + person + "/vid" + str(i) + "/pic" +
                           str(j) + ".PNG", mode='RGB')
            image_batch = data_to_input(image)

            # Compute prediction with the CNN
            outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
            scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

            # Extract maximum scoring location from the heatmap, assume 1 person
            pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

            # Keep the (x, y) coordinates of the nine joints used for gait analysis
            temp = np.zeros(shape=(9, 2))
            list_indexes = [0, 1, 4, 5, 6, 7, 10, 11, 13]
            for k in range(9):
                temp[k] = pose[list_indexes[k]][0:2]
            Final_Array[i][j] = temp
        print(str(i) + " is finished")
    print(Final_Array.shape)

    # Rearrange (videos, frames, joints, coords) -> (joints, videos, frames, coords)
    Answer = np.zeros(shape=(9, 15, 20, 2))
    for i in range(9):
        Answer[i] = Final_Array[:, :, i, :]
    print(Answer.shape)
    return Answer
def play():
    global VIDEO
    VIDEO = True
    cap = cv2.VideoCapture(0)  # start the camera
    cap.set(3, 160)  # downscale the video (reduces the amount of data)
    cap.set(4, 120)
    while VIDEO:
        # Capture frame-by-frame
        ret, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)
        # print(scmap)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

        # Visualise
        # visualize.show_heatmaps(cfg, image, scmap, pose)
        visim = visualize.visualize_joints(image, pose)
        # visim = image
        fenetre_fft.display_image(visim)
        print("OK")
    cap.release()  # release the camera (fixed: a new capture was created and released instead)
def kick_vid(new=False):
    '''
    Takes a video, grabs 40 frames from it, maps the body in each,
    and finds the maximum kick height
    '''
    shoe_size = int(input("What is your shoe size (US):\n"))
    scale = 0  # Number of inches per pixel
    if new:
        vid_view(sec=10)
        print("GO")
        vid_pics(sec=4)
    ans = []
    for i in range(40):
        image = imread("temp/vid_pic" + str(i) + ".PNG", mode='RGB')
        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        if i == 0:
            scale = set_scale(pose)
        ans.append(kick_height(pose, scale))
    cv2.destroyAllWindows()
    ans = np.array(ans)
    print(ans)
    print("We measured " + str(max(ans)) + " using the ankles as our points")
    print("The true value is approximately " + str(max(ans) + (4 / 3 * shoe_size)))
    print(np.argmax(ans))

    # Visualise the frame with the highest kick
    disp_pic(where="temp/vid_pic" + str(np.argmax(ans)))
# for object-tracker
target_points = []  # format: [(minx, miny, maxx, maxy), (minx, miny, maxx, maxy), ...]
tracker = []
final = []

for i in range(0, video_frame_number):
    d = {}
    # Save the i-th frame as an image
    image = video.get_frame(i / video.fps)

    ##########
    ## By pose-tensorflow
    image_batch = data_to_input(image)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, pairwise_diff = predict.extract_cnn_output(
        outputs_np, cfg, dataset.pairwise_stats)
    detections = extract_detections(cfg, scmap, locref, pairwise_diff)
    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(
        sm, detections)
    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array,
                                                 pos_array)
    #####

    # Add library to draw image
def poser():
    global state
    global points
    global reps
    import os
    import sys
    import cv2
    import time
    import numpy as np
    sys.path.append(os.path.dirname(__file__) + "/../")
    from scipy.misc import imread
    from config import load_config
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    cfg = load_config("demo/pose_cfg.yaml")

    # Load and setup CNN part detector
    sess2, inputs, outputs = predict.setup_pose_prediction(cfg)
    camera = cv2.VideoCapture(0)

    # Read images from the camera
    prevPoints = -1
    while self.running:
        r, image = camera.read()
        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess2.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose = predict.argmax_pose_predict(scmap, locref, cfg.stride)

        # Visualise
        data = visualize.visualize_joints(image, pose)
        frame = cv2.cvtColor(data, 4)  # 4 == cv2.COLOR_BGR2RGB
        img = QtGui.QImage(frame, frame.shape[1], frame.shape[0],
                           QtGui.QImage.Format_RGB888)
        pix = QtGui.QPixmap.fromImage(img)
        try:
            self.lblVideo.setPixmap(pix)
        except Exception:
            return

        # Flatten the first 14 joints into a feature vector for the pose classifier
        arr = []
        for i in range(14):
            arr += pose[i].tolist()[0:2]
        predictedPose = sess.run(prediction, feed_dict={X: [arr]})
        processPose(predictedPose)
        if prevPoints != points:
            print("Current points: " + str(points))
        prevPoints = points
sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

# Read images from file
file_name = "demo/try.jpeg"
file_name1 = 'demo/try2.jpeg'
image = imread(file_name, 0)
image2 = imread(file_name1, 0)

image_batch = data_to_input(image)
image_batch2 = data_to_input(image2)

# Compute prediction with the CNN
outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
outputs_np2 = sess.run(outputs, feed_dict={inputs: image_batch2})
scmap, locref, pairwise_diff = predict.extract_cnn_output(
    outputs_np, cfg, dataset.pairwise_stats)
scmap2, locref2, pairwise_diff2 = predict.extract_cnn_output(
    outputs_np2, cfg, dataset.pairwise_stats)

detections = extract_detections(cfg, scmap, locref, pairwise_diff)
detections2 = extract_detections(cfg, scmap2, locref2, pairwise_diff2)
def return_pose(image, image2, keeper, referee):
    keeper_id = -1
    ref_ids = []
    team_class_num = 0.25  # sys.maxsize
    ref_num = 0.10
    contours, mask = get_contours(image)
    all_info = []
    image_batch = data_to_input(image)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, pairwise_diff = predict.extract_cnn_output(
        outputs_np, cfg, dataset.pairwise_stats)
    detections = extract_detections(cfg, scmap, locref, pairwise_diff)
    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(
        sm, detections)
    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array,
                                                 pos_array)

    cl = [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 255, 0), (0, 255, 255),
          (255, 0, 255), (0, 0, 0)]
    names = {
        6: 'rightShoulder',
        5: 'leftShoulder',
        11: 'leftHip',
        12: 'rightHip',
        14: 'rightKnee',
        13: 'leftKnee',
        16: 'rightAnkle',
        15: 'leftAnkle'
    }
    allTeamClassificationFeatures_upper = []
    allTeamClassificationFeatures_lower = []

    # TODO - Use mid-back to knee features
    for i in range(len(person_conf_multi)):
        x = 0
        player_parts = {}
        min_x = image2.shape[0] - 1
        min_y = image2.shape[1] - 1
        for j in range(17):
            printFlag = True
            if j in [5, 6, 15, 16, 11, 12, 13, 14]:
                ptsy, ptsx = person_conf_multi[i, j, :]
                # print(ptsx, ptsy, ':', min_x, min_y)
                if ptsy > 0.0 and ptsx > 0.0:
                    player_parts.update({names[j]: {'x': ptsx, 'y': ptsy}})
                    x = x + 1
                    if min_x > ptsx:
                        min_x = ptsx
                        min_y = ptsy
        # print('\n', min_x, min_y)

        teamClassificationFeatures_upper = []
        teamClassificationFeatures_lower = []
        allXCoords_upper = []
        allYCoords_upper = []
        allXCoords_lower = []
        allYCoords_lower = []
        leftUpperPointFound = False
        rightUpperPointFound = False
        leftMidPointFound = False
        rightMidPointFound = False
        leftLowerPointFound = False
        rightLowerPointFound = False
        if 'rightShoulder' in list(player_parts.keys()):
            allXCoords_upper.append(int(player_parts['rightShoulder']['x']))
            allYCoords_upper.append(int(player_parts['rightShoulder']['y']))
            rightUpperPointFound = True
        if 'leftShoulder' in list(player_parts.keys()):
            allXCoords_upper.append(int(player_parts['leftShoulder']['x']))
            allYCoords_upper.append(int(player_parts['leftShoulder']['y']))
            leftUpperPointFound = True
        if 'rightHip' in list(player_parts.keys()):
            allXCoords_upper.append(int(player_parts['rightHip']['x']))
            allYCoords_upper.append(int(player_parts['rightHip']['y']))
            allXCoords_lower.append(int(player_parts['rightHip']['x']))
            allYCoords_lower.append(int(player_parts['rightHip']['y']))
            rightMidPointFound = True  # fixed: was misspelled rightMidpointFound
        if 'leftHip' in list(player_parts.keys()):
            allXCoords_upper.append(int(player_parts['leftHip']['x']))
            allYCoords_upper.append(int(player_parts['leftHip']['y']))
            allXCoords_lower.append(int(player_parts['leftHip']['x']))
            allYCoords_lower.append(int(player_parts['leftHip']['y']))
            leftMidPointFound = True
        if 'rightKnee' in list(player_parts.keys()):
            allXCoords_lower.append(int(player_parts['rightKnee']['x']))
            allYCoords_lower.append(int(player_parts['rightKnee']['y']))
            rightLowerPointFound = True
        if 'leftKnee' in list(player_parts.keys()):
            allXCoords_lower.append(int(player_parts['leftKnee']['x']))
            allYCoords_lower.append(int(player_parts['leftKnee']['y']))
            leftLowerPointFound = True
        allXCoords_upper.sort()
        allYCoords_upper.sort()
        allXCoords_lower.sort()
        allYCoords_lower.sort()
        if (len(allXCoords_upper) < 3) or (len(allXCoords_lower) < 3):
            continue
        if mask[int(min_x), int(min_y)] == 0:
            continue

        # Upper body features
        if len(allXCoords_upper) == 4:
            if 0 > allXCoords_upper[0]:
                allXCoords_upper[0] = 0
            if 0 > allXCoords_upper[3]:
                allXCoords_upper[3] = 0
            if 0 > allXCoords_upper[0] and 0 > allXCoords_upper[3]:
                allXCoords_upper[3] = 2
            if 0 > allYCoords_upper[0]:
                allYCoords_upper[0] = 0
            if 0 > allYCoords_upper[3]:
                allYCoords_upper[3] = 0
            if 0 > allYCoords_upper[0] and 0 > allYCoords_upper[3]:
                allYCoords_upper[3] = 2
            allXCoords_upper.sort()
            allYCoords_upper.sort()
            allColors = image2[allXCoords_upper[0]:allXCoords_upper[3],
                               allYCoords_upper[0]:allYCoords_upper[3]]
            image = cv2.rectangle(
                image, (allYCoords_upper[0], allXCoords_upper[0]),
                (int(allYCoords_upper[3] -
                     abs(allYCoords_upper[3] - allYCoords_upper[0]) / 2),
                 allXCoords_upper[3]), (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures_upper.extend(
                    [np.int64(x[0]) for x in hist])
            teamClassificationFeatures_upper = [
                float(i) / max(teamClassificationFeatures_upper)
                for i in teamClassificationFeatures_upper
            ]
            allTeamClassificationFeatures_upper.append(
                teamClassificationFeatures_upper)
        if len(allXCoords_upper) == 3:
            if 0 > allXCoords_upper[0]:
                allXCoords_upper[0] = 0
            if 0 > allXCoords_upper[2]:
                allXCoords_upper[2] = 0
            if 0 > allXCoords_upper[0] and 0 > allXCoords_upper[2]:
                allXCoords_upper[2] = 10
            if 0 > allYCoords_upper[0]:
                allYCoords_upper[0] = 0
            if 0 > allYCoords_upper[2]:
                allYCoords_upper[2] = 0
            if 0 > allYCoords_upper[0] and 0 > allYCoords_upper[2]:
                allYCoords_upper[2] = 10
            allXCoords_upper.sort()
            allYCoords_upper.sort()
            allColors = image2[allXCoords_upper[0]:allXCoords_upper[2],
                               allYCoords_upper[0]:allYCoords_upper[2]]
            image = cv2.rectangle(
                image, (allYCoords_upper[0], allXCoords_upper[0]),
                (int(allYCoords_upper[2] -  # fixed: index 3 was out of range for a 3-element list
                     abs(allYCoords_upper[2] - allYCoords_upper[0]) / 2),
                 allXCoords_upper[2]), (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures_upper.extend(
                    [np.int64(x[0]) for x in hist])
            teamClassificationFeatures_upper = [
                float(i) / max(teamClassificationFeatures_upper)
                for i in teamClassificationFeatures_upper
            ]
            allTeamClassificationFeatures_upper.append(
                teamClassificationFeatures_upper)
        if len(allXCoords_upper) == 2:
            # if (leftUpperPointFound and rightMidPointFound) or (leftMidPointFound and rightUpperPointFound):
            if 0 > allXCoords_upper[0]:
                allXCoords_upper[0] = 0
            if 0 > allXCoords_upper[1]:
                allXCoords_upper[1] = 0
            if 0 > allXCoords_upper[0] and 0 > allXCoords_upper[1]:
                allXCoords_upper[1] = 2
            if 0 > allYCoords_upper[0]:
                allYCoords_upper[0] = 0
            if 0 > allYCoords_upper[1]:
                allYCoords_upper[1] = 0
            if 0 > allYCoords_upper[0] and 0 > allYCoords_upper[1]:
                allYCoords_upper[1] = 2
            allXCoords_upper.sort()
            allYCoords_upper.sort()
            allColors = image2[allXCoords_upper[0]:allXCoords_upper[1],
                               allYCoords_upper[0]:allYCoords_upper[1]]
            image = cv2.rectangle(
                image, (allYCoords_upper[0], allXCoords_upper[0]),
                (int(allYCoords_upper[1] -  # fixed: index 3 was out of range for a 2-element list
                     abs(allYCoords_upper[1] - allYCoords_upper[0]) / 2),
                 allXCoords_upper[1]), (255, 0, 0), 1)
            '''reqInLinePoints = bresenham_march(image2, [allXCoords[0], allYCoords[0]], [allXCoords[1], allYCoords[1]])
            allColors = []
            for point in reqInLinePoints:
                allColors.append(point[1].reshape(1,3))
            reqInLinePoints = bresenham_march(image2, [allXCoords[0], allYCoords[1]], [allXCoords[1], allYCoords[0]])
            for point in reqInLinePoints:
                allColors.append(point[1].reshape(1,3))'''
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures_upper.extend(
                    [np.int64(x[0]) for x in hist])
            teamClassificationFeatures_upper = [
                float(i) / max(teamClassificationFeatures_upper)
                for i in teamClassificationFeatures_upper
            ]
            allTeamClassificationFeatures_upper.append(
                teamClassificationFeatures_upper)

        # Lower body features
        if len(allXCoords_lower) == 4:
            if 0 > allXCoords_lower[0]:
                allXCoords_lower[0] = 0
            if 0 > allXCoords_lower[3]:
                allXCoords_lower[3] = 0
            if 0 > allXCoords_lower[0] and 0 > allXCoords_lower[3]:
                allXCoords_lower[3] = 2
            if 0 > allYCoords_lower[0]:
                allYCoords_lower[0] = 0
            if 0 > allYCoords_lower[3]:
                allYCoords_lower[3] = 0
            if 0 > allYCoords_lower[0] and 0 > allYCoords_lower[3]:
                allYCoords_lower[3] = 2
            allXCoords_lower.sort()
            allYCoords_lower.sort()
            allColors = image2[allXCoords_lower[0]:allXCoords_lower[3],
                               allYCoords_lower[0]:allYCoords_lower[3]]
            image = cv2.rectangle(image,
                                  (allYCoords_lower[0], allXCoords_lower[0]),
                                  (allYCoords_lower[3], allXCoords_lower[3]),
                                  (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures_lower.extend(
                    [np.int64(x[0]) for x in hist])
            teamClassificationFeatures_lower = [
                float(i) / max(teamClassificationFeatures_lower)
                for i in teamClassificationFeatures_lower
            ]
            allTeamClassificationFeatures_lower.append(
                teamClassificationFeatures_lower)
        if len(allXCoords_lower) == 3:
            if 0 > allXCoords_lower[0]:
                allXCoords_lower[0] = 0
            if 0 > allXCoords_lower[2]:
                allXCoords_lower[2] = 0
            if 0 > allXCoords_lower[0] and 0 > allXCoords_lower[2]:
                allXCoords_lower[2] = 10
            if 0 > allYCoords_lower[0]:
                allYCoords_lower[0] = 0
            if 0 > allYCoords_lower[2]:
                allYCoords_lower[2] = 0
            if 0 > allYCoords_lower[0] and 0 > allYCoords_lower[2]:
                allYCoords_lower[2] = 10
            allXCoords_lower.sort()
            allYCoords_lower.sort()
            allColors = image2[allXCoords_lower[0]:allXCoords_lower[2],
                               allYCoords_lower[0]:allYCoords_lower[2]]
            image = cv2.rectangle(image,
                                  (allYCoords_lower[0], allXCoords_lower[0]),
                                  (allYCoords_lower[2], allXCoords_lower[2]),
                                  (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures_lower.extend(
                    [np.int64(x[0]) for x in hist])
            teamClassificationFeatures_lower = [
                float(i) / max(teamClassificationFeatures_lower)
                for i in teamClassificationFeatures_lower
            ]
            allTeamClassificationFeatures_lower.append(
                teamClassificationFeatures_lower)
        if len(allXCoords_lower) == 2:
            # if (leftMidPointFound and rightLowerPointFound) or (leftLowerPointFound and rightMidPointFound):
            if 0 > allXCoords_lower[0]:
                allXCoords_lower[0] = 0
            if 0 > allXCoords_lower[1]:
                allXCoords_lower[1] = 0
            if 0 > allXCoords_lower[0] and 0 > allXCoords_lower[1]:
                allXCoords_lower[1] = 2
            if 0 > allYCoords_lower[0]:
                allYCoords_lower[0] = 0
            if 0 > allYCoords_lower[1]:
                allYCoords_lower[1] = 0
            if 0 > allYCoords_lower[0] and 0 > allYCoords_lower[1]:
                allYCoords_lower[1] = 2
            allXCoords_lower.sort()
            allYCoords_lower.sort()
            allColors = image2[allXCoords_lower[0]:allXCoords_lower[1],
                               allYCoords_lower[0]:allYCoords_lower[1]]
            image = cv2.rectangle(image,
                                  (allYCoords_lower[0], allXCoords_lower[0]),
                                  (allYCoords_lower[1], allXCoords_lower[1]),
                                  (255, 0, 0), 1)
            '''reqInLinePoints = bresenham_march(image2, [allXCoords[0], allYCoords[0]], [allXCoords[1], allYCoords[1]])
            allColors = []
            for point in reqInLinePoints:
                allColors.append(point[1].reshape(1,3))
            reqInLinePoints = bresenham_march(image2, [allXCoords[0], allYCoords[1]], [allXCoords[1], allYCoords[0]])
            for point in reqInLinePoints:
                allColors.append(point[1].reshape(1,3))'''
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures_lower.extend(
                    [np.int64(x[0]) for x in hist])
            teamClassificationFeatures_lower = [
                float(i) / max(teamClassificationFeatures_lower)
                for i in teamClassificationFeatures_lower
            ]
            allTeamClassificationFeatures_lower.append(
                teamClassificationFeatures_lower)

        '''distance_keep = np.sqrt(np.sum(np.square(np.array(teamClassificationFeatures) - np.array(keeper))))
        if distance_keep < team_class_num:
            team_class_num = distance_keep
            keeper_id = i
        distance_ref = np.sqrt(np.sum(np.square(np.array(teamClassificationFeatures) - np.array(referee))))
        if distance_ref < ref_num:
            ref_ids.append(i)'''
        temp = None
        all_info.append([i, temp, player_parts, [min_x, min_y]])

    teamClassifier_upper = KMeans(n_clusters=2)
    teamLabels_upper = teamClassifier_upper.fit_predict(
        allTeamClassificationFeatures_upper)
    teamClassifierDBSCAN_upper = DBSCAN(
        eps=0.5, min_samples=2,
        metric='euclidean').fit(allTeamClassificationFeatures_upper)
    teamLabelsDBSCAN_upper = teamClassifierDBSCAN_upper.labels_
    teamClassifier_lower = KMeans(n_clusters=2)
    teamLabels_lower = teamClassifier_lower.fit_predict(
        allTeamClassificationFeatures_lower)
    teamClassifierDBSCAN_lower = DBSCAN(
        eps=0.5, min_samples=2,
        metric='euclidean').fit(allTeamClassificationFeatures_lower)
    teamLabelsDBSCAN_lower = teamClassifierDBSCAN_lower.labels_
    isKeeperFound = False
    isRefFound = False

    # Mapping clusters
    # Create cost matrix
    CM = np.zeros(shape=(2, 2))
    for i in range(2):
        for j in range(2):
            CM[i][j] = calc_dist_between_points(
                teamClassifier_upper.cluster_centers_[i],
                teamClassifier_lower.cluster_centers_[j],
                len(teamClassifier_lower.cluster_centers_))

    # Hungarian algorithm
    row_ind, col_ind = linear_sum_assignment(CM)
    upper_to_lower_map = dict(zip(row_ind, col_ind))
    lower_to_upper_map = dict(zip(col_ind, row_ind))

    for player_itr in range(len(all_info)):
        dist_from_centroid_upper = np.sqrt(np.sum(np.square(
            np.array(allTeamClassificationFeatures_upper[player_itr]) -
            np.array(teamClassifier_upper.cluster_centers_[
                teamLabels_upper[player_itr]]))))
        dist_from_centroid_lower = np.sqrt(np.sum(np.square(
            np.array(allTeamClassificationFeatures_lower[player_itr]) -
            np.array(teamClassifier_lower.cluster_centers_[
                teamLabels_lower[player_itr]]))))
        if (teamLabelsDBSCAN_upper[player_itr] == -1) or (
                teamLabelsDBSCAN_lower[player_itr] == -1):
            all_info[player_itr][1] = -1
        else:
            all_info[player_itr][1] = (teamLabels_upper[player_itr] * 10 +
                                       teamLabels_lower[player_itr])
        '''elif (upper_to_lower_map[teamLabels_upper[player_itr]] == teamLabels_lower[player_itr]):
            all_info[player_itr][1] = teamLabels_upper[player_itr]
        else:
            if dist_from_centroid_upper < dist_from_centroid_lower:
                all_info[player_itr][1] = teamLabels_upper[player_itr]
            else:
                all_info[player_itr][1] = lower_to_upper_map[teamLabels_lower[player_itr]]'''
        '''if all_info[player_itr][0] == keeper_id:
            isKeeperFound = True
            all_info[player_itr][1] = 2'''
        '''if all_info[player_itr][1] in ref_ids:
            isRefFound = True
            all_info[player_itr][1] = 3'''

    print("ref:", ref_ids, "keeper:", keeper_id, "score", team_class_num)
    return all_info, isKeeperFound, isRefFound, image
def video2posevideo(video_name):
    time_start = time.time()  # fixed: time.clock() was removed in Python 3.8

    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw, ImageFont
    font = ImageFont.truetype("./font/NotoSans-Bold.ttf", 24)
    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps)  ## duration: seconds / fps: frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  ## number of digits, e.g. 720 -> 3

    pose_frame_list = []

    point_r = 3  # radius of points
    point_min = 10  # threshold of points - if more than point_min points are found, we treat the detection as a real person
    part_min = 3  # threshold of parts - if more than part_min parts (head, arms, legs) are found, we treat the detection as a real person
    point_num = 17  # there are 17 points per person

    def ellipse_set(person_conf_multi, people_i, point_i):
        return (person_conf_multi[people_i][point_i][0] - point_r,
                person_conf_multi[people_i][point_i][1] - point_r,
                person_conf_multi[people_i][point_i][0] + point_r,
                person_conf_multi[people_i][point_i][1] + point_r)

    def line_set(person_conf_multi, people_i, point_i, point_j):
        return (person_conf_multi[people_i][point_i][0],
                person_conf_multi[people_i][point_i][1],
                person_conf_multi[people_i][point_j][0],
                person_conf_multi[people_i][point_j][1])

    def draw_ellipse_and_line(draw, person_conf_multi, people_i, a, b, c, point_color):
        draw.ellipse(ellipse_set(person_conf_multi, people_i, a), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, b), fill=point_color)
        draw.ellipse(ellipse_set(person_conf_multi, people_i, c), fill=point_color)
        draw.line(line_set(person_conf_multi, people_i, a, b), fill=point_color, width=5)
        draw.line(line_set(person_conf_multi, people_i, b, c), fill=point_color, width=5)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        # print('person_conf_multi: ')
        # print(type(person_conf_multi))
        # print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        people_real_num = 0
        people_part_num = 0

        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ' + str(people_num))

        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_list = []
            point_count = 0
            point_i = 0  # index of points
            part_count = 0  # count of parts in THAT person

            # To find the rectangle enclosing that person - lists of point x, y coordinates
            people_x = []
            people_y = []

            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:  # a (0, 0) coordinate means no data
                    point_count = point_count + 1
                    point_list.append(point_i)

            # Draw each part
            if (5 in point_list) and (7 in point_list) and (9 in point_list):  # draw left arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 5, 7, 9, point_color)
                part_count = part_count + 1
            if (6 in point_list) and (8 in point_list) and (10 in point_list):  # draw right arm
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 6, 8, 10, point_color)
                part_count = part_count + 1
            if (11 in point_list) and (13 in point_list) and (15 in point_list):  # draw left leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 11, 13, 15, point_color)
                part_count = part_count + 1
            if (12 in point_list) and (14 in point_list) and (16 in point_list):  # draw right leg
                draw_ellipse_and_line(draw, person_conf_multi, people_i, 12, 14, 16, point_color)
                part_count = part_count + 1

            if point_count >= point_min:
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:  # a (0, 0) coordinate means no data
                        draw.ellipse(ellipse_set(person_conf_multi, people_i, point_i), fill=point_color)
                        people_x.append(person_conf_multi[people_i][point_i][0])
                        people_y.append(person_conf_multi[people_i][point_i][1])
                # Draw the rectangle enclosing that person
                draw.rectangle([min(people_x), min(people_y), max(people_x), max(people_y)], fill=point_color, outline=5)

            if part_count >= part_min:
                people_part_num = people_part_num + 1

        draw.text((0, 0), 'People(by point): ' + str(people_real_num) + ' (threshold = ' + str(point_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 32), 'People(by line): ' + str(people_part_num) + ' (threshold = ' + str(part_min) + ')', (0, 0, 0), font=font)
        draw.text((0, 64), 'Frame: ' + str(i) + '/' + str(video_frame_number), (0, 0, 0), font=font)
        draw.text((0, 96), 'Total time required: ' + str(round(time.time() - time_start, 1)) + 'sec', (0, 0, 0))

        print('people_real_num: ' + str(people_real_num))
        print('people_part_num: ' + str(people_part_num))
        print('frame: ' + str(i))

        image_img_numpy = np.asarray(image_img)

        pose_frame_list.append(image_img_numpy)

    video_pose = ImageSequenceClip(pose_frame_list, fps=video.fps)
    video_pose.write_videofile("testset/" + video_name + "_pose.mp4", fps=video.fps)

    print("Time(s): " + str(time.time() - time_start))
sm = SpatialModel(cfg)
sm.load()

draw_multi = PersonDraw()

# Load and setup CNN part detector
sess, inputs, outputs = predict.setup_pose_prediction(cfg)

# Read image from file
file_name_ext = sys.argv[1]  ## example: test_single_03.png
file_name = file_name_ext.split('.')[0]  ## example: test_single_03

file_name_input = 'testset/' + file_name_ext
image = imread(file_name_input, mode='RGB')

image_batch = data_to_input(image)

# Compute prediction with the CNN
outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
detections = extract_detections(cfg, scmap, locref, pairwise_diff)
unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

print('person_conf_multi: ')
print(type(person_conf_multi))
print(person_conf_multi)

# img = np.copy(image)
def video2poseframe(video_name):
    import numpy as np

    sys.path.append(os.path.dirname(__file__) + "/../")

    from scipy.misc import imread, imsave

    from config import load_config
    from dataset.factory import create as create_dataset
    from nnet import predict
    from util import visualize
    from dataset.pose_dataset import data_to_input

    from multiperson.detections import extract_detections
    from multiperson.predict import SpatialModel, eval_graph, get_person_conf_multicut
    from multiperson.visualize import PersonDraw, visualize_detections

    import matplotlib.pyplot as plt

    from PIL import Image, ImageDraw
    import random

    cfg = load_config("demo/pose_cfg_multi.yaml")

    dataset = create_dataset(cfg)

    sm = SpatialModel(cfg)
    sm.load()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    ################

    video = read_video(video_name)

    video_frame_number = int(video.duration * video.fps)  ## duration: seconds / fps: frames per second
    video_frame_ciphers = math.ceil(math.log(video_frame_number, 10))  ## number of digits, e.g. 720 -> 3

    if not os.path.exists('testset/' + video_name):
        os.makedirs('testset/' + video_name)

    for i in range(0, video_frame_number):
        image = video.get_frame(i / video.fps)

        ######################

        image_batch = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
        person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

        print('person_conf_multi: ')
        print(type(person_conf_multi))
        print(person_conf_multi)

        # Add library to save image
        image_img = Image.fromarray(image)

        # Save image with points of pose
        draw = ImageDraw.Draw(image_img)

        people_num = 0
        point_num = 17
        print('person_conf_multi.size: ')
        print(person_conf_multi.size)
        people_num = person_conf_multi.size / (point_num * 2)
        people_num = int(people_num)
        print('people_num: ')
        print(people_num)

        point_i = 0  # index of points
        point_r = 5  # radius of points

        people_real_num = 0
        for people_i in range(0, people_num):
            point_color_r = random.randrange(0, 256)
            point_color_g = random.randrange(0, 256)
            point_color_b = random.randrange(0, 256)
            point_color = (point_color_r, point_color_g, point_color_b, 255)
            point_count = 0
            for point_i in range(0, point_num):
                if person_conf_multi[people_i][point_i][0] + person_conf_multi[people_i][point_i][1] != 0:  # a (0, 0) coordinate means no data
                    point_count = point_count + 1
            if point_count > 5:  # if more than 5 points are found, we treat the detection as a real person
                people_real_num = people_real_num + 1
                for point_i in range(0, point_num):
                    draw.ellipse((person_conf_multi[people_i][point_i][0] - point_r,
                                  person_conf_multi[people_i][point_i][1] - point_r,
                                  person_conf_multi[people_i][point_i][0] + point_r,
                                  person_conf_multi[people_i][point_i][1] + point_r),
                                 fill=point_color)

        print('people_real_num: ')
        print(people_real_num)

        video_name_result = 'testset/' + video_name + '/frame_pose_' + str(i).zfill(video_frame_ciphers) + '.jpg'
        image_img.save(video_name_result, "JPEG")  # fixed: PIL expects "JPEG", not "JPG"
file_name1 = 'demo/try2.jpeg'
image = imread(file_name, 0)
image2 = imread(file_name1, 0)
cap = cv2.VideoCapture('demo/seed.mp4')
i = 0
cap1 = cv2.VideoCapture('demo/comp.mp4')
while True:
    if i % 8 == 0:
        ret, orig_frame = cap.read()
        ret2, frame2 = cap1.read()
        frame = cv2.resize(orig_frame, (0, 0), fx=0.30, fy=0.30)
        frame2 = cv2.resize(frame2, (0, 0), fx=0.30, fy=0.30)  # fixed: was resizing orig_frame twice
        image = frame
        image2 = frame2

        image_batch = data_to_input(frame2)
        image_batch2 = data_to_input(frame)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        outputs_np2 = sess.run(outputs, feed_dict={inputs: image_batch2})
        scmap, locref, pairwise_diff = predict.extract_cnn_output(
            outputs_np, cfg, dataset.pairwise_stats)
        scmap2, locref2, pairwise_diff2 = predict.extract_cnn_output(
            outputs_np2, cfg, dataset.pairwise_stats)

        detections = extract_detections(cfg, scmap, locref, pairwise_diff)
        detections2 = extract_detections(cfg, scmap2, locref2, pairwise_diff2)

        unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(
def return_pose(image, image2, keeper, referee):
    keeper_id = -1
    ref_ids = []
    team_class_num = 0.25  # sys.maxsize
    ref_num = 0.10
    contours, mask = get_contours(image)
    all_info = []
    image_batch = data_to_input(image)

    # Compute prediction with the CNN
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
    detections = extract_detections(cfg, scmap, locref, pairwise_diff)
    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)

    cl = [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 255, 0), (0, 255, 255), (255, 0, 255), (0, 0, 0)]
    names = {6: 'rightShoulder', 5: 'leftShoulder', 11: 'leftHip', 12: 'rightHip',
             14: 'rightKnee', 13: 'leftKnee', 16: 'rightAnkle', 15: 'leftAnkle'}
    allTeamClassificationFeatures = []
    allTeamClassificationFeaturesDBSCAN = []

    # TODO - Use mid-back to knee features
    for i in range(len(person_conf_multi)):
        x = 0
        player_parts = {}
        min_x = image2.shape[0] - 1
        min_y = image2.shape[1] - 1
        for j in range(17):
            printFlag = True
            if j in [5, 6, 15, 16, 11, 12, 13, 14]:
                ptsy, ptsx = person_conf_multi[i, j, :]
                if ptsy > 0.0 and ptsx > 0.0:
                    player_parts.update({names[j]: {'x': ptsx, 'y': ptsy}})
                    x = x + 1
                    if min_x > ptsx:
                        min_x = ptsx
                        min_y = ptsy

        teamClassificationFeatures = []
        teamClassificationFeaturesDBSCAN = []
        allXCoords = []
        allYCoords = []
        leftUpperPointFound = False
        rightUpperPointFound = False
        leftLowerPointFound = False
        rightLowerPointFound = False
        hipPointsFound = False
        if 'rightShoulder' in list(player_parts.keys()):
            allXCoords.append(int(player_parts['rightShoulder']['x']))
            allYCoords.append(int(player_parts['rightShoulder']['y']))
            rightUpperPointFound = True
        if 'leftShoulder' in list(player_parts.keys()):
            allXCoords.append(int(player_parts['leftShoulder']['x']))
            allYCoords.append(int(player_parts['leftShoulder']['y']))
            leftUpperPointFound = True
        if 'rightKnee' in list(player_parts.keys()):
            allXCoords.append(int(player_parts['rightKnee']['x']))
            allYCoords.append(int(player_parts['rightKnee']['y']))
            rightLowerPointFound = True
        if 'leftKnee' in list(player_parts.keys()):
            allXCoords.append(int(player_parts['leftKnee']['x']))
            allYCoords.append(int(player_parts['leftKnee']['y']))
            leftLowerPointFound = True
        allXCoords.sort()
        allYCoords.sort()
        if len(allXCoords) < 3:
            continue

        hipPointsFound = 'rightHip' in list(player_parts.keys()) and 'leftHip' in list(player_parts.keys())
        if hipPointsFound:
            hipPointsFound &= abs(int(player_parts['leftHip']['y']) - int(player_parts['rightHip']['y'])) > 5
        if hipPointsFound:
            allHipXCoords = []
            allHipYCoords = []
            allHipXCoords.append(int(player_parts['rightHip']['x']))
            allHipYCoords.append(int(player_parts['rightHip']['y']))
            allHipXCoords.append(int(player_parts['leftHip']['x']))
            allHipYCoords.append(int(player_parts['leftHip']['y']))
            allHipXCoords.sort()
            allHipYCoords.sort()

        if mask[int(min_x), int(min_y)] == 0:
            continue

        if len(allXCoords) == 4:
            if 0 > allXCoords[0]:
                allXCoords[0] = 0
            if 0 > allXCoords[3]:
                allXCoords[3] = 0
            if 0 > allXCoords[0] and 0 > allXCoords[3]:
                allXCoords[3] = 2
            if 0 > allYCoords[0]:
                allYCoords[0] = 0
            if 0 > allYCoords[3]:
                allYCoords[3] = 0
            if 0 > allYCoords[0] and 0 > allYCoords[3]:
                allYCoords[3] = 2
            allXCoords.sort()
            allYCoords.sort()
            newXLower = int(allXCoords[0] + abs(allXCoords[3] - allXCoords[0]) * 0.4)
            newXUpper = int(allXCoords[3] - abs(allXCoords[3] - allXCoords[0]) * 0.2)
            if hipPointsFound:
                newYLower = int(allHipYCoords[0])
                newYUpper = int(allHipYCoords[-1])
            else:
                newYLower = int(allYCoords[0])
                newYUpper = int(allYCoords[3])
            allColors = image2[newXLower:newXUpper, newYLower:newYUpper]
            allColorsDBSCAN = image2[allXCoords[0]:allXCoords[-1], allYCoords[0]:allYCoords[-1]]
            image = cv2.rectangle(image, (newYLower, newXLower), (newYUpper, newXUpper), (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [64], [0, 256])
                teamClassificationFeatures.extend([np.int64(x[0]) for x in hist])
            teamClassificationFeatures = [float(i) / max(teamClassificationFeatures) for i in teamClassificationFeatures]
            allTeamClassificationFeatures.append(teamClassificationFeatures)
            chans = cv2.split(allColorsDBSCAN)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeaturesDBSCAN.extend([np.int64(x[0]) for x in hist])
            teamClassificationFeaturesDBSCAN = [float(i) / max(teamClassificationFeaturesDBSCAN) for i in teamClassificationFeaturesDBSCAN]
            allTeamClassificationFeaturesDBSCAN.append(teamClassificationFeaturesDBSCAN)
        if len(allXCoords) == 3:
            if 0 > allXCoords[0]:
                allXCoords[0] = 0
            if 0 > allXCoords[2]:
                allXCoords[2] = 0
            if 0 > allXCoords[0] and 0 > allXCoords[2]:
                allXCoords[2] = 10
            if 0 > allYCoords[0]:
                allYCoords[0] = 0
            if 0 > allYCoords[2]:
                allYCoords[2] = 0
            if 0 > allYCoords[0] and 0 > allYCoords[2]:
                allYCoords[2] = 10
            allXCoords.sort()
            allYCoords.sort()
            newXLower = int(allXCoords[0] + abs(allXCoords[2] - allXCoords[0]) * 0.4)
            newXUpper = int(allXCoords[2] - abs(allXCoords[2] - allXCoords[0]) * 0.2)
            if hipPointsFound:
                newYLower = int(allHipYCoords[0])
                newYUpper = int(allHipYCoords[-1])
            else:
                newYLower = int(allYCoords[0])
                newYUpper = int(allYCoords[2])
            allColors = image2[newXLower:newXUpper, newYLower:newYUpper]
            allColorsDBSCAN = image2[allXCoords[0]:allXCoords[-1], allYCoords[0]:allYCoords[-1]]
            image = cv2.rectangle(image, (newYLower, newXLower), (newYUpper, newXUpper), (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [64], [0, 256])
                teamClassificationFeatures.extend([np.int64(x[0]) for x in hist])
            teamClassificationFeatures = [float(i) / max(teamClassificationFeatures) for i in teamClassificationFeatures]
            allTeamClassificationFeatures.append(teamClassificationFeatures)
            chans = cv2.split(allColorsDBSCAN)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeaturesDBSCAN.extend([np.int64(x[0]) for x in hist])
            teamClassificationFeaturesDBSCAN = [float(i) / max(teamClassificationFeaturesDBSCAN) for i in teamClassificationFeaturesDBSCAN]
            allTeamClassificationFeaturesDBSCAN.append(teamClassificationFeaturesDBSCAN)
        if len(allXCoords) == 2:
            # if (leftUpperPointFound and rightLowerPointFound) or (leftLowerPointFound and rightUpperPointFound):
            if 0 > allXCoords[0]:
                allXCoords[0] = 0
            if 0 > allXCoords[1]:
                allXCoords[1] = 0
            if 0 > allXCoords[0] and 0 > allXCoords[1]:
                allXCoords[1] = 2
            if 0 > allYCoords[0]:
                allYCoords[0] = 0
            if 0 > allYCoords[1]:
                allYCoords[1] = 0
            if 0 > allYCoords[0] and 0 > allYCoords[1]:
                allYCoords[1] = 2
            allXCoords.sort()
            allYCoords.sort()
            newXLower = int(allXCoords[0] + abs(allXCoords[1] - allXCoords[0]) * 0.35)
            newXUpper = int(allXCoords[1] * 0.9)
            allColors = image2[newXLower:newXUpper, allYCoords[0]:allYCoords[1]]
            allColorsDBSCAN = image2[allXCoords[0]:allXCoords[1], allYCoords[0]:allYCoords[1]]
            image = cv2.rectangle(image, (allYCoords[0], int(allXCoords[0] + abs(allXCoords[1] - allXCoords[0]) * 0.35)), (allYCoords[1], allXCoords[1]), (255, 0, 0), 1)
            chans = cv2.split(allColors)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeatures.extend([np.int64(x[0]) for x in hist])
            teamClassificationFeatures = [float(i) / max(teamClassificationFeatures) for i in teamClassificationFeatures]
            allTeamClassificationFeatures.append(teamClassificationFeatures)
            chans = cv2.split(allColorsDBSCAN)
            colors = ("b", "g", "r")
            for (chan, color) in zip(chans, colors):
                hist = cv2.calcHist([chan], [0], None, [3], [0, 256])
                teamClassificationFeaturesDBSCAN.extend([np.int64(x[0]) for x in hist])
            teamClassificationFeaturesDBSCAN = [float(i) / max(teamClassificationFeaturesDBSCAN) for i in teamClassificationFeaturesDBSCAN]
            allTeamClassificationFeaturesDBSCAN.append(teamClassificationFeaturesDBSCAN)

        temp = None
        all_info.append([i, temp, player_parts, [min_x, min_y]])

    teamClassifier = KMeans(n_clusters=2)
    teamLabels = teamClassifier.fit_predict(allTeamClassificationFeatures)
    teamClassifierDBSCAN = DBSCAN(eps=0.5, min_samples=2, metric='euclidean').fit(allTeamClassificationFeaturesDBSCAN)
    teamLabelsDBSCAN = teamClassifierDBSCAN.labels_
    isKeeperFound = False
    isRefFound = False

    dist_from_centroids = []
    for player_itr in range(len(all_info)):
        dist_from_centroid = np.sqrt(np.sum(np.square(
            np.array(allTeamClassificationFeatures[player_itr]) -
            np.array(teamClassifier.cluster_centers_[teamLabels[player_itr]]))))
        dist_from_centroids.append(dist_from_centroid)
    normed_dist_from_centroids = [float(i) / sum(dist_from_centroids) for i in dist_from_centroids]
    print('normed dist', normed_dist_from_centroids)

    for player_itr in range(len(all_info)):
        if teamLabelsDBSCAN[player_itr] == -1:
            all_info[player_itr][1] = -1
        else:
            all_info[player_itr][1] = teamLabels[player_itr]

    print("ref:", ref_ids, "keeper:", keeper_id, "score", team_class_num)
    return all_info, isKeeperFound, isRefFound, image
def main():
    start_time = time.time()
    print("main hai")
    tf.reset_default_graph()
    cfg = load_config("demo/pose_cfg_multi.yaml")
    dataset = create_dataset(cfg)
    sm = SpatialModel(cfg)
    sm.load()
    draw_multi = PersonDraw()

    # Load and setup CNN part detector
    sess, inputs, outputs = predict.setup_pose_prediction(cfg)

    # Read the reference stick-figure frames from disk
    dir = os.listdir("stick")
    k = 0
    cap = cv2.VideoCapture(0)
    i = 0
    while cap.isOpened():
        if i % 20 == 0:
            ret, orig_frame = cap.read()
            if ret:
                frame = cv2.resize(orig_frame, (0, 0), fx=0.30, fy=0.30)
                image = frame
                sse = 0
                mse = 0
                image_batch = data_to_input(frame)

                # Compute prediction with the CNN
                outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
                scmap, locref, pairwise_diff = predict.extract_cnn_output(outputs_np, cfg, dataset.pairwise_stats)
                detections = extract_detections(cfg, scmap, locref, pairwise_diff)
                unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(sm, detections)
                person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array, pos_array)
                img = np.copy(image)
                # coor = PersonDraw.draw()
                visim_multi = img.copy()

                co1 = draw_multi.draw(visim_multi, dataset, person_conf_multi)
                plt.imshow(visim_multi)
                plt.show()
                visualize.waitforbuttonpress()
                # print("this is draw : ", co1)
                if k == 1:
                    # Draw the detected limbs as a stick figure on a blank canvas
                    qwr = np.zeros((1920, 1080, 3), np.uint8)
                    cv2.line(qwr, co1[5][0], co1[5][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[7][0], co1[7][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[6][0], co1[6][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[4][0], co1[4][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[9][0], co1[9][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[11][0], co1[11][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[8][0], co1[8][1], (255, 0, 0), 3)
                    cv2.line(qwr, co1[10][0], co1[10][1], (255, 0, 0), 3)

                    cv2.imshow('r', qwr)

                    qwr2 = "stick/frame" + str(k) + ".jpg"
                    qw1 = cv2.cvtColor(qwr, cv2.COLOR_BGR2GRAY)
                    qw2 = cv2.cvtColor(cv2.imread(qwr2), cv2.COLOR_BGR2GRAY)  # fixed: the file must be read before converting

                    fig = plt.figure("Images")
                    images = ("Original", qw1), ("Contrast", qw2)
                    for (idx, (name, im)) in enumerate(images):  # fixed: do not shadow the frame counter i
                        ax = fig.add_subplot(1, 3, idx + 1)
                        ax.set_title(name)
                        plt.imshow(im, cmap='gray')  # fixed: was plt.imshow(hash(tuple(image)))

                    # Compare the images
                    s, m = compare_images(qw1, qw2, "Image1 vs Image2")
                    k += 1
                    sse = s
                    mse = m
            else:
                break
        i += 1  # fixed: the frame counter was never advanced

    elapsed = time.time() - start_time
    # print("sse score : ", sse)
    print("Elapsed time (s): ", elapsed)  # fixed: was mislabelled "Mean squared error"
    cap.release()
    cv2.destroyAllWindows()
def process_body_image(req):
    process_width = int(rospy.get_param("~process_width", 320))
    process_height = int(rospy.get_param("~process_height", 240))
    try:
        cv_image = bridge.imgmsg_to_cv2(req.image, "bgr8")
    except CvBridgeError as e:
        rospy.logerr(e)
    height, width, channels = cv_image.shape
    resize_rio_width = 1.0 * width / process_width
    resize_rio_height = 1.0 * height / process_height
    resized_cv_image = cv2.resize(cv_image, (process_width, process_height))
    image_batch = data_to_input(resized_cv_image)
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, pairwise_diff = predict.extract_cnn_output(
        outputs_np, cfg, dataset.pairwise_stats)
    detections = extract_detections(cfg, scmap, locref, pairwise_diff)
    unLab, pos_array, unary_array, pwidx_array, pw_array = eval_graph(
        sm, detections)
    person_conf_multi = get_person_conf_multicut(sm, unLab, unary_array,
                                                 pos_array)
    num_people = person_conf_multi.shape[0]
    conf_min_count = rospy.get_param("conf_min_count", 5)  # at least this many points are required for a detection to be accepted
    bodys_response = BodyPoseResponse()
    for pidx in range(num_people):
        if np.sum(person_conf_multi[pidx, :, 0] > 0) < conf_min_count:
            continue
        body_rect = person_conf_multi[pidx].tolist()
        body_rect_filterd = list(
            filter(lambda x: x[0] > 0 and x[1] > 0, body_rect))
        if len(body_rect_filterd) < conf_min_count:
            continue
        body_info = BodyInfo()
        # Scale each keypoint back to the original image size
        body_info.nose = [body_rect[0][0] * resize_rio_width,
                          body_rect[0][1] * resize_rio_height]
        body_info.right_eye = [body_rect[1][0] * resize_rio_width,
                               body_rect[1][1] * resize_rio_height]
        body_info.left_eye = [body_rect[2][0] * resize_rio_width,
                              body_rect[2][1] * resize_rio_height]
        body_info.right_ear = [body_rect[3][0] * resize_rio_width,
                               body_rect[3][1] * resize_rio_height]
        body_info.left_ear = [body_rect[4][0] * resize_rio_width,
                              body_rect[4][1] * resize_rio_height]
        body_info.right_arm_top = [body_rect[5][0] * resize_rio_width,
                                   body_rect[5][1] * resize_rio_height]
        body_info.left_arm_top = [body_rect[6][0] * resize_rio_width,
                                  body_rect[6][1] * resize_rio_height]
        body_info.right_arm_middle = [body_rect[7][0] * resize_rio_width,
                                      body_rect[7][1] * resize_rio_height]
        body_info.left_arm_middle = [body_rect[8][0] * resize_rio_width,
                                     body_rect[8][1] * resize_rio_height]
        body_info.right_arm_bottom = [body_rect[9][0] * resize_rio_width,
                                      body_rect[9][1] * resize_rio_height]
        body_info.left_arm_bottom = [body_rect[10][0] * resize_rio_width,
                                     body_rect[10][1] * resize_rio_height]
        body_info.right_leg_top = [body_rect[11][0] * resize_rio_width,
                                   body_rect[11][1] * resize_rio_height]
        body_info.left_leg_top = [body_rect[12][0] * resize_rio_width,
                                  body_rect[12][1] * resize_rio_height]
        body_info.right_leg_middle = [body_rect[13][0] * resize_rio_width,
                                      body_rect[13][1] * resize_rio_height]
        body_info.left_leg_middle = [body_rect[14][0] * resize_rio_width,
                                     body_rect[14][1] * resize_rio_height]
        body_info.right_leg_bottom = [body_rect[15][0] * resize_rio_width,
                                      body_rect[15][1] * resize_rio_height]
        body_info.left_leg_bottom = [body_rect[16][0] * resize_rio_width,
                                     body_rect[16][1] * resize_rio_height]
        bodys_response.body_poses.append(body_info)
    return bodys_response
# Uncomment this line and comment the one before for development purposes (shorter execution time)
# pose_image_resources = "../pose_images/acc/*.jpeg"  # 26 samples, 6 in the testing set --> Score 0.767 (n_estimators=40, max_depth=20) 0.916
# pose_image_resources = "../pose_images/all_tree/*.jpeg"
# Image normalization --> using the resize_images.py script

features = []
picture_name = []

# Read all images, run the CNN model and predict the main human body parts
for images in glob.glob(pose_image_resources_warrior):
    try:
        image_name = images.title()
        image = plt.imread(images)
        picture_name.append(image_name)
        image_batch: ndarray = data_to_input(image)

        # Compute prediction with the CNN
        outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
        scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

        # Extract maximum scoring location from the heatmap, assume 1 person
        pose: ndarray = predict.argmax_pose_predict(scmap, locref, cfg.stride)
        # print(pose.toarr)

        # Visualise
        # visualize.show_heatmaps(cfg, image, scmap, pose)
        # visualize.waitforbuttonpress()

        features_df = list(chain.from_iterable(pose))
def get_frame_pose(frame):
    image_batch = data_to_input(frame)
    outputs_np = sess.run(outputs, feed_dict={inputs: image_batch})
    scmap, locref, _ = predict.extract_cnn_output(outputs_np, cfg)

    # Extract maximum scoring location from the heatmap, assume 1 person
    return predict.argmax_pose_predict(scmap, locref, cfg.stride)
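# A minimal usage sketch for get_frame_pose(), assuming sess, inputs, outputs
# and cfg are module-level globals from predict.setup_pose_prediction(cfg),
# and that 'input.mp4' is a hypothetical video file:
#
#     cap = cv2.VideoCapture('input.mp4')
#     while True:
#         ok, frame = cap.read()
#         if not ok:
#             break
#         pose = get_frame_pose(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
#     cap.release()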