def processImage(self, color, scale):
    image = cv2.resize(color, None, fx=scale, fy=scale)
    image_pil = PIL.Image.fromarray(image)
    processed_image_cpu, _, __ = transforms.EVAL_TRANSFORM(image_pil, [], None)
    if self.cuda:
        processed_image = processed_image_cpu.contiguous().to("cuda", non_blocking=True)
    else:
        # no GPU available: keep the tensor on the CPU
        processed_image = processed_image_cpu.contiguous()
    fields = self.processor.fields(torch.unsqueeze(processed_image, 0))[0]
    keypoint_sets, _ = self.processor.keypoint_sets(fields)
    self.peoplelist = []

    # create joint dictionary
    for id, p in enumerate(keypoint_sets):
        person = Person()
        person.id = id
        person.joints = dict()
        for pos, joint in enumerate(p):
            keypoint = KeyPoint()
            keypoint.i = int(joint[0] / scale)
            keypoint.j = int(joint[1] / scale)
            keypoint.score = float(joint[2])
            #keypoint.x = self.points[self.width*keypoint.j+keypoint.i][0]
            #keypoint.y = self.points[self.width*keypoint.j+keypoint.i][1]
            #keypoint.z = self.points[self.width*keypoint.j+keypoint.i][2]
            person.joints[COCO_IDS[pos]] = keypoint
        self.peoplelist.append(person)
def processImage(self, img, scale):
    print("received image ", img.width, scale)
    scale = 0.7  # hard-coded: overrides the requested scale
    self.src = np.frombuffer(img.image, np.uint8).reshape(img.height, img.width, img.depth)
    image = cv2.resize(self.src, None, fx=scale, fy=scale)
    #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_pil = PIL.Image.fromarray(image)
    processed_image_cpu, _, __ = transforms.EVAL_TRANSFORM(image_pil, [], None)
    processed_image = processed_image_cpu.contiguous().to("cuda", non_blocking=True)
    unsqueezed = torch.unsqueeze(processed_image, 0).to(self.args.device)
    fields = self.processor.fields(unsqueezed)[0]
    keypoint_sets, _ = self.processor.keypoint_sets(fields)
    #print("keyPoints", keypoint_sets)

    # save in ice structure
    people = []
    for p in keypoint_sets:
        joints = {}
        person = Person()
        for pos, joint in enumerate(p):
            keypoint = KeyPoint()
            keypoint.x = joint[0] / scale
            keypoint.y = joint[1] / scale
            keypoint.score = joint[2]
            joints[COCO_IDS[pos]] = keypoint
        person.id = 0
        person.joints = joints
        people.append(person)
    return people
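
# A minimal, hypothetical consumer of the list returned by processImage() above
# (not part of the original component): it relies only on the Person/KeyPoint
# fields that the method fills in (id, joints, x, y, score). The name
# print_people and the 0.2 threshold are assumptions for illustration.
def print_people(people, min_score=0.2):
    """Print every sufficiently confident joint of every detected person."""
    for person in people:
        for name, keypoint in person.joints.items():
            if keypoint.score > min_score:
                print(person.id, name, keypoint.x, keypoint.y, keypoint.score)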
def main():
    vrep.simxFinish(-1)  # just in case, close all opened connections
    clientID = vrep.simxStart('127.0.0.1', 20000, True, True, 1300, 5)  # Connect to V-REP
    if clientID == -1:
        sys.exit()
    print('Connected to remote API server')

    res, camhandle = vrep.simxGetObjectHandle(clientID, 'camara_1', vrep.simx_opmode_oneshot_wait)
    print(res)
    res, resolution, image = vrep.simxGetVisionSensorImage(
        clientID, camhandle, 0, vrep.simx_opmode_streaming)

    ##############
    args = cli()

    # load model
    model, _ = nets.factory_from_args(args)
    model = model.to(args.device)
    processor = decoder.factory_from_args(args, model)
    visualizer = None

    while True:
        res, resolution, image = vrep.simxGetVisionSensorImage(
            clientID, camhandle, 0, vrep.simx_opmode_buffer)
        if len(image) == 0:
            continue
        img = np.array(image, dtype=np.uint8)
        img.resize([resolution[1], resolution[0], 3])
        img = np.rot90(img, 2)
        img = np.fliplr(img)
        cv2.imshow('t', img)
        cv2.waitKey(1)

        image = cv2.resize(img, None, fx=args.scale, fy=args.scale)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if visualizer is None:
            visualizer = Visualizer(processor, args)(image)
            visualizer.send(None)

        start = time.time()
        image_pil = PIL.Image.fromarray(image)
        processed_image_cpu, _, __ = transforms.EVAL_TRANSFORM(
            image_pil, [], None)
        processed_image = processed_image_cpu.contiguous().to(
            args.device, non_blocking=True)
        #print('preprocessing time', time.time() - start)
        fields = processor.fields(torch.unsqueeze(processed_image, 0))[0]
        visualizer.send((image, fields))

        #print('loop time = {:.3}s, FPS = {:.3}'.format(
        #    time.time() - last_loop, 1.0 / (time.time() - last_loop)))
        last_loop = time.time()

    vrep.simxFinish(clientID)
def processPifPaf(processor, img, scale, pifResult):
    image = cv2.resize(img, None, fx=scale, fy=scale)
    image_pil = PIL.Image.fromarray(image)
    processed_image_cpu, _, __ = transforms.EVAL_TRANSFORM(image_pil, [], None)
    processed_image = processed_image_cpu.contiguous().to("cuda", non_blocking=True)
    fields = processor.fields(torch.unsqueeze(processed_image, 0))[0]
    keypoint_sets, _ = processor.keypoint_sets(fields)
    pifResult.append(keypoint_sets)
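
# Hypothetical usage sketch (not from the original sources): processPifPaf()
# returns nothing and instead appends its keypoint_sets to the caller-supplied
# pifResult list, which makes it easy to run on a worker thread while the main
# loop keeps grabbing frames. `processor` (e.g. from decoder.factory_from_args)
# and `frame` (an RGB numpy image) are assumed to exist already; detect_async
# and its default scale are illustrative names only.
def detect_async(processor, frame, scale=0.5):
    """Run processPifPaf on a worker thread and return its keypoint sets."""
    import threading
    pif_result = []
    worker = threading.Thread(target=processPifPaf,
                              args=(processor, frame, scale, pif_result))
    worker.start()
    worker.join()  # wait for the detection to finish
    return pif_result[0] if pif_result else None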
def processImage(self, scale):
    image = cv2.resize(self.color, None, fx=scale, fy=scale)
    image_pil = PIL.Image.fromarray(image)
    processed_image_cpu, _, __ = transforms.EVAL_TRANSFORM(
        image_pil, [], None)
    processed_image = processed_image_cpu.contiguous().to("cuda", non_blocking=True)
    fields = self.processor.fields(torch.unsqueeze(processed_image, 0))[0]
    keypoint_sets, _ = self.processor.keypoint_sets(fields)
    self.peoplelist = []

    # create joint dictionary
    for id, p in enumerate(keypoint_sets):
        person = Person()
        person.id = id
        person.joints = dict()
        for pos, joint in enumerate(p):
            if float(joint[2]) > 0.5:
                keypoint = KeyPoint()
                keypoint.i = int(joint[0] / scale)
                keypoint.j = int(joint[1] / scale)
                keypoint.score = float(joint[2])
                # pixel offsets from the image centre (i grows right, j grows down)
                ki = keypoint.i - 320
                kj = 240 - keypoint.j
                pdepth = float(self.getDepth(keypoint.i, keypoint.j))
                #keypoint.z = pdepth * self.focal / math.sqrt(ki*ki + kj*kj + self.fsquare)
                keypoint.z = pdepth  # camera returns Z directly. If depth, use equation above
                keypoint.x = ki * keypoint.z / self.focal
                keypoint.y = kj * keypoint.z / self.focal
                person.joints[COCO_IDS[pos]] = keypoint
        #print("-------------------")
        self.peoplelist.append(person)

        # draw
        if self.viewimage:
            for name1, name2 in SKELETON_CONNECTIONS:
                try:
                    joint1 = person.joints[name1]
                    joint2 = person.joints[name2]
                    if joint1.score > 0.5:
                        cv2.circle(self.color, (joint1.i, joint1.j), 10, (0, 0, 255))
                    if joint2.score > 0.5:
                        cv2.circle(self.color, (joint2.i, joint2.j), 10, (0, 0, 255))
                    if joint1.score > 0.5 and joint2.score > 0.5:
                        cv2.line(self.color, (joint1.i, joint1.j),
                                 (joint2.i, joint2.j), (0, 255, 0), 2)
                except:
                    pass
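
# A small sketch of the pinhole back-projection applied in processImage()
# above, assuming (as that code does) a 640x480 frame with the principal point
# at the image centre (320, 240) and a focal length given in pixels. The
# function name pixel_to_camera and the default cx/cy are illustrative only.
def pixel_to_camera(i, j, z, focal, cx=320, cy=240):
    """Back-project pixel (i, j) with depth z (metres) to camera coordinates."""
    x = (i - cx) * z / focal   # to the right of the optical axis
    y = (cy - j) * z / focal   # above the optical axis (image j grows downwards)
    return x, y, z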
def process_video(source, video_output, xls_output, args):
    osSleep = None
    # in Windows, prevent the OS from sleeping while we run
    if os.name == 'nt':
        osSleep = keep_awake.WindowsInhibitor()
        osSleep.inhibit()

    keypoint_painter = keypoint_painter_factory()

    # Set up input and output
    capture = cv2.VideoCapture(source)
    fps = capture.get(cv2.CAP_PROP_FPS)
    animation = pifpaf.show.AnimationFrame(show=False, video_output=video_output, video_fps=fps)

    # Used to report processing time per frame
    last_loop = time.time()

    workbook = Workbook()
    sheet_list = []
    sheet = workbook.active
    # header sheet doesn't go into the list - we don't want to add data columns to it
    sheet.append([
        'Data generated using https://github.com/CathalHarte/openPifPafScripts video_joints_positions'
    ])

    time_stamp_seconds = 0.0
    first_frame = True
    for frame_i, (ax, _) in enumerate(animation.iter()):
        _, image = capture.read()

        # Determine if we will process this frame
        if image is None:
            LOG.info('no more images captured')
            break
        if frame_i < args.start_frame:
            animation.skip_frame()
            continue
        if args.max_frames and frame_i >= args.start_frame + args.max_frames:
            break
        if frame_i % args.skip_frames != 0:
            animation.skip_frame()
            continue

        image_pifpaf = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        def get_resize_pow2(max_allowed, image_dims):
            max_dim = max(image_dims[0], image_dims[1])
            # find x: max_allowed > max_dim / ( 2 ^ x)
            x = np.log2(max_dim / max_allowed)
            return 2**x

        image_rescale = get_resize_pow2(max_allowed=360, image_dims=image_pifpaf.shape)
        # with image_descale as
        image_pifpaf = cv2.resize(image_pifpaf, (0, 0), fx=1 / image_rescale, fy=1 / image_rescale)

        start = time.time()
        image_pil = PIL.Image.fromarray(image_pifpaf)
        processed_image, _, __ = transforms.EVAL_TRANSFORM(image_pil, [], None)
        LOG.debug('preprocessing time %.3fs', time.time() - start)

        preds = args.processor.batch(args.model,
                                     torch.unsqueeze(processed_image, 0),
                                     device=args.device)[0]

        if first_frame:
            ax, _ = animation.frame_init(image)
            keypoint_painter.xy_scale = 1  # image_rescale
            first_frame = False

        for idx, pred in enumerate(preds):
            if len(sheet_list) <= idx:
                sheet = workbook.create_sheet("Body " + str(idx))
                sheet_list.append(sheet)
                sheet.append(get_column_names(pred.keypoints))
            try:
                sheet_list[idx].append(
                    flatten_keypoints_matrix(time_stamp_seconds, pred.data))
            except:
                sheet.append(
                    [np.nan for i in range(len(preds[0].keypoints) + 1)])

        time_stamp_seconds = time_stamp_seconds + (1 / fps)

        # image_color_corrected = cv2.cvtColor(image_pifpaf, cv2.COLOR_BGR)
        ax.imshow(image_pifpaf)
        keypoint_painter.annotations(ax, preds)

        current_time = time.time()
        elapsed_time = current_time - last_loop
        if elapsed_time == 0:
            processed_fps = 1000
        else:
            processed_fps = 1.0 / elapsed_time
        LOG.info('frame %d, loop time = %.3fs, processed FPS = %.3f',
                 frame_i, elapsed_time, processed_fps)
        last_loop = current_time

    workbook.save(xls_output)

    if osSleep:
        osSleep.uninhibit()
def main():
    args = cli()

    # load model
    model, _ = nets.factory_from_args(args)
    model = model.to(args.device)
    processor = decoder.factory_from_args(args, model)

    # zed init
    init = sl.InitParameters()
    init.depth_mode = sl.DEPTH_MODE.DEPTH_MODE_ULTRA
    init.coordinate_units = sl.UNIT.UNIT_METER
    init.coordinate_system = sl.COORDINATE_SYSTEM.COORDINATE_SYSTEM_RIGHT_HANDED_Y_UP
    cam = sl.Camera()
    status = cam.open(init)
    if status != sl.ERROR_CODE.SUCCESS:
        print(repr(status))
        exit()
    runtime_parameters = sl.RuntimeParameters()
    runtime_parameters.sensing_mode = sl.SENSING_MODE.SENSING_MODE_STANDARD  # Use STANDARD sensing mode
    img = sl.Mat()
    depth = sl.Mat()
    point_cloud = sl.Mat()

    last_loop = time.time()
    #capture = cv2.VideoCapture(args.source)
    visualizer = None
    while True:
        err = cam.grab(runtime_parameters)
        if err == sl.ERROR_CODE.SUCCESS:
            # Retrieve left image
            cam.retrieve_image(img, sl.VIEW.VIEW_LEFT)
            # Retrieve depth map. Depth is aligned on the left image
            cam.retrieve_measure(depth, sl.MEASURE.MEASURE_DEPTH)
            # Retrieve colored point cloud. Point cloud is aligned on the left image.
            cam.retrieve_measure(point_cloud, sl.MEASURE.MEASURE_XYZRGBA)

            # Get and print distance value (in metres) at the center of the image.
            # We measure the distance camera - object using Euclidean distance
            x = round(img.get_width() / 2)
            y = round(img.get_height() / 2)
            err, point_cloud_value = point_cloud.get_value(x, y)
            err, depth_value = depth.get_value(x, y)
            print("depth ", depth_value)
            distance = math.sqrt(point_cloud_value[0] * point_cloud_value[0] +
                                 point_cloud_value[1] * point_cloud_value[1] +
                                 point_cloud_value[2] * point_cloud_value[2])
            if not np.isnan(distance) and not np.isinf(distance):
                distance = round(distance)
                #print("Distance to Camera at ({0}, {1}): {2} mm\n".format(x, y, distance))
            else:
                print("Can't estimate distance at this position, move the camera\n")
            cv2.imshow("Depth", depth.get_data())
            cv2.waitKey(1)  # needed so the OpenCV window actually refreshes
        else:
            print("Err", err)
            continue

        image = cv2.resize(img.get_data(), None, fx=args.scale, fy=args.scale)
        #print('resized image size: {}'.format(image.shape))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if visualizer is None:
            visualizer = Visualizer(processor, args)(image)
            visualizer.send(None)

        start = time.time()
        image_pil = PIL.Image.fromarray(image)
        processed_image_cpu, _, __ = transforms.EVAL_TRANSFORM(
            image_pil, [], None)
        processed_image = processed_image_cpu.contiguous().to(
            args.device, non_blocking=True)
        #print('preprocessing time', time.time() - start)
        fields = processor.fields(torch.unsqueeze(processed_image, 0))[0]
        visualizer.send((image, fields))

        #print('loop time = {:.3}s, FPS = {:.3}'.format(
        #    time.time() - last_loop, 1.0 / (time.time() - last_loop)))
        last_loop = time.time()

    cam.close()