# Common imports for the snippets below. image_helpers, models and
# training_helpers are project-local modules.
import numpy as np
import torch
from torch.autograd import Variable

import image_helpers
import models
import training_helpers


def FrameToModelInput(
    raw_frame,
    crop_top, crop_bottom, crop_left, crop_right,
    target_height, target_width,
    convert_to_grayscale, convert_to_yuv):
  # Grayscale and YUV outputs are mutually exclusive.
  # TODO: make an output enum instead (RGB, YUV, GRAY).
  assert not (convert_to_grayscale and convert_to_yuv)
  # Crop to the ROI for the vision model.
  frame_image_cropped = image_helpers.CropHWC(
      raw_frame, crop_top, crop_bottom, crop_left, crop_right)
  frame_image_resized = image_helpers.MaybeResizeHWC(
      frame_image_cropped, target_height, target_width)
  # Optionally convert to grayscale using the ITU-R BT.601 luma weights.
  if convert_to_grayscale:
    rgb_to_gray_weights = np.array(
        [0.2989, 0.5870, 0.1140], dtype=np.float64).reshape([1, 1, 3])
    frame_image_resized = np.sum(
        frame_image_resized.astype(np.float64) * rgb_to_gray_weights,
        axis=2, keepdims=True).astype(np.uint8)
  if convert_to_yuv:
    frame_image_resized = image_helpers.RgbToYuv(frame_image_resized)
  # Transpose to CHW for pytorch.
  frame_image_chw = np.transpose(frame_image_resized, (2, 0, 1))
  return frame_image_chw, frame_image_resized
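# The image_helpers module is not shown here. The sketches below are
# assumptions that match the call sites above (CropHWC trims pixels from each
# edge of an HWC array; MaybeResizeHWC resizes only when needed, here via
# OpenCV); the real implementations may differ.
import cv2


def CropHWC(image, crop_top, crop_bottom, crop_left, crop_right):
  # Trim the requested number of pixels from each edge of an HWC image.
  height, width = image.shape[0], image.shape[1]
  return image[crop_top:(height - crop_bottom),
               crop_left:(width - crop_right), :]


def MaybeResizeHWC(image, target_height, target_width):
  # Skip the resize when the image already has the target dimensions.
  if image.shape[0] == target_height and image.shape[1] == target_width:
    return image
  # cv2.resize takes dsize as (width, height).
  return cv2.resize(
      image, (target_width, target_height), interpolation=cv2.INTER_AREA)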
# A simpler RGB-only variant of the conversion above (no grayscale or YUV
# options).
def FrameToModelInput(
    raw_frame,
    crop_top, crop_bottom, crop_left, crop_right,
    target_height, target_width):
  # Crop to the ROI for the vision model.
  frame_image_cropped = image_helpers.CropHWC(
      raw_frame, crop_top, crop_bottom, crop_left, crop_right)
  frame_image_resized = image_helpers.MaybeResizeHWC(
      frame_image_cropped, target_height, target_width)
  # Transpose to CHW for pytorch.
  frame_image_chw = np.transpose(frame_image_resized, (2, 0, 1))
  return frame_image_chw, frame_image_resized
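# Hypothetical smoke test for the conversion above; the frame size and
# crop/resize values are made up for illustration.
def _DemoFrameToModelInput():
  frame = np.random.randint(0, 256, size=(480, 640, 3), dtype=np.uint8)
  frame_chw, frame_hwc = FrameToModelInput(
      frame,
      crop_top=120, crop_bottom=60, crop_left=0, crop_right=0,
      target_height=66, target_width=200)
  assert frame_chw.shape == (3, 66, 200)
  assert frame_hwc.shape == (66, 200, 3)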
def RawFrameToModelInput(
    raw_frame, crop_settings, net_settings, convert_to_yuv, cuda_device_id):
  frame_cropped = image_helpers.CropHWC(
      raw_frame,
      crop_settings.crop_top, crop_settings.crop_bottom,
      crop_settings.crop_left, crop_settings.crop_right)
  frame_resized = image_helpers.MaybeResizeHWC(
      frame_cropped,
      net_settings[training_helpers.TARGET_HEIGHT],
      net_settings[training_helpers.TARGET_WIDTH])
  if convert_to_yuv:
    frame_colorspace = image_helpers.RgbToYuv(frame_resized)
  else:
    frame_colorspace = frame_resized
  # Transpose to CHW and scale to [0, 1] for pytorch.
  frame_chw = np.transpose(frame_colorspace, (2, 0, 1))
  frame_float = frame_chw.astype(np.float32) / 255.0
  # Add a dummy dimension to make it a "batch" of size 1. Note that
  # torch.autograd.Variable is a legacy wrapper; PyTorch >= 0.4 operates on
  # tensors directly.
  frame_variable = torch.autograd.Variable(
      torch.from_numpy(frame_float[np.newaxis, ...]))
  return frame_variable.cuda(cuda_device_id), frame_resized
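# RgbToYuv is also project-local; a minimal sketch follows, assuming a BT.601
# full-range RGB-to-YUV matrix with the chroma channels shifted by 128 to fit
# uint8. The project's version may use a different matrix or scaling.
_RGB_TO_YUV = np.array(
    [[0.299, 0.587, 0.114],
     [-0.14713, -0.28886, 0.436],
     [0.615, -0.51499, -0.10001]], dtype=np.float64)


def RgbToYuv(rgb_image):
  yuv = rgb_image.astype(np.float64) @ _RGB_TO_YUV.T
  yuv[..., 1:] += 128.0  # Shift U and V into the [0, 255] range.
  return np.clip(yuv, 0.0, 255.0).astype(np.uint8)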
# Init model and load weights.
net = models.MakeNetwork(
    args.net_name,
    in_shape=[3, args.target_height, args.target_width],
    out_dims=args.net_out_total_dimensions,
    dropout_prob=0.0)
net.load_state_dict(torch.load(args.in_model_weights))
net.eval()
net.cuda()

# Run the net on every frame of the input video and record the predicted
# angular velocity per frame.
result_data = []
frames_generator = image_helpers.VideoFrameGenerator(args.in_video)
for raw_frame, frame_index in frames_generator:
  frame_cropped = image_helpers.CropHWC(
      raw_frame, args.crop_top, args.crop_bottom,
      args.crop_left, args.crop_right)
  frame_resized = image_helpers.MaybeResizeHWC(
      frame_cropped, args.target_height, args.target_width)
  frame_chw = np.transpose(frame_resized, (2, 0, 1))
  frame_float = frame_chw.astype(np.float32) / 255.0
  # Add a dummy dimension to make it a "batch" of size 1.
  frame_tensor = Variable(
      torch.from_numpy(frame_float[np.newaxis, ...])).cuda()
  result_tensor = net(frame_tensor).cpu()
  result_value = result_tensor.data.numpy()[
      0, args.net_out_dimension_to_use].item()
  result_data.append({
      'frame_id': frame_index,
      'angular_velocity': result_value
  })
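# The loop above ends after appending per-frame predictions; a plausible
# final step (assuming a hypothetical args.out_json flag) serializes them:
import json

with open(args.out_json, 'w') as out_file:
  json.dump(result_data, out_file, indent=2)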