def FrameToModelInput(
        raw_frame,
        crop_top,
        crop_bottom,
        crop_left,
        crop_right,
        target_height,
        target_width,
        convert_to_grayscale,
        convert_to_yuv):
    """Crop, resize and optionally color-convert a frame for the vision model.

    The frame is cropped with `image_helpers.CropHWC`, resized with
    `image_helpers.MaybeResizeHWC`, optionally converted to grayscale or YUV,
    and finally transposed from HWC to CHW.

    Args:
        raw_frame: Input frame; treated as a height x width x channel array
            (it is later summed over axis 2 and transposed with (2, 0, 1)).
        crop_top, crop_bottom, crop_left, crop_right: Crop settings forwarded
            unchanged to `image_helpers.CropHWC`.
        target_height, target_width: Output size forwarded unchanged to
            `image_helpers.MaybeResizeHWC`.
        convert_to_grayscale: If truthy, collapse the channel axis to a single
            uint8 luma channel using fixed RGB weights.
        convert_to_yuv: If truthy, convert with `image_helpers.RgbToYuv`.

    Returns:
        A tuple `(frame_image_chw, frame_image_hwc)`: the CHW-transposed array
        and the HWC array it was transposed from. Both reflect the color
        conversion, if one was requested.

    Raises:
        ValueError: If both `convert_to_grayscale` and `convert_to_yuv` are
            truthy.
    """
    # Grayscale and YUV outputs are mutually exclusive. This is an explicit
    # raise rather than an `assert` so the check survives `python -O`.
    # TODO: make an output enum instead (RGB, YUV, GRAY).
    if convert_to_grayscale and convert_to_yuv:
        raise ValueError(
            'convert_to_grayscale and convert_to_yuv are mutually exclusive.')

    # Crop to the ROI for the vision model.
    frame_image_cropped = image_helpers.CropHWC(
        raw_frame, crop_top, crop_bottom, crop_left, crop_right)
    frame_image_resized = image_helpers.MaybeResizeHWC(
        frame_image_cropped, target_height, target_width)

    # Optionally convert to grayscale.
    if convert_to_grayscale:
        # Weights are shaped [1, 1, 3] so they broadcast over the channel axis.
        rgb_to_gray_weights = np.array(
            [0.2989, 0.5870, 0.1140], dtype=np.float64).reshape([1, 1, 3])
        # keepdims=True keeps a channel axis of size 1 so the transpose below
        # still sees three dimensions.
        # NOTE(review): astype(np.uint8) truncates instead of rounding; kept
        # as-is so existing outputs do not change.
        frame_image_resized = np.sum(
            frame_image_resized.astype(np.float64) * rgb_to_gray_weights,
            axis=2,
            keepdims=True).astype(np.uint8)

    if convert_to_yuv:
        frame_image_resized = image_helpers.RgbToYuv(frame_image_resized)

    # Transpose to CHW for pytorch.
    frame_image_chw = np.transpose(frame_image_resized, (2, 0, 1))
    return frame_image_chw, frame_image_resized
def FrameToModelInput(raw_frame, crop_top, crop_bottom, crop_left, crop_right,
                      target_height, target_width):
    """Crop and resize a frame, returning it in both CHW and HWC layouts.

    Args:
        raw_frame: Input frame, forwarded to `image_helpers.CropHWC`.
        crop_top, crop_bottom, crop_left, crop_right: Crop settings forwarded
            unchanged to `image_helpers.CropHWC`.
        target_height, target_width: Output size forwarded unchanged to
            `image_helpers.MaybeResizeHWC`.

    Returns:
        A tuple `(chw, hwc)`: the resized image with its axes permuted by
        (2, 0, 1), and the resized image itself.
    """
    # Restrict the frame to the ROI used by the vision model.
    roi_hwc = image_helpers.CropHWC(
        raw_frame, crop_top, crop_bottom, crop_left, crop_right)
    resized_hwc = image_helpers.MaybeResizeHWC(
        roi_hwc, target_height, target_width)

    # pytorch expects channels first, so hand back a CHW view alongside HWC.
    return np.transpose(resized_hwc, (2, 0, 1)), resized_hwc
def RawFrameToModelInput(raw_frame, crop_settings, net_settings,
                         convert_to_yuv, cuda_device_id):
    """Turn one raw frame into a batch-of-one CUDA input for the network.

    Args:
        raw_frame: Input frame, forwarded to `image_helpers.CropHWC`.
        crop_settings: Object exposing `crop_top`, `crop_bottom`, `crop_left`
            and `crop_right` attributes.
        net_settings: Mapping indexed by `training_helpers.TARGET_HEIGHT` and
            `training_helpers.TARGET_WIDTH`.
        convert_to_yuv: If truthy, the resized frame is passed through
            `image_helpers.RgbToYuv` before being fed to the network.
        cuda_device_id: Device id passed to `.cuda()`.

    Returns:
        A tuple `(input_variable, resized_hwc)`: the float32 input scaled by
        1/255 with a leading batch axis of size 1, moved to the CUDA device,
        and the resized frame *before* any YUV conversion.
    """
    cropped_hwc = image_helpers.CropHWC(
        raw_frame,
        crop_settings.crop_top,
        crop_settings.crop_bottom,
        crop_settings.crop_left,
        crop_settings.crop_right)

    target_height = net_settings[training_helpers.TARGET_HEIGHT]
    target_width = net_settings[training_helpers.TARGET_WIDTH]
    resized_hwc = image_helpers.MaybeResizeHWC(
        cropped_hwc, target_height, target_width)

    # Only the network input is color-converted; the returned HWC frame stays
    # in its original colorspace.
    model_hwc = (
        image_helpers.RgbToYuv(resized_hwc) if convert_to_yuv else resized_hwc)

    # HWC -> CHW, then scale to float32 by dividing by 255.
    scaled_chw = np.transpose(model_hwc, (2, 0, 1)).astype(np.float32) / 255.0

    # Prepend a dummy axis so the network sees a "batch" of size 1.
    batch_of_one = scaled_chw[np.newaxis, ...]
    input_variable = torch.autograd.Variable(torch.from_numpy(batch_of_one))
    return input_variable.cuda(cuda_device_id), resized_hwc
args.net_name, in_shape=[3, args.target_height, args.target_width], out_dims=args.net_out_total_dimensions, dropout_prob=0.0) net.load_state_dict(torch.load(args.in_model_weights)) net.eval() net.cuda() result_data = [] frames_generator = image_helpers.VideoFrameGenerator(args.in_video) for raw_frame, frame_index in frames_generator: frame_cropped = image_helpers.CropHWC(raw_frame, args.crop_top, args.crop_bottom, args.crop_left, args.crop_right) frame_resized = image_helpers.MaybeResizeHWC(frame_cropped, args.target_height, args.target_width) frame_chw = np.transpose(frame_resized, (2, 0, 1)) frame_float = frame_chw.astype(np.float32) / 255.0 # Add a dummy dimension to make it a "batch" of size 1. frame_tensor = Variable(torch.from_numpy(frame_float[np.newaxis, ...])).cuda() result_tensor = net(frame_tensor).cpu() result_value = result_tensor.data.numpy()[ 0, args.net_out_dimension_to_use].item() result_data.append({ 'frame_id': frame_index, 'angular_velocity': result_value }) with open(args.out_steering_json, 'w') as out_json:
args.crop_bottom, args.crop_left, args.crop_right) # If required, initialize the parts that depend on the input video # dimensions. if upsampler is None: upsampler = torch.nn.Upsample(cropped_frame.shape[0:2], mode='bilinear').cuda() if frames_cropped is None: frames_cropped = np.zeros(shape=((args.batch_size, ) + cropped_frame.shape), dtype=np.uint8) frames_cropped[in_batch_idx, ...] = cropped_frame frames_resized[in_batch_idx, ...] = image_helpers.MaybeResizeHWC( frames_cropped[in_batch_idx, ...], net_settings[training_helpers.TARGET_HEIGHT], net_settings[training_helpers.TARGET_WIDTH]) if args.convert_to_yuv: frame_colorspace[in_batch_idx, ...] = image_helpers.RgbToYuv( frames_resized[in_batch_idx, ...]) else: frame_colorspace[in_batch_idx, ...] = frames_resized[in_batch_idx, ...] if in_batch_idx != args.batch_size - 1: continue frame_chw = np.transpose(frame_colorspace, (0, 3, 1, 2)) frame_float = frame_chw.astype(np.float32) / 255.0 frame_input_tensor = Variable(torch.from_numpy(frame_float), requires_grad=True).cuda(
dropout_prob=net_settings[training_helpers.DROPOUT_PROB], options=net_settings[training_helpers.NET_OPTIONS]) net.load_state_dict(torch.load(weights_filename)) net.cuda(args.cuda_device_id) net.eval() nets.append(net) result_data = [] frames_generator = skvideo.io.vreader(args.in_video) trajectory_prediction = None for frame_index, raw_frame in enumerate(frames_generator): frame_cropped = image_helpers.CropHWC( raw_frame, args.crop_top, args.crop_bottom, args.crop_left, args.crop_right) frame_resized = image_helpers.MaybeResizeHWC( frame_cropped, net_settings[training_helpers.TARGET_HEIGHT], net_settings[training_helpers.TARGET_WIDTH]) if args.convert_to_yuv: frame_resized = image_helpers.RgbToYuv(frame_resized) frame_chw = np.transpose(frame_resized, (2,0,1)) frame_float = frame_chw.astype(np.float32) / 255.0 # Add a dummy dimension to make it a "batch" of size 1. frame_tensor = Variable( torch.from_numpy(frame_float[np.newaxis,...])).cuda(args.cuda_device_id) result_components = np.array([ net([frame_tensor, forward_axis_tensor])[0].cpu().data.numpy() for net in nets]) result_averaged = np.mean(result_components, axis=0, keepdims=False) trajectory_prediction = UpdateFutureTrajectoryPrediction( trajectory_prediction, result_averaged,