Example 1

def FrameToModelInput(
    raw_frame,
    crop_top, crop_bottom, crop_left, crop_right,
    target_height, target_width, convert_to_grayscale,
    convert_to_yuv):
  # Grayscale and YUV outputs are mutually exclusive.
  # TODO: make an output enum instead (RGB, YUV, GRAY).
  assert not convert_to_grayscale or not convert_to_yuv
  # Crop to the ROI for the vision model.
  frame_image_cropped = image_helpers.CropHWC(
      raw_frame, crop_top, crop_bottom, crop_left, crop_right)
  frame_image_resized = image_helpers.MaybeResizeHWC(
      frame_image_cropped, target_height, target_width)
  # Optionally convert to grayscale.
  if convert_to_grayscale:
    # ITU-R BT.601 luma weights.
    rgb_to_gray_weights = np.array(
        [0.2989, 0.5870, 0.1140], dtype=np.float64).reshape([1, 1, 3])
    frame_image_resized = np.sum(
        frame_image_resized.astype(np.float64) * rgb_to_gray_weights,
        axis=2, keepdims=True).astype(np.uint8)
  if convert_to_yuv:
    frame_image_resized = image_helpers.RgbToYuv(frame_image_resized)
  # Transpose to CHW for pytorch.
  frame_image_chw = np.transpose(frame_image_resized, (2, 0, 1))
  # Return both the model-ready CHW array and the resized HWC image.
  return frame_image_chw, frame_image_resized
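
A minimal usage sketch, assuming the crop arguments are pixel margins stripped from each edge and that image_helpers is importable; the frame and all values here are placeholders:

import numpy as np

# Placeholder 480x640 RGB frame; in practice this comes from a video reader.
raw_frame = np.zeros((480, 640, 3), dtype=np.uint8)
frame_chw, frame_hwc = FrameToModelInput(
    raw_frame,
    crop_top=100, crop_bottom=20, crop_left=0, crop_right=0,
    target_height=66, target_width=200,
    convert_to_grayscale=False, convert_to_yuv=True)
# Expected shapes, assuming MaybeResizeHWC resizes to the target size:
# frame_chw: (3, 66, 200), frame_hwc: (66, 200, 3).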

Example 2

def FrameToModelInput(raw_frame, crop_top, crop_bottom, crop_left, crop_right,
                      target_height, target_width):
    # Crop to the ROI for the vision model.
    frame_image_cropped = image_helpers.CropHWC(raw_frame, crop_top,
                                                crop_bottom, crop_left,
                                                crop_right)
    frame_image_resized = image_helpers.MaybeResizeHWC(frame_image_cropped,
                                                       target_height,
                                                       target_width)
    # Transpose to CHW for pytorch.
    frame_image_chw = np.transpose(frame_image_resized, (2, 0, 1))
    return frame_image_chw, frame_image_resized
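
Both variants lean on image_helpers.CropHWC, which is not shown on this page; a sketch of its assumed semantics (a hypothetical reimplementation, not the library's own code):

import numpy as np

def crop_hwc_sketch(image, top, bottom, left, right):
    # Assumed behavior of image_helpers.CropHWC: strip the given number of
    # pixels from each edge of an HWC image.
    height, width = image.shape[:2]
    return image[top:height - bottom, left:width - right, :]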

Example 3

def RawFrameToModelInput(raw_frame, crop_settings, net_settings,
                         convert_to_yuv, cuda_device_id):
    frame_cropped = image_helpers.CropHWC(raw_frame, crop_settings.crop_top,
                                          crop_settings.crop_bottom,
                                          crop_settings.crop_left,
                                          crop_settings.crop_right)
    frame_resized = image_helpers.MaybeResizeHWC(
        frame_cropped, net_settings[training_helpers.TARGET_HEIGHT],
        net_settings[training_helpers.TARGET_WIDTH])
    # Optionally convert to YUV.
    if convert_to_yuv:
        frame_colorspace = image_helpers.RgbToYuv(frame_resized)
    else:
        frame_colorspace = frame_resized
    # HWC -> CHW, then scale uint8 [0, 255] to float32 [0, 1].
    frame_chw = np.transpose(frame_colorspace, (2, 0, 1))
    frame_float = frame_chw.astype(np.float32) / 255.0
    # Add a dummy dimension to make it a "batch" of size 1.
    # Note: torch.autograd.Variable is a no-op wrapper since PyTorch 0.4.
    frame_variable = torch.autograd.Variable(
        torch.from_numpy(frame_float[np.newaxis, ...]))
    return frame_variable.cuda(cuda_device_id), frame_resized
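
torch.autograd.Variable has been a no-op wrapper since PyTorch 0.4; a sketch of the same batch-of-one step in current PyTorch, with placeholders standing in for the snippet's values:

import numpy as np
import torch

frame_float = np.zeros((3, 66, 200), dtype=np.float32)  # placeholder for the snippet's frame_float
device = torch.device('cuda', 0)  # placeholder for cuda_device_id
# unsqueeze(0) adds the same batch dimension as frame_float[np.newaxis, ...].
frame_tensor = torch.from_numpy(frame_float).unsqueeze(0).to(device)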

Example 4

        args.net_name,
        in_shape=[3, args.target_height, args.target_width],
        out_dims=args.net_out_total_dimensions,
        dropout_prob=0.0)
    net.load_state_dict(torch.load(args.in_model_weights))
    net.eval()
    net.cuda()

    result_data = []
    frames_generator = image_helpers.VideoFrameGenerator(args.in_video)
    for raw_frame, frame_index in frames_generator:
        frame_cropped = image_helpers.CropHWC(raw_frame, args.crop_top,
                                              args.crop_bottom, args.crop_left,
                                              args.crop_right)
        frame_resized = image_helpers.MaybeResizeHWC(frame_cropped,
                                                     args.target_height,
                                                     args.target_width)
        frame_chw = np.transpose(frame_resized, (2, 0, 1))
        frame_float = frame_chw.astype(np.float32) / 255.0
        # Add a dummy dimension to make it a "batch" of size 1.
        frame_tensor = Variable(torch.from_numpy(frame_float[np.newaxis,
                                                             ...])).cuda()
        result_tensor = net(frame_tensor).cpu()
        # Pick the configured output dimension from the single-element batch.
        result_value = result_tensor.data.numpy()[
            0, args.net_out_dimension_to_use].item()
        result_data.append({
            'frame_id': frame_index,
            'angular_velocity': result_value
        })

    with open(args.out_steering_json, 'w') as out_json:
        # Assumed completion; the original snippet is truncated here.
        json.dump(result_data, out_json, indent=2)
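
Assuming the json.dump completion above, each list element mirrors the dicts built in the loop; a sketch of reading the file back (the path is a placeholder for args.out_steering_json):

import json

with open('steering.json') as in_json:  # placeholder path
    predictions = json.load(in_json)
for entry in predictions:
    print(entry['frame_id'], entry['angular_velocity'])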

Example 5

                                              args.crop_bottom, args.crop_left,
                                              args.crop_right)

        # If required, initialize the parts that depend on the input video
        # dimensions.
        if upsampler is None:
            upsampler = torch.nn.Upsample(cropped_frame.shape[0:2],
                                          mode='bilinear').cuda()
        if frames_cropped is None:
            frames_cropped = np.zeros(shape=((args.batch_size, ) +
                                             cropped_frame.shape),
                                      dtype=np.uint8)

        frames_cropped[in_batch_idx, ...] = cropped_frame
        frames_resized[in_batch_idx, ...] = image_helpers.MaybeResizeHWC(
            frames_cropped[in_batch_idx, ...],
            net_settings[training_helpers.TARGET_HEIGHT],
            net_settings[training_helpers.TARGET_WIDTH])
        if args.convert_to_yuv:
            frame_colorspace[in_batch_idx, ...] = image_helpers.RgbToYuv(
                frames_resized[in_batch_idx, ...])
        else:
            frame_colorspace[in_batch_idx, ...] = frames_resized[in_batch_idx, ...]

        # Only run the net once a full batch has been accumulated.
        if in_batch_idx != args.batch_size - 1:
            continue

        # NHWC -> NCHW for pytorch, then scale uint8 [0, 255] to float32 [0, 1].
        frame_chw = np.transpose(frame_colorspace, (0, 3, 1, 2))
        frame_float = frame_chw.astype(np.float32) / 255.0
        # requires_grad=True so gradients w.r.t. the input can be computed.
        frame_input_tensor = Variable(torch.from_numpy(frame_float),
                                      requires_grad=True).cuda(
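
The snippet references frames_resized and frame_colorspace buffers whose initialization fell outside the excerpt; a sketch of how they would presumably be preallocated, mirroring the lazy frames_cropped setup above and reusing the snippet's args and net_settings:

import numpy as np

# Hypothetical shapes; TARGET_HEIGHT/TARGET_WIDTH come from net_settings as above.
target_shape = (args.batch_size,
                net_settings[training_helpers.TARGET_HEIGHT],
                net_settings[training_helpers.TARGET_WIDTH], 3)
frames_resized = np.zeros(shape=target_shape, dtype=np.uint8)
frame_colorspace = np.zeros(shape=target_shape, dtype=np.uint8)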

Example 6

        dropout_prob=net_settings[training_helpers.DROPOUT_PROB],
        options=net_settings[training_helpers.NET_OPTIONS])
    net.load_state_dict(torch.load(weights_filename))
    net.cuda(args.cuda_device_id)
    net.eval()
    nets.append(net)

  result_data = []
  frames_generator = skvideo.io.vreader(args.in_video)
  trajectory_prediction = None
  for frame_index, raw_frame in enumerate(frames_generator):
    frame_cropped = image_helpers.CropHWC(
        raw_frame,
        args.crop_top, args.crop_bottom, args.crop_left, args.crop_right)
    frame_resized = image_helpers.MaybeResizeHWC(
        frame_cropped,
        net_settings[training_helpers.TARGET_HEIGHT],
        net_settings[training_helpers.TARGET_WIDTH])
    if args.convert_to_yuv:
      frame_resized = image_helpers.RgbToYuv(frame_resized)
    frame_chw = np.transpose(frame_resized, (2, 0, 1))
    frame_float = frame_chw.astype(np.float32) / 255.0
    # Add a dummy dimension to make it a "batch" of size 1.
    frame_tensor = Variable(torch.from_numpy(
        frame_float[np.newaxis, ...])).cuda(args.cuda_device_id)
    # Average the prediction over the ensemble of nets.
    result_components = np.array([
        net([frame_tensor, forward_axis_tensor])[0].cpu().data.numpy()
        for net in nets])
    result_averaged = np.mean(result_components, axis=0, keepdims=False)
    trajectory_prediction = UpdateFutureTrajectoryPrediction(
        trajectory_prediction,
        result_averaged,