Esempio n. 1
0
    def __init__(self, model_name="mono_1024x320"):
        """Load a pretrained encoder/depth-decoder pair onto the CPU.

        ``download_model_if_doesnt_exist(model_name)`` is called first, then
        the weights are read from ``./monodepth2/models/<model_name>/``.
        Both networks are left in evaluation mode.

        Args:
            model_name: Name of the model directory that holds
                ``encoder.pth`` and ``depth.pth``.
        """
        self.model_name = model_name
        download_model_if_doesnt_exist(model_name)
        model_dir = os.path.join("./monodepth2/models", model_name)
        encoder_path = os.path.join(model_dir, "encoder.pth")
        depth_decoder_path = os.path.join(model_dir, "depth.pth")

        # LOADING PRETRAINED MODEL
        self.encoder = networks.ResnetEncoder(18, False)
        self.depth_decoder = networks.DepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(4))

        loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
        # The encoder checkpoint carries extra entries (at least 'height' and
        # 'width') that are not encoder weights, so keep only the keys the
        # encoder itself knows. state_dict() is taken once instead of being
        # rebuilt for every checkpoint key.
        encoder_state = self.encoder.state_dict()
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in encoder_state
        }
        self.encoder.load_state_dict(filtered_dict_enc)

        loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
        self.depth_decoder.load_state_dict(loaded_dict)

        self.encoder.eval()
        self.depth_decoder.eval()

        # Input resolution recorded in the encoder checkpoint.
        self.feed_height = loaded_dict_enc['height']
        self.feed_width = loaded_dict_enc['width']
Esempio n. 2
0
    def __init__(self, model_path):
        """Build the encoder and depth decoder from weights in *model_path*.

        Args:
            model_path: Directory containing ``encoder.pth`` and
                ``depth.pth``. Must be a ``str``.

        NOTE(review): the checkpoints are deserialized onto ``device`` but
        the networks themselves are neither moved to that device nor put in
        ``eval()`` mode here - confirm that the caller does both before
        running inference.
        """
        assert isinstance(model_path, str)
        self.model_path = model_path

        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")

        # LOADING MODEL
        encoder = networks.ResnetEncoder(18, False)
        depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                              scales=range(4))

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        loaded_dict_enc = torch.load(encoder_path, map_location=device)
        # The encoder checkpoint carries extra entries (at least 'height' and
        # 'width') that are not encoder weights, so keep only the keys the
        # encoder itself knows. state_dict() is taken once instead of being
        # rebuilt for every checkpoint key.
        encoder_state = encoder.state_dict()
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items() if k in encoder_state
        }
        encoder.load_state_dict(filtered_dict_enc)

        loaded_dict = torch.load(depth_decoder_path, map_location=device)
        depth_decoder.load_state_dict(loaded_dict)

        self.encoder = encoder
        self.depth_decoder = depth_decoder

        # Input resolution recorded in the encoder checkpoint.
        self.feed_height = loaded_dict_enc['height']
        self.feed_width = loaded_dict_enc['width']
Esempio n. 3
0
    def __init__(self, model_name, no_cuda):
        """Load the depth networks onto the chosen device and create the
        ROS publisher.

        Args:
            model_name: Name of the model directory under
                ``<this file's directory>/monodepth2/models``.
            no_cuda: When true, run on the CPU even if CUDA is available.
        """

        # Setup execution env
        if torch.cuda.is_available() and not no_cuda:
            self._device = torch.device("cuda")
        else:
            self._device = torch.device("cpu")

        # Get model
        download_model_if_doesnt_exist(model_name)
        dir_path = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(dir_path, "monodepth2", "models", model_name)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")

        # Load encoder
        self._encoder = networks.ResnetEncoder(18, False)
        loaded_dict_enc = torch.load(encoder_path, map_location=self._device)

        # extract the height and width of image that this model was trained with
        self._feed_height = loaded_dict_enc['height']
        self._feed_width = loaded_dict_enc['width']
        # Drop the checkpoint entries that are not encoder weights (such as
        # 'height'/'width'). state_dict() is taken once instead of being
        # rebuilt for every checkpoint key.
        encoder_state = self._encoder.state_dict()
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in encoder_state
        }
        self._encoder.load_state_dict(filtered_dict_enc)
        self._encoder.to(self._device)
        self._encoder.eval()

        # Load decoder
        self._depth_decoder = networks.DepthDecoder(
            num_ch_enc=self._encoder.num_ch_enc, scales=range(4))

        loaded_dict = torch.load(depth_decoder_path, map_location=self._device)
        self._depth_decoder.load_state_dict(loaded_dict)

        self._depth_decoder.to(self._device)
        self._depth_decoder.eval()

        # ROS image subscriber and publisher
        # NOTE(review): rospy.Publisher normally takes the message class as a
        # required second argument (and usually a queue_size) - confirm the
        # intended message type; as written this call is expected to fail.
        self._img_pub = rospy.Publisher('monodepth2')
Esempio n. 4
0
    return x


# ## Setting up Monodepth model

# We build our monocular depth estimation model from the Monodepth module

# Define which model to use and download if not found
model_name = "mono_640x192"
download_model_if_doesnt_exist(model_name)

# Build paths to coders and instantiate from path
# Both networks are placed on the GPU with .cuda(), so this section requires
# a CUDA-capable environment.
encoder_path = os.path.join("models", model_name, "encoder.pth")
depth_decoder_path = os.path.join("models", model_name, "depth.pth")
encoder = networks.ResnetEncoder(18, False).cuda()
depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                      scales=range(4)).cuda()

# Encoder and Decoder loading
# Checkpoints are deserialized on the CPU; load_state_dict copies the values
# into the networks' existing parameters.
loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
# Keep only the checkpoint entries the encoder actually has. state_dict() is
# taken once instead of being rebuilt for every checkpoint key.
encoder_state = encoder.state_dict()
filtered_dict_enc = {
    k: v
    for k, v in loaded_dict_enc.items() if k in encoder_state
}
encoder.load_state_dict(filtered_dict_enc)
loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
depth_decoder.load_state_dict(loaded_dict)

# Put the coders in evaluation mode
encoder.eval()
depth_decoder.eval()
Esempio n. 5
0
def test_simple(image_path, image_size, model_name):
    """Predict disparity for a single image or for every ``*.jpg`` in a folder.

    For each input image two files are written next to it:
    ``<name>_disp.npy`` (output of ``disp_to_depth(disp, 0.1, 100)``) and
    ``<name>_disp.jpeg`` (magma-colormapped disparity). Inference runs on the
    CPU.

    Args:
        image_path: Path to one image file, or to a directory that is
            searched for ``*.jpg`` files.
        image_size: Size passed to ``Image.resize`` before the image is fed
            to the network; the saved colormapped image has this size.
        model_name: Name of the model directory under ``models``.

    Returns:
        The colormapped ``uint8`` array of the last image processed, or
        ``None`` when no image was processed (empty folder, or only
        ``*_disp.jpg`` files).

    Raises:
        Exception: If ``image_path`` is neither a file nor a directory.
    """
    device = torch.device("cpu")

    download_model_if_doesnt_exist(model_name)
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    # Drop the checkpoint entries that are not encoder weights (such as
    # 'height'/'width'). state_dict() is taken once instead of being rebuilt
    # for every checkpoint key.
    encoder_state = encoder.state_dict()
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder_state
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                          scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(image_path):
        # Only testing on a single image
        paths = [image_path]
        output_directory = os.path.dirname(image_path)
    elif os.path.isdir(image_path):
        # Searching folder for images
        paths = glob.glob(os.path.join(image_path, '*.jpg'))
        output_directory = image_path
    else:
        raise Exception("Can not find image_path: {}".format(image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # Stays None when every path is skipped or the folder is empty, so the
    # final return cannot hit an unbound local.
    colormapped_im = None
    to_tensor = transforms.ToTensor()

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, path in enumerate(paths):

            if path.endswith("_disp.jpg"):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess; the context manager closes the
            # underlying file once the resized RGB copy has been made.
            with pil.open(path) as raw_image:
                input_image = raw_image.resize(image_size).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height),
                                             pil.LANCZOS)
            input_image = to_tensor(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width),
                mode="bilinear",
                align_corners=False)

            # Saving numpy file (from the network-resolution disparity, not
            # the resized one)
            output_name = os.path.splitext(os.path.basename(path))[0]
            name_dest_npy = os.path.join(output_directory,
                                         "{}_disp.npy".format(output_name))
            scaled_disp, _ = disp_to_depth(disp, 0.1, 100)
            np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image; the colour scale is clipped at
            # the 95th percentile of the disparity values.
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(),
                                              vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] *
                              255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory,
                                        "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print("   Processed {:d} of {:d} images - saved prediction to {}".
                  format(idx + 1, len(paths), name_dest_im))

    print('-> Done!')
    return colormapped_im