Example #1
    def __init__(self, model_name="mono_1024x320"):
        self.model_name = model_name
        download_model_if_doesnt_exist(model_name)
        encoder_path = os.path.join("./monodepth2/models", model_name,
                                    "encoder.pth")
        depth_decoder_path = os.path.join("./monodepth2/models", model_name,
                                          "depth.pth")

        # LOADING PRETRAINED MODEL
        self.encoder = networks.ResnetEncoder(18, False)
        self.depth_decoder = networks.DepthDecoder(
            num_ch_enc=self.encoder.num_ch_enc, scales=range(4))

        loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in self.encoder.state_dict()
        }
        self.encoder.load_state_dict(filtered_dict_enc)

        loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
        self.depth_decoder.load_state_dict(loaded_dict)

        self.encoder.eval()
        self.depth_decoder.eval()

        self.feed_height = loaded_dict_enc['height']
        self.feed_width = loaded_dict_enc['width']
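
    # NOTE (added sketch): this excerpt only loads the networks. A hypothetical
    # predict helper, mirroring the inference loop in Example #6, might look as
    # follows; 'pil' (PIL.Image) and 'transforms' (torchvision.transforms) are
    # assumed to be imported alongside torch.
    def predict(self, input_image):
        """Return a disparity map at the resolution of the given PIL image."""
        original_width, original_height = input_image.size
        # Resize to the resolution the model was trained at
        resized = input_image.resize((self.feed_width, self.feed_height),
                                     pil.LANCZOS)
        x = transforms.ToTensor()(resized).unsqueeze(0)

        with torch.no_grad():
            features = self.encoder(x)
            outputs = self.depth_decoder(features)

        # Upsample the finest-scale disparity back to the input size
        disp = outputs[("disp", 0)]
        return torch.nn.functional.interpolate(
            disp, (original_height, original_width),
            mode="bilinear", align_corners=False)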
Example #2
def load_model(model_name):
    """
    Returns an encoder, depth decoder, and expected input image size from a
    model name
    Args:
        model_name: One of:
            "mono_640x192",
            "stereo_640x192",
            "mono+stereo_640x192",
            "mono_no_pt_640x192",
            "stereo_no_pt_640x192",
            "mono+stereo_no_pt_640x192",
            "mono_1024x320",
            "stereo_1024x320",
            "mono+stereo_1024x320"
    """

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(model_name)
    model_path = os.path.join("models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = monodepth2.networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of the images this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = monodepth2.networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    return encoder, depth_decoder, (feed_width, feed_height)
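

# NOTE (added sketch): a minimal call site for load_model. "test_image.jpg" is
# a placeholder path, and 'pil' (PIL.Image) / 'transforms'
# (torchvision.transforms) are assumed imports.
encoder, depth_decoder, (feed_width, feed_height) = load_model("mono_640x192")
device = next(encoder.parameters()).device  # models were moved in load_model

img = pil.open("test_image.jpg").convert('RGB')
img = img.resize((feed_width, feed_height), pil.LANCZOS)
x = transforms.ToTensor()(img).unsqueeze(0).to(device)

with torch.no_grad():
    disp = depth_decoder(encoder(x))[("disp", 0)]  # disparity at feed resolution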
Example #3
    def __init__(self, model_name, no_cuda):

        # Setup execution env
        if torch.cuda.is_available() and not no_cuda:
            self._device = torch.device("cuda")
        else:
            self._device = torch.device("cpu")

        # Get model
        download_model_if_doesnt_exist(model_name)
        dir_path = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(dir_path, "monodepth2", "models", model_name)
        encoder_path = os.path.join(model_path, "encoder.pth")
        depth_decoder_path = os.path.join(model_path, "depth.pth")

        # Load encoder
        self._encoder = networks.ResnetEncoder(18, False)
        loaded_dict_enc = torch.load(encoder_path, map_location=self._device)

        # extract the height and width of the images this model was trained with
        self._feed_height = loaded_dict_enc['height']
        self._feed_width = loaded_dict_enc['width']
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in self._encoder.state_dict()
        }
        self._encoder.load_state_dict(filtered_dict_enc)
        self._encoder.to(self._device)
        self._encoder.eval()

        # Load decoder
        self._depth_decoder = networks.DepthDecoder(
            num_ch_enc=self._encoder.num_ch_enc, scales=range(4))

        loaded_dict = torch.load(depth_decoder_path, map_location=self._device)
        self._depth_decoder.load_state_dict(loaded_dict)

        self._depth_decoder.to(self._device)
        self._depth_decoder.eval()

        # ROS image publisher; rospy.Publisher requires a message class, which
        # the excerpt omits, so sensor_msgs.msg.Image is assumed here
        self._img_pub = rospy.Publisher('monodepth2', Image, queue_size=1)
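
        # NOTE (added sketch): the excerpt ends before the subscriber is wired
        # up. A hypothetical completion follows; the topic name, CvBridge, and
        # the Image message type are assumptions, and 'pil'/'transforms' are
        # assumed to be PIL.Image / torchvision.transforms.
        self._bridge = CvBridge()
        self._img_sub = rospy.Subscriber('/camera/image_raw', Image,
                                         self._on_image, queue_size=1)

    def _on_image(self, msg):
        # ROS Image -> numpy RGB -> resized, normalized tensor on the device
        rgb = self._bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
        img = pil.fromarray(rgb).resize((self._feed_width, self._feed_height),
                                        pil.LANCZOS)
        x = transforms.ToTensor()(img).unsqueeze(0).to(self._device)

        with torch.no_grad():
            disp = self._depth_decoder(self._encoder(x))[("disp", 0)]

        # Publish the disparity map as a single-channel float image
        disp_np = disp.squeeze().cpu().numpy()
        self._img_pub.publish(
            self._bridge.cv2_to_imgmsg(disp_np, encoding='32FC1'))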
Example #4
def get_depth_model(model_name):

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(model_name)
    model_path = os.path.join("trained_models", model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of the images this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v
        for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
    }
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                 scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    return encoder, depth_decoder, feed_width, feed_height
Example #5
    )  # For some unknown reason the camera input is in BGR format, so we convert it to RGBA.
    x = x.transpose((2, 0, 1))
    x = torch.from_numpy(x).float()
    x = normalize(x)
    x = x.to(device)
    x = x[None, ...]
    return x


# ## Setting up Monodepth model

# We build our monocular depth estimation model from the Monodepth module

# Define which model to use and download if not found
model_name = "mono_640x192"
download_model_if_doesnt_exist(model_name)

# Build paths to the encoder and decoder checkpoints and instantiate the networks
encoder_path = os.path.join("models", model_name, "encoder.pth")
depth_decoder_path = os.path.join("models", model_name, "depth.pth")
encoder = networks.ResnetEncoder(18, False).cuda()
depth_decoder = networks.DepthDecoder(num_ch_enc=encoder.num_ch_enc,
                                      scales=range(4)).cuda()

# Encoder and decoder weight loading
loaded_dict_enc = torch.load(encoder_path, map_location='cpu')
filtered_dict_enc = {
    k: v
    for k, v in loaded_dict_enc.items() if k in encoder.state_dict()
}
encoder.load_state_dict(filtered_dict_enc)

# The decoder checkpoint holds only weights, so it loads without filtering
loaded_dict = torch.load(depth_decoder_path, map_location='cpu')
depth_decoder.load_state_dict(loaded_dict)

# Inference only
encoder.eval()
depth_decoder.eval()
Example #6
def generate_depth(output_path, mode, model_name='mono+stereo_1024x320', ext='jpg', no_cuda=False):
    # Load model
    if torch.cuda.is_available() and not no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    download_model_if_doesnt_exist(model_name)
    model_path = os.path.join('', model_name)  # model folder in the current working directory
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print("   Loading pretrained encoder")
    encoder = monodepth2.networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of the images this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print("   Loading pretrained decoder")
    depth_decoder = monodepth2.networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    for image_path in glob(os.path.join(output_path, mode, '*')):
        print('Scene: {}'.format(image_path))
        # Searching folder for images
        paths = glob(os.path.join(image_path, '*.{}'.format(ext)))
        output_directory = image_path

        print("-> Predicting on {:d} test images".format(len(paths)))
        # PREDICTING ON EACH IMAGE IN TURN
        with torch.no_grad():
            for idx, image_path in enumerate(paths):

                if image_path.endswith("_disp.jpg"):
                    # don't try to predict disparity for a disparity image!
                    continue

                # Load image and preprocess
                input_image = pil.open(image_path).convert('RGB')
                original_width, original_height = input_image.size
                input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
                input_image = transforms.ToTensor()(input_image).unsqueeze(0)

                # PREDICTION
                input_image = input_image.to(device)
                features = encoder(input_image)
                outputs = depth_decoder(features)

                disp = outputs[("disp", 0)]
                disp_resized = torch.nn.functional.interpolate(
                    disp, (original_height, original_width), mode="bilinear", align_corners=False)

                # Saving numpy file; input names are expected to end with
                # '_image', which is stripped off to build the output name
                output_name = os.path.splitext(os.path.basename(image_path))[0]
                output_name = output_name[:-len('_image')]
                name_dest_npy = os.path.join(output_directory, "{}_disp.npy".format(output_name))
                scaled_disp, _ = disp_to_depth(disp_resized, 0.1, 100)
                scaled_disp = scaled_disp.view(original_height, original_width)
                np.save(name_dest_npy, scaled_disp.cpu().numpy())

                # Saving colormapped depth image
                disp_resized_np = disp_resized.squeeze().cpu().numpy()
                vmax = np.percentile(disp_resized_np, 95)
                normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
                mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
                colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
                im = pil.fromarray(colormapped_im)

                name_dest_im = os.path.join(output_directory, "{}_disp.jpg".format(output_name))
                im.save(name_dest_im)

                print("   Processed {:d} of {:d} images - saved prediction to {}".format(
                    idx + 1, len(paths), name_dest_im))

    print('-> Done!')
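

# NOTE (added sketch): a typical invocation. The glob calls above imply a
# layout of <output_path>/<mode>/<scene>/*.<ext>, and the suffix handling
# assumes file names end in '_image'; the concrete paths are placeholders.
if __name__ == '__main__':
    generate_depth('data', 'train', model_name='mono+stereo_1024x320',
                   ext='jpg', no_cuda=False)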