def init_model(transform):
    """Create the large MiDaS network together with its input pipeline.

    Args:
        transform: Ignored. A new preprocessing pipeline is always built
            here and returned in place of whatever was passed in.

    Returns:
        tuple: ``((model, transform, device), None)`` where ``model`` has
        been moved to ``device`` and switched to eval mode.
    """
    # Turn on cuDNN and its benchmark mode.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    weights_file = "../MiDaS/model-f46da743.pt"

    print("initialize")

    # Use CUDA when torch reports it as available, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    model = MidasNet(weights_file, non_negative=True)

    # Preprocessing: resize, normalise, then convert to network layout.
    resize_step = Resize(
        384,
        384,
        resize_target=None,
        keep_aspect_ratio=True,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    normalize_step = NormalizeImage(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )
    transform = Compose([resize_step, normalize_step, PrepareForNet()])

    model.to(device)
    model.eval()

    bundle = (model, transform, device)
    return bundle, None
예제 #2
0
def transforms():
    """Install two ready-made input pipelines on ``midas.transforms``.

    ``default_transform`` resizes towards 384x384 and ``small_transform``
    towards 256x256. Apart from the target size both pipelines are the
    same: divide the input by 255, resize, normalise, prepare for the
    network, and finally turn the image into a torch tensor with an extra
    leading dimension.

    Returns:
        The ``midas.transforms`` module with both attributes assigned.
    """
    import cv2
    from torchvision.transforms import Compose
    from midas.transforms import Resize, NormalizeImage, PrepareForNet
    from midas import transforms

    def wrap_scaled(img):
        # Scale by 1/255 and wrap in the dict form the later steps consume.
        return {"image": img / 255.0}

    def to_tensor(sample):
        # Convert the prepared array to a tensor and add a leading axis.
        return torch.from_numpy(sample["image"]).unsqueeze(0)

    def build_pipeline(side):
        # One pipeline per target size; only the Resize arguments differ.
        return Compose([
            wrap_scaled,
            Resize(
                side,
                side,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            to_tensor,
        ])

    transforms.default_transform = build_pipeline(384)
    transforms.small_transform = build_pipeline(256)

    return transforms
예제 #3
0
    def __init__(self, model_type, model_path, optimize):
        """Load a MiDaS network and build its preprocessing pipeline.

        Args:
            model_type: ``"large"`` or ``"small"``; any other value prints a
                hint and fails an assertion.
            model_path: Weights path handed to the network constructor.
            optimize: When truthy, the model is traced with
                ``torch.jit.trace`` and, on CUDA, switched to channels-last
                memory format and half precision.
        """
        print("initialize")

        # Pick CUDA when available, CPU otherwise.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("device: %s" % self.device)

        # Build the requested network and remember its input size.
        if model_type == "large":
            self.model = MidasNet(model_path, non_negative=True)
            self.net_w, self.net_h = 384, 384
        elif model_type == "small":
            self.model = MidasNet_small(
                model_path,
                features=64,
                backbone="efficientnet_lite3",
                exportable=True,
                non_negative=True,
                blocks={'expand': True},
            )
            self.net_w, self.net_h = 256, 256
        else:
            print(f"model_type '{model_type}' not implemented, use: --model_type large")
            assert False

        resize_step = Resize(
            self.net_w,
            self.net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        )
        normalize_step = NormalizeImage(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
        self.transform = Compose([resize_step, normalize_step, PrepareForNet()])

        self.model.eval()
        self.optimize = optimize
        if optimize:
            # Run once on a random input, then replace the model by its trace.
            dummy_input = torch.rand(1, 3, self.net_h, self.net_w)
            self.model(dummy_input)
            self.model = torch.jit.trace(self.model, dummy_input)

            if self.device.type == "cuda":
                channels_last_model = self.model.to(memory_format=torch.channels_last)
                self.model = channels_last_model.half()

        self.model.to(self.device)
예제 #4
0
def depth_processor(ie):
    """Prepare the preprocessing pipeline and a loaded inference network.

    The network description comes from the module-level ``midas_ir`` path;
    the weights path is that same string with its last three characters
    replaced by ``'bin'``. The network is loaded on the module-level
    ``DEVICE``.

    Args:
        ie: Object providing ``read_network`` and ``load_network``.

    Returns:
        tuple: ``(transform, exec_net, input_layer, output_layer)``.
    """
    # NOTE(review): the pipeline resizes towards 800x800 while the network
    # input below is reshaped to 384x384 - confirm this is intentional.
    preprocessing_steps = [
        Resize(
            800,
            800,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
    transform = Compose(preprocessing_steps)

    weights_path = midas_ir[:-3] + 'bin'
    net = ie.read_network(model=midas_ir, weights=weights_path)

    # Take the first input and the first output the network reports.
    input_layer = next(iter(net.input_info))
    output_layer = next(iter(net.outputs))

    # Keep the first two dimensions, force the last two to 384x384.
    batch, channels, _, _ = net.input_info[input_layer].input_data.shape
    net.reshape({input_layer: (batch, channels, 384, 384)})

    exec_net = ie.load_network(network=net, device_name=DEVICE)
    return transform, exec_net, input_layer, output_layer
def init_model(transform):
    """Build a MiDaS model from command-line options.

    The options ``--model_weights``, ``--model_type`` and
    ``--optimize`` / ``--no-optimize`` are read from ``sys.argv`` with
    ``parse_known_args``, so unrelated arguments are ignored.

    Args:
        transform: Ignored. A new preprocessing pipeline is always built
            here and returned in place of whatever was passed in.

    Returns:
        tuple: ``((model, transform, device, optimize), args)`` where
        ``args`` is the parsed argparse namespace.

    Raises:
        AssertionError: If ``--model_type`` is neither ``large`` nor
            ``small``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-mw', '--model_weights',
        default='model-f6b98070.pt',
        help='path to the trained weights of model',
    )
    parser.add_argument(
        '-mt', '--model_type',
        default='large',
        help='model type: large or small',
    )
    parser.add_argument('--optimize', dest='optimize', action='store_true')
    parser.add_argument('--no-optimize', dest='optimize', action='store_false')
    parser.set_defaults(optimize=True)

    args, unknown = parser.parse_known_args()

    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if args.model_type == "large":
        model_path = "../MiDaS/" + args.model_weights
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif args.model_type == "small":
        # Fall back to the small checkpoint when the given weights file name
        # does not mention "small".
        if "small" not in args.model_weights:
            args.model_weights = "model-small-70d6b9c8.pt"
        model_path = "../MiDaS/" + args.model_weights
        model = MidasNet_small(
            model_path,
            features=64,
            backbone="efficientnet_lite3",
            exportable=True,
            non_negative=True,
            blocks={'expand': True},
        )
        net_w, net_h = 256, 256
    else:
        # The message previously formatted the undefined name `model_type`,
        # which raised NameError before the hint could be printed.
        message = (
            f"model_type '{args.model_type}' not implemented, "
            "use: --model_type large"
        )
        print(message)
        # Raised explicitly so the check is not stripped under `python -O`.
        raise AssertionError(message)

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.eval()

    if args.optimize:
        # Run once on a random input, then replace the model by its trace.
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        model = torch.jit.trace(model, rand_example)

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    return (model, transform, device, args.optimize), args
예제 #6
0
파일: run.py 프로젝트: p-ranav/merged_depth
def run(input_path,
        output_path,
        model_path,
        model_type="large",
        optimize=True):
    """Compute a depth map for every file found in a folder.

    Each prediction is resized to the source image size, divided by 1000,
    written with ``utils.write_depth`` and printed along with its shape.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): "large" or "small"; anything else prints a hint
            and fails an assertion
        optimize (bool): when equal to True, trace the model and, on CUDA,
            run it in channels-last half precision
    """
    print("initialize")

    # Pick CUDA when available, CPU otherwise.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # Build the requested network and remember its input size.
    if model_type == "large":
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif model_type == "small":
        small_options = dict(
            features=64,
            backbone="efficientnet_lite3",
            exportable=True,
            non_negative=True,
            blocks={'expand': True},
        )
        model = MidasNet_small(model_path, **small_options)
        net_w, net_h = 256, 256
    else:
        print(
            f"model_type '{model_type}' not implemented, use: --model_type large"
        )
        assert False

    resize_step = Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=True,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    normalize_step = NormalizeImage(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
    transform = Compose([resize_step, normalize_step, PrepareForNet()])

    model.eval()

    # Same comparison as the original flag check, evaluated once.
    use_optimized = optimize == True
    on_cuda = device == torch.device("cuda")

    if use_optimized:
        # Run once on a random input, then replace the model by its trace.
        dummy_input = torch.rand(1, 3, net_h, net_w)
        model(dummy_input)
        model = torch.jit.trace(model, dummy_input)

        if on_cuda:
            model = model.to(memory_format=torch.channels_last).half()

    model.to(device)

    # Collect inputs and make sure the output folder exists.
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for position, img_name in enumerate(img_names, start=1):

        print("  processing {} ({}/{})".format(img_name, position, num_images))

        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if use_optimized and on_cuda:
                # Match the layout and precision of the optimised model.
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            raw_depth = model.forward(sample)
            resized = torch.nn.functional.interpolate(
                raw_depth.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            )
            prediction = resized.squeeze().cpu().numpy()
            prediction /= 1000

        # Output file name: the input's base name without its extension.
        stem = os.path.splitext(os.path.basename(img_name))[0]
        filename = os.path.join(output_path, stem)
        utils.write_depth(filename, prediction, bits=2)
        print(prediction)
        print(prediction.shape)

    print("finished")
예제 #7
0
def run(input_path, output_path, model_path):
    """Compute and save a depth map for every file found in a folder.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # Pick CUDA when available, CPU otherwise.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # Build the network and flag it for inference.
    model = Mynet(model_path, non_negative=True)
    model.inference = True

    preprocessing_steps = [
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ]
    transform = Compose(preprocessing_steps)

    model.to(device)
    model.eval()

    # Collect inputs and make sure the output folder exists.
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for position, img_name in enumerate(img_names, start=1):

        print("  processing {} ({}/{})".format(img_name, position, num_images))

        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            # The model returns a pair; only the first element (depth) is used.
            depth, _ = model.forward(sample)
            resized = torch.nn.functional.interpolate(
                depth.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            )
            prediction = resized.squeeze().cpu().numpy()

        # Output file name: the input's base name without its extension.
        stem = os.path.splitext(os.path.basename(img_name))[0]
        utils.write_depth(os.path.join(output_path, stem), prediction, bits=2)

    print("finished")
예제 #8
0
    def __init__(self, planercnn_params, yolo_params, midas_params):
        """Set up the combined plane / detection / depth dataset state.

        Args:
            planercnn_params: Mapping with the keys ``'options'``,
                ``'config'`` and ``'random'``.
            yolo_params: Mapping with a required ``'path'`` entry (a text
                file listing image paths) and the optional entries
                ``'img_size'``, ``'batch_size'``, ``'augment'``, ``'hyp'``,
                ``'rect'``, ``'image_weights'``, ``'cache_labels'``,
                ``'cache_images'`` and ``'single_cls'``.
            midas_params: Not read by this constructor.

        Raises:
            AssertionError: If the list file does not exist, lists no
                usable images, a cached label file has invalid contents, or
                label caching finds no labels at all.
        """
        self.random = True

        ## InferenceDataset start

        self.options = planercnn_params['options']
        self.config = planercnn_params['config']
        self.random = planercnn_params['random']
        #self.camera = camera
        # self.imagePaths = image_list
        self.anchors = generate_pyramid_anchors(self.config.RPN_ANCHOR_SCALES,
                                                self.config.RPN_ANCHOR_RATIOS,
                                                self.config.BACKBONE_SHAPES,
                                                self.config.BACKBONE_STRIDES,
                                                self.config.RPN_ANCHOR_STRIDE)

        # image_list = glob.glob(self.options.customDataFolder + '/*.png') + glob.glob(self.options.customDataFolder + '/*.jpg')
        # print(image_list)
        if os.path.exists(self.options.customDataFolder + '/camera.txt'):
            # Only the first line of camera.txt is used: its first six
            # whitespace-separated numbers become the camera vector.
            self.camera = np.zeros(6)
            with open(self.options.customDataFolder + '/camera.txt', 'r') as f:
                for line in f:
                    values = [
                        float(token.strip()) for token in line.split(' ')
                        if token.strip() != ''
                    ]
                    for c in range(6):
                        self.camera[c] = values[c]
                        continue
                    break
                pass
        else:
            # NOTE(review): `image_list` is not defined in this method (its
            # assignment above is commented out), so this branch raises
            # NameError unless a module-level `image_list` exists. Confirm
            # which image list the per-image camera files should follow.
            self.camera = [
                filename.replace('.png', '.txt').replace('.jpg', '.txt')
                for filename in image_list
            ]
            pass
        #return

        ## InferenceDataset END

        ## Yolo LoadImagesAndLabels Start
        path = yolo_params['path']
        img_size = yolo_params.get('img_size', 416)
        batch_size = yolo_params.get('batch_size', 16)
        augment = yolo_params.get('augment', False)
        hyp = yolo_params.get('hyp', None)
        rect = yolo_params.get('rect', False)
        image_weights = yolo_params.get('image_weights', False)
        cache_labels = yolo_params.get('cache_labels', True)
        cache_images = yolo_params.get('cache_images', True)
        single_cls = yolo_params.get('single_cls', False)

        path = str(Path(path))  # os-agnostic
        assert os.path.isfile(path), 'File not found %s. See %s' % (path,
                                                                    help_url)
        with open(path, 'r') as f:
            self.img_files = [
                x.replace('/', os.sep)
                for x in f.read().splitlines()  # os-agnostic
                if os.path.splitext(x)[-1].lower() in img_formats
            ]

        # This one image is always dropped from the list when present.
        rm = 'images/7.-With-A-Puffy-Jacket-Boots-And-A-Belt.jpg'
        if rm in self.img_files:
            self.img_files.remove(rm)
        self.imagePaths = self.img_files

        self.Yolo_transform = Compose([
            Resize(
                512,
                512,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="lower_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
        ])

        n = len(self.img_files)
        #print('No of images found:',n)
        if n > 500:
            np.savetxt('img_files.txt',
                       self.img_files,
                       delimiter="\n",
                       fmt="%s")

        # print(batch_size, "batch_size")

        assert n > 0, 'No images found in %s. See %s' % (path, help_url)
        # `np.int` was removed in NumPy 1.24; it was an alias of builtin int.
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches

        self.n = n
        # print(bi,"Hey Batch")
        self.batch = bi  # batch index of image
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = False  #self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

        # Define labels
        # self.label_files=[]
        # for x in self.img_files:
        #     x = x.split(os.sep)
        #     x[3]= 'labels'
        #     x[4] = x[4].replace(os.path.splitext(x[4])[-1], '.txt')
        #     x = os.sep.join(x)
        #     self.label_files.append(x)

        self.label_files = [
            x.replace('images',
                      'labels').replace(os.path.splitext(x)[-1], '.txt')
            for x in self.img_files
        ]

        # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
        if self.rect:
            # Read image shapes (wh)
            sp = path.replace('.txt', '.shapes')  # shapefile path
            try:
                with open(sp, 'r') as f:  # read existing shapefile
                    s = [x.split() for x in f.read().splitlines()]
                    assert len(s) == n, 'Shapefile out of sync'
            except Exception:
                # Missing or stale shapefile: rebuild it from the images.
                s = [
                    exif_size(Image.open(f))
                    for f in tqdm(self.img_files, desc='Reading image shapes')
                ]
                np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

            # Sort by aspect ratio
            s = np.array(s, dtype=np.float64)
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            i = ar.argsort()
            self.img_files = [self.img_files[i] for i in i]
            self.label_files = [self.label_files[i] for i in i]
            self.shapes = s[i]  # wh
            ar = ar[i]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(
                np.array(shapes) * img_size / 64.).astype(int) * 64

        # Preload labels (required for weighted CE training)
        self.imgs = [None] * n
        self.labels = [None] * n
        if cache_labels or image_weights:  # cache labels for faster training
            self.labels = [np.zeros((0, 5))] * n
            extract_bounding_boxes = False
            create_datasubset = False
            pbar = tqdm(self.label_files, desc='Caching labels')
            nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
            for i, file in enumerate(pbar):
                try:
                    with open(file, 'r') as f:
                        l = np.array(
                            [x.split() for x in f.read().splitlines()],
                            dtype=np.float32)
                except Exception:
                    nm += 1  # print('missing labels for image %s' % self.img_files[i])  # file missing
                    continue

                if l.shape[0]:
                    assert l.shape[1] == 5, '> 5 label columns: %s' % file
                    assert (l >= 0).all(), 'negative labels: %s' % file
                    assert (l[:, 1:] <= 1).all(
                    ), 'non-normalized or out of bounds coordinate labels: %s' % file
                    if np.unique(
                            l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                        nd += 1  # print('WARNING: duplicate rows in %s' % self.label_files[i])  # duplicate rows
                    if single_cls:
                        l[:, 0] = 0  # force dataset into single-class mode
                    self.labels[i] = l
                    nf += 1  # file found

                    # Create subdataset (a smaller dataset)
                    if create_datasubset and ns < 1E4:
                        if ns == 0:
                            create_folder(path='./datasubset')
                            os.makedirs('./datasubset/images')
                        exclude_classes = 43
                        if exclude_classes not in l[:, 0]:
                            ns += 1
                            # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                            with open('./datasubset/images.txt', 'a') as f:
                                f.write(self.img_files[i] + '\n')

                    # Extract object detection boxes for a second stage classifier
                    if extract_bounding_boxes:
                        p = Path(self.img_files[i])
                        img = cv2.imread(str(p))
                        h, w = img.shape[:2]
                        for j, x in enumerate(l):
                            f = '%s%sclassifier%s%g_%g_%s' % (p.parent.parent,
                                                              os.sep, os.sep,
                                                              x[0], j, p.name)
                            if not os.path.exists(Path(f).parent):
                                os.makedirs(
                                    Path(f).parent)  # make new output folder

                            b = x[1:] * [w, h, w, h]  # box
                            b[2:] = b[2:].max()  # rectangle to square
                            b[2:] = b[2:] * 1.3 + 30  # pad
                            b = xywh2xyxy(b.reshape(-1,
                                                    4)).ravel().astype(int)

                            b[[0,
                               2]] = np.clip(b[[0, 2]], 0,
                                             w)  # clip boxes outside of image
                            b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                            assert cv2.imwrite(
                                f, img[b[1]:b[3], b[0]:b[2]]
                            ), 'Failure extracting classifier boxes'
                else:
                    ne += 1  # print('empty labels for image %s' % self.img_files[i])  # file empty
                    # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

                pbar.desc = 'Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                    nf, nm, ne, nd, n)
            assert nf > 0, 'No labels found in %s. See %s' % (
                os.path.dirname(file) + os.sep, help_url)

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        if cache_images:  # if training
            gb = 0  # Gigabytes of cached images
            pbar = tqdm(range(len(self.img_files)), desc='Caching images')
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            for i in pbar:  # max 10k images
                self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(
                    self, i)  # img, hw_original, hw_resized
                gb += self.imgs[i].nbytes
                pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

        # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
        detect_corrupted_images = False
        if detect_corrupted_images:
            from skimage import io  # conda install -c conda-forge scikit-image
            for file in tqdm(self.img_files,
                             desc='Detecting corrupted images'):
                try:
                    _ = io.imread(file)
                except Exception:
                    print('Corrupted image detected: %s' % file)

        ## Yolo LoadImagesAndLabels END

        # Depth inputs: the same files, with path component 3 forced to
        # 'images' and the extension forced to '.jpg'.
        # NOTE(review): indices 3 and 4 assume each listed path has at least
        # five os.sep-separated components - confirm against the list file.
        self.depth_names = []
        for im in self.img_files:
            im = im.split(os.sep)
            im[3] = 'images'
            im[4] = im[4].replace(os.path.splitext(im[4])[-1], '.jpg')
            im = os.sep.join(im)
            # print(im, "hey brother")
            self.depth_names.append(im)

        # self.depth_names = [x.replace('images', 'depth_images').replace(os.path.splitext(x)[-1], '.png') for x in self.img_files]
        #self.img_path = inp_path
        #self.depth_path = depth_path
        self.transform = Compose([
            Resize(
                512,
                512,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="lower_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

        # midas dataset end

        # Plane files: path component 3 becomes 'inference'; the image name
        # gets the '_segmentation_0_final.png' suffix and the array file the
        # '.npz' extension.
        self.plane_names = []
        self.plane_nps = []
        for im in self.img_files:
            im = im.split(os.sep)
            im[3] = 'inference'
            np_file = im.copy()
            im[4] = im[4].replace(
                os.path.splitext(im[4])[-1], '_segmentation_0_final.png')
            np_file[4] = np_file[4].replace(
                os.path.splitext(np_file[4])[-1], '.npz')
            im = os.sep.join(im)
            np_file = os.sep.join(np_file)
            self.plane_names.append(im)
            self.plane_nps.append(np_file)
def run(model_path):
    """
    Run MonoDepthNN on frames from camera 0 and display an anaglyph.

    For each frame a depth map is predicted, resized to the frame size and
    passed to ``write_depth``; ``generate_stereo`` and ``overlap`` then
    produce the image shown in the "anaglyph" window. The loop ends when the
    camera stops delivering frames, and the camera is released on the way
    out (including when the loop is interrupted by an exception).

    Args:
        model_path (str): path to saved model
    """
    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    cam = cv2.VideoCapture(0)
    try:
        cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
        cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
        cam.set(cv2.CAP_PROP_FPS, 30)

        while True:
            t = time.time()
            grabbed, left_img = cam.read()
            if not grabbed or left_img is None:
                # A failed grab yields no image; stop instead of crashing in
                # cvtColor on a missing frame.
                print("\nno frame received from camera, stopping")
                break
            image = cv2.cvtColor(left_img, cv2.COLOR_BGR2RGB) / 255.0

            #  Apply transforms
            image = transform({"image": image})["image"]

            #  Predict and resize to original resolution
            with torch.no_grad():
                image = torch.from_numpy(image).to(device).unsqueeze(0)
                depth = model.forward(image)
                depth = (torch.nn.functional.interpolate(
                    depth.unsqueeze(1),
                    size=left_img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                ).squeeze().cpu().numpy())

            depth_map = write_depth(depth, bits=2, reverse=False)

            right_img = generate_stereo(left_img, depth_map)
            anaglyph = overlap(left_img, right_img)

            cv2.imshow("anaglyph", anaglyph)

            # Guard against a zero interval on coarse clocks.
            elapsed = time.time() - t
            fps = 1. / elapsed if elapsed > 0 else float("inf")
            print('\rframerate: %f fps' % fps, end='')
            cv2.waitKey(1)
    finally:
        # Always hand the capture device back.
        cam.release()
예제 #10
0
def run(model_path):
    """
    Run MonoDepthNN to compute depth maps for every image in ``args.input``.

    Files are processed in sorted name order. Each depth map is written to
    ``./depth/MiDaS_<name>.png`` where ``<name>`` is the input file name
    without its final extension. Files that ``cv2.imread`` cannot decode
    are reported and skipped.

    Args:
        model_path (str): path to saved model
    """
    # Input images
    img_list = os.listdir(args.input)
    img_list.sort()

    # output dir
    output_dir = './depth'
    os.makedirs(output_dir, exist_ok=True)

    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    for sample in tqdm(img_list):
        raw_image = cv2.imread(os.path.join(args.input, sample))
        if raw_image is None:
            # imread returns None for sub-directories and undecodable files;
            # skip them rather than aborting the whole run.
            print("skipping unreadable file: %s" % sample)
            continue
        raw_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        #  Apply transforms
        image = transform({"image": raw_image})["image"]

        #  Predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            prediction = model.forward(image)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=raw_image.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        depth_map = write_depth(prediction, bits=2, reverse=False)

        # Strip only the final extension so names such as "frame.0001.png"
        # and "frame.0002.png" no longer collapse onto the same output file.
        stem = os.path.splitext(sample)[0]
        cv2.imwrite(
            os.path.join(output_dir, 'MiDaS_{}.png'.format(stem)),
            depth_map)
예제 #11
0
    def __init__(self):
        """Load every depth model used by this object.

        In the order visible here: two AdaBins helpers (``nyu`` and
        ``kitti``), DiverseDepth, MiDaS (large variant, traced and - on
        CUDA - run in channels-last half precision), SGDepth, and the
        monodepth2 encoder/decoder pair. All checkpoint and config paths
        are hard-coded relative paths.
        """
        # Device is kept as a plain string and converted where torch needs it.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Setup AdaBins model
        self.adabins_nyu_infer_helper = InferenceHelper(dataset='nyu',
                                                        device=self.device)
        self.adabins_kitti_infer_helper = InferenceHelper(dataset='kitti',
                                                          device=self.device)

        # Setup DiverseDepth model
        # Minimal stand-in for the argument object that
        # merge_cfg_from_file / load_ckpt are called with below.
        class DiverseDepthArgs:
            def __init__(self):
                self.resume = False
                self.cfg_file = "lib/configs/resnext50_32x4d_diversedepth_regression_vircam"
                self.load_ckpt = "pretrained/DiverseDepth.pth"

        diverse_depth_args = DiverseDepthArgs()
        merge_cfg_from_file(diverse_depth_args)
        self.diverse_depth_model = RelDepthModel()
        self.diverse_depth_model.eval()
        # load checkpoint
        load_ckpt(diverse_depth_args, self.diverse_depth_model)
        # TODO: update this - see how `device` argument should be processed
        if self.device == "cuda":
            self.diverse_depth_model.cuda()
        self.diverse_depth_model = torch.nn.DataParallel(
            self.diverse_depth_model)

        # Setup MiDaS model
        self.midas_model_path = "./pretrained/MiDaS_f6b98070.pt"
        # Fixed to "large" here, so the "small" branch below is never taken.
        midas_model_type = "large"

        # load network
        if midas_model_type == "large":
            self.midas_model = MidasNet(self.midas_model_path,
                                        non_negative=True)
            self.midas_net_w, self.midas_net_h = 384, 384
        elif midas_model_type == "small":
            self.midas_model = MidasNet_small(self.midas_model_path,
                                              features=64,
                                              backbone="efficientnet_lite3",
                                              exportable=True,
                                              non_negative=True,
                                              blocks={'expand': True})
            self.midas_net_w, self.midas_net_h = 256, 256

        # MiDaS preprocessing: resize, normalise, convert to network layout.
        self.midas_transform = Compose([
            Resize(
                self.midas_net_w,
                self.midas_net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

        self.midas_model.eval()

        # Optimisation is always on: run once on a random input, then
        # replace the model by its torch.jit trace.
        self.midas_optimize = True
        if self.midas_optimize == True:
            rand_example = torch.rand(1, 3, self.midas_net_h, self.midas_net_w)
            self.midas_model(rand_example)
            traced_script_module = torch.jit.trace(self.midas_model,
                                                   rand_example)
            self.midas_model = traced_script_module

            if self.device == "cuda":
                self.midas_model = self.midas_model.to(
                    memory_format=torch.channels_last)
                self.midas_model = self.midas_model.half()

        self.midas_model.to(torch.device(self.device))

        # Setup SGDepth model
        self.sgdepth_model = InferenceEngine.SgDepthInference()

        # Setup monodepth2 model
        self.monodepth2_model_path = "pretrained/monodepth2_mono+stereo_640x192"
        monodepth2_device = torch.device(self.device)
        encoder_path = os.path.join(self.monodepth2_model_path, "encoder.pth")
        depth_decoder_path = os.path.join(self.monodepth2_model_path,
                                          "depth.pth")

        # LOADING PRETRAINED MODEL
        print("   Loading Monodepth2 pretrained encoder")
        self.monodepth2_encoder = networks.ResnetEncoder(18, False)
        loaded_dict_enc = torch.load(encoder_path,
                                     map_location=monodepth2_device)

        # extract the height and width of image that this model was trained with
        self.feed_height = loaded_dict_enc['height']
        self.feed_width = loaded_dict_enc['width']
        # Keep only the checkpoint entries whose keys exist in the encoder's
        # state dict (this drops the 'height' / 'width' entries read above
        # unless the encoder itself has keys with those names).
        filtered_dict_enc = {
            k: v
            for k, v in loaded_dict_enc.items()
            if k in self.monodepth2_encoder.state_dict()
        }
        self.monodepth2_encoder.load_state_dict(filtered_dict_enc)
        self.monodepth2_encoder.to(monodepth2_device)
        self.monodepth2_encoder.eval()

        print("   Loading pretrained decoder")
        self.monodepth2_depth_decoder = networks.DepthDecoder(
            num_ch_enc=self.monodepth2_encoder.num_ch_enc, scales=range(4))

        loaded_dict = torch.load(depth_decoder_path,
                                 map_location=monodepth2_device)
        self.monodepth2_depth_decoder.load_state_dict(loaded_dict)

        self.monodepth2_depth_decoder.to(monodepth2_device)
        self.monodepth2_depth_decoder.eval()