Example #1
File: run.py  Project: safijari/MiDaS
    def __init__(self, model_path, device_name='cuda'):
        if device_name == 'cuda' and not torch.cuda.is_available():
            print("WARN: cuda was selected as device but was not found")
            device_name = 'cpu'
        self.device = torch.device(device_name)

        print(f"device: {device_name}")
        self.model = MidasNet(model_path, non_negative=True)

        self.preprocessor = Compose([
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

        self.model.to(self.device)
        self.model.eval()
Example #2
def MiDaS(pretrained=True, **kwargs):
    """ # This docstring shows up in hub.help()
    MiDaS model for monocular depth estimation
    pretrained (bool): load pretrained weights into model
    """

    model = MidasNet()

    if pretrained:
        checkpoint = "https://github.com/intel-isl/MiDaS/releases/download/v2/model.pt"
        state_dict = torch.hub.load_state_dict_from_url(checkpoint,
                                                        progress=True)
        model.load_state_dict(state_dict)

    return model
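The hubconf entrypoint above is meant to be consumed through torch.hub. A minimal usage sketch, assuming the entrypoint is published under the name "MiDaS" in the intel-isl/MiDaS repository (the dummy input is only a shape check):

import torch

# Fetch the model through torch.hub; pretrained=True is forwarded to MiDaS().
model = torch.hub.load("intel-isl/MiDaS", "MiDaS", pretrained=True)
model.eval()

# Quick shape check with a random NCHW tensor; real inputs go through the
# Resize/NormalizeImage/PrepareForNet pipeline shown in the other examples.
dummy = torch.rand(1, 3, 384, 384)
with torch.no_grad():
    prediction = model(dummy)  # relative (inverse) depth map, shape (1, 384, 384)
print(prediction.shape)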
Example #3
File: run.py  Project: safijari/MiDaS
class Runner:
    def __init__(self, model_path, device_name='cuda'):
        if device_name == 'cuda' and not torch.cuda.is_available():
            print("WARN: cuda was selected as device but was not found")
            device_name = 'cpu'
        self.device = torch.device(device_name)

        print(f"device: {device_name}")
        self.model = MidasNet(model_path, non_negative=True)

        self.preprocessor = Compose([
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])

        self.model.to(self.device)
        self.model.eval()

    def predict_depth(self, img_rgb):
        img_input = self.preprocessor({"image": img_rgb / 255.0})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(self.device).unsqueeze(0)
            prediction = self.model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img_rgb.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        return prediction

    def weighted_filtering(self, rgb_image, depth_image):
        # pass sigma by keyword so it cannot land in the optional dst slot of
        # cv2.ximgproc.weightedMedianFilter's Python binding
        return cv2.ximgproc.weightedMedianFilter(rgb_image,
                                                 depth_image.astype('float32'),
                                                 5, sigma=15)
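A hedged usage sketch for the Runner class above; the file names are placeholders, and the guided filtering step requires the opencv-contrib build that ships cv2.ximgproc:

import cv2

# Instantiate with a checkpoint path; Runner falls back to CPU if CUDA is unavailable.
runner = Runner("model.pt", device_name="cuda")

# OpenCV reads BGR uint8; predict_depth expects RGB and scales by 1/255 itself.
img_bgr = cv2.imread("input.jpg")
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

depth = runner.predict_depth(img_rgb)               # float32 map, same H x W as the input
smooth = runner.weighted_filtering(img_rgb, depth)  # edge-aware smoothing of the depth map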
Example #4
File: demo.py  Project: eight0153/MiDaS
def main(image_path, model_path='model.pt', output_path=None):
    print("Loading model...")
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # MidasNet(model_path) already loads the checkpoint (the other examples rely
    # on this), so the explicit load_state_dict below is redundant but harmless.
    model = MidasNet(model_path, non_negative=True)
    model.to(device)
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()

    print("Creating depth maps...")
    rgb_path = os.path.abspath(image_path)

    if os.path.isdir(rgb_path):
        for file in os.listdir(rgb_path):
            test(model, os.path.join(rgb_path, file), output_path)
    else:
        test(model, rgb_path, output_path)

    print("Done.")
Example #5
def run(basedir, input_path, output_path, model_path, resize_height=288):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    img0 = [os.path.join(basedir, 'images', f)
            for f in sorted(os.listdir(os.path.join(basedir, 'images')))
            if f.endswith('JPG') or f.endswith('jpg') or f.endswith('png')][0]
    sh = cv2.imread(img0).shape
    height = resize_height
    factor = sh[0] / float(height)
    width = int(round(sh[1] / factor))
    _minify(basedir, resolutions=[[height, width]])

    # select device
    device = torch.device("cuda")
    print("device: %s" % device)

    small_img_dir = input_path + '_*x' + str(resize_height) + '/'
    print(small_img_dir)

    small_img_path = sorted(glob.glob(glob.glob(small_img_dir)[0] +
                                      '/*.png'))[0]

    small_img = cv2.imread(small_img_path)

    print('small_img', small_img.shape)

    # Portrait Orientation
    if small_img.shape[0] > small_img.shape[1]:
        input_h = 640
        input_w = int(
            round(float(input_h) / small_img.shape[0] * small_img.shape[1]))
    # Landscape Orientation
    else:
        input_w = 640
        input_h = int(
            round(float(input_w) / small_img.shape[1] * small_img.shape[0]))

    print('Monocular depth input_w %d input_h %d ' % (input_w, input_h))

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform_1 = Compose([
        Resize(
            input_w,
            input_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_AREA,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = sorted(glob.glob(os.path.join(input_path, "*")))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))
        # input
        img = read_image(img_name)
        img_input_1 = transform_1({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample_1 = torch.from_numpy(img_input_1).to(device).unsqueeze(0)
            prediction = model.forward(sample_1)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=[small_img.shape[0], small_img.shape[1]],
                mode="nearest",
            ).squeeze().cpu().numpy())

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])

        if VIZ:
            if not os.path.exists('./midas_outputs'):
                os.makedirs('./midas_outputs')

            plt.figure(figsize=(12, 6))
            plt.subplot(1, 2, 1)
            plt.imshow(img)
            plt.subplot(1, 2, 2)
            plt.imshow(prediction, cmap='jet')
            plt.savefig('./midas_outputs/%s' % os.path.basename(img_name))
            plt.close()

        print(filename + '.npy')
        np.save(filename + '.npy', prediction.astype(np.float32))

    print("finished")
Example #6
File: run.py  Project: stalin18/MiDaS
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input

        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            prediction = model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
Example #7
def run_only_midas(input_path, output_path, model_path, optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    model = MidasNet(model_path, non_negative=True)
    # net_w, net_h = 384, 384
    net_w, net_h = 416, 416

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.eval()

    if optimize:
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input

        img = read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if optimize and device == torch.device("cuda"):
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # ToDo Remove this......
        # break

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        write_depth(filename, prediction, bits=2)

        # # Delete the processed file from input folder -- TEST
        # os.remove(img_name)

    print("finished")
Example #8
def run(model_path):
    """Run MonoDepthNN to compute depth maps.

    Args:
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()

    cap = cv2.VideoCapture(1)
    print("is camera open", cap.isOpened())
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
    print("start processing")

    i = 0
    while cap.isOpened():
        start = time.time()
        ret, frame = cap.read()
        print("new frame", ret)
        p1 = time.time()
        print(f"take a picture {p1 - start}")
        if ret:
            img = utils.process_camera_img(frame)
            img_input = transform({"image": img})["image"]
            p2 = time.time()
            print(f"transoform image {p2 - p1}")
            # compute
            with torch.no_grad():
                sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                p3 = time.time()
                print(f"from numpy to cuda {p3 - p2}")
                prediction = model.forward(sample)
                p4 = time.time()
                print(f"prediction {p4 - p3}")
                prediction = (
                    torch.nn.functional.interpolate(
                        prediction.unsqueeze(1),
                        size=img.shape[:2],
                        mode="bicubic",
                        align_corners=False,
                    )
                    .squeeze()
                    .cpu()
                    .numpy()
                )
                p5 = time.time()
                print(f"prediction from cuda to cpu {p5 - p4}")


            # output

            r = random.randint(0, 10000)
            cv2.imwrite(f"output/input-{i}-{r}.png", frame)
            utils.write_depth(f"output/depth-{i}-{r}", prediction, bits=2)
            p6 = time.time()
            print(f"save input and write depth {p6 - p5}")

            cv2.imshow('frame', frame)
            cv2.imshow('prediction', prediction)
            p7 = time.time()
            print(f"show images {p7 - p6}")
            i += 1

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        else:
            print("Camera is not recording")
        print(f"image took {time.time() - start} s")
        print("\n-----------------------\n")

    # When everything done, release the capture
    cap.release()
    cv2.destroyAllWindows()

    print("finished")