def __init__(self, model_path, device_name='cuda'):
    """Set up the depth network, its device and the input preprocessing.

    Args:
        model_path: forwarded to ``MidasNet`` as its first argument
            (presumably the path of the saved weights -- confirm against
            ``MidasNet``).
        device_name: name handed to ``torch.device``. A request for
            ``'cuda'`` is downgraded to ``'cpu'`` (with a warning) when
            CUDA is not available.
    """
    # Only probe for CUDA when it was actually requested.
    if device_name == 'cuda':
        if not torch.cuda.is_available():
            print("WARN: cuda was selected as device but was not found")
            device_name = 'cpu'

    self.device = torch.device(device_name)
    print(f"device: {device_name}")

    self.model = MidasNet(model_path, non_negative=True)

    # Preprocessing chain: resize, normalise, then convert to network layout.
    resize_step = Resize(
        384,
        384,
        resize_target=None,
        keep_aspect_ratio=True,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    normalize_step = NormalizeImage(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    self.preprocessor = Compose([resize_step, normalize_step, PrepareForNet()])

    self.model.to(self.device)
    self.model.eval()
def MiDaS(pretrained=True, **kwargs):
    """MiDaS model for monocular depth estimation.

    This docstring shows up in hub.help().

    Args:
        pretrained (bool): load pretrained weights into model
        **kwargs: accepted but not used by this function.

    Returns:
        The constructed ``MidasNet`` instance.
    """
    model = MidasNet()

    if pretrained:
        checkpoint = "https://github.com/intel-isl/MiDaS/releases/download/v2/model.pt"
        # Deserialize onto the CPU: tensors tagged with a CUDA device cannot
        # be restored on a CPU-only host unless they are remapped.
        # load_state_dict() copies the values into the model's own parameters,
        # so the caller is still free to move the model afterwards.
        state_dict = torch.hub.load_state_dict_from_url(
            checkpoint,
            map_location="cpu",
            progress=True,
        )
        model.load_state_dict(state_dict)

    return model
class Runner:
    """Bundles a ``MidasNet`` model with its preprocessing for depth prediction."""

    def __init__(self, model_path, device_name='cuda'):
        """Create the model, pick the device and build the preprocessor.

        Args:
            model_path: forwarded to ``MidasNet`` as its first argument
                (presumably the path of the saved weights -- confirm).
            device_name: name handed to ``torch.device``; ``'cuda'`` is
                replaced by ``'cpu'`` with a warning when CUDA is missing.
        """
        # Only probe for CUDA when it was actually requested.
        if device_name == 'cuda':
            if not torch.cuda.is_available():
                print("WARN: cuda was selected as device but was not found")
                device_name = 'cpu'

        self.device = torch.device(device_name)
        print(f"device: {device_name}")

        self.model = MidasNet(model_path, non_negative=True)

        resize_step = Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        )
        normalize_step = NormalizeImage(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        )
        self.preprocessor = Compose(
            [resize_step, normalize_step, PrepareForNet()]
        )

        self.model.to(self.device)
        self.model.eval()

    def predict_depth(self, img_rgb):
        """Return the model prediction resized to the input's height and width.

        Args:
            img_rgb: image array; it is divided by 255.0 before being
                preprocessed (assumes values in 0..255 -- TODO confirm) and
                its first two dimensions define the output size.

        Returns:
            NumPy array obtained from the squeezed, bicubically resized
            model output.
        """
        scaled = img_rgb / 255.0
        net_input = self.preprocessor({"image": scaled})["image"]

        # Inference only: no gradients needed.
        with torch.no_grad():
            batch = torch.from_numpy(net_input).to(self.device).unsqueeze(0)
            raw_depth = self.model.forward(batch)
            resized = torch.nn.functional.interpolate(
                raw_depth.unsqueeze(1),
                size=img_rgb.shape[:2],
                mode="bicubic",
                align_corners=False,
            )
            depth = resized.squeeze().cpu().numpy()

        return depth

    def weighted_filtering(self, rgb_image, depth_image):
        """Apply OpenCV's weighted median filter to the two images.

        ``rgb_image`` and the float32-cast ``depth_image`` are passed, in
        that order, to ``cv2.ximgproc.weightedMedianFilter`` with the
        trailing arguments 5 and 15.
        """
        depth_f32 = depth_image.astype('float32')
        return cv2.ximgproc.weightedMedianFilter(rgb_image, depth_f32, 5, 15)
def main(image_path, model_path='model.pt', output_path=None):
    """Load the model and call ``test`` on one image or on every directory entry.

    Args:
        image_path: a file, or a directory whose entries are each processed.
        model_path: passed to ``MidasNet`` and also loaded via ``torch.load``
            as the state dict.
        output_path: forwarded unchanged to ``test``.
    """
    print("Loading model...")
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    model = MidasNet(model_path, non_negative=True)
    model.to(device)
    weights = torch.load(model_path, map_location="cpu")
    model.load_state_dict(weights)
    model.eval()

    print("Creating depth maps...")
    root = os.path.abspath(image_path)
    if not os.path.isdir(root):
        targets = [root]
    else:
        # Every directory entry is processed, whatever its type.
        targets = [os.path.join(root, entry) for entry in os.listdir(root)]

    for target in targets:
        test(model, target, output_path)

    print("Done.")
def run(basedir, input_path, output_path, model_path, resize_height=288):
    """Run MonoDepthNN to compute depth maps.

    Each prediction is resized (nearest neighbour) to the shape of the first
    downscaled image and saved as ``<output_path>/<image stem>.npy`` in
    float32. When the module-level ``VIZ`` flag is set, a side-by-side figure
    is also written to ``./midas_otuputs``.

    Args:
        basedir (str): dataset root; must contain an ``images`` sub-folder
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        resize_height (int): height of the downscaled copies requested from
            ``_minify``; also used to locate the downscaled folder matching
            ``input_path + '_*x' + str(resize_height)``

    Raises:
        IndexError: if no image is found in ``<basedir>/images`` or no
            downscaled ``.png`` can be located.
    """
    print("initialize")

    image_dir = os.path.join(basedir, 'images')
    img0 = [os.path.join(image_dir, f)
            for f in sorted(os.listdir(image_dir))
            if f.endswith(('JPG', 'jpg', 'png'))][0]
    sh = cv2.imread(img0).shape
    height = resize_height
    # Scale the width by the same factor as the height.
    factor = sh[0] / float(height)
    width = int(round(sh[1] / factor))
    _minify(basedir, resolutions=[[height, width]])

    # select device (fall back to the CPU when CUDA is unavailable)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    small_img_dir = input_path + '_*x' + str(resize_height) + '/'
    print(small_img_dir)

    small_img_path = sorted(glob.glob(glob.glob(small_img_dir)[0] + '/*.png'))[0]
    small_img = cv2.imread(small_img_path)
    print('small_img', small_img.shape)

    # The longer image side is mapped to 640; the other side follows the
    # aspect ratio.
    if small_img.shape[0] > small_img.shape[1]:
        # Portrait Orientation
        input_h = 640
        input_w = int(
            round(float(input_h) / small_img.shape[0] * small_img.shape[1]))
    else:
        # Landscape Orientation
        input_w = 640
        input_h = int(
            round(float(input_w) / small_img.shape[1] * small_img.shape[0]))

    print('Monocular depth input_w %d input_h %d ' % (input_w, input_h))

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform_1 = Compose([
        Resize(
            input_w,
            input_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_AREA,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = sorted(glob.glob(os.path.join(input_path, "*")))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    # Predictions are resized to the downscaled image's height and width.
    target_size = [small_img.shape[0], small_img.shape[1]]

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = read_image(img_name)
        img_input_1 = transform_1({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample_1 = torch.from_numpy(img_input_1).to(device).unsqueeze(0)
            prediction = model.forward(sample_1)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=target_size,
                mode="nearest",
            ).squeeze().cpu().numpy())

        # output
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])

        if VIZ:
            # exist_ok avoids the check-then-create race.
            os.makedirs('./midas_otuputs', exist_ok=True)

            plt.figure(figsize=(12, 6))
            plt.subplot(1, 2, 1)
            plt.imshow(img)
            plt.subplot(1, 2, 2)
            plt.imshow(prediction, cmap='jet')
            # basename() also handles platform-specific path separators.
            plt.savefig('./midas_otuputs/%s' % os.path.basename(img_name))
            plt.close()

        print(filename + '.npy')
        np.save(filename + '.npy', prediction.astype(np.float32))

    print("finished")
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Every entry matched by ``<input_path>/*`` is read, passed through the
    network, resized (bicubic) to the input image's height and width and
    handed to ``utils.write_depth``.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device (fall back to the CPU when CUDA is unavailable)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            prediction = model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # output: file stem of the input, placed in the output folder
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
def run_only_midas(input_path, output_path, model_path, optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        optimize (bool): when true, the model is replaced by a
            ``torch.jit.trace`` of itself; on a CUDA device the traced model
            and every input sample are additionally converted to
            channels-last memory format and half precision
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    model = MidasNet(model_path, non_negative=True)
    net_w, net_h = 416, 416

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.eval()

    # Single source of truth for the half-precision / channels-last path, so
    # that the model and the samples fed to it can never disagree.
    use_half = bool(optimize) and device == torch.device("cuda")

    if optimize:
        # Trace with a random example of the nominal network input size.
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        model = torch.jit.trace(model, rand_example)

        if use_half:
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print(" processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if use_half:
                # Match the layout and precision of the converted model.
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = model.forward(sample)
            prediction = (torch.nn.functional.interpolate(
                prediction.unsqueeze(1),
                size=img.shape[:2],
                mode="bicubic",
                align_corners=False,
            ).squeeze().cpu().numpy())

        # output: file stem of the input, placed in the output folder
        filename = os.path.join(
            output_path,
            os.path.splitext(os.path.basename(img_name))[0])
        write_depth(filename, prediction, bits=2)

    print("finished")
def run(model_path):
    """Run MonoDepthNN to compute depth maps.

    Frames are read from video capture device 1 until the capture closes or
    ``q`` is pressed. For every frame that is read successfully, the input
    frame and its depth prediction are written to the ``output`` folder and
    shown in two windows; per-stage timings are printed.

    Args:
        model_path (str): path to saved model
    """
    # Local import keeps this block self-contained.
    import os

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()

    # Frames and depth maps are written below "output"; make sure it exists.
    os.makedirs("output", exist_ok=True)

    cap = cv2.VideoCapture(1)
    print("is camera open", cap.isOpened())
    # Named constants instead of the raw property ids 3 and 4.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)

    print("start processing")

    i = 0
    try:
        while cap.isOpened():
            start = time.time()
            ret, frame = cap.read()
            print("new frame", ret)
            p1 = time.time()
            print(f"take a picture {p1 - start}")

            if ret:
                img = utils.process_camera_img(frame)
                img_input = transform({"image": img})["image"]
                p2 = time.time()
                print(f"transoform image {p2 - p1}")

                # compute
                with torch.no_grad():
                    sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
                    p3 = time.time()
                    print(f"from numpy to cuda {p3 - p2}")
                    prediction = model.forward(sample)
                    p4 = time.time()
                    print(f"prediction {p4 - p3}")
                    prediction = (
                        torch.nn.functional.interpolate(
                            prediction.unsqueeze(1),
                            size=img.shape[:2],
                            mode="bicubic",
                            align_corners=False,
                        )
                        .squeeze()
                        .cpu()
                        .numpy()
                    )
                    p5 = time.time()
                    print(f"prediction from cuda to cpu {p5 - p4}")

                # output; the random suffix becomes part of both file names
                r = random.randint(0, 10000)
                cv2.imwrite(f"output/input-{i}-{r}.png", frame)
                utils.write_depth(f"output/depth-{i}-{r}", prediction, bits=2)
                p6 = time.time()
                print(f"save input and write depth {p6 - p5}")

                cv2.imshow('frame', frame)
                cv2.imshow('prediction', prediction)
                p7 = time.time()
                print(f"show images {p7 - p6}")

                i += 1
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            else:
                print("Camera is not recording")

            print(f"image took {time.time() - start} s")
            print("\n-----------------------\n")
    finally:
        # Release the capture and close the windows even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()

    print("finished")