def init_model(transform):
    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    model_path = "../MiDaS/model-f46da743.pt"

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    transform = Compose(
        [
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.to(device)
    model.eval()

    return (model, transform, device), None
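# Usage sketch for init_model above -- an assumption, not part of the original
# snippet. Note that the incoming `transform` argument is ignored and rebuilt
# inside the function. The inference pattern mirrors the run() loops later in
# this section; `utils.read_image` is the MiDaS helper returning an RGB float
# image in [0, 1], and the input path is hypothetical.
(model, transform, device), _ = init_model(None)
img = utils.read_image("input/example.jpg")
img_input = transform({"image": img})["image"]
with torch.no_grad():
    sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
    prediction = model.forward(sample)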
def transforms():
    import cv2
    import torch  # needed for the torch.from_numpy lambdas below
    from torchvision.transforms import Compose
    from midas.transforms import Resize, NormalizeImage, PrepareForNet
    from midas import transforms

    transforms.default_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    transforms.small_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                256,
                256,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    return transforms
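# Minimal usage sketch for the hub-style transforms above -- an assumption
# inferred from the lambdas in the Compose pipelines: they take a raw RGB
# image (0-255) and already return a batched torch tensor, so no manual
# unsqueeze is needed. The input path is hypothetical.
import cv2
t = transforms()
img = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
input_batch = t.default_transform(img)  # shape: (1, 3, H, W)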
def __init__(self, model_type, model_path, optimize):
    print("initialize")

    # select device
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % self.device)

    # load network
    if model_type == "large":
        self.model = MidasNet(model_path, non_negative=True)
        self.net_w, self.net_h = 384, 384
    elif model_type == "small":
        self.model = MidasNet_small(model_path, features=64,
                                    backbone="efficientnet_lite3",
                                    exportable=True, non_negative=True,
                                    blocks={'expand': True})
        self.net_w, self.net_h = 256, 256
    else:
        print(f"model_type '{model_type}' not implemented, use: --model_type large")
        assert False

    self.transform = Compose(
        [
            Resize(
                self.net_w,
                self.net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    self.model.eval()

    self.optimize = optimize
    if self.optimize:
        rand_example = torch.rand(1, 3, self.net_h, self.net_w)
        self.model(rand_example)
        traced_script_module = torch.jit.trace(self.model, rand_example)
        self.model = traced_script_module

        if self.device == torch.device("cuda"):
            self.model = self.model.to(memory_format=torch.channels_last)
            self.model = self.model.half()

    self.model.to(self.device)
def depth_processor(ie):
    transform = Compose([
        Resize(
            800,
            800,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    # read the MiDaS IR model (.xml) together with its .bin weights
    net = ie.read_network(model=midas_ir, weights=midas_ir[:-3] + 'bin')
    input_layer = next(iter(net.input_info))
    output_layer = next(iter(net.outputs))
    n, c, _, _ = net.input_info[input_layer].input_data.shape
    net.reshape({input_layer: (n, c, 384, 384)})
    exec_net = ie.load_network(network=net, device_name=DEVICE)
    return transform, exec_net, input_layer, output_layer
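# Hedged usage sketch for depth_processor above, assuming the pre-2022
# OpenVINO Inference Engine API that the snippet itself uses. PrepareForNet
# yields a CHW float array, so only a batch dimension is added before calling
# ExecutableNetwork.infer. Caveat: the transform targets 800 px (upper_bound)
# while the network is reshaped to 384x384, so an extra resize to the network
# shape may be required in practice; `img` is a hypothetical RGB float image
# in [0, 1].
transform, exec_net, input_layer, output_layer = depth_processor(ie)
img_input = transform({"image": img})["image"]
result = exec_net.infer(inputs={input_layer: img_input[None]})
depth = result[output_layer].squeeze()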
def init_model(transform):
    parser = argparse.ArgumentParser()
    parser.add_argument('-mw', '--model_weights',
                        default='model-f6b98070.pt',
                        help='path to the trained weights of model')
    parser.add_argument('-mt', '--model_type',
                        default='large',
                        help='model type: large or small')
    parser.add_argument('--optimize', dest='optimize', action='store_true')
    parser.add_argument('--no-optimize', dest='optimize', action='store_false')
    parser.set_defaults(optimize=True)
    args, unknown = parser.parse_known_args()

    # set torch options
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if args.model_type == "large":
        model_path = "../MiDaS/" + args.model_weights
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif args.model_type == "small":
        if "small" not in args.model_weights:
            args.model_weights = "model-small-70d6b9c8.pt"
        model_path = "../MiDaS/" + args.model_weights
        model = MidasNet_small(model_path, features=64,
                               backbone="efficientnet_lite3",
                               exportable=True, non_negative=True,
                               blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        print(f"model_type '{args.model_type}' not implemented, use: --model_type large")
        assert False

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ]
    )

    model.eval()

    if args.optimize:
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    return (model, transform, device, args.optimize), args
def run(input_path, output_path, model_path, model_type="large", optimize=True):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
        model_type (str): model type, "large" or "small"
        optimize (bool): trace the model and use channels-last half precision on CUDA
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    if model_type == "large":
        model = MidasNet(model_path, non_negative=True)
        net_w, net_h = 384, 384
    elif model_type == "small":
        model = MidasNet_small(model_path, features=64,
                               backbone="efficientnet_lite3",
                               exportable=True, non_negative=True,
                               blocks={'expand': True})
        net_w, net_h = 256, 256
    else:
        print(f"model_type '{model_type}' not implemented, use: --model_type large")
        assert False

    transform = Compose([
        Resize(
            net_w,
            net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.eval()

    if optimize:
        rand_example = torch.rand(1, 3, net_h, net_w)
        model(rand_example)
        traced_script_module = torch.jit.trace(model, rand_example)
        model = traced_script_module

        if device == torch.device("cuda"):
            model = model.to(memory_format=torch.channels_last)
            model = model.half()

    model.to(device)

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            if optimize and device == torch.device("cuda"):
                sample = sample.to(memory_format=torch.channels_last)
                sample = sample.half()
            prediction = model.forward(sample)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )
            prediction /= 1000

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)
        print(prediction)
        print(prediction.shape)

    print("finished")
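# A possible command-line entry point for run() above -- a sketch only; the
# flag names and default weight path are assumptions mirroring the
# argparse-based init_model variant earlier in this section.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_path', default='input',
                        help='folder with input images')
    parser.add_argument('-o', '--output_path', default='output',
                        help='folder for output depth maps')
    parser.add_argument('-mw', '--model_weights', default='model-f6b98070.pt',
                        help='path to the trained weights of model')
    parser.add_argument('-mt', '--model_type', default='large',
                        help='model type: large or small')
    args = parser.parse_args()
    run(args.input_path, args.output_path, args.model_weights, args.model_type)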
def run(input_path, output_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = Mynet(model_path, non_negative=True)
    model.inference = True

    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    model.to(device)
    model.eval()

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)
            # the model outputs depth_images and yolo_layers
            prediction, _ = model.forward(sample)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0])
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
def __init__(self, planercnn_params, yolo_params, midas_params):
    self.random = True

    ## InferenceDataset start
    self.options = planercnn_params['options']
    self.config = planercnn_params['config']
    self.random = planercnn_params['random']
    # self.camera = camera
    # self.imagePaths = image_list

    self.anchors = generate_pyramid_anchors(self.config.RPN_ANCHOR_SCALES,
                                            self.config.RPN_ANCHOR_RATIOS,
                                            self.config.BACKBONE_SHAPES,
                                            self.config.BACKBONE_STRIDES,
                                            self.config.RPN_ANCHOR_STRIDE)

    # image_list = glob.glob(self.options.customDataFolder + '/*.png') + glob.glob(self.options.customDataFolder + '/*.jpg')
    # print(image_list)
    if os.path.exists(self.options.customDataFolder + '/camera.txt'):
        self.camera = np.zeros(6)
        with open(self.options.customDataFolder + '/camera.txt', 'r') as f:
            # read the six intrinsics from the first line of camera.txt
            for line in f:
                values = [
                    float(token.strip()) for token in line.split(' ')
                    if token.strip() != ''
                ]
                for c in range(6):
                    self.camera[c] = values[c]
                break
    else:
        # NOTE: image_list is only defined by the commented-out glob above;
        # this branch fails unless that line is restored
        self.camera = [
            filename.replace('.png', '.txt').replace('.jpg', '.txt')
            for filename in image_list
        ]
    # return
    ## InferenceDataset END

    ## Yolo LoadImagesAndLabels Start
    path = yolo_params['path']
    img_size = yolo_params.get('img_size', 416)
    batch_size = yolo_params.get('batch_size', 16)
    augment = yolo_params.get('augment', False)
    hyp = yolo_params.get('hyp', None)
    rect = yolo_params.get('rect', False)
    image_weights = yolo_params.get('image_weights', False)
    cache_labels = yolo_params.get('cache_labels', True)
    cache_images = yolo_params.get('cache_images', True)
    single_cls = yolo_params.get('single_cls', False)

    path = str(Path(path))  # os-agnostic
    assert os.path.isfile(path), 'File not found %s. See %s' % (path, help_url)
    with open(path, 'r') as f:
        self.img_files = [
            x.replace('/', os.sep)  # os-agnostic
            for x in f.read().splitlines()
            if os.path.splitext(x)[-1].lower() in img_formats
        ]
    rm = 'images/7.-With-A-Puffy-Jacket-Boots-And-A-Belt.jpg'
    if rm in self.img_files:
        self.img_files.remove(rm)
    self.imagePaths = self.img_files

    self.Yolo_transform = Compose([
        Resize(
            512,
            512,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    n = len(self.img_files)
    # print('No of images found:', n)
    if n > 500:
        np.savetxt('img_files.txt', self.img_files, delimiter="\n", fmt="%s")
    # print(batch_size, "batch_size")
    assert n > 0, 'No images found in %s. See %s' % (path, help_url)
    bi = np.floor(np.arange(n) / batch_size).astype(np.int)  # batch index
    nb = bi[-1] + 1  # number of batches

    self.n = n
    # print(bi, "Hey Batch")
    self.batch = bi  # batch index of image
    self.img_size = img_size
    self.augment = augment
    self.hyp = hyp
    self.image_weights = image_weights
    self.rect = False if image_weights else rect
    self.mosaic = False  # self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training)

    # Define labels
    # self.label_files = []
    # for x in self.img_files:
    #     x = x.split(os.sep)
    #     x[3] = 'labels'
    #     x[4] = x[4].replace(os.path.splitext(x[4])[-1], '.txt')
    #     x = os.sep.join(x)
    #     self.label_files.append(x)
    self.label_files = [
        x.replace('images', 'labels').replace(os.path.splitext(x)[-1], '.txt')
        for x in self.img_files
    ]

    # Rectangular Training  https://github.com/ultralytics/yolov3/issues/232
    if self.rect:
        # Read image shapes (wh)
        sp = path.replace('.txt', '.shapes')  # shapefile path
        try:
            with open(sp, 'r') as f:  # read existing shapefile
                s = [x.split() for x in f.read().splitlines()]
                assert len(s) == n, 'Shapefile out of sync'
        except:
            s = [
                exif_size(Image.open(f))
                for f in tqdm(self.img_files, desc='Reading image shapes')
            ]
            np.savetxt(sp, s, fmt='%g')  # overwrites existing (if any)

        # Sort by aspect ratio
        s = np.array(s, dtype=np.float64)
        ar = s[:, 1] / s[:, 0]  # aspect ratio
        i = ar.argsort()
        self.img_files = [self.img_files[i] for i in i]
        self.label_files = [self.label_files[i] for i in i]
        self.shapes = s[i]  # wh
        ar = ar[i]

        # Set training image shapes
        shapes = [[1, 1]] * nb
        for i in range(nb):
            ari = ar[bi == i]
            mini, maxi = ari.min(), ari.max()
            if maxi < 1:
                shapes[i] = [maxi, 1]
            elif mini > 1:
                shapes[i] = [1, 1 / mini]

        self.batch_shapes = np.ceil(
            np.array(shapes) * img_size / 64.).astype(np.int) * 64

    # Preload labels (required for weighted CE training)
    self.imgs = [None] * n
    self.labels = [None] * n
    if cache_labels or image_weights:  # cache labels for faster training
        self.labels = [np.zeros((0, 5))] * n
        extract_bounding_boxes = False
        create_datasubset = False
        pbar = tqdm(self.label_files, desc='Caching labels')
        nm, nf, ne, ns, nd = 0, 0, 0, 0, 0  # number missing, found, empty, datasubset, duplicate
        for i, file in enumerate(pbar):
            try:
                with open(file, 'r') as f:
                    l = np.array(
                        [x.split() for x in f.read().splitlines()],
                        dtype=np.float32)
            except:
                nm += 1  # file missing
                # print('missing labels for image %s' % self.img_files[i])
                continue

            if l.shape[0]:
                assert l.shape[1] == 5, '> 5 label columns: %s' % file
                assert (l >= 0).all(), 'negative labels: %s' % file
                assert (l[:, 1:] <= 1).all(), \
                    'non-normalized or out of bounds coordinate labels: %s' % file
                if np.unique(l, axis=0).shape[0] < l.shape[0]:  # duplicate rows
                    nd += 1
                    # print('WARNING: duplicate rows in %s' % self.label_files[i])
                if single_cls:
                    l[:, 0] = 0  # force dataset into single-class mode
                self.labels[i] = l
                nf += 1  # file found

                # Create subdataset (a smaller dataset)
                if create_datasubset and ns < 1E4:
                    if ns == 0:
                        create_folder(path='./datasubset')
                        os.makedirs('./datasubset/images')
                    exclude_classes = 43
                    if exclude_classes not in l[:, 0]:
                        ns += 1
                        # shutil.copy(src=self.img_files[i], dst='./datasubset/images/')  # copy image
                        with open('./datasubset/images.txt', 'a') as f:
                            f.write(self.img_files[i] + '\n')

                # Extract object detection boxes for a second stage classifier
                if extract_bounding_boxes:
                    p = Path(self.img_files[i])
                    img = cv2.imread(str(p))
                    h, w = img.shape[:2]
                    for j, x in enumerate(l):
                        f = '%s%sclassifier%s%g_%g_%s' % (
                            p.parent.parent, os.sep, os.sep, x[0], j, p.name)
                        if not os.path.exists(Path(f).parent):
                            os.makedirs(Path(f).parent)  # make new output folder

                        b = x[1:] * [w, h, w, h]  # box
                        b[2:] = b[2:].max()  # rectangle to square
                        b[2:] = b[2:] * 1.3 + 30  # pad
                        b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(np.int)

                        b[[0, 2]] = np.clip(b[[0, 2]], 0, w)  # clip boxes outside of image
                        b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
                        assert cv2.imwrite(f, img[b[1]:b[3], b[0]:b[2]]), \
                            'Failure extracting classifier boxes'
            else:
                ne += 1  # file empty
                # print('empty labels for image %s' % self.img_files[i])
                # os.system("rm '%s' '%s'" % (self.img_files[i], self.label_files[i]))  # remove

            pbar.desc = 'Caching labels (%g found, %g missing, %g empty, %g duplicate, for %g images)' % (
                nf, nm, ne, nd, n)
        assert nf > 0, 'No labels found in %s. See %s' % (
            os.path.dirname(file) + os.sep, help_url)

    # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
    if cache_images:  # if training
        gb = 0  # Gigabytes of cached images
        pbar = tqdm(range(len(self.img_files)), desc='Caching images')
        self.img_hw0, self.img_hw = [None] * n, [None] * n
        for i in pbar:  # max 10k images
            self.imgs[i], self.img_hw0[i], self.img_hw[i] = load_image(
                self, i)  # img, hw_original, hw_resized
            gb += self.imgs[i].nbytes
            pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)

    # Detect corrupted images https://medium.com/joelthchao/programmatically-detect-corrupted-image-8c1b2006c3d3
    detect_corrupted_images = False
    if detect_corrupted_images:
        from skimage import io  # conda install -c conda-forge scikit-image
        for file in tqdm(self.img_files, desc='Detecting corrupted images'):
            try:
                _ = io.imread(file)
            except:
                print('Corrupted image detected: %s' % file)
    ## Yolo LoadImagesAndLabels END

    self.depth_names = []
    for im in self.img_files:
        im = im.split(os.sep)
        im[3] = 'images'
        im[4] = im[4].replace(os.path.splitext(im[4])[-1], '.jpg')
        im = os.sep.join(im)
        # print(im, "hey brother")
        self.depth_names.append(im)
    # self.depth_names = [x.replace('images', 'depth_images').replace(os.path.splitext(x)[-1], '.png') for x in self.img_files]
    # self.img_path = inp_path
    # self.depth_path = depth_path

    self.transform = Compose([
        Resize(
            512,
            512,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="lower_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])
    # midas dataset end

    self.plane_names = []
    self.plane_nps = []
    for im in self.img_files:
        im = im.split(os.sep)
        im[3] = 'inference'
        np_file = im.copy()
        im[4] = im[4].replace(
            os.path.splitext(im[4])[-1], '_segmentation_0_final.png')
        np_file[4] = np_file[4].replace(
            os.path.splitext(np_file[4])[-1], '.npz')
        im = os.sep.join(im)
        np_file = os.sep.join(np_file)
        self.plane_names.append(im)
        self.plane_nps.append(np_file)
def run(model_path):
    """Run MonoDepthNN to compute depth maps."""
    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)
    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])
    model.to(device)
    model.eval()

    cam = cv2.VideoCapture(0)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, 360)
    cam.set(cv2.CAP_PROP_FPS, 30)

    while True:
        t = time.time()
        _, left_img = cam.read()
        image = cv2.cvtColor(left_img, cv2.COLOR_BGR2RGB) / 255.0

        # Apply transforms
        image = transform({"image": image})["image"]

        # Predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            depth = model.forward(image)
            depth = (
                torch.nn.functional.interpolate(
                    depth.unsqueeze(1),
                    size=left_img.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        depth_map = write_depth(depth, bits=2, reverse=False)

        right_img = generate_stereo(left_img, depth_map)
        anaglyph = overlap(left_img, right_img)
        cv2.imshow("anaglyph", anaglyph)

        fps = 1. / (time.time() - t)
        print('\rframerate: %f fps' % fps, end='')
        cv2.waitKey(1)
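# Note on the loop above: it never breaks, so the capture is never released.
# A hedged variant of the loop tail that exits cleanly on 'q' (an addition,
# not in the original snippet) would be:
#
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break
#
# followed after the loop by:
#
#     cam.release()
#     cv2.destroyAllWindows()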
def run(model_path):
    """Run MonoDepthNN to compute depth maps."""
    # Input images
    img_list = os.listdir(args.input)
    img_list.sort()

    # output dir
    output_dir = './depth'
    os.makedirs(output_dir, exist_ok=True)

    # set torch options
    torch.cuda.empty_cache()
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # select device
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)
    transform = Compose([
        Resize(
            384,
            384,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])
    model.to(device)
    model.eval()

    for idx in tqdm(range(len(img_list))):
        sample = img_list[idx]
        raw_image = cv2.imread(os.path.join(args.input, sample))
        raw_image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0

        # Apply transforms
        image = transform({"image": raw_image})["image"]

        # Predict and resize to original resolution
        with torch.no_grad():
            image = torch.from_numpy(image).to(device).unsqueeze(0)
            prediction = model.forward(image)
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=raw_image.shape[:2],
                    mode="bicubic",
                    align_corners=False,
                )
                .squeeze()
                .cpu()
                .numpy()
            )

        depth_map = write_depth(prediction, bits=2, reverse=False)
        cv2.imwrite(
            os.path.join(output_dir, 'MiDaS_{}.png'.format(sample.split('.')[0])),
            depth_map)
def __init__(self):
    self.device = "cuda" if torch.cuda.is_available() else "cpu"

    # Setup AdaBins model
    self.adabins_nyu_infer_helper = InferenceHelper(dataset='nyu', device=self.device)
    self.adabins_kitti_infer_helper = InferenceHelper(dataset='kitti', device=self.device)

    # Setup DiverseDepth model
    class DiverseDepthArgs:
        def __init__(self):
            self.resume = False
            self.cfg_file = "lib/configs/resnext50_32x4d_diversedepth_regression_vircam"
            self.load_ckpt = "pretrained/DiverseDepth.pth"

    diverse_depth_args = DiverseDepthArgs()
    merge_cfg_from_file(diverse_depth_args)
    self.diverse_depth_model = RelDepthModel()
    self.diverse_depth_model.eval()
    # load checkpoint
    load_ckpt(diverse_depth_args, self.diverse_depth_model)
    # TODO: update this - see how `device` argument should be processed
    if self.device == "cuda":
        self.diverse_depth_model.cuda()
    self.diverse_depth_model = torch.nn.DataParallel(self.diverse_depth_model)

    # Setup MiDaS model
    self.midas_model_path = "./pretrained/MiDaS_f6b98070.pt"
    midas_model_type = "large"

    # load network
    if midas_model_type == "large":
        self.midas_model = MidasNet(self.midas_model_path, non_negative=True)
        self.midas_net_w, self.midas_net_h = 384, 384
    elif midas_model_type == "small":
        self.midas_model = MidasNet_small(self.midas_model_path, features=64,
                                          backbone="efficientnet_lite3",
                                          exportable=True, non_negative=True,
                                          blocks={'expand': True})
        self.midas_net_w, self.midas_net_h = 256, 256

    self.midas_transform = Compose([
        Resize(
            self.midas_net_w,
            self.midas_net_h,
            resize_target=None,
            keep_aspect_ratio=True,
            ensure_multiple_of=32,
            resize_method="upper_bound",
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])
    self.midas_model.eval()

    self.midas_optimize = True
    if self.midas_optimize:
        rand_example = torch.rand(1, 3, self.midas_net_h, self.midas_net_w)
        self.midas_model(rand_example)
        traced_script_module = torch.jit.trace(self.midas_model, rand_example)
        self.midas_model = traced_script_module

        if self.device == "cuda":
            self.midas_model = self.midas_model.to(memory_format=torch.channels_last)
            self.midas_model = self.midas_model.half()
    self.midas_model.to(torch.device(self.device))

    # Setup SGDepth model
    self.sgdepth_model = InferenceEngine.SgDepthInference()

    # Setup monodepth2 model
    self.monodepth2_model_path = "pretrained/monodepth2_mono+stereo_640x192"
    monodepth2_device = torch.device(self.device)
    encoder_path = os.path.join(self.monodepth2_model_path, "encoder.pth")
    depth_decoder_path = os.path.join(self.monodepth2_model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading Monodepth2 pretrained encoder")
    self.monodepth2_encoder = networks.ResnetEncoder(18, False)
    loaded_dict_enc = torch.load(encoder_path, map_location=monodepth2_device)

    # extract the height and width of image that this model was trained with
    self.feed_height = loaded_dict_enc['height']
    self.feed_width = loaded_dict_enc['width']
    filtered_dict_enc = {
        k: v for k, v in loaded_dict_enc.items()
        if k in self.monodepth2_encoder.state_dict()
    }
    self.monodepth2_encoder.load_state_dict(filtered_dict_enc)
    self.monodepth2_encoder.to(monodepth2_device)
    self.monodepth2_encoder.eval()

    print(" Loading pretrained decoder")
    self.monodepth2_depth_decoder = networks.DepthDecoder(
        num_ch_enc=self.monodepth2_encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=monodepth2_device)
    self.monodepth2_depth_decoder.load_state_dict(loaded_dict)

    self.monodepth2_depth_decoder.to(monodepth2_device)
    self.monodepth2_depth_decoder.eval()
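# Hedged sketch of how the monodepth2 encoder/decoder pair loaded above is
# typically driven (inside another method of this class), following the
# upstream monodepth2 convention where the decoder returns a dict keyed by
# ("disp", scale). The random input is a stand-in for a preprocessed frame
# resized to (feed_height, feed_width):
#
#     with torch.no_grad():
#         x = torch.rand(1, 3, self.feed_height, self.feed_width).to(monodepth2_device)
#         features = self.monodepth2_encoder(x)
#         outputs = self.monodepth2_depth_decoder(features)
#         disp = outputs[("disp", 0)]  # finest-scale disparity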