def get_vram_free():
    """ Return free VRAM on the card with the most VRAM free """
    stats = GPUStats()
    vram = stats.get_card_most_free()
    logger.verbose("Using device %s with %sMB free of %sMB",
                   vram["device"], int(vram["free"]), int(vram["total"]))
    return int(vram["free"])
def _set_parallel_processing(self, multiprocess):
    """ Set whether to run detect and align together or separately """
    if self._detector.vram == 0 or self._aligner.vram == 0:
        logger.debug("At least one of aligner or detector has no VRAM requirement. "
                     "Enabling parallel processing.")
        return True
    if not multiprocess:
        logger.debug("Parallel processing disabled by cli.")
        return False

    gpu_stats = GPUStats()
    if gpu_stats.device_count == 0:
        logger.debug("No GPU detected. Enabling parallel processing.")
        return True
    if get_backend() == "amd":
        logger.debug("Parallel processing disabled by amd")
        return False

    vram_required = self._detector.vram + self._aligner.vram + self._vram_buffer
    stats = gpu_stats.get_card_most_free()
    vram_free = int(stats["free"])
    logger.verbose("%s - %sMB free of %sMB",
                   stats["device"], vram_free, int(stats["total"]))
    if vram_free <= vram_required:
        logger.warning("Not enough free VRAM for parallel processing. "
                       "Switching to serial")
        return False
    return True
def set_parallel_processing(self):
    """ Set whether to run detect and align together or separately """
    detector_vram = self.detector.vram
    aligner_vram = self.aligner.vram
    gpu_stats = GPUStats()
    if (detector_vram == 0
            or aligner_vram == 0
            or gpu_stats.device_count == 0):
        return True

    if hasattr(self.args, "multiprocess") and not self.args.multiprocess:
        print("\nNB: Parallel processing disabled.\nYou may get faster "
              "extraction speeds by enabling it with the -mp switch\n")
        return False

    required_vram = detector_vram + aligner_vram + 320  # 320MB buffer
    stats = gpu_stats.get_card_most_free()
    free_vram = int(stats["free"])
    if self.args.verbose:
        print("{} - {}MB free of {}MB".format(stats["device"],
                                              free_vram,
                                              int(stats["total"])))
    if free_vram <= required_vram:
        if self.args.verbose:
            print("Not enough free VRAM for parallel processing. "
                  "Switching to serial")
        return False
    return True
def set_parallel_processing(self):
    """ Set whether to run detect and align together or separately """
    detector_vram = self.detector.vram
    aligner_vram = self.aligner.vram
    gpu_stats = GPUStats()
    if (detector_vram == 0
            or aligner_vram == 0
            or gpu_stats.device_count == 0):
        logger.debug("At least one of aligner or detector has no VRAM requirement. "
                     "Enabling parallel processing.")
        return True

    if hasattr(self.args, "multiprocess") and not self.args.multiprocess:
        logger.info("NB: Parallel processing disabled. You may get faster "
                    "extraction speeds by enabling it with the -mp switch")
        return False

    required_vram = detector_vram + aligner_vram + 320  # 320MB buffer
    stats = gpu_stats.get_card_most_free()
    free_vram = int(stats["free"])
    logger.verbose("%s - %sMB free of %sMB",
                   stats["device"], free_vram, int(stats["total"]))
    if free_vram <= required_vram:
        logger.warning("Not enough free VRAM for parallel processing. "
                       "Switching to serial")
        return False
    return True
def set_parallel_processing(self, multiprocess):
    """ Set whether to run detect and align together or separately """
    detector_vram = self.detector.vram
    aligner_vram = self.aligner.vram
    gpu_stats = GPUStats()
    if detector_vram == 0 or aligner_vram == 0 or gpu_stats.device_count == 0:
        logger.debug("At least one of aligner or detector has no VRAM requirement. "
                     "Enabling parallel processing.")
        return True

    if not multiprocess:
        logger.info("NB: Parallel processing disabled. You may get faster "
                    "extraction speeds by enabling it with the -mp switch")
        return False

    required_vram = detector_vram + aligner_vram + 320  # 320MB buffer
    stats = gpu_stats.get_card_most_free()
    free_vram = int(stats["free"])
    logger.verbose("%s - %sMB free of %sMB",
                   stats["device"], free_vram, int(stats["total"]))
    if free_vram <= required_vram:
        logger.warning("Not enough free VRAM for parallel processing. "
                       "Switching to serial")
        return False
    return True
def get_vram_free():
    """ Return free and total VRAM on the card with most VRAM free """
    stats = GPUStats()
    vram = stats.get_card_most_free()
    logger.verbose("Using device %s with %sMB free of %sMB",
                   vram["device"], int(vram["free"]), int(vram["total"]))
    return int(vram["card_id"]), int(vram["free"]), int(vram["total"])
def _set_parallel_processing(self, multiprocess):
    """ Set whether to run detect, align, and mask together or separately """
    if not multiprocess:
        logger.debug("Parallel processing disabled by cli.")
        return False

    gpu_stats = GPUStats()
    if gpu_stats.device_count == 0:
        logger.debug("No GPU detected. Enabling parallel processing.")
        return True
    if get_backend() == "amd":
        logger.debug("Parallel processing disabled by amd")
        return False

    stats = gpu_stats.get_card_most_free()
    vram_free = int(stats["free"])
    logger.verbose("%s - %sMB free of %sMB",
                   stats["device"], vram_free, int(stats["total"]))
    if vram_free <= self._total_vram_required:
        logger.warning("Not enough free VRAM for parallel processing. "
                       "Switching to serial")
        return False
    return True
def set_parallel_processing(self, multiprocess):
    """ Set whether to run detect and align together or separately """
    detector_vram = self.detector.vram
    aligner_vram = self.aligner.vram
    if detector_vram == 0 or aligner_vram == 0:
        logger.debug("At least one of aligner or detector has no VRAM requirement. "
                     "Enabling parallel processing.")
        return True

    gpu_stats = GPUStats()
    if gpu_stats.device_count == 0:
        logger.debug("No GPU detected. Enabling parallel processing.")
        return True
    if not multiprocess:
        logger.debug("Parallel processing disabled by cli.")
        return False

    required_vram = detector_vram + aligner_vram + 320  # 320MB buffer
    stats = gpu_stats.get_card_most_free()
    free_vram = int(stats["free"])
    logger.verbose("%s - %sMB free of %sMB",
                   stats["device"], free_vram, int(stats["total"]))
    if free_vram <= required_vram:
        logger.warning("Not enough free VRAM for parallel processing. "
                       "Switching to serial")
        return False
    return True
def get_vram_free(self):
    """ Return free VRAM on the card with the most VRAM free """
    stats = GPUStats()
    vram = stats.get_card_most_free()
    if self.verbose:
        print("Using device {} with {}MB free of {}MB".format(
            vram["device"], int(vram["free"]), int(vram["total"])))
    return int(vram["free"])
def get_vram_free(self):
    """ Return free and total VRAM on the card with most VRAM free """
    stats = GPUStats()
    vram = stats.get_card_most_free()
    if self.verbose:
        print("Using device {} with {}MB free of {}MB".format(
            vram["device"], int(vram["free"]), int(vram["total"])))
    return int(vram["card_id"]), int(vram["free"]), int(vram["total"])
def _configure_backend(self, arguments):
    """ Configure the backend.

    Exclude any GPUs for use by Faceswap when requested.

    Set Faceswap backend to CPU if all GPUs have been deselected.

    Add the Keras import interception code.

    Parameters
    ----------
    arguments: :class:`argparse.Namespace`
        The command line arguments passed to Faceswap.
    """
    if not hasattr(arguments, "exclude_gpus"):
        # Cpu backends will not have this attribute
        logger.debug("Adding missing exclude gpus argument to namespace")
        setattr(arguments, "exclude_gpus", None)

    if arguments.exclude_gpus:
        if not all(idx.isdigit() for idx in arguments.exclude_gpus):
            logger.error("GPUs passed to the ['-X', '--exclude-gpus'] argument must all be "
                         "integers.")
            sys.exit(1)
        arguments.exclude_gpus = [int(idx) for idx in arguments.exclude_gpus]
        set_exclude_devices(arguments.exclude_gpus)

    if ((get_backend() == "cpu" or GPUStats().exclude_all_devices)
            and (self._command == "extract" and arguments.detector == "s3fd")):
        logger.error("Extracting on CPU is not currently supported for detector: '%s'",
                     arguments.detector.upper())
        sys.exit(0)

    if GPUStats().exclude_all_devices and get_backend() != "cpu":
        msg = "Switching backend to CPU"
        if get_backend() == "amd":
            msg += ". Using Tensorflow for CPU operations."
            os.environ["KERAS_BACKEND"] = "tensorflow"
        set_backend("cpu")
        logger.info(msg)

    # Add Keras finder to the meta_path list as the first item
    sys.meta_path.insert(0, KerasFinder())

    logger.debug("Executing: %s. PID: %s", self._command, os.getpid())

    if get_backend() == "amd":
        plaidml_found = self._setup_amd(arguments)
        if not plaidml_found:
            safe_shutdown(got_error=True)
            sys.exit(1)
def set_parallel_processing(self, multiprocess):
    """ Set whether to run detect and align together or separately """
    detector_vram = self.detector.vram
    aligner_vram = self.aligner.vram
    if detector_vram == 0 or aligner_vram == 0:
        logger.debug("At least one of aligner or detector has no VRAM requirement. "
                     "Enabling parallel processing.")
        return True
    if not multiprocess:
        logger.debug("Parallel processing disabled by cli.")
        return False

    gpu_stats = GPUStats()
    if gpu_stats.is_plaidml and (not self.detector.supports_plaidml
                                 or not self.aligner.supports_plaidml):
        logger.debug("At least one of aligner or detector does not support plaidML. "
                     "Enabling parallel processing.")
        return True
    if not gpu_stats.is_plaidml and (
            (self.detector.supports_plaidml and aligner_vram != 0)
            or (self.aligner.supports_plaidml and detector_vram != 0)):
        logger.warning("Keras + non-Keras aligner/detector combination does not support "
                       "parallel processing. Switching to serial.")
        return False
    if self.detector.supports_plaidml and self.aligner.supports_plaidml:
        logger.debug("Both aligner and detector support plaidML. Disabling parallel "
                     "processing.")
        return False
    if gpu_stats.device_count == 0:
        logger.debug("No GPU detected. Enabling parallel processing.")
        return True

    required_vram = detector_vram + aligner_vram + 320  # 320MB buffer
    stats = gpu_stats.get_card_most_free()
    free_vram = int(stats["free"])
    logger.verbose("%s - %sMB free of %sMB",
                   stats["device"], free_vram, int(stats["total"]))
    if free_vram <= required_vram:
        logger.warning("Not enough free VRAM for parallel processing. "
                       "Switching to serial")
        return False
    return True
def __init__(self):
    self.verbose = False
    self.output_shown = False
    self.stats = GPUStats()
    self.vram_free = None
    self.vram_total = None
    self.scale_to = None
    self.device = self.set_device()
    if self.device == -1:
        return
    self.vram_total = self.stats.vram[self.device]
    self.get_available_vram()
def predict_faces(self):
    """ Get detected faces from images """
    faces_seen = 0
    consecutive_no_faces = 0
    batch = list()
    is_plaidml = GPUStats().is_plaidml
    while True:
        item = self.in_queue.get()
        if item != "EOF":
            logger.trace("Got from queue: '%s'", item["filename"])
            faces_count = len(item["detected_faces"])

            # Safety measure. If a large stream of frames appear that do not have faces,
            # these will stack up into RAM. Keep a count of consecutive frames with no faces.
            # If self.batchsize number of frames appear, force the current batch through
            # to clear RAM.
            consecutive_no_faces = consecutive_no_faces + 1 if faces_count == 0 else 0
            self.faces_count += faces_count
            if faces_count > 1:
                self.verify_output = True
                logger.verbose("Found more than one face in an image! '%s'",
                               os.path.basename(item["filename"]))

            self.load_aligned(item)
            faces_seen += faces_count
            batch.append(item)

        if item != "EOF" and (faces_seen < self.batchsize
                              and consecutive_no_faces < self.batchsize):
            logger.trace("Continuing. Current batchsize: %s, consecutive_no_faces: %s",
                         faces_seen, consecutive_no_faces)
            continue

        if batch:
            logger.trace("Batching to predictor. Frames: %s, Faces: %s",
                         len(batch), faces_seen)
            detected_batch = [detected_face for item in batch
                              for detected_face in item["detected_faces"]]
            if faces_seen != 0:
                feed_faces = self.compile_feed_faces(detected_batch)
                batch_size = None
                if is_plaidml and feed_faces.shape[0] != self.batchsize:
                    logger.verbose("Fallback to BS=1")
                    batch_size = 1
                predicted = self.predict(feed_faces, batch_size)
            else:
                predicted = list()

            self.queue_out_frames(batch, predicted)

        consecutive_no_faces = 0
        faces_seen = 0
        batch = list()
        if item == "EOF":
            logger.debug("EOF Received")
            break
    logger.debug("Putting EOF")
    self.out_queue.put("EOF")
    logger.debug("Load queue complete")
def get_batchsize(queue_size):
    """ Get the batchsize """
    is_cpu = GPUStats().device_count == 0
    batchsize = 1 if is_cpu else 16
    batchsize = min(queue_size, batchsize)
    logger.debug("Batchsize: %s", batchsize)
    return batchsize
def _get_vram_stats():
    """ Obtain statistics on available VRAM and subtract a constant buffer from available vram.

    Returns
    -------
    dict
        Statistics on available VRAM
    """
    vram_buffer = 256  # Leave a buffer for VRAM allocation
    gpu_stats = GPUStats()
    stats = gpu_stats.get_card_most_free()
    retval = dict(count=gpu_stats.device_count,
                  device=stats["device"],
                  vram_free=int(stats["free"] - vram_buffer),
                  vram_total=int(stats["total"]))
    logger.debug(retval)
    return retval
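# Illustrative sketch only (not part of the codebase): a hypothetical caller that
# derives a batch size from the dict returned by _get_vram_stats() above. The helper
# name and the per-image VRAM figure are assumptions made for demonstration.
def _batchsize_from_vram_stats(vram_per_image=64, max_batchsize=16):
    """ Pick a batch size that fits in the free VRAM reported by _get_vram_stats """
    stats = _get_vram_stats()
    if stats["count"] == 0:
        return 1  # no GPU detected, fall back to single-image batches
    fitting = stats["vram_free"] // vram_per_image
    return max(1, min(max_batchsize, fitting))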
def __init__(self):
    self.initialized = False
    self.verbose = False
    self.stats = GPUStats()
    self.vram_free = None
    self.vram_total = None
    self.scale_to = None
    self.device = self.set_device()
    if self.device == -1:
        # Limit ram usage to 2048 for CPU
        self.vram_total = 2048
    else:
        self.vram_total = self.stats.vram[self.device]
    self.get_available_vram()
def __init__(self):
    self._state_file = _State().state_file
    self._configs = _Configs().configs
    self._system = dict(platform=platform.platform(),
                        system=platform.system(),
                        machine=platform.machine(),
                        release=platform.release(),
                        processor=platform.processor(),
                        cpu_count=os.cpu_count())
    self._python = dict(implementation=platform.python_implementation(),
                        version=platform.python_version())
    self._gpu = GPUStats(log=False).sys_info
    self._cuda_check = CudaCheck()
def _set_extractor_batchsize(self):
    """ Sets the batch size of the requested plugins based on their vram and
    vram_per_batch requirements if the configured batch size requires more vram
    than is available. Nvidia only. """
    if get_backend() != "nvidia":
        logger.debug("Backend is not Nvidia. Not updating batchsize requirements")
        return
    if sum([plugin.vram for plugin in self._all_plugins]) == 0:
        logger.debug("No plugins use VRAM. Not updating batchsize requirements.")
        return

    stats = GPUStats().get_card_most_free()
    vram_free = int(stats["free"])
    if self._is_parallel:
        batch_required = sum([plugin.vram_per_batch * plugin.batchsize
                              for plugin in self._all_plugins])
        plugin_required = self._total_vram_required + batch_required
        if plugin_required <= vram_free:
            logger.debug("Plugin requirements within threshold: (plugin_required: %sMB, "
                         "vram_free: %sMB)", plugin_required, vram_free)
            return
        # Hacky split across plugins that use vram
        gpu_plugin_count = sum([1 for plugin in self._all_plugins if plugin.vram != 0])
        available_vram = (vram_free - self._total_vram_required) // gpu_plugin_count
        for plugin in self._all_plugins:
            if plugin.vram != 0:
                self._set_plugin_batchsize(plugin, available_vram)
    else:
        for plugin in self._all_plugins:
            if plugin.vram == 0:
                continue
            vram_required = plugin.vram + self._vram_buffer
            batch_required = plugin.vram_per_batch * plugin.batchsize
            plugin_required = vram_required + batch_required
            if plugin_required <= vram_free:
                logger.debug("%s requirements within threshold: (plugin_required: %sMB, "
                             "vram_free: %sMB)", plugin.name, plugin_required, vram_free)
                continue
            available_vram = vram_free - vram_required
            self._set_plugin_batchsize(plugin, available_vram)
def __init__(self):
    self.initialized = False
    self.verbose = False
    self.stats = GPUStats()
    self.vram_free = None
    self.vram_total = None
    self.scale_to = None
    self.device = self.set_device()
    if self.device == -1:
        return
    self.vram_total = self.stats.vram[self.device]
    self.get_available_vram()
def __init__(self):
    gpu_stats = GPUStats(log=False)
    self.platform = platform.platform()
    self.system = platform.system()
    self.machine = platform.machine()
    self.release = platform.release()
    self.processor = platform.processor()
    self.cpu_count = os.cpu_count()
    self.py_implementation = platform.python_implementation()
    self.py_version = platform.python_version()
    self._cuda_path = self.get_cuda_path()
    self.vram = gpu_stats.vram
    self.gfx_driver = gpu_stats.driver
    self.gfx_devices = gpu_stats.devices
def _get_batchsize(queue_size):
    """ Get the batch size for feeding the model.

    Sets the batch size to 1 if inference is being run on CPU, otherwise the minimum
    of the :attr:`self._queue_size` and 16.

    Returns
    -------
    int
        The batch size that the model is to be fed at.
    """
    logger.debug("Getting batchsize")
    is_cpu = GPUStats().device_count == 0
    batchsize = 1 if is_cpu else 16
    batchsize = min(queue_size, batchsize)
    logger.debug("Batchsize: %s", batchsize)
    logger.debug("Got batchsize: %s", batchsize)
    return batchsize
def _configure_backend(self, arguments):
    """ Configure the backend.

    Exclude any GPUs for use by Faceswap when requested.

    Set Faceswap backend to CPU if all GPUs have been deselected.

    Parameters
    ----------
    arguments: :class:`argparse.Namespace`
        The command line arguments passed to Faceswap.
    """
    if get_backend() == "cpu":
        # Cpu backends will not have this attribute
        logger.debug("Adding missing exclude gpus argument to namespace")
        setattr(arguments, "exclude_gpus", None)
        return

    if arguments.exclude_gpus:
        if not all(idx.isdigit() for idx in arguments.exclude_gpus):
            logger.error("GPUs passed to the ['-X', '--exclude-gpus'] argument must all be "
                         "integers.")
            sys.exit(1)
        arguments.exclude_gpus = [int(idx) for idx in arguments.exclude_gpus]
        set_exclude_devices(arguments.exclude_gpus)

    if GPUStats().exclude_all_devices:
        msg = "Switching backend to CPU"
        if get_backend() == "amd":
            msg += ". Using Tensorflow for CPU operations."
            os.environ["KERAS_BACKEND"] = "tensorflow"
        set_backend("cpu")
        logger.info(msg)

    logger.debug("Executing: %s. PID: %s", self._command, os.getpid())

    if get_backend() == "amd" and not self._setup_amd(arguments):
        safe_shutdown(got_error=True)
def process(self):
    """ Perform the extraction process """
    print('Starting, this may take a while...')
    Utils.set_verbosity(self.args.verbose)

    if (hasattr(self.args, 'multiprocess')
            and self.args.multiprocess
            and GPUStats().device_count == 0):
        # TODO Checking that there is no available GPU is not
        # necessarily an indicator of whether the user is actually
        # using the CPU. Maybe look to implement further checks on
        # dlib/tensorflow compilations
        self.extract_multi_process()
    else:
        self.extract_single_process()

    self.write_alignments()
    images, faces = Utils.finalize(self.images.images_found,
                                   self.faces.num_faces_detected,
                                   self.faces.verify_output)
    self.images.images_found = images
    self.faces.num_faces_detected = faces
def _set_extractor_batchsize(self):
    """ Sets the batchsize of the requested plugins based on their vram and
    vram_per_batch requirements if the configured batchsize requires more vram
    than is available. Nvidia only. """
    if (self._detector.vram == 0 and self._aligner.vram == 0) or get_backend() != "nvidia":
        logger.debug("Either detector and aligner have no VRAM requirements or not running "
                     "on Nvidia. Not updating batchsize requirements.")
        return

    stats = GPUStats().get_card_most_free()
    vram_free = int(stats["free"])
    if self._is_parallel:
        vram_required = self._detector.vram + self._aligner.vram + self._vram_buffer
        batch_required = ((self._aligner.vram_per_batch * self._aligner.batchsize)
                          + (self._detector.vram_per_batch * self._detector.batchsize))
        plugin_required = vram_required + batch_required
        if plugin_required <= vram_free:
            logger.debug("Plugin requirements within threshold: (plugin_required: %sMB, "
                         "vram_free: %sMB)", plugin_required, vram_free)
            return
        # Hacky split across 2 plugins
        available_vram = (vram_free - vram_required) // 2
        for plugin in (self._aligner, self._detector):
            self._set_plugin_batchsize(plugin, available_vram)
    else:
        for plugin in (self._aligner, self._detector):
            vram_required = plugin.vram + self._vram_buffer
            batch_required = plugin.vram_per_batch * plugin.batchsize
            plugin_required = vram_required + batch_required
            if plugin_required <= vram_free:
                logger.debug("%s requirements within threshold: (plugin_required: %sMB, "
                             "vram_free: %sMB)", plugin.name, plugin_required, vram_free)
                continue
            available_vram = vram_free - vram_required
            self._set_plugin_batchsize(plugin, available_vram)
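# Worked example for the parallel branch above, using assumed figures rather than
# values taken from any actual plugin: with detector.vram = 1600MB,
# aligner.vram = 2048MB and _vram_buffer = 256MB, vram_required = 3904MB. If the
# card reports vram_free = 6000MB and the configured batch sizes push
# plugin_required past 6000MB, the remaining VRAM is split evenly:
#   available_vram = (6000 - 3904) // 2 = 1048MB per plugin
# and _set_plugin_batchsize() (not shown here) is then responsible for shrinking
# each plugin's batch size to fit within that share.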
def _get_batchsize(self, queue_size):
    """ Get the batch size for feeding the model.

    Sets the batch size to 1 if inference is being run on CPU, otherwise the minimum
    of the input queue size and the model's `convert_batchsize` configuration option.

    Parameters
    ----------
    queue_size: int
        The queue size that is feeding the predictor

    Returns
    -------
    int
        The batch size that the model is to be fed at.
    """
    logger.debug("Getting batchsize")
    is_cpu = GPUStats().device_count == 0
    batchsize = 1 if is_cpu else self._model.config["convert_batchsize"]
    batchsize = min(queue_size, batchsize)
    logger.debug("Batchsize: %s", batchsize)
    logger.debug("Got batchsize: %s", batchsize)
    return batchsize
class GPUMem(object):
    """ Sets the scale-to factor for dlib images and the ratio of vram to use
        for tensorflow """

    def __init__(self):
        self.initialized = False
        self.verbose = False
        self.stats = GPUStats()
        self.vram_free = None
        self.vram_total = None
        self.scale_to = None
        self.device = self.set_device()
        if self.device == -1:
            return
        self.vram_total = self.stats.vram[self.device]
        self.get_available_vram()

    def set_device(self):
        """ Set the default device """
        if self.stats.device_count == 0:
            return -1
        return 0  # TF selects first device, so this is used for stats
        # TODO select and use device with most available VRAM
        # TODO create virtual devices/allow multiple GPUs for
        #      parallel processing

    def set_device_with_max_free_vram(self):
        """ Set the device with the most available free vram """
        # TODO Implement this to select the device with most available VRAM
        free_mem = self.stats.get_free()
        self.vram_free = max(free_mem)
        self.device = free_mem.index(self.vram_free)

    def get_available_vram(self):
        """ Recalculate the available vram """
        free_mem = self.stats.get_free()
        self.vram_free = free_mem[self.device]
        if self.verbose:
            print("GPU VRAM free: {}".format(self.vram_free))

    def output_stats(self):
        """ Output stats in verbose mode """
        if not self.verbose:
            return
        print("\n----- Initial GPU Stats -----")
        self.stats.print_info()
        print("GPU VRAM free: {}".format(self.vram_free))
        print("-----------------------------\n")

    def get_tensor_gpu_ratio(self):
        """ Set the ratio of GPU memory to use for tensorflow session for
            keras points predictor.

            Ideally 2304MB is required, but will run with less (with warnings).

            This is only required if running with DLIB.
            MTCNN will share the tensorflow session. """
        if self.vram_free < 2030:
            ratio = 1024.0 / self.vram_total
        elif self.vram_free < 3045:
            ratio = 1560.0 / self.vram_total
        elif self.vram_free < 4060:
            ratio = 2048.0 / self.vram_total
        else:
            ratio = 2304.0 / self.vram_total
        return ratio

    def set_scale_to(self, detector):
        """ Set the size to scale images down to for specific detector
            and available VRAM

            DLIB VRAM allocation is linear to pixel count

            MTCNN is weird. Not linear at low levels, then fairly linear up to
            3360x1890, then requirements drop again. As 3360x1890 is hi-res,
            just this scale is used for calculating image scaling """
        # MTCNN VRAM Usage Stats
        # Crudely calculated at default values
        # The formula may need amending, but it should
        # work for most use cases
        #   480x270   = 267.56 MB
        #   960x540   = 333.18 MB
        #   1280x720  = 592.32 MB
        #   1440x810  = 746.56 MB
        #   1920x1080 = 1.30 GB
        #   2400x1350 = 2.03 GB
        #   2880x1620 = 2.93 GB
        #   3360x1890 = 3.98 GB
        #   3840x2160 = 2.62 GB  <--??
        #   4280x2800 = 3.69 GB
        detector = "dlib" if detector in ("dlib-cnn", "dlib-hog", "dlib-all") else detector
        buffer = 64  # 64MB overhead buffer
        gradient = 3483.2 / 9651200  # MTCNN
        constant = 1.007533156  # MTCNN
        if detector == "dlib":
            self.get_available_vram()
            gradient = 213 / 524288
            constant = 307

        if self.device != -1:
            free_mem = self.vram_free - buffer
        else:
            # Limit to 2GB if using CPU
            free_mem = 2048

        self.scale_to = int((free_mem - constant) / gradient)

        if self.scale_to < 4097:
            raise ValueError("Images would be shrunk too much "
                             "for successful extraction")
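# Worked example of the set_scale_to() arithmetic above, using an assumed 4096MB of
# free VRAM (not a measured figure). With the MTCNN constants:
#   free_mem = 4096 - 64 = 4032
#   scale_to = int((4032 - 1.007533156) / (3483.2 / 9651200)) ≈ 11,169,000
# i.e. a scale-to target of roughly 11.2 megapixels, so frames much larger than
# about 4456x2506 would be candidates for downscaling before detection.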
class GPUMem(object):
    """ Sets the scale-to factor for dlib images and the ratio of vram to use
        for tensorflow """

    def __init__(self):
        self.verbose = False
        self.output_shown = False
        self.stats = GPUStats()
        self.vram_free = None
        self.vram_total = None
        self.scale_to = None
        self.device = self.set_device()
        if self.device == -1:
            return
        self.vram_total = self.stats.vram[self.device]
        self.get_available_vram()

    def set_device(self):
        """ Set the default device """
        if self.stats.device_count == 0:
            return -1
        return 0  # TF selects first device, so this is used for stats
        # TODO select and use device with most available VRAM
        # TODO create virtual devices/allow multiple GPUs for
        #      parallel processing

    def set_device_with_max_free_vram(self):
        """ Set the device with the most available free vram """
        # TODO Implement this to select the device with most available VRAM
        free_mem = self.stats.get_free()
        self.vram_free = max(free_mem)
        self.device = free_mem.index(self.vram_free)

    def get_available_vram(self):
        """ Recalculate the available vram """
        free_mem = self.stats.get_free()
        self.vram_free = free_mem[self.device]
        if self.verbose:
            print("GPU VRAM free: {}".format(self.vram_free))

    def output_stats(self):
        """ Output stats in verbose mode """
        if self.output_shown or not self.verbose:
            return
        print("\n----- Initial GPU Stats -----")
        self.stats.print_info()
        print("GPU VRAM free: {}".format(self.vram_free))
        print("-----------------------------\n")
        self.output_shown = True

    def get_tensor_gpu_ratio(self):
        """ Set the ratio of GPU memory to use for tensorflow session for
            keras points predictor.

            Ideally 2304MB is required, but will run with less (with warnings).

            This is only required if running with DLIB.
            MTCNN will share the tensorflow session. """
        if self.vram_free < 2030:
            ratio = 1024.0 / self.vram_total
        elif self.vram_free < 3045:
            ratio = 1560.0 / self.vram_total
        elif self.vram_free < 4060:
            ratio = 2048.0 / self.vram_total
        else:
            ratio = 2304.0 / self.vram_total
        return ratio

    def set_scale_to(self, detector):
        """ Set the size to scale images down to for specific detector
            and available VRAM

            DLIB VRAM allocation is linear to pixel count

            MTCNN is weird. Not linear at low levels, then fairly linear up to
            3360x1890, then requirements drop again. As 3360x1890 is hi-res,
            just this scale is used for calculating image scaling """
        # MTCNN VRAM Usage Stats
        # Crudely calculated at default values
        # The formula may need amending, but it should
        # work for most use cases
        #   480x270   = 267.56 MB
        #   960x540   = 333.18 MB
        #   1280x720  = 592.32 MB
        #   1440x810  = 746.56 MB
        #   1920x1080 = 1.30 GB
        #   2400x1350 = 2.03 GB
        #   2880x1620 = 2.93 GB
        #   3360x1890 = 3.98 GB
        #   3840x2160 = 2.62 GB  <--??
        #   4280x2800 = 3.69 GB
        detector = "dlib" if detector in ("dlib-cnn", "dlib-hog", "dlib-all") else detector
        buffer = 64  # 64MB overhead buffer
        gradient = 3483.2 / 9651200  # MTCNN
        constant = 1.007533156  # MTCNN
        if detector == "dlib":
            self.get_available_vram()
            gradient = 213 / 524288
            constant = 307

        free_mem = self.vram_free - buffer
        self.scale_to = int((free_mem - constant) / gradient)

        if self.scale_to < 4097:
            raise ValueError("Images would be shrunk too much "
                             "for successful extraction")