def get_devices_for_type (self, type, multi_gpu):
    """Return the devices available for a given extractor stage.

    Parameters:
        type      -- stage name: 'rects' / 'landmarks' select GPU device(s),
                     'final' runs on CPU cores.
        multi_gpu -- for the GPU stages, when True use every device with at
                     least 2GB total VRAM instead of only the best device.

    Returns:
        list of (index, name, vram_gb) tuples; vram_gb is 0 for CPU entries.
        An unknown stage name yields an empty list.
    """
    # Fix: previously `devices` was only bound inside the branches, so an
    # unknown `type` raised UnboundLocalError at the return statement.
    devices = []
    if type == 'rects' or type == 'landmarks':
        if not multi_gpu:
            # single-GPU mode: take the best device only
            idxs = [gpufmkmgr.getBestDeviceIdx()]
        else:
            # multi-GPU mode: every device with >= 2GB total VRAM qualifies
            idxs = gpufmkmgr.getDevicesWithAtLeastTotalMemoryGB(2)
        devices = [ (idx, gpufmkmgr.getDeviceName(idx), gpufmkmgr.getDeviceVRAMTotalGb(idx) ) for idx in idxs]
    elif type == 'final':
        # final stage is CPU-bound: expose one pseudo-device per core
        devices = [ (i, 'CPU%d' % (i), 0 ) for i in range(multiprocessing.cpu_count()) ]
    return devices
def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, multi_gpu=False, force_best_gpu_idx=-1, force_gpu_idxs=None, write_preview_history=False, debug=False, **in_options):
    """Load or create the model state stored under *model_path*.

    Restores epoch/options/loss history from 'data.dat' (pickle) when it
    exists, selects the GPU device(s) to run on, imports the TF/Keras
    frameworks via gpufmkmgr, calls the subclass hook onInitialize(), and
    prints a summary of the configuration.

    Parameters:
        model_path              -- directory (path-like) holding the model files.
        training_data_src_path  -- source training data dir; training mode is
                                   enabled only when both src and dst are given.
        training_data_dst_path  -- destination training data dir.
        multi_gpu               -- try to run on all GPUs of the same model.
        force_best_gpu_idx      -- use this GPU index if >= 0 and valid,
                                   otherwise pick the best device.
        force_gpu_idxs          -- comma-separated GPU indices; overrides the
                                   automatic selection entirely.
        write_preview_history   -- keep preview images in '<name>_history'.
        debug                   -- force batch_size to 1.
        **in_options            -- forwarded verbatim to onInitialize().
    """
    print("Loading model...")
    self.model_path = model_path
    # serialized state (epoch, options, ...) lives next to the model weights
    self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat'))
    self.training_data_src_path = training_data_src_path
    self.training_data_dst_path = training_data_dst_path
    self.training_datas = [None] * TrainingDataType.QTY
    self.src_images_paths = None
    self.dst_images_paths = None
    self.src_yaw_images_paths = None
    self.dst_yaw_images_paths = None
    self.src_data_generator = None
    self.dst_data_generator = None
    # training only when both data paths were supplied
    self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None)
    self.batch_size = 1
    self.write_preview_history = write_preview_history
    self.debug = debug
    self.supress_std_once = False  # was True; stdout suppression currently disabled
    if self.model_data_path.exists():
        # resume: restore saved state. NOTE(review): pickle.loads on a file
        # from disk — only safe because the file is produced by this program.
        model_data = pickle.loads(self.model_data_path.read_bytes())
        self.epoch = model_data['epoch']
        self.options = model_data['options']
        # the following keys were added later; default them for old files
        self.loss_history = model_data[
            'loss_history'] if 'loss_history' in model_data.keys() else []
        self.generator_dict_states = model_data[
            'generator_dict_states'] if 'generator_dict_states' in model_data.keys(
            ) else None
        self.sample_for_preview = model_data[
            'sample_for_preview'] if 'sample_for_preview' in model_data.keys(
            ) else None
    else:
        # fresh model: start from scratch
        self.epoch = 0
        self.options = {}
        self.loss_history = []
        self.generator_dict_states = None
        self.sample_for_preview = None
    if self.write_preview_history:
        self.preview_history_path = self.model_path / (
            '%s_history' % (self.get_model_name()))
        if not self.preview_history_path.exists():
            self.preview_history_path.mkdir(exist_ok=True)
        else:
            # starting over: clear stale previews from a previous run
            if self.epoch == 0:
                for filename in Path_utils.get_image_paths(
                        self.preview_history_path):
                    Path(filename).unlink()
    self.multi_gpu = multi_gpu
    # honor the forced index only when it is valid; otherwise auto-pick
    gpu_idx = force_best_gpu_idx if (
        force_best_gpu_idx >= 0 and
        gpufmkmgr.isValidDeviceIdx(force_best_gpu_idx)
    ) else gpufmkmgr.getBestDeviceIdx()
    gpu_total_vram_gb = gpufmkmgr.getDeviceVRAMTotalGb(gpu_idx)
    is_gpu_low_mem = (gpu_total_vram_gb < 4)  # NOTE(review): computed but never used below
    self.gpu_total_vram_gb = gpu_total_vram_gb
    if self.epoch == 0:
        # first run: remember the VRAM the model was created with
        self.options['created_vram_gb'] = gpu_total_vram_gb
        self.created_vram_gb = gpu_total_vram_gb
    else:
        # not first run: prefer the recorded value, backfill for old files
        if 'created_vram_gb' in self.options.keys():
            self.created_vram_gb = self.options['created_vram_gb']
        else:
            self.options['created_vram_gb'] = gpu_total_vram_gb
            self.created_vram_gb = gpu_total_vram_gb
    if force_gpu_idxs is not None:
        # explicit override, e.g. "0,1"
        self.gpu_idxs = [int(x) for x in force_gpu_idxs.split(',')]
    else:
        if self.multi_gpu:
            self.gpu_idxs = gpufmkmgr.getDeviceIdxsEqualModel(gpu_idx)
            # fall back to single-GPU when no identical peer exists
            if len(self.gpu_idxs) <= 1:
                self.multi_gpu = False
        else:
            self.gpu_idxs = [gpu_idx]
    self.tf = gpufmkmgr.import_tf(self.gpu_idxs, allow_growth=False)
    self.keras = gpufmkmgr.import_keras()
    self.keras_contrib = gpufmkmgr.import_keras_contrib()
    # subclass hook: builds the actual network and generators
    self.onInitialize(**in_options)
    if self.debug:
        self.batch_size = 1
    if self.is_training_mode:
        if self.generator_list is None:
            raise Exception('You didnt set_training_data_generators()')
        else:
            for i, generator in enumerate(self.generator_list):
                if not isinstance(generator, TrainingDataGeneratorBase):
                    raise Exception(
                        'training data generator is not subclass of TrainingDataGeneratorBase'
                    )
                # restore per-generator RNG/sample state if it was saved
                if self.generator_dict_states is not None and i < len(
                        self.generator_dict_states):
                    generator.set_dict_state(self.generator_dict_states[i])
        if self.sample_for_preview is None:
            self.sample_for_preview = self.generate_next_sample()
    print("===== Model summary =====")
    print("== Model name: " + self.get_model_name())
    print("==")
    print("== Current epoch: " + str(self.epoch))
    print("==")
    print("== Options:")
    print("== |== batch_size : %s " % (self.batch_size))
    print("== |== multi_gpu : %s " % (self.multi_gpu))
    for key in self.options.keys():
        print("== |== %s : %s" % (key, self.options[key]))
    print("== Running on:")
    for idx in self.gpu_idxs:
        print("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)))
    if self.gpu_total_vram_gb == 2:
        print("==")
        print(
            "== WARNING: You are using 2GB GPU. If training does not start,"
        )
        print("== close all programs and try again.")
        print(
            "== Also you can disable Windows Aero Desktop to get extra free VRAM."
        )
        print("==")
    print("=========================")
def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, batch_size=0, write_preview_history=False, debug=False, **in_options):
    """Load or create the model state stored under *model_path*.

    Newer variant: GPU selection is delegated to gpufmkmgr.GPUConfig
    (which also consumes **in_options), and a TF session is obtained
    explicitly. Restores epoch/options/loss history from 'data.dat'
    (pickle) when it exists, calls the subclass hook onInitialize(),
    and prints a summary of the configuration.

    Parameters:
        model_path              -- directory (path-like) holding the model files.
        training_data_src_path  -- source training data dir; training mode is
                                   enabled only when both src and dst are given.
        training_data_dst_path  -- destination training data dir.
        batch_size              -- 0 means "let onInitialize decide"; forced
                                   to 1 afterwards if still 0 or in debug mode.
        write_preview_history   -- keep preview images in '<name>_history'.
        debug                   -- force batch_size to 1.
        **in_options            -- forwarded to GPUConfig and onInitialize().
    """
    print("Loading model...")
    self.model_path = model_path
    # serialized state (epoch, options, ...) lives next to the model weights
    self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat'))
    self.training_data_src_path = training_data_src_path
    self.training_data_dst_path = training_data_dst_path
    self.src_images_paths = None
    self.dst_images_paths = None
    self.src_yaw_images_paths = None
    self.dst_yaw_images_paths = None
    self.src_data_generator = None
    self.dst_data_generator = None
    # training only when both data paths were supplied
    self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None)
    self.batch_size = batch_size
    self.write_preview_history = write_preview_history
    self.debug = debug
    # stdout suppression is now opt-in via the TF_SUPPRESS_STD env var
    self.supress_std_once = ('TF_SUPPRESS_STD' in os.environ.keys() and
                             os.environ['TF_SUPPRESS_STD'] == '1')
    if self.model_data_path.exists():
        # resume: restore saved state. NOTE(review): pickle.loads on a file
        # from disk — only safe because the file is produced by this program.
        model_data = pickle.loads(self.model_data_path.read_bytes())
        self.epoch = model_data['epoch']
        self.options = model_data['options']
        # the following keys were added later; default them for old files
        self.loss_history = model_data[
            'loss_history'] if 'loss_history' in model_data.keys() else []
        self.sample_for_preview = model_data[
            'sample_for_preview'] if 'sample_for_preview' in model_data.keys(
            ) else None
    else:
        # fresh model: start from scratch
        self.epoch = 0
        self.options = {}
        self.loss_history = []
        self.sample_for_preview = None
    if self.write_preview_history:
        self.preview_history_path = self.model_path / (
            '%s_history' % (self.get_model_name()))
        if not self.preview_history_path.exists():
            self.preview_history_path.mkdir(exist_ok=True)
        else:
            # starting over: clear stale previews from a previous run
            if self.epoch == 0:
                for filename in Path_utils.get_image_paths(
                        self.preview_history_path):
                    Path(filename).unlink()
    # device selection (cpu_only / multi_gpu / idxs) is encapsulated here
    self.gpu_config = gpufmkmgr.GPUConfig(allow_growth=False, **in_options)
    self.gpu_total_vram_gb = self.gpu_config.gpu_total_vram_gb
    if self.epoch == 0:
        # first run: remember the VRAM the model was created with
        self.options['created_vram_gb'] = self.gpu_total_vram_gb
        self.created_vram_gb = self.gpu_total_vram_gb
    else:
        # not first run: prefer the recorded value, backfill for old files
        if 'created_vram_gb' in self.options.keys():
            self.created_vram_gb = self.options['created_vram_gb']
        else:
            self.options['created_vram_gb'] = self.gpu_total_vram_gb
            self.created_vram_gb = self.gpu_total_vram_gb
    self.tf = gpufmkmgr.import_tf(self.gpu_config)
    self.tf_sess = gpufmkmgr.get_tf_session()
    self.keras = gpufmkmgr.import_keras()
    self.keras_contrib = gpufmkmgr.import_keras_contrib()
    # subclass hook: builds the actual network and generators
    self.onInitialize(**in_options)
    # batch_size == 0 means the subclass did not choose one; default to 1
    if self.debug or self.batch_size == 0:
        self.batch_size = 1
    if self.is_training_mode:
        if self.generator_list is None:
            raise Exception('You didnt set_training_data_generators()')
        else:
            for i, generator in enumerate(self.generator_list):
                if not isinstance(generator, SampleGeneratorBase):
                    raise Exception(
                        'training data generator is not subclass of SampleGeneratorBase'
                    )
        if self.sample_for_preview is None:
            self.sample_for_preview = self.generate_next_sample()
    print("===== Model summary =====")
    print("== Model name: " + self.get_model_name())
    print("==")
    print("== Current epoch: " + str(self.epoch))
    print("==")
    print("== Options:")
    print("== |== batch_size : %s " % (self.batch_size))
    print("== |== multi_gpu : %s " % (self.gpu_config.multi_gpu))
    for key in self.options.keys():
        print("== |== %s : %s" % (key, self.options[key]))
    print("== Running on:")
    if self.gpu_config.cpu_only:
        print("== |== [CPU]")
    else:
        for idx in self.gpu_config.gpu_idxs:
            print("== |== [%d : %s]" % (idx, gpufmkmgr.getDeviceName(idx)))
    if not self.gpu_config.cpu_only and self.gpu_total_vram_gb == 2:
        print("==")
        print(
            "== WARNING: You are using 2GB GPU. Result quality may be significantly decreased."
        )
        print(
            "== If training does not start, close all programs and try again."
        )
        print(
            "== Also you can disable Windows Aero Desktop to get extra free VRAM."
        )
        print("==")
    print("=========================")