def __init__(self, config):
    """Read solver settings out of *config* and prepare lazy model slots."""
    super().__init__(config)
    self.batch_input_shape = None

    solver = config['solver']
    self._solver = solver
    opt_conf = solver['optimizer']
    lr_conf = opt_conf['learning_rate']

    self._num_epochs = opt_conf['epochs']
    self._lr = lr_conf['rate']
    self._decay_rate = lr_conf['decay_rate']
    # 'val_metric' schedule reduces lr on a plateau of the validation metric
    self._val_metric = lr_conf['type'] == 'val_metric'
    if self._val_metric:
        self._min_lr = lr_conf['min_rate']
        self._patience = lr_conf['patience']
    self._clipnorm = opt_conf['clip_global_norm']
    self._early_stopping = opt_conf['early_stopping']['enable']
    self._monitor_used = solver['metrics']['monitor_used']
    self._model_path = solver['saver']['model_path']

    logging.info('num_epochs : {}'.format(self._num_epochs))
    logging.info('lr : {}'.format(self._lr))
    logging.info('saver path : {}'.format(self._model_path))

    device_list, self._ngpu = utils.gpu_device_names()
    logging.info(f"ngpu: {self._ngpu}, device list: {device_list}")

    # model objects are created later by build()
    self._model = None
    self._parallel_model = None
    self._built = False
def input_fn(dataset, mode, batch_size, num_epoch=None):
    """Wrap *dataset* in a zero-argument callable for the estimator API.

    Args:
        dataset: callable of (mode, per_device_batch_size, num_epoch).
        mode: learning phase (e.g. utils.TRAIN).
        batch_size: global batch size across all devices.
        num_epoch: number of epochs; forced to 1 outside training.

    Returns:
        A no-arg function producing the dataset for this phase.
    """
    if mode != utils.TRAIN:
        # Evaluate / infer on a single device, otherwise remainder samples
        # would be dropped, e.g. a 32 batch spread over 3 gpus.
        per_device_batch_size = batch_size
        num_epoch = 1
    else:
        _, num_gpus = utils.gpu_device_names()
        per_device_batch_size = utils.per_device_batch_size(
            batch_size, num_gpus)

    logging.info(
        "Learning Phase: {}, Total Batch size:{}, Per device batch size: {}".
        format(mode, batch_size, per_device_batch_size))

    def _input_fn():
        return dataset(mode, per_device_batch_size, num_epoch)

    return _input_fn
def create_estimator(self):
    """Assemble a tf.estimator.Estimator from this model's config.

    Returns:
        A configured tf.estimator.Estimator instance.
    """
    # No extra hyper-params are passed beyond the model_fn closure.
    model_params = HParams()
    model_fn = self.model_fn()

    # Multi-gpu distribution strategy.
    devices, num_gpu = utils.gpu_device_names()
    distribution = utils.get_distribution_strategy(num_gpu)
    logging.info('Device: {}/{}'.format(num_gpu, devices))

    # Run / saver sections of the solver config.
    tfconf = self.config['solver']['run_config']
    saverconf = self.config['solver']['saver']

    session_config = tf.ConfigProto(
        allow_soft_placement=tfconf['allow_soft_placement'],
        log_device_placement=tfconf['log_device_placement'],
        intra_op_parallelism_threads=tfconf['intra_op_parallelism_threads'],
        inter_op_parallelism_threads=tfconf['inter_op_parallelism_threads'],
        gpu_options=tf.GPUOptions(allow_growth=tfconf['allow_growth']))

    run_config = tf.estimator.RunConfig(  #pylint: disable=no-member
        tf_random_seed=tfconf['tf_random_seed'],
        session_config=session_config,
        save_summary_steps=saverconf['save_summary_steps'],
        keep_checkpoint_max=saverconf['max_to_keep'],
        log_step_count_steps=tfconf['log_step_count_steps'],
        train_distribute=distribution,
        device_fn=None,
        protocol=None,
        eval_distribute=None,
        experimental_distribute=None,
    )

    # Instantiate Estimator.
    return tf.estimator.Estimator(  #pylint: disable=no-member,invalid-name
        model_fn=model_fn,
        model_dir=saverconf['model_path'],
        config=run_config,
        params=model_params,
        warm_start_from=None,
    )
def __init__(self, config):
    """Read solver/loader/metrics settings from *config*; model built later."""
    super().__init__(config)
    self.batch_input_shape = None

    solver = config['solver']
    self._solver = solver
    opt_conf = solver['optimizer']
    lr_conf = opt_conf['learning_rate']
    metrics_conf = solver['metrics']
    loader_conf = solver['loader']

    self._num_epochs = opt_conf['epochs']
    self._lr = lr_conf['rate']
    self._decay_rate = lr_conf['decay_rate']
    # 'val_metric' schedule reduces lr on a plateau of the validation metric
    self._val_metric = lr_conf['type'] == 'val_metric'
    if self._val_metric:
        self._min_lr = lr_conf['min_rate']
        self._patience = lr_conf['patience']
    self._clipnorm = opt_conf['clip_global_norm']
    self._early_stopping = opt_conf['early_stopping']['enable']

    # Fall back to 'val_loss' when no monitor is configured.
    self._monitor_used = metrics_conf['monitor_used'] or 'val_loss'
    metrics_used = metrics_conf['metrics_used']
    self._metrics_used = [] if metrics_used is None else metrics_used

    self._model_path = solver['saver']['model_path']
    self._model_load_type = loader_conf['model_load_type']
    self._init_epoch = loader_conf['init_epoch']
    self._specified_model_file = loader_conf['file_name']
    # Keras-style checkpoint naming; filled per-epoch at save time.
    self._checkpoint_file_pattern = 'model.{epoch:02d}-{monitor_used:.2f}.ckpt'

    logging.info('num_epochs : {}'.format(self._num_epochs))
    logging.info('lr : {}'.format(self._lr))
    logging.info('saver path : {}'.format(self._model_path))

    device_list, self._ngpu = utils.gpu_device_names()
    logging.info(f"ngpu: {self._ngpu}, device list: {device_list}")

    # model objects are created later by build()
    self._model = None
    self._parallel_model = None
    self._built = False
def __init__(self, config):
    """Read optimizer/saver settings from *config*; model built later."""
    super().__init__(config)

    solver = config['solver']
    self._solver = solver
    opt_conf = solver['optimizer']

    self._num_epochs = opt_conf['epochs']
    self._lr = opt_conf['learning_rate']['rate']
    self._decay = opt_conf['learning_rate']['decay_rate']
    self._clipnorm = opt_conf['clip_global_norm']
    self._optimizer = opt_conf['name']
    self._early_stopping = opt_conf['early_stopping']['enable']
    self._model_path = solver['saver']['model_path']

    logging.info('num_epochs : {}'.format(self._num_epochs))
    logging.info('lr : {}'.format(self._lr))
    logging.info('saver path : {}'.format(self._model_path))

    _, self._ngpu = utils.gpu_device_names()

    # model objects are created later by build()
    self._model = None
    self._parallel_model = None
    self._built = False
def get_batches(config, mode):
    """Make batches of metas and get dataset size.

    Args:
        config: experiment config dict (data / solver sections).
        mode: learning phase, one of utils.TRAIN / utils.EVAL / utils.INFER.

    Returns:
        dict with 'data' (list of minibatches from make_batchset) and
        'n_utts' (number of utterances in the json meta).

    Raises:
        ValueError: if the configured task type is not in TASK_SET.
    """
    assert mode in (utils.TRAIN, utils.EVAL, utils.INFER)

    # read meta of json
    logging.info("load json data")
    json_path = config['data'][mode]['paths']
    assert len(json_path) == 1
    #pylint: disable=invalid-name
    with open(json_path[0], 'r', encoding='utf-8') as f:
        metas_raw = json.load(f)['utts']

    # sort by utts id so batching is deterministic across runs
    metas = OrderedDict(sorted(metas_raw.items(), key=lambda t: t[0]))

    # dataset size (len(metas) instead of len(metas.keys()))
    utts = len(metas)
    logging.info('# utts: ' + str(utts))

    # make batchset
    use_sortagrad = config['data']['task']['sortagrad']
    task = config['data']['task']['type']
    assert task in list(TASK_SET.keys())
    # using same json for asr and tts task: tts swaps source and target
    if task == TASK_SET['asr']:
        src, tgt = 'src', 'tgt'
    elif task == TASK_SET['tts']:
        src, tgt = 'tgt', 'src'
    else:
        # fixed garbled message (was "task type must int : {} get : {}")
        raise ValueError("task type must be in : {} got : {}".format(
            list(TASK_SET.keys()), task))

    maxlen_src = config['data']['task'][src]['max_len']
    maxlen_tgt = config['data']['task'][tgt]['max_len']
    batch_sort_key = config['data']['task']['batch_sort_key']
    num_batches = config['data']['task']['num_batches']

    # split the global batch evenly over available gpus
    _, ngpu = utils.gpu_device_names()
    global_batch_size = config['solver']['optimizer']['batch_size']
    batch_size = utils.per_device_batch_size(global_batch_size, ngpu)

    batch_bins = config['solver']['optimizer']['batch_bins']
    batch_frames_in = config['solver']['optimizer']['batch_frames_in']
    batch_frames_out = config['solver']['optimizer']['batch_frames_out']
    batch_frames_inout = config['solver']['optimizer']['batch_frames_inout']
    batch_strategy = config['solver']['optimizer']['batch_strategy']

    minibatches = make_batchset(
        task=task,
        data=metas,
        batch_size=batch_size,
        max_length_in=maxlen_src,
        max_length_out=maxlen_tgt,
        num_batches=num_batches,
        batch_sort_key=batch_sort_key,
        # every batch must cover all gpus
        min_batch_size=ngpu if ngpu else 1,
        shortest_first=use_sortagrad,
        batch_bins=batch_bins,
        batch_frames_in=batch_frames_in,
        batch_frames_out=batch_frames_out,
        batch_frames_inout=batch_frames_inout,
        batch_strategy=batch_strategy)

    return {'data': minibatches, 'n_utts': utts}