def get_model(args, scope):
    """Build a model (and locate its checkpoint) inside a named variable scope.

    Args:
      args: list of OpenSeq2Seq command-line arguments (same as for run.py).
      scope: TF variable-scope name under which the model's variables live.

    Returns:
      (model, checkpoint): the created model and the checkpoint path found in
      the configured logdir (may be None if none exists).
    """
    with tf.variable_scope(scope):
        parsed_args, config, model_cls, cfg_module = get_base_config(args)
        ckpt = check_logdir(parsed_args, config)
        net = create_model(parsed_args, config, cfg_module, model_cls, None)
    return net, ckpt
def main():
    """Entry point: parse config, set up Horovod/logging, then train/eval/infer.

    Reads ``sys.argv``; dispatches on ``args.mode`` to train / eval / infer.
    Raises ValueError for ``interactive_infer`` mode, which is not supported
    from run.py.
    """
    # Parse args and create config
    args, base_config, base_model, config_module = get_base_config(sys.argv[1:])

    if args.mode == "interactive_infer":
        # BUG FIX: the two message parts were previously passed as separate
        # ValueError arguments (comma instead of concatenation), so the
        # rendered message was a tuple of strings rather than one sentence.
        raise ValueError(
            "Interactive infer is meant to be run from an IPython "
            "notebook not from run.py."
        )

    # Initialize Horovod
    if base_config['use_horovod']:
        import horovod.tensorflow as hvd
        hvd.init()
        if hvd.rank() == 0:
            deco_print("Using horovod")
    else:
        hvd = None

    restore_best_checkpoint = base_config.get('restore_best_checkpoint', False)
    # Check logdir and create it if necessary
    checkpoint = check_logdir(args, base_config, restore_best_checkpoint)

    if args.enable_logs:
        # Only rank 0 redirects stdout/stderr into log files; every rank
        # still points its logdir at the 'logs' subdirectory.
        if hvd is None or hvd.rank() == 0:
            old_stdout, old_stderr, stdout_log, stderr_log = create_logdir(
                args, base_config)
        base_config['logdir'] = os.path.join(base_config['logdir'], 'logs')

    if args.mode == 'train' or args.mode == 'train_eval' or args.benchmark:
        if hvd is None or hvd.rank() == 0:
            if checkpoint is None or args.benchmark:
                deco_print("Starting training from scratch")
            else:
                deco_print(
                    "Restored checkpoint from {}. Resuming training".format(
                        checkpoint),
                )
    elif args.mode == 'eval' or args.mode == 'infer':
        if hvd is None or hvd.rank() == 0:
            deco_print("Loading model from {}".format(checkpoint))

    # Create model and train/eval/infer
    with tf.Graph().as_default():
        model = create_model(args, base_config, config_module, base_model, hvd)
        if args.mode == "train_eval":
            # create_model returns a (train_model, eval_model) pair here
            train(model[0], model[1], debug_port=args.debug_port)
        elif args.mode == "train":
            train(model, None, debug_port=args.debug_port)
        elif args.mode == "eval":
            evaluate(model, checkpoint)
        elif args.mode == "infer":
            infer(model, checkpoint, args.infer_output_file, args.use_trt)

    if args.enable_logs and (hvd is None or hvd.rank() == 0):
        # Restore the original streams and close the log files opened above.
        sys.stdout = old_stdout
        sys.stderr = old_stderr
        stdout_log.close()
        stderr_log.close()
def _get_model(args, scope):
    """A simpler version of what run.py does: build the model under ``scope``.

    Returns the created model together with its saved checkpoint path.
    """
    with tf.variable_scope(scope):
        parsed, cfg, model_cls, cfg_module = get_base_config(args)
        ckpt = check_logdir(parsed, cfg)
        built = create_model(parsed, cfg, cfg_module, model_cls, None)
    return built, ckpt
def get_model(args):
    """Parse args, build and compile the model for (interactive) inference.

    For ``interactive_infer`` mode, the config module's
    ``interactive_infer_params`` overrides are merged into the base config
    before the model is constructed.

    Returns:
      (model, checkpoint, data_layer_params) triple.
    """
    parsed, config, model_cls, cfg_module = get_base_config(args)
    ckpt = check_logdir(parsed, config)
    if parsed.mode == "interactive_infer":
        # Deep-copy the overrides so the config module itself stays untouched.
        overrides = copy.deepcopy(cfg_module['interactive_infer_params'])
        nested_update(config, overrides)
    net = model_cls(params=config, mode=parsed.mode, hvd=None)
    net.compile()
    return net, ckpt, config["data_layer_params"]
def get_model(args):
    """Parse the run configuration, then build the model and find its checkpoint.

    Args:
      args: list of OpenSeq2Seq command-line arguments (same as for run.py).

    Returns:
      (model, checkpoint) pair; checkpoint may be None if the logdir is fresh.
    """
    parsed, cfg, model_cls, cfg_module = get_base_config(args)
    ckpt = check_logdir(parsed, cfg)
    return create_model(parsed, cfg, cfg_module, model_cls, None), ckpt
def __init__(self, model_params=MODEL_PARAMS, scope_name='S2T',
             sr=16000, frame_len=0.2, frame_overlap=2.4,
             timestep_duration=0.02,
             ext_model_infer_func=None, merge=True,
             beam_width=1, language_model=None, alpha=2.8, beta=1.0):
    '''
    Args:
      model_params: list of OpenSeq2Seq arguments (same as for run.py)
      scope_name: model's scope name
      sr: sample rate, Hz
      frame_len: frame's duration, seconds
      frame_overlap: duration of overlaps before and after current frame,
        seconds; frame_overlap should be multiple of frame_len
      timestep_duration: time per step at model's output, seconds
      ext_model_infer_func: callback for external inference engine; if it is
        not None, then we don't build TF inference graph
      merge: whether to do merge in greedy decoder
      beam_width: beam width for beam search decoder if larger than 1
      language_model: path to LM (to use with beam search decoder)
      alpha: LM weight (trade-off between acoustic and LM scores)
      beta: word weight (added per every transcribed word in prediction)
    '''
    if ext_model_infer_func is None:
        # Build TF inference graph
        self.model_S2T, checkpoint_S2T = self._get_model(model_params,
                                                         scope_name)

        # Create the session and load the checkpoints
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        self.sess = tf.InteractiveSession(config=sess_config)
        vars_S2T = {}
        for v in tf.get_collection(tf.GraphKeys.VARIABLES):
            if scope_name in v.name:
                # Strip the leading scope component so names match the
                # checkpoint's variable names.
                vars_S2T['/'.join(v.op.name.split('/')[1:])] = v
        saver_S2T = tf.train.Saver(vars_S2T)
        saver_S2T.restore(self.sess, checkpoint_S2T)
        self.params = self.model_S2T.params
    else:
        # No TF, load pre-, post-processing parameters from config,
        # use external inference engine
        _, base_config, _, _ = get_base_config(model_params)
        self.params = base_config
        self.ext_model_infer_func = ext_model_infer_func

    # BUG FIX: previously read self.model_S2T.params[...], which raised
    # AttributeError in the external-inference branch where self.model_S2T
    # is never assigned. self.params is set in both branches (and equals
    # self.model_S2T.params in the TF branch, so TF behavior is unchanged).
    # NOTE(review): assumes base_config also carries 'data_layer_params'
    # with a 'vocab_file' entry — confirm against the config module.
    self.vocab = self._load_vocab(
        self.params['data_layer_params']['vocab_file']
    )
    self.sr = sr
    self.frame_len = frame_len
    self.n_frame_len = int(frame_len * sr)
    self.frame_overlap = frame_overlap
    self.n_frame_overlap = int(frame_overlap * sr)
    if self.n_frame_overlap % self.n_frame_len:
        raise ValueError(
            "'frame_overlap' should be multiple of 'frame_len'"
        )
    # Number of output timesteps covered by one overlap region; the -2
    # trims boundary steps (see offset below).
    self.n_timesteps_overlap = int(frame_overlap / timestep_duration) - 2
    # Rolling audio buffer: [overlap | current frame | overlap] samples.
    self.buffer = np.zeros(shape=2 * self.n_frame_overlap + self.n_frame_len,
                           dtype=np.float32)
    self.merge = merge
    self._beam_decoder = None
    # greedy decoder's state (unmerged transcription)
    self.text = ''
    # forerunner greedy decoder's state (unmerged transcription)
    self.forerunner_text = ''
    self.offset = 5
    # self._calibrate_offset()
    if beam_width > 1:
        if language_model is None:
            self._beam_decoder = BeamDecoder(self.vocab, beam_width)
        else:
            self._scorer = Scorer(alpha, beta, language_model, self.vocab)
            self._beam_decoder = BeamDecoder(self.vocab, beam_width,
                                             ext_scorer=self._scorer)

    self.reset()