Example #1
def get_model(args, scope):
    with tf.variable_scope(scope):
        args, base_config, base_model, config_module = get_base_config(args)
        checkpoint = check_logdir(args, base_config)
        model = create_model(
            args, base_config, config_module, base_model, None)
    return model, checkpoint
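A minimal usage sketch (the config path, flags, and scope name below are hypothetical placeholders modeled on run.py's command line, not taken from the snippet itself):

# Hypothetical argument list in run.py's CLI style:
args = ['--config_file=<your_config>.py',
        '--mode=infer',
        '--logdir=<your_logdir>']
model, checkpoint = get_model(args, 'S2T')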
Example #2
def main():
    # Parse args and create config
    args, base_config, base_model, config_module = get_base_config(
        sys.argv[1:])

    if args.mode == "interactive_infer":
        raise ValueError(
            "Interactive infer is meant to be run from an IPython "
            "notebook, not from run.py.")

    # Initialize Horovod
    if base_config['use_horovod']:
        import horovod.tensorflow as hvd
        hvd.init()
        if hvd.rank() == 0:
            deco_print("Using horovod")
    else:
        hvd = None

    restore_best_checkpoint = base_config.get('restore_best_checkpoint', False)

    # Check logdir and create it if necessary
    checkpoint = check_logdir(args, base_config, restore_best_checkpoint)
    if args.enable_logs:
        if hvd is None or hvd.rank() == 0:
            old_stdout, old_stderr, stdout_log, stderr_log = create_logdir(
                args, base_config)
        base_config['logdir'] = os.path.join(base_config['logdir'], 'logs')

    if args.mode in ('train', 'train_eval') or args.benchmark:
        if hvd is None or hvd.rank() == 0:
            if checkpoint is None or args.benchmark:
                deco_print("Starting training from scratch")
            else:
                deco_print(
                    "Restored checkpoint from {}. Resuming training".format(
                        checkpoint))
    elif args.mode in ('eval', 'infer'):
        if hvd is None or hvd.rank() == 0:
            deco_print("Loading model from {}".format(checkpoint))

    # Create model and train/eval/infer
    with tf.Graph().as_default():
        model = create_model(args, base_config, config_module, base_model, hvd)
        if args.mode == "train_eval":
            # in train_eval mode create_model returns (train_model, eval_model)
            train(model[0], model[1], debug_port=args.debug_port)
        elif args.mode == "train":
            train(model, None, debug_port=args.debug_port)
        elif args.mode == "eval":
            evaluate(model, checkpoint)
        elif args.mode == "infer":
            infer(model, checkpoint, args.infer_output_file, args.use_trt)

    if args.enable_logs and (hvd is None or hvd.rank() == 0):
        sys.stdout = old_stdout
        sys.stderr = old_stderr
        stdout_log.close()
        stderr_log.close()
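For completeness, a hedged sketch of how main() would be wired up as run.py's entry point (the command in the comment is illustrative; the config path is a placeholder):

if __name__ == '__main__':
    # e.g. python run.py --config_file=<your_config>.py --mode=train_eval --enable_logs
    main()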
Example #3
def _get_model(args, scope):
    '''
    A simpler version of what run.py does: it returns the created model
    and its saved checkpoint.
    '''
    with tf.variable_scope(scope):
        args, base_config, base_model, config_module = get_base_config(args)
        checkpoint = check_logdir(args, base_config)
        model = create_model(args, base_config, config_module, base_model, None)
    return model, checkpoint
Example #4
def get_model(args):
    args, base_config, base_model, config_module = get_base_config(args)
    checkpoint = check_logdir(args, base_config)
    # infer_config = copy.deepcopy(base_config)
    if args.mode == "interactive_infer":
        nested_update(base_config,
                      copy.deepcopy(config_module['interactive_infer_params']))

    model = base_model(params=base_config, mode=args.mode, hvd=None)
    model.compile()
    return model, checkpoint, base_config["data_layer_params"]
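A hedged usage sketch for this variant: because it calls base_model directly with no enclosing variable_scope, the checkpoint can be restored with a plain Saver. The argument list is a hypothetical placeholder:

# Hypothetical argument list; paths are placeholders.
args = ['--config_file=<your_config>.py',
        '--mode=interactive_infer',
        '--logdir=<your_logdir>']
model, checkpoint, data_layer_params = get_model(args)
sess = tf.InteractiveSession()
tf.train.Saver().restore(sess, checkpoint)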
Example #5
def get_model(args):
    args, base_config, base_model, config_module = get_base_config(args)
    checkpoint = check_logdir(args, base_config)
    model = create_model(args, base_config, config_module, base_model,
                         None)
    return model, checkpoint
Example #6
    def __init__(self, model_params=MODEL_PARAMS, scope_name='S2T', 
                 sr=16000, frame_len=0.2, frame_overlap=2.4, 
                 timestep_duration=0.02, 
                 ext_model_infer_func=None, merge=True,
                 beam_width=1, language_model=None, 
                 alpha=2.8, beta=1.0):
        '''
        Args:
          model_params: list of OpenSeq2Seq arguments (same as for run.py)
          scope_name: model's scope name
          sr: sample rate, Hz
          frame_len: frame's duration, seconds
          frame_overlap: duration of overlaps before and after current frame, seconds
            frame_overlap should be multiple of frame_len
          timestep_duration: time per step at model's output, seconds
          ext_model_infer_func: callback for external inference engine,
            if it is not None, then we don't build TF inference graph
          merge: whether to merge repeated characters in the greedy decoder
          beam_width: beam width for beam search decoder if larger than 1
          language_model: path to LM (to use with beam search decoder)
          alpha: LM weight (trade-off between acoustic and LM scores)
          beta: word weight (added for every transcribed word in the prediction)
        '''
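        # With the defaults (sr=16000, frame_len=0.2, frame_overlap=2.4):
        # n_frame_len = 0.2 * 16000 = 3200 samples, n_frame_overlap = 38400
        # samples, and the audio buffer allocated below spans
        # 2*38400 + 3200 = 80000 samples, i.e. 5 seconds.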
        if ext_model_infer_func is None:
            # Build TF inference graph
            self.model_S2T, checkpoint_S2T = self._get_model(model_params, scope_name)

            # Create the session and load the checkpoints
            sess_config = tf.ConfigProto(allow_soft_placement=True)
            sess_config.gpu_options.allow_growth = True
            self.sess = tf.InteractiveSession(config=sess_config)
            vars_S2T = {}
            for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                if scope_name in v.name:
                    vars_S2T['/'.join(v.op.name.split('/')[1:])] = v
            saver_S2T = tf.train.Saver(vars_S2T)
            saver_S2T.restore(self.sess, checkpoint_S2T)
            self.params = self.model_S2T.params
        else:
            # No TF, load pre-, post-processing parameters from config,
            # use external inference engine
            _, base_config, _, _ = get_base_config(model_params)
            self.params = base_config

        self.ext_model_infer_func = ext_model_infer_func

        # self.params is set in both branches above; self.model_S2T only
        # exists when the TF graph is built, so read the vocab path from params.
        self.vocab = self._load_vocab(
            self.params['data_layer_params']['vocab_file']
        )
        self.sr = sr
        self.frame_len = frame_len
        self.n_frame_len = int(frame_len * sr)
        self.frame_overlap = frame_overlap
        self.n_frame_overlap = int(frame_overlap * sr)
        if self.n_frame_overlap % self.n_frame_len:
            raise ValueError(
                "'frame_overlap' should be multiple of 'frame_len'"
            )
        self.n_timesteps_overlap = int(frame_overlap / timestep_duration) - 2
        # Audio buffer layout: [left overlap | current frame | right overlap]
        self.buffer = np.zeros(shape=2 * self.n_frame_overlap + self.n_frame_len,
                               dtype=np.float32)
        self.merge = merge
        self._beam_decoder = None
        # greedy decoder's state (unmerged transcription)
        self.text = ''
        # forerunner greedy decoder's state (unmerged transcription)
        self.forerunner_text = ''

        self.offset = 5
        # self._calibrate_offset()
        if beam_width > 1:
            if language_model is None:
                self._beam_decoder = BeamDecoder(self.vocab, beam_width)
            else:
                self._scorer = Scorer(alpha, beta, language_model, self.vocab)
                self._beam_decoder = BeamDecoder(
                    self.vocab, beam_width, ext_scorer=self._scorer)
        self.reset()
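A hedged construction sketch. Only __init__ is shown above, so the enclosing class name used here (FrameASR) is an assumption; MODEL_PARAMS is whatever run.py-style argument list the checkpoint was trained with:

# 'FrameASR' is a hypothetical name for the class whose __init__ appears above.
asr = FrameASR(model_params=MODEL_PARAMS, scope_name='S2T',
               sr=16000, frame_len=0.2, frame_overlap=2.4)
# Streaming would then proceed by feeding frame_len-sized audio chunks to the
# instance's processing method (not shown in this snippet).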