Ejemplo n.º 1
0
  def __init__(self, model_dir):
    """Build a decoding-ready G2P model from a saved model directory.

    `model_dir` is always stored on the instance. When it is falsy, or when
    it holds no "model" entry, construction stops there: no vocabularies,
    session or model are created. Otherwise the stored layer parameters and
    both vocabularies are loaded, a TensorFlow session is opened, a
    forward-only Seq2SeqModel is built and its variables are restored from
    os.path.join(model_dir, "model").

    Args:
      model_dir: directory expected to contain "model", "vocab.grapheme"
        and "vocab.phoneme".
    """
    self.model_dir = model_dir

    # Nothing to restore: leave the instance with only `model_dir` set.
    if not model_dir or not os.path.exists(
        os.path.join(self.model_dir, "model")):
      return

    # Network dimensions are read back from the model directory.
    num_layers, size = data_utils.load_params(self.model_dir)
    decode_batch_size = 1  # Words are decoded one at a time.

    # Grapheme and phoneme vocabularies; the phoneme file is loaded a second
    # time with reverse=True for `rev_ph_vocab`.
    grapheme_vocab_path = os.path.join(self.model_dir, "vocab.grapheme")
    self.gr_vocab = data_utils.load_vocabulary(grapheme_vocab_path)
    phoneme_vocab_path = os.path.join(self.model_dir, "vocab.phoneme")
    self.ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path)
    self.rev_ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path,
                                                   reverse=True)

    self.session = tf.Session()

    # Build the network. The three zeros are passed positionally; presumably
    # they fill training-only hyper-parameters that are unused when
    # forward_only=True -- TODO confirm against Seq2SeqModel's signature.
    print("Creating %d layers of %d units." % (num_layers, size))
    source_vocab_size = len(self.gr_vocab)
    target_vocab_size = len(self.ph_vocab)
    self.model = seq2seq_model.Seq2SeqModel(
        source_vocab_size, target_vocab_size, self._BUCKETS, size,
        num_layers, 0, decode_batch_size, 0, 0, forward_only=True)
    # NOTE(review): tf.all_variables is the older spelling of
    # tf.global_variables; confirm the targeted TensorFlow version before
    # switching.
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

    # Restore the saved variables into the session.
    print("Reading model parameters from %s" % self.model_dir)
    restore_path = os.path.join(self.model_dir, "model")
    self.model.saver.restore(self.session, restore_path)
Ejemplo n.º 2
0
  def load_decode_model(self):
    """Restore a saved G2P model and prepare it for decoding.

    Reads the stored layer parameters and both vocabularies from
    `self.model_dir`, opens a TensorFlow session, builds a forward-only
    Seq2SeqModel with batch size 1 and restores its variables from
    os.path.join(self.model_dir, "model").

    Raises:
      RuntimeError: if `self.model_dir` contains no 'checkpoint' entry.
    """
    checkpoint_index = os.path.join(self.model_dir, 'checkpoint')
    if not os.path.exists(checkpoint_index):
      raise RuntimeError("Model not found in %s" % self.model_dir)

    self.batch_size = 1  # Words are decoded one at a time.

    # Network dimensions are read back from the model directory.
    num_layers, size = data_utils.load_params(self.model_dir)

    # Grapheme and phoneme vocabularies; the phoneme file is loaded a second
    # time with reverse=True for `rev_ph_vocab`.
    print("Loading vocabularies from %s" % self.model_dir)
    grapheme_vocab_path = os.path.join(self.model_dir, "vocab.grapheme")
    self.gr_vocab = data_utils.load_vocabulary(grapheme_vocab_path)
    phoneme_vocab_path = os.path.join(self.model_dir, "vocab.phoneme")
    self.ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path)
    self.rev_ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path,
                                                   reverse=True)

    self.session = tf.Session()

    # Rebuild the network. The three zeros are passed positionally;
    # presumably they fill training-only hyper-parameters that are unused
    # when forward_only=True -- TODO confirm against Seq2SeqModel's signature.
    print("Creating %d layers of %d units." % (num_layers, size))
    source_vocab_size = len(self.gr_vocab)
    target_vocab_size = len(self.ph_vocab)
    self.model = seq2seq_model.Seq2SeqModel(
        source_vocab_size, target_vocab_size, self._BUCKETS, size,
        num_layers, 0, self.batch_size, 0, 0, forward_only=True)
    self.model.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    # Restore the saved variables into the session.
    print("Reading model parameters from %s" % self.model_dir)
    restore_path = os.path.join(self.model_dir, "model")
    self.model.saver.restore(self.session, restore_path)
Ejemplo n.º 3
0
  def load_decode_model(self):
    """Load the stored G2P model into a fresh session for decoding.

    Steps, in order: verify that a 'checkpoint' entry exists in
    `self.model_dir`, read the saved layer parameters, load the grapheme and
    phoneme vocabularies, open a TensorFlow session, construct a
    forward-only Seq2SeqModel with batch size 1, and restore its variables
    from os.path.join(self.model_dir, "model").

    Raises:
      RuntimeError: if `self.model_dir` contains no 'checkpoint' entry.
    """
    has_checkpoint = os.path.exists(
        os.path.join(self.model_dir, 'checkpoint'))
    if not has_checkpoint:
      raise RuntimeError("Model not found in %s" % self.model_dir)

    # Decoding handles a single word per step.
    self.batch_size = 1

    # Layer count and layer width saved alongside the model.
    num_layers, size = data_utils.load_params(self.model_dir)

    print("Loading vocabularies from %s" % self.model_dir)
    gr_path = os.path.join(self.model_dir, "vocab.grapheme")
    self.gr_vocab = data_utils.load_vocabulary(gr_path)
    ph_path = os.path.join(self.model_dir, "vocab.phoneme")
    self.ph_vocab = data_utils.load_vocabulary(ph_path)
    # Same phoneme file, loaded again with reverse=True.
    self.rev_ph_vocab = data_utils.load_vocabulary(ph_path, reverse=True)

    self.session = tf.Session()

    print("Creating %d layers of %d units." % (num_layers, size))
    # Positional arguments for the model constructor. The zeros presumably
    # stand in for training-only settings, unused with forward_only=True --
    # TODO confirm against Seq2SeqModel's signature.
    model_args = (len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS,
                  size, num_layers, 0, self.batch_size, 0, 0)
    self.model = seq2seq_model.Seq2SeqModel(*model_args, forward_only=True)
    self.model.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)

    # Pull the saved variable values into the session.
    print("Reading model parameters from %s" % self.model_dir)
    self.model.saver.restore(self.session,
                             os.path.join(self.model_dir, "model"))
Ejemplo n.º 4
0
    def __init__(self, model_dir):
        """Build a decoding-ready G2P model from a saved model directory.

        `model_dir` is always stored on the instance. When it is falsy, or
        when it holds no "model" entry, construction stops there: no
        vocabularies, session or model are created. Otherwise the stored
        layer parameters and both vocabularies are loaded, a TensorFlow
        session is opened, a forward-only Seq2SeqModel is built and its
        variables are restored from os.path.join(model_dir, "model").

        Args:
            model_dir: directory expected to contain "model",
                "vocab.grapheme" and "vocab.phoneme".
        """
        self.model_dir = model_dir

        # Nothing to restore: leave the instance with only `model_dir` set.
        if not model_dir:
            return
        if not os.path.exists(os.path.join(self.model_dir, "model")):
            return

        # Layer count and layer width saved alongside the model.
        num_layers, size = data_utils.load_params(self.model_dir)
        decode_batch_size = 1  # Words are decoded one at a time.

        gr_path = os.path.join(self.model_dir, "vocab.grapheme")
        self.gr_vocab = data_utils.load_vocabulary(gr_path)
        ph_path = os.path.join(self.model_dir, "vocab.phoneme")
        self.ph_vocab = data_utils.load_vocabulary(ph_path)
        # Same phoneme file, loaded again with reverse=True.
        self.rev_ph_vocab = data_utils.load_vocabulary(ph_path, reverse=True)

        self.session = tf.Session()

        print("Creating %d layers of %d units." % (num_layers, size))
        # Positional arguments for the model constructor. The zeros
        # presumably stand in for training-only settings, unused with
        # forward_only=True -- TODO confirm against Seq2SeqModel's signature.
        model_args = (len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS,
                      size, num_layers, 0, decode_batch_size, 0, 0)
        self.model = seq2seq_model.Seq2SeqModel(*model_args,
                                                forward_only=True)
        # NOTE(review): tf.all_variables is the older spelling of
        # tf.global_variables; confirm the targeted TensorFlow version
        # before switching.
        self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

        # Pull the saved variable values into the session.
        print("Reading model parameters from %s" % self.model_dir)
        restore_path = os.path.join(self.model_dir, "model")
        self.model.saver.restore(self.session, restore_path)
Ejemplo n.º 5
0
    def __train_init(self,
                     params,
                     train_path,
                     valid_path=None,
                     test_path=None):
        """Prepare data and build a freshly initialized G2P training model.

        Passes the requested layer settings to data_utils.save_params,
        prepares the G2P data sets and vocabularies, buckets the validation
        and training pairs, opens a TensorFlow session, builds a trainable
        Seq2SeqModel (forward_only=False) and runs the variable initializer.
        Nothing is restored from disk here.

        Args:
            params: object providing num_layers, size, max_gradient_norm,
                batch_size, learning_rate and lr_decay_factor.
            train_path: training data location, forwarded to
                data_utils.prepare_g2p_data.
            valid_path: optional validation data location, forwarded as is.
            test_path: optional test data location, forwarded as is.
        """
        # save_params receives the requested sizes together with the model
        # directory and hands back the (num_layers, size) pair used below.
        num_layers, size = data_utils.save_params(params.num_layers,
                                                  params.size,
                                                  self.model_dir)

        print("Preparing G2P data")
        prepared = data_utils.prepare_g2p_data(self.model_dir, train_path,
                                               valid_path, test_path)
        (train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids,
         self.gr_vocab, self.ph_vocab, self.test_lines) = prepared

        # Bucket the validation pairs first, then the training pairs.
        print("Reading development and training data.")
        self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
        self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)

        # Phoneme vocabulary file loaded with reverse=True.
        ph_path = os.path.join(self.model_dir, "vocab.phoneme")
        self.rev_ph_vocab = data_utils.load_vocabulary(ph_path, reverse=True)

        self.session = tf.Session()

        print("Creating %d layers of %d units." % (num_layers, size))
        model_args = (len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS,
                      size, num_layers, params.max_gradient_norm,
                      params.batch_size, params.learning_rate,
                      params.lr_decay_factor)
        self.model = seq2seq_model.Seq2SeqModel(*model_args,
                                                forward_only=False)
        # NOTE(review): tf.all_variables / tf.initialize_all_variables are
        # the older spellings of tf.global_variables /
        # tf.global_variables_initializer; confirm the targeted TensorFlow
        # version before switching.
        self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

        print("Created model with fresh parameters.")
        init_op = tf.initialize_all_variables()
        self.session.run(init_op)
Ejemplo n.º 6
0
  def __train_init(self, params, train_path, valid_path=None, test_path=None):
    """Set up data, session and a fresh trainable G2P model.

    In order: hands the requested layer settings to data_utils.save_params,
    prepares the G2P data and vocabularies, sorts validation and training
    pairs into buckets, loads the phoneme vocabulary with reverse=True,
    opens a TensorFlow session, constructs a Seq2SeqModel with
    forward_only=False and runs the variable initializer. No saved
    variables are restored here.

    Args:
      params: object providing num_layers, size, max_gradient_norm,
        batch_size, learning_rate and lr_decay_factor.
      train_path: training data location, forwarded to
        data_utils.prepare_g2p_data.
      valid_path: optional validation data location, forwarded as is.
      test_path: optional test data location, forwarded as is.
    """
    # save_params is given the requested sizes plus the model directory and
    # returns the (num_layers, size) pair used to build the network.
    num_layers, size = data_utils.save_params(
        params.num_layers, params.size, self.model_dir)

    print("Preparing G2P data")
    g2p_data = data_utils.prepare_g2p_data(
        self.model_dir, train_path, valid_path, test_path)
    (train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids,
     self.gr_vocab, self.ph_vocab, self.test_lines) = g2p_data

    # Validation pairs are bucketed before the training pairs.
    print ("Reading development and training data.")
    self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
    self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)

    phoneme_vocab_path = os.path.join(self.model_dir, "vocab.phoneme")
    self.rev_ph_vocab = data_utils.load_vocabulary(phoneme_vocab_path,
                                                   reverse=True)

    self.session = tf.Session()

    print("Creating %d layers of %d units." % (num_layers, size))
    source_vocab_size = len(self.gr_vocab)
    target_vocab_size = len(self.ph_vocab)
    self.model = seq2seq_model.Seq2SeqModel(
        source_vocab_size, target_vocab_size, self._BUCKETS, size,
        num_layers, params.max_gradient_norm, params.batch_size,
        params.learning_rate, params.lr_decay_factor, forward_only=False)
    # NOTE(review): tf.all_variables / tf.initialize_all_variables are the
    # older spellings of tf.global_variables /
    # tf.global_variables_initializer; confirm the targeted TensorFlow
    # version before switching.
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)

    print("Created model with fresh parameters.")
    initializer = tf.initialize_all_variables()
    self.session.run(initializer)