Exemple #1
0
    def prepare_data(self, train_path, valid_path, test_path):
        """Prepare the bucketed train/validation sets and the vocabularies.

        The three paths are forwarded, together with ``self.model_dir``, to
        ``data_utils.prepare_g2p_data``. Its result provides the grapheme and
        phoneme id collections and is also stored in ``self.gr_vocab``,
        ``self.ph_vocab`` and ``self.test_lines``.

        When ``self.mode`` equals ``'g2p'`` the grapheme ids are handed to the
        bucketing helper first and ``self.rev_ph_vocab`` is built. For any
        other mode the phoneme ids go first and ``self.rev_gr_vocab`` is built
        instead. Results are stored on ``self``; nothing is returned.

        Args:
            train_path: Training data location, passed to ``data_utils``.
            valid_path: Validation data location, passed to ``data_utils``.
            test_path: Test data location, passed to ``data_utils``.
        """
        print("Preparing G2P data")
        prepared = data_utils.prepare_g2p_data(
            self.model_dir, train_path, valid_path, test_path)
        (gr_train, ph_train, gr_valid, ph_valid,
         self.gr_vocab, self.ph_vocab, self.test_lines) = prepared

        print("Reading development and training data.")
        grapheme_first = self.mode == 'g2p'
        if grapheme_first:
            valid_args = (gr_valid, ph_valid)
            train_args = (gr_train, ph_train)
        else:
            valid_args = (ph_valid, gr_valid)
            train_args = (ph_train, gr_train)

        # The validation set is bucketed before the training set.
        self.valid_set = self.__put_into_buckets(*valid_args)
        self.train_set = self.__put_into_buckets(*train_args)

        # Only the reverse lookup for the second (output) vocabulary is built:
        # each item yielded by the vocabulary maps to its enumeration index.
        if grapheme_first:
            self.rev_ph_vocab = {
                item: position
                for position, item in enumerate(self.ph_vocab)
            }
        else:
            self.rev_gr_vocab = {
                item: position
                for position, item in enumerate(self.gr_vocab)
            }
Exemple #2
0
  def prepare_data(self, train_path, valid_path, test_path):
    """Prepare the bucketed train/validation sets and the vocabularies.

    The three paths are forwarded, together with ``self.model_dir``, to
    ``data_utils.prepare_g2p_data``. Its result provides the grapheme and
    phoneme id collections and is also stored in ``self.gr_vocab``,
    ``self.ph_vocab`` and ``self.test_lines``. The id collections are then
    bucketed into ``self.valid_set`` and ``self.train_set`` and a reverse
    lookup ``self.rev_ph_vocab`` is built. Nothing is returned.

    Args:
      train_path: Training data location, passed to ``data_utils``.
      valid_path: Validation data location, passed to ``data_utils``.
      test_path: Test data location, passed to ``data_utils``.
    """
    print("Preparing G2P data")
    prepared = data_utils.prepare_g2p_data(
        self.model_dir, train_path, valid_path, test_path)
    (gr_train, ph_train, gr_valid, ph_valid,
     self.gr_vocab, self.ph_vocab, self.test_lines) = prepared

    # The validation set is bucketed before the training set.
    print("Reading development and training data.")
    self.valid_set = self.__put_into_buckets(gr_valid, ph_valid)
    self.train_set = self.__put_into_buckets(gr_train, ph_train)

    # Each item yielded by the phoneme vocabulary maps to its enumeration
    # index.
    self.rev_ph_vocab = {
        item: position for position, item in enumerate(self.ph_vocab)
    }
Exemple #3
0
    def __train_init(self,
                     params,
                     train_path,
                     valid_path=None,
                     test_path=None):
        """Set up data, session and a freshly initialized model for training.

        Steps, in order: save ``params.num_layers`` and ``params.size`` via
        ``data_utils.save_params`` when ``self.model_dir`` is truthy, prepare
        and bucket the data, open a ``tf.Session``, build a
        ``seq2seq_model.Seq2SeqModel`` with ``forward_only=False``, attach a
        saver to it and run the variable initializer. Results are stored on
        ``self``; nothing is returned.

        Args:
            params: Object exposing ``num_layers``, ``size``,
                ``max_gradient_norm``, ``batch_size``, ``learning_rate`` and
                ``lr_decay_factor``.
            train_path: Training data location, passed to ``data_utils``.
            valid_path: Validation data location, passed to ``data_utils``.
            test_path: Test data location, passed to ``data_utils``.
        """
        # Persist the architecture parameters before anything else happens.
        if self.model_dir:
            data_utils.save_params(
                params.num_layers, params.size, self.model_dir)

        print("Preparing G2P data")
        prepared = data_utils.prepare_g2p_data(
            self.model_dir, train_path, valid_path, test_path)
        (gr_train, ph_train, gr_valid, ph_valid,
         self.gr_vocab, self.ph_vocab, self.test_lines) = prepared

        # The validation set is bucketed before the training set.
        print("Reading development and training data.")
        self.valid_set = self.__put_into_buckets(gr_valid, ph_valid)
        self.train_set = self.__put_into_buckets(gr_train, ph_train)

        # Each item yielded by the phoneme vocabulary maps to its enumeration
        # index.
        self.rev_ph_vocab = {
            item: position for position, item in enumerate(self.ph_vocab)
        }

        self.session = tf.Session()

        print("Creating %d layers of %d units." %
              (params.num_layers, params.size))
        self.model = seq2seq_model.Seq2SeqModel(
            len(self.gr_vocab),
            len(self.ph_vocab),
            self._BUCKETS,
            params.size,
            params.num_layers,
            params.max_gradient_norm,
            params.batch_size,
            params.learning_rate,
            params.lr_decay_factor,
            forward_only=False)

        # NOTE(review): tf.all_variables() and tf.initialize_all_variables()
        # are deprecated in later TensorFlow 1.x releases in favour of
        # tf.global_variables() / tf.global_variables_initializer(); left
        # unchanged here -- confirm the targeted TensorFlow version first.
        self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        print("Created model with fresh parameters.")
        self.session.run(tf.initialize_all_variables())
Exemple #4
0
  def __train_init(self, params, train_path, valid_path=None, test_path=None):
    """Set up data, session and a freshly initialized model for training.

    Steps, in order: save ``params.num_layers`` and ``params.size`` via
    ``data_utils.save_params`` when ``self.model_dir`` is truthy, prepare and
    bucket the data, open a ``tf.Session``, build a
    ``seq2seq_model.Seq2SeqModel`` with ``forward_only=False``, attach a saver
    to it and run the variable initializer. Results are stored on ``self``;
    nothing is returned.

    Args:
      params: Object exposing ``num_layers``, ``size``, ``max_gradient_norm``,
        ``batch_size``, ``learning_rate`` and ``lr_decay_factor``.
      train_path: Training data location, passed to ``data_utils``.
      valid_path: Validation data location, passed to ``data_utils``.
      test_path: Test data location, passed to ``data_utils``.
    """
    # Persist the architecture parameters before anything else happens.
    if self.model_dir:
      data_utils.save_params(params.num_layers, params.size, self.model_dir)

    print("Preparing G2P data")
    prepared = data_utils.prepare_g2p_data(
        self.model_dir, train_path, valid_path, test_path)
    (gr_train, ph_train, gr_valid, ph_valid,
     self.gr_vocab, self.ph_vocab, self.test_lines) = prepared

    # The validation set is bucketed before the training set.
    print("Reading development and training data.")
    self.valid_set = self.__put_into_buckets(gr_valid, ph_valid)
    self.train_set = self.__put_into_buckets(gr_train, ph_train)

    # Each item yielded by the phoneme vocabulary maps to its enumeration
    # index.
    self.rev_ph_vocab = {
        item: position for position, item in enumerate(self.ph_vocab)
    }

    self.session = tf.Session()

    print("Creating %d layers of %d units." % (params.num_layers, params.size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab),
        len(self.ph_vocab),
        self._BUCKETS,
        params.size,
        params.num_layers,
        params.max_gradient_norm,
        params.batch_size,
        params.learning_rate,
        params.lr_decay_factor,
        forward_only=False)

    # NOTE(review): tf.all_variables() and tf.initialize_all_variables() are
    # deprecated in later TensorFlow 1.x releases in favour of
    # tf.global_variables() / tf.global_variables_initializer(); left
    # unchanged here -- confirm the targeted TensorFlow version first.
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    print("Created model with fresh parameters.")
    self.session.run(tf.initialize_all_variables())