def prepare_data(self, train_path, valid_path, test_path):
    """Prepare train/validation/test sets. Create or load vocabularies."""
    # Prepare data.
    print("Preparing G2P data")
    (train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,
     self.ph_vocab, self.test_lines) = data_utils.prepare_g2p_data(
         self.model_dir, train_path, valid_path, test_path)

    # Read data into buckets and compute their sizes.
    print("Reading development and training data.")
    if self.mode == 'g2p':
        self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
        self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
        self.rev_ph_vocab = dict((x, y) for (y, x) in enumerate(self.ph_vocab))
    else:
        self.valid_set = self.__put_into_buckets(valid_ph_ids, valid_gr_ids)
        self.train_set = self.__put_into_buckets(train_ph_ids, train_gr_ids)
        self.rev_gr_vocab = dict((x, y) for (y, x) in enumerate(self.gr_vocab))
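
# __put_into_buckets is called above but not defined in this excerpt. Below is
# a hypothetical sketch of what it plausibly does, assuming self._BUCKETS is a
# list of (source_size, target_size) length pairs in increasing order, as in
# the old TensorFlow seq2seq tutorial. Under that assumption, each pair goes
# into the smallest bucket it fits, and pairs that fit no bucket are dropped.
def __put_into_buckets(self, source, target):
    """Put each (source, target) id-sequence pair into the smallest bucket
    whose size limits it fits (sketch, not the original implementation)."""
    data_set = [[] for _ in self._BUCKETS]
    for source_ids, target_ids in zip(source, target):
        for bucket_id, (source_size, target_size) in enumerate(self._BUCKETS):
            if len(source_ids) < source_size and len(target_ids) < target_size:
                data_set[bucket_id].append([source_ids, target_ids])
                break
    return data_set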

def __train_init(self, params, train_path, valid_path=None, test_path=None):
    """Create G2P model and initialize or load parameters in session."""
    # Preliminary actions before model creation:
    # save the model hyperparameters so a saved model can be reloaded later.
    if self.model_dir:
        data_utils.save_params(params.num_layers, params.size, self.model_dir)

    # Prepare G2P data.
    print("Preparing G2P data")
    (train_gr_ids, train_ph_ids, valid_gr_ids, valid_ph_ids, self.gr_vocab,
     self.ph_vocab, self.test_lines) = data_utils.prepare_g2p_data(
         self.model_dir, train_path, valid_path, test_path)

    # Read data into buckets and compute their sizes.
    print("Reading development and training data.")
    self.valid_set = self.__put_into_buckets(valid_gr_ids, valid_ph_ids)
    self.train_set = self.__put_into_buckets(train_gr_ids, train_ph_ids)
    self.rev_ph_vocab = dict((x, y) for (y, x) in enumerate(self.ph_vocab))

    self.session = tf.Session()

    # Create model.
    print("Creating %d layers of %d units." % (params.num_layers, params.size))
    self.model = seq2seq_model.Seq2SeqModel(
        len(self.gr_vocab), len(self.ph_vocab), self._BUCKETS, params.size,
        params.num_layers, params.max_gradient_norm, params.batch_size,
        params.learning_rate, params.lr_decay_factor, forward_only=False)
    self.model.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
    print("Created model with fresh parameters.")
    self.session.run(tf.initialize_all_variables())
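
# The docstring above says "initialize or load", but this excerpt always
# starts from fresh parameters. A minimal sketch of restoring a previously
# saved model instead, assuming a checkpoint exists under self.model_dir:
#
#     ckpt = tf.train.get_checkpoint_state(self.model_dir)
#     if ckpt and ckpt.model_checkpoint_path:
#         self.model.saver.restore(self.session, ckpt.model_checkpoint_path)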
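
# Note: tf.all_variables() and tf.initialize_all_variables() are pre-1.0
# TensorFlow APIs that were deprecated and later removed. If this code is run
# against TF 1.x in graph mode, the equivalent calls would be:
#
#     self.model.saver = tf.train.Saver(tf.global_variables(), max_to_keep=1)
#     self.session.run(tf.global_variables_initializer())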