def __init__(self, sess, dataset, dataset_size, logdir, ckpt, decoder_params,
             model_params):
  """Build a single-utterance decoding model plus its input pipeline.

  Args:
    sess: TensorFlow session used to construct / restore the model.
    dataset: path to a TFRecord file of serialized utterances.
    dataset_size: number of utterances in `dataset`.
    logdir: directory for logs / checkpoints.
    ckpt: checkpoint to restore model weights from.
    decoder_params: decoder configuration (token model path, beam width).
    model_params: model configuration; NOTE: mutated in place below.
  """
  self.decoder_params = decoder_params
  if self.decoder_params.token_model:
    self.token_model = token_model.TokenModel(
        self.decoder_params.token_model)
  else:
    self.token_model = token_model.TokenModel(
        "speech4/conf/token_model_character_simple.pbtxt")

  self.model_params = model_params
  # Force median attention and placeholder-fed inputs: the decoder emits
  # one token per step, so tokens_len_max is pinned to 1.
  self.model_params.attention_params.type = "median"
  self.model_params.attention_params.median_window_l = 10
  self.model_params.attention_params.median_window_r = 100
  self.model_params.tokens_len_max = 1
  self.model_params.input_layer = "placeholder"

  self.dataset = dataset
  self.dataset_size = dataset_size
  self.logdir = logdir

  with tf.variable_scope("model"):
    self.model = las_model.LASModel(
        sess, dataset, logdir, ckpt, True, self.decoder_params.beam_width,
        self.model_params)

  # Graph to read 1 utterance.
  # Fix: the original also created a discarded
  # tf.train.string_input_producer([dataset]) whose queue was never read;
  # only the producer below is actually consumed.
  reader = tf.TFRecordReader()
  filename_queue = tf.train.string_input_producer([self.dataset])
  _, serialized = reader.read(filename_queue)
  serialized = tf.train.batch(
      [serialized], batch_size=1, num_threads=2, capacity=2)

  # Only features, their lengths, the ground-truth text and the utterance
  # id are needed here; the other parsed outputs are dropped.
  self.features, _, self.features_len, _, _, self.text, _, _, _, _, self.uttid = s4_parse_utterance(
      serialized, features_len_max=self.model_params.features_len_max,
      tokens_len_max=1)
def __init__(self, sess, dataset, dataset_size, logdir, ckpt, decoder_params,
             model_params):
  """Build a decoder-fed model plus a single-utterance input pipeline.

  Args:
    sess: TensorFlow session used to construct / restore the model.
    dataset: path to a TFRecord file of serialized utterances.
    dataset_size: number of utterances in `dataset`.
    logdir: directory for logs / checkpoints.
    ckpt: checkpoint to restore model weights from.
    decoder_params: decoder configuration (token model path, beam width).
    model_params: model configuration; NOTE: mutated in place below.
  """
  self.decoder_params = decoder_params
  if self.decoder_params.token_model:
    self.token_model = token_model.TokenModel(
        self.decoder_params.token_model)
  else:
    self.token_model = token_model.TokenModel(
        "speech4/conf/token_model_character_simple.pbtxt")

  self.model_params = model_params
  # Force median attention; inputs are fed from the decoder itself.
  self.model_params.attention_params.type = "median"
  self.model_params.attention_params.median_window_l = 10
  self.model_params.attention_params.median_window_r = 100
  self.model_params.input_layer = "decoder"

  self.dataset = dataset
  self.dataset_size = dataset_size
  self.logdir = logdir

  with tf.variable_scope("model"):
    self.model = las_model.LASModel(
        sess, dataset, logdir, ckpt, True, self.decoder_params.beam_width,
        self.model_params)

  # Graph to read 1 utterance.
  # Fix: the original also created a discarded
  # tf.train.string_input_producer([dataset]) whose queue was never read,
  # and carried a commented-out older parse call; both removed.
  reader = tf.TFRecordReader()
  filename_queue = tf.train.string_input_producer([self.dataset])
  _, serialized = reader.read(filename_queue)
  serialized = tf.train.batch(
      [serialized], batch_size=1, num_threads=2, capacity=2)

  self.features, self.features_fbank, self.features_len, _, self.features_weight, self.text, self.tokens, self.tokens_pinyin, self.tokens_len, self.tokens_weights, self.tokens_pinyin_weights, self.uttid = s4_parse_utterance(
      serialized, features_len_max=self.model_params.features_len_max,
      tokens_len_max=self.model_params.tokens_len_max + 1,
      frame_stack=self.model_params.frame_stack,
      frame_skip=self.model_params.frame_skip)

  # Add the static shape to the features (stacked frames widen each step).
  for feature in self.features:
    feature.set_shape(
        [1, self.model_params.features_width * self.model_params.frame_stack])
  for token in self.tokens:
    # Fix: `if token:` tests Tensor truthiness, which is undefined for
    # graph tensors; the intent is clearly a None guard.
    if token is not None:
      token.set_shape([1])
def create_graph_inputs(self):
  """Create the shuffled, batched training/eval input pipeline.

  Resolves well-known dataset names to TFRecord paths and utterance
  counts, then builds a shuffle-batch reader and parses utterances into
  feature / token tensors stored on `self`.
  """
  # Fix: the original declared an unused `dataset_map = {}` and resolved
  # names through a six-way if/elif chain; a lookup table is clearer.
  # Maps dataset name -> (tfrecords path, utterance count).
  known_datasets = {
      'train_si284': ('speech4/data/train_si284.tfrecords', 37416),
      'test_dev93': ('speech4/data/test_dev93.tfrecords', 503),
      'test_eval92': ('speech4/data/test_eval92.tfrecords', 333),
      'ptb_train': ('speech4/data/ptb_train.tfrecords', 42068),
      'ptb_valid': ('speech4/data/ptb_valid.tfrecords', 3370),
      'ptb_test': ('speech4/data/ptb_test.tfrecords', 3761),
  }
  if self.dataset in known_datasets:
    # Unknown names fall through unchanged (assumed to already be paths).
    self.dataset, self.dataset_size = known_datasets[self.dataset]

  filename_queue = tf.train.string_input_producer([self.dataset])
  reader = tf.TFRecordReader()
  _, serialized = reader.read(filename_queue)
  # Fixed seed keeps the shuffle order reproducible across runs.
  serialized = tf.train.shuffle_batch(
      [serialized], batch_size=self.batch_size, num_threads=2,
      capacity=self.batch_size * 4 + 512, min_after_dequeue=512, seed=1000)

  # Parse the batch of serialized strings into the relevant utterance
  # features.
  self.features, self.features_len, _, self.text, self.tokens, self.tokens_len, self.tokens_weights, self.uttid = s4_parse_utterance(
      serialized, features_len_max=self.features_len_max,
      tokens_len_max=self.tokens_len_max + 1)

  # Add the static shape to the features and tokens.
  for feature in self.features:
    feature.set_shape([self.batch_size, self.features_width])
  for token in self.tokens:
    token.set_shape([self.batch_size])