def Proc(record):
  """Parses a serialized tf.Example record into utterance tensors.

  Args:
    record: A serialized tf.Example proto (scalar string tensor).

  Returns:
    A ([uttid, tgt_ids, tgt_labels, tgt_paddings, frames, src_paddings],
    bucket_key) pair. `bucket_key` is the frame count (plus one when an
    EOS frame is appended).
  """
  feature_map = {
      'uttid': tf.VarLenFeature(tf.string),
      'transcript': tf.VarLenFeature(tf.string),
      'frames': tf.VarLenFeature(tf.float32),
  }
  parsed = tf.parse_single_example(record, feature_map)
  fval = {}
  for name, sparse in six.iteritems(parsed):
    fval[name] = sparse.values
  # Restore the original time-major [num_frames, frame_size] layout from
  # the flattened float vector.
  frames = tf.reshape(fval['frames'], shape=[-1, self.params.frame_size])
  num_frames = tf.shape(frames)[0]
  # The input duration determines the bucket.
  bucket_key = tf.cast(num_frames, tf.int32)
  if self.params.append_eos_frame:
    bucket_key += 1
  tgt_ids, tgt_labels, tgt_paddings = self.StringsToIds(fval['transcript'])
  src_paddings = tf.zeros([num_frames], dtype=tf.float32)
  return [
      fval['uttid'], tgt_ids, tgt_labels, tgt_paddings, frames, src_paddings
  ], bucket_key
def Proc(record):
  """Parses a serialized tf.Example record into NMT training tensors.

  Args:
    record: A serialized tf.Example proto (scalar string tensor).

  Returns:
    A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
    tgt_weights], bucket_key) pair, where `bucket_key` is the longer of
    the source/target lengths.
  """
  feature_spec = {
      'inputs': tf.VarLenFeature(tf.int64),
      'targets': tf.VarLenFeature(tf.int64),
  }
  parsed = tf.parse_single_example(record, feature_spec)
  dense = {name: sparse.values for name, sparse in six.iteritems(parsed)}
  src_ids = dense['inputs']
  tgt_labels = dense['targets']
  # tgt_ids is tgt_labels shifted right by one position, with the SOS id
  # prepended; paddings are trivially all-zero for variable-length records.
  tgt_ids = tf.concat([[p.sos_id], tgt_labels[:-1]], axis=0)
  src_paddings = tf.zeros(tf.shape(src_ids), dtype=tf.float32)
  tgt_paddings = tf.zeros(tf.shape(tgt_ids), dtype=tf.float32)
  tgt_weights = tf.ones(tf.shape(tgt_ids), dtype=tf.float32)
  # Bucket by the larger of the two unpadded sequence lengths.
  src_len = tf.reduce_sum(1.0 - src_paddings)
  tgt_len = tf.reduce_sum(1.0 - tgt_paddings)
  bucket_key = tf.cast(tf.maximum(src_len, tgt_len), tf.int32)
  return [
      src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels, tgt_weights
  ], bucket_key
def Proc(record):
  """Parses a serialized tf.Example record with precomputed paddings.

  Args:
    record: A serialized tf.Example proto (scalar string tensor).

  Returns:
    A single flat list: the six feature tensors (in `outputs` order)
    followed by `bucket_key`, the longer of the two unpadded sequence
    lengths. NOTE(review): unlike the sibling Proc functions, this
    appends bucket_key to the list instead of returning a
    (tensors, bucket_key) pair — confirm the caller expects this shape.
  """
  outputs = [
      ('source_id', tf.VarLenFeature(tf.int64)),
      ('source_padding', tf.VarLenFeature(tf.float32)),
      ('target_id', tf.VarLenFeature(tf.int64)),
      ('target_padding', tf.VarLenFeature(tf.float32)),
      ('target_label', tf.VarLenFeature(tf.int64)),
      ('target_weight', tf.VarLenFeature(tf.float32)),
  ]
  features = tf.parse_single_example(record, dict(outputs))
  for k, v in six.iteritems(features):
    features[k] = v.values
  # Bucket by the larger unpadded length of source vs. target. Use
  # tf.cast(..., tf.int32) instead of the deprecated tf.to_int32, for
  # consistency with the other Proc functions in this file.
  bucket_key = tf.cast(
      tf.maximum(
          tf.reduce_sum(1.0 - features['source_padding']),
          tf.reduce_sum(1.0 - features['target_padding'])), tf.int32)
  return [features[k] for k, _ in outputs] + [bucket_key]
def _Proc(record):
  """Parses a serialized tf.Example record into NMT training tensors.

  Args:
    record: A serialized tf.Example proto (scalar string tensor).

  Returns:
    A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
    tgt_weights], bucket_key) pair, with the derived tensors produced by
    _DerivePaddingsAndIds.
  """
  feature_spec = {
      'inputs': tf.VarLenFeature(tf.int64),
      'targets': tf.VarLenFeature(tf.int64),
  }
  parsed = tf.parse_single_example(record, feature_spec)
  src_ids = parsed['inputs'].values
  tgt_labels = parsed['targets'].values
  (src_paddings, tgt_ids, tgt_paddings, tgt_weights,
   bucket_key) = _DerivePaddingsAndIds(src_ids, tgt_labels)
  return [
      src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels, tgt_weights
  ], bucket_key
def _ProcPacked(record):
  """Parses a packed-input tf.Example record into training tensors.

  In addition to inputs/targets, packed records carry per-token position
  and segmentation features produced by the packing step.

  Args:
    record: A serialized tf.Example proto (scalar string tensor).

  Returns:
    A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
    tgt_weights, src_pos, src_seg, tgt_pos, tgt_seg], bucket_key) pair.
  """
  feature_spec = {
      'inputs': tf.VarLenFeature(tf.int64),
      'targets': tf.VarLenFeature(tf.int64),
      'inputs_segmentation': tf.VarLenFeature(tf.int64),
      'inputs_position': tf.VarLenFeature(tf.int64),
      'targets_segmentation': tf.VarLenFeature(tf.int64),
      'targets_position': tf.VarLenFeature(tf.int64),
  }
  parsed = tf.parse_single_example(record, feature_spec)
  src_ids = parsed['inputs'].values
  tgt_labels = parsed['targets'].values
  src_pos = parsed['inputs_position'].values
  src_seg = parsed['inputs_segmentation'].values
  tgt_pos = parsed['targets_position'].values
  tgt_seg = parsed['targets_segmentation'].values
  (src_paddings, tgt_ids, tgt_paddings, tgt_weights,
   bucket_key) = _DerivePaddingsAndIds(src_ids, tgt_labels)
  return [
      src_ids,
      src_paddings,
      tgt_ids,
      tgt_paddings,
      tgt_labels,
      tgt_weights,
      src_pos,
      src_seg,
      tgt_pos,
      tgt_seg,
  ], bucket_key
def _Proc(record):
  """Parses an unpacked tf.Example record, emitting trivial segmentation.

  Produces the same output signature as the packed-input parser so that
  unpacked and packed datasets are interchangeable downstream.

  Args:
    record: A serialized tf.Example proto (scalar string tensor).

  Returns:
    A ([src_ids, src_paddings, tgt_ids, tgt_paddings, tgt_labels,
    tgt_weights, src_pos, src_seg, tgt_pos, tgt_seg], bucket_key) pair.
  """
  feature_spec = {
      'inputs': tf.VarLenFeature(tf.int64),
      'targets': tf.VarLenFeature(tf.int64),
  }
  parsed = tf.parse_single_example(record, feature_spec)
  src_ids = parsed['inputs'].values
  tgt_labels = parsed['targets'].values
  (src_paddings, tgt_ids, tgt_paddings, tgt_weights,
   bucket_key) = _DerivePaddingsAndIds(src_ids, tgt_labels)
  # Trivial per-example positions: 0..len-1 for each sequence.
  src_pos = tf.range(tf.shape(src_ids)[0], dtype=tf.int32)
  tgt_pos = tf.range(tf.shape(tgt_ids)[0], dtype=tf.int32)
  # NOTE(review): segment ids are emitted as all-zero here; packed inputs
  # conventionally use 1-based segment ids with 0 meaning padding —
  # confirm downstream consumers expect zeros for unpacked data.
  src_seg = tf.zeros_like(src_paddings)
  tgt_seg = tf.zeros_like(tgt_paddings)
  return [
      src_ids,
      src_paddings,
      tgt_ids,
      tgt_paddings,
      tgt_labels,
      tgt_weights,
      src_pos,
      src_seg,
      tgt_pos,
      tgt_seg,
  ], bucket_key