Example #1
import numpy as np
import tensorflow as tf

# The DeepSpeech data modules used below; the exact package path is an
# assumption, since the snippet does not show its imports.
from data import dataset
from data import featurizer


def convert_to_TF(deep_speech_dataset):
    """Write all entries of a DeepSpeechDataset into one TFRecord file."""
    data_entries = deep_speech_dataset.entries
    num_feature_bins = deep_speech_dataset.num_feature_bins
    audio_featurizer = deep_speech_dataset.audio_featurizer
    feature_normalize = deep_speech_dataset.config.audio_config.normalize
    text_featurizer = deep_speech_dataset.text_featurizer
    filename = "E:/TUDA/german-speechdata-package-v2/test.tfrecords"
    print('Writing', filename)
    with tf.python_io.TFRecordWriter(filename) as writer:
        for audio_file, _, transcript in data_entries:
            features = dataset._preprocess_audio(audio_file, audio_featurizer,
                                                 feature_normalize)
            labels = featurizer.compute_label_feature(
                transcript, text_featurizer.token_to_index)

            # Flatten the 2-D (time x frequency-bin) feature matrix row-major.
            flattened_features = [
                item for sublist_20ms in features for item in sublist_20ms
            ]

            # One Example per utterance: the feature matrix dimensions as
            # int64 scalars, labels and features as raw bytes.
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'dim1': _int64_feature(len(features)),
                    'dim2': _int64_feature(num_feature_bins),
                    'dim3': _int64_feature(1),
                    'labels': _bytes_feature(np.asarray(labels).tostring()),
                    'features': _bytes_feature(
                        np.asarray(flattened_features).tostring())
                }))
            # Write inside the loop so every entry is serialized, not just
            # the last one.
            writer.write(example.SerializeToString())
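
The _int64_feature and _bytes_feature helpers called above are not shown on
this page. They are assumed to be the usual thin wrappers around
tf.train.Feature; a minimal sketch:

import tensorflow as tf


def _int64_feature(value):
    """Wrap a Python int in an int64 tf.train.Feature."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


def _bytes_feature(value):
    """Wrap raw bytes in a bytes tf.train.Feature."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
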
Example #2
def gen_TFRecord(deep_speech_dataset):
    """Write one TFRecord file per dataset entry and list the files in a CSV.

    Assumes the same imports as Example #1, plus the standard-library
    collections module.
    """
    data_entries = deep_speech_dataset.entries
    num_feature_bins = deep_speech_dataset.num_feature_bins
    audio_featurizer = deep_speech_dataset.audio_featurizer
    feature_normalize = deep_speech_dataset.config.audio_config.normalize
    text_featurizer = deep_speech_dataset.text_featurizer
    writers = []

    records_csv = []

    for data_entry in data_entries:
        # TODO: bucket entries instead of creating one writer per file.
        writers.append(
            tf.python_io.TFRecordWriter(data_entry[0][:-4] + ".tfrecord"))
        records_csv.append(data_entry[0][:-4] + ".tfrecord")
    """Dataset generator function."""

    writer_index = 0
    for audio_file, _, transcript in data_entries:
        features = dataset._preprocess_audio(audio_file, audio_featurizer,
                                             feature_normalize)
        labels = featurizer.compute_label_feature(
            transcript, text_featurizer.token_to_index)

        flattened_features = [
            item for sublist_20ms in features for item in sublist_20ms
        ]
        feature_dict = collections.OrderedDict()
        feature_dict["features"] = _bytes_feature(
            tf.compat.as_bytes(np.asarray(flattened_features).tostring()))
        # Alternative: create_float_feature(flattened_features)
        feature_dict["shape"] = _bytes_feature(
            tf.compat.as_bytes(
                np.asarray([len(features), num_feature_bins, 1]).tostring()))
        # Alternative: create_int_feature([len(features), num_feature_bins, 1])
        feature_dict["labels"] = _bytes_feature(
            tf.compat.as_bytes(np.asarray(labels).tostring()))
        # Alternative: create_int_feature(labels)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature=feature_dict))
        writers[writer_index].write(tf_example.SerializeToString())

        writer_index += 1
        print("WRITING: " + str(writer_index) + "/" + str(len(writers)),
              end='\r')

    for writer in writers:
        writer.close()

    with open("E:/TUDA/german-speechdata-package-v2/records_test.csv",
              'w') as f:
        for record in records_csv:
            f.write(record + '\n')
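
Reading these records back means reversing the byte serialization. A minimal
TF1-style reader sketch; the dtypes handed to tf.decode_raw are assumptions
and must match whatever np.asarray produced at write time:

import tensorflow as tf


def read_deep_speech_record(serialized):
    """Parse one serialized Example written by gen_TFRecord (assumed schema)."""
    parsed = tf.parse_single_example(
        serialized,
        features={
            'features': tf.FixedLenFeature([], tf.string),
            'shape': tf.FixedLenFeature([], tf.string),
            'labels': tf.FixedLenFeature([], tf.string),
        })
    # Assumed dtypes: float64 features, int32 shape and labels.
    shape = tf.decode_raw(parsed['shape'], tf.int32)
    features = tf.decode_raw(parsed['features'], tf.float64)
    labels = tf.decode_raw(parsed['labels'], tf.int32)
    return tf.reshape(features, shape), labels


# Usage with the CSV written above:
# records = open("E:/TUDA/german-speechdata-package-v2/records_test.csv").read().splitlines()
# ds = tf.data.TFRecordDataset(records).map(read_deep_speech_record)
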
Example #3
def _gen_data():
    """Dataset generator function."""
    for audio_file, _, transcript in data_entries:
        features = _preprocess_audio(audio_file, audio_featurizer,
                                     feature_normalize)
        labels = featurizer.compute_label_feature(
            transcript, text_featurizer.token_to_index)
        input_length = [features.shape[0]]
        label_length = [len(labels)]
        # Yield a tuple of (features, labels) where features is a dict
        # containing all info about the actual data features.
        yield ({
            "features": features,
            "input_length": input_length,
            "label_length": label_length
        }, labels)
Example #4
def _gen_data():
  """Dataset generator function."""
  for audio_file, _, transcript in data_entries:
    features = _preprocess_audio(
        audio_file, audio_featurizer, feature_normalize)
    labels = featurizer.compute_label_feature(
        transcript, text_featurizer.token_to_index)
    input_length = [features.shape[0]]
    label_length = [len(labels)]
    # Yield a tuple of (features, labels) where features is a dict containing
    # all info about the actual data features.
    yield (
        {
            "features": features,
            "input_length": input_length,
            "label_length": label_length
        },
        labels)
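
Examples #3 and #4 are the same generator with different formatting. For
context, a sketch of how such a generator is typically wired into tf.data;
the dtypes and shapes below are assumptions inferred from the yielded dict,
and num_feature_bins is taken from the enclosing scope:

import tensorflow as tf

input_dataset = tf.data.Dataset.from_generator(
    _gen_data,
    output_types=({
        "features": tf.float32,
        "input_length": tf.int32,
        "label_length": tf.int32
    }, tf.int32),
    output_shapes=({
        "features": tf.TensorShape([None, num_feature_bins, 1]),
        "input_length": tf.TensorShape([1]),
        "label_length": tf.TensorShape([1])
    }, tf.TensorShape([None])))
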
Example #5
def check_reshape(deep_speech_dataset):
    """Sanity check: flattening then reshaping must reproduce the features."""
    data_entries = deep_speech_dataset.entries
    num_feature_bins = deep_speech_dataset.num_feature_bins
    audio_featurizer = deep_speech_dataset.audio_featurizer
    feature_normalize = deep_speech_dataset.config.audio_config.normalize
    text_featurizer = deep_speech_dataset.text_featurizer
    """Dataset generator function."""

    for audio_file, _, transcript in data_entries:
        features = dataset._preprocess_audio(audio_file, audio_featurizer,
                                             feature_normalize)
        labels = featurizer.compute_label_feature(
            transcript, text_featurizer.token_to_index)

        flattened_features = [
            item for sublist_20ms in features for item in sublist_20ms
        ]
        # ff = tf.reshape(flattened_features,
        #                 [features.shape[0], features.shape[1]])
        ff = np.reshape(flattened_features,
                        [features.shape[0], features.shape[1], 1])
        # for i in range(len(features)):
        #     for j in range(len(features[i])):
        #         print(str(ff[i][j]) + "#############" + str(features[i][j]))

        # print(ff == features)
        print(ff[0])
        print("################")
        print(features[0])
        # print(features.shape)
        break


# ds = generate_dataset("E:/TUDA/german-speechdata-package-v2/test.csv")
# convert_to_TF(ds)
# check_reshape(ds)
# gen_TFRecord(ds)
# input_fn(128, "E:/TUDA/german-speechdata-package-v2/records_test.csv", 1)
# read_tfRecord()
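
As a stricter alternative to eyeballing the prints in check_reshape, the
round trip can be asserted directly. flatten_and_restore is a hypothetical
helper that mirrors the flatten-then-reshape logic above:

import numpy as np


def flatten_and_restore(features):
    """Flatten a 2-D feature matrix and reshape it back with a channel axis."""
    flat = [item for row in features for item in row]
    return np.reshape(flat, [features.shape[0], features.shape[1], 1])


# Dropping the restored channel axis must reproduce the original matrix.
mat = np.random.rand(5, 161)
assert np.array_equal(flatten_and_restore(mat)[:, :, 0], mat)
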
Example #6
def _preprocess_transcript(transcript, token_to_index):
    """Process transcript as label features."""
    return featurizer.compute_label_feature(transcript, token_to_index)
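
For illustration, assuming compute_label_feature maps each character of the
transcript through token_to_index (the vocabulary below is a made-up toy
mapping, not part of the original code):

toy_token_to_index = {" ": 0, "a": 1, "h": 2, "l": 3, "o": 4}
print(_preprocess_transcript("hallo", toy_token_to_index))
# Under the per-character assumption this prints [2, 1, 3, 3, 4].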