from data_processing.mozilla_common_voice import MozillaCommonVoiceDataset
from data_processing.urban_sound_8K import UrbanSound8K
from data_processing.dataset import Dataset
import warnings

# Suppress all warnings for the remainder of the process.
warnings.filterwarnings('ignore')

# Root directories of the two corpora (machine-specific absolute paths).
mozilla_basepath = 'D:/downloads/fr1/mcv/fr~'
urbansound_basepath = 'D:/downloads/UrbanSound8K/UrbanSound8k'

# Train/validation file-name split of the Mozilla Common Voice corpus;
# these are later passed to Dataset as its first ("clean") argument.
mcv = MozillaCommonVoiceDataset(mozilla_basepath, val_dataset_size=900)
clean_train_filenames, clean_val_filenames = (
    mcv.get_train_val_filenames()
)

# Train/validation file-name split of the UrbanSound8K corpus;
# these are later passed to Dataset as its second ("noise") argument.
us8K = UrbanSound8K(urbansound_basepath, val_dataset_size=900)
noise_train_filenames, noise_val_filenames = (
    us8K.get_train_val_filenames()
)

# Echo the configured corpus locations, one per line.
print(mozilla_basepath, urbansound_basepath, sep='\n')

# Keyword arguments forwarded to Dataset. The overlap is a quarter of the
# window length, rounded to the nearest integer.
windowLength = 256
config = dict(
    windowLength=windowLength,
    overlap=round(0.25 * windowLength),
    fs=16000,
    audio_max_duration=0.8,
)

if __name__ == '__main__':
    # Only the validation split is processed when run as a script.
    # NOTE(review): create_tf_record presumably writes TFRecord files with
    # the given prefix -- confirm against data_processing.dataset.
    val_dataset = Dataset(clean_val_filenames, noise_val_filenames, **config)
    val_dataset.create_tf_record(prefix='val', subset_size=300)
# Example #2
# 0
from data_processing.mozilla_common_voice import MozillaCommonVoiceDataset
from data_processing.urban_sound_8K import UrbanSound8K
from data_processing.dataset import Dataset
from default_config import args
import warnings

# Suppress all warnings for the remainder of the process.
warnings.filterwarnings('ignore')

# Train/validation file-name split of the Mozilla Common Voice corpus;
# the corpus location comes from the shared `args` configuration.
mcv = MozillaCommonVoiceDataset(args.mozilla_basepath, val_dataset_size=1000)
clean_train_filenames, clean_val_filenames = (
    mcv.get_train_val_filenames()
)

# Train/validation file-name split of the UrbanSound8K corpus.
us8K = UrbanSound8K(args.urbansound_basepath, val_dataset_size=200)
noise_train_filenames, noise_val_filenames = (
    us8K.get_train_val_filenames()
)

# Keyword arguments forwarded to every Dataset below. The overlap is a
# quarter of the configured window length, rounded to the nearest integer.
config = dict(
    windowLength=args.windowLength,
    overlap=round(0.25 * args.windowLength),
    fs=16000,
    audio_max_duration=0.8,
)

# Validation records first, then training records.
# NOTE(review): create_tf_record presumably writes TFRecord files with the
# given prefix -- confirm against data_processing.dataset.
val_dataset = Dataset(clean_val_filenames, noise_val_filenames, **config)
val_dataset.create_tf_record(prefix='val', subset_size=2000)

train_dataset = Dataset(clean_train_filenames, noise_train_filenames, **config)
train_dataset.create_tf_record(prefix='train', subset_size=4000)

# Create the test set: collect the test file names from both corpora.
clean_test_filenames = mcv.get_test_filenames()

noise_test_filenames = us8K.get_test_filenames()