def _gscTaskCategories(task):
    """Return ``(categories, numCategories)`` for a Google Speech Commands task.

    ``categories`` maps a category name to its integer label and
    ``numCategories`` is the number of distinct labels of the task.

    Raises:
        ValueError: if ``task`` is not '12cmd', 'leftright', '20cmd' or
            '35word'.
    """
    if task == '12cmd':
        # Only task in which silence has its own label (1) instead of
        # sharing label 0 with "unknown".
        return {
            'unknown': 0,
            'silence': 1,
            '_unknown_': 0,
            '_silence_': 1,
            '_background_noise_': 1,
            'yes': 2,
            'no': 3,
            'up': 4,
            'down': 5,
            'left': 6,
            'right': 7,
            'on': 8,
            'off': 9,
            'stop': 10,
            'go': 11
        }, 12

    if task == 'leftright':
        return {
            'unknown': 0,
            'silence': 0,
            '_unknown_': 0,
            '_silence_': 0,
            '_background_noise_': 0,
            'left': 1,
            'right': 2
        }, 3

    if task not in ('20cmd', '35word'):
        raise ValueError('Unsupported task: {}'.format(task))

    # '20cmd' and '35word' share their first 21 labels. 'nine' takes label 1,
    # which is free here because silence is folded into label 0.
    categs = {
        'unknown': 0,
        'silence': 0,
        '_unknown_': 0,
        '_silence_': 0,
        '_background_noise_': 0,
        'yes': 2,
        'no': 3,
        'up': 4,
        'down': 5,
        'left': 6,
        'right': 7,
        'on': 8,
        'off': 9,
        'stop': 10,
        'go': 11,
        'zero': 12,
        'one': 13,
        'two': 14,
        'three': 15,
        'four': 16,
        'five': 17,
        'six': 18,
        'seven': 19,
        'eight': 20,
        'nine': 1
    }
    if task == '20cmd':
        return categs, 21

    # '35word' adds the remaining words on top of the 20 commands.
    categs.update({
        'backward': 21,
        'bed': 22,
        'bird': 23,
        'cat': 24,
        'dog': 25,
        'follow': 26,
        'forward': 27,
        'happy': 28,
        'house': 29,
        'learn': 30,
        'marvin': 31,
        'sheila': 32,
        'tree': 33,
        'visual': 34,
        'wow': 35
    })
    return categs, 36


def _gscReadSplitList(trainDir, listName):
    """Read a split list stored in ``trainDir`` and return '.npy' paths.

    Only entries ending in '.wav' are kept; each one is turned into
    ``os.path.join(trainDir, entry + '.npy')``.
    """
    entries = pd.read_csv(trainDir + listName, sep=" ",
                          header=None)[0].tolist()
    return [
        os.path.join(trainDir, entry + '.npy') for entry in entries
        if entry.endswith('.wav')
    ]


def _gscFindNpyFiles(top):
    """Return every '*.wav.npy' path found by walking ``top``."""
    found = []
    for root, _dirs, files in os.walk(top):
        found += [root + '/' + f for f in files if f.endswith('.wav.npy')]
    return found


def PrepareGoogleSpeechCmd(version=2, forceDownload=False, task='20cmd'):
    """
    Prepares the Google Speech commands dataset (version 1 or 2) for use.

    The dataset is downloaded through the version-specific download helper,
    the WAV files under '<basePath>/test/' and '<basePath>/train/' are
    converted with ``audioUtils.WAV2Numpy``, and file lists plus labels are
    built for each split.

    Args:
        version: 1 or 2; selects the download helper and the base folder
            ('sd_GSCmdV1' or 'sd_GSCmdV2').
        forceDownload: forwarded unchanged to the download helper.
        task: one of '20cmd', '12cmd', 'leftright' or '35word'.

    Returns:
        A tuple ``(gscInfo, numGSCmdV2Categs)``. ``gscInfo`` has the keys
        'train', 'test', 'val' and 'testREAL'; each value is a dict with
        'files' (list of '.wav.npy' paths) and 'labels' (dict mapping path
        to the label returned by ``_getFileCategory``). 'test' and 'val'
        come from the split lists inside the train folder, 'testREAL' from
        the files found under the test folder.

    Raises:
        ValueError: if ``task`` or ``version`` is not supported. Both checks
            run before anything is downloaded or converted.
    """
    allowedTasks = ['12cmd', 'leftright', '35word', '20cmd']
    if task not in allowedTasks:
        raise ValueError('Task must be one of: {}'.format(allowedTasks))

    if version == 2:
        _DownloadGoogleSpeechCmdV2(forceDownload)
        basePath = 'sd_GSCmdV2'
    elif version == 1:
        _DownloadGoogleSpeechCmdV1(forceDownload)
        basePath = 'sd_GSCmdV1'
    else:
        raise ValueError('Version must be 1 or 2')

    GSCmdV2Categs, numGSCmdV2Categs = _gscTaskCategories(task)

    trainDir = basePath + '/train/'
    testDir = basePath + '/test/'

    print('Converting test set WAVs to numpy files')
    audioUtils.WAV2Numpy(testDir)
    print('Converting training set WAVs to numpy files')
    audioUtils.WAV2Numpy(trainDir)

    # read split from files and all files in folders
    testWAVs = _gscReadSplitList(trainDir, 'testing_list.txt')
    valWAVs = _gscReadSplitList(trainDir, 'validation_list.txt')

    # Training files are whatever remains in the train folder. Sorting makes
    # the list order reproducible; plain set iteration order is not.
    allWAVs = _gscFindNpyFiles(trainDir)
    trainWAVs = sorted(set(allWAVs) - set(valWAVs) - set(testWAVs))

    testWAVsREAL = _gscFindNpyFiles(testDir)

    # get categories
    testWAVlabels = [_getFileCategory(f, GSCmdV2Categs) for f in testWAVs]
    valWAVlabels = [_getFileCategory(f, GSCmdV2Categs) for f in valWAVs]
    trainWAVlabels = [_getFileCategory(f, GSCmdV2Categs) for f in trainWAVs]
    testWAVREALlabels = [
        _getFileCategory(f, GSCmdV2Categs) for f in testWAVsREAL
    ]

    # background noise should be used for validation as well; this only
    # applies to the 12-category task, where silence has a label of its own
    if numGSCmdV2Categs == 12:
        silenceLabel = GSCmdV2Categs['silence']
        backNoiseFiles = [
            f for f, label in zip(trainWAVs, trainWAVlabels)
            if label == silenceLabel
        ]
        valWAVs += backNoiseFiles
        valWAVlabels += [silenceLabel] * len(backNoiseFiles)

    # build dictionaries
    testWAVlabelsDict = dict(zip(testWAVs, testWAVlabels))
    valWAVlabelsDict = dict(zip(valWAVs, valWAVlabels))
    trainWAVlabelsDict = dict(zip(trainWAVs, trainWAVlabels))
    testWAVREALlabelsDict = dict(zip(testWAVsREAL, testWAVREALlabels))

    # NOTE: there are few silence files, so they end up heavily underused.
    # They could be appended to the training list several times to be reused;
    # the label dicts would not need any change for that.

    # info dictionary
    gscInfo = {
        'train': {'files': trainWAVs, 'labels': trainWAVlabelsDict},
        'test': {'files': testWAVs, 'labels': testWAVlabelsDict},
        'val': {'files': valWAVs, 'labels': valWAVlabelsDict},
        'testREAL': {'files': testWAVsREAL, 'labels': testWAVREALlabelsDict}
    }

    print('Done preparing Google Speech commands dataset version {}'.format(
        version))

    return gscInfo, numGSCmdV2Categs
Exemple #2
0
def _listNpyFiles(folder):
    """Return the paths of the '*.wav.npy' files directly inside ``folder``."""
    return [
        os.path.join(folder, f) for f in os.listdir(folder)
        if f.endswith('.wav.npy')
    ]


def _collectSplitNpyFiles(splitDir, skipFolders=()):
    """Gather the '*.wav.npy' files of every sub-folder of ``splitDir``.

    Entries that are not directories, or whose name is in ``skipFolders``,
    are ignored. A folder that already holds '.wav.npy' files is taken as
    converted and used as is; otherwise ``audioUtils.WAV2Numpy`` is run on
    it first and the folder is listed again.
    """
    collected = []
    for folder in os.listdir(splitDir):
        if folder in skipFolders:
            continue
        root = splitDir + '/' + folder
        if not os.path.isdir(root):
            continue
        npyFiles = _listNpyFiles(root)
        if not npyFiles:
            # No converted files yet: convert the WAVs, then list again.
            audioUtils.WAV2Numpy(root)
            npyFiles = _listNpyFiles(root)
        collected.extend(npyFiles)
    return collected


def _segLabelFiles(npyFiles):
    """Map each '...utterance...' npy path to its '...seg.txt' sibling path."""
    return [
        os.path.join(os.path.dirname(f),
                     os.path.basename(f).split("utterance")[0] + "seg.txt")
        for f in npyFiles
    ]


def prepareKeyword(basePath):
    """Prepare the train/test/dev file lists and labels of the keyword set.

    For each of '<basePath>/train', '<basePath>/test' and '<basePath>/dev',
    the '.wav.npy' files of every sub-folder are collected (converting the
    WAVs when a folder has none yet) and passed through ``bad_audio_fn``.
    Labels are produced by ``encodeLabel`` from the 'seg.txt' file that sits
    next to each audio file.

    Args:
        basePath: folder containing the 'train', 'test' and 'dev' folders.

    Returns:
        ``(gscInfo, numCategs, keywords79)`` where ``gscInfo`` has the keys
        'train', 'test' and 'val' (the dev split), each a dict with 'files'
        and 'labels' (path -> encoded label); ``numCategs`` is the count
        returned by ``getCategs()`` plus one and ``keywords79`` is the
        second value returned by ``getCategs()``.
    """
    numKeyWordCategs, keywords79 = getCategs()

    print('Converting train set WAVs to numpy files')
    trainWAVs = _collectSplitNpyFiles(basePath + '/train')
    trainWAVs = bad_audio_fn(deepcopy(trainWAVs))

    print('Converting test set WAVs to numpy files')
    # The test folder named "0800" is left out (original note: "test one
    # file at a time").
    testWAVs = _collectSplitNpyFiles(basePath + '/test',
                                     skipFolders=("0800", ))
    testWAVs = bad_audio_fn(deepcopy(testWAVs))

    print('Converting dev set WAVs to numpy files')
    devWAVs = _collectSplitNpyFiles(basePath + '/dev')
    devWAVs = bad_audio_fn(deepcopy(devWAVs))

    # Prepare the labels of each split.
    trainLabels = encodeLabel(_segLabelFiles(trainWAVs), keywords79)
    testLabels = encodeLabel(_segLabelFiles(testWAVs), keywords79)
    devLabels = encodeLabel(_segLabelFiles(devWAVs), keywords79)

    testWAVlabelsDict = dict(zip(testWAVs, testLabels))
    devWAVlabelsDict = dict(zip(devWAVs, devLabels))
    trainWAVlabelsDict = dict(zip(trainWAVs, trainLabels))

    gscInfo = {
        'train': {'files': trainWAVs, 'labels': trainWAVlabelsDict},
        'test': {'files': testWAVs, 'labels': testWAVlabelsDict},
        'val': {'files': devWAVs, 'labels': devWAVlabelsDict}
    }

    print("Done Keywords data set prepare.")
    return gscInfo, numKeyWordCategs + 1, keywords79
Exemple #3
0
def convertWAV2Numpy():
    """Convert the WAV files of the test and train folders to numpy files.

    Runs ``audioUtils.WAV2Numpy`` on '<basePath>/test/' and then on
    '<basePath>/train/', announcing each step on stdout. ``basePath`` is
    neither a parameter nor a local, so it has to be provided by the
    enclosing scope.
    """
    steps = (
        ('Converting test set WAVs to numpy files', '/test/'),
        ('Converting training set WAVs to numpy files', '/train/'),
    )
    for message, subFolder in steps:
        print(message)
        audioUtils.WAV2Numpy(basePath + subFolder)
Exemple #4
0
def PrepareGoogleSpeechCmd(version, forceDownload=False, task='35word'):
    """
    Prepare the Google Speech commands dataset version 2 for the '35word' task.

    The version 2 dataset is always the one downloaded and read (base folder
    'sd_GSCmdV2'); ``version`` is only echoed in the final status message.

    Args:
        version: value reported in the final message; it does not select
            the dataset.
        forceDownload: forwarded unchanged to ``_DownloadGoogleSpeechCmdV2``.
        task: must be '35word', the only task supported here.

    Returns:
        A tuple ``(gscInfo, numGSCmdV2Categs)``. ``gscInfo`` has the keys
        'train', 'test', 'val' and 'testREAL'; each value is a dict with
        'files' (list of '.wav.npy' paths) and 'labels' (dict mapping path
        to the label returned by ``_getFileCategory``).
        ``numGSCmdV2Categs`` is 36.

    Raises:
        ValueError: if ``task`` is not '35word'. The check runs before
            anything is downloaded or converted.
    """
    if task != '35word':
        raise ValueError("Task must be '35word', got: {}".format(task))

    _DownloadGoogleSpeechCmdV2(forceDownload)
    basePath = 'sd_GSCmdV2'
    trainDir = basePath + '/train/'
    testDir = basePath + '/test/'

    # categories in task '35word'; 'nine' takes label 1, which is free
    # because silence shares label 0 with unknown
    GSCmdV2Categs = {
        'unknown': 0,
        'silence': 0,
        '_unknown_': 0,
        '_silence_': 0,
        '_background_noise_': 0,
        'yes': 2,
        'no': 3,
        'up': 4,
        'down': 5,
        'left': 6,
        'right': 7,
        'on': 8,
        'off': 9,
        'stop': 10,
        'go': 11,
        'zero': 12,
        'one': 13,
        'two': 14,
        'three': 15,
        'four': 16,
        'five': 17,
        'six': 18,
        'seven': 19,
        'eight': 20,
        'nine': 1,
        'backward': 21,
        'bed': 22,
        'bird': 23,
        'cat': 24,
        'dog': 25,
        'follow': 26,
        'forward': 27,
        'happy': 28,
        'house': 29,
        'learn': 30,
        'marvin': 31,
        'sheila': 32,
        'tree': 33,
        'visual': 34,
        'wow': 35
    }
    numGSCmdV2Categs = 36

    # NOTE(review): WAV2Numpy's return value is treated as an optional
    # collection of extra (augmented) file names -- confirm against audioUtils.
    print(
        'Converting test WAVs to numpy files, Data augmentation for test set')
    test_aug = audioUtils.WAV2Numpy(testDir)
    print(
        'Converting training set WAVs to numpy files, Data augmentation for train set'
    )
    train_aug = audioUtils.WAV2Numpy(trainDir)

    # read split from files and all files in folders
    testing = pd.read_csv(trainDir + 'testing_list.txt',
                          sep=" ",
                          header=None)[0].tolist()
    validation = pd.read_csv(trainDir + 'validation_list.txt',
                             sep=" ",
                             header=None)[0].tolist()

    testing = [
        os.path.join(trainDir, f + '.npy') for f in testing
        if f.endswith('.wav')
    ]
    validation = [
        os.path.join(trainDir, f + '.npy') for f in validation
        if f.endswith('.wav')
    ]

    if test_aug is not None:
        testing.extend(test_aug)  # add augmented test file names

    allWAVs = []
    for root, _dirs, files in os.walk(trainDir):
        allWAVs += [root + '/' + f for f in files if f.endswith('.wav.npy')]

    if train_aug is not None:
        allWAVs.extend(train_aug)  # add augmented train file names

    # Training files are whatever is left after removing the validation and
    # testing files. Sorting makes the list order reproducible; plain set
    # iteration order is not.
    training = sorted(set(allWAVs) - set(validation) - set(testing))

    testWAVsREAL = []
    for root, _dirs, files in os.walk(testDir):
        testWAVsREAL += [
            root + '/' + f for f in files if f.endswith('.wav.npy')
        ]

    # get categories
    testing_label = [_getFileCategory(f, GSCmdV2Categs) for f in testing]
    validation_label = [_getFileCategory(f, GSCmdV2Categs) for f in validation]
    training_label = [_getFileCategory(f, GSCmdV2Categs) for f in training]
    testWAVREALlabels = [
        _getFileCategory(f, GSCmdV2Categs) for f in testWAVsREAL
    ]

    # Background noise is not appended to the validation set: that step only
    # applies to a 12-category task, and this task always has 36 categories.

    # build dictionaries
    testing_dict = dict(zip(testing, testing_label))
    validation_dict = dict(zip(validation, validation_label))
    training_dict = dict(zip(training, training_label))
    testing_dict_real = dict(zip(testWAVsREAL, testWAVREALlabels))

    gscInfo = {
        'train': {'files': training, 'labels': training_dict},
        'test': {'files': testing, 'labels': testing_dict},
        'val': {'files': validation, 'labels': validation_dict},
        'testREAL': {'files': testWAVsREAL, 'labels': testing_dict_real}
    }

    print('Done preparing Google Speech commands dataset version {}'.format(
        version))

    return gscInfo, numGSCmdV2Categs