Esempio n. 1
0
from matplotlib.animation import FuncAnimation
import librosa
import dataLoader
import audioProcessing
import config_

# Value passed as SR to the data loader and as the second argument to the
# augmentation step (presumably the sample rate in Hz -- confirm).
sr = 16000

loader = dataLoader.DCASE2dataPrep(SR=sr, dev=True, test=False)
print(len(loader.trainData))
print(len(loader.holdoutData))

# Augmentation preset 5 must unpack into exactly nine values: a
# (min, max, steps) triple each for time stretch, pitch shift and level
# adjustment, in that order.
(
    minTimeStretch, maxTimeStretch, stepsTimeStretch,
    minPitchShift, maxPitchShift, stepsPitchShift,
    minLevelAdjust, maxLevelAdjust, stepsLevelAdjust,
) = config_.getAugmentConfig(config=5)

print('Augment')
augmentArgs = (
    minTimeStretch, maxTimeStretch, stepsTimeStretch,
    minPitchShift, maxPitchShift, stepsPitchShift,
    minLevelAdjust, maxLevelAdjust, stepsLevelAdjust,
)
aug = audioProcessing.augment(loader.trainData, sr, *augmentArgs)

# Value handed to prepareTrainSet as n_mels.
nBin = 99
print('Prepare Train Set')
trainSetOptions = {
    'deriv': False,
    'frameEnergy': False,
    'N_FFT': 2048,
    'hop': 512,
    'n_mels': nBin,
    'lengthScene': 10,
    'eventsPerScene': 5,
    'lengthExample': 10,
    'hopExample': 10,
    'asFeatureVector': False,
    'shuffle': False,
}
trainSet = loader.prepareTrainSet(aug, **trainSetOptions)
Esempio n. 2
0
def main():
    """Train and checkpoint the sequence-to-sequence model 'ESC_PreTrainNew6'.

    Steps, in order:

    1. Load training data through ``dataLoader.DCASE2dataPrep``, augment it
       and build a train set; the first 10000 entries of ``trainSet[0]``
       become the validation data (used as both input and target).
    2. Save the loader's statistics (mean, std, min, max) with ``np.savez``
       under ``config_.modelsFolder + 'ST_' + modelName``.
    3. Build a batch generator over ``baseFolder``, pull one batch to derive
       the model's input/output shape, and train with ``fit_generator``
       using checkpoint, NaN-termination, LR-reduction and CSV-logging
       callbacks.

    Raises:
        SystemExit: always, with status 0, once training has finished.
    """
    # Local import: only the final sys.exit call needs it. sys.exit is used
    # instead of the bare exit() helper, which is injected by the site module
    # and intended for interactive sessions.
    import sys

    baseFolder = 'ESC/ESC-US/'

    signalRate = 30000
    lengthScene = 10
    eventsPerScene = 5
    numberMelBins = 80

    nfft = 2048
    hop = 512
    layer = 2
    hidden = 80
    lr = 0.002
    sigma = 0.25
    log_power = True
    deriv = False
    frameEnergy = False

    batchSize = 128
    epochs = 100000

    asFeatureVector = False
    temporalCompression = True

    # Shared by the train-set preparation and the batch generator below.
    lengthExample = 11
    hopExample = 11

    loader = dataLoader.DCASE2dataPrep(SR=signalRate, dev=False, test=False)
    trainData = loader.trainData

    #TODO: also get the augmented trainData
    print('augment train data')
    # NOTE(review): positional arguments after signalRate are presumably
    # (min, max, steps) for time stretch, pitch shift and level adjustment --
    # confirm against audioProcessing.augment.
    augmentedTrainData = audioProcessing.augment(trainData, signalRate, 0.9,
                                                 1.1, 2, -1, 1, 2, 0, 3, 4)

    #to compute a mean and std deviation for normalization
    print('prepare train set')
    trainSet = loader.prepareTrainSet(augmentedTrainData,
                                      lengthScene=lengthScene,
                                      lengthExample=lengthExample,
                                      hopExample=hopExample,
                                      eventsPerScene=eventsPerScene,
                                      N_FFT=nfft,
                                      hop=hop,
                                      randSeed=1337,
                                      log_power=log_power,
                                      deriv=deriv,
                                      frameEnergy=frameEnergy,
                                      asFeatureVector=asFeatureVector,
                                      temporalCompression=temporalCompression,
                                      n_mels=numberMelBins,
                                      predDelay=0)
    # Validation data: a slice of the prepared train set, later passed to the
    # model as both input and target.
    val = trainSet[0][0:10000]

    # Read after prepareTrainSet; per the comment above, that call is what
    # computes the normalization statistics on the loader.
    zNorm = (loader.trainMean, loader.trainStd)
    fullStats = (loader.trainMean, loader.trainStd, loader.trainMin,
                 loader.trainMax)
    modelName = 'ESC_PreTrainNew6'
    #1 normal full
    #2 lower lr
    #3 no "noise" classes, Insects, rain, sea waves, crackling fire, crickets,
    #chirping birds, water drops, wind, pouring water, toilet flush, thunderstorm, clapping,
    #breathing, brushing teeth, drinking sipping, washing machine, vacuum cleaner,
    #helicopter, chainsaw, engine, train, airplane, fireworks, hand saw
    #4 no "noise" classes, also remove coughing, laughing, door knock, keyboard typing
    #5 new training variant with loading full file and chopping subsets
    #6 0.25 sigma dropout on input
    modelsFolder = config_.modelsFolder
    logFolder = config_.logFolder

    # Plain string concatenation: modelsFolder is expected to already end
    # with a path separator.
    statsFileName = modelsFolder + 'ST_' + modelName
    print('Saving stats', statsFileName)
    np.savez(statsFileName,
             mean=fullStats[0],
             std=fullStats[1],
             minimum=fullStats[2],
             maximum=fullStats[3])

    print('Create Generator')
    generator = genBatchPreLoadWhole(baseFolder, batchSize, signalRate,
                                     lengthExample, asFeatureVector,
                                     temporalCompression, nfft, hop, log_power,
                                     deriv, frameEnergy, numberMelBins, zNorm)
    print('Create initial fill')
    # Pull one batch up front so the model's shapes can be derived from it.
    testExample = next(generator)

    #dataX = loadAll(baseFolder,batchSize,signalRate,lengthExample,asFeatureVector,
    #                     temporalCompression,nfft,hop,log_power,deriv,frameEnergy,numberMelBins,zNorm)
    #testExample = dataX[0:1]
    #print(dataX.shape)

    inDim = testExample[0].shape  #(1,lengthExample* (numberMelBins+1))
    # Output shape equals input shape.
    outDim = inDim
    #model = modelLoader.createFeedForward_AE(layer,hidden,inDim,outDim,lr,sigma)
    model = modelLoader.createSeqToSeq(layer, hidden, inDim, outDim, lr, sigma)

    # The checkpoint tracks validation loss, while the LR schedule below
    # tracks training loss.
    saveCallback = ModelCheckpoint(modelsFolder + modelName,
                                   monitor='val_loss',
                                   save_best_only=True)
    breakCallback = TerminateOnNaN()
    lrCallback = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=100)
    logCallback = CSVLogger(logFolder + modelName, append=True)
    # NOTE(review): fit_generator is deprecated in recent Keras releases in
    # favour of fit(); kept as-is because the installed Keras version is not
    # visible from here. The returned history object was never used, so it is
    # no longer bound to a name.
    model.fit_generator(
        generator,
        steps_per_epoch=200,
        epochs=epochs,
        verbose=2,
        max_queue_size=30,
        callbacks=[saveCallback, breakCallback, lrCallback, logCallback],
        validation_data=(val, val),
        workers=0)
    #history = model.fit(x=dataX,y=dataX,batch_size=batchSize,epochs=epochs,verbose=2,callbacks=[saveCallback,breakCallback,lrCallback,logCallback],validation_data=(val,val))
    sys.exit(0)
Esempio n. 3
0
    valData = reducedValData
    loader.devData = valData
    #"""
    
    #Augmentation
    signal_rate = loader.rate
    minTimeStretch = 0.9
    maxTimeStretch = 1.1
    stepsTimeStretch = 0
    minPitchShift = -1
    maxPitchShift = 1.1
    stepsPitchShift = 0
    minLevelAdjust = 0.0
    maxLevelAdjust = 2.0
    stepsLevelAdjust = 3
    augmentedTrainData = audioProcessing.augment(trainData,signal_rate,minTimeStretch,maxTimeStretch,stepsTimeStretch,minPitchShift,maxPitchShift,stepsPitchShift,minLevelAdjust,maxLevelAdjust,stepsLevelAdjust)
    
    lengthScene = 15
    eventsPerScene = 5
    numberMelBins = 81

    nfft = 2048
    hop = 512
    lengthExample=40 #40 should now be roughly a second input
    hopExample=20
    layer = 2
    extractionLayer = 4
    hidden = 80
    lr = 0.002
    sigma = 0.0
    asFeatureVector=False
 lengthExample = 1
 hopExample = 1
 asFeatureVector = True
 #TODO: augment data
 data = loader.trainData
 minTime = 0.9
 maxTime = 1.1
 stepsTime = 0
 minPitch = 0.9
 maxPitch = 1.1
 stepsPitch = 0
 minLevel = 0.0
 maxLevel = 2.0
 stepsLevel = 3
 augmentedData = audioProcessing.augment(data, loader.rate, minTime,
                                         maxTime, stepsTime, minPitch,
                                         maxPitch, stepsPitch, minLevel,
                                         maxLevel, stepsLevel)
 loader.estimateStatistics(augmentedData,
                           20,
                           deriv=False,
                           frameEnergy=False,
                           n_mels=25)
 datasplits = loader.splitDataByLabel(augmentedData, loader.labels)
 genExample = loader.genTrainSetExample(datasplits[0],
                                        lengthExample=lengthExample,
                                        hopExample=hopExample,
                                        asFeatureVector=asFeatureVector,
                                        deriv=False,
                                        frameEnergy=False,
                                        n_mels=25,
                                        N_FFT=2048,

if __name__ == "__main__":
    loader = DCASE2dataPrep(SR=30000)
    normalTrainData = loader.trainData
    signal_rate = loader.rate
    minTimeStretch = 0.9
    maxTimeStretch = 1.1
    stepsTimeStretch = 2
    minPitchShift = -2.
    maxPitchShift = 2.
    stepsPitchShift = 2
    minLevelAdjust = 0.0
    maxLevelAdjust = 2.0
    stepsLevelAdjust = 3
    augmentedTrainData = augment(normalTrainData,signal_rate,minTimeStretch,maxTimeStretch,stepsTimeStretch,minPitchShift,maxPitchShift,stepsPitchShift,minLevelAdjust,maxLevelAdjust,stepsLevelAdjust)
    
    lengthScene = 10
    eventsPerScene = 5
    numberMelBins = 80
    nfft=2048
    hop=512
    lenExample = 10
    hopExample = 10
    trainSet = loader.prepareTrainSet(augmentedTrainData,lengthScene=lengthScene,lengthExample=lenExample,hopExample=hopExample,
                                      eventsPerScene=eventsPerScene,N_FFT=nfft,hop=hop,randSeed=1337,
                                      log_power=True,deriv=False,frameEnergy=False,asFeatureVector=False,temporalCompression=True,
                                      n_mels=numberMelBins,predDelay=0,zNorm=True)
    validationSet = loader.prepareValSet(lengthScene=lengthScene,lengthExample=lenExample,hopExample=hopExample,eventsPerScene=eventsPerScene,
                                         N_FFT=nfft,hop=hop,randSeed=1337,log_power=True,asFeatureVector=False,temporalCompression=True,
                                         deriv=False,frameEnergy=False,n_mels=numberMelBins,