Exemple #1
0
def createMelSpectrogram(input_path, fileName, output_path, saveOrShow=0):
    """Render a mel spectrogram for the first 10 seconds of an audio file.

    The file is loaded with librosa at a 16 kHz sample rate and passed
    through a non-trainable ``Melspectrogram`` layer (1024-point DFT, hop of
    320 samples, 224 mel bands from 1400 Hz up to ``sr / 2``, decibel-scaled
    power).  The resulting 2-D array is either written to disk as a PNG or
    displayed on screen.

    Args:
        input_path: Directory containing the audio file.
        fileName: Name of the audio file inside ``input_path``.
        output_path: Directory the PNG is written to when saving.
        saveOrShow: ``0`` (default) saves ``<stem>.png`` into ``output_path``
            with the 'inferno' colormap; any other value shows the
            spectrogram via ``display.specshow`` and ``plt.show`` instead.

    Returns:
        None.  The shape of the spectrogram array is printed as a side
        effect.
    """
    # Load at most 10 s of audio at 16 kHz.
    signal, sr = librosa.load(os.path.join(input_path, fileName), duration=10, sr=16000)

    mel_layer = Melspectrogram(n_dft=1024,
                               n_hop=320,
                               input_shape=(1, signal.shape[0]),
                               padding='same', sr=sr, n_mels=224, fmin=1400, fmax=sr/2,
                               power_melgram=2.0, return_decibel_melgram=True,
                               trainable_fb=False, trainable_kernel=False)
    # The layer is fed a (1, 1, n_samples) array.
    S = mel_layer(signal.reshape(1, 1, -1)).numpy()

    # Keep axes 1 and 2 of the layer output as the 2-D image; this reshape
    # only succeeds when every other axis has length 1.
    S = S.reshape(S.shape[1], S.shape[2])

    print(S.shape)

    if saveOrShow == 0:
        # splitext removes only the final extension.  Splitting on the first
        # "." would map "a.1.wav" and "a.2.wav" to the same "a.png" and let
        # one image silently overwrite the other.
        stem = os.path.splitext(fileName)[0]
        matplotlib.image.imsave(os.path.join(output_path, stem + ".png"), S, cmap='inferno')
    else:
        display.specshow(S, sr=sr)
        plt.show()
Exemple #2
0
def createMelSpectrogramNew(input_path, fileName, output_path_train, output_path_val):
    """Cut an audio file into 5-second clips and save them as mel-spectrogram PNGs.

    The file is loaded as mono audio at 16 kHz.

    * Files of at most 5 seconds are zero-padded to exactly 5 seconds and
      exported as a single image named ``<stem>.png`` or ``<stem>_noise.png``.
    * Longer files are split into consecutive, non-overlapping 5-second
      clips (a trailing remainder shorter than 5 seconds is dropped).  Only
      clips whose summed absolute amplitude is at or above the 75th
      percentile over all clips of the file are exported, as
      ``<stem>_<clip index>.png`` or ``<stem>_<clip index>_noise.png``.

    Every exported clip is independently routed to the training or the
    validation directory; see ``_export_clip`` for the rule.

    Args:
        input_path: Directory containing the audio file.
        fileName: Name of the audio file inside ``input_path``.
        output_path_train: Directory receiving the augmented training images.
        output_path_val: Directory receiving the unaugmented validation images.

    Returns:
        None.  Images are written to disk as a side effect.
    """
    signal, sr = librosa.load(os.path.join(input_path, fileName), sr=16000, mono=True)

    # splitext removes only the final extension.  Splitting on the first "."
    # would give "a.1.wav" and "a.2.wav" the same stem and let their images
    # overwrite each other.
    stem = os.path.splitext(fileName)[0]
    clip_len = sr * 5

    if len(signal) <= clip_len:
        # Short file: zero-pad at the end up to exactly one clip length.
        padded = np.pad(signal, (0, clip_len - len(signal))).astype(np.float32, copy=False)
        _export_clip(padded, sr, stem, output_path_train, output_path_val)
        return

    num_clips = len(signal) // clip_len
    clips = signal[:num_clips * clip_len].reshape(num_clips, clip_len)

    # Per-clip summed absolute amplitude, computed once in a single
    # vectorised pass.  The same values feed both the quantile and the
    # per-clip comparison so the two are always consistent.  Accumulating in
    # float64 avoids float32 round-off over the 5 * sr samples of a clip.
    energies = np.abs(clips).sum(axis=1, dtype=np.float64)
    threshold = np.quantile(energies, 0.75)

    for idx, (clip, energy) in enumerate(zip(clips, energies)):
        if energy >= threshold:
            # idx is the clip's position in the file, not a running count of
            # exported clips, so file names stay tied to the time offset.
            _export_clip(clip, sr, f"{stem}_{idx}", output_path_train, output_path_val)


def _mel_image(samples, sr):
    """Return the decibel-scaled mel spectrogram of a 1-D signal as a 2-D array."""
    layer = Melspectrogram(n_dft=1024, n_hop=320,
                           # Declare the length of the array actually being
                           # transformed rather than that of the whole file.
                           input_shape=(1, samples.shape[0]),
                           padding='same', sr=sr, n_mels=224, fmin=1400, fmax=sr/2,
                           power_melgram=2.0, return_decibel_melgram=True,
                           trainable_fb=False, trainable_kernel=False)
    S = layer(samples.reshape(1, 1, -1)).numpy()
    # Keep axes 1 and 2 of the layer output as the image; this reshape only
    # succeeds when every other axis has length 1.
    return S.reshape(S.shape[1], S.shape[2])


def _export_clip(samples, sr, stem, output_path_train, output_path_val):
    """Randomly route one clip to train (augmented) or validation (clean) and save it."""
    # randint(0, 10) draws from 0..9, so values 0..7 (8 of 10) select training.
    if np.random.randint(0, 10) <= 7:
        # Training images are produced from the output of the `augmenter`
        # callable, which is defined outside this block.
        image = _mel_image(augmenter(samples, sr), sr)
        target = os.path.join(output_path_train, stem + "_noise.png")
    else:
        image = _mel_image(samples, sr)
        target = os.path.join(output_path_val, stem + ".png")

    matplotlib.image.imsave(target, image, cmap='inferno')
 d.extend(data_raw)
 frames = (np.hstack(d)).flatten()
 melspec = Melspectrogram(n_dft=1024,
                          n_hop=256,
                          input_shape=(1, frames.shape[0]),
                          padding='same',
                          sr=sr,
                          n_mels=224,
                          fmin=1400,
                          fmax=sr / 2,
                          power_melgram=2.0,
                          return_decibel_melgram=True,
                          trainable_fb=False,
                          trainable_kernel=False)(
                              frames.reshape(1, 1, -1)).numpy()
 melspec = melspec.reshape(melspec.shape[1], melspec.shape[2])
 print(
     f"Frames array: {frames.shape}, Melspec array: {melspec.shape}"
 )
 melplot = display.specshow(melspec, sr=sr)
 melplot.set_frame_on(False)
 plt.tight_layout(pad=0)
 plt.draw()
 plt.pause(0.0001)
 plt.clf()
 if (melspec.shape[1] >= IM_SIZE[0]):
     img = Image.frombuffer("RGBA",
                            fig.canvas.get_width_height(),
                            fig.canvas.buffer_rgba(), "raw",
                            "RGBA", 0, 1)
     img = img.convert('RGB').resize(IM_SIZE[0:2])