def analyze_audio(infile):
    """Compute a per-band onset cross-correlation accumulation for an audio file.

    Loads `infile`, builds a mel spectrogram, derives one onset-strength
    envelope per mel band plus a global envelope in the last row, then sums,
    over time, the correlation between each frame's onsets and a trailing
    window of later frames.  Relies on the module-level tuning constants
    TARGET_SR, FFT_WINDOW, HOP_SIZE, MEL_BINS, BEAT_WINDOW and BEAT_CLEAR.
    """
    # Decode the signal at the project sample rate.
    y, sr = librosa.load(infile, target_sr=TARGET_SR)

    # Mel spectrogram: one row per mel band.
    S = librosa.melspectrogram(y, sr,
                               window_length=FFT_WINDOW,
                               hop_length=HOP_SIZE,
                               mel_channels=MEL_BINS)

    # One onset envelope per band, plus one extra row for the global profile.
    onsets = numpy.empty((S.shape[0] + 1, S.shape[1] - 1))
    for band in range(MEL_BINS):
        onsets[band, :] = librosa.beat.onset_strength(
            y, sr,
            window_length=FFT_WINDOW,
            hop_length=HOP_SIZE,
            S=S[band:(band + 1), :])

    # Global onset profile computed over the full spectrogram.
    onsets[-1, :] = librosa.beat.onset_strength(
        y, sr, window_length=FFT_WINDOW, hop_length=HOP_SIZE, S=S)

    # Accumulate, for each frame t, the correlation of its per-band onsets
    # against the window [t+BEAT_CLEAR, t+BEAT_CLEAR+BEAT_WINDOW).
    P = 0.0
    for t in xrange(0, onsets.shape[1] - BEAT_WINDOW - BEAT_CLEAR):
        trailing = onsets[:, (t + BEAT_CLEAR):(t + BEAT_WINDOW + BEAT_CLEAR)]
        P = P + numpy.dot(numpy.diag(onsets[:, t]), trailing)
    return P
def __call__(self, y) -> np.ndarray:
    """Convert a waveform to a log-scaled (dB) mel spectrogram.

    Configuration is taken from the instance: sample rate ``self.sr``,
    ``self.n_mels`` mel bands, frequency range ``self.fmin``..``self.fmax``,
    plus any extra keyword arguments stored in ``self.kwargs``.

    Returns the dB-scaled spectrogram as a float32 array.
    """
    power_spec = librosa.melspectrogram(
        y,
        sr=self.sr,
        n_mels=self.n_mels,
        fmin=self.fmin,
        fmax=self.fmax,
        **self.kwargs,
    )
    # Convert power to decibels and down-cast for compact storage.
    return librosa.power_to_db(power_spec).astype(np.float32)
def audio_onset_correlation(infile):
    """Per-band onset correlation accumulation for a single audio file.

    Loads the file, computes a mel spectrogram and one onset-strength
    envelope per mel band, then sums, over time, the correlation between
    each frame's band onsets and a trailing window of later frames.
    All tuning values come from the module-level PARAMETERS dict.
    """
    # Decode audio at the configured sample rate.
    y, sr = librosa.load(infile, target_sr=PARAMETERS['TARGET_SR'])

    # Mel spectrogram: one row per mel band.
    S = librosa.melspectrogram(y, sr,
                               window_length=PARAMETERS['FFT_WINDOW'],
                               hop_length=PARAMETERS['HOP_SIZE'],
                               mel_channels=PARAMETERS['MEL_BINS'])

    # One onset envelope per band; buffer is sized one frame shorter than S.
    onsets = numpy.empty((S.shape[0], S.shape[1] - 1))
    for band in range(PARAMETERS['MEL_BINS']):
        onsets[band, :] = librosa.beat.onset_strength(
            y, sr,
            window_length=PARAMETERS['FFT_WINDOW'],
            hop_length=PARAMETERS['HOP_SIZE'],
            S=S[band:(band + 1), :])

    # Hoist the loop-invariant parameter lookups.
    clear = PARAMETERS['BEAT_CLEAR']
    width = PARAMETERS['BEAT_WINDOW']

    # Accumulate, for each frame t, the correlation of its per-band onsets
    # against the window [t+clear, t+clear+width).
    P = 0.0
    for t in xrange(0, onsets.shape[1] - width - clear):
        P = P + numpy.dot(numpy.diag(onsets[:, t]),
                          onsets[:, (t + clear):(t + width + clear)])
    return P
def continuousGridSearch( datasetDirectory, csvName ):
    '''
    Grid search over spectrogram / onset-detection parameters for audio
    synchronization scoring.

    For every MIDI-derived subdirectory of `datasetDirectory`, loads paired
    "synchronized"/"unsynchronized" wav renderings (0-Xms.wav / 1-Xms.wav),
    sweeps combinations of downsampling factor, frame size, hop-size scale,
    window and offset, scores each pair via synchronizationScore.getScore,
    and writes aggregated results to the CSV file `csvName`.

    NOTE(review): this source was recovered from a whitespace-flattened file;
    the loop/branch indentation below is a best-effort reconstruction and
    should be confirmed against the original.
    '''
    # Active (reduced) search grid — single value per dimension for testing.
    downsamplingFactors = np.array([1])
    frameSizes = np.array([1024])
    hopSizeScales = np.array([8])
    windows = [np.hanning]
    offsets = np.array([100])
    '''
    For continuous
    downsamplingFactors = np.array([1])
    frameSizes = np.array([1024, 2048, 4096])
    hopSizeScales = np.array([8, 4])
    windows = [np.hanning]
    offsets = np.array([20])
    '''
    # Get subdirectories for the input folder, corresponding to different MIDI files
    # NOTE(review): `folder[0] is not '.'` compares string identity, not
    # equality — it works only because CPython interns 1-char strings;
    # should be `folder[0] != '.'`.
    directories = [os.path.join( datasetDirectory, folder ) for folder in os.listdir(datasetDirectory) if os.path.isdir(os.path.join(datasetDirectory, folder)) and folder[0] is not '.']
    # The variations on the MIDI files: pairs 0-Xms.wav / 1-Xms.wav for
    # timing shifts of 0..50ms in 10ms steps (Python 2 `xrange`).
    filenames = []
    for shift in xrange( 0, 60, 10 ):
        filenames += ['0-' + str(shift) + 'ms.wav']
        filenames += ['1-' + str(shift) + 'ms.wav']
    nFiles = len( filenames )
    # Calculate number of tests about to be run
    nTests = np.product( [len(dimension) for dimension in (directories, downsamplingFactors, frameSizes, hopSizeScales, windows, offsets)] )
    print "About to run " + str( nTests ) + " tests."
    # Keep track of which test is being run
    testNumber = 0
    startTime = time.time()
    # Store the parameters corresponding to each result
    gridSearchResults = collections.defaultdict(list)
    # The data, being manipulated each step of the way
    audioData = {}
    audioDataDownsampled = {}
    spectrograms = {}
    ODFOutput = {}
    synchronizationScores = {}
    # Test to plot histograms
    # NOTE(review): allAccuracies is allocated but never written or read below.
    allAccuracies = np.zeros( nTests )
    # Can we calculate the spectrogram from the previous spectrogram?
    previousHopSizeScale = 0
    for directory in directories:
        # Read in wav data for each file
        for file in filenames:
            audioData[file], fs = librosa.load( os.path.join( directory, file ) )
        for downsamplingFactor in downsamplingFactors:
            # Decimate (anti-aliased downsample) every file by the current factor.
            for file in filenames:
                audioDataDownsampled[file] = scipy.signal.decimate( audioData[file], downsamplingFactor )
            for frameSize, window, hopSizeScale in itertools.product( frameSizes, windows, hopSizeScales ):
                # When only the hop-size scale shrank by an integer factor,
                # the new spectrogram is a column subsampling of the old one.
                if hopSizeScale < previousHopSizeScale and np.mod( previousHopSizeScale, hopSizeScale ) == 0:
                    # Instead of calculating a new spectrogram, just grab the frames
                    newHopRatio = previousHopSizeScale/hopSizeScale
                    for file in filenames:
                        spectrograms[file] = spectrograms[file][:, ::newHopRatio]
                else:
                    # Calculate spectrograms - should not re-calculate if just the hop size changes.
                    # NOTE(review): `frameSize/hopSizeScale` is Python 2
                    # integer division — confirm intent if porting to Python 3.
                    for file in filenames:
                        spectrograms[file] = librosa.melspectrogram( audioDataDownsampled[file], fs, window_length=frameSize, hop_length=frameSize/hopSizeScale, mel_channels=40)
                previousHopSizeScale = hopSizeScale
                # Get the onset detection function
                for file in filenames:
                    ODFOutput[file] = librosa.beat.onset_strength( audioDataDownsampled[file], fs, window_length=frameSize, hop_length=frameSize/hopSizeScale, S=spectrograms[file])
                for offset in offsets:
                    # Compute the synchronization score for the syncrhonized and unsynchronized files
                    # NOTE(review): `nFiles/2` is Python 2 integer division.
                    for n in xrange( nFiles/2 ):
                        synchronizationScores[n] = synchronizationScore.getScore( ODFOutput[filenames[2*n]], ODFOutput[filenames[2*n + 1]], offset=offset )
                    # Add in the ratio of the scores, we will take the per-MIDI-file-average later.
                    print "{} -> {}, {:.3f}% done in {:.3f} minutes".format( (directory, downsamplingFactor, frameSize, hopSizeScale, window.__name__, offset), synchronizationScores.values(), (100.0*testNumber)/nTests, (time.time() - startTime)/60.0)
                    testNumber += 1
                    # Append this run's score vector under its parameter key.
                    gridSearchResults[(downsamplingFactor, frameSize, hopSizeScale, window.__name__, offset)] += [np.array(synchronizationScores.values())]
    # Write out CSV results
    # NOTE(review): 'wb' mode for csv is the Python 2 idiom; Python 3 needs
    # mode 'w' with newline=''.  The file handle is never explicitly closed.
    csvWriter = csv.writer( open( csvName, 'wb' ) )
    for parameters, results in gridSearchResults.items():
        resultArray = np.array( results )
        # Normalize each run's scores by its own per-row maximum.
        resultArray = (resultArray.T/np.max( resultArray, axis=1 )).T
        # Row layout: parameters, per-shift mean scores, mean score diffs,
        # overall mean diff, and fraction of decreasing steps.
        csvWriter.writerow( list( parameters ) + list(np.mean( resultArray, axis=0 )) + [np.mean( np.diff( resultArray ), axis=0 )] + [np.mean( np.diff( resultArray ) )] + [np.mean( np.sum( np.diff( resultArray ) < 0, axis=1 ) )/(0.5*nFiles - 1)] )