def play_melody(filename):
    """Play a generated melody in an endless loop, using one sample as the instrument.

    The sample is loaded, trimmed with ``librosa.effects.trim`` and shifted by
    the negative of its estimated tuning offset (both computed with
    ``bins_per_octave=1``). Every melody event ``(steps, beats)`` is rendered
    by time-stretching the sample to ``0.75 * beats`` seconds and
    pitch-shifting it by ``steps`` semitones; an event whose first element is
    ``None`` is a rest of the same length.

    Args:
        filename: Path of the audio file used as the instrument sample.

    This function never returns.
    """
    y, sr = librosa.load(filename)
    y, _ = librosa.effects.trim(y)
    offset = librosa.estimate_tuning(y=y, sr=sr, bins_per_octave=1)
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=-offset,
                                    bins_per_octave=1)
    melody = melody_generator(random.random())
    beat_length = 0.75
    sample_len = librosa.get_duration(y=y, sr=sr)
    # NOTE(review): if melody_generator returns a one-shot generator, the
    # second pass of this loop iterates nothing and spins; confirm it returns
    # a re-iterable sequence.
    while True:
        for steps, beats in melody:
            note_len = beat_length * beats
            if steps is None:
                time.sleep(note_len)
                continue
            # time_stretch divides the duration by `rate`, so the rate that
            # yields a note of `note_len` seconds is sample_len / note_len.
            note = librosa.effects.time_stretch(y, rate=sample_len / note_len)
            note = librosa.effects.pitch_shift(note, sr=sr, n_steps=steps,
                                               bins_per_octave=12)
            # Pass the sample rate explicitly so playback speed does not
            # depend on the sounddevice default.
            sd.play(note, samplerate=sr, blocking=True)
def callback(in_data, frame_count, time_info, flag):
    """Audio stream callback: classify the chord in one input buffer.

    The raw buffer is interpreted as 16-bit integer samples, resampled from
    44100 Hz to 11025 Hz, converted to a tuning-corrected CQT magnitude
    spectrogram and fed to the module-level ``model``. The most frequent
    predicted chord name is printed whenever it differs from the module-level
    ``prev_chord``, which is then updated.

    Args:
        in_data: Raw bytes of the captured audio buffer.
        frame_count: Unused.
        time_info: Unused.
        flag: Unused.

    Returns:
        Tuple ``(in_data, pyaudio.paContinue)``.
    """
    global prev_chord
    from collections import Counter

    # np.fromstring is deprecated for binary input; frombuffer is the
    # replacement. astype() copies, so the read-only view is never mutated.
    samples = np.frombuffer(in_data, dtype=np.int16).astype('float32')
    samples = librosa.resample(samples, orig_sr=44100, target_sr=11025)
    tuning = librosa.estimate_tuning(y=samples, sr=11025)
    features = np.abs(
        librosa.core.cqt(samples,
                         sr=11025,
                         n_bins=84,
                         bins_per_octave=12,
                         tuning=tuning,
                         window='hamming',
                         norm=2)).T
    with torch.no_grad():
        X = torch.tensor(features)
        if torch.cuda.is_available():
            X = X.cuda()
        pred = model(X.unsqueeze(0))
        # Index of the top-scoring class along dim 2, flattened to 1-D.
        y = pred.topk(1, dim=2)[1].squeeze().view(-1)
        counter = Counter(ind_to_chord_names(y, category))
        current_chord = counter.most_common(1)[0][0]
        if prev_chord != current_chord:
            print(current_chord)
            prev_chord = current_chord
    return in_data, pyaudio.paContinue
def loop(self):
    """Process at most one queued audio buffer and update the pitch state.

    One buffer is taken from ``self.Q`` without blocking. If a buffer is
    available, its pitch and confidence are read from ``self._pitch_o``.
    When the confidence exceeds 0.9, the estimated tuning offset is
    subtracted from the pitch and the result is stored in ``self.pitch`` /
    ``self.confidence`` and appended to ``self.pitches`` with a timestamp;
    otherwise both attributes are reset to 0. The method then sleeps briefly.
    """
    try:
        data = self.Q.get(block=False)
    except Exception:
        # Nothing could be fetched this time round. Narrowed from a bare
        # ``except`` so KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    else:
        t = time.time()
        # frombuffer replaces the deprecated binary np.fromstring; copy()
        # keeps the array writable, as fromstring's result was.
        signal = np.frombuffer(data, dtype=np.float32).copy()
        pitch = self._pitch_o(signal)[0]
        confidence = self._pitch_o.get_confidence()
        if confidence > 0.9:
            # The float signal is analysed directly: casting it to uint16
            # (as before) discards the waveform, and librosa expects
            # floating-point audio.
            tuning = librosa.estimate_tuning(y=signal, sr=self.sampleRate)
            # NOTE(review): confirm pitch and tuning share a unit here.
            self.pitch = pitch - tuning
            self.confidence = confidence
            self.pitches.append((self.pitch, self.confidence, t))
        else:
            self.pitch = 0
            self.confidence = 0
    time.sleep(0.00001)
def getChromagramSTFT(audioSegment, sampleRate, FFTLength, hopLength, tuning=True):
    """
    Description:
        Compute a short-time chromagram from an STFT of ``FFTLength`` samples
        taken every ``hopLength`` samples.
    Arguments:
        audioSegment: audio samples in an array
                      (for example the first value returned by librosa.load)
        sampleRate: sample rate of the audio
        FFTLength: FFT window size, passed to chroma_stft as ``n_fft``
        hopLength: number of samples between successive frames
        tuning (bool): if true, a tuning estimate (librosa.estimate_tuning)
                       is used to adjust the chromagram; otherwise 0 is used
    Returns:
        The chromagram returned by librosa.feature.chroma_stft.
    """
    if tuning:
        tuningDiff = librosa.estimate_tuning(y=audioSegment, sr=sampleRate,
                                             resolution=1e-2)
    else:
        tuningDiff = 0
    # The audio is passed by keyword: recent librosa releases reject it as a
    # positional argument.
    chroma_stft = librosa.feature.chroma_stft(y=audioSegment,
                                              sr=sampleRate,
                                              S=None,
                                              n_fft=FFTLength,
                                              hop_length=hopLength,
                                              norm=np.inf,
                                              tuning=tuningDiff)
    return chroma_stft
def calculate_tuning_and_tones(self, y_harmonic, sr):
    """Store the tuning estimate and the mean energy of each chroma row.

    Sets ``self.tuning`` to the estimated tuning of ``y_harmonic`` and
    ``self.pitchMeanEnergies`` to a list with one mean value per row of the
    tuning-corrected CQT chromagram. The elapsed time of each step is
    reported through ``debug_print``.
    """
    started = time.time()
    self.tuning = librosa.estimate_tuning(y=y_harmonic, sr=sr).tolist()
    debug_print('tuning', time.time() - started)

    started = time.time()
    chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr,
                                        tuning=self.tuning)
    self.pitchMeanEnergies = [sum(row) / len(row) for row in chroma]
    debug_print('tones', time.time() - started)
def get_features(self, sr):
    """
    Estimate the tuning of the audio held in ``self.bytes``.

    The bytes are converted to a float time series with ``buf_to_float`` and
    passed to ``estimate_tuning``. Tempo extraction is currently disabled
    (see the commented-out lines), so only the tuning estimate is returned.

    librosa documentation: https://librosa.github.io/librosa/

    Returns a one-element list containing the tuning estimate.
    """
    timeseries = buf_to_float(self.bytes)
    # Keyword arguments: recent librosa releases no longer accept the audio
    # and sample rate positionally.
    pitch = estimate_tuning(y=timeseries, sr=sr)
    # onset_env = onset_strength(timeseries, sr)
    # temp = tempo(onset_env, sr)[0]
    return [pitch]
def getData(filename):
    """Extract a list of 14 summary audio features from ``filename``.

    Returns, in order: mean spectral contrast, mean mel spectrogram, mean of
    the harmonic and of the percussive component, mean MFCC, mean MFCC delta,
    mean beat-synchronous MFCC/delta, mean chroma, mean beat-synchronous
    chroma, mean of the stacked beat features, mean RMS energy, tuning
    estimate, zero-crossing count and tempo. The values are not normalised.
    """
    print("Gretting data for {}".format(filename))
    # One hop length for every frame-based feature: beat frames index into
    # the MFCC and chroma matrices, so all three must share a frame rate.
    # (MFCCs previously used 256 while beats and chroma used the default.)
    hop_length = 256
    # Load the example clip
    y, sr = librosa.load(filename)
    # Short-time Fourier transform (STFT)
    S = np.abs(librosa.stft(y))
    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    # Beat track on the percussive signal
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                                 hop_length=hop_length)
    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    # And the first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)
    # Stack and synchronize between beat events (mean aggregation, the default)
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]),
                                           beat_frames)
    # Compute chroma features from the harmonic signal
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr,
                                            hop_length=hop_length)
    # Aggregate chroma features between beat events using the median
    beat_chroma = librosa.feature.sync(chromagram, beat_frames,
                                       aggregate=np.median)
    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
    # Average the energy
    avgEnergy = np.mean(librosa.feature.rmse(y=y))
    # Estimate tuning
    tuning = librosa.estimate_tuning(y=y, sr=sr)
    zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))
    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))
    avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))
    # The min-max normalised copy computed here before was never used or
    # returned, so it has been dropped; callers still get the raw values.
    raw = [avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic),
           np.mean(y_percussive), np.mean(mfcc), np.mean(mfcc_delta),
           np.mean(beat_mfcc_delta), np.mean(chromagram), np.mean(beat_chroma),
           np.mean(beat_features), avgEnergy, tuning, zeroCrossings, tempo]
    return raw
def load_and_gen_obj(self):
    """Load ``self.filename`` and cache the features derived from it.

    Sets ``librosa_rep`` and ``samp_rate`` (from ``lbr.load``), ``spectro``
    (transposed mel spectrogram built with ``MEL_KWARGS``), ``duration``,
    ``onset_env``, ``tempo`` and ``tuning`` on the instance.
    """
    print("Loading song.")
    self.librosa_rep, self.samp_rate = lbr.load(self.filename)
    # ".T" gives the transposed version of the NumPy array
    self.spectro = lbr.feature.melspectrogram(y=self.librosa_rep,
                                              sr=self.samp_rate,
                                              **MEL_KWARGS).T
    self.duration = lbr.get_duration(y=self.librosa_rep, sr=self.samp_rate)
    self.onset_env = lbr.onset.onset_strength(y=self.librosa_rep,
                                              sr=self.samp_rate)
    # The envelope must go in as ``onset_envelope``: the first positional
    # parameter of beat.tempo is the raw audio ``y``, so passing the envelope
    # positionally made librosa treat it as a waveform.
    self.tempo = lbr.beat.tempo(onset_envelope=self.onset_env,
                                sr=self.samp_rate)
    self.tuning = lbr.estimate_tuning(y=self.librosa_rep, sr=self.samp_rate)
    #self.mel_freq = lbr.mel_frequencies(40)
    print("Features and spectrogram extracted.")
def estimate_tuning(input_file):
    '''Print the estimated tuning of an audio file, expressed in cents.

    The file is loaded, its harmonic component is extracted, and the tuning
    estimate of that component is multiplied by 100 for display.
    '''
    print('Loading ', input_file)
    audio, rate = librosa.load(input_file)

    print('Separating harmonic component ... ')
    harmonic_part = librosa.effects.harmonic(audio)

    print('Estimating tuning ... ')
    # The estimate is taken from the harmonic component only.
    offset = librosa.estimate_tuning(y=harmonic_part, sr=rate)

    print('{:+0.2f} cents'.format(100 * offset))
def getData(filename):
    """Return a (1, 7) array of summary audio features scaled by their sum.

    The features are, in order: mean RMS energy, tuning estimate, tempo, mean
    chroma, mean mel spectrogram, mean MFCC and mean spectral contrast. Each
    value is divided by the sum of all seven.
    """
    print("Gretting data for{}".format(filename))
    y, sr = librosa.load(filename)  # load song
    S = np.abs(librosa.stft(y))
    avgEnergy = np.mean(librosa.feature.rmse(y=y))
    tuning = librosa.estimate_tuning(y=y, sr=sr)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    tempo = librosa.beat.estimate_tempo(onset_env, sr=sr)
    # zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))
    avgChroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))
    avgMFCC = np.mean(librosa.feature.mfcc(y=y, sr=sr))
    avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))
    raw = [avgEnergy, tuning, tempo, avgChroma, avgMelSpectro, avgMFCC,
           avgSpectralContrast]
    # Scale so the values sum to 1. This is NOT a [-1, 1] normalisation:
    # individual results are unbounded when features have mixed signs.
    total = sum(raw)
    norm = [float(value) / total for value in raw]
    return np.array([norm])
def add_tuning_tempo(y, sr, label_id, features, labels):
    """
    Append a (tuning, tempo) feature pair and its label.

    Input
        y: song data
        sr: sample rate
        label_id: label (genre) id
        features: list that receives the (tuning, tempo) tuple
        labels: list that receives label_id
    Description
        Runs beat tracking and tuning estimation on y, then appends the
        result to features and label_id to labels. Both lists are modified
        in place; nothing is returned.
    """
    beat_result = librosa.beat.beat_track(y=y, sr=sr)
    bpm = beat_result[0]
    offset = librosa.estimate_tuning(y=y, sr=sr)
    features.append((offset, bpm))
    labels.append(label_id)
def extract_feature(X, sample_rate):
    """Compute time-averaged spectral features and the tuning of a signal.

    Args:
        X: Audio time series.
        sample_rate: Sample rate of ``X``.

    Returns:
        Tuple ``(mfccs, chroma, mel, contrast, tonnetz, tuning)``. The first
        five are per-feature means over the time axis; ``tuning`` is the
        value returned by ``librosa.estimate_tuning``.
    """
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,
                    axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,
                     axis=0)
    tuning = librosa.estimate_tuning(y=X, sr=sample_rate)
    # Audio passed by keyword, consistent with the other calls here; recent
    # librosa releases reject it as a positional argument.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,
                  axis=0)
    contrast = np.mean(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    tonnetz = np.mean(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
                                sr=sample_rate).T, axis=0)
    return mfccs, chroma, mel, contrast, tonnetz, tuning
def __test(target_hz, resolution, bins_per_octave, tuning):
    # Check that the tuning estimated for a pure sine at target_hz is within
    # three resolution steps of the expected value. `t` and `sr` come from
    # the enclosing scope.
    tone = np.sin(2 * np.pi * target_hz * t)
    estimate_kwargs = dict(resolution=resolution,
                           bins_per_octave=bins_per_octave,
                           y=tone,
                           sr=sr,
                           n_fft=2048,
                           fmin=librosa.note_to_hz('C4'),
                           fmax=librosa.note_to_hz('G#9'))
    estimated = librosa.estimate_tuning(**estimate_kwargs)

    # Round the absolute error to the number of decimals of the resolution
    decimals = int(-np.log10(resolution))
    error = np.around(np.abs(tuning - estimated), decimals)

    # We'll accept an answer within three bins of the resolution
    assert error <= 3 * resolution
def adjust_tuning(input_file, output_file):
    '''Pitch-correct an audio file by its estimated tuning and save it.

    The tuning is estimated on the harmonic component of the input, the
    opposite shift is applied to the full signal, and the result is written
    to output_file with librosa.output.write_wav.
    '''
    print('Loading ', input_file)
    audio, rate = librosa.load(input_file)

    print('Separating harmonic component ... ')
    harmonic_part = librosa.effects.harmonic(audio)

    print('Estimating tuning ... ')
    # The estimate is taken from the harmonic component only.
    offset = librosa.estimate_tuning(y=harmonic_part, sr=rate)
    print('{:+0.2f} cents'.format(100 * offset))

    correction = -offset
    print('Applying pitch-correction of {:+0.2f} cents'.format(-100 * offset))
    corrected = librosa.effects.pitch_shift(audio, rate, correction)

    print('Saving tuned audio to: ', output_file)
    librosa.output.write_wav(output_file, corrected, rate)
def adjust_tuning(input_file, output_file):
    '''Load audio, estimate tuning, apply pitch correction, and save.

    The tuning is estimated on the harmonic component of the input, the
    opposite shift is applied to the full signal, and the result is written
    to output_file with sf.write.
    '''
    print('Loading ', input_file)
    y, sr = librosa.load(input_file)

    print('Separating harmonic component ... ')
    y_harm = librosa.effects.harmonic(y)

    print('Estimating tuning ... ')
    # Just track the pitches associated with high magnitude
    tuning = librosa.estimate_tuning(y=y_harm, sr=sr)
    print('{:+0.2f} cents'.format(100 * tuning))

    print('Applying pitch-correction of {:+0.2f} cents'.format(-100 * tuning))
    # sr and n_steps are passed by keyword: recent librosa releases accept
    # only the signal positionally.
    y_tuned = librosa.effects.pitch_shift(y, sr=sr, n_steps=-tuning)

    print('Saving tuned audio to: ', output_file)
    sf.write(output_file, y_tuned, sr)
def __test(target_hz, resolution, bins_per_octave, tuning):
    # Check that the tuning estimated for a pure sine at target_hz matches
    # the expected value modulo one bin, within three resolution steps.
    # `t` and `sr` come from the enclosing scope.
    tone = np.sin(2 * np.pi * target_hz * t)
    estimate_kwargs = dict(resolution=resolution,
                           bins_per_octave=bins_per_octave,
                           y=tone,
                           sr=sr,
                           n_fft=2048,
                           fmin=librosa.note_to_hz('C4'),
                           fmax=librosa.note_to_hz('G#9'))
    estimated = librosa.estimate_tuning(**estimate_kwargs)

    # Round to the proper number of decimals
    decimals = int(-np.log10(resolution))
    error = np.around(tuning - estimated, decimals)

    # The error wraps around at 1.0, so take the smaller of the distances
    # measured in the positive and in the negative direction
    wrapped = [np.mod(error, 1.0), np.mod(-error, 1.0)]
    smallest = np.min(wrapped)

    # We'll accept an answer within three bins of the resolution
    assert smallest <= 3 * resolution
def beat_track(input_file):
    """Print beat, tuning and per-pitch-class energy information for a file.

    The audio is loaded at 22050 Hz and split into harmonic and percussive
    parts. Beats are tracked on the percussive part; tuning and the CQT
    chromagram are computed on the harmonic part. Nothing is returned.
    """
    # === LOADING ===
    print('Loading ', input_file)
    audio, rate = librosa.load(input_file, sr=22050)
    harmonic_part, percussive_part = librosa.effects.hpss(audio)

    # Hop size of 512 samples @ 22KHz ~= 23ms
    hop = 512

    print('Tracking beats')
    bpm, beat_frames = librosa.beat.beat_track(y=percussive_part, sr=rate,
                                               hop_length=hop)
    print('Estimated tempo: {:0.2f} beats per minute'.format(bpm))

    # === TEMPO ===
    # beat_frames holds the frame numbers of beat events.
    beat_seconds = librosa.frames_to_time(beat_frames, sr=rate,
                                          hop_length=hop)
    print(bpm)
    print(beat_frames)
    print(beat_seconds)

    # === TUNING ===
    print('Estimating tuning ... ')
    offset = librosa.estimate_tuning(y=harmonic_part, sr=rate)
    print(offset)
    print('{:+0.2f} cents'.format(100 * offset))

    # === NOTES ===
    chroma = librosa.feature.chroma_cqt(y=harmonic_part, sr=rate,
                                        tuning=offset)
    # Mean energy of each chromagram row
    row_means = [sum(row) / len(row) for row in chroma]
    print(row_means)
def preprocess_librosa(audiopath, feparam, n_bins=84, bins_per_octave=12,
                       mod_steps=(0, )):
    """Compute tuning-corrected CQT magnitudes for pitch-shifted copies of a file.

    Args:
        audiopath: Path of the audio file to load.
        feparam: Mapping providing ``'fs'`` (target sample rate) and
            ``'stereo_to_mono'`` (passed to ``librosa.load`` as ``mono``).
        n_bins: Number of CQT bins.
        bins_per_octave: CQT bins per octave.
        mod_steps: Pitch shifts, in steps, applied to the audio; one
            spectrogram is produced per entry.

    Returns:
        List with one transposed CQT magnitude array per entry of
        ``mod_steps``, in the same order.
    """
    x, sr = librosa.load(audiopath, sr=feparam['fs'],
                         mono=feparam['stereo_to_mono'])
    # The CQT interprets ``tuning`` in fractions of one of ITS bins, so the
    # estimate has to use the same bins_per_octave. With the default of 12
    # this is unchanged from before.
    tuning = librosa.estimate_tuning(y=x, sr=sr,
                                     bins_per_octave=bins_per_octave)
    Xs = []
    for mod_step in mod_steps:
        shifted = librosa.effects.pitch_shift(x, sr=sr, n_steps=mod_step)
        magnitudes = np.abs(
            librosa.core.cqt(shifted,
                             sr=sr,
                             n_bins=n_bins,
                             bins_per_octave=bins_per_octave,
                             tuning=tuning,
                             window='hamming',
                             norm=2))
        Xs.append(magnitudes.T)
    return Xs
def getChromagramCQT(audioSegment, sampleRate, hopLength, tuning=False):
    """
    Description:
        Compute a short-time chromagram using a constant-Q transform with
        36 bins per octave over 7 octaves.
    Arguments:
        audioSegment: audio samples in an array
                      (for example the first value returned by librosa.load)
        sampleRate: sample rate of the audio
        hopLength: number of samples between successive frames
        tuning (bool): if true, a tuning estimate (librosa.estimate_tuning)
                       is used to adjust the chromagram; otherwise 0 is used
    Returns:
        The chromagram returned by librosa.feature.chroma_cqt
        (12 chroma bins).
    """
    binsPerOctave = 36
    if tuning:
        # The CQT applies ``tuning`` in fractions of one of its own bins, so
        # the estimate must use the same resolution. Estimating at the
        # default 12 bins per octave made the correction three times too
        # small.
        tuningDiff = librosa.estimate_tuning(y=audioSegment,
                                             sr=sampleRate,
                                             resolution=1e-2,
                                             bins_per_octave=binsPerOctave)
    else:
        tuningDiff = 0
    # Audio passed by keyword: recent librosa releases reject it as a
    # positional argument.
    chroma_cq = librosa.feature.chroma_cqt(y=audioSegment,
                                           sr=sampleRate,
                                           C=None,
                                           hop_length=hopLength,
                                           fmin=None,
                                           norm=np.inf,
                                           threshold=0.0,
                                           tuning=tuningDiff,
                                           n_chroma=12,
                                           n_octaves=7,
                                           window=None,
                                           bins_per_octave=binsPerOctave,
                                           cqt_mode='full')
    return chroma_cq
def extract_feature(X, sample_rate):
    """Compute mean mel and spectral-contrast features plus the tuning of a signal.

    Args:
        X: Audio time series.
        sample_rate: Sample rate of ``X``.

    Returns:
        Tuple ``(mel, contrast, tuning)``: the mel spectrogram and the
        spectral contrast averaged over the time axis, and the value returned
        by ``librosa.estimate_tuning``.
    """
    stft = np.abs(librosa.stft(X))
    tuning = librosa.estimate_tuning(y=X, sr=sample_rate)
    # Audio passed by keyword: recent librosa releases reject it as a
    # positional argument.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,
                  axis=0)
    contrast = np.mean(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    return mel, contrast, tuning
def tunning(signal, fs, **kwargs):
    """Return librosa's tuning estimate for *signal* sampled at *fs*.

    The estimate is an offset in fractions of a bin relative to A440.
    Extra keyword arguments are accepted but not used.
    """
    offset = librosa.estimate_tuning(y=signal, sr=fs)
    return offset
def create_dataset(input_path, output_path):
    """Extract per-track features and write them, grouped by label, to an HDF5 file.

    Every sub-directory of ``input_path`` is treated as one label and every
    file in it as one track. Each track yields a 35-value feature vector:
    tempo, tuning, 6 mean tonnetz values, 20 mean MFCCs and 7 mean spectral
    contrast values. All vectors are min-max normalised per feature across
    the whole data set. The output file holds one flattened dataset per
    label plus a ``'vector_size'`` dataset.

    Args:
        input_path: Directory containing one sub-directory per label.
        output_path: Path of the HDF5 file to create (overwritten).
    """
    print("Creating h5 from {} to file {}".format(input_path, output_path))
    feature_vector_dim = 35
    labels = os.listdir(input_path)

    # Rows are collected in lists. The previous fixed buffer of 100 rows per
    # label overflowed on larger folders (and reported the overflow as
    # "corrupted files"), and its 10-byte label column truncated longer
    # label names so their rows were never matched on output.
    rows = []
    row_labels = []
    for label in labels:
        print("Data for {}".format(label))
        instrument_dir = os.path.join(input_path, label)
        files = os.listdir(instrument_dir)
        skipped = 0
        # Read files for each genre
        for i, track in enumerate(files):
            print(" {} of {}".format(i + 1, len(files)), end="\r")
            sys.stdout.flush()
            try:
                y, sr = librosa.load(os.path.join(instrument_dir, track))
                stft = np.abs(librosa.stft(y))
                arr = np.empty(feature_vector_dim)
                arr[0] = librosa.beat.beat_track(y=y, sr=sr)[0]
                arr[1] = librosa.estimate_tuning(y=y, sr=sr)
                arr[2:8] = np.mean(librosa.feature.tonnetz(
                    y=librosa.effects.harmonic(y), sr=sr), axis=1)
                arr[8:28] = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)
                arr[28:] = np.mean(
                    librosa.feature.spectral_contrast(S=stft, sr=sr), axis=1)
            except Exception:
                # Best effort: an unreadable or too-short track is skipped.
                # KeyboardInterrupt/SystemExit still propagate.
                skipped += 1
            else:
                rows.append(arr)
                row_labels.append(label)
        print("")
        if skipped > 0:
            print(" Skipped {} corrupted files".format(skipped))

    data_matrix = np.array(rows, dtype=float).reshape(-1, feature_vector_dim)

    # Normalise each feature to [0, 1]; constant features map to 0.
    if len(data_matrix):
        minis = data_matrix.min(axis=0)
        diff = data_matrix.max(axis=0) - minis
        diff[diff == 0] = 1
        data_matrix = (data_matrix - minis) / diff

    # Write to file; the context manager closes it even if a write fails.
    with h5py.File(output_path, 'w') as out_file:
        for label in labels:
            mask = np.array([name == label for name in row_labels],
                            dtype=bool)
            out_file.create_dataset(label, data=data_matrix[mask].flatten())
        out_file.create_dataset('vector_size', data=[feature_vector_dim])
    print("Done")
def run(inputDirectory, outputDirectory, parameterFileName, spectrumFileName, fileNameAppendix, attackTime, sustainTime, \
        centroid_flag, f0normCentroid_flag, rolloff_flag, bandwidth_flag, spread_flag, highLowEnergy_flag, \
        tristimulus_flag, inharmonicity_flag, noisiness_flag, oddeven_flag, tuning_flag, crossingRate_flag, \
        rms_flag, entropy_flag, temporalCentroid_flag, logAttackTime_flag, decayTime_flag, vectorOutput_flag):
    """Analyse every series folder under inputDirectory and save parameters, spectra and plots.

    Each entry of inputDirectory is treated as a series folder and each entry
    of a series folder as one impulse recording. For every recording the
    enabled ``*_flag`` parameters are computed; per-series results are passed
    to ``CalculateStatistics`` together with a values list and a deviations
    list. The function then writes, into outputDirectory:

    * ``<parameterFileName>_<fileNameAppendix>.npy`` and ``.csv`` with the
      rows of the enabled parameters,
    * ``<spectrumFileName>_<fileNameAppendix>.csv`` with the averaged attack,
      sustain and decay spectra of every series,
    * one figure per series with the three spectra side by side (EPS when
      vectorOutput_flag is true).

    outputDirectory is deleted and recreated first, so any previous content
    is lost. Nothing is returned.
    """
    # clear output folder
    if os.path.isdir(outputDirectory):
        shutil.rmtree(outputDirectory)
    os.mkdir(outputDirectory)
    samplingRate = 0

    # ----------Setting up variables required for calculation and saving of parameter data---------------
    data_array = []
    # Every result list starts with its row title, so that the title ends up
    # in the first column of the saved table.
    series_names, centroid_values, centroid_deviations, f0NormalizedCentroid_values, f0NormalizedCentroid_deviations, \
    rolloff_values, rolloff_deviations, bandwidth_values, bandwidth_deviation, spread_values, spread_deviations, \
    highLowEnergy_values, highLowEnegry_deviations, tristimulus1_values, tristimulus1_deviations, \
    tristimulus2_values, tristimulus2_deviations, tristimulus3_values, tristimulus3_deviations, \
    inharmonicity_values, inharmonicity_deviations, noisiness_values, noisiness_deviations, \
    oddEvenRatio_values, oddEvenRatio_deviations, tuning_values, tuning_deviations, \
    zeroCrossingRate_values, zeroCrossingRate_deviations, rms_values, rms_deviations, entropy_values, entropy_deviations, \
    temporalCentroid_values, temporalCentroid_deviations, logAttackTime_values, logAttackTime_deviations, \
    decayTime_values, decayTime_deviations, foundFundumentalPitches = \
        [" "], ["Spectrum Centroid"], ["Centroid Deviation"], ["F0 Normalized Centroid"], ["F0 Normalized Centroid Deviations"],\
        ["Rolloff"], ["Rolloff Deviation"], ["Bandwidth"], ["Bandwidth Deviation"], ["Spread"], ["Spread Deviation"], \
        ["High Energy - Low Energy Ratio"], ["High Energy - Low Energy Ratio Deviations"], ["Tristimulus 1"], ["Tristimulus 1 Deviations"], \
        ["Tristimulus 2"], ["Tristimulus 2 Deviations"], ["Tristimulus 3"], ["Tristimulus 3 Deviations"], \
        ["Inharmonicity"], ["Inharmonicity Deviation"], ["Noisiness"], ["Noisiness Deviations"], \
        ["Odd-Even Ratio"], ["Odd-Even Ratio Deviation"], ["Tuning"], ["Tuning Deviation"], \
        ["Zero Crossing Rate"], ["Zero Crossing Rate Deviation"], ["RMS"], ["RMS Deviation"], ["Entropy"], ["Entropy Deviation"], \
        ["Temporal Centroid"], ["Temporal Centroid Deviations"], ["Log Attack Time"], ["Log Attack Time Deviations"], \
        ["Decay Time"], ["Decay Time Deviation"], ["Average Found Fundumental Pitches"]
    allAttackSpectrums, allSustainSpectrums, allDecaySpectrums, allAttackFrequencies, allSustainFrequencies, \
    allDecayFrequencies, seriesNames = [], [], [], [], [], [], []

    # Spectrum scaling factors
    # (the max* values are never updated below because the lines that
    # compute them are commented out, so 0 is what DrawSpectrum receives)
    impulseTime, maxAttack, maxSustain, maxDecay = 0, 0, 0, 0

    # ---------------Calculating spectrums and parameters------------------
    for seriesDirectory in os.listdir(os.fsencode(inputDirectory)):
        seriesDirectory = inputDirectory + "/" + os.fsdecode(seriesDirectory)
        print("Entering folder: " + seriesDirectory)

        # Per-series accumulators, reset for every folder.
        impulses, attackSpectrums, sustainSpectrums, decaySpectrums, centroids, f0normCentroids, rolloffs, bandwidths, \
        spreads, highLowEnergies, tristimulus1s, tristimulus2s, tristimulus3s, inharmonicities, noisinesses, \
        oddEvenRatios, tunings, crossingRates, rmss, entropies, temporalCentroids, logAttackTimes, decayTimes, \
        pitchesHz = [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []

        # If harmonic data and normalized centroids make no sense it may be caused by improper fundumental pitch detection.
        # Check in parameterData.csv whether the fundumentals were properly found.
        # If not, then manually add the correct pitch below and rerun the offending sounds.
        # With a non-zero value here the `fundumentalPitch == 0` branches
        # below (which fall back to the detected pitch) are never taken.
        fundumentalPitch = 261.63

        for impulseFile in os.listdir(os.fsencode(seriesDirectory)):
            impulseFileName = seriesDirectory + "/" + os.fsdecode(impulseFile)
            args = Arguments()
            args.fundumentalPitch = fundumentalPitch

            # librosa loading
            print("Loading file: " + impulseFileName)
            args.impulseLIB, samplingRate = librosa.load(impulseFileName)
            # NOTE(review): this discards the rate returned by librosa.load.
            # librosa resamples to its own default rate unless sr is given,
            # so 44100 may not match the samples in args.impulseLIB; confirm
            # before relying on the frequency axes computed from it.
            samplingRate = 44100
            #args.impulseLIB = librosa.to_mono(args.impulseLIB)

            # iracema loading
            args.impulseIRA = iracema.Audio(impulseFileName)
            args.impulseFFT = iracema.spectral.fft(args.impulseIRA, window_size=2048, hop_size=1024)
            args.pitch = iracema.pitch.hps(args.impulseFFT, minf0=50, maxf0=500)
            args.harmonicsIRA = iracema.harmonics.extract(args.impulseFFT, args.pitch)

            pitchesHz.append(np.median(args.pitch.data))

            # Per-recording parameters, each guarded by its flag.
            if centroid_flag:
                centroids.append(np.mean(librosa.feature.spectral_centroid(args.impulseLIB)))
            if f0normCentroid_flag:
                if fundumentalPitch == 0:
                    f0normCentroids.append((np.mean(librosa.feature.spectral_centroid(args.impulseLIB) / np.median(args.pitch.data))))
                else:
                    f0normCentroids.append((np.mean(librosa.feature.spectral_centroid(args.impulseLIB) / fundumentalPitch)))
            if rolloff_flag:
                rolloffs.append(np.mean(librosa.feature.spectral_rolloff(args.impulseLIB)))
            if bandwidth_flag:
                bandwidths.append(np.mean(librosa.feature.spectral_bandwidth(args.impulseLIB)))
            if spread_flag:
                spreads.append(np.mean(iracema.features.spectral_spread(args.impulseFFT).data))
            if tuning_flag:
                tunings.append(np.mean(librosa.estimate_tuning(args.impulseLIB)))
            if crossingRate_flag:
                crossingRates.append(np.mean(librosa.feature.zero_crossing_rate(args.impulseLIB)))
            if rms_flag:
                rmss.append(pc.CalculateRMS(args))
            if entropy_flag:
                entropies.append(np.mean(iracema.features.spectral_entropy(args.impulseFFT).data))
            if temporalCentroid_flag:
                temporalCentroids.append(pc.CalculateTemporalCentroid(args))
            if logAttackTime_flag:
                logAttackTimes.append(pc.CalculateLogAttackTime(args))
            if decayTime_flag:
                decayTimes.append(pc.CalculateDecayTime(args))

            impulses = pc.InsertIntoVstack(args.impulseLIB, impulses)

        # Per-series spectra and harmonic parameters, computed from all
        # recordings of the series at once.
        fullFrequencies, fullSpectrums, attackFrequencies, attackSpectrums, sustainFrequencies, sustainSpectrums, \
        decayFrequencies, decaySpectrums = pc.CalculateFFTs(impulses, samplingRate, attackTime, sustainTime)

        if fundumentalPitch == 0:
            fundumentalPitch = np.median(pitchesHz)
        foundFundumentalPitches.append(fundumentalPitch)
        mathHarmFreq = pc.CreateMathematicalHarmonicFrequencyVector(fundumentalPitch, n=20)
        harmonicData = pc.ExtractHarmonicDataFromSpectrums(fullSpectrums, fullFrequencies, mathHarmFreq, bufforInHZ=20)

        if noisiness_flag:
            noisinesses = pc.CalculateNoisiness(fullSpectrums, fullFrequencies, harmonicData)
        if highLowEnergy_flag:
            highLowEnergies = pc.CalculateHighEnergyLowEnergyRatio(fullSpectrums, fullFrequencies)
        if tristimulus_flag:
            tristimulus1s, tristimulus2s, tristimulus3s = pc.CalculateTristimulus(harmonicData)
        if inharmonicity_flag:
            inharmonicities = pc.CalculateInharmonicity(harmonicData)
        if oddeven_flag:
            oddEvenRatios = pc.CalculateOERs(harmonicData)

        # Dividing spectrum data into segments
        avrAttackSpectrum = pc.CalculateAverageVector(attackSpectrums)
        avrSustainSpectrum = pc.CalculateAverageVector(sustainSpectrums)
        avrDecaySpectrum = pc.CalculateAverageVector(decaySpectrums)

        allAttackSpectrums.append(avrAttackSpectrum)
        allSustainSpectrums.append(avrSustainSpectrum)
        allDecaySpectrums.append(avrDecaySpectrum)
        allAttackFrequencies.append(attackFrequencies)
        allSustainFrequencies.append(sustainFrequencies)
        allDecayFrequencies.append(decayFrequencies)
        seriesNames.append(seriesDirectory)

        # Overwritten for every series; the plots below use the value from
        # the last series processed.
        impulseTime = len(impulses[0,:])/samplingRate
        #maxAttack = max([maxAttack, max(avrAttackSpectrum)])
        #maxSustain = max([maxSustain, max(avrSustainSpectrum)])
        #maxDecay = max([maxDecay, max(avrDecaySpectrum)])

        seriesName = seriesDirectory.replace(inputDirectory + "/", "")
        series_names.append(seriesName)

        # Called for every parameter regardless of its flag; a disabled
        # parameter passes an empty list.
        # NOTE(review): CalculateStatistics is defined elsewhere; presumably
        # it appends a summary value and a deviation to the two lists.
        CalculateStatistics(centroids, centroid_values, centroid_deviations)
        CalculateStatistics(f0normCentroids, f0NormalizedCentroid_values, f0NormalizedCentroid_deviations)
        CalculateStatistics(rolloffs, rolloff_values, rolloff_deviations)
        CalculateStatistics(bandwidths, bandwidth_values, bandwidth_deviation)
        CalculateStatistics(spreads, spread_values, spread_deviations)
        CalculateStatistics(highLowEnergies, highLowEnergy_values, highLowEnegry_deviations)
        CalculateStatistics(tristimulus1s, tristimulus1_values, tristimulus1_deviations)
        CalculateStatistics(tristimulus2s, tristimulus2_values, tristimulus2_deviations)
        CalculateStatistics(tristimulus3s, tristimulus3_values, tristimulus3_deviations)
        CalculateStatistics(inharmonicities, inharmonicity_values, inharmonicity_deviations)
        CalculateStatistics(noisinesses, noisiness_values, noisiness_deviations)
        CalculateStatistics(oddEvenRatios, oddEvenRatio_values, oddEvenRatio_deviations)
        CalculateStatistics(tunings, tuning_values, tuning_deviations)
        CalculateStatistics(crossingRates, zeroCrossingRate_values, zeroCrossingRate_deviations)
        CalculateStatistics(rmss, rms_values, rms_deviations)
        CalculateStatistics(entropies, entropy_values, entropy_deviations)
        CalculateStatistics(temporalCentroids, temporalCentroid_values, temporalCentroid_deviations)
        CalculateStatistics(logAttackTimes, logAttackTime_values, logAttackTime_deviations)
        CalculateStatistics(decayTimes, decayTime_values, decayTime_deviations)

    # -----------------Saving results-------------------
    # Saving parameter data into .npy file
    # Rows are stacked in a fixed order; only enabled parameters contribute.
    data_array = series_names
    if centroid_flag:
        data_array = np.vstack((data_array, centroid_values, centroid_deviations))
    if f0normCentroid_flag:
        data_array = np.vstack((data_array, f0NormalizedCentroid_values, f0NormalizedCentroid_deviations))
    if rolloff_flag:
        data_array = np.vstack((data_array, rolloff_values, rolloff_deviations))
    if bandwidth_flag:
        data_array = np.vstack((data_array, bandwidth_values, bandwidth_deviation))
    if spread_flag:
        data_array = np.vstack((data_array, spread_values, spread_deviations))
    if highLowEnergy_flag:
        data_array = np.vstack((data_array, highLowEnergy_values, highLowEnegry_deviations))
    if tristimulus_flag:
        data_array = np.vstack((data_array, tristimulus1_values, tristimulus1_deviations))
        data_array = np.vstack((data_array, tristimulus2_values, tristimulus2_deviations))
        data_array = np.vstack((data_array, tristimulus3_values, tristimulus3_deviations))
    if inharmonicity_flag:
        data_array = np.vstack((data_array, inharmonicity_values, inharmonicity_deviations))
    if noisiness_flag:
        data_array = np.vstack((data_array, noisiness_values, noisiness_deviations))
    if oddeven_flag:
        data_array = np.vstack((data_array, oddEvenRatio_values, oddEvenRatio_deviations))
    if tuning_flag:
        data_array = np.vstack((data_array, tuning_values, tuning_deviations))
    if crossingRate_flag:
        data_array = np.vstack((data_array, zeroCrossingRate_values, zeroCrossingRate_deviations))
    if rms_flag:
        data_array = np.vstack((data_array, rms_values, rms_deviations))
    if entropy_flag:
        data_array = np.vstack((data_array, entropy_values, entropy_deviations))
    if temporalCentroid_flag:
        data_array = np.vstack((data_array, temporalCentroid_values, temporalCentroid_deviations))
    if logAttackTime_flag:
        data_array = np.vstack((data_array, logAttackTime_values, logAttackTime_deviations))
    if decayTime_flag:
        data_array = np.vstack((data_array, decayTime_values, decayTime_deviations))
    np.save(outputDirectory + '/' + parameterFileName + '_' + fileNameAppendix + '.npy', data_array)

    # Saving data into .csv file
    # Same rows and order as the .npy file, plus the fundamental pitches
    # used for each series as the last row.
    with open(outputDirectory + '/' + parameterFileName + '_' + fileNameAppendix + '.csv', 'w', newline='') as csvfile:
        dataWriter = csv.writer(csvfile, delimiter=',', quotechar=';', quoting=csv.QUOTE_MINIMAL)
        dataWriter.writerow(series_names)
        if centroid_flag:
            dataWriter.writerow(centroid_values)
            dataWriter.writerow(centroid_deviations)
        if f0normCentroid_flag:
            dataWriter.writerow(f0NormalizedCentroid_values)
            dataWriter.writerow(f0NormalizedCentroid_deviations)
        if rolloff_flag:
            dataWriter.writerow(rolloff_values)
            dataWriter.writerow(rolloff_deviations)
        if bandwidth_flag:
            dataWriter.writerow(bandwidth_values)
            dataWriter.writerow(bandwidth_deviation)
        if spread_flag:
            dataWriter.writerow(spread_values)
            dataWriter.writerow(spread_deviations)
        if highLowEnergy_flag:
            dataWriter.writerow(highLowEnergy_values)
            dataWriter.writerow(highLowEnegry_deviations)
        if tristimulus_flag:
            dataWriter.writerow(tristimulus1_values)
            dataWriter.writerow(tristimulus1_deviations)
            dataWriter.writerow(tristimulus2_values)
            dataWriter.writerow(tristimulus2_deviations)
            dataWriter.writerow(tristimulus3_values)
            dataWriter.writerow(tristimulus3_deviations)
        if inharmonicity_flag:
            dataWriter.writerow(inharmonicity_values)
            dataWriter.writerow(inharmonicity_deviations)
        if noisiness_flag:
            dataWriter.writerow(noisiness_values)
            dataWriter.writerow(noisiness_deviations)
        if oddeven_flag:
            dataWriter.writerow(oddEvenRatio_values)
            dataWriter.writerow(oddEvenRatio_deviations)
        if tuning_flag:
            dataWriter.writerow(tuning_values)
            dataWriter.writerow(tuning_deviations)
        if crossingRate_flag:
            dataWriter.writerow(zeroCrossingRate_values)
            dataWriter.writerow(zeroCrossingRate_deviations)
        if rms_flag:
            dataWriter.writerow(rms_values)
            dataWriter.writerow(rms_deviations)
        if entropy_flag:
            dataWriter.writerow(entropy_values)
            dataWriter.writerow(entropy_deviations)
        if temporalCentroid_flag:
            dataWriter.writerow(temporalCentroid_values)
            dataWriter.writerow((temporalCentroid_deviations))
        if logAttackTime_flag:
            dataWriter.writerow(logAttackTime_values)
            dataWriter.writerow(logAttackTime_deviations)
        if decayTime_flag:
            dataWriter.writerow(decayTime_values)
            dataWriter.writerow(decayTime_deviations)
        dataWriter.writerow(foundFundumentalPitches)
    print("Data saved to: " + parameterFileName + '_' + fileNameAppendix)

    # Saving spectrum data
    with open(outputDirectory + '/' + spectrumFileName + '_' + fileNameAppendix + '.csv', 'w', newline='') as csvfile:
        dataWriter = csv.writer(csvfile, delimiter=',', quotechar=';', quoting=csv.QUOTE_MINIMAL)
        # NOTE(review): writerow() iterates its argument, so the string
        # arguments below (the headings and the series name) are written one
        # character per column. Wrap them in a list if a single cell is
        # intended.
        for iterator in range(0, len(seriesNames)):
            dataWriter.writerow("Name: ")
            dataWriter.writerow(seriesNames[iterator])
            dataWriter.writerow("Attack spectrum: ")
            dataWriter.writerow(allAttackSpectrums[iterator])
            dataWriter.writerow("Attack frequencies: ")
            dataWriter.writerow(allAttackFrequencies[iterator])
            dataWriter.writerow("Sustain spectrum: ")
            dataWriter.writerow(allSustainSpectrums[iterator])
            dataWriter.writerow("Sustain frequencies: ")
            dataWriter.writerow(allSustainFrequencies[iterator])
            dataWriter.writerow("Decay Spectrum: ")
            dataWriter.writerow(allDecaySpectrums[iterator])
            dataWriter.writerow("Decay frequencies: ")
            dataWriter.writerow(allDecayFrequencies[iterator])
    print("Spectrums saved to: " + spectrumFileName + '_' + fileNameAppendix)

    # --------------------Plotting spectrums----------------------
    for iterator in range(0, len(seriesNames)):
        #plt.suptitle(seriesNames[iterator].replace(inputDirectory, ''), fontsize='xx-large')

        #converting frequencies to kHz for easier legibility
        # NOTE(review): the three k* arrays are computed but not used; the
        # DrawSpectrum calls below receive the unscaled frequencies.
        kAttackFrequencies = allAttackFrequencies[iterator]/1000
        kSustainFrequencies = allSustainFrequencies[iterator]/1000
        kDecayFrequencies = allDecayFrequencies[iterator]/1000

        plt.subplot(131)
        DrawSpectrum(allAttackFrequencies[iterator], allAttackSpectrums[iterator], maxAttack, '0', attackTime)
        plt.subplot(132)
        DrawSpectrum(allSustainFrequencies[iterator], allSustainSpectrums[iterator], maxSustain, attackTime, sustainTime)
        plt.subplot(133)
        DrawSpectrum(allDecayFrequencies[iterator], allDecaySpectrums[iterator], maxDecay, sustainTime, round(impulseTime, 2))

        # The figure path mirrors the series path, with the input directory
        # replaced by the output directory.
        outputFile = seriesNames[iterator].replace(inputDirectory, outputDirectory)
        print("Outputing to: " + outputFile)
        figure = plt.gcf()
        figure.set_size_inches(19, 8)
        if vectorOutput_flag:
            plt.savefig(outputFile, dpi=100, format="eps")
        else:
            plt.savefig(outputFile, dpi = 100)
        #plt.show()
        plt.clf()
# Debug output: shapes of the original signal and its 8 kHz version.
print(y.shape)
print(y_8k.shape)
print('sdgad')

# Figure 2: the STFT of the signal (top) and the signal reconstructed from
# that STFT (bottom).
plt.figure(2)
plt.subplot(211)
# The STFT is computed once and reused for both the plot and the inverse
# transform; it was previously computed twice.
c = librosa.stft(y)
plt.plot(c)
plt.subplot(212)
plt.plot(librosa.istft(c))
plt.show()

print('lllllllllllllllllllllll')
print(librosa.estimate_tuning(y=y, sr=sr))

# # Define the stream chunk size
# CHUNK = 1024
# # Open the wav file read-only
# wf = wave.open('weina.wav', 'rb')
#
# p = pyaudio.PyAudio()
#
# # Open the data stream
# stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
#                 channels=wf.getnchannels(),
#                 rate=wf.getframerate(),
#                 output=True)
#
# # Read data
def generate_accomp(melody_audio_fname,\ out_fname,\ use_pyin_notes=True,\ alyzer=None,\ generator=None,\ gen_method=None,\ roman_to_chords=True,\ do_estimate_tuning=False,\ do_remove_perc=False,\ min_chord_dur_beats=None,\ build_dir='.'): """ Generate accompaniment from the melody audio file. Parameters ---------- melody_audio_fname: string full path of audio file containing melody out_fname: string full path of audio file containing melody plus generated accompaniment use_pyin_notes: Boolean use pYIN:Notes to estimate melody notes """ # create analyzer and chord generator objects if alyzer is None: print 'make an AudioAnalyzer' alyzer = AudioAnalyzer() if gen_method is None: gen_method = chordgen.GEN_METHOD_SHORTEST if generator is None: # load FST print 'make ChordGenerator' if USE_BQ: isyms = fsm.SymbolTable(filename=BQ_INPUT_SYMS_FILE) osyms = fsm.SymbolTable(filename=BQ_OUTPUT_SYMS_FILE) fst = fsm.FST(filename=BQ_LOG_FST_FILE,isyms_table=isyms,osyms_table=osyms) rom2label = False else: isyms = fsm.SymbolTable(filename=INPUT_SYMS_FILE) osyms = fsm.SymbolTable(filename=OUTPUT_SYMS_FILE) fst = fsm.FST(filename=LOG_FST_FILE,isyms_table=isyms,osyms_table=osyms) rom2label = True generator = ChordGenerator(fst=fst,method=gen_method,roman_to_chords=rom2label,\ build_dir=build_dir) # load audio file and re-tune print 80*'.' print 'analyzing audio...\n\n' print 'loading audio file:',melody_audio_fname,'\n' y_orig, sr = librosa.load(melody_audio_fname) print 'computing beat times...' tempo, beats = librosa.beat.beat_track(y=y_orig, sr=sr, trim=False) beat_times = librosa.frames_to_time(beats, sr=sr) # print 'beat times:',beat_times beat_times = fill_beat_times(beat_times,tempo) print 'beat times:',beat_times onset_frames = librosa.onset.onset_detect(y=y_orig, sr=sr) onset_times = librosa.frames_to_time(onset_frames, sr=sr) print 'onset times:',onset_times # get rid of percussive parts of signal if do_remove_perc: print 'retain only harmonic portion of signal...' 
y_harm = librosa.effects.harmonic(y_orig) else: y_harm = y_orig if do_estimate_tuning: print 'estimate tuning...' tuning = librosa.estimate_tuning(y=y_harm, sr=sr) print 'correct tuning...' y_tuned = librosa.effects.pitch_shift(y_harm, sr, -tuning) else: tuning = 0 y_tuned = y_harm print 80*'.' if use_pyin_notes: print 'estimating melody...' tmp_audio_fname = 'tmp_melody_audio.wav' librosa.output.write_wav(path=tmp_audio_fname, y=y_tuned, sr=sr) mseq = alyzer.get_estimated_melody_sequence_pyin(audio_fname=tmp_audio_fname) else: # generate the accompaniment and write to a file # print 'generate accompaniment...' mseq = alyzer.get_estimated_melody_sequence(y_tuned,sr) if USE_BQ: mseq.repeat_events(beat_quant_level=BEAT_QUANT_LEVEL) print 'estimating key...' key = generator.estimate_key(mseq) print 'synthesizing audio to file...' generator.synthesize_to_file(output_audio_fname=out_fname,\ melody_sequence=mseq,\ fs=sr,\ beat_times=beat_times,\ # beat_times=onset_times,\ melody_audio=y_orig,\ tuning_offset=tuning,\ min_chord_dur_beats=min_chord_dur_beats) print 'done' print 80*'.'
def getFeature(self):
    """Build a 1-D feature vector for the recording at ``self.filename``.

    The vector is, in order: the estimated tuning offset, the max / min /
    mean / std of the frame-wise RMS energy, the max / mean / std of the
    zero-crossing rate, followed by the principal eigenvector of the MFCC
    Gram matrix (``mfcc . mfcc^T``).

    Returns
    -------
    numpy.ndarray
        ``np.append`` of the 8 scalar statistics and the eigenvector.
    """
    # Endpoint (voice activity) detection.
    # NOTE(review): getNewVoice() is handed straight to librosa.load, so it
    # presumably returns a path to the trimmed audio -- confirm.
    vad = Vad(self.filename)
    newVoice = vad.getNewVoice()
    y, sr = librosa.load(newVoice)

    # MFCCs, summarised by the eigenvector belonging to the largest
    # eigenvalue of their Gram matrix.
    ccc = librosa.feature.mfcc(y=y, sr=sr)
    gram = np.dot(ccc, ccc.T)
    # The Gram matrix is symmetric, so eigh applies and always returns real
    # values.  Eigenvectors are the COLUMNS of the returned matrix; the
    # original code indexed a row, which is not an eigenvector.
    eigenvalues, eigenvectors = np.linalg.eigh(gram)
    ccc1 = np.asarray(eigenvectors[:, np.argmax(eigenvalues)])

    # Energy statistics from a rectangular-window, non-centred STFT.
    S = librosa.magphase(librosa.stft(y, window=np.ones, center=False))[0]
    rms = librosa.feature.rmse(S=S)
    rms_max = np.max(rms[0])
    rms_min = np.min(rms[0])
    rms_mean = np.mean(rms[0])
    rms_std = np.std(rms[0])

    # Zero-crossing rate statistics.
    rate = librosa.feature.zero_crossing_rate(y)
    rate_max = np.max(rate)
    rate_mean = np.mean(rate)
    rate_std = np.std(rate)

    # Tuning offset estimated from the whole signal.
    e = librosa.estimate_tuning(y=y, sr=sr)

    result1 = np.array([
        e, rms_max, rms_min, rms_mean, rms_std, rate_max, rate_mean, rate_std
    ])
    return np.append(result1, ccc1)
def getData(filename, answers):
    """Extract one row of summary features for an audio file.

    Parameters
    ----------
    filename : str
        Path handed to ``librosa.load``.
    answers : list
        Label vector for the file; stored unchanged next to the features.

    Returns
    -------
    numpy.ndarray
        Object array of shape (1, 2): ``[[features, answers]]`` where
        ``features`` is the list of 14 scalar summaries built below.  The
        shape matches the ``(0, 2)`` array the rows are stacked onto.
    """
    print("Gretting data for {}".format(filename))
    hop_length = 256

    # Load the clip
    y, sr = librosa.load(filename)

    # Short-time Fourier transform (STFT) magnitude
    S = np.abs(librosa.stft(y))

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beat track on the percussive signal
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)

    # MFCCs of the raw signal and their first-order differences.
    # NOTE(review): the MFCCs use hop_length=256 while beat_track is called
    # without a hop_length; confirm the beat frame indices are meant to index
    # these MFCC frames.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Beat-synchronous MFCC features (default aggregation)
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]),
                                           beat_frames)

    # Chroma from the harmonic signal, median-aggregated between beats
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    beat_chroma = librosa.feature.sync(chromagram, beat_frames,
                                       aggregate=np.median)

    # Stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

    avgEnergy = np.mean(librosa.feature.rmse(y=y))
    tuning = librosa.estimate_tuning(y=y, sr=sr)
    zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))
    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))
    avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))

    raw = [
        avgSpectralContrast, avgMelSpectro,
        np.mean(y_harmonic), np.mean(y_percussive),
        np.mean(mfcc), np.mean(mfcc_delta), np.mean(beat_mfcc_delta),
        np.mean(chromagram), np.mean(beat_chroma), np.mean(beat_features),
        avgEnergy, tuning, zeroCrossings, tempo]

    # The original return statement had an unbalanced parenthesis and called
    # .reshape on a list.  Build the (1, 2) object row explicitly so the two
    # lists of different length are stored as single cells.
    row = np.empty((1, 2), dtype=object)
    row[0, 0] = raw
    row[0, 1] = answers
    return row


def _collect_genre(Data, genre, prefix, answers, fileName, folder):
    """Stack getData rows for every song in ``prefix + folder`` and save them."""
    for song in genre:
        try:
            row = getData("{}{}/{}".format(prefix, folder, song), answers)
            Data = np.vstack([Data, row])
        except Exception as err:
            # Best effort: a song that cannot be processed is reported and
            # skipped so the rest of the genre is still collected.
            print("ERROR ON SONG {}".format(song))
            print(repr(err))
    try:
        np.save(fileName, Data)
        print("SAVED {}!".format(fileName))
    except Exception as err:
        print("ERROR COUNLDN'T SAVE {}".format(fileName))
        print(repr(err))


def gethiphop(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Hiphop-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Hiphop-Samples")


def getjazz(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Jazz-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Jazz-Samples")


def getclassical(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Classical-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Classical-Samples")


def getcountry(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Country-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Country-Samples")


def getdance(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Dance-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Dance-Samples")


def getmetal(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Metal-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Metal-Samples")


def getreggae(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Reggae-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Reggae-Samples")


def getrock(Data, genre, prefix, answers, fileName):
    """Collect and save features for the songs under Rock-Samples."""
    _collect_genre(Data, genre, prefix, answers, fileName, "Rock-Samples")


# Guarded so that worker processes started with the "spawn" method can import
# this module without launching the whole job again.
if __name__ == "__main__":
    # create dataset for NN with 12 inputes and 4 outputs
    # DS = SupervisedDataSet(12, 8)
    # DS = SupervisedDataSet.loadFromFile("DataSet")

    # One-hot label per genre, in the order of the jobs below.
    answers = [[1 if col == row else 0 for col in range(8)] for row in range(8)]

    Data = np.array([]).reshape((0, 2))

    # list of song names on my computer by genre
    hiphop = listdir_nohidden("{}Hiphop-Samples".format(prefix))
    jazz = listdir_nohidden("{}Jazz-Samples".format(prefix))
    classical = listdir_nohidden("{}Classical-Samples".format(prefix))
    country = listdir_nohidden("{}Country-Samples".format(prefix))
    dance = listdir_nohidden("{}Dance-Samples".format(prefix))
    metal = listdir_nohidden("{}Metal-Samples".format(prefix))
    reggae = listdir_nohidden("{}Reggae-Samples".format(prefix))
    rock = listdir_nohidden("{}Rock-Samples".format(prefix))

    jobs = [
        (gethiphop, hiphop, "HiphopDataSet.npy"),
        (getjazz, jazz, "JazzDataSet.npy"),
        (getclassical, classical, "ClassicalDataSet.npy"),
        (getcountry, country, "CountryDataSet.npy"),
        (getdance, dance, "DanceDataSet.npy"),
        (getmetal, metal, "MetalDataSet.npy"),
        (getreggae, reggae, "ReggaeDataSet.npy"),
        (getrock, rock, "RockDataSet.npy"),
    ]
    processes = [
        Process(target=target,
                args=(Data, songs, prefix, answers[index], out_name))
        for index, (target, songs, out_name) in enumerate(jobs)
    ]
    # One process per genre: start them all, then wait for all of them.
    for process in processes:
        process.start()
    for process in processes:
        process.join()

    # np.save("CompleteRawDataSet.npy", Data)
    # tdata = np.array([]).reshape(0,12)
    # for i in range(5):
    #     tdata = np.vstack([tdata, Data[i][0]])
    # tdata = whiten(tdata)
    # print(tdata)
    # print(np.argmin(tdata, axis=0))
    # DS.saveToFile("DataSetComplete")
import librosa

# Load a recording, keep its harmonic part, estimate how far it is from
# standard tuning and write a corrected copy to res.wav.

# NOTE: despite its name, `err` is the sampling rate returned by librosa.load.
vox, err = librosa.load("teste.m4a")
vox_harm = librosa.effects.harmonic(vox)

# Offset from A440 in fractions of a bin (12 bins per octave by default).
tunning = librosa.estimate_tuning(y=vox_harm, sr=err)

# Shift by the NEGATIVE offset to cancel the detuning; shifting by +tunning
# (as the original did) moves the audio further from A440.
vox_tunned = librosa.effects.pitch_shift(vox_harm, sr=err, n_steps=-tunning)

librosa.output.write_wav("res.wav", vox_tunned, err)
beatcounter =1 while beatcounter <= 16: length = int(beatcounter/(newportion ^ 3)) % 4 pitch = int(beatcounter/(newportion ^ beatcounter)) % 8 melody.append((pitch, length)) beatcounter += length return melody def play_melody(filename) y, sr = librosa.load(filename) y, _ = librosa.effects.trim(y) p = librosa.estimate_tuning(y = y, sr = sr, bins_per_octave=1) y = librosa.effects.pitch_shift(y, sr, -1*p, bins_per_octave=1) melody = melody_generator(random.random()) beat_length = 0.75 sample_len = librosa.get_duration(y, sr) while True: for i, j in melody: if i is None: time.sleep(beat_length*j) else: target_len = beat_length*j sample = librosa.effects.time_stretch(y, 1/(sample_len*target_len))
def states(self, jam):
    """Yield a single state dict holding the estimated tuning of ``jam``'s audio.

    The samples and sampling rate are read from ``jam.sandbox.muda._audio``
    (keys ``'y'`` and ``'sr'``) and passed to ``librosa.estimate_tuning``; the
    result is yielded as ``{'tuning': <estimate>}``.
    """
    audio = jam.sandbox.muda._audio
    estimate = librosa.estimate_tuning(y=audio['y'], sr=audio['sr'])
    yield {'tuning': estimate}
def feature_extraction(y, sr, opt_tuning):
    """Compute the feature matrix used for local similarity.

    Which feature families are computed is controlled by ``params.feat``
    ('cepstral', 'chroma', 'spectral'); each family is stacked with its
    delta along axis 0.

    Parameters
    ----------
    y : audio time series passed to the librosa feature functions
    sr : sampling rate of ``y``
    opt_tuning : bool
        When true, the tuning deviation is estimated and passed to
        ``chroma_cqt``; otherwise 0.0 is used.

    Returns
    -------
    full : numpy.ndarray
        The matrix of the FIRST selected feature family (families are
        computed in the order cepstral, chroma, spectral).
    idx_chroma : int
        Position the chroma features had in the list of computed families
        (0 when chroma was not selected).

    Raises
    ------
    IndexError
        If none of the known feature names is present in ``params.feat``.
    """
    if opt_tuning:
        # extraction of tuning
        A440 = librosa.estimate_tuning(y=y, sr=sr, resolution=1e-3)
        print('Deviation from A440 is : {0:.2f}'.format(A440))
    else:
        A440 = 0.0

    print('Features for local similarity: ', ' '.join(params.feat))
    full = []
    idx_chroma = 0

    if 'cepstral' in params.feat:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20, n_fft=NFFT,
                                    hop_length=STEP)
        mfcc_delta = librosa.feature.delta(mfcc)
        full.append(np.concatenate((mfcc, mfcc_delta), axis=0))

    if 'chroma' in params.feat:
        chroma = librosa.feature.chroma_cqt(y=y, sr=sr, n_chroma=12,
                                            n_octaves=N_OCTAVES,
                                            hop_length=STEP, norm=None,
                                            tuning=A440)
        chroma_delta = librosa.feature.delta(chroma)
        idx_chroma = len(full)
        full.append(np.concatenate((chroma, chroma_delta), axis=0))

    if 'spectral' in params.feat:
        centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=NFFT,
                                                     hop_length=STEP)
        contrast = librosa.feature.spectral_contrast(y=y, sr=sr, n_fft=NFFT,
                                                     n_bands=6,
                                                     hop_length=STEP)
        flatness = librosa.feature.spectral_flatness(y=y, n_fft=NFFT,
                                                     hop_length=STEP)
        # Roll-off frequency at five energy percentages.
        rolloffs = [
            librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=NFFT,
                                             hop_length=STEP,
                                             roll_percent=percent)
            for percent in (0.05, 0.25, 0.50, 0.75, 0.95)
        ]
        spec = np.concatenate([centroid, contrast, flatness] + rolloffs,
                              axis=0)
        spec_delta = librosa.feature.delta(spec)
        full.append(np.concatenate((spec, spec_delta), axis=0))

    # Only the first family is returned.  Index the list directly instead of
    # going through np.array(full): the families have different row counts,
    # and building a ragged array raises on current NumPy.
    full = np.asarray(full[0])
    print('feature shape', full.shape)
    return full, idx_chroma
import sys

# Earlier experiments with short excerpts (trailing values as in the original notes):
# y, sr = librosa.load(filename, offset=1.3, duration=0.2)  # 0.160
# y, sr = librosa.load(filename, offset=2.6, duration=0.2)  # -0.48
# y, sr = librosa.load(filename, offset=2.8, duration=0.2)  # -0.169
# y, sr = librosa.load(filename, offset=3, duration=0.2)
# y, sr = librosa.load(filename, offset=3.3, duration=0.2)
# y, sr = librosa.load(filename, offset=3.55, duration=0.2)

# NOTE(review): `y`, `sr` and `filename` come from earlier in the script.
pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)

# Print arrays in full.  threshold=np.nan is rejected by current NumPy;
# sys.maxsize is the documented way to disable summarisation.
np.set_printoptions(threshold=sys.maxsize)
print(pitches[np.nonzero(pitches)])

# Keep only pitches whose magnitude is above the median, then estimate tuning
# both from those pitches and directly from the audio.
pitches = pitches[magnitudes > np.median(magnitudes)]
p = librosa.pitch_tuning(pitches)
print(p)
tun = librosa.estimate_tuning(y=y, sr=sr)
print(tun)

# Onset times in seconds (hard-coded).
onset_frames_time = [
    0.7662585, 1.27709751, 2.80961451, 3.0185941, 3.29723356,
    3.57587302, 3.80807256, 4.80653061, 7.2678458, 7.70902494
]
# Each segment lasts until the next onset; the last one gets a fixed 0.2 s.
onset_frames_time_diff = list(np.diff(onset_frames_time))
onset_frames_time_diff.append(0.2)

for i, o in enumerate(onset_frames_time):
    offset = round(o, 2)
    duration = round(onset_frames_time_diff[i], 2)
    # Reload just this segment and repeat the pitch filtering on it.
    y, sr = librosa.load(filename, offset=offset, duration=duration)
    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    pitches = pitches[magnitudes > np.median(magnitudes)]
def feature_extraction(path, sr, mono, frame_len, hop_len):
    """Return frame-level statistics for the audio file at ``path``.

    The file is loaded, leading/trailing silence is trimmed, and the signal is
    cut into frames with ``framing(y, frame_len, hop_len)``.  For every frame
    the zero-crossing count, energy (sum of squares), ``estimate_tuning``,
    ``pitch_tuning`` and the per-coefficient average of ``n_mfcc`` MFCCs are
    computed.  Each of those per-frame series is then reduced to six numbers:
    min, max, mean, std, kurtosis and skew.

    Returns
    -------
    numpy.ndarray
        1-D array of ``6 * (4 + n_mfcc)`` values, ordered as zero crossings,
        energy, estimate_tuning, pitch_tuning, then MFCC 0 .. n_mfcc-1.
    """
    # load wav file and remove silence from beginning and end
    signal, sr = librosa.load(path, sr=sr, mono=mono)
    trimmed, _ = librosa.effects.trim(signal)
    frames = framing(np.array(trimmed), frame_len, hop_len)

    zcrs = []
    energys = []
    estimate_tunings = []
    pitch_tunings = []
    mfcc_means = []  # one list of n_mfcc averages per frame

    for frame in frames:
        energys.append(np.sum(np.power(frame, 2)))
        zcrs.append(np.count_nonzero(librosa.core.zero_crossings(frame)))
        estimate_tunings.append(librosa.estimate_tuning(y=frame, sr=sr))
        # NOTE(review): pitch_tuning receives the raw frame samples here;
        # confirm that is intended rather than a set of pitch estimates.
        pitch_tunings.append(librosa.core.pitch_tuning(frame))

        coeffs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc)
        mfcc_means.append([np.average(coeffs[k]) for k in range(n_mfcc)])

    # Regroup so that each row is one coefficient followed over all frames.
    mfccs = np.array(list(zip(*mfcc_means)))

    def summarise(series):
        """Six summary statistics of one per-frame series."""
        values = np.array(series)
        return [
            values.min(),
            values.max(),
            np.mean(values),
            np.std(values),
            scipy.stats.kurtosis(values),
            scipy.stats.skew(values),
        ]

    features = []
    for series in (np.array(zcrs), np.array(energys),
                   estimate_tunings, pitch_tunings):
        features.extend(summarise(series))
    for coefficient_series in mfccs:
        features.extend(summarise(coefficient_series))

    return np.array(features)
def tunning(signal, fs, **kwargs):
    """Return the tuning offset of *signal* relative to A440 = 440.0 Hz.

    *signal* is passed as the audio and *fs* as its sampling rate to
    ``librosa.estimate_tuning``; the result is in fractions of a bin.
    Additional keyword arguments are accepted but not used.
    """
    offset = librosa.estimate_tuning(y=signal, sr=fs)
    return offset
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
import preprocessing as pre

print('\033[1m' + 'Debugging Prints' + '\033[0m')

ref_track = 'WAM20_20sek.wav'
test_track = 'WAM21_30sek.wav'

# Importing audio files
# NOTE(review): `sr` is overwritten by the second call, so both recordings
# are assumed to share one sampling rate -- confirm in music_parser.
ref_recording, sr = music_parser.readMusicFile(f'assets/{ref_track}')
test_recording, sr = music_parser.readMusicFile(f'assets/{test_track}')

# Estimate Tuning
# Audio and sampling rate are passed by keyword: recent librosa releases no
# longer accept them positionally, and the keyword form works on old ones too.
ref_tuning = librosa.estimate_tuning(y=ref_recording, sr=sr)
test_tuning = librosa.estimate_tuning(y=test_recording, sr=sr)

# Parameter extraction
ref_length = librosa.get_duration(y=ref_recording, sr=sr)
test_length = librosa.get_duration(y=test_recording, sr=sr)
frame_length = 9600
hopsize = int(frame_length / 2)  # half-frame hop
window = 'hann'

# ## Sample properties Visualization
# # Compute waveform Visualization
# t_ref = np.arange(ref_recording.shape[0]) / sr
# t_test = np.arange(test_recording.shape[0]) / sr
# title_r = 'Waveform, Sample: Reference Recording'
# title_t = 'Waveform, Sample: Test Recording'
def getData(filename): print("Gretting data for {}".format(filename)) hop_length = 256 # Load the example clip y, sr = librosa.load(filename) # Short-time Fourier transform (STFT) S = np.abs(librosa.stft(y)) # Separate harmonics and percussives into two waveforms y_harmonic, y_percussive = librosa.effects.hpss(y) # Beat track on the percussive signal tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr) # Compute MFCC features from the raw signal mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13) # And the first-order differences (delta features) mfcc_delta = librosa.feature.delta(mfcc) # Stack and synchronize between beat events # This time, we'll use the mean value (default) instead of median beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames) # Compute chroma features from the harmonic signal chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr) # Aggregate chroma features between beat events # We'll use the median value of each feature between beat frames beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median) # Finally, stack all beat-synchronous features together beat_features = np.vstack([beat_chroma, beat_mfcc_delta]) # Average the energy avgEnergy = np.mean(librosa.feature.rmse(y=y)) # Estimate tuning tuning = librosa.estimate_tuning(y=y, sr=sr) zeroCrossings = np.sum(librosa.core.zero_crossings(y=y)) avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr)) avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr)) raw = [avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic), np.mean(y_percussive), np.mean(mfcc), np.mean(mfcc_delta), np.mean(beat_mfcc_delta), np.mean(chromagram), np.mean(beat_chroma), np.mean(beat_features), avgEnergy, tuning, zeroCrossings, tempo] data = np.array([raw]) rDataStack = np.load("ANN/Data/RawDataStack.npy") rDataStack = np.vstack([rDataStack, data]) np.save("ANN/Data/RawDataStack.npy", rDataStack) 
wDataStack = whiten(rDataStack) np.save("ANN/Data/WhitenDataStack.npy", wDataStack) wmin = np.argmin(wDataStack, axis=0) wmax = np.argmax(wDataStack, axis=0) dmin = [] dmax = [] for i, val in enumerate(wmin): dmin.append(wDataStack[val][i]) for i, val in enumerate(wmax): dmax.append(wDataStack[val][i]) for j in range(len(wDataStack[-1])): wDataStack[-1][j] = (wDataStack[-1][j] - dmin[j]) / (dmax[j] - dmin[j]) print wDataStack[-1] return wDataStack[-1] # <<-- This is the single normalized feature set to feed into the network.
def chromagram(y=None,
               sr=44100,
               S=None,
               norm=np.inf,
               n_fft=2048,
               hop_length=None,
               seconds=4,
               tuning=None,
               center=True,
               **kwargs):
    """
    Chromagram computed on long, half-overlapping segments of a spectrogram.

    Derived from the librosa chromagram: the power spectrogram is first
    averaged over segments of roughly `seconds` seconds and the chroma filter
    bank is applied to those averaged spectra.

    Parameters
    ----------
    y : numpy array [shape=(n,)] or None
        Time series to analyse. Only used when `S` is None.
    sr : integer
        Sampling rate of the audio. Default is 44100 Hz.
    S : np.ndarray or None
        Spectrogram to use. When None, the power spectrogram of `y` is
        computed with `n_fft` and `hop_length`. When given, `n_fft` is
        re-derived from its number of rows.
    norm : float or None
        Column-wise normalization passed to librosa.util.normalize.
        Default np.inf.
    n_fft : integer
        Window size used to calculate the spectrogram. Default is 2048.
    hop_length : integer or None
        Number of samples between successive spectrogram frames. Default is
        half of `n_fft` (taken before `n_fft` is re-derived from `S`).
    seconds : integer
        Target segment duration in seconds. The segment length in frames is
        2 ** prevPow(sr * seconds / hop_length). Default is 4.
    tuning : float or None
        Deviation from A440 in fractional bins. Default is None
        (estimated from `S`).
    center : boolean
        When True, `S` is reflect-padded by a quarter segment on both sides
        of the time axis before segmenting. Default is True.
    kwargs : dict
        Extra keyword arguments for librosa.filters.chroma. 'A440' is filled
        in from `tuning` unless supplied.

    Returns
    -------
    np.ndarray
        Normalized chroma, one column per segment.
    """
    n_chroma = 12

    if hop_length is None:
        hop_length = n_fft // 2

    if S is None:
        S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))**2
    else:
        n_fft = 2 * (S.shape[0] - 1)

    if tuning is None:
        tuning = librosa.estimate_tuning(S=S, sr=sr, bins_per_octave=n_chroma)

    # NOTE(review): newer librosa releases may expect `tuning` instead of
    # `A440` in filters.chroma -- confirm against the pinned version.
    if 'A440' not in kwargs:
        kwargs['A440'] = 440.0 * 2.0**(float(tuning) / n_chroma)

    # Keyword form is accepted by both old and new librosa signatures.
    chromafb = librosa.filters.chroma(sr=sr, n_fft=n_fft, **kwargs)

    # Segment length in frames, reduced to a power of two via prevPow.
    segment_length = sr * seconds / hop_length
    segment_length = 2**prevPow(segment_length)  # alt: nextPow()

    if center:
        quarter = int(segment_length / 4)
        S = np.pad(S, ((0, 0), (quarter, quarter)), mode='reflect')

    # Half-overlapping segments.  math.floor can return a float, which is not
    # a valid array dimension, so convert explicitly.
    num_segments = int(
        math.floor((S.shape[1] - segment_length) / (segment_length / 2) + 1))

    bin_S = np.zeros((S.shape[0], num_segments))

    # Average the spectrogram over each segment
    for i in range(num_segments):
        start, end = calculateStartEnd(segment_length / 2, segment_length,
                                       iterV=i)
        bin_S[:, i] = np.mean(S[:, start:end], axis=1)

    # Raw chroma of the averaged spectra, then per-column normalization
    raw_chroma = np.dot(chromafb, bin_S)
    return librosa.util.normalize(raw_chroma, norm=norm, axis=0)
# Extract each song's metadata
artist.append(tag.artist)
title.append(tag.title)
album.append(tag.album)
duration.append(tag.duration)
year.append(tag.year)
genre.append(tag.genre)
audio_offset.append(tag.audio_offset)
bitrate.append(tag.bitrate)
samplerate.append(tag.samplerate)

# Extract each song's audio metrics
y, sr = librosa.load(songpath)
y_harmonic, y_percussive = librosa.effects.hpss(y)
# Tempo from the percussive part, tuning from the harmonic part.
tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr)
tuning = librosa.estimate_tuning(y=y_harmonic, sr=sr)
zcr = librosa.feature.zero_crossing_rate(y)
# The audio is passed as y=...: recent librosa releases reject it as a
# positional argument for these feature functions, and the keyword form also
# works on older releases.
spectral_centroids = librosa.feature.spectral_centroid(y=y, sr=sr)
spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)

# Store one scalar per song (frame-wise features are averaged).
tempos.append(tempo)
tunings.append(tuning)
zeroCrossing.append(np.mean(zcr))
specCentroid.append(np.mean(spectral_centroids))
specBandwidth.append(np.mean(spec_bw))
specRolloff.append(np.mean(rolloff))
chroma.append(np.mean(chroma_stft))

counter += 1
print(counter)