Exemple #1
0
def play_melody(filename):
    """Play a generated melody in an endless loop, voiced by an audio sample.

    The sample in *filename* is loaded, trimmed with
    ``librosa.effects.trim`` and shifted by the negative of its estimated
    tuning.  Every ``(steps, beats)`` pair yielded by ``melody_generator``
    is then rendered by stretching the sample to ``beats`` beats and
    shifting it by ``steps`` twelfth-octave steps.  A pair whose first
    element is ``None`` is a rest of ``beats`` beats.

    This function never returns.

    Args:
        filename: path of the audio sample to load.
    """
    y, sr = librosa.load(filename)
    y, _ = librosa.effects.trim(y)

    # With one bin per octave both the estimate and the correcting shift are
    # expressed in fractions of an octave.
    p = librosa.estimate_tuning(y=y, sr=sr, bins_per_octave=1)
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=-p, bins_per_octave=1)

    melody = melody_generator(random.random())

    beat_length = 0.75  # seconds per beat
    sample_len = librosa.get_duration(y=y, sr=sr)
    while True:
        for i, j in melody:
            target_len = beat_length * j
            if i is None:
                time.sleep(target_len)
                continue

            # time_stretch divides the duration by `rate`, so the rate that
            # maps sample_len seconds onto target_len seconds is their ratio.
            sample = librosa.effects.time_stretch(
                y, rate=sample_len / target_len)
            sample = librosa.effects.pitch_shift(sample,
                                                 sr=sr,
                                                 n_steps=i,
                                                 bins_per_octave=12)
            # Play at the rate the sample was loaded with rather than the
            # output device default (assumes `sd` is sounddevice).
            sd.play(sample, samplerate=sr, blocking=True)
def callback(in_data, frame_count, time_info, flag):
    """Stream callback that prints the detected chord whenever it changes.

    The incoming buffer is decoded as int16 samples, resampled from
    44100 Hz to 11025 Hz and converted to a tuning-corrected constant-Q
    magnitude spectrogram.  The module-level ``model`` classifies every
    frame; the most frequent chord name over the buffer is compared with
    the module-level ``prev_chord`` and printed if it differs.

    Args:
        in_data: raw audio bytes (int16 samples).
        frame_count: unused.
        time_info: unused.
        flag: unused.

    Returns:
        ``(in_data, pyaudio.paContinue)`` so the stream keeps running.
    """
    global prev_chord
    # Kept as a function-level import, as in the original code.
    from collections import Counter

    # np.frombuffer replaces the deprecated binary mode of np.fromstring;
    # astype() makes the writable float copy that the analysis needs.
    audio_data = np.frombuffer(in_data, dtype=np.int16).astype('float32')
    audio_data = librosa.resample(audio_data, orig_sr=44100, target_sr=11025)
    tuning = librosa.estimate_tuning(y=audio_data, sr=11025)
    X = np.abs(
        librosa.core.cqt(audio_data,
                         sr=11025,
                         n_bins=84,
                         bins_per_octave=12,
                         tuning=tuning,
                         window='hamming',
                         norm=2)).T
    with torch.no_grad():
        X = torch.tensor(X)
        if torch.cuda.is_available():
            X = X.cuda()
        X = X.unsqueeze(0)  # add the batch dimension
        pred = model(X)
        # Index of the top-scoring class for every frame.
        y = pred.topk(1, dim=2)[1].squeeze().view(-1)
        counter = Counter(ind_to_chord_names(y, category))
        current_chord = counter.most_common(1)[0][0]
        if prev_chord != current_chord:
            print(current_chord)
            prev_chord = current_chord
    return in_data, pyaudio.paContinue
Exemple #3
0
    def loop(self):
        """Process at most one queued audio buffer and update the pitch state.

        A buffer is fetched from ``self.Q`` without blocking.  When one is
        available it is decoded as float32 samples and passed to the pitch
        detector ``self._pitch_o``.  If the detector's confidence exceeds
        0.9, the pitch minus librosa's tuning estimate is stored in
        ``self.pitch`` and appended to ``self.pitches`` together with the
        confidence and a timestamp.  Otherwise ``self.pitch`` and
        ``self.confidence`` are reset to 0.
        """
        try:
            data = self.Q.get(block=False)
        except Exception:
            # Best effort: an empty queue (or any other fetch failure) means
            # there is nothing to process on this pass.  Unlike a bare
            # ``except`` this lets KeyboardInterrupt/SystemExit propagate.
            pass
        else:
            t = time.time()

            # np.frombuffer replaces the deprecated binary np.fromstring;
            # copy() keeps the array writable as before.
            signal = np.frombuffer(data, dtype=np.float32).copy()
            pitch = self._pitch_o(signal)[0]
            confidence = self._pitch_o.get_confidence()

            if confidence > 0.9:
                # Estimate the tuning on the float samples.  Casting them to
                # uint16 first, as was done previously, truncates the
                # fractional samples and destroys the signal.
                tuning = librosa.estimate_tuning(y=signal, sr=self.sampleRate)

                self.pitch = pitch - tuning
                self.confidence = confidence
                self.pitches.append((self.pitch, self.confidence, t))
            else:
                self.pitch = 0
                self.confidence = 0
        time.sleep(0.00001)
def getChromagramSTFT(audioSegment,
                      sampleRate,
                      FFTLength,
                      hopLength,
                      tuning=True):
    """
    Description:
        Compute a short-time chromagram from an STFT of FFTLength samples
        taken every hopLength samples.

    Arguments:
        audioSegment: audio samples as an array (for example the first
            element returned by librosa.load("filename.wav"))
        sampleRate: sample rate of audioSegment
        FFTLength: FFT window size passed to librosa as n_fft
        hopLength: number of samples between successive frames; determines
            the time resolution of the chromagram
        tuning (bool): if true, a tuning estimate from
            librosa.estimate_tuning() is used to adjust the chromagram,
            otherwise a tuning of 0 is used

    Returns:
        short-time chromagram: 12 x N dimensional (N = number of frames)
    """
    if tuning:
        tuningDiff = librosa.estimate_tuning(y=audioSegment,
                                             sr=sampleRate,
                                             resolution=1e-2)
    else:
        tuningDiff = 0

    # The audio is passed by keyword: librosa >= 0.10 no longer accepts it
    # positionally, and older releases accept the keyword form as well.
    chroma_stft = librosa.feature.chroma_stft(y=audioSegment,
                                              sr=sampleRate,
                                              S=None,
                                              n_fft=FFTLength,
                                              hop_length=hopLength,
                                              norm=np.inf,
                                              tuning=tuningDiff)

    return chroma_stft
 def calculate_tuning_and_tones(self, y_harmonic, sr):
     """Store the tuning estimate and per-pitch mean chroma energies.

     Sets ``self.tuning`` to the tuning estimated from ``y_harmonic`` and
     ``self.pitchMeanEnergies`` to the mean of every row of the
     constant-Q chromagram computed with that tuning.  The elapsed time of
     both steps is reported through ``debug_print``.
     """
     tuning_started = time.time()
     estimate = librosa.estimate_tuning(y=y_harmonic, sr=sr)
     self.tuning = estimate.tolist()
     debug_print('tuning', time.time() - tuning_started)

     tones_started = time.time()
     chroma = librosa.feature.chroma_cqt(y=y_harmonic,
                                         sr=sr,
                                         tuning=self.tuning)
     # One chroma row per pitch class; average each over its frames.
     self.pitchMeanEnergies = [sum(row) / len(row) for row in chroma]
     debug_print('tones', time.time() - tones_started)
Exemple #6
0
 def get_features(self, sr):
     """
     Estimate the tuning of the buffered audio using librosa.

     ``self.bytes`` is converted to a float time series with
     ``buf_to_float`` and passed to ``estimate_tuning``.  Tempo extraction
     is not implemented.

     documentation https://librosa.github.io/librosa/

     :param sr: sample rate of the buffered audio
     :return: a one-element list holding the tuning estimate
     """
     timeseries = buf_to_float(self.bytes)
     # Keyword arguments: librosa >= 0.10 rejects positional y/sr, while
     # older releases accept the keyword form too.
     pitch = estimate_tuning(y=timeseries, sr=sr)
     return [pitch]
Exemple #7
0
def getData(filename):
    """Load an audio file and return a list of scalar summary features.

    Args:
        filename: path of the audio file to analyse.

    Returns:
        A list with, in order: mean spectral contrast, mean mel
        spectrogram, mean of the harmonic waveform, mean of the percussive
        waveform, mean MFCC, mean MFCC delta, mean beat-synchronous
        MFCC/delta, mean chroma, mean beat-synchronous chroma, mean of the
        stacked beat-synchronous features, mean RMS energy, tuning
        estimate, zero-crossing count and tempo.
    """
    print("Gretting data for {}".format(filename))

    # One hop length for every frame-based feature.  Beat frames index into
    # the MFCC and chroma matrices, so all three must share a time base;
    # previously only the MFCCs used 256 while the others defaulted to 512.
    hop_length = 256

    # Load the example clip
    y, sr = librosa.load(filename)

    # Short-time Fourier transform (STFT) magnitudes
    S = np.abs(librosa.stft(y))

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beat track on the percussive signal
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive,
                                                 sr=sr,
                                                 hop_length=hop_length)

    # MFCC features from the raw signal and their first-order differences
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Stack and aggregate (mean, the default) between beat events
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]),
                                           beat_frames)

    # Chroma features from the harmonic signal
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic,
                                            sr=sr,
                                            hop_length=hop_length)

    # Aggregate chroma between beat events using the median
    beat_chroma = librosa.feature.sync(chromagram,
                                       beat_frames,
                                       aggregate=np.median)

    # Stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

    avgEnergy = np.mean(librosa.feature.rmse(y=y))
    tuning = librosa.estimate_tuning(y=y, sr=sr)
    zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))
    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))
    avgSpectralContrast = np.mean(
        librosa.feature.spectral_contrast(S=S, sr=sr))

    # The raw (un-normalised) values are what callers receive.
    raw = [avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic),
           np.mean(y_percussive), np.mean(mfcc), np.mean(mfcc_delta),
           np.mean(beat_mfcc_delta), np.mean(chromagram),
           np.mean(beat_chroma), np.mean(beat_features), avgEnergy, tuning,
           zeroCrossings, tempo]
    return raw
Exemple #8
0
 def load_and_gen_obj(self):
     """Load ``self.filename`` and cache its audio features on the instance.

     Sets ``librosa_rep`` and ``samp_rate`` (the loaded audio and its
     sample rate), ``spectro`` (transposed mel spectrogram built with
     ``MEL_KWARGS``), ``duration``, ``onset_env``, ``tempo`` and
     ``tuning``.
     """
     print("Loading song.")
     self.librosa_rep, self.samp_rate = lbr.load(self.filename)
     # ".T" gives the transposed version of the NumPy array
     self.spectro = lbr.feature.melspectrogram(y=self.librosa_rep,
                                               sr=self.samp_rate,
                                               **MEL_KWARGS).T
     self.duration = lbr.get_duration(y=self.librosa_rep, sr=self.samp_rate)
     self.onset_env = lbr.onset.onset_strength(y=self.librosa_rep,
                                               sr=self.samp_rate)
     # The envelope must go in as `onset_envelope`; passed positionally it
     # lands in the audio parameter `y` and is treated as a waveform.
     self.tempo = lbr.beat.tempo(onset_envelope=self.onset_env,
                                 sr=self.samp_rate)
     self.tuning = lbr.estimate_tuning(y=self.librosa_rep,
                                       sr=self.samp_rate)
     print("Features and spectrogram extracted.")
Exemple #9
0
def estimate_tuning(input_file):
    '''Print the estimated tuning offset of an audio file, in cents.

    The estimate is made on the harmonic component of the audio.  Progress
    messages and the result are written to standard output; nothing is
    returned.
    '''
    print('Loading ', input_file)
    audio, rate = librosa.load(input_file)

    print('Separating harmonic component ... ')
    harmonic_part = librosa.effects.harmonic(audio)

    print('Estimating tuning ... ')
    offset = librosa.estimate_tuning(y=harmonic_part, sr=rate)

    # The estimate is multiplied by 100 for display in cents.
    cents = 100 * offset
    print('{:+0.2f} cents'.format(cents))
Exemple #10
0
def getData(filename):
    """Return seven summary features of an audio file, scaled by their sum.

    The features are, in order: mean RMS energy, tuning estimate, tempo,
    mean STFT chroma, mean mel spectrogram, mean MFCC and mean spectral
    contrast.  Each value is divided by the sum of all seven.

    Returns:
        A NumPy array of shape (1, 7).
    """
    print("Gretting data for{}".format(filename))
    y, sr = librosa.load(filename)  # load song

    magnitudes = np.abs(librosa.stft(y))
    onset_envelope = librosa.onset.onset_strength(y=y, sr=sr)

    raw = [
        np.mean(librosa.feature.rmse(y=y)),
        librosa.estimate_tuning(y=y, sr=sr),
        librosa.beat.estimate_tempo(onset_envelope, sr=sr),
        np.mean(librosa.feature.chroma_stft(y=y, sr=sr)),
        np.mean(librosa.feature.melspectrogram(y=y, sr=sr)),
        np.mean(librosa.feature.mfcc(y=y, sr=sr)),
        np.mean(librosa.feature.spectral_contrast(S=magnitudes, sr=sr)),
    ]

    # Scale every feature by the total of all features.
    total = sum(raw)
    scaled = [float(value) / total for value in raw]
    return np.array([scaled])
Exemple #11
0
def add_tuning_tempo(y, sr, label_id, features, labels):
    """
    Input
        y: song data
        sr: sample rate
        label_id: label (genre) id
        features: list that receives the extracted feature tuple
        labels: list that receives the label id
    Description
        Estimates tuning and tempo from y, appends ``(tuning, tempo)`` to
        features and ``label_id`` to labels.  Both lists are modified in
        place; nothing is returned.
    """
    # beat_track returns (tempo, beat frames); only the tempo is needed.
    beat_result = librosa.beat.beat_track(y=y, sr=sr)
    tempo = beat_result[0]
    tuning = librosa.estimate_tuning(y=y, sr=sr)

    features.append((tuning, tempo))
    labels.append(label_id)
def extract_feature(X, sample_rate):
    """Compute time-averaged spectral features and the tuning of a signal.

    Args:
        X: audio samples.
        sample_rate: sample rate of X.

    Returns:
        ``(mfccs, chroma, mel, contrast, tonnetz, tuning)``.  The first
        five are per-coefficient means over all frames; ``tuning`` is the
        estimate returned by ``librosa.estimate_tuning``.
    """
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,
                    axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,
                     axis=0)
    tuning = librosa.estimate_tuning(y=X, sr=sample_rate)
    # Audio passed by keyword: librosa >= 0.10 rejects it positionally,
    # while older releases accept the keyword form too.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,
                  axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft,
                                                         sr=sample_rate).T,
                       axis=0)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
                                              sr=sample_rate).T,
                      axis=0)
    return mfccs, chroma, mel, contrast, tonnetz, tuning
Exemple #13
0
    def __test(target_hz, resolution, bins_per_octave, tuning):
        """Check that a pure tone's estimated tuning matches ``tuning``.

        ``t`` and ``sr`` come from the enclosing scope (presumably the
        time axis and sample rate of the test signal).
        """
        y = np.sin(2 * np.pi * target_hz * t)
        tuning_est = librosa.estimate_tuning(resolution=resolution,
                                             bins_per_octave=bins_per_octave,
                                             y=y,
                                             sr=sr,
                                             n_fft=2048,
                                             fmin=librosa.note_to_hz('C4'),
                                             fmax=librosa.note_to_hz('G#9'))

        # Round to the proper number of decimals
        deviation = np.around(tuning - tuning_est,
                              int(-np.log10(resolution)))

        # Tuning is cyclic with a period of one bin (-0.5 and +0.5 denote
        # the same tuning), so measure the distance modulo 1 in both
        # directions and keep the smaller one.
        max_dev = np.min([np.mod(deviation, 1.0), np.mod(-deviation, 1.0)])

        # We'll accept an answer within three bins of the resolution
        assert max_dev <= 3 * resolution
Exemple #14
0
    def __test(target_hz, resolution, bins_per_octave, tuning):
        """Check that a pure tone's estimated tuning matches ``tuning``.

        ``t`` and ``sr`` come from the enclosing scope (presumably the
        time axis and sample rate of the test signal).
        """
        y = np.sin(2 * np.pi * target_hz * t)
        tuning_est = librosa.estimate_tuning(resolution=resolution,
                                             bins_per_octave=bins_per_octave,
                                             y=y,
                                             sr=sr,
                                             n_fft=2048,
                                             fmin=librosa.note_to_hz('C4'),
                                             fmax=librosa.note_to_hz('G#9'))

        # Round to the proper number of decimals
        deviation = np.around(tuning - tuning_est,
                              int(-np.log10(resolution)))

        # Tuning is cyclic with a period of one bin (-0.5 and +0.5 denote
        # the same tuning), so measure the distance modulo 1 in both
        # directions and keep the smaller one.
        max_dev = np.min([np.mod(deviation, 1.0), np.mod(-deviation, 1.0)])

        # We'll accept an answer within three bins of the resolution
        assert max_dev <= 3 * resolution
def adjust_tuning(input_file, output_file):
    '''Write a pitch-corrected copy of input_file to output_file.

    The tuning offset is estimated on the harmonic component of the audio
    and the whole signal is shifted by the opposite amount.  Progress
    messages are printed along the way.
    '''
    print('Loading ', input_file)
    audio, rate = librosa.load(input_file)

    print('Separating harmonic component ... ')
    harmonic_part = librosa.effects.harmonic(audio)

    print('Estimating tuning ... ')
    offset = librosa.estimate_tuning(y=harmonic_part, sr=rate)
    offset_cents = 100 * offset

    print('{:+0.2f} cents'.format(offset_cents))
    print('Applying pitch-correction of {:+0.2f} cents'.format(-offset_cents))
    # Shift by the negated estimate to cancel the detected offset.
    corrected = librosa.effects.pitch_shift(audio, sr=rate, n_steps=-offset)

    print('Saving tuned audio to: ', output_file)
    librosa.output.write_wav(output_file, corrected, rate)
Exemple #16
0
def adjust_tuning(input_file, output_file):
    '''Load audio, estimate tuning, apply pitch correction, and save.

    The tuning offset is estimated on the harmonic component of the audio
    and the whole signal is shifted by the opposite amount before being
    written to output_file.  Progress messages are printed along the way.
    '''
    print('Loading ', input_file)
    y, sr = librosa.load(input_file)

    print('Separating harmonic component ... ')
    y_harm = librosa.effects.harmonic(y)

    print('Estimating tuning ... ')
    tuning = librosa.estimate_tuning(y=y_harm, sr=sr)

    print('{:+0.2f} cents'.format(100 * tuning))
    print('Applying pitch-correction of {:+0.2f} cents'.format(-100 * tuning))
    # sr and n_steps are keyword-only in librosa >= 0.10; the keyword form
    # is accepted by older releases as well.
    y_tuned = librosa.effects.pitch_shift(y, sr=sr, n_steps=-tuning)

    print('Saving tuned audio to: ', output_file)
    sf.write(output_file, y_tuned, sr)
Exemple #17
0
    def __test(target_hz, resolution, bins_per_octave, tuning):
        """Check that a pure tone's estimated tuning matches ``tuning``.

        The comparison is made modulo one bin, so estimates on either side
        of the wrap-around point count as close.  ``t`` and ``sr`` come
        from the enclosing scope.
        """
        tone = np.sin(2 * np.pi * target_hz * t)
        estimate = librosa.estimate_tuning(y=tone,
                                           sr=sr,
                                           resolution=resolution,
                                           bins_per_octave=bins_per_octave,
                                           n_fft=2048,
                                           fmin=librosa.note_to_hz('C4'),
                                           fmax=librosa.note_to_hz('G#9'))

        # Keep only as many decimals as the resolution can express.
        decimals = int(-np.log10(resolution))
        error = np.around(tuning - estimate, decimals)

        # Distance to the target going either way around the cycle.
        forward = np.mod(error, 1.0)
        backward = np.mod(-error, 1.0)
        wrapped_error = np.min([forward, backward])

        # Anything within three resolution steps is accepted.
        assert wrapped_error <= 3 * resolution
def beat_track(input_file):
    """Print tempo, beat positions, tuning and mean chroma energies.

    The file is loaded at 22050 Hz and split into harmonic and percussive
    parts.  Beats are tracked on the percussive part; tuning and the
    constant-Q chromagram are computed on the harmonic part.  Everything
    is written to standard output; nothing is returned.
    """
    # --- load and separate ---
    print('Loading ', input_file)
    audio, rate = librosa.load(input_file, sr=22050)
    harmonic, percussive = librosa.effects.hpss(audio)

    # 512 samples at 22050 Hz is roughly 23 ms per frame.
    hop = 512

    # --- beats ---
    print('Tracking beats')
    tempo, beats = librosa.beat.beat_track(y=percussive,
                                           sr=rate,
                                           hop_length=hop)
    print('Estimated tempo: {:0.2f} beats per minute'.format(tempo))

    # `beats` holds frame indices; convert them to seconds.
    beat_times = librosa.frames_to_time(beats, sr=rate, hop_length=hop)
    for value in (tempo, beats, beat_times):
        print(value)

    # --- tuning ---
    print('Estimating tuning ... ')
    tuning = librosa.estimate_tuning(y=harmonic, sr=rate)
    print(tuning)
    print('{:+0.2f} cents'.format(100 * tuning))

    # --- notes ---
    chroma = librosa.feature.chroma_cqt(y=harmonic, sr=rate, tuning=tuning)

    # Mean energy of every chroma row.
    pitchesMeanEnergy = [sum(row) / len(row) for row in chroma]
    print(pitchesMeanEnergy)
Exemple #19
0
def preprocess_librosa(audiopath,
                       feparam,
                       n_bins=84,
                       bins_per_octave=12,
                       mod_steps=(0, )):
    """Compute constant-Q magnitude spectrograms of pitch-shifted audio.

    Args:
        audiopath: path of the audio file to load.
        feparam: mapping providing ``'fs'`` (sample rate to load at) and
            ``'stereo_to_mono'`` (passed to ``librosa.load`` as ``mono``).
        n_bins: number of CQT bins.
        bins_per_octave: CQT bins per octave.
        mod_steps: pitch shifts, in steps, to apply before the transform;
            one spectrogram is produced per entry.

    Returns:
        A list with one transposed magnitude CQT per entry of
        ``mod_steps``, in the same order.
    """
    # sr is keyword-only in librosa >= 0.10 for both load and pitch_shift;
    # the keyword form is accepted by older releases as well.
    x, sr = librosa.load(audiopath,
                         sr=feparam['fs'],
                         mono=feparam['stereo_to_mono'])
    Xs = []
    # The tuning is estimated once on the unshifted audio and reused for
    # every shifted copy.
    tuning = librosa.estimate_tuning(y=x, sr=sr)
    for mod_step in mod_steps:
        X_pitched = librosa.effects.pitch_shift(x, sr=sr, n_steps=mod_step)
        X = np.abs(
            librosa.core.cqt(X_pitched,
                             sr=sr,
                             n_bins=n_bins,
                             bins_per_octave=bins_per_octave,
                             tuning=tuning,
                             window='hamming',
                             norm=2))
        Xs.append(X.T)
    return Xs
def getChromagramCQT(audioSegment, sampleRate, hopLength, tuning=False):
    """
    Description:
        Compute a short-time chromagram using a constant-Q transform.

    Arguments:
        audioSegment: audio samples as an array (for example the first
            element returned by librosa.load("filename.wav"))
        sampleRate: sample rate of audioSegment
        hopLength: number of samples between successive frames; determines
            the time resolution of the chromagram
        tuning (bool): if true, a tuning estimate from
            librosa.estimate_tuning() is used to adjust the chromagram,
            otherwise a tuning of 0 is used

    Returns:
        short-time chromagram: 12 x N dimensional (N = number of frames)
    """
    if tuning:
        tuningDiff = librosa.estimate_tuning(y=audioSegment,
                                             sr=sampleRate,
                                             resolution=1e-2)
    else:
        tuningDiff = 0

    # The audio is passed by keyword: librosa >= 0.10 no longer accepts it
    # positionally, and older releases accept the keyword form as well.
    chroma_cq = librosa.feature.chroma_cqt(y=audioSegment,
                                           sr=sampleRate,
                                           C=None,
                                           hop_length=hopLength,
                                           fmin=None,
                                           norm=np.inf,
                                           threshold=0.0,
                                           tuning=tuningDiff,
                                           n_chroma=12,
                                           n_octaves=7,
                                           window=None,
                                           bins_per_octave=36,
                                           cqt_mode='full')

    return chroma_cq
Exemple #21
0
def extract_feature(X, sample_rate):
    """Return time-averaged mel and spectral-contrast features plus tuning.

    Args:
        X: audio samples.
        sample_rate: sample rate of X.

    Returns:
        ``(mel, contrast, tuning)`` where ``mel`` and ``contrast`` are
        per-band means over all frames and ``tuning`` is the estimate
        returned by ``librosa.estimate_tuning``.
    """
    stft = np.abs(librosa.stft(X))
    tuning = librosa.estimate_tuning(y=X, sr=sample_rate)
    # Audio passed by keyword: librosa >= 0.10 rejects it positionally,
    # while older releases accept the keyword form too.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T,
                  axis=0)
    contrast = np.mean(
        librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    return mel, contrast, tuning
def tunning(signal, fs, **kwargs):
    """Estimate the tuning offset of *signal* sampled at *fs* Hz.

    The offset is expressed in fractions of a bin relative to A440.
    Additional keyword arguments are accepted but not used.
    """
    offset = librosa.estimate_tuning(y=signal, sr=fs)
    return offset
Exemple #23
0
def create_dataset(input_path, output_path):
    """Extract one feature vector per audio file and store them as HDF5.

    ``input_path`` is expected to contain one sub-directory per label,
    each holding that label's audio files.  Every file that can be
    processed is reduced to a 35-element vector: tempo, tuning estimate,
    6 mean tonnetz values, 20 mean MFCCs and 7 mean spectral-contrast
    values.  Files that raise during loading or feature extraction are
    counted and skipped.

    All vectors are min-max normalised per feature over the whole data
    set.  The output file gets one dataset per label holding that label's
    vectors flattened into one array, plus a ``vector_size`` dataset.

    Args:
        input_path: directory containing one sub-directory per label.
        output_path: path of the HDF5 file to create (overwritten).
    """
    print("Creating h5 from {} to file {}".format(input_path, output_path))

    feature_vector_dim = 35
    labels = os.listdir(input_path)

    # Rows are collected in lists so that any number of files per label is
    # supported.  Labels are tracked by index rather than by fixed-width
    # strings, which truncated long names and made them unmatchable.
    rows = []
    row_label_ids = []
    for l, label in enumerate(labels):
        print("Data for {}".format(label))

        instrument_dir = os.path.join(input_path, label)
        files = os.listdir(instrument_dir)
        skipped = 0

        # Read files for each label
        for i, track in enumerate(files):
            print(" {} of {}".format(i + 1, len(files)), end="\r")
            sys.stdout.flush()

            try:
                y, sr = librosa.load(os.path.join(instrument_dir, track))
                stft = np.abs(librosa.stft(y))

                arr = np.empty(feature_vector_dim)
                arr[0] = librosa.beat.beat_track(y=y, sr=sr)[0]
                arr[1] = librosa.estimate_tuning(y=y, sr=sr)
                arr[2:8] = np.mean(librosa.feature.tonnetz(
                    y=librosa.effects.harmonic(y), sr=sr),
                                   axis=1)
                arr[8:28] = np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1)
                arr[28:] = np.mean(librosa.feature.spectral_contrast(S=stft,
                                                                     sr=sr),
                                   axis=1)
            except Exception:
                # Unreadable or unprocessable file: count it and move on.
                # KeyboardInterrupt and SystemExit still propagate.
                skipped += 1
            else:
                rows.append(arr)
                row_label_ids.append(l)
        print("")

        if skipped > 0:
            print(" Skipped {} corrupted files".format(skipped))

    # reshape() keeps the matrix two-dimensional even when no file loaded.
    data_matrix = np.array(rows, dtype=float).reshape(-1, feature_vector_dim)
    label_ids = np.array(row_label_ids, dtype=int)

    # Normalise every feature column to [0, 1]; constant columns map to 0.
    if len(data_matrix):
        minis = np.min(data_matrix, axis=0)
        diff = np.max(data_matrix, axis=0) - minis
        diff[diff == 0] = 1
        data_matrix = (data_matrix - minis) / diff

    # Write to file; the context manager closes it even if a write fails.
    with h5py.File(output_path, 'w') as out_file:
        for l, label in enumerate(labels):
            labeled_data = data_matrix[label_ids == l].flatten()
            out_file.create_dataset(label, data=labeled_data)
        out_file.create_dataset('vector_size', data=[feature_vector_dim])

    print("Done")
Exemple #24
0
def run(inputDirectory, outputDirectory, parameterFileName, spectrumFileName, fileNameAppendix, attackTime, sustainTime,
        centroid_flag, f0normCentroid_flag, rolloff_flag, bandwidth_flag, spread_flag, highLowEnergy_flag,
        tristimulus_flag, inharmonicity_flag, noisiness_flag, oddeven_flag, tuning_flag, crossingRate_flag,
        rms_flag, entropy_flag, temporalCentroid_flag, logAttackTime_flag, decayTime_flag, vectorOutput_flag):
    """Analyse every series folder in ``inputDirectory`` and save parameters, spectrums and plots.

    Every entry of ``inputDirectory`` is treated as one series folder and every
    file inside it as one impulse recording. For each series the enabled
    parameters are computed per recording, summarised through
    ``CalculateStatistics`` and appended to the result rows. Afterwards the
    rows are written to ``<parameterFileName>_<fileNameAppendix>.npy`` and
    ``.csv``, the averaged attack/sustain/decay spectrums to
    ``<spectrumFileName>_<fileNameAppendix>.csv`` and one three-panel figure
    per series is saved below ``outputDirectory``.

    Parameters
    ----------
    inputDirectory : str
        Folder whose entries are the series folders to analyse.
    outputDirectory : str
        Result folder. It is deleted (if present) and recreated on every call.
    parameterFileName, spectrumFileName, fileNameAppendix : str
        Pieces of the output file names, see above.
    attackTime, sustainTime
        Forwarded to ``pc.CalculateFFTs`` and used as the segment boundaries
        handed to ``DrawSpectrum``.
    centroid_flag ... decayTime_flag : bool
        Enable the corresponding parameter. Disabled parameters are left out
        of the saved ``.npy``/``.csv`` rows.
    vectorOutput_flag : bool
        When true the figures are saved with ``format="eps"``.
    """
    # clear output folder
    if os.path.isdir(outputDirectory):
        shutil.rmtree(outputDirectory)
    os.mkdir(outputDirectory)
    samplingRate = 0

    # ----------Setting up variables required for calculation and saving of parameter data---------------
    # Every result row starts with its caption; one value per series is appended later.
    series_names = [" "]
    centroid_values, centroid_deviations = ["Spectrum Centroid"], ["Centroid Deviation"]
    f0NormalizedCentroid_values = ["F0 Normalized Centroid"]
    f0NormalizedCentroid_deviations = ["F0 Normalized Centroid Deviations"]
    rolloff_values, rolloff_deviations = ["Rolloff"], ["Rolloff Deviation"]
    bandwidth_values, bandwidth_deviation = ["Bandwidth"], ["Bandwidth Deviation"]
    spread_values, spread_deviations = ["Spread"], ["Spread Deviation"]
    highLowEnergy_values = ["High Energy - Low Energy Ratio"]
    highLowEnegry_deviations = ["High Energy - Low Energy Ratio Deviations"]
    tristimulus1_values, tristimulus1_deviations = ["Tristimulus 1"], ["Tristimulus 1 Deviations"]
    tristimulus2_values, tristimulus2_deviations = ["Tristimulus 2"], ["Tristimulus 2 Deviations"]
    tristimulus3_values, tristimulus3_deviations = ["Tristimulus 3"], ["Tristimulus 3 Deviations"]
    inharmonicity_values, inharmonicity_deviations = ["Inharmonicity"], ["Inharmonicity Deviation"]
    noisiness_values, noisiness_deviations = ["Noisiness"], ["Noisiness Deviations"]
    oddEvenRatio_values, oddEvenRatio_deviations = ["Odd-Even Ratio"], ["Odd-Even Ratio Deviation"]
    tuning_values, tuning_deviations = ["Tuning"], ["Tuning Deviation"]
    zeroCrossingRate_values = ["Zero Crossing Rate"]
    zeroCrossingRate_deviations = ["Zero Crossing Rate Deviation"]
    rms_values, rms_deviations = ["RMS"], ["RMS Deviation"]
    entropy_values, entropy_deviations = ["Entropy"], ["Entropy Deviation"]
    temporalCentroid_values = ["Temporal Centroid"]
    temporalCentroid_deviations = ["Temporal Centroid Deviations"]
    logAttackTime_values, logAttackTime_deviations = ["Log Attack Time"], ["Log Attack Time Deviations"]
    decayTime_values, decayTime_deviations = ["Decay Time"], ["Decay Time Deviation"]
    foundFundumentalPitches = ["Average Found Fundumental Pitches"]

    # (enabled, values row, deviations row) in the order the rows are saved.
    # The per-series statistics below rely on exactly this order.
    parameter_rows = [
        (centroid_flag, centroid_values, centroid_deviations),
        (f0normCentroid_flag, f0NormalizedCentroid_values, f0NormalizedCentroid_deviations),
        (rolloff_flag, rolloff_values, rolloff_deviations),
        (bandwidth_flag, bandwidth_values, bandwidth_deviation),
        (spread_flag, spread_values, spread_deviations),
        (highLowEnergy_flag, highLowEnergy_values, highLowEnegry_deviations),
        (tristimulus_flag, tristimulus1_values, tristimulus1_deviations),
        (tristimulus_flag, tristimulus2_values, tristimulus2_deviations),
        (tristimulus_flag, tristimulus3_values, tristimulus3_deviations),
        (inharmonicity_flag, inharmonicity_values, inharmonicity_deviations),
        (noisiness_flag, noisiness_values, noisiness_deviations),
        (oddeven_flag, oddEvenRatio_values, oddEvenRatio_deviations),
        (tuning_flag, tuning_values, tuning_deviations),
        (crossingRate_flag, zeroCrossingRate_values, zeroCrossingRate_deviations),
        (rms_flag, rms_values, rms_deviations),
        (entropy_flag, entropy_values, entropy_deviations),
        (temporalCentroid_flag, temporalCentroid_values, temporalCentroid_deviations),
        (logAttackTime_flag, logAttackTime_values, logAttackTime_deviations),
        (decayTime_flag, decayTime_values, decayTime_deviations),
    ]

    allAttackSpectrums, allSustainSpectrums, allDecaySpectrums = [], [], []
    allAttackFrequencies, allSustainFrequencies, allDecayFrequencies = [], [], []
    seriesNames = []
    # Spectrum scaling factors
    impulseTime, maxAttack, maxSustain, maxDecay = 0, 0, 0, 0

    # ---------------Calculating spectrums and parameters------------------
    for seriesDirectory in os.listdir(os.fsencode(inputDirectory)):
        seriesDirectory = inputDirectory + "/" + os.fsdecode(seriesDirectory)
        print("Entering folder: " + seriesDirectory)

        # Per-recording samples of the current series.
        impulses = []
        centroids, f0normCentroids, rolloffs, bandwidths, spreads = [], [], [], [], []
        highLowEnergies, tristimulus1s, tristimulus2s, tristimulus3s = [], [], [], []
        inharmonicities, noisinesses, oddEvenRatios, tunings = [], [], [], []
        crossingRates, rmss, entropies, temporalCentroids = [], [], [], []
        logAttackTimes, decayTimes, pitchesHz = [], [], []

        # If harmonic data and normalized centroids make no sense it may be caused by improper fundumental pitch detection.
        # Check in parameterData.csv whether the fundumentals were properly found.
        # If not, then manually add the correct pitch below and rerun the offending sounds.
        # Setting it to 0 switches to the median of the detected pitches instead.
        fundumentalPitch = 261.63

        for impulseFile in os.listdir(os.fsencode(seriesDirectory)):
            impulseFileName = seriesDirectory + "/" + os.fsdecode(impulseFile)
            args = Arguments()
            args.fundumentalPitch = fundumentalPitch
            # librosa loading
            print("Loading file: " + impulseFileName)
            args.impulseLIB, samplingRate = librosa.load(impulseFileName)
            # NOTE(review): the rate reported by librosa.load is discarded in favour of a
            # fixed 44100 Hz. Confirm this matches the loaded signal, since
            # pc.CalculateFFTs and impulseTime below depend on it.
            samplingRate = 44100
            # iracema loading
            args.impulseIRA = iracema.Audio(impulseFileName)
            args.impulseFFT = iracema.spectral.fft(args.impulseIRA, window_size=2048, hop_size=1024)
            args.pitch = iracema.pitch.hps(args.impulseFFT, minf0=50, maxf0=500)
            args.harmonicsIRA = iracema.harmonics.extract(args.impulseFFT, args.pitch)
            pitchesHz.append(np.median(args.pitch.data))

            if centroid_flag:
                centroids.append(np.mean(librosa.feature.spectral_centroid(y=args.impulseLIB)))
            if f0normCentroid_flag:
                # Normalise by the detected pitch only when no manual pitch is set.
                if fundumentalPitch == 0:
                    referencePitch = np.median(args.pitch.data)
                else:
                    referencePitch = fundumentalPitch
                f0normCentroids.append(
                    np.mean(librosa.feature.spectral_centroid(y=args.impulseLIB) / referencePitch))
            if rolloff_flag:
                rolloffs.append(np.mean(librosa.feature.spectral_rolloff(y=args.impulseLIB)))
            if bandwidth_flag:
                bandwidths.append(np.mean(librosa.feature.spectral_bandwidth(y=args.impulseLIB)))
            if spread_flag:
                spreads.append(np.mean(iracema.features.spectral_spread(args.impulseFFT).data))
            if tuning_flag:
                tunings.append(np.mean(librosa.estimate_tuning(y=args.impulseLIB)))
            if crossingRate_flag:
                crossingRates.append(np.mean(librosa.feature.zero_crossing_rate(args.impulseLIB)))
            if rms_flag:
                rmss.append(pc.CalculateRMS(args))
            if entropy_flag:
                entropies.append(np.mean(iracema.features.spectral_entropy(args.impulseFFT).data))
            if temporalCentroid_flag:
                temporalCentroids.append(pc.CalculateTemporalCentroid(args))
            if logAttackTime_flag:
                logAttackTimes.append(pc.CalculateLogAttackTime(args))
            if decayTime_flag:
                decayTimes.append(pc.CalculateDecayTime(args))
            impulses = pc.InsertIntoVstack(args.impulseLIB, impulses)

        fullFrequencies, fullSpectrums, attackFrequencies, attackSpectrums, sustainFrequencies, sustainSpectrums, \
            decayFrequencies, decaySpectrums = pc.CalculateFFTs(impulses, samplingRate, attackTime, sustainTime)

        if fundumentalPitch == 0:
            fundumentalPitch = np.median(pitchesHz)
        foundFundumentalPitches.append(fundumentalPitch)
        mathHarmFreq = pc.CreateMathematicalHarmonicFrequencyVector(fundumentalPitch, n=20)
        harmonicData = pc.ExtractHarmonicDataFromSpectrums(fullSpectrums, fullFrequencies, mathHarmFreq, bufforInHZ=20)

        # These parameters are computed for the whole series at once.
        if noisiness_flag:
            noisinesses = pc.CalculateNoisiness(fullSpectrums, fullFrequencies, harmonicData)
        if highLowEnergy_flag:
            highLowEnergies = pc.CalculateHighEnergyLowEnergyRatio(fullSpectrums, fullFrequencies)
        if tristimulus_flag:
            tristimulus1s, tristimulus2s, tristimulus3s = pc.CalculateTristimulus(harmonicData)
        if inharmonicity_flag:
            inharmonicities = pc.CalculateInharmonicity(harmonicData)
        if oddeven_flag:
            oddEvenRatios = pc.CalculateOERs(harmonicData)

        # Dividing spectrum data into segments
        allAttackSpectrums.append(pc.CalculateAverageVector(attackSpectrums))
        allSustainSpectrums.append(pc.CalculateAverageVector(sustainSpectrums))
        allDecaySpectrums.append(pc.CalculateAverageVector(decaySpectrums))
        allAttackFrequencies.append(attackFrequencies)
        allSustainFrequencies.append(sustainFrequencies)
        allDecayFrequencies.append(decayFrequencies)
        seriesNames.append(seriesDirectory)
        impulseTime = len(impulses[0, :]) / samplingRate

        series_names.append(seriesDirectory.replace(inputDirectory + "/", ""))

        # Same order as parameter_rows; statistics are recorded even for disabled
        # parameters (with whatever CalculateStatistics yields for an empty list).
        seriesSamples = (
            centroids, f0normCentroids, rolloffs, bandwidths, spreads, highLowEnergies,
            tristimulus1s, tristimulus2s, tristimulus3s, inharmonicities, noisinesses,
            oddEvenRatios, tunings, crossingRates, rmss, entropies, temporalCentroids,
            logAttackTimes, decayTimes,
        )
        for samples, (_, values, deviations) in zip(seriesSamples, parameter_rows):
            CalculateStatistics(samples, values, deviations)

    # -----------------Saving results-------------------
    parameterBase = outputDirectory + '/' + parameterFileName + '_' + fileNameAppendix

    # Saving parameter data into .npy file
    data_array = series_names
    for enabled, values, deviations in parameter_rows:
        if enabled:
            data_array = np.vstack((data_array, values, deviations))
    np.save(parameterBase + '.npy', data_array)

    # Saving data into .csv file
    with open(parameterBase + '.csv', 'w', newline='') as csvfile:
        dataWriter = csv.writer(csvfile, delimiter=',', quotechar=';', quoting=csv.QUOTE_MINIMAL)
        dataWriter.writerow(series_names)
        for enabled, values, deviations in parameter_rows:
            if enabled:
                dataWriter.writerow(values)
                dataWriter.writerow(deviations)
        dataWriter.writerow(foundFundumentalPitches)

    print("Data saved to: " + parameterFileName + '_' + fileNameAppendix)

    # Saving spectrum data
    with open(outputDirectory + '/' + spectrumFileName + '_' + fileNameAppendix + '.csv', 'w', newline='') as csvfile:
        dataWriter = csv.writer(csvfile, delimiter=',', quotechar=';', quoting=csv.QUOTE_MINIMAL)
        spectrumSections = zip(seriesNames, allAttackSpectrums, allAttackFrequencies, allSustainSpectrums,
                               allSustainFrequencies, allDecaySpectrums, allDecayFrequencies)
        for seriesPath, attackSpec, attackFreq, sustainSpec, sustainFreq, decaySpec, decayFreq in spectrumSections:
            # writerow() iterates its argument, so a bare string would be split into
            # one field per character; captions and names are wrapped in a list.
            dataWriter.writerow(["Name: "])
            dataWriter.writerow([seriesPath])
            dataWriter.writerow(["Attack spectrum: "])
            dataWriter.writerow(attackSpec)
            dataWriter.writerow(["Attack frequencies: "])
            dataWriter.writerow(attackFreq)
            dataWriter.writerow(["Sustain spectrum: "])
            dataWriter.writerow(sustainSpec)
            dataWriter.writerow(["Sustain frequencies: "])
            dataWriter.writerow(sustainFreq)
            dataWriter.writerow(["Decay Spectrum: "])
            dataWriter.writerow(decaySpec)
            dataWriter.writerow(["Decay frequencies: "])
            dataWriter.writerow(decayFreq)

    print("Spectrums saved to: " + spectrumFileName + '_' + fileNameAppendix)

    # --------------------Plotting spectrums----------------------
    # NOTE(review): impulseTime is the value of the last processed series and is
    # used for every figure.
    plotSections = zip(seriesNames, allAttackFrequencies, allAttackSpectrums, allSustainFrequencies,
                       allSustainSpectrums, allDecayFrequencies, allDecaySpectrums)
    for seriesPath, attackFreq, attackSpec, sustainFreq, sustainSpec, decayFreq, decaySpec in plotSections:
        plt.subplot(131)
        DrawSpectrum(attackFreq, attackSpec, maxAttack, '0', attackTime)
        plt.subplot(132)
        DrawSpectrum(sustainFreq, sustainSpec, maxSustain, attackTime, sustainTime)
        plt.subplot(133)
        DrawSpectrum(decayFreq, decaySpec, maxDecay, sustainTime, round(impulseTime, 2))

        outputFile = seriesPath.replace(inputDirectory, outputDirectory)
        print("Outputing to: " + outputFile)

        figure = plt.gcf()
        figure.set_size_inches(19, 8)

        if vectorOutput_flag:
            plt.savefig(outputFile, dpi=100, format="eps")
        else:
            plt.savefig(outputFile, dpi=100)

        # Reset the figure so the next series starts from an empty canvas.
        plt.clf()
Exemple #25
0
# Script fragment: `y`, `y_8k` and `sr` are defined earlier in the script,
# outside this excerpt.
print(y.shape)
print(y_8k.shape)

# Progress marker printed to the console.
print('sdgad')

# Figure 2: STFT of `y` in the upper panel, signal rebuilt from that STFT in the lower panel.
plt.figure(2)
plt.subplot(211)
c = librosa.stft(y)
# NOTE(review): the STFT is recomputed here instead of reusing `c`, and the
# complex matrix is plotted as-is; presumably a magnitude was intended. TODO confirm.
plt.plot(librosa.stft(y))
plt.subplot(212)
plt.plot(librosa.istft(c))
plt.show()

# Progress marker printed to the console.
print('lllllllllllllllllllllll')

# Print the tuning estimate returned by librosa for the signal.
print(librosa.estimate_tuning(y, sr))

# # Define the stream chunk size
# CHUNK = 1024
# # Open the wav file read-only
# wf = wave.open('weina.wav', 'rb')
#
# p = pyaudio.PyAudio()
#
# # Open the output stream
# stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
#                 channels=wf.getnchannels(),
#                 rate=wf.getframerate(),
#                 output=True)
#
# # Read the data
Exemple #26
0
def generate_accomp(melody_audio_fname,\
        out_fname,\
        use_pyin_notes=True,\
        alyzer=None,\
        generator=None,\
        gen_method=None,\
        roman_to_chords=True,\
        do_estimate_tuning=False,\
        do_remove_perc=False,\
        min_chord_dur_beats=None,\
        build_dir='.'):
    """ Generate accompaniment from the melody audio file.

    The audio is loaded, beat and onset times are computed, the signal is
    optionally reduced to its harmonic part and re-tuned, a melody sequence
    is estimated from it, and the chord generator synthesizes the result
    to ``out_fname``. Progress is reported with print statements
    (Python 2 syntax). Nothing is returned.

    Parameters
    ----------
    melody_audio_fname: string
        full path of audio file containing melody
    out_fname: string
        full path of audio file containing melody plus generated accompaniment
    use_pyin_notes: Boolean
        use pYIN:Notes to estimate melody notes; the (possibly re-tuned)
        audio is first written to 'tmp_melody_audio.wav' in the current
        working directory and analysed from that file
    alyzer: AudioAnalyzer or None
        analyzer used for melody estimation; a new AudioAnalyzer is
        created when None
    generator: ChordGenerator or None
        chord generator; when None one is built from the FST and symbol
        table files selected by the module-level USE_BQ switch
    gen_method:
        generation method handed to ChordGenerator; defaults to
        chordgen.GEN_METHOD_SHORTEST when None
    roman_to_chords: Boolean
        not read anywhere in this function; the value passed to
        ChordGenerator is derived from USE_BQ instead
    do_estimate_tuning: Boolean
        estimate the tuning offset and pitch-shift the signal by its negative
    do_remove_perc: Boolean
        keep only the harmonic portion of the signal before further analysis
    min_chord_dur_beats:
        forwarded unchanged to generator.synthesize_to_file
    build_dir: string
        forwarded to ChordGenerator when a generator is created here
    """


    # create analyzer and chord generator objects
    if alyzer is None:
        print 'make an AudioAnalyzer'
        alyzer = AudioAnalyzer()
    if gen_method is None:
        gen_method = chordgen.GEN_METHOD_SHORTEST
    if generator is None:
        # load FST
        print 'make ChordGenerator'

        # USE_BQ picks the BQ_* model files and disables roman-to-label conversion
        if USE_BQ:
            isyms = fsm.SymbolTable(filename=BQ_INPUT_SYMS_FILE)
            osyms = fsm.SymbolTable(filename=BQ_OUTPUT_SYMS_FILE)
            fst = fsm.FST(filename=BQ_LOG_FST_FILE,isyms_table=isyms,osyms_table=osyms)
            rom2label = False
        else:
            isyms = fsm.SymbolTable(filename=INPUT_SYMS_FILE)
            osyms = fsm.SymbolTable(filename=OUTPUT_SYMS_FILE)
            fst = fsm.FST(filename=LOG_FST_FILE,isyms_table=isyms,osyms_table=osyms)
            rom2label = True
        generator = ChordGenerator(fst=fst,method=gen_method,roman_to_chords=rom2label,\
            build_dir=build_dir)

    # load audio file and re-tune
    print 80*'.'
    print 'analyzing audio...\n\n'
    print 'loading audio file:',melody_audio_fname,'\n'
    y_orig, sr = librosa.load(melody_audio_fname)

    print 'computing beat times...'
    tempo, beats = librosa.beat.beat_track(y=y_orig, sr=sr, trim=False)
    beat_times = librosa.frames_to_time(beats, sr=sr)
    # print 'beat times:',beat_times
    # fill_beat_times is defined outside this function; it receives the
    # detected beat times and the tempo and returns the beat times used below
    beat_times = fill_beat_times(beat_times,tempo)
    print 'beat times:',beat_times


    # onset times are only printed; synthesis below uses beat_times
    onset_frames = librosa.onset.onset_detect(y=y_orig, sr=sr)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)
    print 'onset times:',onset_times

    # get rid of percussive parts of signal
    if do_remove_perc:
        print 'retain only harmonic portion of signal...'
        y_harm = librosa.effects.harmonic(y_orig)
    else:
        y_harm = y_orig

    if do_estimate_tuning:
        print 'estimate tuning...'
        tuning = librosa.estimate_tuning(y=y_harm, sr=sr)
        print 'correct tuning...'
        # shift by the negated estimate to cancel the measured deviation
        y_tuned = librosa.effects.pitch_shift(y_harm, sr, -tuning)
    else:
        tuning = 0
        y_tuned = y_harm

    print 80*'.'

    if use_pyin_notes:
        print 'estimating melody...'
        # the pYIN path works on a file, so the tuned audio is written out first;
        # the temporary file is not removed afterwards
        tmp_audio_fname = 'tmp_melody_audio.wav'
        librosa.output.write_wav(path=tmp_audio_fname, y=y_tuned, sr=sr)
        mseq = alyzer.get_estimated_melody_sequence_pyin(audio_fname=tmp_audio_fname)
    else:
        # generate the accompaniment and write to a file
        # print 'generate accompaniment...'
        mseq = alyzer.get_estimated_melody_sequence(y_tuned,sr)

    if USE_BQ:
        mseq.repeat_events(beat_quant_level=BEAT_QUANT_LEVEL)
    print 'estimating key...'
    # the returned key is not used further in this function
    key = generator.estimate_key(mseq)

    print 'synthesizing audio to file...'
    # the original (un-tuned) audio is mixed in; the tuning offset is passed along separately
    generator.synthesize_to_file(output_audio_fname=out_fname,\
        melody_sequence=mseq,\
        fs=sr,\
        beat_times=beat_times,\
        # beat_times=onset_times,\
        melody_audio=y_orig,\
        tuning_offset=tuning,\
        min_chord_dur_beats=min_chord_dur_beats)

    print 'done'
    print 80*'.'
Exemple #27
0
    def getFeature(self):
        """Build the feature vector for the recording in ``self.filename``.

        The recording is first passed through ``Vad`` (endpoint detection) and
        the result of ``getNewVoice()`` is loaded with librosa.

        Returns
        -------
        numpy.ndarray
            1-D array made of, in order: the tuning estimate, max/min/mean/std
            of the frame-wise RMS energy, max/mean/std of the zero-crossing
            rate, followed by the dominant eigenvector of ``M * M.T`` where
            ``M`` is the MFCC matrix.
        """
        # Endpoint detection
        vad = Vad(self.filename)
        newVoice = vad.getNewVoice()

        y, sr = librosa.load(newVoice)

        # MFCCs
        ccc = librosa.feature.mfcc(y=y, sr=sr)

        # Eigenvector belonging to the largest eigenvalue of ccc * ccc.T.
        # numpy.linalg.eig returns the eigenvectors as COLUMNS, so the vector
        # for eigenvalue index m is column m (row m is not an eigenvector).
        gram = np.dot(ccc, ccc.T)
        eigenvalues, eigenvectors = np.linalg.eig(gram)
        dominant = np.argmax(eigenvalues)
        ccc1 = np.asarray(eigenvectors[:, dominant]).ravel()

        # Energy statistics from a rectangular-window, non-centered STFT magnitude
        S = librosa.magphase(librosa.stft(y, window=np.ones, center=False))[0]
        rms = librosa.feature.rmse(S=S)
        rms_max = np.max(rms[0])
        rms_min = np.min(rms[0])
        rms_mean = np.mean(rms[0])
        rms_std = np.std(rms[0])

        # Zero-crossing rate statistics
        rate = librosa.feature.zero_crossing_rate(y)
        rate_max = np.max(rate)
        rate_mean = np.mean(rate)
        rate_std = np.std(rate)

        # Tuning estimate
        e = librosa.estimate_tuning(y=y, sr=sr)

        result1 = np.array([
            e, rms_max, rms_min, rms_mean, rms_std, rate_max, rate_mean,
            rate_std
        ])
        return np.append(result1, ccc1)
Exemple #28
0
def getData(filename, answers):
    """Extract summary audio features for one file and pair them with its label.

    Parameters
    ----------
    filename : str
        Path of the audio file, loaded with ``librosa.load``.
    answers : list
        Target vector for the file's genre; stored unchanged next to the
        features.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(1, 2)``: ``[0, 0]`` holds the list of 14
        feature values, ``[0, 1]`` holds ``answers``. The shape matches the
        ``(0, 2)`` seed array the genre collectors stack these rows onto.
    """
    print("Gretting data for {}".format(filename))
    hop_length = 256

    # Load the example clip
    y, sr = librosa.load(filename)

    # Short-time Fourier transform (STFT)
    S = np.abs(librosa.stft(y))

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beat track on the percussive signal. The same hop_length is used for the
    # beat frames, the MFCCs and the chromagram: the frame indices returned here
    # are used below to slice both feature matrices.
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr, hop_length=hop_length)

    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)

    # And the first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Stack and synchronize between beat events (mean aggregation by default)
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

    # Compute chroma features from the harmonic signal
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr, hop_length=hop_length)

    # Aggregate chroma features between beat events using the median
    beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)

    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

    # Average the energy
    avgEnergy = np.mean(librosa.feature.rmse(y=y))

    # Estimate tuning
    tuning = librosa.estimate_tuning(y=y, sr=sr)

    zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))

    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))

    avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))

    raw = [
        avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic), np.mean(y_percussive),
        np.mean(mfcc), np.mean(mfcc_delta), np.mean(beat_mfcc_delta), np.mean(chromagram),
        np.mean(beat_chroma), np.mean(beat_features), avgEnergy, tuning, zeroCrossings, tempo,
    ]

    # Features and label have different lengths, so the row is built as an
    # explicit object array instead of relying on ragged-list conversion.
    sample = np.empty((1, 2), dtype=object)
    sample[0, 0] = raw
    sample[0, 1] = answers
    return sample


def _collect_genre_samples(Data, genre, prefix, folder, answers, fileName):
    """Stack one getData() row per song of ``folder`` onto ``Data`` and save it.

    A song that fails is reported and skipped so one bad file does not lose
    the rest of the genre. Only ``Exception`` is caught, so Ctrl-C and
    ``SystemExit`` still stop the worker.
    """
    for song in genre:
        try:
            Data = np.vstack([Data, getData("{}{}/{}".format(prefix, folder, song), answers)])
        except Exception as err:
            print("ERROR ON SONG {}: {!r}".format(song, err))
    try:
        np.save(fileName, Data)
        print("SAVED {}!".format(fileName))
    except Exception as err:
        print("ERROR COUNLDN'T SAVE {}: {!r}".format(fileName, err))


def gethiphop(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Hiphop-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Hiphop-Samples", answers, fileName)


def getjazz(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Jazz-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Jazz-Samples", answers, fileName)


def getclassical(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Classical-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Classical-Samples", answers, fileName)


def getcountry(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Country-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Country-Samples", answers, fileName)


def getdance(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Dance-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Dance-Samples", answers, fileName)


def getmetal(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Metal-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Metal-Samples", answers, fileName)


def getreggae(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Reggae-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Reggae-Samples", answers, fileName)


def getrock(Data, genre, prefix, answers, fileName):
    """Collect the songs listed in ``genre`` from ``<prefix>Rock-Samples`` into ``fileName``."""
    _collect_genre_samples(Data, genre, prefix, "Rock-Samples", answers, fileName)


# Build one dataset file per genre, each in its own worker process.

# One-hot target vectors: row i is the label of the i-th genre (8 genres).
answers = [[1 if col == row else 0 for col in range(8)] for row in range(8)]

# Empty (0, 2) seed array every worker stacks its rows onto.
Data = np.array([]).reshape((0, 2))


def _run_genre_workers():
    """List the sample folder of every genre and process the genres in parallel.

    The order of the genres matches the rows of ``answers``. Each worker
    writes its own ``<Genre>DataSet.npy``; this function returns once all
    workers have finished.
    """
    genre_jobs = [
        ("Hiphop", gethiphop),
        ("Jazz", getjazz),
        ("Classical", getclassical),
        ("Country", getcountry),
        ("Dance", getdance),
        ("Metal", getmetal),
        ("Reggae", getreggae),
        ("Rock", getrock),
    ]

    workers = []
    for label, (genre_name, collector) in zip(answers, genre_jobs):
        # list of song names on disk for this genre
        songs = listdir_nohidden("{}{}-Samples".format(prefix, genre_name))
        workers.append(Process(
            target=collector,
            args=(Data, songs, prefix, label, "{}DataSet.npy".format(genre_name)),
        ))

    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


# Guarding the launch keeps child processes started with the "spawn" method
# (which re-import this module) from launching workers of their own.
if __name__ == "__main__":
    _run_genre_workers()

# np.save("CompleteRawDataSet.npy", Data)

# tdata = np.array([]).reshape(0,12)
# for i in range(5):
# 	tdata = np.vstack([tdata, Data[i][0]])
# tdata = whiten(tdata)
# print(tdata)
# print(np.argmin(tdata, axis=0))

#DS.saveToFile("DataSetComplete")
Exemple #29
0
import librosa

# Retune a vocal recording towards A440 and write the result to disk.

# librosa.load returns the samples together with the sampling rate.
vox, sr = librosa.load("teste.m4a")

# Work on the harmonic component only.
vox_harm = librosa.effects.harmonic(vox)

# Estimated deviation from A440, in fractions of a bin (12 bins per octave by
# default, i.e. fractions of a semitone).
tunning = librosa.estimate_tuning(y=vox_harm, sr=sr)

# Shift by the *negated* deviation: shifting by +tunning would move the signal
# further away from A440 instead of cancelling the offset.
vox_tunned = librosa.effects.pitch_shift(vox_harm, sr=sr, n_steps=-tunning)

# NOTE(review): librosa.output was removed in librosa 0.8; newer installs need
# another writer (e.g. soundfile) here — confirm the targeted librosa version.
librosa.output.write_wav("res.wav", vox_tunned, sr)
Exemple #30
0
    beatcounter =1
    while beatcounter <= 16:
        length = int(beatcounter/(newportion ^ 3)) % 4
        pitch = int(beatcounter/(newportion ^ beatcounter)) % 8
        melody.append((pitch, length))
        beatcounter += length

    return melody

def play_melody(filename):
    """Loop forever over a generated melody, stretching one sample per note.

    The audio in *filename* is loaded, trimmed, and shifted by the negated
    tuning estimate (measured in octaves, since ``bins_per_octave=1``). A
    melody of ``(pitch, length)`` pairs is then obtained from
    ``melody_generator`` and walked in an endless loop: a pitch of ``None``
    is a rest, any other note time-stretches the sample to the note length.

    Parameters
    ----------
    filename : path of the audio sample, passed to ``librosa.load``.

    This function never returns.
    """
    y, sr = librosa.load(filename)

    # Drop leading and trailing silence.
    y, _ = librosa.effects.trim(y)

    # With one bin per octave the estimate is a fraction of an octave.
    p = librosa.estimate_tuning(y=y, sr=sr, bins_per_octave=1)

    # Cancel the measured deviation (keywords keep this call valid on librosa
    # releases that reject positional sr / n_steps).
    y = librosa.effects.pitch_shift(y, sr=sr, n_steps=-1 * p,
                                    bins_per_octave=1)

    melody = melody_generator(random.random())

    beat_length = 0.75  # seconds per beat
    sample_len = librosa.get_duration(y=y, sr=sr)
    while True:
        for i, j in melody:
            if i is None:
                # Rest: just wait for the duration of the note.
                time.sleep(beat_length * j)
            else:
                target_len = beat_length * j
                # time_stretch divides the duration by `rate`, so reaching
                # target_len seconds needs rate = sample_len / target_len.
                sample = librosa.effects.time_stretch(
                    y, rate=sample_len / target_len)
                # NOTE(review): the visible code stops here, so the stretched
                # sample is not pitch-shifted or played yet.
Exemple #31
0
 def states(self, jam):
     """Yield one state dict whose 'tuning' entry is the estimated tuning.

     The signal and its sampling rate are read from the 'y' and 'sr' entries
     of ``jam.sandbox.muda._audio``.
     """
     audio = jam.sandbox.muda._audio
     estimated = librosa.estimate_tuning(y=audio['y'], sr=audio['sr'])
     yield {'tuning': estimated}
def feature_extraction(y, sr, opt_tuning):
    """Extract the frame-level features selected in ``params.feat``.

    Each enabled family ('cepstral', 'chroma', 'spectral') is computed and
    stacked on top of its delta along axis 0.

    Parameters
    ----------
    y : audio time series handed to the librosa extractors.
    sr : sampling rate of ``y``.
    opt_tuning : when truthy, the tuning deviation is estimated from the
        signal and passed to the chroma extractor; otherwise 0.0 is used.

    Returns
    -------
    full : feature matrix of the first enabled family, checked in the order
        cepstral, chroma, spectral.
    idx_chroma : position of the chroma family in the list of computed
        families (0 when chroma is not enabled).

    Raises
    ------
    IndexError
        If none of the three families is enabled in ``params.feat``.
    """
    if opt_tuning:
        #extraction of tuning
        A440 = librosa.estimate_tuning(y=y, sr=sr, resolution=1e-3)
        print('Deviation from A440 is : {0:.2f}'.format(A440))
    else:
        A440 = 0.0

    print('Features for local similarity: ', ' '.join(params.feat))
    full = []
    idx_chroma = 0

    if 'cepstral' in params.feat:
        mfcc = librosa.feature.mfcc(y=y,
                                    sr=sr,
                                    n_mfcc=20,
                                    n_fft=NFFT,
                                    hop_length=STEP)
        mfcc_delta = librosa.feature.delta(mfcc)
        full.append(np.concatenate((mfcc, mfcc_delta), axis=0))

    if 'chroma' in params.feat:
        chroma = librosa.feature.chroma_cqt(y=y,
                                            sr=sr,
                                            n_chroma=12,
                                            n_octaves=N_OCTAVES,
                                            hop_length=STEP,
                                            norm=None,
                                            tuning=A440)
        chroma_delta = librosa.feature.delta(chroma)
        # Remember where the chroma block sits among the computed families.
        idx_chroma = len(full)
        full.append(np.concatenate((chroma, chroma_delta), axis=0))

    if 'spectral' in params.feat:
        centroid = librosa.feature.spectral_centroid(y=y,
                                                     sr=sr,
                                                     n_fft=NFFT,
                                                     hop_length=STEP)
        contrast = librosa.feature.spectral_contrast(y=y,
                                                     sr=sr,
                                                     n_fft=NFFT,
                                                     n_bands=6,
                                                     hop_length=STEP)
        flatness = librosa.feature.spectral_flatness(y=y,
                                                     n_fft=NFFT,
                                                     hop_length=STEP)
        # Spectral roll-off at five energy percentages, lowest first.
        rolloffs = [
            librosa.feature.spectral_rolloff(y=y,
                                             sr=sr,
                                             n_fft=NFFT,
                                             hop_length=STEP,
                                             roll_percent=percent)
            for percent in (0.05, 0.25, 0.50, 0.75, 0.95)
        ]
        spec = np.concatenate([centroid, contrast, flatness] + rolloffs,
                              axis=0)
        spec_delta = librosa.feature.delta(spec)
        full.append(np.concatenate((spec, spec_delta), axis=0))

    # Only the first computed family is returned. Index the list directly:
    # wrapping differently-shaped matrices in np.array() first builds a ragged
    # array, which recent NumPy releases reject with a ValueError.
    full = np.asarray(full[0])

    print('feature shape', full.shape)
    return full, idx_chroma
    # y, sr = librosa.load(filename, offset=1.3, duration=0.2)    # 0.160
    # y, sr = librosa.load(filename, offset=2.6, duration=0.2)    # -0.48
    # y, sr = librosa.load(filename, offset=2.8, duration=0.2)    #-0.169
    # y, sr = librosa.load(filename, offset=3, duration=0.2)
    # y, sr = librosa.load(filename, offset=3.3, duration=0.2)
    # y, sr = librosa.load(filename, offset=3.55, duration=0.2)

    pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
    np.set_printoptions(threshold=np.nan)
    print(pitches[np.nonzero(pitches)])

    pitches = pitches[magnitudes > np.median(magnitudes)]
    p = librosa.pitch_tuning(pitches)
    print(p)

    tun = librosa.estimate_tuning(y=y, sr=sr)
    print(tun)

    onset_frames_time = [
        0.7662585, 1.27709751, 2.80961451, 3.0185941, 3.29723356, 3.57587302,
        3.80807256, 4.80653061, 7.2678458, 7.70902494
    ]
    onset_frames_time_diff = np.diff(onset_frames_time)
    onset_frames_time_diff = list(onset_frames_time_diff)
    onset_frames_time_diff.append(0.2)
    for i, o in enumerate(onset_frames_time):
        offset = round(o, 2)
        duration = round(onset_frames_time_diff[i], 2)
        y, sr = librosa.load(filename, offset=offset, duration=duration)
        pitches, magnitudes = librosa.core.piptrack(y=y, sr=sr)
        pitches = pitches[magnitudes > np.median(magnitudes)]
Exemple #34
0
def feature_extraction(path, sr, mono, frame_len, hop_len):
    """Summarise an audio file as a flat vector of per-frame statistics.

    The file is loaded, trimmed with ``librosa.effects.trim`` and cut into
    frames by ``framing``. For every frame the energy, zero-crossing count,
    two tuning estimates and the average of each of the ``n_mfcc`` MFCC rows
    are collected. Each resulting series is reduced to six statistics (min,
    max, mean, standard deviation, kurtosis, skewness).

    Parameters
    ----------
    path : audio file to load.
    sr, mono : forwarded to ``librosa.load``.
    frame_len, hop_len : forwarded to ``framing``.

    Returns
    -------
    numpy array holding the six statistics of, in order: zero-crossing
    counts, energies, ``estimate_tuning`` values, ``pitch_tuning`` values and
    then every MFCC coefficient.
    """
    # load wav file
    audio, sr = librosa.load(path, sr=sr, mono=mono)

    # remove silence from beginning and end
    trimmed, _ = librosa.effects.trim(audio)
    frames = framing(np.array(trimmed), frame_len, hop_len)

    energys = []
    zcrs = []
    estimate_tunings = []
    pitch_tunings = []
    # one running series per MFCC coefficient
    mfcc_series = [[] for _ in range(n_mfcc)]

    for frame in frames:
        energys.append(np.sum(np.power(frame, 2)))
        zcrs.append(np.count_nonzero(librosa.core.zero_crossings(frame)))
        estimate_tunings.append(librosa.estimate_tuning(y=frame, sr=sr))
        pitch_tunings.append(librosa.core.pitch_tuning(frame))

        coeffs = librosa.feature.mfcc(y=frame, sr=sr, n_mfcc=n_mfcc)
        for k, series in enumerate(mfcc_series):
            # average of coefficient k over this frame
            series.append(np.average(coeffs[k]))

    def summarize(series):
        """Return [min, max, mean, std, kurtosis, skew] of *series*."""
        values = np.array(series)
        return [
            values.min(),
            values.max(),
            np.mean(values),
            np.std(values),
            scipy.stats.kurtosis(values),
            scipy.stats.skew(values),
        ]

    features = []
    for series in (np.array(zcrs), np.array(energys), estimate_tunings,
                   pitch_tunings):
        features.extend(summarize(series))

    for coefficient_series in np.array(mfcc_series):
        features.extend(summarize(coefficient_series))

    return np.array(features)
Exemple #35
0
def tunning(signal,fs, **kwargs):
    """Estimate the tuning offset of *signal* relative to A440=440.0Hz.

    The offset is expressed in fractions of a bin. *fs* is the sampling rate
    of *signal*; any extra keyword arguments are accepted but not used.
    """
    offset = librosa.estimate_tuning(y=signal, sr=fs)
    return offset
import matplotlib.pyplot as plt
import numpy as np
from scipy import signal
import preprocessing as pre

# Banner, wrapped in ANSI escape codes for bold text.
print('\033[1m' + 'Debugging Prints' + '\033[0m')

ref_track = 'WAM20_20sek.wav'
test_track = 'WAM21_30sek.wav'

#Importing audio files
# NOTE(review): `sr` is overwritten by the second read, so everything below
# uses the test recording's sampling rate for both signals — confirm that the
# two files always share one rate.
ref_recording, sr = music_parser.readMusicFile(f'assets/{ref_track}')
test_recording, sr = music_parser.readMusicFile(f'assets/{test_track}')

#Estimate Tuning
# Keyword arguments: recent librosa releases no longer accept the signal and
# sampling rate positionally.
ref_tuning = librosa.estimate_tuning(y=ref_recording, sr=sr)
test_tuning = librosa.estimate_tuning(y=test_recording, sr=sr)

#Parameter extraction
ref_length = librosa.get_duration(y=ref_recording, sr=sr)
test_length = librosa.get_duration(y=test_recording, sr=sr)
frame_length = 9600
hopsize = frame_length // 2  # half-overlapping frames
window = 'hann'

# ##Sample properties Visualization
# # Compute waveform Visualization
# t_ref = np.arange(ref_recording.shape[0]) / sr
# t_test = np.arange(test_recording.shape[0]) / sr
# title_r = 'Waveform, Sample: Reference Recording'
# title_t = 'Waveform, Sample: Test Recording'
def getData(filename):
    """Extract one normalised feature row for *filename*.

    Fourteen scalar descriptors are computed from the audio and appended as a
    new row to the stack stored in ``ANN/Data/RawDataStack.npy``. The whole
    stack is whitened and saved to ``ANN/Data/WhitenDataStack.npy``. The new
    (last) row is then min-max scaled column by column against the whitened
    stack.

    Parameters
    ----------
    filename : path of the audio file, passed to ``librosa.load``.

    Returns
    -------
    The scaled last row of the whitened stack: the single normalised feature
    set to feed into the network.
    """
    print("Gretting data for {}".format(filename))
    hop_length = 256

    # Load the example clip
    y, sr = librosa.load(filename)

    # Short-time Fourier transform (STFT)
    S = np.abs(librosa.stft(y))

    # Separate harmonics and percussives into two waveforms
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Beat track on the percussive signal
    tempo, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr)

    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)

    # And the first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)

    # Stack and synchronize between beat events
    # This time, we'll use the mean value (default) instead of median
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)

    # Compute chroma features from the harmonic signal
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)

    # Aggregate chroma features between beat events
    # We'll use the median value of each feature between beat frames
    beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)

    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])

    # Average the energy
    avgEnergy = np.mean(librosa.feature.rmse(y=y))

    # Estimate tuning
    tuning = librosa.estimate_tuning(y=y, sr=sr)

    zeroCrossings = np.sum(librosa.core.zero_crossings(y=y))

    avgMelSpectro = np.mean(librosa.feature.melspectrogram(y=y, sr=sr))

    avgSpectralContrast = np.mean(librosa.feature.spectral_contrast(S=S, sr=sr))

    raw = [avgSpectralContrast, avgMelSpectro, np.mean(y_harmonic), np.mean(y_percussive), np.mean(mfcc), np.mean(mfcc_delta), np.mean(beat_mfcc_delta), np.mean(chromagram), np.mean(beat_chroma), np.mean(beat_features), avgEnergy, tuning, zeroCrossings, tempo]

    # Append the new row to the persisted raw stack.
    data = np.array([raw])
    rDataStack = np.load("ANN/Data/RawDataStack.npy")
    rDataStack = np.vstack([rDataStack, data])
    np.save("ANN/Data/RawDataStack.npy", rDataStack)

    # The whitened stack is saved before the last row is rescaled below.
    wDataStack = whiten(rDataStack)
    np.save("ANN/Data/WhitenDataStack.npy", wDataStack)

    # Min-max scale the newest row with the per-column extremes of the stack.
    col_min = wDataStack.min(axis=0)
    col_max = wDataStack.max(axis=0)
    wDataStack[-1] = (wDataStack[-1] - col_min) / (col_max - col_min)

    # Function-call form so the line is valid on Python 3 as well.
    print(wDataStack[-1])
    return wDataStack[-1]  # <<-- This is the single normalized feature set to feed into the network.
Exemple #38
0
 def states(self, jam):
     """Yield a single state: a dict with the estimated tuning of the audio.

     Reads the signal ('y') and sampling rate ('sr') stored in
     ``jam.sandbox.muda._audio``.
     """
     clip = jam.sandbox.muda._audio
     tuning_estimate = librosa.estimate_tuning(y=clip['y'], sr=clip['sr'])
     yield dict(tuning=tuning_estimate)
Exemple #39
0
def chromagram(y=None,
               sr=44100,
               S=None,
               norm=np.inf,
               n_fft=2048,
               hop_length=None,
               seconds=4,
               tuning=None,
               center=True,
               **kwargs):
    """
    Chromagram computed on coarse, overlapping spectrogram segments.

    Derived from the chromagram of the librosa package: the power
    spectrogram is first averaged over segments longer than its own frames,
    and the chroma filter bank is applied to those averaged columns.

    Parameters
    ----------
    y : numpy array [shape=(n,)]
        Time series to analyse. Only used when S is None. Default is None.
    sr : integer
        Sampling rate of the audio. Default is 44100 Hz.
    S : np.ndarray
        Spectrogram to start from. Default is None, in which case the power
        spectrogram of y is computed. When given, n_fft is re-derived from
        its number of rows.
    norm : float or None
        Column-wise normalization of the result. Default np.inf.
    n_fft : integer
        Window size used to compute the spectrogram. Default is 2048.
    hop_length : integer
        Hop between spectrogram frames. Default is None, meaning n_fft / 2.
    seconds : integer
        Requested segment duration in seconds; the corresponding number of
        frames is passed through prevPow before use. Default is 4.
    tuning : float or None
        Deviation from A440 in fractional bins. Default is None, in which
        case it is estimated from the spectrogram.
    center : boolean
        When True, the spectrogram is reflect-padded by a quarter segment on
        both ends of the time axis before segmenting. Default is True.
    kwargs :
        Extra arguments for librosa.filters.chroma(). 'A440' is filled in
        from the tuning unless the caller supplies it.

    Returns
    -------
    The segment-level chroma matrix, normalized along axis 0.
    """

    n_chroma = 12  # chroma bins per octave
    if hop_length is None:
        hop_length = int(n_fft / 2)

    if S is not None:
        n_fft = 2 * (S.shape[0] - 1)
    else:
        S = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))**2

    if tuning is None:
        tuning = librosa.estimate_tuning(S=S, sr=sr, bins_per_octave=n_chroma)

    # Translate the tuning deviation into a reference frequency for the
    # filter bank, unless the caller fixed one explicitly.
    if 'A440' not in kwargs:
        kwargs['A440'] = 440.0 * 2.0**(float(tuning) / n_chroma)

    chromafb = librosa.filters.chroma(sr, n_fft, **kwargs)

    # Segment length in spectrogram frames, then made a power of two.
    segment_length = sr * seconds / hop_length  # n_fft??
    segment_length = 2**prevPow(segment_length)  #alt: nextPow()
    half_segment = segment_length / 2

    if center:
        quarter_segment = int(segment_length / 4)
        S = np.pad(S, ((0, 0), (quarter_segment, quarter_segment)),
                   mode='reflect')

    num_segments = math.floor((S.shape[1] - segment_length) /
                              half_segment + 1)

    # Average the spectrogram over each segment.
    bin_S = np.zeros((S.shape[0], num_segments))
    for seg in range(int(num_segments)):
        start, end = calculateStartEnd(half_segment,
                                       segment_length,
                                       iterV=seg)
        bin_S[:, seg] = S[:, start:end].mean(axis=1)

    # Project the averaged columns onto the chroma filter bank.
    raw_chroma = np.dot(chromafb, bin_S)

    # Normalize every column of the chroma matrix.
    return librosa.util.normalize(raw_chroma, norm=norm, axis=0)
            #Extract each songs metadata
            artist.append(tag.artist)
            title.append(tag.title)
            album.append(tag.album)
            duration.append(tag.duration)
            year.append(tag.year)
            genre.append(tag.genre)
            audio_offset.append(tag.audio_offset)
            bitrate.append(tag.bitrate)
            samplerate.append(tag.samplerate)

            #Extract each songs audio metrics
            y, sr = librosa.load(songpath)
            y_harmonic, y_percussive = librosa.effects.hpss(y)
            tempo, beats = librosa.beat.beat_track(y=y_percussive, sr=sr)
            tuning = librosa.estimate_tuning(y=y_harmonic, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            spectral_centroids = librosa.feature.spectral_centroid(y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y, sr=sr)
            chroma_stft = librosa.feature.chroma_stft(y, sr=sr)
            tempos.append(tempo)
            tunings.append(tuning)
            zeroCrossing.append(np.mean(zcr))
            specCentroid.append(np.mean(spectral_centroids))
            specBandwidth.append(np.mean(spec_bw))
            specRolloff.append(np.mean(rolloff))
            chroma.append(np.mean(chroma_stft))
            
            counter+=1
            print(counter)