Example 1
    def generate_note(self, f0_info, n_duration, round_to_sixteenth=True):
        f0 = f0_info[0]
        a = remap(f0_info[1], self.cqt.min(), self.cqt.max(), 0, 1)
        duration = librosa.frames_to_time(n_duration, sr=self.sr, hop_length=self.hop_length)
        note_duration = 0.02 * np.around(duration / 0.02)  # Round to the nearest 0.02 s for music21 compatibility
        midi_duration = second_to_quarter(duration, self.tempo)
        midi_velocity = int(round(remap(f0_info[1], self.cqt.min(), self.cqt.max(), 80, 120)))
        if round_to_sixteenth:
            midi_duration = round(midi_duration * 16) / 16
        try:
            if f0 is None:
                midi_note = None
                note_info = Rest(type=self.mm.secondsToDuration(note_duration).type)
                f0 = 0
            else:
                midi_note = round(librosa.hz_to_midi(f0))
                note = Note(librosa.midi_to_note(midi_note), type=self.mm.secondsToDuration(note_duration).type)
                note.volume.velocity = midi_velocity
                note_info = [note]
        except DurationException:
            if f0 is None:
                midi_note = None
                note_info = Rest(type='32nd')
                f0 = 0
            else:
                midi_note = round(librosa.hz_to_midi(f0))
                note = Note(librosa.midi_to_note(midi_note),
                            type='eighth')
                note.volume.velocity = midi_velocity
                note_info = [note]

        midi_info = [midi_note, midi_duration, midi_velocity]
        n = np.arange(librosa.frames_to_samples(n_duration, hop_length=self.hop_length))
        sine_wave = a * np.sin(2 * np.pi * f0 * n / float(self.sr))
        return [sine_wave, midi_info, note_info]
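The helper remap() used above (and again in the sine-note example further down) is not part of librosa and is never defined in these snippets. A minimal sketch, assuming it is a plain linear rescaling from one range to another:

def remap(x, in_min, in_max, out_min, out_max):
    # Hypothetical helper: linearly rescale x from [in_min, in_max] to [out_min, out_max].
    return out_min + (x - in_min) * (out_max - out_min) / (in_max - in_min)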
 def arousalScore(t):
     before = pitches[(times >= t - TUNE_SCOPE / 2) & (times <= t)]
     after = pitches[(times >= t) & (times <= t + TUNE_SCOPE / 2)]
     before = (librosa.hz_to_midi(before + 0.1) * 6 / 12).astype(int)
     after = (librosa.hz_to_midi(after + 0.1) * 6 / 12).astype(int)
     score = np.sum(after) - np.sum(before)
     return score / len(before)
Example 3
def make_jam(freq_dict,sr,track_duration):
    """
    this function creates a jam according to a dictionary that specifies 
    each frequency's presence 

    dict: keys are frequencies
          values are list of tuples (start_time, duration) of that frequency
    """
    jam = jams.JAMS()

    # Store the track duration
    jam.file_metadata.duration = track_duration

    pitch_co = jams.Annotation(namespace='pitch_contour')
    note_h = jams.Annotation(namespace='note_hz')
    note_m = jams.Annotation(namespace='note_midi')
    pitch_cl = jams.Annotation(namespace='pitch_class')
    pitch_h = jams.Annotation(namespace='pitch_hz')
    pitch_m = jams.Annotation(namespace='pitch_midi')
    
    pitch_co.annotation_metadata = jams.AnnotationMetadata(data_source='synth')
    note_h.annotation_metadata = jams.AnnotationMetadata(data_source='synth')
    note_m.annotation_metadata = jams.AnnotationMetadata(data_source='synth')
    pitch_cl.annotation_metadata = jams.AnnotationMetadata(data_source='synth')
    pitch_h.annotation_metadata = jams.AnnotationMetadata(data_source='synth')
    pitch_m.annotation_metadata = jams.AnnotationMetadata(data_source='synth')


    #assign frequencies to each start_time
    freqs = freq_dict.keys()
    for f in freqs:
        time_dur = freq_dict[f] #list of tuples (start_time,duration)
        for t, dur in time_dur:
            pitch_co.append(time=t, duration=dur, value={"index":0,"frequency":f,"voiced":True})
            note_h.append(time=t, duration=dur,value=f)
            note_m.append(time=t, duration=dur, value=librosa.hz_to_midi(f))
            pclass = librosa.hz_to_note(f)
            pitch_cl.append(time=t, duration=dur,value={"tonic":pclass[:-1],"pitch":int(pclass[-1])})
            pitch_h.append(time=t, duration=dur,value=f)
            pitch_m.append(time=t, duration=dur, value=librosa.hz_to_midi(f))
    # Store the new annotation in the jam
    jam.annotations.append(pitch_co)
    jam.annotations.append(note_h)
    jam.annotations.append(note_m)
    jam.annotations.append(pitch_cl)
    jam.annotations.append(pitch_h)
    jam.annotations.append(pitch_m)

    return jam
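A minimal usage sketch for make_jam, assuming a made-up two-note freq_dict; jam.save() writes the resulting annotations to disk:

# Hypothetical input: A4 sounds twice and E5 once in a 4-second track
freq_dict = {
    440.0: [(0.0, 1.0), (2.0, 1.0)],
    659.26: [(1.0, 1.0)],
}
jam = make_jam(freq_dict, sr=22050, track_duration=4.0)
jam.save("synth_example.jams")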
Example 4
def sonify_annotation(intervals, pitches, sonification_fs=8000):
    """Sonify a note annotation.
    
    Parameters
    ----------
    intervals: np.ndarray shape=(n, 2)
        Array of note start and end times in seconds
    pitches: np.ndarray shape=(n,)
        Array of note pitches in Hz
    sonification_fs: float
        Sample rate of sonified audio.
        
    Returns
    -------
    y_sonify: np.ndarray shape=(m,)
        Mono audio signal of sonified notes
    
    """
    pm = pretty_midi.PrettyMIDI()
    inst = pretty_midi.Instrument(program=0, is_drum=False, name='piano')
    pm.instruments.append(inst)
    velocity = 100
    for interval, pitch in zip(intervals, pitches):
        inst.notes.append(
            pretty_midi.Note(velocity, librosa.hz_to_midi(pitch), interval[0],
                             interval[1]))

    return pm.synthesize(fs=sonification_fs)
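A short usage sketch, assuming a two-note annotation and the soundfile package for writing the rendered audio:

import numpy as np
import soundfile as sf

intervals = np.array([[0.0, 0.5], [0.5, 1.0]])  # note start/end times in seconds
pitches = np.array([440.0, 659.26])             # note pitches in Hz
y = sonify_annotation(intervals, pitches, sonification_fs=8000)
sf.write("sonified.wav", y, 8000)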
Example 5
def pitchChroma(
    pitches,
    n_class=PITCH_CHROMA_CLASS,
    count=PITCH_CHROMA_COUNT,
    hop=PITCH_CHROMA_HOP,
):
    """input: [frames]
    output: [n_class, frames]"""
    pitches = deepcopy(pitches)
    frames = pitches.shape[-1]
    nonVoice = pitches <= 0  # True where the frame is unvoiced
    pitches[nonVoice] = 10  # avoid log(0) warning
    pitches = librosa.hz_to_midi(pitches) * n_class / 12
    pitches = np.remainder(pitches.astype(int), n_class)
    # n_class marks non-voice frames; it falls outside the chroma bins and is dropped below
    pitches[nonVoice] = n_class
    # pitches: [1, frames]
    pitches = np.expand_dims(pitches, axis=0)

    assert (pitches <= n_class).all()
    # XPitches: [count, frames]
    XPitches = librosa.feature.stack_memory(pitches,
                                            n_steps=count,
                                            delay=hop,
                                            mode="edge")
    assert (XPitches <=
            n_class).all(), "surprise! just try again, the bug might be gone"

    # res: [n_class, frames]
    res = np.zeros((n_class, frames))
    weights = np.array([count - i for i in range(count)])
    for t in range(frames):
        bins = np.bincount(XPitches[:, t], weights=weights, minlength=n_class)
        res[:, t] = bins[:n_class]
    return res
Example 6
def synth_config(
    in_path, out_path, model_path,
    gens, mutation, elitism, var_mut
):
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    source = sm.sound.load(in_path)
    lso = sm.extraction.get_sounds(source)

    sm.synth.POPULATION = len(lso)
    sm.synth.GENERATIONS = gens
    sm.synth.AVG_MUTATIONS = mutation
    sm.synth.ELITISM = elitism
    sm.synth.VAR_MUTATIONS = var_mut

    synths = sm.synth.fast_evolve(sm.synth.SoundSynth, lso, model)
    config = [synth.gen(so) for synth,so in zip(synths, lso)]

    out = ""
    i = 0
    for so in config:
        _, vtrack = so.track_pitch()
        pitch = lr.hz_to_midi(so.get_f0())
        vel = np.clip(np.mean(vtrack) * 127, 0, 127)
        path = os.path.dirname(out_path)
        fname = os.path.splitext(os.path.basename(out_path))[0] + f"_s{i}.wav"
        so.write(os.path.join(path, fname))
        out += f"{fname}, {pitch}, {vel}\n"

    with open(out_path, "w") as file:
        file.write(out)
Example 7
 def test_hz_to_midi_is_accurate(self):
     """Tests converting between MIDI values and their frequencies in hertz."""
     hz = np.linspace(20.0, 20000.0, 128)
     librosa_midi = librosa.hz_to_midi(hz)
     with self.cached_session() as sess:
         tf_midi = sess.run(core.hz_to_midi(hz))
     self.assertAllClose(librosa_midi, tf_midi)
Example 8
 def test_hz_to_midi_is_accurate(self):
     """Tests converting between MIDI values and their frequencies in hertz.
     """
     hz = np.linspace(20.0, 20000.0, 128)
     librosa_midi = librosa.hz_to_midi(hz)
     th_midi = core.hz_to_midi(hz)
     assert np.allclose(librosa_midi, th_midi)
 def midi_notes_encoding(audio):
     """
     Compute frame-based midi encoding of audio
     :param audio: 1-D array of audio time series 
     """
     pitches, magnitudes = librosa.piptrack(audio)
     pitches = np.transpose(pitches)
     magnitudes = np.transpose(magnitudes)
     lc = np.zeros((pitches.shape[0], 88), dtype=np.float32)
     for i in range(pitches.shape[0]):
         # Count non-zero entries of pitches
         nz_count = len(np.nonzero(pitches[i])[0])
         # Keep a maximum of 6 detected pitches
         num_ind_to_keep = min(nz_count, 6)
         ind_of_largest_pitches = np.argpartition(
             magnitudes[i], -num_ind_to_keep)[-num_ind_to_keep:] \
             if num_ind_to_keep != 0 else []
         # Convert the largest pitches to midi notes
         overtone_limit = librosa.midi_to_hz(96)
         # filter() is lazy in Python 3; build a list so the surviving indices
         # can be used for numpy fancy indexing twice below
         ind_of_largest_pitches = [
             x for x in ind_of_largest_pitches
             if pitches[i, x] <= overtone_limit]
         midi_notes = librosa.hz_to_midi(pitches[i, ind_of_largest_pitches])
         midi_notes = midi_notes.round()
         # Normalize magnitudes of pitches
         midi_mags = magnitudes[i, ind_of_largest_pitches] / \
                     np.linalg.norm(magnitudes[i, ind_of_largest_pitches], 1)
         np.put(lc[i], midi_notes.astype(np.int64) - [9], midi_mags)
     return lc
Example 10
def pitch_features(segment, hop_length=1024, nor=True, to_midi=False):
    # normalization and MIDI conversion are mutually exclusive; prefer normalization
    if nor and to_midi:
        to_midi = False

    p_features = np.array([])
    pitches, magnitudes = librosa.piptrack(y=segment,
                                           sr=global_sr,
                                           fmin=20,
                                           fmax=8000,
                                           n_fft=hop_length * 2,
                                           hop_length=hop_length)
    p = [
        pitches[magnitudes[:, i].argmax(), i]
        for i in range(0, pitches.shape[1])
    ]
    pitch0 = np.array(p)  # shape (305,)
    pitch = np.transpose(pitch0)
    p_features = np.hstack((p_features, max(20, np.amin(pitch, 0))))
    p_features = np.hstack((p_features, np.amax(pitch, 0)))
    p_features = np.hstack((p_features, np.median(pitch, 0)))
    p_features = np.hstack((p_features, np.mean(pitch, 0)))
    p_features = np.hstack((p_features, np.std(pitch, 0)))
    # p_features = np.hstack((p_features, np.var(pitch, 0)))
    if nor:
        p_features = normalize(p_features.reshape(1, -1))
    if to_midi:
        p_features = np.int_(librosa.hz_to_midi(p_features))
    return p_features
def generate_sine_midi_note(f0_info, sr, n_duration):
    f0 = f0_info[0]
    A = remap(f0_info[1], CdB.min(), CdB.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=sr, hop_length=hop_length)
    # Generate music21 note
    note_duration = 0.02 * np.around(
        duration / 2 /
        0.02)  # Round to the nearest 0.02 s for music21 compatibility
    midi_velocity = int(round(remap(f0_info[1], CdB.min(), CdB.max(), 0, 127)))
    if f0 is None:
        try:
            note_info = Rest(type=mm.secondsToDuration(note_duration).type)
        except DurationException:
            note_info = None
        f0 = 0
    else:
        midi_note = round(librosa.hz_to_midi(f0))
        try:
            note = Note(midi_note,
                        type=mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
        except DurationException:
            note_info = None

    if note_info is None:
        return None

    # Generate Sinewave
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=hop_length))
    sine_wave = A * np.sin(2 * np.pi * f0 * n / float(sr))
    return [sine_wave, note_info]
Example 12
def get_notes(WAVE_OUTPUT_FILENAME, file):
    os.system("aubiopitch -i" + WAVE_OUTPUT_FILENAME + " -r 44100 -p " +
              args.method + " -H 128 > " + file + ".txt")
    f = open(file + '.txt', 'r')
    note = np.array(f.read().split()[1::2]).astype(float)
    output_MIDI = []

    for j in range(0, len(note)):
        if (note[j] != 0):
            d = librosa.hz_to_midi(note[j])
            d = round(d, 0)
            output_MIDI.append(d)

    s = ""

    for i in range(0, len(output_MIDI) - 1):
        if (output_MIDI[i] < output_MIDI[i + 1]):
            if ((output_MIDI[i + 1] - output_MIDI[i]) <= 2):
                s += "u"
            else:
                s += "U"
        if (output_MIDI[i] == output_MIDI[i + 1]):
            s += "S"
        if (output_MIDI[i] > output_MIDI[i + 1]):
            if ((output_MIDI[i] - output_MIDI[i + 1]) <= 2):
                s += "d"
            else:
                s += "D"
    return s
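The string returned above is a coarse melodic-contour (Parsons-style) encoding: for each pair of consecutive MIDI notes it emits 'u'/'U' for a rise of at most/more than two semitones, 'S' for a repeat, and 'd'/'D' for a small/large fall. A hypothetical worked example:

# MIDI notes [60, 62, 67, 67, 64]:
#   60 -> 62  rises by 2  -> 'u'
#   62 -> 67  rises by 5  -> 'U'
#   67 -> 67  repeats     -> 'S'
#   67 -> 64  falls by 3  -> 'D'
# so get_notes would return the contour string "uUSD" for this sequence.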
Example 13
 def test_hz_to_midi_is_accurate(self):
   """Tests converting between MIDI values and their frequencies in hertz."""
   hz = np.linspace(0.0, 20000.0, 128)
   librosa_midi = librosa.hz_to_midi(hz)
   librosa_midi = tf.where(tf.less_equal(hz, 0.0), 0.0, librosa_midi)
   tf_midi = core.hz_to_midi(hz)
   self.assertAllClose(librosa_midi, tf_midi)
Example 14
    def hz_to_MIDI(self, varMidi, varMusica):
        y, sr, wav = self.load(varMusica.getAntigoWav(), varMusica.getOitava())
        tempo, beats = librosa.beat.beat_track(onset_envelope=wav[:200], sr=sr)

        varMusica.GeneralMIDI = librosa.hz_to_midi(tempo)
        print("varMusica general Midi {}".format(varMusica.GeneralMIDI))
        varMidi.criaNovoMIDI(varMusica.GeneralMIDI)
        self.criaWav(sys.argv[3], 'novissimo.wav', varMusica)
Example 15
def get_notes(filename, duration):
    y, sr = librosa.load(filename)
    fmin = 73
    fmax = 1108
    n_bins = 256
    harmonic = librosa.effects.harmonic(y)
    onsets = librosa.onset.onset_detect(harmonic)
    # get silence states
    rms = librosa.feature.rms(y=harmonic)[0]
    r_normalized = (rms - 0.01) / (np.std(rms) + 1e-9)
    p = np.exp(r_normalized) / (1 + np.exp(r_normalized))
    transition = librosa.sequence.transition_loop(2, [0.5, 0.6])
    full_p = np.vstack([1 - p, p])
    states = librosa.sequence.viterbi_discriminative(full_p, transition)
    # drop silent onsets
    onsets_filtered = onsets[states[onsets] > 0]
    # silence start borders
    silence = np.nonzero(states[:-1] - states[1:] > 0)[0]
    # note borders
    borders = np.hstack([silence.reshape(1, -1), onsets_filtered.reshape(1, -1)])[0]
    borders = np.sort(borders)
    # get frequencies and aggregate them
    pitches, magnitudes = librosa.piptrack(harmonic, sr=sr,
                                           fmin=fmin,
                                           fmax=fmax)
    freq = pitches.max(axis=-1)
    bins = np.argmax(magnitudes, axis=0)
    bins_sync = librosa.util.sync(bins, borders, aggregate=np.median)
    states_sync = librosa.util.sync(states, borders, aggregate=np.median)
    pitch_sync = freq[bins_sync]
    pitch_sync[pitch_sync == 0] = 1e-6
    # get notation and midi keys
    notes = librosa.hz_to_note(pitch_sync)
    midi = list(librosa.hz_to_midi(pitch_sync))
    # check pauses
    pauses = np.nonzero(states_sync == 0)[0]
    for x in list(pauses):
        notes[int(x)] = 'P'
        midi[int(x)] = 'P'
    # check wrong notes
    for i, note in enumerate(notes):
        if note[:-1] not in notes_to_numbers.keys():
            notes[i] = 'P'
    # add borders to borders and define notes lengths
    borders = np.append(borders, pitches.shape[-1])
    borders = np.concatenate([np.array([0]), borders])
    lengths = borders[1:] - borders[:-1]

    bpm = librosa.beat.tempo(y, sr=sr)

    melody = dict(notes=notes,
                  lengths=list(lengths),
                  midi=midi,
                  bpm=bpm,
                  duration=duration,
                  raw_filename=filename.split('/')[-1])

    return melody
Example 16
 def __init__(self, freq, start_time, duration, volume=1):
     # determine note and MIDI value using librosa
     self._actual_freq = freq
     self.freq = librosa.note_to_hz(librosa.hz_to_note(freq))
     self.note = librosa.hz_to_note(self.freq)
     self.midi = int(librosa.hz_to_midi(self.freq))
     self.start_time = start_time
     self.duration = duration
     self.volume = volume
Example 17
def load_score(score_file):

    dom = parse(score_file)

    xml_notes = dom.getElementsByTagName("note")
    
    notes=[]
    durations=[]
    for note in xml_notes:
        if is_fermata(note):
            print 'fermata'
        durations.append(get_duration(note))
        if is_rest(note):
            notes.append('0')
        else: 
            if is_accidental(note):
                alter_aux = get_alter(note)
                if alter_aux == '1':
                    notes.append(get_step(note) + '#' + get_octave(note))
                if alter_aux == '2':
                    notes.append(get_step(note) + '##' + get_octave(note))
                if alter_aux == '-1':
                    notes.append(get_step(note) + 'b' + get_octave(note))
                if alter_aux == '-2':
                    notes.append(get_step(note) + 'bb' + get_octave(note))
            else:
                notes.append(get_step(note) + get_octave(note))
       
    durations=np.array(durations,dtype='int16')
    
    cr = csv.reader(open("../traditional_dataset/note_convertion.csv","rb"))
          
    notation=[]
    frequency=[]
    
    for row in cr:
    
        notation.append(row[0]) 
        frequency.append(row[1])
    
    frequency = np.array(frequency, 'float64')
    
    i=0
    melo = np.empty([0,])
    for note in notes:
        if note=='0':
            for k in range(0,durations[i]):            
                melo = np.r_[melo,0]
        else:
            for k in range(0,durations[i]):            
                melo = np.r_[melo,frequency[notation.index(note)]]
        i=i+1
    
    
    score = lr.hz_to_midi(melo)
    np.place(score,score==-np.inf,0)
    return score, notes
Example 18
def evolve(lso, n_generations, elitism, mut_prob, fitness_weights,
           fitness_params):
    npop = len(lso)
    population = [(so, fitness(so, fitness_weights, fitness_params))
                  for so in lso]

    for _ in range(n_generations):
        n_child = round(npop * (1 - elitism))
        pot_parents = copy.deepcopy(population)
        childs = []

        while len(childs) < n_child:
            if len(pot_parents) < 2: pot_parents = copy.deepcopy(population)
            parent0 = random.choices(pot_parents,
                                     weights=[
                                         score for _, score in pot_parents
                                     ],
                                     k=1)[0]
            pot_parents.remove(parent0)
            parent1 = random.choices(pot_parents,
                                     weights=[
                                         score for _, score in pot_parents
                                     ],
                                     k=1)[0]
            pot_parents.remove(parent1)
            child = crossover(parent0[0], parent1[0])
            while random.uniform(0, 1) < mut_prob:
                mut = random.choice(mutations)
                print(f"Mutating: {mut.__name__}")
                new_child = mut(child)
                if not np.isnan(new_child.samples).any():
                    child = new_child
                else:
                    print("Mutation Failed")
            try:
                child = SoundObject(
                    lr.effects.trim(child.get_normalize_to(1.0)))
            except:
                continue
            fit = fitness(child, fitness_weights, fitness_params)
            if np.isnan(fit) or np.isinf(fit): continue
            childs.append((child, fit))

        population = sorted(population, key=lambda ind: ind[1],
                            reverse=True)[:int(elitism * npop)] + childs
        print(max([s for _, s in population]))

    out = []
    for so, _ in population:
        ptrack, mtrack = so.track_pitch()
        pitch = lr.hz_to_midi(np.mean(ptrack))
        velocity = np.clip(np.mean(mtrack) * 127, 0, 127)
        out.append((so, pitch, velocity))
    return out
Example 19
def compute_adjusted_features(
    audio_features,
    auto_adjust,
    loudness_db_shift,
    f0_octave_shift,
    f0_confidence_threshold,
):
    """Compute resynthetized audio"""
    audio_features_mod = {k: v.copy() for k, v in audio_features.items()}
    if auto_adjust:
        # Adjust the peak loudness.
        l = audio_features["loudness_db"]
        model_ld_avg_max = {
            "Violin": -34.0,
            "Flute": -45.0,
            "Flute2": -44.0,
        }[instrument_model]
        ld_max = np.max(audio_features["loudness_db"])
        ld_diff_max = model_ld_avg_max - ld_max
        audio_features_mod = shift_ld(audio_features_mod, ld_diff_max)

        # Further adjust the average loudness above a threshold.
        l = audio_features_mod["loudness_db"]
        model_ld_mean = {
            "Violin": -44.0,
            "Flute": -51.0,
            "Flute2": -53.0,
        }[instrument_model]
        ld_thresh = -50.0
        ld_mean = np.mean(l[l > ld_thresh])
        ld_diff_mean = model_ld_mean - ld_mean
        audio_features_mod = shift_ld(audio_features_mod, ld_diff_mean)

        # Shift the pitch register.
        model_p_mean = {
            "Violin": 73.0,
            "Flute": 81.0,
            "Flute2": 74.0,
        }[instrument_model]
        p = librosa.hz_to_midi(audio_features["f0_hz"])
        p[p == -np.inf] = 0.0
        p_mean = p[l > ld_thresh].mean()
        p_diff = model_p_mean - p_mean
        p_diff_octave = p_diff / 12.0
        round_fn = np.floor if p_diff_octave > 1.5 else np.ceil
        p_diff_octave = round_fn(p_diff_octave)
        audio_features_mod = shift_f0(audio_features_mod, p_diff_octave)

    audio_features_mod = shift_ld(audio_features_mod, loudness_db_shift)
    audio_features_mod = shift_f0(audio_features_mod, f0_octave_shift)
    audio_features_mod = mask_by_confidence(audio_features_mod,
                                            f0_confidence_threshold)
    return audio_features_mod
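shift_ld, shift_f0, and mask_by_confidence are not shown in this snippet. A rough sketch of what the loudness and pitch shifters might look like, assuming the DDSP-style audio_features dictionary used above (loudness in dB, f0 in Hz, octave shifts applied multiplicatively):

import numpy as np
import librosa

def shift_ld(audio_features, ld_shift=0.0):
    # Sketch: add a constant dB offset to the loudness curve.
    audio_features["loudness_db"] += ld_shift
    return audio_features

def shift_f0(audio_features, octave_shift=0.0):
    # Sketch: shift f0 by whole octaves and keep it in a plausible range.
    audio_features["f0_hz"] *= 2.0 ** octave_shift
    audio_features["f0_hz"] = np.clip(
        audio_features["f0_hz"], 0.0, librosa.midi_to_hz(110.0))
    return audio_features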
Example 20
def get_Z(fmin, hop_length, n_bins_per_octave, n_octaves, stem):
    melody_f0s = di.symbolic.get_melody(stem)
    melody_annotation_hop = 256
    melody_downsampling = hop_length / melody_annotation_hop
    melody_range = xrange(0, len(melody_f0s), melody_downsampling)
    melody_f0s = melody_f0s[melody_range]
    midis = librosa.hz_to_midi(melody_f0s)
    midis[np.isinf(midis)] = 0.0
    track_activations = np.vstack(stem.track.activations_data)[:, 1:]
    stem_id = int(stem.name[1:]) - 1
    activations = track_activations[:, stem_id]
    activation_hop = 2048
    activation_upsampling = activation_hop / hop_length
    activations = activations.repeat(activation_upsampling)
    n_bins = n_bins_per_octave * n_octaves
    n_frames = len(activations)
    Z = np.zeros((n_bins, n_frames), np.float32)
    for frame_id in range(len(midis)):
        bin_id = int(midis[frame_id] - librosa.hz_to_midi(fmin)[0])
        if bin_id >= 0:
            Z[bin_id, frame_id] = activations[frame_id]
    return Z
Example 22
def plot_audio(audio_features, audio_features_mod, **kwargs):
    """Plot audio features in matplotlib"""
    legend = ["Audio features", "Resynth audio"]
    fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(6, 4))
    ax[0].plot(audio_features["loudness_db"])
    ax[0].plot(audio_features_mod["loudness_db"])
    ax[0].set_ylabel("loudness_db")
    ax[0].legend(legend)

    ax[1].plot(librosa.hz_to_midi(audio_features["f0_hz"]))
    ax[1].plot(librosa.hz_to_midi(audio_features_mod["f0_hz"]))
    ax[1].set_ylabel("f0 [midi]")
    ax[1].legend(legend)

    ax[2].plot(audio_features_mod["f0_confidence"])
    ax[2].plot(
        np.ones_like(audio_features_mod["f0_confidence"]) *
        f0_confidence_threshold)
    ax[2].set_ylabel("f0 confidence")
    ax[2].set_xlabel("Time step [frame]")
    ax[2].legend(legend)
    return fig, ax
Example 23
def plotMel(timesList, freqsList, title='', grid=(44100, 2048, 512)):
    for i, (times, freqs) in enumerate(zip(timesList, freqsList)):
        midis = np.zeros([len(freqs)])
        midis[freqs > 0] = librosa.hz_to_midi(freqs[freqs > 0])
        seminotes = midis.astype(int) + 0.5

        plt.plot(times[midis > 0], midis[midis > 0], label=f'midi {i}')
        # plt.plot(times, seminotes, label=f'seminote {i}')

    sr, n_fft, hop_length = grid
    frames = librosa.time_to_frames(times[-1], sr, hop_length, n_fft)
    for t in librosa.frames_to_time(range(frames), sr, hop_length, n_fft):
        plt.axvline(x=t, color='g')
    for f in librosa.fft_frequencies(sr, n_fft):
        if f >= np.min(freqs[freqs > 0]) * 0.9 and f <= np.max(freqs) * 1.1:
            plt.axhline(y=librosa.hz_to_midi(f), color='g')

    plt.xlabel('time')
    plt.ylabel('midi note')
    plt.legend()
    plt.title(f'melody:{title}')
    plt.show()
Example 24
def test_cq_to_chroma(n_octaves, semitones, n_chroma, fmin, base_c, window):

    bins_per_octave = 12 * semitones
    n_bins = n_octaves * bins_per_octave

    if np.mod(bins_per_octave, n_chroma) != 0:
        ctx = pytest.raises(librosa.ParameterError)
    else:
        ctx = dnr()

    with ctx:
        # Fake up a cqt matrix with the corresponding midi notes

        if fmin is None:
            midi_base = 24  # C2
        else:
            midi_base = librosa.hz_to_midi(fmin)

        midi_notes = np.linspace(
            midi_base,
            midi_base + n_bins * 12.0 / bins_per_octave,
            endpoint=False,
            num=n_bins,
        )
        #  We don't care past 2 decimals here.
        # the log2 inside hz_to_midi can cause problems though.
        midi_notes = np.around(midi_notes, decimals=2)
        C = np.diag(midi_notes)

        cq2chr = librosa.filters.cq_to_chroma(
            n_input=C.shape[0],
            bins_per_octave=bins_per_octave,
            n_chroma=n_chroma,
            fmin=fmin,
            base_c=base_c,
            window=window,
        )

        chroma = cq2chr.dot(C)
        for i in range(n_chroma):
            v = chroma[i][chroma[i] != 0]
            v = np.around(v, decimals=2)

            if base_c:
                resid = np.mod(v, 12)
            else:
                resid = np.mod(v - 9, 12)

            resid = np.round(resid * n_chroma / 12.0)
            assert np.allclose(np.mod(i - resid, 12), 0.0), i - resid
def get_df_audio(sep, div, bpm, sr):
    audio_list = []
    for i in range(0, len(sep)):
        duration = librosa.core.get_duration(sep[i])
        start = librosa.samples_to_time(div[i, 0])
        end = librosa.samples_to_time(div[i, 1])
        pitch = estimate_pitch_fft(sep[i], sr)
        #pitch = estimate_pitch(sep[i], sr)
        midi = librosa.hz_to_midi(pitch)
        audio_list.append([start, end, duration, pitch, midi, bpm])
    df = pd.DataFrame(
        audio_list,
        columns=['start', 'end', 'duration', 'pitch', 'midi', 'tempo'])
    return df
Example 26
    def save(self, out):
        out_txt = ""
        for i, (so, fr, mag, _) in enumerate(self.sounds):
            pitch = lr.hz_to_midi(fr)
            vel = np.clip(mag * 127, 0, 127)
            path = os.path.dirname(out)
            fname = os.path.splitext(os.path.basename(out))[0] + f"_s{i}.wav"
            so.write(os.path.join(path, fname))
            out_txt += f"{fname}, {pitch}, {vel}\n"

        with open(out, "w") as file:
            file.write(out_txt)
Example 27
def __extract_notes(summary_spectrum, fs):
    '''
    Extracts the significant F0s from the summary spectrum of a frame.
    @param summary_spectrum A numpy array representing the processed frequency
    spectrum of a signal frame.
    @param fs The sampling frequency in Hz.
    @return A list of detected F0s in Hz.
    '''
    N = summary_spectrum.size
    residual_spectrum = np.copy(summary_spectrum)
    detected = []
    detected_saliences = []

    if __salience(summary_spectrum, fs, 1 / FMAX,
                  1 / FMIN) < SILENCE_THRESHOLD:
        return []

    previous_significance = 0
    while True:
        # estimation
        tau, s = __max_salience(residual_spectrum, fs)
        detected_saliences.append(s)
        fc = 1 / tau

        # cancellation
        subtracted_signal = np.zeros(residual_spectrum.size)
        for i in range(1, 21):
            partial_frequency = i * fc
            time_vector = np.linspace(0, N / fs, N)
            wave = np.sin(2 * np.pi * partial_frequency * time_vector)

            subtracted_signal = np.add(
                subtracted_signal,
                __hamming_magnitude(fc, fs, summary_spectrum.size) *
                __weight(tau, tau, i, fs) * wave)
        subtracted_signal = abs(
            np.fft.fft(subtracted_signal, n=subtracted_signal.size))
        fcy_vector = fs * np.arange(0, N) / N
        residual_spectrum = residual_spectrum - subtracted_signal
        residual_spectrum[residual_spectrum < 0] = 0

        # check if the F0 is significant
        significance = sum(detected_saliences) / (len(detected_saliences)**
                                                  0.66)
        if significance > previous_significance * SIGINC:
            previous_significance = significance
            detected.append(int(round(librosa.hz_to_midi(fc))))
        else:
            return detected
Example 28
def sequencer_config(in_path, out_path, n):
    source = sm.sound.load(in_path)
    model = sm.mustruct.build_model(n, source)
    model = [[(
        int(lr.hz_to_midi(note.pitch)) if note.pitch > 0.0 else 0,
        int(np.clip(note.mag, 0, 1) * 127),
        round(note.dur, 3)
    ) for note in entry] for entry in model]
    out = ""
    for entry in model:
        for note in entry:
            out += f"{note[0]}, {note[1]}, {note[2]}; "
        out += "\n"
    with open(out_path, "w") as file:
        file.write(out)
Example 29
def hz_to_midi_zeros(annotation):
    '''
        Converts an f0 annotation to MIDI note numbers, with zeros kept to represent silence.
        Input: annotation list of frequencies in Hz, taken straight from mtrack
        Output: 1d np.array of MIDI note numbers (0 where the frame is silent)
    '''
    new_values = np.array([])

    for a in annotation:
        new_a = 0
        if a != 0:
            new_a = librosa.hz_to_midi(a)
        new_values = np.append(new_values, new_a)

    return new_values
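Appending to a NumPy array inside the loop reallocates on every step; an equivalent vectorized sketch that converts only the voiced frames (and so never feeds 0 Hz to hz_to_midi) could look like this:

import numpy as np
import librosa

def hz_to_midi_zeros_vectorized(annotation):
    # Voiced (non-zero) frames become MIDI note numbers; zeros stay zero to mark silence.
    a = np.asarray(annotation, dtype=float)
    out = np.zeros_like(a)
    voiced = a > 0
    out[voiced] = librosa.hz_to_midi(a[voiced])
    return out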
Example 30
    def play(self, npitch, nmag, ndur):
        note = Note(npitch, nmag, ndur)

        if note.pitch <= 0:
            samps = np.zeros(int(note.dur * sm.sound.SAMPLE_RATE))
            return SoundObject(samps)

        n_samples = int(note.dur * sm.sound.SAMPLE_RATE)
        factors = []
        for so, pitch, mag, dur in self.sounds:
            p_dif = (note.pitch - pitch)**2
            m_dif = (note.mag - mag)**2
            d_dif = (note.dur - dur)**2
            wei = 1 / np.sqrt(p_dif + m_dif + d_dif)
            samps = so.samples
            if samps.size > n_samples:
                samps = samps[:n_samples]
            elif samps.size < n_samples:
                samps = np.concatenate(
                    [samps, np.zeros(n_samples - samps.size)])
            factors.append([samps, wei])

        total = sum([w for _, w in factors])
        factors = sorted(factors, key=lambda fac: fac[1])[-MAX_SAMPLES:]
        out = np.zeros(n_samples)
        for samps, wei in factors:
            out += samps * wei / total

        pitch = SoundObject(out).get_f0()
        if pitch > 0:
            out = lr.effects.pitch_shift(
                out, sm.sound.SAMPLE_RATE,
                lr.hz_to_midi(note.pitch) - lr.hz_to_midi(pitch))

        oso = sm.effects.band_pass(SoundObject(out), note.pitch, 20000)
        return oso
Example 31
def showMidi(melfile, srcDir=None):
    if srcDir is not None:
        files = os.listdir(srcDir)
        melFiles = list(map(lambda x: os.path.join(srcDir, x), files))
    else:
        melFiles = [melfile]

    midis = np.array([])
    for melfile in melFiles:
        times, freqs = loadMel(melfile)
        x = librosa.hz_to_midi(freqs[freqs > 0])
        midis = np.concatenate([midis, x])

    plt.hist(midis, bins=25)
    plt.xlabel('midi')
    plt.show()
    return midis
Example 32
def __extract_pitches_from_onsets(signal, fs):
    filter_fcies = __filter_center_fcies()
    filter_bands = __filter_bandwidths(filter_fcies)

    size_of_frame = int(FRAME_SIZE * fs)

    s = np.zeros(signal.size)
    for i in range(0, len(filter_fcies)):
        num_coefs_1, denom_coefs_1, num_coefs_2, denom_coefs_2 = __coefficients(
            filter_fcies[i], fs, filter_bands[i])
        # filter twice with the first filter and twice with the second
        c = sig.lfilter(num_coefs_1, denom_coefs_1, signal)
        c = sig.lfilter(num_coefs_1, denom_coefs_1, c)
        c = sig.lfilter(num_coefs_2, denom_coefs_2, c)
        c = sig.lfilter(num_coefs_2, denom_coefs_2, c)
        # apply neural transduction operations
        c = __dynamic_compression(c, fs)
        c = __fwr_and_filter(c, fs, filter_fcies[i])
        s += c

    # detect onsets
    onsets = librosa.onset.onset_detect(signal, fs, units="samples")

    # detect notes in the frame following every onset
    channels = []
    for i in range(16):
        channels.append([Melody([])])
    for onset in onsets:
        f = __frame_spectrum(s, fs, onset)
        tracked = __extract_notes(f, fs)
        counter = 1
        # track all notes until they are not detected anymore to get durations
        while not len(tracked) == 0:
            f = __frame_spectrum(s, fs, onset + counter * size_of_frame)
            F0s = __extract_notes(f, fs)
            for f0 in tracked:
                if f0 not in F0s:
                    duration = counter * FRAME_SIZE
                    midi_pitch = int(round(librosa.hz_to_midi(f0)))
                    note = Note(duration, midi_pitch, 64)
                    __add_to_channel(channels[0], note, onset / fs)
                    tracked.remove(f0)
    for i in range(len(channels)):
        channels[i] = Channel(Chord(channels[i]), 0, 0)
    return Chord(channels)
Example 33
    def __test(n_bins, bins_per_octave, n_chroma, fmin, base_c, window):
        # Fake up a cqt matrix with the corresponding midi notes

        if fmin is None:
            midi_base = 24  # C2
        else:
            midi_base = librosa.hz_to_midi(fmin)

        midi_notes = np.linspace(midi_base,
                                 midi_base + n_bins * 12.0 / bins_per_octave,
                                 endpoint=False,
                                 num=n_bins)
        #  We don't care past 2 decimals here.
        # the log2 inside hz_to_midi can cause problems though.
        midi_notes = np.around(midi_notes, decimals=2)
        C = np.diag(midi_notes)

        cq2chr = librosa.filters.cq_to_chroma(n_input=C.shape[0],
                                              bins_per_octave=bins_per_octave,
                                              n_chroma=n_chroma,
                                              fmin=fmin,
                                              base_c=base_c,
                                              window=window)

        chroma = cq2chr.dot(C)
        for i in range(n_chroma):
            v = chroma[i][chroma[i] != 0]
            v = np.around(v, decimals=2)

            if base_c:
                resid = np.mod(v, 12)
            else:
                resid = np.mod(v - 9, 12)

            resid = np.round(resid * n_chroma / 12.0)
            assert np.allclose(np.mod(i - resid, 12), 0.0), i-resid
 gt = np.empty([len(timestamps), ], 'float64')
 j = 0  # frame index into timestamps; advances monotonically across onset intervals
 for i in range(1, len(onset)):
     while j < len(timestamps) and onset[i - 1] <= timestamps[j] <= onset[i]:
         gt[j] = melo[i - 1]
         j = j + 1
 
 plt.figure(figsize=(18,6))
 plt.plot(timestamps, melody_hz)
 plt.plot(timestamps, gt)
 plt.xlabel('Time (s)')
 plt.ylabel('Notes')
 plt.yscale('log')
 plt.yticks(frequency, notation)
 plt.ylim(ymax = 2350 , ymin = 246)
 plt.axis( )
 plt.show()
 
 #%%
 midigt = lr.hz_to_midi(gt)
 melonotes = lr.hz_to_midi(melody_hz)
 int_melonotes = np.round(melonotes)
 
 plt.figure()
 plt.plot(timestamps,int_melonotes,'.-',color='blue', lw=0.7)
 plt.plot(timestamps,midigt,'green', lw=1.4)
 plt.plot(timestamps,melonotes, 'red', lw=0.3)
 plt.fill_between(timestamps, midigt, int_melonotes, facecolor='cyan', label='diference', alpha=0.2)
 plt.grid(b=True, which='major', color='black', axis='y', linestyle='-')
 plt.grid(b=True, which='minor', color='black', axis='y', linestyle='-', alpha=0.3)
 plt.minorticks_on()
 
def pitch_extraction(audio, fs, win, hop):

    audio = audio.astype('float32', copy=False)
    
    win_s=win
    hop_s=hop
    tolerance = 0.5
    
    pitch_o = pitch("yin", win_s, hop_s, fs)
    pitch_o.set_unit("Hz")
    pitch_o.set_tolerance(tolerance)
    
    pitches = []
    confidences = []
    
    # total number of frames read
    total_frames = len(audio) // win_s  # integer division so range() gets an int
    for i in range(0, total_frames - 1):
        
        samples = audio[i*win_s:(i+1)*win_s]
        p = pitch_o(samples)[0]
        #pitch = int(round(pitch))
        confidence = pitch_o.get_confidence()
        #if confidence < 0.8: pitch = 0.
        #print "%f %f %f" % (total_frames / float(samplerate), pitch, confidence)
        pitches += [p]
        confidences += [confidence]
    
    timestamps = np.arange(len(pitches)) * float(win_s)/fs
    
    pitches = np.array(pitches)
    melody_hz = copy.deepcopy(pitches)
    melody_hz[pitches<=0] = 0
    melody_hz[pitches>1200] = 0

    melonotes = lr.hz_to_midi(melody_hz)
    int_melonotes = np.round(melonotes)

    int_melonotes[int_melonotes<58] = 0
    int_melonotes[int_melonotes>96] = 0

    #ONSET DETECTION FROM PITCH CONTOUR

    onset_detection = np.zeros(len(int_melonotes), dtype='int8')
    M = 0; m = 0; k = 0  # onset_detection[0]=0
    for i in range(0,len(int_melonotes)-1):
        M=M+1
        k=k+1
        f0_mean=np.sum(int_melonotes[m:m+M])/float(M)
        if (np.abs(f0_mean-int_melonotes[k])>0.2) :
            onset_detection[k-1]=-1
            onset_detection[k]=1
            m=k+1
            M=1
        else:
            onset_detection[k]=0

    limits=np.where(onset_detection==1)    

    #PITCH CORRECTION WITH ONSET DETECTION     
     
    filtrated_pitch=int_melonotes.copy()
    for i in range(0, len(limits[0])-1):
        aux=limits[0][i]
        aux2=limits[0][i+1]    
        filtrated_pitch[aux:aux2] = np.median(filtrated_pitch[aux:aux2])
    
    filtrated_pitch=np.round(filtrated_pitch)
    
    filtrated_pitch[filtrated_pitch<58] = 0
    filtrated_pitch[filtrated_pitch>96] = 0       
    
    return filtrated_pitch, timestamps
def test_hz_to_midi():
    assert np.allclose(librosa.hz_to_midi([55, 110, 220, 440]),
                       [33, 45, 57, 69])
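The expected values in this last test follow directly from the MIDI definition midi = 69 + 12 * log2(f / 440): 440 Hz is A4 (MIDI 69), and each halving of frequency drops the result by 12 semitones. A quick check using only numpy:

import numpy as np

hz = np.array([55.0, 110.0, 220.0, 440.0])
midi = 69 + 12 * np.log2(hz / 440.0)
print(midi)  # [33. 45. 57. 69.]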