This repository has been archived by the owner on Sep 5, 2023. It is now read-only.
forked from GuitarsAI/BasicAutoTranscriptionRepo
/
autoMusicTransRepo.py
217 lines (171 loc) · 7.13 KB
/
autoMusicTransRepo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# Imports
## General Imports
import numpy as np
## Visualization
import matplotlib.pyplot as plt
## Audio Imports
import librosa, librosa.display #https://librosa.github.io/librosa/index.html
from music21.duration import DurationException
from music21.tempo import MetronomeMark #http://web.mit.edu/music21/
from music21.note import Note, Rest
from music21.stream import Stream
from music21 import metadata
from music21 import instrument
import wave
# Configurations
## Matplotlib: default figure size for every plot created by this script
plt.rc("figure", figsize=(10, 5))
# Parameters
## Signal Processing
fs = 44100 # Nominal sampling frequency; rebound to the value returned by librosa.load when the audio file is read below
nfft = 2048 # Length of the FFT window; in this file it is only used to derive hop_length
overlap = 0.5 # Fraction of nfft by which successive frames overlap
hop_length = int(nfft*(1-overlap)) # Number of samples between successive frames
n_bins = 72 # Number of CQT frequency bins
mag_exp = 4 # Exponent applied to the CQT magnitude before the dB conversion
pre_post_max = 6 # Used as both pre_max and post_max for onset peak picking
cqt_threshold = -60 # Threshold for CQT dB levels, all values below threshold are set to -120 dB
backtrack = False # Passed through to onset detection as its backtrack option
filename = "sweet_child_o_mine_intro" # Base name (no extension) of the input WAV and of the generated output files
# Load audio file "input/<filename>.wav"
# The sample rate returned by librosa.load replaces the module-level fs set above
x, fs = librosa.load("input/" + filename + ".wav", sr=None, mono=True)
# Audio data information
print("x Shape=", x.shape)
print("Sample rate fs=", fs)
# %d formatting drops the fractional part of the length in seconds
print("Audio Length in seconds=%d [s]" % (x.shape[0]/fs))
# CQT Function
def calc_cqt(x,fs=fs,hop_length=hop_length, n_bins=n_bins, mag_exp=mag_exp):
    """Return the constant-Q transform of ``x`` as a dB-scaled array.

    The CQT magnitude is raised to the power ``mag_exp`` before it is
    converted to dB, using the maximum of the array as the reference.
    """
    transform = librosa.cqt(x, sr=fs, hop_length=hop_length, fmin=None, n_bins=n_bins)
    magnitude, _phase = librosa.magphase(transform)
    return librosa.core.amplitude_to_db(magnitude**mag_exp, ref=np.max)
# CQT Threshold
def cqt_thresholded(cqt,thres=cqt_threshold):
    """Return a copy of ``cqt`` with every value below ``thres`` set to -120.

    The input array itself is not modified.
    """
    floored = np.copy(cqt)
    below_threshold = floored < thres
    floored[below_threshold] = -120
    return floored
# Onset Envelope from CQT
def calc_onset_env(cqt):
    """Return the onset strength envelope computed from the spectrogram ``cqt``.

    Uses the module-level ``fs`` and ``hop_length`` and aggregates across
    frequency bins with ``np.mean``.
    """
    envelope = librosa.onset.onset_strength(
        S=cqt,
        sr=fs,
        hop_length=hop_length,
        aggregate=np.mean,
    )
    return envelope
# Onset from Onset Envelope
def calc_onset(cqt, pre_post_max=pre_post_max, backtrack=True):
    """Detect note onsets in a CQT spectrogram and return segment boundaries.

    Args:
        cqt: CQT array; ``cqt.shape[1]`` is used as the number of frames.
        pre_post_max: Value passed as both ``pre_max`` and ``post_max`` to
            ``librosa.onset.onset_detect``.
        backtrack: Forwarded to ``librosa.onset.onset_detect``.

    Returns:
        A list ``[onset_times, onset_boundaries, onset_env]``:
        ``onset_boundaries`` holds strictly increasing frame indices that
        start at 0 and end at the number of frames, ``onset_times`` holds the
        same boundaries converted to seconds, and ``onset_env`` is the onset
        strength envelope the detection ran on.
    """
    onset_env = calc_onset_env(cqt)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env,
        sr=fs,
        units='frames',
        hop_length=hop_length,
        backtrack=backtrack,
        pre_max=pre_post_max,
        post_max=pre_post_max,
    )
    # Frame 0 and the total frame count are added so the boundaries cover the
    # whole signal. np.unique drops repeated boundaries (for example if the
    # detector reports frame 0 itself, or the same frame twice); a repeated
    # boundary would describe a zero-length segment, whose mean is NaN.
    onset_boundaries = np.unique(
        np.concatenate([[0], onset_frames, [cqt.shape[1]]])
    )
    onset_times = librosa.frames_to_time(onset_boundaries, sr=fs, hop_length=hop_length)
    return [onset_times, onset_boundaries, onset_env]
# Fine Tuning
# without UI because jupyter widgets not available
def inter_cqt_tuning(mag_exp,thres,pre_post_max, backtrack):
    """Recompute the CQT and the onsets for the given settings and plot them.

    Side effects: stores the dB-scaled CQT in the module-level ``CdB`` and the
    result of ``calc_onset`` in the module-level ``onsets``, then shows a
    figure of the thresholded CQT with the onset times as vertical lines.
    """
    global CdB, onsets

    CdB = calc_cqt(x, fs, hop_length, n_bins, mag_exp)

    plt.figure()
    thresholded = cqt_thresholded(CdB, thres)
    librosa.display.specshow(
        thresholded,
        sr=fs,
        hop_length=hop_length,
        x_axis='time',
        y_axis='cqt_note',
        cmap='coolwarm',
    )
    # Restrict the visible pitch range to B2..B6
    lowest_hz = librosa.note_to_hz('B2')
    highest_hz = librosa.note_to_hz('B6')
    plt.ylim([lowest_hz, highest_hz])

    onsets = calc_onset(thresholded, pre_post_max, backtrack)
    onset_times = onsets[0]
    plt.vlines(onset_times, 0, fs/2, color='k', alpha=0.8)

    plt.title("CQT - " + filename)
    plt.colorbar()
    plt.show()
# Compute the CQT and the onsets (fills the module-level CdB and onsets) and plot them
inter_cqt_tuning(mag_exp, cqt_threshold, pre_post_max, backtrack)
# Estimate Tempo from the onset envelope (onsets[2])
tempo, beats = librosa.beat.beat_track(y=None, sr=fs, onset_envelope=onsets[2], hop_length=hop_length,
                                       start_bpm=120.0, tightness=100, trim=True, bpm=None,
                                       units='frames')
# NOTE(review): depending on the librosa version the tempo appears to come back
# either as a scalar or as a one-element array - confirm against the installed
# release. Reducing it to a plain float keeps the rounding below working for both.
tempo = float(np.ravel(tempo)[0])
# Round to the nearest even BPM value
tempo = int(2*round(tempo/2))
print("BPM: {}".format(tempo))
mm = MetronomeMark(referent='quarter', number=tempo)
# Convert Seconds to Quarter-Notes
def time_to_beat(duration, tempo):
    """Convert ``duration`` in seconds to a length in beats at ``tempo`` BPM."""
    beats = tempo * duration / 60
    return beats
# Remap input to 0-1 for Sine Amplitude or to 0-127 for MIDI
def remap(x, in_min, in_max, out_min, out_max):
    """Linearly map ``x`` from the range [in_min, in_max] to [out_min, out_max].

    Values outside the input range are extrapolated, not clamped.
    """
    offset = x - in_min
    out_span = out_max - out_min
    in_span = in_max - in_min
    return offset * out_span / in_span + out_min
# Generate Sinewave and music21 notes
def generate_sine_midi_note(f0_info, sr, n_duration):
    """Build the sine-wave samples and the music21 element for one segment.

    Args:
        f0_info: Pair ``[f0, level]``. ``f0`` is the pitch in Hz, or ``None``
            for a rest; ``level`` is scaled against the minimum and maximum of
            the module-level ``CdB`` to get the amplitude and MIDI velocity.
        sr: Sample rate used for the segment duration and the sine wave.
        n_duration: Length of the segment in frames.

    Returns:
        ``[sine_wave, note_info]``, where ``note_info`` is ``[Note]`` for a
        pitched segment and a bare ``Rest`` for a rest, or ``None`` when
        music21 raises ``DurationException`` for the segment.
    """
    f0 = f0_info[0]
    level = f0_info[1]
    level_min, level_max = CdB.min(), CdB.max()
    amplitude = remap(level, level_min, level_max, 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=sr, hop_length=hop_length)

    # Generate music21 note
    # Half of the segment duration, rounded to the nearest multiple of 0.02 s.
    # NOTE(review): the reason for halving is not evident from this file - confirm.
    note_duration = 0.02 * np.around(duration / 2 / 0.02)
    try:
        if f0 is None:
            note_info = Rest(type=mm.secondsToDuration(note_duration).type)
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(midi_note, type=mm.secondsToDuration(note_duration).type)
            note.volume.velocity = int(round(remap(level, level_min, level_max, 0, 127)))
            note_info = [note]
    except DurationException:
        # This segment's duration is rejected by music21: drop the segment
        return None

    # Generate Sinewave (a rest is rendered at 0 Hz, i.e. as silence)
    frequency = 0 if f0 is None else f0
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=hop_length))
    sine_wave = amplitude * np.sin(2 * np.pi * frequency * n / float(sr))
    return [sine_wave, note_info]
# Estimate Pitch
def estimate_pitch(segment, threshold):
    """Estimate the pitch of one CQT segment.

    Returns ``[f0, level]``. ``level`` is the mean of the maxima of
    ``segment`` along axis 0. ``f0`` is ``None`` when the overall maximum of
    ``segment`` is below ``threshold``; otherwise it is the CQT bin frequency
    at the (mean, truncated to int) index of the strongest bin.
    """
    freqs = librosa.cqt_frequencies(
        n_bins=n_bins,
        fmin=librosa.note_to_hz('C1'),
        bins_per_octave=12,
    )
    level = np.mean(np.amax(segment, axis=0))
    if segment.max() < threshold:
        return [None, level]
    strongest_bin = int(np.mean(np.argmax(segment, axis=0)))
    return [freqs[strongest_bin], level]
# Generate notes from Pitch estimation
def estimate_pitch_and_notes(x, onset_boundaries, i, sr):
    """Estimate the pitch of segment ``i`` and generate its sine wave and note.

    The segment spans columns ``onset_boundaries[i]`` up to (not including)
    ``onset_boundaries[i+1]`` of ``x``; those columns are averaged before the
    pitch is estimated against the module-level ``cqt_threshold``.
    """
    start, stop = onset_boundaries[i], onset_boundaries[i+1]
    averaged_bins = np.mean(x[:, start:stop], axis=1)
    f0_info = estimate_pitch(averaged_bins, threshold=cqt_threshold)
    n_frames = stop - start
    return generate_sine_midi_note(f0_info, sr, n_frames)
# Music information per segment: one [sine wave, music21 element] entry for
# every pair of consecutive onset boundaries; segments that produced None are
# left out
music_info = np.array(
    [
        info
        for info in (
            estimate_pitch_and_notes(CdB, onsets[1], idx, sr=fs)
            for idx in range(len(onsets[1])-1)
        )
        if info is not None
    ],
    dtype=object,
)
# Get sinewave: join the per-segment sine waves into one signal
synth_audio = np.concatenate(music_info[:,0])
# Convert audio to 16-bit int, generated audio is floating point.
# Vectorised scaling + cast gives the same values as converting sample by sample.
synth_audio_converted = (synth_audio * 32767).astype(np.int16)
# Write to wav file; the context manager closes the file even if a write fails
with wave.open("output/" + filename + "_sine.wav", "wb") as wav_file:
    wav_file.setnchannels(1)
    wav_file.setsampwidth(2) # 2 bytes = 16 bit
    wav_file.setframerate(fs)
    wav_file.writeframes(synth_audio_converted.tobytes())
# Collect the music21 elements (second column of music_info)
note_info = list(music_info[:,1])

# Build the music21 stream: tempo mark first, then the instrument
s = Stream()
s.append(mm)

electricguitar = instrument.fromString('electric guitar')
electricguitar.midiChannel = 0
electricguitar.midiProgram = 30  # Set program to Overdriven Guitar
s.append(electricguitar)

s.insert(0, metadata.Metadata())

# Append the transcribed elements in segment order
for element in note_info:
    s.append(element)

# Analyse the stream to get the song key and report it
key = s.analyze('key')
print("Key: " + key.name)

# Put the key at the start of the stream
s.insert(0, key)

# Save MIDI to file
s.write('midi', "output/" + filename + "_music21.mid")