def unweight(Y, sampling_frequency):
    """Undo the equal-loudness-contour weighting applied to a spectrogram.

    Y holds real and imaginary spectrum halves concatenated along axis 2,
    which is why the contour is duplicated before use.

    Parameters
    ----------
    Y : np.ndarray
        Weighted spectrogram, shape (..., ..., 2 * num_bins) — assumed; the
        code only shows that axis 2 is twice the contour length.
    sampling_frequency : int
        Audio sampling rate passed through to iso226.

    Returns
    -------
    np.ndarray
        Y with the loudness weighting divided back out.
    """
    # 30-phon equal-loudness contour sampled across half of axis 2.
    elc, _ = iso226(30, sampling_frequency, Y.shape[2] / 2)
    elc = np.concatenate((elc, elc), axis=0)
    # BUG FIX: convert dB -> linear amplitude. The original computed
    # (10 ** -elc) / 20, i.e. divided the *power* by 20; the dB inverse is
    # 10 ** (-elc / 20), matching the "convert from dB and invert" intent.
    elc = 10 ** (-elc / 20)
    elc = elc / np.max(elc)  # normalise so the peak of the curve is 1
    Y = Y / elc
    return Y
def __init__(self, bands):
    """Initialise a bank of `bands` oscillators with per-band levels.

    Parameters
    ----------
    bands : int
        Number of frequency bands / oscillators to allocate.
    """
    self.freq = 0.01
    self.oscs = [0 for i in range(0, bands)]        # oscillator phases/values
    self.level = [0 for i in range(0, bands)]       # current band levels
    self.last_level = [0 for i in range(0, bands)]  # previous band levels
    self.t = 0
    self.t_max = 10
    # Equal-loudness contour evaluated at 20 Hz steps (20, 40, ... bands*20).
    self.loudness_curve = iso226.iso226(1, [i * 20 for i in range(1, bands + 1)])
    # BUG FIX: map() returns a one-shot lazy iterator in Python 3, so a
    # stored map object would be silently empty after its first traversal.
    # Materialise the scaled curve as a list instead.
    self.loudness_curve = [i * 0.1 for i in self.loudness_curve]
def __init__(self, bands):
    """Set up per-band oscillator state and a scaled loudness curve.

    Parameters
    ----------
    bands : int
        Number of frequency bands / oscillators to allocate.
    """
    self.freq = 0.01
    self.oscs = [0 for i in range(0, bands)]        # oscillator phases/values
    self.level = [0 for i in range(0, bands)]       # current band levels
    self.last_level = [0 for i in range(0, bands)]  # previous band levels
    self.t = 0
    self.t_max = 10
    # Equal-loudness contour at 20 Hz steps up to bands * 20 Hz.
    self.loudness_curve = iso226.iso226(
        1, [i * 20 for i in range(1, bands + 1)])
    # BUG FIX: in Python 3 map() yields a single-pass iterator; storing it
    # as an attribute means any second iteration sees nothing. Convert the
    # scaled curve to a concrete list.
    self.loudness_curve = [i * 0.1 for i in self.loudness_curve]
def weighted_spectrogram_loss(y_true, y_pred):
    """Spectral-domain loss with equal-loudness weighting (TensorFlow).

    NOTE(review): the original body was non-functional — the line producing
    `Y_pred` was commented out (so `Y_pred` was referenced before being
    defined), the weighting was applied to an undefined `Y_data`, and a stray
    ''' terminator followed the return. Reconstructed here so every name is
    defined; `sampling_frequency` is still assumed to be a module-level
    constant — TODO confirm against the rest of the file.

    Parameters
    ----------
    y_true : tf.Tensor
        Target spectrogram, shape (batch, windows, bins) — assumed from the
        axis=2 norm; verify against the caller.
    y_pred : tf.Tensor
        Predicted time-domain signal; transformed to log-magnitude spectrum.

    Returns
    -------
    tf.Tensor
        Per-window spectral norm of the residual.
    """
    Y_pred = tf.spectral.rfft(y_pred)
    Y_pred = tf.log(tf.abs(Y_pred) + 1e-6)  # log-magnitude, eps avoids log(0)
    # apply equal loudness contour weighting to the target
    elc, _ = iso226(30, sampling_frequency, y_true.shape[2] / 2)
    elc = 1 / elc               # invert: quiet-sensitive bands weigh more
    elc = elc / np.max(elc)     # normalise peak to 1
    y_true = y_true / elc
    loss = tf.norm(y_true - Y_pred, axis=2)
    return loss
def render(self, out, mode):
    """Render the queued blips into the sample buffer `out`, in place.

    Converts each blip into an enveloped oscillator event, then mixes
    `self.bar_length` samples into `out` starting at `self.pos`. Events decay
    over time and are dropped once their volume falls below 0.001.

    NOTE(review): indentation reconstructed from a whitespace-collapsed
    source — the nesting of `self.pos += 1` and the sleep throttle is the
    most natural reading, but should be confirmed against the original file.

    Parameters
    ----------
    out : mutable sequence of float
        Output sample buffer; samples are accumulated with +=.
    mode : str
        "TECHNO" selects the techno generator and a longer attack envelope;
        anything else uses a plain sine.
    """
    if len(self.blips) > 0:
        # spread the blips evenly across the bar
        step = self.bar_length / len(self.blips)
        for i, b in enumerate(self.blips):
            # midi note to frequency
            p = pitch(b[0] + 69) * 4
            # make an event for this note
            self.events.append({
                'pos': i * step,                                # start sample within the bar
                'freq': p,
                'tec': techno.techno(0.3 + b[1] * 0.5, 0.4),    # timbre driven by blip velocity
                'vol': iso226.iso226(90, p)                     # loudness-compensated volume
            })
    # attack-envelope length in samples depends on mode
    if mode == "TECHNO":
        env = 150
    else:
        env = 50
    print(self.blips)
    for i in range(0, self.bar_length):
        if self.pos < len(out):
            for e in self.events:
                # synthesise one sample for this event
                if mode == "TECHNO":
                    s = 0.016 * e['tec'].generate(
                        self.pos / 44100.0 * e['freq']) * e['vol']
                else:
                    s = 0.008 * math.sin(
                        self.pos / 44100.0 * e['freq']) * e['vol']
                # linear fade-in while inside the attack envelope
                if i > e['pos'] and i <= e['pos'] + env:
                    env_lev = 1 - (e['pos'] + env - i) / float(env)
                    out[self.pos] += s * env_lev
                # full level once past the envelope
                if i > e['pos'] + env:
                    out[self.pos] += s
                e['vol'] *= 0.9995  # exponential per-sample decay
            self.pos += 1
        # throttle rendering — presumably to pace real-time playback; verify
        if i % 50 == 0:
            time.sleep(0.3)
    # remove old events
    new_events = []
    for e in self.events:
        if e['vol'] > 0.001:
            new_events.append(e)
    self.events = new_events
def normiso(f0):
    """Equal-loudness contour at ISOPHON for `f0`, shifted so its minimum is zero."""
    contour = iso226(ISOPHON, f0)
    return contour - min(contour)
def load_data(data_path, datasplit_dict, mode, example_duration,
              time_window_duration, sampling_frequency, loss_domain,
              equal_loudness):
    """Load paired MIDI (.mat) and audio (.wav) files and cut them into
    fixed-length training examples.

    Parameters
    ----------
    data_path : str
        Root directory containing the 'TPD *kHz' wav folders and the
        per-window-duration .mat folders.
    datasplit_dict : dict
        Must contain 'filename' plus 'train'/'train_dev'/'test' indicator
        arrays selecting files for each split.
    mode : str
        One of 'train', 'train_dev', 'test'.
    example_duration : float
        Length of one training example in seconds.
    time_window_duration : float
        Length of one time window in seconds.
    sampling_frequency : int
        One of 44100, 22050, 11025.
    loss_domain : str
        If 'frequency', targets are converted with an rFFT along axis 2.
    equal_loudness : bool
        If True (frequency domain only), divide targets by a normalised
        inverted 30-phon equal-loudness contour.

    Returns
    -------
    (X_data, Y_data, filenames) :
        X_data is (examples, windows, pitch_encoding); Y_data is
        (examples, windows, samples_or_spectrum); filenames gives the source
        file of each example.

    Raises
    ------
    ValueError
        For an unrecognised sampling frequency or mode.
    """
    print('[*] Loading data...', flush=True)
    # pick the wav folder matching the requested sampling rate
    if sampling_frequency == 44100:
        wav_path = os.path.join(data_path, 'TPD 44kHz')
    elif sampling_frequency == 22050:
        wav_path = os.path.join(data_path, 'TPD 22kHz')
    elif sampling_frequency == 11025:
        wav_path = os.path.join(data_path, 'TPD 11kHz')
    else:
        raise ValueError('sampling frequency not recognized!')
    # .mat files live in a folder named after the window duration in ms
    twd_suffix = '_dt{:02.0f}'.format(time_window_duration * 1e3)
    midi_path = os.path.join(data_path, twd_suffix)
    X_data, Y_data, filenames = [], [], []
    # need to do some rounding because 50 ms at 11025 is 551.25 samples! (which
    # will not work)
    num_pts_per_window = int(
        np.round(time_window_duration * sampling_frequency))
    num_windows_per_example = int(example_duration / time_window_duration)
    num_pts_per_example = int(num_pts_per_window * num_windows_per_example)
    # mask filenames for specific mode
    all_filenames = np.array(datasplit_dict['filename'])
    if mode == 'train':
        idx = np.array(np.array(datasplit_dict['train']) > 0)
    elif mode == 'train_dev':
        idx = np.array(np.array(datasplit_dict['train_dev']) > 0)
    elif mode == 'test':
        idx = np.array(np.array(datasplit_dict['test']) > 0)
    else:
        raise ValueError('arg.mode not recognized!')
    all_filenames = all_filenames[idx]
    # build full listings of expected wav and mat paths for the split
    wav_filenames = [
        file + '_sf' + str(sampling_frequency) + '.wav'
        for file in all_filenames
    ]
    midi_filenames = [file + twd_suffix + '.mat' for file in all_filenames]
    wav_listing = [os.path.join(wav_path, file) for file in wav_filenames]
    midi_listing = [os.path.join(midi_path, file) for file in midi_filenames]
    print('number of files = ' + str(len(wav_filenames)) + '=' +
          str(len(midi_filenames)),
          flush=True)
    # keep only midi files whose matching wav actually exists in the listing
    target_wav_files = []
    target_midi_files = []
    for midi_file in midi_listing:
        filename = midi_file.split('/')[-1].split(twd_suffix + '.mat')[0]
        corresponding_wav_file = os.path.join(
            wav_path, filename + '_sf' + str(sampling_frequency) + '.wav')
        if corresponding_wav_file in wav_listing:
            target_wav_files.append(corresponding_wav_file)
            target_midi_files.append(midi_file)
    # load the data and format them
    for i, midi_filepath in enumerate(target_midi_files):
        filename = midi_filepath.split('/')[-1].split(twd_suffix + '.mat')[0]
        print('    loading ' + filename, flush=True)
        # load the MIDI file
        data = scipy.io.loadmat(midi_filepath)
        X = data['Xin']
        num_examples_in_midi = np.ceil(X.shape[0] / num_windows_per_example)
        # load the wav file
        wav_filepath = target_wav_files[i]
        fs, Y = scipy.io.wavfile.read(wav_filepath)
        Y = Y.astype(float)
        Y = Y / 32768  # scipy.io.wavfile outputs values that are int16
        assert (fs == sampling_frequency)
        num_examples_in_wav = np.ceil(len(Y) / num_pts_per_example)
        # # make sure there will be the same number of examples from each file
        # assert(num_examples_in_midi == num_examples_in_wav)
        # pad both arrays so they divide evenly into whole examples
        pad_amount_X = int(
            num_examples_in_midi * num_windows_per_example) - X.shape[0]
        X = np.pad(X, ((0, pad_amount_X), (0, 0)), 'constant')
        if num_examples_in_midi >= num_examples_in_wav:
            # audio shorter than midi: zero-pad the audio tail
            pad_amount_Y = int(
                num_examples_in_midi * num_pts_per_example) - Y.shape[0]
            Y = np.pad(Y, ((0, pad_amount_Y)), 'constant')
        else:
            # audio longer than midi: truncate the audio
            Y = Y[0:int(num_examples_in_midi * num_pts_per_example)]
        # create the examples
        for example_num in range(int(num_examples_in_midi)):
            example_x = X[example_num *
                          num_windows_per_example:(example_num + 1) *
                          num_windows_per_example, :]
            example_y = Y[example_num * num_pts_per_example:(example_num + 1) *
                          num_pts_per_example]
            example_y = np.reshape(example_y, (num_windows_per_example, -1))
            X_data.append(example_x)
            Y_data.append(example_y)
            filenames.append(filename)
    # change the list of examples into a matrix of [examples, time_windows, pitch_encoding/audio output]
    X_data = np.stack(X_data)
    Y_data = np.stack(Y_data)
    # train on the frequency domain loss function
    if loss_domain == 'frequency':
        #import pdb
        #pdb.set_trace()
        Y_data = np.fft.rfft(Y_data, axis=2)
        # real and imaginary halves concatenated along the feature axis
        Y_data = np.concatenate((np.real(Y_data), np.imag(Y_data)), axis=2)
        #Y_data = np.log(np.abs(Y_data)+1e-6)
        if equal_loudness:
            # apply equal loudness contour weighting
            elc, _ = iso226(30, sampling_frequency, Y_data.shape[2] / 2)
            #elc = (10**(-np.concatenate((elc,elc),axis = 0))/20) # convert from dB and invert
            elc = 1 / elc
            elc = elc / np.max(elc)
            Y_data = Y_data / elc
    # error checks
    assert (X_data.shape[0] == Y_data.shape[0])
    assert (X_data.shape[1] == Y_data.shape[1])
    return X_data, Y_data, filenames