Example #1
import numpy as np
from iso226 import iso226  # iso226() helper assumed to live in a local iso226 module


def unweight(Y, sampling_frequency):
    # undo equal loudness contour weighting
    elc, _ = iso226(30, sampling_frequency, Y.shape[2] / 2)
    # convert from dB and invert; the contour is duplicated to cover the
    # concatenated real and imaginary halves of the spectrum
    elc = 10**(-np.concatenate((elc, elc), axis=0) / 20)
    elc = elc / np.max(elc)
    Y = Y / elc
    return Y
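
The contour values returned by iso226 are sound pressure levels in dB, so the "convert from dB and invert" step is the standard dB-to-linear-amplitude mapping with a sign flip, 10**(-elc / 20), followed by normalisation. A minimal sketch of that conversion on made-up numbers (not real contour values):

import numpy as np

elc_db = np.array([40.0, 20.0, 10.0, 20.0])  # illustrative dB values only
weight = 10 ** (-elc_db / 20)                # dB -> linear amplitude, inverted
weight = weight / np.max(weight)             # normalise so the largest weight is 1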
Example #2
def __init__(self, bands):
    self.freq = 0.01
    # per-band oscillator state, current level and previous level
    self.oscs = [0 for i in range(0, bands)]
    self.level = [0 for i in range(0, bands)]
    self.last_level = [0 for i in range(0, bands)]
    self.t = 0
    self.t_max = 10
    # equal loudness contour sampled at 20 Hz-spaced band frequencies, scaled down
    self.loudness_curve = iso226.iso226(
        1, [i * 20 for i in range(1, bands + 1)])
    # use a list so the curve can be indexed later (map() is lazy in Python 3)
    self.loudness_curve = [i * 0.1 for i in self.loudness_curve]
Example #3
import numpy as np
import tensorflow as tf  # TensorFlow 1.x API (tf.log, tf.spectral.rfft)
from iso226 import iso226  # iso226() helper assumed to live in a local iso226 module


def weighted_spectrogram_loss(y_true, y_pred):
    # log-magnitude spectrum of the prediction
    Y_pred = tf.spectral.rfft(y_pred)
    Y_pred = tf.log(tf.abs(Y_pred) + 1e-6)
    # apply equal loudness contour weighting
    # (sampling_frequency and Y_data come from the enclosing module; reassigning
    # Y_data here would need a `global Y_data` declaration to run as written)
    elc, _ = iso226(30, sampling_frequency, Y_data.shape[2] / 2)
    elc = 1 / elc
    elc = elc / np.max(elc)
    Y_data = Y_data / elc
    loss = tf.norm(y_true - Y_pred, axis=2)
    return loss
Example #4
    def render(self, out, mode):
        if len(self.blips) > 0:
            step = self.bar_length / len(self.blips)
            for i, b in enumerate(self.blips):
                # midi note to frequency
                p = pitch(b[0] + 69) * 4
                # make an event for this note
                self.events.append({
                    'pos': i * step,
                    'freq': p,
                    'tec': techno.techno(0.3 + b[1] * 0.5, 0.4),
                    'vol': iso226.iso226(90, p)
                })
            if mode == "TECHNO": env = 150
            else: env = 50
            print(self.blips)
            for i in range(0, self.bar_length):
                if self.pos < len(out):
                    for e in self.events:
                        if mode == "TECHNO":
                            s = 0.016 * e['tec'].generate(
                                self.pos / 44100.0 * e['freq']) * e['vol']
                        else:
                            s = 0.008 * math.sin(
                                self.pos / 44100.0 * e['freq']) * e['vol']

                        if i > e['pos'] and i <= e['pos'] + env:
                            env_lev = 1 - (e['pos'] + env - i) / float(env)
                            out[self.pos] += s * env_lev
                        if i > e['pos'] + env:
                            out[self.pos] += s
                            e['vol'] *= 0.9995
                    self.pos += 1
                if i % 50 == 0: time.sleep(0.3)

            # remove old events
            new_events = []
            for e in self.events:
                if e['vol'] > 0.001:
                    new_events.append(e)
            self.events = new_events
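
The pitch() helper is defined elsewhere in that project; assuming it performs the standard equal-tempered MIDI-note-to-frequency conversion (A4 = MIDI note 69 = 440 Hz), a sketch of it would look like the function below, and the * 4 above then shifts the result up two octaves:

def midi_to_hz(note):
    # 12-tone equal temperament, A4 (MIDI note 69) = 440 Hz
    return 440.0 * 2 ** ((note - 69) / 12.0)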
Example #5
def normiso(f0):
    # equal loudness contour at the module-level ISOPHON phon level,
    # shifted so its minimum is 0 dB
    x = iso226(ISOPHON, f0)
    return x - min(x)
Example #6
import os

import numpy as np
import scipy.io
import scipy.io.wavfile
from iso226 import iso226  # iso226() helper assumed to live in a local iso226 module


def load_data(data_path, datasplit_dict, mode, example_duration,
              time_window_duration, sampling_frequency, loss_domain,
              equal_loudness):

    print('[*] Loading data...', flush=True)

    if sampling_frequency == 44100:
        wav_path = os.path.join(data_path, 'TPD 44kHz')
    elif sampling_frequency == 22050:
        wav_path = os.path.join(data_path, 'TPD 22kHz')
    elif sampling_frequency == 11025:
        wav_path = os.path.join(data_path, 'TPD 11kHz')
    else:
        raise ValueError('sampling frequency not recognized!')

    twd_suffix = '_dt{:02.0f}'.format(time_window_duration * 1e3)
    midi_path = os.path.join(data_path, twd_suffix)

    X_data, Y_data, filenames = [], [], []

    # need to do some rounding because 50 ms at 11025 is 551.25 samples! (which
    # will not work)
    num_pts_per_window = int(
        np.round(time_window_duration * sampling_frequency))
    num_windows_per_example = int(example_duration / time_window_duration)
    num_pts_per_example = int(num_pts_per_window * num_windows_per_example)

    # mask filenames for specific mode
    all_filenames = np.array(datasplit_dict['filename'])
    if mode == 'train':
        idx = np.array(np.array(datasplit_dict['train']) > 0)
    elif mode == 'train_dev':
        idx = np.array(np.array(datasplit_dict['train_dev']) > 0)
    elif mode == 'test':
        idx = np.array(np.array(datasplit_dict['test']) > 0)
    else:
        raise ValueError('arg.mode not recognized!')

    all_filenames = all_filenames[idx]
    wav_filenames = [
        file + '_sf' + str(sampling_frequency) + '.wav'
        for file in all_filenames
    ]
    midi_filenames = [file + twd_suffix + '.mat' for file in all_filenames]
    wav_listing = [os.path.join(wav_path, file) for file in wav_filenames]
    midi_listing = [os.path.join(midi_path, file) for file in midi_filenames]

    print('number of files = ' + str(len(wav_filenames)) + '=' +
          str(len(midi_filenames)),
          flush=True)

    target_wav_files = []
    target_midi_files = []
    for midi_file in midi_listing:
        filename = midi_file.split('/')[-1].split(twd_suffix + '.mat')[0]
        corresponding_wav_file = os.path.join(
            wav_path, filename + '_sf' + str(sampling_frequency) + '.wav')
        if corresponding_wav_file in wav_listing:
            target_wav_files.append(corresponding_wav_file)
            target_midi_files.append(midi_file)

    # load the data and format them
    for i, midi_filepath in enumerate(target_midi_files):
        filename = midi_filepath.split('/')[-1].split(twd_suffix + '.mat')[0]
        print('   loading ' + filename, flush=True)
        # load the MIDI file
        data = scipy.io.loadmat(midi_filepath)
        X = data['Xin']
        num_examples_in_midi = np.ceil(X.shape[0] / num_windows_per_example)
        # load the wav file
        wav_filepath = target_wav_files[i]
        fs, Y = scipy.io.wavfile.read(wav_filepath)
        Y = Y.astype(float)
        Y = Y / 32768  # scipy.io.wavfile outputs values that are int16
        assert (fs == sampling_frequency)
        num_examples_in_wav = np.ceil(len(Y) / num_pts_per_example)
        # # make sure there will be the same number of examples from each file
        # assert(num_examples_in_midi == num_examples_in_wav)
        # pad both arrays
        pad_amount_X = int(
            num_examples_in_midi * num_windows_per_example) - X.shape[0]
        X = np.pad(X, ((0, pad_amount_X), (0, 0)), 'constant')
        if num_examples_in_midi >= num_examples_in_wav:
            pad_amount_Y = int(
                num_examples_in_midi * num_pts_per_example) - Y.shape[0]
            Y = np.pad(Y, ((0, pad_amount_Y)), 'constant')
        else:
            Y = Y[0:int(num_examples_in_midi * num_pts_per_example)]
        # create the examples
        for example_num in range(int(num_examples_in_midi)):
            example_x = X[example_num *
                          num_windows_per_example:(example_num + 1) *
                          num_windows_per_example, :]
            example_y = Y[example_num * num_pts_per_example:(example_num + 1) *
                          num_pts_per_example]
            example_y = np.reshape(example_y, (num_windows_per_example, -1))
            X_data.append(example_x)
            Y_data.append(example_y)
            filenames.append(filename)

    # change the list of examples into a matrix of [examples, time_windows, pitch_encoding/audio output]
    X_data = np.stack(X_data)
    Y_data = np.stack(Y_data)

    # train on the frequency domain loss function
    if loss_domain == 'frequency':
        #import pdb
        #pdb.set_trace()
        Y_data = np.fft.rfft(Y_data, axis=2)
        Y_data = np.concatenate((np.real(Y_data), np.imag(Y_data)), axis=2)
        #Y_data = np.log(np.abs(Y_data)+1e-6)

        if equal_loudness:
            # apply equal loudness contour weighting
            elc, _ = iso226(30, sampling_frequency, Y_data.shape[2] / 2)
            #elc = (10**(-np.concatenate((elc,elc),axis = 0))/20) # convert from dB and invert
            elc = 1 / elc
            elc = elc / np.max(elc)
            Y_data = Y_data / elc

    # error checks
    assert (X_data.shape[0] == Y_data.shape[0])
    assert (X_data.shape[1] == Y_data.shape[1])

    return X_data, Y_data, filenames
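
A minimal usage sketch for load_data, assuming a datasplit dictionary with the 'filename', 'train', 'train_dev' and 'test' keys the function looks up; the file names, paths and durations below are placeholders, not values from the original dataset:

# hypothetical split dictionary and arguments, for illustration only
split = {'filename': ['song01'], 'train': [1], 'train_dev': [0], 'test': [0]}
X, Y, names = load_data(
    data_path='data', datasplit_dict=split, mode='train',
    example_duration=4.0, time_window_duration=0.05,
    sampling_frequency=22050, loss_domain='frequency', equal_loudness=True)
# X: [examples, windows per example, pitch encoding]
# Y: [examples, windows per example, concatenated real/imag rfft bins]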