def call(self, z_pitch, z_vel, z=None):
    """Run the MIDI decoder on encoded pitch (and an optional extra latent).

    Args:
      z_pitch: Tensor containing encoded pitch in MIDI scale. [batch, time, 1].
      z_vel: Tensor containing encoded velocity in MIDI scale.
        [batch, time, 1]. Accepted but not used by this method yet.
      z: Additional non-MIDI latent tensor. [batch, time, n_z]. When given,
        it is passed to the network together with `z_pitch` as a list.

    Returns:
      A dictionary to feed into a processor group. It holds the entries
      produced by splitting the dense output according to
      `self.output_splits` (which must include 'f0_midi'), plus 'f0_hz',
      the 'f0_midi' entry converted to hertz.
    """
    # pylint: disable=unused-argument
    # TODO(jesse): Allow velocity, e.g. by feeding
    # tf.concat([z_pitch, z_vel], axis=-1) to the network instead of z_pitch.
    net_inputs = z_pitch if z is None else [z_pitch, z]
    hidden = self.net(net_inputs)

    # Normalization is optional; skip it when no layer was configured.
    if self.norm is not None:
        hidden = self.norm(hidden)

    projected = self.dense_out(hidden)
    controls = nn.split_to_dict(projected, self.output_splits)

    # In residual mode the network output is an offset added to the input pitch.
    if self.f0_residual:
        controls['f0_midi'] += z_pitch

    controls['f0_hz'] = core.midi_to_hz(controls['f0_midi'])
    return controls
def test_midi_to_hz_is_accurate(self):
    """Checks core.midi_to_hz against librosa for MIDI values 0 through 127."""
    notes = np.arange(128)
    expected_hz = librosa.midi_to_hz(notes)

    # Build and evaluate the conversion inside the test's cached session.
    with self.cached_session() as sess:
        hz_tensor = core.midi_to_hz(notes)
        actual_hz = sess.run(hz_tensor)

    self.assertAllClose(expected_hz, actual_hz)
def _add_f0_from_midi(ex, midi_notes):
    """Return a copy of `ex` with fundamental frequencies derived from MIDI.

    Args:
      ex: Mapping of feature names to values. It is copied, not modified.
      midi_notes: MIDI note values to convert to hertz with
        `core.midi_to_hz`.

    Returns:
      A new dict with every entry of `ex` plus 'f0_hz_midi', the converted
      frequencies cast to float32.
    """
    beam.metrics.Metrics.counter('prepare-tfrecord', 'compute-f0').inc()

    # TODO: the note values should at some point be read from a file.
    hz_values = core.midi_to_hz(midi_notes).numpy()

    augmented = dict(ex)
    augmented['f0_hz_midi'] = hz_values.astype(np.float32)
    return augmented
def _add_f0(ex, midi_notes=None):
    """Add a fundamental frequency (f0) estimate to an example.

    Args:
      ex: Mapping of feature names to values. It is copied, not modified.
      midi_notes: Optional MIDI note values to convert to hertz. When omitted
        (or None), the built-in default note list below is used, which keeps
        the behavior of the original single-argument call.

    Returns:
      A new dict with every entry of `ex` plus 'f0_hz', the converted
      frequencies cast to float32.
    """
    beam.metrics.Metrics.counter('prepare-tfrecord', 'compute-f0').inc()

    if midi_notes is None:
        # TODO: default is hardcoded, but at some point should be read from
        # a file. Built here (not as a default argument) so no mutable list
        # is shared between calls.
        midi_notes = [48, 52, 55, 60, 64, 67, 72, 76, 79, 84, 88, 91]

    f0_hz = core.midi_to_hz(midi_notes).numpy()
    ex = dict(ex)
    ex.update({'f0_hz': f0_hz.astype(np.float32)})
    return ex
def _un_processing(self, features):
    """Undo the f0 and loudness scaling, writing results into `features`.

    Reads 'f0_scaled' and 'ld_scaled' from `features`, stores the unscaled
    values under 'f0_hz' and 'loudness_db' in the same mapping, and returns
    that mapping.
    """
    # Forward scaling was f0_scaled = hz_to_midi(f0_hz) / F0_RANGE, so the
    # inverse multiplies by F0_RANGE first and then converts MIDI to hertz.
    midi_pitch = features['f0_scaled'] * F0_RANGE
    features['f0_hz'] = midi_to_hz(midi_pitch)

    # Forward scaling was ld_scaled = (loudness_db / LD_RANGE) + 1.0, so the
    # inverse subtracts 1 first and then multiplies by LD_RANGE.
    ld_shifted = features['ld_scaled'] - 1
    features['loudness_db'] = ld_shifted * LD_RANGE

    return features
def test_harmonic_to_sinusoidal(self):
    """Checks amplitudes and frequencies from core.harmonic_to_sinusoidal."""
    midi_notes = [80, 81, 82, 81, 80]
    # Add leading and trailing singleton axes around the five-note sequence.
    f0_hz = core.midi_to_hz(midi_notes)[np.newaxis, :, np.newaxis]

    # Three equal harmonic amplitudes, normalized to sum to one per frame.
    harm_amps = np.ones(shape=(1, 5, 3))
    harm_amps = harm_amps / np.sum(harm_amps, axis=-1, keepdims=True)

    amps, sin_freqs = core.harmonic_to_sinusoidal(10, harm_amps, f0_hz)
    sin_freqs = np.squeeze(sin_freqs)
    f0_hz = np.squeeze(f0_hz)

    self.assertAllClose(amps, harm_amps * 10)
    # The k-th sinusoid should sit at k times the fundamental.
    for harmonic in (1, 2, 3):
        self.assertAllClose(sin_freqs[..., harmonic - 1], f0_hz * harmonic)
def test_midi_to_hz_is_accurate(self):
    """Checks core.midi_to_hz against librosa for MIDI values 0 through 127."""
    notes = np.arange(128)
    self.assertAllClose(librosa.midi_to_hz(notes), core.midi_to_hz(notes))