Ejemplo n.º 1
0
  def __init__(self,
               fc_stack_layers=2,
               fc_stack_ch=256,
               rnn_ch=512,
               rnn_type='gru',
               n_harmonics=100,
               amp_scale_fn=ddsp.core.exp_sigmoid,
               f0_depth=64,
               hz_min=20.0,
               hz_max=1200.0,
               sample_rate=16000,
               name='sinusoidal_to_harmonic_encoder'):
    """Stores the configuration values and builds the sub-layers.

    Args:
      fc_stack_layers: Second argument given to `nn.fc_stack` for both the
        pre-RNN and post-RNN stacks.
      fc_stack_ch: First argument given to `nn.fc_stack` for both stacks.
      rnn_ch: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      n_harmonics: Kept as `self.n_harmonics` and used as the argument of the
        `hd_out` dense layer.
      amp_scale_fn: Kept as `self.amp_scale_fn`.
      f0_depth: Kept as `self.f0_depth` and used as the argument of the
        `f0_out` dense layer.
      hz_min: Kept as `self.hz_min`.
      hz_max: Kept as `self.hz_max`.
      sample_rate: Kept as `self.sample_rate`.
      name: Forwarded to the parent constructor.
    """
    super().__init__(name=name)

    # Configuration that is only stored here, not consumed.
    self.sample_rate = sample_rate
    self.hz_min, self.hz_max = hz_min, hz_max
    self.amp_scale_fn = amp_scale_fn
    self.n_harmonics = n_harmonics
    self.f0_depth = f0_depth

    def make_fc_stack():
      # Both fully-connected stacks share the same settings.
      return nn.fc_stack(fc_stack_ch, fc_stack_layers)

    # Trunk: FC stack -> RNN -> FC stack (created in this order).
    self.pre_rnn = make_fc_stack()
    self.rnn = nn.rnn(rnn_ch, rnn_type)
    self.post_rnn = make_fc_stack()

    # Output heads.
    self.amp_out = nn.dense(1)
    self.hd_out = nn.dense(n_harmonics)
    self.f0_out = nn.dense(f0_depth)
Ejemplo n.º 2
0
    def __init__(self,
                 rnn_channels=512,
                 rnn_type='gru',
                 ch=512,
                 layers_per_stack=3,
                 input_keys=('ld_scaled', 'f0_scaled', 'z'),
                 output_splits=(('amps', 1), ('harmonic_distribution', 40)),
                 name=None):
        """Builds one FC stack per input key, an RNN and the output layers.

        Args:
            rnn_channels: First argument given to `nn.rnn`.
            rnn_type: Second argument given to `nn.rnn`.
            ch: First argument given to `nn.fc_stack` for every stack.
            layers_per_stack: Second argument given to `nn.fc_stack`.
            input_keys: Kept as `self.input_keys`; one input stack is created
                per entry.
            output_splits: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
        """
        super().__init__(output_splits=output_splits, name=name)
        self.input_keys = input_keys

        def make_stack():
            return nn.fc_stack(ch, layers_per_stack)

        # Layers.
        self.input_stacks = [make_stack() for _ in self.input_keys]
        self.rnn = nn.rnn(rnn_channels, rnn_type)
        self.out_stack = make_stack()
        # `self.n_out` is not set in this method; presumably the parent
        # derives it from `output_splits` -- TODO confirm.
        self.dense_out = nn.dense(self.n_out)

        # Backwards compatibility: expose the first three input stacks under
        # their legacy attribute names, or None when there are fewer stacks.
        n_stacks = len(self.input_stacks)
        self.f_stack = self.input_stacks[0] if n_stacks >= 1 else None
        self.l_stack = self.input_stacks[1] if n_stacks >= 2 else None
        self.z_stack = self.input_stacks[2] if n_stacks >= 3 else None
Ejemplo n.º 3
0
  def __init__(self,
               rnn_channels=512,
               rnn_type='gru',
               z_dims=32,
               z_time_steps=250,
               other_encoders=None,
               **kwargs):
    """Builds the temporal layers and a ResNet50V2 vision backbone.

    Args:
      rnn_channels: First argument given to `nn.temporal_cnn`.
      rnn_type: Accepted but not used anywhere in this constructor.
      z_dims: Argument given to the `dense_out` layer.
      z_time_steps: Kept as `self.z_time_steps`.
      other_encoders: Forwarded to the parent constructor.
      **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(other_encoders=other_encoders, **kwargs)
    self.z_time_steps = z_time_steps

    # Layers.
    self.z_norm = nn.Normalize('instance')
    # NOTE(review): the attribute is named `rnn` but holds a temporal CNN;
    # the meaning of the literal 10 is not visible here -- confirm against
    # `nn.temporal_cnn`.
    self.rnn = nn.temporal_cnn(rnn_channels, 10)
    self.dense_out = nn.dense(z_dims)

    # Vision backbone without its classification head, initialised with
    # ImageNet weights and built for a fixed frame shape.
    self.frame_shape = (360, 640, 3)
    self.cv_net = tf.keras.applications.ResNet50V2(
        include_top=False,
        weights='imagenet',
        input_shape=self.frame_shape,
        pooling=None)
    # TODO(sclarke): Change this for fine tuning
    self.cv_net.trainable = True
    n_vision_layers = len(self.cv_net.layers)
    print('Vision network layer count: %i'%n_vision_layers)
    # Disabled partial-freeze variant, kept for the TODO above:
    # for l in self.cv_net.layers:
    #   if not('block7' in l.name or 'top' in l.name):
    #     l.trainable = False

    # Layers stored for use after the backbone; currently only global
    # average pooling.
    pooling_layers = [
        tf.keras.layers.GlobalAveragePooling2D(),
    ]
    self.final_layers = tf.keras.Sequential(layers=pooling_layers)
Ejemplo n.º 4
0
  def __init__(self,
               size='large',
               f0_bins=128,
               spectral_fn=lambda x: spectral_ops.compute_mag(x, size=1024),
               name='resnet_f0_encoder'):
    """Stores the settings and creates the ResNet and output dense layer.

    Args:
      size: Passed to `nn.resnet` as its `size` keyword.
      f0_bins: Kept as `self.f0_bins` and used as the argument of the
        `dense_out` layer.
      spectral_fn: Callable kept as `self.spectral_fn`. The default calls
        `spectral_ops.compute_mag` with `size=1024`.
      name: Forwarded to the parent constructor.
    """
    super(ResnetF0Encoder, self).__init__(name=name)

    # Settings kept on the instance.
    self.spectral_fn = spectral_fn
    self.f0_bins = f0_bins

    # Layers (ResNet first, then the dense output).
    self.resnet = nn.resnet(size=size)
    self.dense_out = nn.dense(f0_bins)
Ejemplo n.º 5
0
    def __init__(self,
                 output_splits=(('frequencies', 100 * 64), ('amplitudes', 100),
                                ('noise_magnitudes', 60)),
                 spectral_fn=spectral_ops.compute_logmel,
                 size='tiny',
                 name='resnet_sinusoidal_encoder'):
        """Stores the settings and creates a ResNet plus one dense per split.

        Args:
            output_splits: Sequence of entries whose element at index 1 is
                given to `nn.dense`; the defaults are `(name, size)` pairs.
                Kept as `self.output_splits`.
            spectral_fn: Callable kept as `self.spectral_fn`.
            size: Passed to `nn.resnet` as its `size` keyword.
            name: Forwarded to the parent constructor.
        """
        super().__init__(name=name)

        # Settings kept on the instance.
        self.spectral_fn = spectral_fn
        self.output_splits = output_splits

        # Layers: the ResNet first, then one dense layer per output split,
        # in the order the splits are listed.
        self.resnet = nn.resnet(size=size)
        self.dense_outs = [nn.dense(split[1]) for split in output_splits]
Ejemplo n.º 6
0
  def __init__(self,
               rnn_channels=512,
               rnn_type='gru',
               ch=512,
               layers_per_stack=3,
               output_splits=(('amps', 1), ('harmonic_distribution', 40)),
               name='rnn_fc_decoder'):
    """Creates two input FC stacks, an RNN, an output stack and a dense layer.

    Args:
      rnn_channels: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      ch: First argument given to `nn.fc_stack` for every stack.
      layers_per_stack: Second argument given to `nn.fc_stack`.
      output_splits: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.
    """
    super(RnnFcDecoder, self).__init__(output_splits=output_splits, name=name)

    def make_stack():
      # All three FC stacks share the same configuration.
      return nn.fc_stack(ch, layers_per_stack)

    # Layers, created in the original order.
    self.f_stack = make_stack()
    self.l_stack = make_stack()
    self.rnn = nn.rnn(rnn_channels, rnn_type)
    self.out_stack = make_stack()
    # `self.n_out` is not set in this method; presumably the parent derives
    # it from `output_splits` -- TODO confirm.
    self.dense_out = nn.dense(self.n_out)
Ejemplo n.º 7
0
    def __init__(self,
                 rnn_channels=512,
                 rnn_type="gru",
                 n_rnn=1,
                 ch=512,
                 layers_per_stack=3,
                 output_splits=(("amps", 1), ("harmonic_distribution", 40)),
                 name="f0_rnn_fc_decoder"):
        """Creates an f0 FC stack, a list of RNNs and the output layers.

        Args:
            rnn_channels: First argument given to every `nn.rnn` call.
            rnn_type: Second argument given to every `nn.rnn` call.
            n_rnn: Kept as `self.n_rnn`. One RNN is always created, followed
                by `n_rnn - 1` more.
            ch: First argument given to `nn.fc_stack` for both stacks.
            layers_per_stack: Second argument given to `nn.fc_stack`.
            output_splits: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
        """
        super().__init__(output_splits=output_splits, name=name)
        self.n_rnn = n_rnn

        def make_stack():
            return nn.fc_stack(ch, layers_per_stack)

        # Create layers.
        self.f0_stack = make_stack()
        # The first RNN is unconditional, so the list is never empty even
        # when `n_rnn` is below one.
        self.rnn = [nn.rnn(rnn_channels, rnn_type)] + [
            nn.rnn(rnn_channels, rnn_type) for _ in range(self.n_rnn - 1)
        ]
        self.out_stack = make_stack()
        # `self.n_out` is not set in this method; presumably the parent
        # derives it from `output_splits` -- TODO confirm.
        self.dense_out = nn.dense(self.n_out)
Ejemplo n.º 8
0
    def __init__(self,
                 rnn_channels=512,
                 rnn_type='gru',
                 ch=512,
                 layers_per_stack=3,
                 append_f0_loudness=True,
                 output_splits=(('amps', 1), ('harmonic_distribution', 40)),
                 name=None):
        """Creates three input FC stacks, an RNN and the output layers.

        Args:
            rnn_channels: First argument given to `nn.rnn`.
            rnn_type: Second argument given to `nn.rnn`.
            ch: First argument given to `nn.fc_stack` for every stack.
            layers_per_stack: Second argument given to `nn.fc_stack`.
            append_f0_loudness: Kept as `self.append_f0_loudness`; it is not
                otherwise used in this constructor.
            output_splits: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
        """
        super().__init__(output_splits=output_splits, name=name)
        self.append_f0_loudness = append_f0_loudness

        def make_stack():
            # Every FC stack shares the same configuration.
            return nn.fc_stack(ch, layers_per_stack)

        # Layers, created in the original order.
        self.f_stack = make_stack()
        self.l_stack = make_stack()
        self.z_stack = make_stack()
        self.rnn = nn.rnn(rnn_channels, rnn_type)
        self.out_stack = make_stack()
        # `self.n_out` is not set in this method; presumably the parent
        # derives it from `output_splits` -- TODO confirm.
        self.dense_out = nn.dense(self.n_out)
Ejemplo n.º 9
0
  def __init__(self,
               rnn_channels=512,
               rnn_type='gru',
               z_dims=32,
               z_time_steps=250,
               f0_encoder=None,
               name='mfcc_time_distrbuted_rnn_encoder'):
    """Validates `z_time_steps`, selects FFT settings and builds the layers.

    Args:
      rnn_channels: First argument given to `nn.rnn`.
      rnn_type: Second argument given to `nn.rnn`.
      z_dims: Argument given to the `dense_out` layer.
      z_time_steps: Selects the entry of `self.z_audio_spec` from which
        `self.fft_size` and `self.overlap` are taken.
      f0_encoder: Forwarded to the parent constructor.
      name: Forwarded to the parent constructor.

    Raises:
      ValueError: If `z_time_steps` is not one of 63, 125, 250, 500 or 1000.
    """
    super(MfccTimeDistributedRnnEncoder, self).__init__(
        f0_encoder=f0_encoder, name=name)

    # One row per supported setting: (z_time_steps, fft_size, overlap).
    supported = (
        (63, 2048, 0.5),
        (125, 1024, 0.5),
        (250, 1024, 0.75),
        (500, 512, 0.75),
        (1000, 256, 0.75),
    )
    if z_time_steps not in [steps for steps, _, _ in supported]:
      raise ValueError(
          '`z_time_steps` currently limited to 63,125,250,500 and 1000')

    self.z_audio_spec = {
        steps: {'fft_size': fft_size, 'overlap': overlap}
        for steps, fft_size, overlap in supported
    }
    chosen = self.z_audio_spec[z_time_steps]
    self.fft_size = chosen['fft_size']
    self.overlap = chosen['overlap']

    # Layers.
    self.z_norm = nn.Normalize('instance')
    self.rnn = nn.rnn(rnn_channels, rnn_type)
    self.dense_out = nn.dense(z_dims)
Ejemplo n.º 10
0
    def __init__(self,
                 rnn_channels=512,
                 rnn_type="gru",
                 ch=512,
                 layers_per_stack=3,
                 input_keys=("f0_scaled", "osc_scaled"),
                 output_splits=(("amps", 1), ("harmonic_distribution", 40)),
                 name="multi_input_rnn_fc_decoder"):
        """Creates per-input FC stacks, one or more RNNs and the output layers.

        Args:
            rnn_channels: Passed through `make_iterable`; one RNN is created
                per resulting entry, each entry being the first argument of
                the corresponding `nn.rnn` call.
            rnn_type: Second argument given to every `nn.rnn` call.
            ch: First argument given to `nn.fc_stack` for every stack.
            layers_per_stack: Second argument given to `nn.fc_stack`.
            input_keys: Iterable of input names. A fresh list copy is kept as
                `self.input_keys`, so instances never share (or alias) the
                default or the caller's sequence.
            output_splits: Forwarded to the parent constructor.
            name: Forwarded to the parent constructor.
        """
        super().__init__(output_splits=output_splits, name=name)
        # Copy into a new list: the previous mutable list default was a
        # single object shared by every instance created without
        # `input_keys`, so mutating one instance's keys changed them all.
        self.input_keys = list(input_keys)

        def make_stack():
            return nn.fc_stack(ch, layers_per_stack)

        # Layers.
        # `self.n_in` is not set in this method; presumably it is derived
        # from `self.input_keys` elsewhere in the class -- TODO confirm.
        self.stacks = [make_stack() for _ in range(self.n_in)]

        rnn_channels = make_iterable(rnn_channels)
        self.n_rnn = len(rnn_channels)
        # The first RNN is created unconditionally (an empty `rnn_channels`
        # fails here, as before); the remaining ones follow in order.
        self.rnn = [nn.rnn(rnn_channels[0], rnn_type)]
        for i in range(1, self.n_rnn):
            self.rnn.append(nn.rnn(rnn_channels[i], rnn_type))

        self.out_stack = make_stack()
        # `self.n_out` is not set in this method; presumably the parent
        # derives it from `output_splits` -- TODO confirm.
        self.dense_out = nn.dense(self.n_out)