Ejemplo n.º 1
0
    def _create_iaf(self, inputs, iaf_idx):
        """Build the graph of one IAF flow and apply it to ``inputs['x']``.

        ``inputs['mel']`` is passed through ``wavenet._deconv_stack`` and the
        result conditions every layer.  ``inputs['x']`` is shifted right and
        run through ``num_iaf_layers[iaf_idx]`` dilated masked convolutions
        with a sigmoid/tanh gate and a residual connection each.  A final
        1x1 convolution with ``self.out_width`` filters is split in two along
        axis 2 to give ``mean`` and ``scale``.

        Args:
            inputs: mapping that provides the keys ``'mel'`` and ``'x'``.
                Tensors are indexed as rank 3 with channels on axis 2.
            iaf_idx: zero-based index of the flow.  It selects the entry of
                ``hparams.num_iaf_layers`` and, incremented by one, forms the
                ``iaf_<n>`` prefix used in every layer name.

        Returns:
            dict with ``'x'`` (``x * scale + mean``), ``'mean'`` and
            ``'scale'`` (clipped to ``[exp(-7), exp(7)]``).
        """
        hp = self.hparams
        num_stages = hp.num_stages
        num_layers = hp.num_iaf_layers[iaf_idx]
        filter_length = hp.filter_length
        width = hp.width
        out_width = self.out_width
        deconv_width = hp.deconv_width
        deconv_config = hp.deconv_config  # [[l1, s1], [l2, s2]]

        mel = inputs['mel']
        x = inputs['x']

        iaf_name = 'iaf_{:d}'.format(iaf_idx + 1)
        conv1d = masked.conv1d

        # Conditioning features shared by all layers of this flow.
        mel_en = wavenet._deconv_stack(
            mel, deconv_width, deconv_config, name=iaf_name)

        # The input is shifted right before the first masked convolution.
        hidden = conv1d(masked.shift_right(x),
                        num_filters=width,
                        filter_length=filter_length,
                        name='{}/start_conv'.format(iaf_name))

        for i in range(num_layers):
            layer_no = i + 1
            # Dilation cycles through 1, 2, 4, ... every `num_stages` layers.
            dilation = 2 ** (i % num_stages)
            pre_gate = conv1d(
                hidden,
                num_filters=2 * width,
                filter_length=filter_length,
                dilation=dilation,
                name='{}/dilated_conv_{:d}'.format(iaf_name, layer_no))
            mel_proj = conv1d(
                mel_en,
                num_filters=2 * width,
                filter_length=1,
                name='{}/mel_cond_{:d}'.format(iaf_name, layer_no))
            pre_gate = wavenet._condition(pre_gate, mel_proj)

            # Gated activation: first half of the channels drives the
            # sigmoid, second half the tanh.
            channels = pre_gate.get_shape().as_list()[2]
            assert channels % 2 == 0
            half = channels // 2
            gated = (tf.sigmoid(pre_gate[:, :, :half]) *
                     tf.tanh(pre_gate[:, :, half:]))

            residual = conv1d(
                gated,
                num_filters=width,
                filter_length=1,
                name='{}/res_{:d}'.format(iaf_name, layer_no))
            hidden += residual

        # Output head: relu -> 1x1 conv -> conditioning -> relu -> 1x1 conv.
        hidden = conv1d(tf.nn.relu(hidden),
                        num_filters=width,
                        filter_length=1,
                        name='{}/out1'.format(iaf_name))
        mel_proj = conv1d(mel_en,
                          num_filters=width,
                          filter_length=1,
                          name='{}/mel_cond_out1'.format(iaf_name))
        hidden = tf.nn.relu(wavenet._condition(hidden, mel_proj))
        out = conv1d(hidden,
                     num_filters=out_width,
                     filter_length=1,
                     name='{}/out2'.format(iaf_name))

        mean, scale = tf.split(out, num_or_size_splits=2, axis=2)
        # Keep the scale strictly positive and bounded.
        min_scale = tf.exp(-7.0)
        max_scale = tf.exp(7.0)
        scale = tf.clip_by_value(scale, min_scale, max_scale)

        return {'x': x * scale + mean, 'mean': mean, 'scale': scale}
Ejemplo n.º 2
0
    def _create_iaf(self, inputs, iaf_idx, init):
        """Build the graph of one IAF flow and apply it to ``inputs['x']``.

        ``inputs['mel']`` is passed through ``wavenet._deconv_stack`` and the
        result conditions every layer.  ``inputs['x']`` is shifted right and
        run through ``num_iaf_layers[iaf_idx]`` dilated masked convolutions
        with a sigmoid/tanh gate and a residual connection each.  Two
        separate 1x1 convolutions of ``out_width // 2`` filters produce
        ``mean`` and the scale parameters, which
        ``PWNHelper.scale_log_scale_fn`` turns into ``scale`` and
        ``log_scale``.

        Args:
            inputs: mapping that provides the keys ``'mel'`` and ``'x'``.
                Tensors are indexed as rank 3 with channels on axis 2.
            iaf_idx: zero-based index of the flow.  It selects the entry of
                ``hparams.num_iaf_layers`` and, incremented by one, forms the
                ``iaf_<n>`` prefix used in every layer name.
            init: forwarded unchanged as ``init=`` to the deconv stack and to
                every convolution except the two output convolutions, which
                receive the value derived from it by
                ``PWNHelper.manual_finit_or_not_fn``.  Presumably an
                initialization-pass switch; confirm against
                ``masked.conv1d``.

        Returns:
            dict with ``'x'`` (``x * scale + mean``), ``'mean'``,
            ``'scale'`` and ``'log_scale'``.
        """
        hp = self.hparams
        num_stages = hp.num_stages
        num_layers = hp.num_iaf_layers[iaf_idx]
        filter_length = hp.filter_length
        width = hp.width
        out_width = self.out_width
        deconv_width = hp.deconv_width
        deconv_config = hp.deconv_config  # [[l1, s1], [l2, s2]]
        use_weight_norm = self.use_weight_norm
        use_resize_conv = self.use_resize_conv
        upsample_act = self.upsample_act
        # NOTE(review): the gate convolution is asked for `width` filters, so
        # (assuming conv1d emits `num_filters` channels) each half of the
        # gate has width // 2 channels -- confirm this is intended.
        gate_width = width
        final_init, final_bias = PWNHelper.manual_finit_or_not_fn(
            init, iaf_idx)

        mel = inputs['mel']
        x = inputs['x']

        iaf_name = 'iaf_{:d}'.format(iaf_idx + 1)
        conv1d = masked.conv1d
        # Keyword arguments shared by all non-output convolutions.
        shared = dict(use_weight_norm=use_weight_norm, init=init)

        # Conditioning features shared by all layers of this flow.
        mel_en = wavenet._deconv_stack(
            mel,
            deconv_width,
            deconv_config,
            act=upsample_act,
            use_resize_conv=use_resize_conv,
            name=iaf_name,
            use_weight_norm=use_weight_norm,
            init=init)

        # The input is shifted right before the first masked convolution.
        hidden = conv1d(masked.shift_right(x),
                        num_filters=width,
                        filter_length=filter_length,
                        name='{}/start_conv'.format(iaf_name),
                        **shared)

        for i in range(num_layers):
            layer_no = i + 1
            # Dilation cycles through 1, 2, 4, ... every `num_stages` layers.
            dilation = 2 ** (i % num_stages)
            pre_gate = conv1d(
                hidden,
                num_filters=gate_width,
                filter_length=filter_length,
                dilation=dilation,
                name='{}/dilated_conv_{:d}'.format(iaf_name, layer_no),
                **shared)
            mel_proj = conv1d(
                mel_en,
                num_filters=gate_width,
                filter_length=1,
                name='{}/mel_cond_{:d}'.format(iaf_name, layer_no),
                **shared)
            pre_gate = wavenet._condition(pre_gate, mel_proj)

            # Gated activation: first half of the channels drives the
            # sigmoid, second half the tanh.
            channels = pre_gate.get_shape().as_list()[2]
            assert channels % 2 == 0
            half = channels // 2
            gated = (tf.sigmoid(pre_gate[:, :, :half]) *
                     tf.tanh(pre_gate[:, :, half:]))

            residual = conv1d(
                gated,
                num_filters=width,
                filter_length=1,
                name='{}/res_{:d}'.format(iaf_name, layer_no),
                **shared)
            hidden += residual

        # Output head: relu -> 1x1 conv -> conditioning -> relu.
        hidden = conv1d(tf.nn.relu(hidden),
                        num_filters=width,
                        filter_length=1,
                        name='{}/out1'.format(iaf_name),
                        **shared)
        mel_proj = conv1d(mel_en,
                          num_filters=width,
                          filter_length=1,
                          name='{}/mel_cond_out1'.format(iaf_name),
                          **shared)
        hidden = tf.nn.relu(wavenet._condition(hidden, mel_proj))

        # Mean and scale come from two independent 1x1 convolutions that use
        # the dedicated final initializer; only the scale branch overrides
        # the bias initializer.
        half_out = out_width // 2
        mean = conv1d(hidden,
                      num_filters=half_out,
                      filter_length=1,
                      name='{}/out2_mean'.format(iaf_name),
                      use_weight_norm=use_weight_norm,
                      init=final_init)
        scale_bias_init = tf.constant_initializer(final_bias)
        scale_params = conv1d(hidden,
                              num_filters=out_width // 2,
                              filter_length=1,
                              name='{}/out2_scale'.format(iaf_name),
                              use_weight_norm=use_weight_norm,
                              init=final_init,
                              biases_initializer=scale_bias_init)

        scale, log_scale = PWNHelper.scale_log_scale_fn(scale_params)
        new_x = x * scale + mean

        if DETAIL_LOG:
            # Per-flow scalar summaries, tagged with the zero-based index.
            tf.summary.scalar('scale_{}'.format(iaf_idx),
                              tf.reduce_mean(scale))
            tf.summary.scalar('log_scale_{}'.format(iaf_idx),
                              tf.reduce_mean(log_scale))
            tf.summary.scalar('mean_{}'.format(iaf_idx),
                              tf.reduce_mean(mean))

        result = {'x': new_x, 'mean': mean}
        result['scale'] = scale
        result['log_scale'] = log_scale
        return result
Ejemplo n.º 3
0
    def _create_iaf(self, inputs, iaf_idx):
        """Build the graph of one IAF flow and apply it to ``inputs['x']``.

        ``inputs['mel']`` is passed through ``wavenet._deconv_stack`` and the
        result conditions every layer.  ``inputs['x']`` is shifted right and
        run through ``num_iaf_layers[iaf_idx]`` dilated masked convolutions
        with a sigmoid/tanh gate and a residual connection each.  A final
        1x1 convolution with ``self.out_width`` filters is split in two along
        axis 2 into ``mean`` and the scale parameters.

        The scale parameters are interpreted according to
        ``hparams.use_log_scale`` (treated as ``True`` when the attribute is
        absent): either as a log-scale clipped to ``[-9, 7]`` and
        exponentiated, or passed through softplus and clipped to
        ``[exp(-9), exp(7)]``.

        Args:
            inputs: mapping that provides the keys ``'mel'`` and ``'x'``.
                Tensors are indexed as rank 3 with channels on axis 2.
            iaf_idx: zero-based index of the flow.  It selects the entry of
                ``hparams.num_iaf_layers`` and, incremented by one, forms the
                ``iaf_<n>`` prefix used in every layer name.

        Returns:
            dict with ``'x'`` (``x * scale + mean``), ``'mean'``,
            ``'scale'`` and ``'log_scale'``.
        """
        hp = self.hparams
        num_stages = hp.num_stages
        num_layers = hp.num_iaf_layers[iaf_idx]
        filter_length = hp.filter_length
        width = hp.width
        out_width = self.out_width
        deconv_width = hp.deconv_width
        deconv_config = hp.deconv_config  # [[l1, s1], [l2, s2]]
        use_log_scale = getattr(self.hparams, 'use_log_scale', True)

        mel = inputs['mel']
        x = inputs['x']

        iaf_name = 'iaf_{:d}'.format(iaf_idx + 1)
        conv1d = masked.conv1d

        # Conditioning features shared by all layers of this flow.
        mel_en = wavenet._deconv_stack(mel,
                                       deconv_width,
                                       deconv_config,
                                       name=iaf_name)

        # The input is shifted right before the first masked convolution.
        hidden = conv1d(masked.shift_right(x),
                        num_filters=width,
                        filter_length=filter_length,
                        name='{}/start_conv'.format(iaf_name))

        for i in range(num_layers):
            layer_no = i + 1
            # Dilation cycles through 1, 2, 4, ... every `num_stages` layers.
            dilation = 2 ** (i % num_stages)
            pre_gate = conv1d(hidden,
                              num_filters=2 * width,
                              filter_length=filter_length,
                              dilation=dilation,
                              name='{}/dilated_conv_{:d}'.format(
                                  iaf_name, layer_no))
            mel_proj = conv1d(mel_en,
                              num_filters=2 * width,
                              filter_length=1,
                              name='{}/mel_cond_{:d}'.format(
                                  iaf_name, layer_no))
            pre_gate = wavenet._condition(pre_gate, mel_proj)

            # Gated activation: first half of the channels drives the
            # sigmoid, second half the tanh.
            channels = pre_gate.get_shape().as_list()[2]
            assert channels % 2 == 0
            half = channels // 2
            gated = (tf.sigmoid(pre_gate[:, :, :half]) *
                     tf.tanh(pre_gate[:, :, half:]))

            residual = conv1d(gated,
                              num_filters=width,
                              filter_length=1,
                              name='{}/res_{:d}'.format(iaf_name, layer_no))
            hidden += residual

        # Output head: relu -> 1x1 conv -> conditioning -> relu -> 1x1 conv.
        hidden = conv1d(tf.nn.relu(hidden),
                        num_filters=width,
                        filter_length=1,
                        name='{}/out1'.format(iaf_name))
        mel_proj = conv1d(mel_en,
                          num_filters=width,
                          filter_length=1,
                          name='{}/mel_cond_out1'.format(iaf_name))
        hidden = tf.nn.relu(wavenet._condition(hidden, mel_proj))

        # A small-variance initializer keeps the scale in a reasonably small
        # range when the output is interpreted as a log-scale.
        if use_log_scale:
            final_kernel_init = tf.truncated_normal_initializer(0., 0.01)
        else:
            final_kernel_init = tf.uniform_unit_scaling_initializer(1.0)
        out = conv1d(hidden,
                     num_filters=out_width,
                     filter_length=1,
                     name='{}/out2'.format(iaf_name),
                     kernel_initializer=final_kernel_init)
        mean, scale_params = tf.split(out, num_or_size_splits=2, axis=2)

        if use_log_scale:
            # Parameters are the log of the scale: clip, then exponentiate.
            log_scale = tf.clip_by_value(scale_params, -9.0, 7.0)
            scale = tf.exp(log_scale)
        else:
            # Parameters go through softplus to become a positive scale,
            # which is clipped before taking its log.
            scale_params = tf.nn.softplus(scale_params)
            min_scale = tf.exp(-9.0)
            max_scale = tf.exp(7.0)
            scale = tf.clip_by_value(scale_params, min_scale, max_scale)
            log_scale = tf.log(scale)
        new_x = x * scale + mean

        if DETAIL_LOG:
            # Per-flow scalar summaries, tagged with the zero-based index.
            tf.summary.scalar('scale_{}'.format(iaf_idx),
                              tf.reduce_mean(scale))
            tf.summary.scalar('log_scale_{}'.format(iaf_idx),
                              tf.reduce_mean(log_scale))
            tf.summary.scalar('mean_{}'.format(iaf_idx),
                              tf.reduce_mean(mean))

        result = {'x': new_x, 'mean': mean}
        result['scale'] = scale
        result['log_scale'] = log_scale
        return result