Ejemplo n.º 1
0
    def __init__(self, hp, max_to_keep=5):
        """Create the vocoder WaveNet, optional upsampling variables and a saver.

        Args:
            hp: Hyper-parameter object. Attributes read here: ``layers``,
                ``stacks``, ``upsample_factor``, ``gc_enable``,
                ``input_type``, ``quantize_channels``, ``batch_size``,
                ``filter_width``, ``initial_filter_width``,
                ``residual_channels``, ``dilation_channels``,
                ``out_channels``, ``skip_channels``, ``n_mel_bins`` and
                ``upsample_conditional_features``. ``global_channel`` and
                ``global_cardinality`` are read only when ``gc_enable`` is
                truthy.
            max_to_keep: Forwarded unchanged to ``tf.train.Saver``.
        """
        self.hp = hp
        self.upsample_factor = hp.upsample_factor
        self.gc_enable = hp.gc_enable

        # One cycle of powers of two (1, 2, 4, ...), repeated once per stack.
        per_stack = hp.layers // hp.stacks
        dilations = [2 ** exponent for exponent in range(per_stack)] * hp.stacks

        # Global conditioning settings stay None unless explicitly enabled.
        gc_channels = hp.global_channel if hp.gc_enable else None
        gc_cardinality = hp.global_cardinality if hp.gc_enable else None

        # The table lookup always happens; "raw" input then discards the
        # result and runs the network without quantization channels.
        scalar_input = hp.input_type == "raw"
        table_channels = hp.quantize_channels[hp.input_type]
        quantization_channels = None if scalar_input else table_channels

        with tf.variable_scope('vocoder'):
            self.net = WaveNetModel(
                batch_size=hp.batch_size,
                dilations=dilations,
                filter_width=hp.filter_width,
                scalar_input=scalar_input,
                initial_filter_width=hp.initial_filter_width,
                residual_channels=hp.residual_channels,
                dilation_channels=hp.dilation_channels,
                quantization_channels=quantization_channels,
                out_channels=hp.out_channels,
                skip_channels=hp.skip_channels,
                global_condition_channels=gc_channels,
                global_condition_cardinality=gc_cardinality,
                use_biases=True,
                local_condition_channels=hp.n_mel_bins)

            if hp.upsample_conditional_features:
                with tf.variable_scope('upsample_layer') as upsample_scope:
                    upsample_vars = {}
                    for idx, factor in enumerate(hp.upsample_factor):
                        # Every filter entry starts at 1 / factor.
                        kernel = np.ones(
                            [factor, hp.filter_width, 1, 1]) / float(factor)
                        kernel_init = tf.constant_initializer(
                            value=kernel, dtype=tf.float32)

                        filter_key = 'upsample{}_filter'.format(idx)
                        upsample_vars[filter_key] = tf.get_variable(
                            name='upsample{}'.format(idx),
                            initializer=kernel_init,
                            shape=kernel.shape)

                        bias_key = 'upsample{}_bias'.format(idx)
                        upsample_vars[bias_key] = create_bias_variable(
                            'upsample{}_bias'.format(idx), [1])

                    self.upsample_var = upsample_vars
                    self.upsample_scope = upsample_scope

        # The saver is built over tf.trainable_variables() as it stands now.
        self.saver = tf.train.Saver(var_list=tf.trainable_variables(),
                                    max_to_keep=max_to_keep)
Ejemplo n.º 2
0
def create_wavenet(args, wavenet_params):
    """Instantiate a ``WaveNetModel`` from CLI arguments and a parameter dict.

    Args:
        args: Object exposing ``batch_size`` and
            ``l2_regularization_strength``.
        wavenet_params: Mapping that must contain every key listed in
            ``forwarded_keys`` below.

    Returns:
        The constructed ``WaveNetModel``.

    Side effects:
        ``args.l2_regularization_strength`` is replaced by ``None`` when it
        compares equal to 0. This happens after the model is built, so it
        does not influence the returned network.
    """
    # Keys are forwarded under the same names as constructor keywords.
    forwarded_keys = (
        "dilations",
        "filter_width",
        "residual_channels",
        "dilation_channels",
        "skip_channels",
        "quantization_channels",
        "use_biases",
        "scalar_input",
        "initial_filter_width",
    )

    batch_size = args.batch_size
    model_kwargs = {key: wavenet_params[key] for key in forwarded_keys}
    net = WaveNetModel(batch_size=batch_size, **model_kwargs)

    if args.l2_regularization_strength == 0:
        args.l2_regularization_strength = None

    return net
Ejemplo n.º 3
0
    def __init__(self,
                 batch_size=None,
                 sample_size=None,
                 q_factor=1,
                 n_stack=2,
                 max_dilation=10,
                 K=512,
                 D=128,
                 lr=0.001,
                 use_gc=False,
                 gc_cardinality=None,
                 is_training=True,
                 global_step=None,
                 scope='params',
                 residual_channels=256,
                 dilation_channels=512,
                 skip_channels=256,
                 use_biases=False,
                 upsampling_method='deconv',
                 encoding_channels=[2, 4, 8, 16, 32, 1]):
        """Store the model configuration and create all graph variables.

        Args:
            batch_size: Forwarded to ``WaveNetModel``; may be ``None``.
            sample_size: Required (asserted not ``None``). Used to derive
                ``reduced_timestep`` when ``batch_size`` is also given.
            q_factor: Must be 1 or even; ``quantization_channels`` is set to
                ``256 * q_factor``.
            n_stack: Number of repetitions of the dilation cycle.
            max_dilation: Length of one cycle (dilations ``2**0`` up to
                ``2**(max_dilation - 1)``).
            K, D: Shape ``[K, D]`` of the ``embedding`` variable.
            lr: Stored as ``self.lr``.
            use_gc: Stored as ``self.use_gc``.
            gc_cardinality: Stored, and passed to ``WaveNetModel`` as both
                the global-condition channels and cardinality.
            is_training: Stored as ``self.is_training``.
            global_step: Stored as ``self.global_step``.
            scope: Name of the outer variable scope.
            residual_channels, dilation_channels, skip_channels, use_biases:
                Forwarded to ``WaveNetModel``.
            upsampling_method: Stored as ``self.upsampling_method``.
            encoding_channels: Sequence copied into
                ``self.encoding_channels``.
        """
        assert sample_size is not None
        assert q_factor == 1 or (q_factor % 2) == 0

        self.filter_width = 2
        self.dilations = [
            2**i for _ in range(n_stack) for i in range(max_dilation)
        ]
        self.receptive_field = (self.filter_width - 1) * sum(
            self.dilations) + 1
        self.receptive_field += self.filter_width - 1

        self.q_factor = q_factor
        self.quantization_channels = 256 * q_factor

        self.K = K
        self.D = D
        self.use_gc = use_gc
        self.gc_cardinality = gc_cardinality
        self.use_biases = use_biases

        # encoding spec
        self.encode_level = 6
        # Copy the sequence: the default is a single list object shared by
        # every call, so storing it directly would let one instance's
        # mutation leak into all others (and into the default itself).
        self.encoding_channels = list(encoding_channels)

        # model spec
        self.upsampling_method = upsampling_method
        self.is_training = is_training
        self.train_op = None
        self.batch_size = batch_size
        self.sample_size = sample_size
        self.reduced_timestep = None
        self.initialized = False
        if batch_size is not None and sample_size is not None:
            # Force float division so the ceil is meaningful even where
            # ``/`` on two ints truncates (Python 2 without
            # ``from __future__ import division``).
            self.reduced_timestep = int(
                np.ceil(self.sample_size / float(2**self.encode_level)))
            self.initialized = True

        # etc
        self.drop_rate = 0.5
        self.global_step = global_step
        self.lr = lr

        with tf.variable_scope(scope) as params:
            self.enc_var, self.enc_scope = self.create_encoder_variables()
            with tf.variable_scope('decoder') as dec_param_scope:

                self.deconv_var = self.create_deconv_variables()
                # NOTE(review): global_condition_channels receives
                # gc_cardinality rather than a separate channel count, and
                # use_gc is not consulted here -- confirm both are intended.
                self.wavenet = WaveNetModel(
                    batch_size=batch_size,
                    dilations=self.dilations,
                    filter_width=self.filter_width,
                    residual_channels=residual_channels,
                    dilation_channels=dilation_channels,
                    quantization_channels=self.quantization_channels,
                    skip_channels=skip_channels,
                    global_condition_channels=gc_cardinality,
                    global_condition_cardinality=gc_cardinality,
                    use_biases=use_biases)

                self.dec_scope = dec_param_scope

            with tf.variable_scope('embed'):
                init = tf.truncated_normal_initializer(stddev=0.01)
                self.embeds = tf.get_variable('embedding', [self.K, self.D],
                                              dtype=tf.float32,
                                              initializer=init)

        self.param_scope = params
        self.saver = None
        self.set_saver()