예제 #1
0
def build_cae_from_options_dict(a, a_lengths, b_lengths, options_dict):
    """
    Build an encoder-decoder network with an autoencoder latent layer.

    The graph is assembled by
    `tflego.build_multi_encdec_lazydynamic_latentfunc`, with
    `tflego.build_autoencoder` as the latent-layer builder and no
    conditioning tensor.

    Parameters
    ----------
    a
        Forwarded as the first positional argument of the tflego builder
        (presumably the padded input sequences -- confirm against tflego).
    a_lengths
        Forwarded as the second positional argument (presumably the lengths
        of the sequences in `a` -- confirm).
    b_lengths
        Forwarded as `y_lengths` (presumably the lengths of the target
        sequences -- confirm).
    options_dict : dict
        Must contain "n_z", "enc_n_hiddens", "dec_n_hiddens" (indexable,
        non-empty), "rnn_type", "bidirectional" and "keep_prob".

    Returns
    -------
    dict
        {"z": latent layer's "z" entry, "y": masked decoder output,
        "mask": mask returned by the tflego builder}.
    """
    # Latent layer: no extra encoder layers; its decoder side uses only the
    # first entry of the configured decoder sizes.
    latent_func_kwargs = {
        "enc_n_hiddens": [],
        "n_z": options_dict["n_z"],
        "dec_n_hiddens": [options_dict["dec_n_hiddens"][0]],
        "activation": tf.nn.relu
    }

    # Network
    network_dict = tflego.build_multi_encdec_lazydynamic_latentfunc(
        a,
        a_lengths,
        options_dict["enc_n_hiddens"],
        options_dict["dec_n_hiddens"],
        tflego.build_autoencoder,
        latent_func_kwargs,
        y_lengths=b_lengths,
        rnn_type=options_dict["rnn_type"],
        bidirectional=options_dict["bidirectional"],
        keep_prob=options_dict["keep_prob"],
        add_conditioning_tensor=None)

    z = network_dict["latent_layer"]["z"]
    y = network_dict["decoder_output"]
    mask = network_dict["mask"]

    # Safety: multiply the output by the mask (with a trailing axis added so
    # the two can be multiplied); presumably this zeroes padded positions.
    y *= tf.expand_dims(mask, -1)

    return {"z": z, "y": y, "mask": mask}
예제 #2
0
def build_cae_from_options_dict(a, a_lengths, b_lengths, options_dict):
    """
    Build an encoder-decoder network with an autoencoder latent layer and
    optional speaker-embedding conditioning.

    The graph is assembled by
    `tflego.build_multi_encdec_lazydynamic_latentfunc`, with
    `tflego.build_autoencoder` as the latent-layer builder. When
    `options_dict["d_speaker_embedding"]` is not None, a speaker-id
    placeholder and an embedding matrix are created and the looked-up
    embeddings are passed to the builder as `add_conditioning_tensor`;
    otherwise None is passed.

    Parameters
    ----------
    a
        Forwarded as the first positional argument of the tflego builder
        (presumably the padded input sequences -- confirm against tflego).
    a_lengths
        Forwarded as the second positional argument (presumably the lengths
        of the sequences in `a` -- confirm).
    b_lengths
        Forwarded as `y_lengths` (presumably the lengths of the target
        sequences -- confirm).
    options_dict : dict
        Must contain "n_z", "enc_n_hiddens", "dec_n_hiddens" (indexable,
        non-empty), "rnn_type", "bidirectional", "keep_prob" and
        "d_speaker_embedding"; "n_speakers" is also read when
        "d_speaker_embedding" is not None.

    Returns
    -------
    dict
        Always has "z", "y" (masked decoder output) and "mask". When
        speaker conditioning is enabled it additionally has "speaker_id"
        (the placeholder) and "speaker_embedding" (the embedding variable).
    """

    # Latent layer: no extra encoder layers; its decoder side uses only the
    # first entry of the configured decoder sizes.
    latent_func_kwargs = {
        "enc_n_hiddens": [],
        "n_z": options_dict["n_z"],
        "dec_n_hiddens": [options_dict["dec_n_hiddens"][0]],
        "activation": tf.nn.relu
    }

    # Speaker embedding (optional). The flag is evaluated once and the
    # conditioning tensor defaults to None so every later use is well-defined.
    use_speaker_embedding = options_dict["d_speaker_embedding"] is not None
    speaker_id = None
    speaker_embedding = None
    conditioning_tensor = None
    if use_speaker_embedding:
        speaker_id = tf.placeholder(TF_ITYPE, [None])
        with tf.variable_scope("speaker_embedding"):
            # Embedding matrix "E" of shape [n_speakers, d_speaker_embedding]
            speaker_embedding = tf.get_variable(
                "E", [
                    options_dict["n_speakers"],
                    options_dict["d_speaker_embedding"]
                ],
                dtype=TF_DTYPE,
                initializer=tf.contrib.layers.xavier_initializer())
            conditioning_tensor = tf.nn.embedding_lookup(speaker_embedding,
                                                         speaker_id)

    # Network
    network_dict = tflego.build_multi_encdec_lazydynamic_latentfunc(
        a,
        a_lengths,
        options_dict["enc_n_hiddens"],
        options_dict["dec_n_hiddens"],
        tflego.build_autoencoder,
        latent_func_kwargs,
        y_lengths=b_lengths,
        rnn_type=options_dict["rnn_type"],
        bidirectional=options_dict["bidirectional"],
        keep_prob=options_dict["keep_prob"],
        add_conditioning_tensor=conditioning_tensor)

    z = network_dict["latent_layer"]["z"]
    y = network_dict["decoder_output"]
    mask = network_dict["mask"]

    # Safety: multiply the output by the mask (with a trailing axis added so
    # the two can be multiplied); presumably this zeroes padded positions.
    y *= tf.expand_dims(mask, -1)

    result = {"z": z, "y": y, "mask": mask}
    if use_speaker_embedding:
        result["speaker_id"] = speaker_id
        result["speaker_embedding"] = speaker_embedding
    return result
def build_vae_from_options_dict(x, x_lengths, options_dict):
    """
    Build an encoder-decoder network whose latent layer is built by
    `tflego.build_vae`.

    Parameters
    ----------
    x
        Forwarded as the first positional argument of
        `tflego.build_multi_encdec_lazydynamic_latentfunc` (presumably the
        padded input sequences -- confirm against tflego).
    x_lengths
        Forwarded as the second positional argument (presumably the lengths
        of the sequences in `x` -- confirm).
    options_dict : dict
        Must contain "n_z", "enc_n_hiddens", "dec_n_hiddens" (indexable,
        non-empty), "rnn_type" and "keep_prob".

    Returns
    -------
    dict
        The dictionary returned by the tflego builder, with its
        "decoder_output" entry replaced by the masked output.
    """
    latent_builder = tflego.build_vae

    # Latent layer settings: no extra encoder layers; the decoder side uses
    # only the first entry of the configured decoder sizes.
    n_latent = options_dict["n_z"]
    first_decoder_size = options_dict["dec_n_hiddens"][0]
    latent_builder_kwargs = {
        "enc_n_hiddens": [],
        "n_z": n_latent,
        "dec_n_hiddens": [first_decoder_size],
        "activation": tf.nn.relu
    }

    model = tflego.build_multi_encdec_lazydynamic_latentfunc(
        x, x_lengths,
        options_dict["enc_n_hiddens"], options_dict["dec_n_hiddens"],
        latent_builder, latent_builder_kwargs,
        rnn_type=options_dict["rnn_type"],
        keep_prob=options_dict["keep_prob"]
    )

    # Safety: multiply the output by the mask (with a trailing axis added so
    # the two can be multiplied); presumably this zeroes padded positions.
    model["decoder_output"] *= tf.expand_dims(model["mask"], -1)
    return model