Example #1
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, 48, 48, 1])
        bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[])
        x_tilde, z_e_x, z_q_x, z_i_x, z_nst_q_x, z_emb = create_vqvae(
            images, bn_flag)
        rec_loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        vq_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(z_e_x) - z_nst_q_x))
        commit_loss = tf.reduce_mean(
            tf.square(z_e_x - tf.stop_gradient(z_nst_q_x)))
        beta = 0.25
        loss = rec_loss + vq_loss + beta * commit_loss
        params = get_params_dict()
        grads = tf.gradients(loss, list(params.values()))

        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

    things_names = [
        "images", "bn_flag", "x_tilde", "z_e_x", "z_q_x", "z_i_x", "z_emb",
        "loss", "rec_loss", "train_step"
    ]
    # eval() inside a comprehension cannot see function locals in Python 3,
    # so capture a snapshot of them first
    _locals = locals()
    things_tf = [_locals[name] for name in things_names]
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
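
The vq_loss / commit_loss pair above uses tf.stop_gradient to route gradients: the codebook term only updates the quantized values, while the commitment term only updates the encoder output. A minimal standalone sketch of that routing (toy variables standing in for z_e_x and z_nst_q_x, not part of the original example):

import tensorflow as tf

z_e = tf.Variable([1.0, 2.0])  # stands in for the encoder output z_e_x
z_q = tf.Variable([0.5, 2.5])  # stands in for the quantized code z_nst_q_x
vq = tf.reduce_mean(tf.square(tf.stop_gradient(z_e) - z_q))
commit = tf.reduce_mean(tf.square(z_e - tf.stop_gradient(z_q)))
g_vq = tf.gradients(vq, [z_e, z_q])          # [None, <grad>]: only codes move
g_commit = tf.gradients(commit, [z_e, z_q])  # [<grad>, None]: only encoder moves
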
Example #2
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, 12, 6, 1])
        labels = tf.placeholder(tf.float32, shape=[None, 1])
        x_tilde = create_pixel_cnn(images, labels)
        loss = tf.reduce_mean(
            CategoricalCrossEntropyLinearIndexCost(x_tilde, images))
        #loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        #loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=x_tilde, labels=images))
        #loss = tf.reduce_mean((x_tilde - images) ** 2)
        params = get_params_dict()
        grads = tf.gradients(loss, list(params.values()))

        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

    things_names = ["images", "labels", "x_tilde", "loss", "train_step"]
    _locals = locals()  # snapshot; eval() can't see function locals in Python 3
    things_tf = [_locals[name] for name in things_names]
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
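
A hypothetical training step with the graph this returns; the batch arrays and their shapes are assumptions read off the placeholders, not part of the original example:

import numpy as np

graph, model = create_graph()
with graph.as_default():
    init = tf.global_variables_initializer()
batch_images = np.random.rand(16, 12, 6, 1).astype("float32")
batch_labels = np.random.rand(16, 1).astype("float32")
with tf.Session(graph=graph) as sess:
    sess.run(init)
    feed = {model.images: batch_images, model.labels: batch_labels}
    l, _ = sess.run([model.loss, model.train_step], feed)
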
Example #3
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
        inputs_tm1 = inputs[:-1]
        inputs_t = inputs[1:]
        init_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        pred, hiddens, cells = create_model(inputs_tm1, inputs_t, init_hidden,
                                            init_cell)
        rec_loss = tf.reduce_mean(tf.square(pred - inputs_t))
        loss = rec_loss

        params = get_params_dict()
        grads = tf.gradients(loss, list(params.values()))

        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

    things_names = [
        "inputs", "inputs_tm1", "inputs_t", "init_hidden", "init_cell",
        "hiddens", "cells", "pred", "loss", "rec_loss", "train_step"
    ]
    things_tf = [
        inputs, inputs_tm1, inputs_t, init_hidden, init_cell, hiddens, cells,
        pred, loss, rec_loss, train_step
    ]
    assert len(things_names) == len(things_tf)
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
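
One way to drive this graph is to carry each batch's final hidden and cell state into the next call. A sketch under the assumption that hiddens and cells come back time-major; batches is an assumed iterable of [T, batch_size, 1] arrays:

import numpy as np

graph, m = create_graph()
with graph.as_default():
    init = tf.global_variables_initializer()
with tf.Session(graph=graph) as sess:
    sess.run(init)
    h = np.zeros((batch_size, n_hid), dtype="float32")
    c = np.zeros((batch_size, n_hid), dtype="float32")
    for seq in batches:
        feed = {m.inputs: seq, m.init_hidden: h, m.init_cell: c}
        l, hs, cs, _ = sess.run([m.loss, m.hiddens, m.cells, m.train_step],
                                feed)
        h, c = hs[-1], cs[-1]  # carry state across batches
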
Example #4
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=[None, batch_size, n_inputs])
        targets = tf.placeholder(tf.float32,
                                 shape=[None, batch_size, n_inputs])
        init_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        r = create_model(inputs, init_hidden, init_cell, init_q_hidden,
                         init_q_cell)

        # reconstruction loss
        rec_loss = tf.reduce_mean(
            BernoulliCrossEntropyCost(tf.reshape(r.pred_sig, (-1, 1)),
                                      tf.reshape(targets, (-1, 1))))

        # h2h embedding losses
        alpha = 1.
        beta = 0.25
        vq_h_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(r.q_nvq_hiddens) - r.q_nst_hiddens))
        commit_h_loss = tf.reduce_mean(
            tf.square(r.q_nvq_hiddens - tf.stop_gradient(r.q_nst_hiddens)))

        # output embedding losses
        vq_o_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(r.q_nvq_out) - r.q_nst_out))
        commit_o_loss = tf.reduce_mean(
            tf.square(r.q_nvq_out - tf.stop_gradient(r.q_nst_out)))

        loss_r = rec_loss
        loss_h = alpha * vq_h_loss + beta * commit_h_loss
        loss_o = alpha * vq_o_loss + beta * commit_o_loss
        loss = loss_r + loss_h + loss_o

        params = get_params_dict()
        grads = tf.gradients(loss, list(params.values()))
        learning_rate = 0.0001
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        grads = [
            tf.clip_by_value(g, -10., 10.) if g is not None else None
            for g in grads
        ]
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

    things_names = [
        "inputs", "targets", "init_hidden", "init_cell", "init_q_hidden",
        "init_q_cell", "loss", "rec_loss", "train_step"
    ]
    _locals = locals()  # snapshot; eval() can't see function locals in Python 3
    things_tf = [_locals[name] for name in things_names]
    things_names += r._asdict().keys()
    things_tf += r._asdict().values()
    train_model = namedtuple('Model', things_names)(*things_tf)
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    return graph, train_model
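
The per-element tf.clip_by_value above keeps None gradients in the list so apply_gradients can skip the matching parameters. A global-norm clip is a common alternative (not what this example does); a sketch of a drop-in for the clip-and-apply lines, reusing grads, params, and optimizer from the example:

gv = [(g, p) for g, p in zip(grads, params.values()) if g is not None]
clipped, _ = tf.clip_by_global_norm([g for g, _ in gv], 10.)
train_step = optimizer.apply_gradients(list(zip(clipped, [p for _, p in gv])))
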
Example #5
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        images = tf.placeholder(tf.float32, shape=[None, cut_len, 257, 1])
        bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[])
        x_tilde, z_e_x, z_q_x, z_i_x, z_emb = create_vqvae(images, bn_flag)
        rec_loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, images))
        vq_loss = tf.reduce_mean(tf.square(tf.stop_gradient(z_e_x) - z_q_x))
        commit_loss = tf.reduce_mean(tf.square(z_e_x - tf.stop_gradient(z_q_x)))
        #rec_loss = tf.reduce_mean(tf.reduce_sum(BernoulliCrossEntropyCost(x_tilde, images), axis=[1, 2]))
        #vq_loss = tf.reduce_mean(tf.reduce_sum(tf.square(tf.stop_gradient(z_e_x) - z_q_x), axis=[1, 2, 3]))
        #commit_loss = tf.reduce_mean(tf.reduce_sum(tf.square(z_e_x - tf.stop_gradient(z_q_x)), axis=[1, 2, 3]))
        beta = 0.25
        loss = rec_loss + vq_loss + beta * commit_loss
        params = get_params_dict()

        enc_params = [params[k] for k in params.keys() if "enc" in k]
        dec_params = [params[k] for k in params.keys() if "dec" in k]
        emb_params = [params[k] for k in params.keys() if "embed" in k]

        dec_grads = list(zip(tf.gradients(loss, dec_params), dec_params))
        # update the embeddings with the vq loss gradient alone --
        # like having a higher learning rate only on the embeddings
        embed_grads = list(zip(tf.gradients(vq_loss, emb_params), emb_params))
        grad_z = tf.gradients(rec_loss, z_q_x)
        enc_grads = [(tf.gradients(z_e_x, p, grad_z)[0]
                      + tf.gradients(beta * commit_loss, p)[0], p)
                     for p in enc_params]

        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        train_step = optimizer.apply_gradients(dec_grads + enc_grads + embed_grads)

    things_names = ["images",
                    "bn_flag",
                    "x_tilde",
                    "z_e_x",
                    "z_q_x",
                    "z_i_x",
                    "z_emb",
                    "loss",
                    "rec_loss",
                    "train_step"]
    things_tf = [images,
                 bn_flag,
                 x_tilde,
                 z_e_x,
                 z_q_x,
                 z_i_x,
                 z_emb,
                 loss,
                 rec_loss,
                 train_step]
    assert len(things_names) == len(things_tf)
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
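
The enc_grads line above is a straight-through style copy: the third argument of tf.gradients supplies the upstream gradient, so the reconstruction gradient taken at z_q_x is pushed back through z_e_x as if quantization were the identity. A toy illustration of the grad_ys mechanism (not the example's actual model):

import tensorflow as tf

x = tf.Variable([2.0])
y = 3.0 * x         # stands in for z_e_x
z = tf.identity(y)  # stands in for z_q_x
up = tf.gradients(tf.reduce_sum(tf.square(z)), z)  # dL/dz_q
st = tf.gradients(y, x, up)  # == 3 * dL/dz_q, copied back through the encoder
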
Example #6
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
        inputs_tm1 = inputs[:-1]
        inputs_t = inputs[1:]
        init_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_hidden = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        init_q_cell = tf.placeholder(tf.float32, shape=[batch_size, n_hid])
        r = create_model(inputs_tm1, inputs_t, init_hidden, init_cell,
                         init_q_hidden, init_q_cell)
        (pred_sm, pred, hiddens, cells, q_hiddens, q_cells,
         q_nst_hiddens, q_nvq_hiddens, i_hiddens, oh_tm1) = r
        rec_loss = tf.reduce_mean(
            CategoricalCrossEntropyIndexCost(pred_sm, inputs_t))

        alpha = 1.
        beta = 0.25
        vq_h_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(q_nvq_hiddens) - q_nst_hiddens))
        commit_h_loss = tf.reduce_mean(
            tf.square(q_nvq_hiddens - tf.stop_gradient(q_nst_hiddens)))

        loss = rec_loss + alpha * vq_h_loss + beta * commit_h_loss

        params = get_params_dict()
        grads = tf.gradients(loss, list(params.values()))
        learning_rate = 0.0001
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        grads = [
            tf.clip_by_value(g, -10., 10.) if g is not None else None
            for g in grads
        ]
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

    things_names = [
        "inputs", "inputs_tm1", "inputs_t", "init_hidden", "init_cell",
        "init_q_hidden", "init_q_cell", "hiddens", "cells", "q_hiddens",
        "q_cells", "q_nvq_hiddens", "i_hiddens", "pred", "pred_sm", "oh_tm1",
        "loss", "rec_loss", "train_step"
    ]
    things_tf = [
        inputs, inputs_tm1, inputs_t, init_hidden, init_cell, init_q_hidden,
        init_q_cell, hiddens, cells, q_hiddens, q_cells, q_nvq_hiddens,
        i_hiddens, pred, pred_sm, oh_tm1, loss, rec_loss, train_step
    ]
    assert len(things_names) == len(things_tf)
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
Example #7
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        # this placeholder must have a fully specified shape
        inputs = tf.placeholder(tf.float32,
                                shape=[word_length_limit, batch_size, 1])
        #outputs = tf.placeholder(tf.float32, shape=[word_length_limit + 1, batch_size, 1])
        #outputs_masks = tf.placeholder(tf.float32, shape=[word_length_limit + 1, batch_size])
        # these can leave the first (time) dimension dynamic
        outputs = tf.placeholder(tf.float32, shape=[None, batch_size, 1])
        outputs_masks = tf.placeholder(tf.float32, shape=[None, batch_size])
        pred_logits, enc_atts, dec_atts = create_model(inputs, outputs)
        enc_atts_0 = enc_atts[0]
        enc_atts_1 = enc_atts[1]
        enc_atts_2 = enc_atts[2]
        dec_atts_0 = dec_atts[0]
        dec_atts_1 = dec_atts[1]
        dec_atts_2 = dec_atts[2]
        loss_i = CategoricalCrossEntropyLinearIndexCost(
            pred_logits[:-1], outputs[1:])
        # the mask aligns with the shifted targets outputs[1:]
        loss = tf.reduce_sum(outputs_masks[1:] * loss_i) / tf.reduce_sum(
            outputs_masks[1:])

        params = get_params_dict()
        grads = tf.gradients(loss, list(params.values()))

        learning_rate = 0.0002
        optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(grads) == len(params)
        j = [(g, p) for g, p in zip(grads, params.values())]
        train_step = optimizer.apply_gradients(j)

    things_names = [
        "inputs", "outputs", "outputs_masks", "pred_logits", "enc_atts_0",
        "enc_atts_1", "enc_atts_2", "dec_atts_0", "dec_atts_1", "dec_atts_2",
        "loss", "train_step"
    ]
    _locals = locals()  # snapshot; eval() can't see function locals in Python 3
    things_tf = [_locals[tn] for tn in things_names]
    assert len(things_names) == len(things_tf)
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
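
The masked loss at the end divides by the number of valid target positions, so padded steps contribute nothing. A small NumPy check of that arithmetic with made-up shapes:

import numpy as np

T, B = 5, 2
mask = np.ones((T, B), dtype="float32")
mask[3:, 1] = 0.                     # second sequence is padded after step 3
per_step = np.random.rand(T - 1, B)  # stands in for loss_i
masked_mean = (mask[1:] * per_step).sum() / mask[1:].sum()
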
Example #8
def step(inp_t, h_tm1):
    output, state = SimpleRNNCell([inp_t], [3],
                                  h_tm1,
                                  h_dim,
                                  20,
                                  random_state=random_state,
                                  name="l1")
    h = state[0]
    return output, h


# assumed setup, not shown in the original snippet: placeholders matching
# the feed dict below
inputs = tf.placeholder(tf.float32, shape=[None, n_batch, 3])
init_h = tf.placeholder(tf.float32, shape=[n_batch, h_dim])

o = scan(step, [inputs], [None, init_h])
loss = tf.reduce_mean(o[0])
h_o = o[1]

params_dict = get_params_dict()
params = list(params_dict.values())
grads = tf.gradients(loss, params)

learning_rate = 0.0002
opt = tf.train.AdamOptimizer(learning_rate=learning_rate, use_locking=True)
updates = opt.apply_gradients(zip(grads, params))

inputs_np = random_state.randn(33, n_batch, 3)
init_h_np = np.zeros((n_batch, h_dim))
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {inputs: inputs_np, init_h: init_h_np}
    outs = [loss, updates, h_o]
    lop = sess.run(outs, feed)
Example #9
def create_graph():
    graph = tf.Graph()
    with graph.as_default():
        # vqvae part
        # define all the vqvae inputs and outputs
        vqvae_inputs = tf.placeholder(tf.float32,
                                      shape=[
                                          None, train_audio[0].shape[0],
                                          train_audio[0].shape[1],
                                          train_audio[0].shape[2]
                                      ])
        bn_flag = tf.placeholder_with_default(tf.zeros(shape=[]), shape=[])
        x_tilde, z_e_x, z_q_x, z_i_x, z_nst_q_x, z_emb = create_vqvae(
            vqvae_inputs, bn_flag)

        #vqvae_rec_loss = tf.reduce_mean(BernoulliCrossEntropyCost(x_tilde, vqvae_inputs))
        vqvae_rec_loss = tf.reduce_mean(tf.square(x_tilde - vqvae_inputs))
        vqvae_vq_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(z_e_x) - z_nst_q_x))
        vqvae_commit_loss = tf.reduce_mean(
            tf.square(z_e_x - tf.stop_gradient(z_nst_q_x)))
        vqvae_alpha = 1.
        vqvae_beta = 0.25
        vqvae_loss = vqvae_rec_loss + vqvae_alpha * vqvae_vq_loss + vqvae_beta * vqvae_commit_loss
        vqvae_params = get_params_dict()
        # snapshot the vqvae keys now; the params dict is *dynamic* and shared
        # with the rnn built below
        vqvae_params_keys = list(vqvae_params.keys())
        vqvae_grads = tf.gradients(vqvae_loss, list(vqvae_params.values()))

        learning_rate = 0.0002
        vqvae_optimizer = tf.train.AdamOptimizer(learning_rate,
                                                 use_locking=True)
        assert len(vqvae_grads) == len(vqvae_params)
        j = [(g, p) for g, p in zip(vqvae_grads, vqvae_params.values())]
        vqvae_train_step = vqvae_optimizer.apply_gradients(j)

        # rnn part
        # ultimately this runs as 2 session calls to keep the code lookup
        # simple, but the codes could be wired in directly like this:
        #rnn_inputs = tf.cast(tf.stop_gradient(tf.transpose(z_i_x, (2, 0, 1))), tf.float32)
        rnn_inputs = tf.placeholder(tf.float32,
                                    shape=[None, rnn_batch_size, 1])
        rnn_inputs_tm1 = rnn_inputs[:-1]
        rnn_inputs_t = rnn_inputs[1:]

        init_hidden = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        init_cell = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        init_q_hidden = tf.placeholder(tf.float32,
                                       shape=[rnn_batch_size, n_hid])
        init_q_cell = tf.placeholder(tf.float32, shape=[rnn_batch_size, n_hid])
        r = create_vqrnn(rnn_inputs_tm1, rnn_inputs_t, init_hidden, init_cell,
                         init_q_hidden, init_q_cell)
        (pred_sm, pred, hiddens, cells, q_hiddens, q_cells,
         q_nst_hiddens, q_nvq_hiddens, i_hiddens, oh_tm1) = r

        rnn_rec_loss = tf.reduce_mean(
            CategoricalCrossEntropyIndexCost(pred_sm, rnn_inputs_t))
        #rnn_rec_loss = tf.reduce_mean(CategoricalCrossEntropyLinearIndexCost(pred, rnn_inputs_t))

        rnn_alpha = 1.
        rnn_beta = 0.25
        rnn_vq_h_loss = tf.reduce_mean(
            tf.square(tf.stop_gradient(q_nvq_hiddens) - q_nst_hiddens))
        rnn_commit_h_loss = tf.reduce_mean(
            tf.square(q_nvq_hiddens - tf.stop_gradient(q_nst_hiddens)))

        rnn_loss = rnn_rec_loss + rnn_alpha * rnn_vq_h_loss + rnn_beta * rnn_commit_h_loss
        rnn_params = {
            k: v
            for k, v in get_params_dict().items() if k not in vqvae_params_keys
        }
        rnn_grads = tf.gradients(rnn_loss, list(rnn_params.values()))
        learning_rate = 0.0001
        rnn_optimizer = tf.train.AdamOptimizer(learning_rate, use_locking=True)
        assert len(rnn_grads) == len(rnn_params)
        rnn_grads = [
            tf.clip_by_value(g, -10., 10.) if g is not None else None
            for g in rnn_grads
        ]
        j = [(g, p) for g, p in zip(rnn_grads, rnn_params.values())]
        rnn_train_step = rnn_optimizer.apply_gradients(j)

    things_names = [
        "vqvae_inputs", "bn_flag", "x_tilde", "z_e_x", "z_q_x", "z_i_x",
        "z_emb", "vqvae_loss", "vqvae_rec_loss", "vqvae_train_step",
        "rnn_inputs", "rnn_inputs_tm1", "rnn_inputs_t", "init_hidden",
        "init_cell", "init_q_hidden", "init_q_cell", "hiddens", "cells",
        "q_hiddens", "q_cells", "q_nvq_hiddens", "i_hiddens", "pred",
        "pred_sm", "oh_tm1", "rnn_loss", "rnn_rec_loss", "rnn_train_step"
    ]

    _locals = locals()  # snapshot; eval() can't see function locals in Python 3
    things_tf = [_locals[name] for name in things_names]
    for tn, tt in zip(things_names, things_tf):
        graph.add_to_collection(tn, tt)
    train_model = namedtuple('Model', things_names)(*things_tf)
    return graph, train_model
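
The commented-out rnn_inputs line hints at the intended wiring: run the VQ-VAE first, then feed its discrete codes z_i_x back in as the RNN input in a second session call. A hypothetical sketch of that loop; audio_batch, h, c, qh, and qc are assumed arrays shaped to match the placeholders, and the transpose follows the commented hint:

graph, m = create_graph()
with graph.as_default():
    init = tf.global_variables_initializer()
with tf.Session(graph=graph) as sess:
    sess.run(init)
    z_i, _ = sess.run([m.z_i_x, m.vqvae_train_step],
                      {m.vqvae_inputs: audio_batch})
    rnn_in = z_i.transpose(2, 0, 1).astype("float32")  # codes, time-major
    _ = sess.run([m.rnn_loss, m.rnn_train_step],
                 {m.rnn_inputs: rnn_in, m.init_hidden: h, m.init_cell: c,
                  m.init_q_hidden: qh, m.init_q_cell: qc})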