Example #1
def plr_slr(bs_seq_len_list):
    """Given a list of pairs (batch size, seq_len), 
    calculate the throughput of an LS-LSTM, an SRU, a QRNN(2),
    and QRNN(10) using the parallel kernel as opposed to the serial
    one"""
    import tensorflow as tf
    import numpy as np
    import scipy.io.wavfile
    from tensorflow.contrib import rnn
    import math
    from layers_new import linear_surrogate_lstm
    from layers_new import s_linear_surrogate_lstm
    from layers_new import SRU
    from layers_new import s_SRU
    from layers_new import QRNN
    from layers_new import s_QRNN
    import time
    import os
    import random

    throughput_list = []

    #TODO:
    #Make LS_LSTM with PLR
    #Make SRU with PLR
    #Make QRNN with PLR
    #Make LS_LSTM with SLR
    #Make SRU with SLR
    #Make QRNN with SLR

    for _, seq_len in bs_seq_len_list:  #batch size is derived from seq_len below
        #First generate the LS-LSTM and work out the throughput
        tf.reset_default_graph()
        n_hidden = 256
        n_classes = 2
        n_steps = seq_len
        batch_size = 65536 // seq_len  #keep batch_size * seq_len constant
        bs = batch_size
        print("Batch size is {} and sequence length is {}".format(bs, seq_len))
        n_input = 24
        n_layers = 2
        forget_gate_init = 1.0  # = 1/(n_in). We use uniform p(x)
        #Training Parameters
        sn = 1.0 / math.sqrt(n_hidden)
        learning_rate = 0.001
        training_iters = 5000000

        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal(
                                 [n_hidden, n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')

        layer1 = linear_surrogate_lstm(x, n_hidden, name='ls-lstm')
        outputs = linear_surrogate_lstm(layer1, n_hidden, name='ls-lstm2')
        pred = tf.matmul(outputs[-1], W1) + b1
        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        ls_lstm_tp = (bs * n_steps) / np.mean(times)

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal(
                                 [n_hidden, n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = s_linear_surrogate_lstm(x, n_hidden, name='ls-lstm')
        output = s_linear_surrogate_lstm(layer1, n_hidden, name='ls-lstm2')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        s_ls_lstm_tp = (bs * n_steps) / np.mean(times)

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal([n_input,
                                                           n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = SRU(x, name='SRU_1')
        output = SRU(layer1, name='SRU_2')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        sru_tp = (bs * n_steps) / np.mean(times)

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal([n_input,
                                                           n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = s_SRU(x, name='s_SRU_1')
        output = s_SRU(layer1, name='s_SRU_2')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        s_sru_tp = (bs * n_steps) / np.mean(times)

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal([n_input,
                                                           n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = QRNN(x, 2, name='QRNN_1')
        output = QRNN(layer1, 2, name='QRNN_2')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        qrnn_2_tp = (bs * n_steps) / np.mean(times)

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal([n_input,
                                                           n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = s_QRNN(x, 2, name='s_QRNN_3')
        output = s_QRNN(layer1, 2, name='s_QRNN_4')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        s_qrnn_2_tp = (bs * n_steps) / np.mean(times)
        print("s-QRNN(2) step time: mean {:.4f} s, std {:.4f} s".format(
            np.mean(times), np.std(times)))

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal([n_input,
                                                           n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = QRNN(x, 10, name='QRNN_2')
        output = QRNN(layer1, 10, name='QRNN_6')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        qrnn_10_tp = (bs * n_steps) / np.mean(times)

        tf.reset_default_graph()
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal([n_input,
                                                           n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')
        layer1 = s_QRNN(x, 10, name='s_QRNN_7')
        output = s_QRNN(layer1, 10, name='s_QRNN_8')
        pred = tf.matmul(output[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    step += 1
                    if step > 3:  #Skip the first few iterations as warm-up
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
        s_qrnn_10_tp = (bs * n_steps) / np.mean(times)

        throughput_list.append([
            ls_lstm_tp, s_ls_lstm_tp, sru_tp, s_sru_tp, qrnn_2_tp, s_qrnn_2_tp,
            qrnn_10_tp, s_qrnn_10_tp
        ])
    return throughput_list
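
#Usage sketch (assumption): the (batch size, seq_len) pairs below are purely
#illustrative, and their batch-size entries are ignored because plr_slr derives
#the batch size as 65536 // seq_len. Each result row follows the append order
#above: LS-LSTM, serial LS-LSTM, SRU, serial SRU, QRNN(2), serial QRNN(2),
#QRNN(10), serial QRNN(10), measured in time steps processed per second.
if __name__ == '__main__':
    pairs = [(64, 1024), (32, 2048), (16, 4096)]  #hypothetical inputs
    names = ['LS-LSTM', 's-LS-LSTM', 'SRU', 's-SRU',
             'QRNN(2)', 's-QRNN(2)', 'QRNN(10)', 's-QRNN(10)']
    for (_, seq_len), row in zip(pairs, plr_slr(pairs)):
        print("seq_len {}:".format(seq_len))
        for name, tp in zip(names, row):
            print("  {:<10} {:.1f} steps/s".format(name, tp))
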
Example #2
def random_test(bs_seq_len_list):
    """Given a list of pairs (batch size, seq_len), 
    calculate the throughput of an LS-LSTM vs a cudnn on 
    random data"""
    import tensorflow as tf
    import numpy as np
    import scipy.io.wavfile
    from tensorflow.contrib import rnn
    import math
    from layers_new import linear_surrogate_lstm
    import time
    import os
    import random

    ls_lstm_throughput_dict = {}
    cudnn_throughput_dict = {}
    for bs, seq_len in bs_seq_len_list:
        #First generate the LS-LSTM and work out the throughput
        tf.reset_default_graph()
        n_hidden = 234
        n_classes = 2
        n_steps = seq_len
        batch_size = bs
        n_input = 4
        n_layers = 2
        forget_gate_init = 1.0  # = 1/(n_in). We use uniform p(x)
        sn = 1.0 / math.sqrt(n_hidden)
        #Training Parameters
        learning_rate = 0.001
        training_iters = 5000000

        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])
        tf.get_variable_scope().reuse == True
        W1 = tf.get_variable('W1',
                             initializer=tf.random_normal(
                                 [n_hidden, n_classes]),
                             dtype='float')
        b1 = tf.get_variable('b1',
                             initializer=tf.zeros([n_classes]),
                             dtype='float')

        layer1 = linear_surrogate_lstm(x, n_hidden, name='ls-lstm')
        outputs = linear_surrogate_lstm(layer1, n_hidden, name='ls-lstm2')
        pred = tf.matmul(outputs[-1], W1) + b1

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        #Initialise the model and evaluate
        step = 0
        times = []
        x_in = np.random.random((n_steps, batch_size, n_input))
        y_in = np.random.random((batch_size, n_classes))
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:  #Do a few iters to warm up
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    if step > 3:
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
                    step += 1
        ls_lstm_throughput_dict[(bs,
                                 n_steps)] = (bs * n_steps) / np.mean(times)

        #--------------------------------------------------------------------------------
        # Now we do the CUDNN
        tf.reset_default_graph()

        #Initialise variables
        ################################################################################
        #Generate the lstm hook to CUDA
        model = tf.contrib.cudnn_rnn.CudnnLSTM(n_layers, n_hidden, n_input)

        # tf Graph input
        x = tf.placeholder("float", [n_steps, batch_size, n_input])
        y = tf.placeholder("float", [batch_size, n_classes])

        #Define weights & rnn initial states
        weights = {
            'out':
            tf.Variable(tf.random_normal([n_hidden, n_classes]), dtype='float')
        }
        biases = {
            'out': tf.Variable(tf.random_normal([n_classes]), dtype='float')
        }
        #Initial state of the LSTM at each batch, we don't let this be trained.
        input_h = {
            'out':
            tf.Variable(tf.zeros([n_layers, batch_size, n_hidden]),
                        dtype='float',
                        trainable=False)
        }
        input_c = {
            'out':
            tf.Variable(tf.zeros([n_layers, batch_size, n_hidden]),
                        dtype='float',
                        trainable=False)
        }
        #Initialise all weights & biases for the cudnnlstm: set weights according to Glorot
        #There are eight weights and biases per layer in the LSTM. Described in
        #http://docs.nvidia.com/deeplearning/sdk/cudnn-user-guide/index.html#cudnnRNNMode_t
        #There are two biases which sum to give the biases in the canonical form of the LSTM
        #This seems redundant - I'm not sure why CUDA is implemented in this way.

        weight_list = []
        bias_list = []
        for n in range(4):
            weight_list.append(
                np.float32(
                    np.random.uniform(low=-sn,
                                      high=sn,
                                      size=[n_hidden, n_input])))

        for n in range(4, 8):
            weight_list.append(
                np.float32(
                    np.random.uniform(low=-sn,
                                      high=sn,
                                      size=[n_hidden, n_hidden])))
        if n_layers == 2:
            for n in range(4):
                weight_list.append(
                    np.float32(
                        np.random.uniform(low=-sn,
                                          high=sn,
                                          size=[n_hidden, n_hidden])))

            for n in range(4, 8):
                weight_list.append(
                    np.float32(
                        np.random.uniform(low=-sn,
                                          high=sn,
                                          size=[n_hidden, n_hidden])))

        for n in range(8):
            bias_list.append(np.float32(np.zeros([n_hidden])))
        if n_layers == 2:
            for n in range(8):
                bias_list.append(np.float32(np.zeros([n_hidden])))
            bias_list[13] = np.float32(forget_gate_init * np.ones([n_hidden]))

        bias_list[5] = np.float32(forget_gate_init * np.ones([n_hidden]))
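        #Index note (assumption, following the cuDNN canonical order referenced
        #above): each layer contributes 8 bias vectors, indices 0-3 for the
        #input-side gates and 4-7 for the recurrent-side gates, in the order
        #(input, forget, cell, output). Index 5 is therefore layer 1's
        #recurrent forget-gate bias, and 8 + 5 = 13 is the same bias in layer 2.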

        #Initialize the opaque parameter buffer used to handle the cudnnlstm params
        #If we try to pass the canonical_to_params tensor through the call graph,
        #we fail because the size must be known statically. The easiest way to get
        #around this (though hacky) is to get the values out by casting to an np array
        #and then initialising a tensor with those values.

        params_size_t = ((n_input * n_hidden * 4) + (n_hidden * n_hidden * 4) +
                         (n_hidden * 2 * 4))
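        #Note: params_size_t above is the canonical parameter count for a single
        #layer (4 input-weight matrices, 4 recurrent matrices and 8 bias vectors
        #of size n_hidden); it is not used below, since canonical_to_params
        #builds the flat buffer directly from weight_list and bias_list.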
        flat_params = model.canonical_to_params(weight_list, bias_list)
        flat_params_as_ndarray = tf.Session().run(flat_params)

        params = {
            'out':
            tf.get_variable('param_buffer',
                            initializer=tf.constant(flat_params_as_ndarray))
        }

        #Generate network
        ################################################################################
        outputs, states1, states2 = model(is_training=True,
                                          input_data=x,
                                          input_h=input_h['out'],
                                          input_c=input_c['out'],
                                          params=params['out'])

        # Linear activation, using rnn inner loop on last output
        pred = tf.matmul(outputs[-1], weights['out']) + biases['out']

        #Evaluate network, run adam and clip gradients
        ################################################################################
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
        optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
        raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
        gradients = raw_gradients
        optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
        init = tf.global_variables_initializer()

        step = 0
        times = []
        with tf.device("gpu:0"):
            with tf.Session() as sess:
                sess.run(init)
                while step < 10:
                    out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                    #Warm up with a few iters first
                    if step > 3:
                        start = time.time()
                        out = sess.run(pred, feed_dict={x: x_in, y: y_in})
                        finish = time.time()
                        times.append(finish - start)
                    step += 1
        cudnn_throughput_dict[(bs, n_steps)] = (bs * n_steps) / np.mean(times)
    return cudnn_throughput_dict, ls_lstm_throughput_dict
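
#Usage sketch (assumption): the (batch size, seq_len) pairs are illustrative.
#random_test returns two dicts keyed by (batch size, seq_len); the ratio below
#is cuDNN throughput divided by LS-LSTM throughput.
if __name__ == '__main__':
    pairs = [(128, 256), (64, 512), (32, 1024)]  #hypothetical inputs
    cudnn_tp, ls_lstm_tp = random_test(pairs)
    for key in pairs:
        print("bs={}, seq_len={}: cuDNN {:.1f}, LS-LSTM {:.1f}, ratio {:.2f}".format(
            key[0], key[1], cudnn_tp[key], ls_lstm_tp[key],
            cudnn_tp[key] / ls_lstm_tp[key]))
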
Example #3
def ls_lstm(n_steps=1024,
            n_hidden=1024,
            n_input=128,
            batch_size=8,
            n_layers=1,
            n_converge=5):
    import tensorflow as tf
    import numpy as np
    import math
    import time
    import os
    from layers_new import linear_surrogate_lstm
    #gen_2b_data and gen_2b_data_1 are assumed to be provided by the
    #surrounding project; they are not defined in this example.

    #Network Parameters
    tf.reset_default_graph()
    n_classes = 2
    sn = 1 / math.sqrt(n_hidden)  #Glorot initialisation, var(p(x))
    forget_gate_init = 5.0  #Initial forget-gate bias
    clip = 20  #We use gradient clipping to stop the gradient exploding initially
    #for the much larger networks

    #Training Parameters
    learning_rate = 0.0001
    training_iters = 5000000
    display_step = 10
    id_num = np.random.uniform(
        0, 50)  #To distinguish from other runs of identical models

    #Initialise variables
    ################################################################################
    #Generate the lstm hook to PLR

    # tf Graph input
    x = tf.placeholder("float", [n_steps, batch_size, n_input])
    y = tf.placeholder("float", [batch_size, n_classes])

    #Define weights & rnn initial states

    tf.get_variable_scope().reuse == True
    W1 = tf.get_variable('W1',
                         initializer=tf.random_normal([n_hidden, n_classes]),
                         dtype='float')
    b1 = tf.get_variable('b1',
                         initializer=tf.zeros([n_classes]),
                         dtype='float')

    #Initialise all weights & biases for the plrlstm: set weights according to Glorot
    #There are eight weights and 4 biases per layer in the LSTM. Described in
    #http://docs.nvidia.com/deeplearning/sdk/cudnn-user-guide/index.html#cudnnRNNMode_t
    #There are two biases which sum to give the biases in the canonical form of the LSTM

    #Generate network
    ################################################################################
    layer1 = linear_surrogate_lstm(x, n_hidden, name='ls-lstm')
    outputs = linear_surrogate_lstm(layer1, n_hidden, name='ls-lstm2')

    # Linear activation, using rnn inner loop last output
    pred = tf.matmul(outputs[-1], W1) + b1

    #Evaluate network, run adam and clip gradients
    ################################################################################
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
    raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
    gradients, _ = tf.clip_by_global_norm(raw_gradients, clip)
    optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar('cost', cost)
    tf.summary.scalar('acc', accuracy)
    init = tf.global_variables_initializer()
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()

    start = time.time()
    acc_list = [0] * n_converge
    if not os.path.exists('./LS_LSTM_' + str(n_steps) + '_steps_model_'):
        os.makedirs('./LS_LSTM_' + str(n_steps) + '_steps_model_')
    if not os.path.exists('./LS_LSTM_' + str(n_steps) + '_steps_log_'):
        os.makedirs('./LS_LSTM_' + str(n_steps) + '_steps_log_')

    with tf.device("gpu:0"):
        with tf.Session() as sess:
            sess.run(init)
            step = 1
            test_writer = tf.summary.FileWriter(
                './LS_LSTM_' + str(n_steps) + '_steps_log_', sess.graph)
            # Keep training until reach max iterations
            while step * batch_size < training_iters:
                if batch_size == 1:
                    batch_x, batch_y = gen_2b_data_1(n_steps - 1, n_input - 1)
                else:
                    batch_x, batch_y = gen_2b_data(n_steps - 1, n_input - 1,
                                                   batch_size)
                # Run optimization op (backprop)
                sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
                if step % display_step == 0:
                    # Calculate batch accuracy
                    acc = sess.run(accuracy,
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y
                                   })
                    # Calculate batch loss
                    loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
                    summary = sess.run(merged,
                                       feed_dict={
                                           x: batch_x,
                                           y: batch_y
                                       })
                    test_writer.add_summary(summary, step)
                    print("Iter " + str(step) + ", Minibatch Loss= " + \
                          "{:.6f}".format(loss) + ", Training Accuracy= " + \
                          "{:.5f}".format(acc))
                    if step % (display_step *
                               10) == 0:  #Save the model every so often
                        saver.save(sess,
                                   './LS_LSTM_' + str(n_steps) +
                                   '_steps_model_',
                                   global_step=step)
                    if acc_list == [1.0] * n_converge:
                        print "Converged after {} iterations and {} seconds".format(
                            step,
                            time.time() - start)
                        break
                    else:
                        acc_list.append(acc)
                        acc_list.pop(0)
                step += 1

            print("Optimization Finished!")
    return step, time.time() - start
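
#Usage sketch (assumption): smaller-than-default sizes so it runs quickly, and
#gen_2b_data / gen_2b_data_1 must be importable from the surrounding project.
if __name__ == '__main__':
    steps, seconds = ls_lstm(n_steps=256, n_hidden=256, n_input=64,
                             batch_size=16, n_layers=1, n_converge=5)
    print("Finished after {} steps in {:.1f} seconds".format(steps, seconds))
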
Example #4
# with tf.variable_scope('pre_fc'): #preact params. specify separately so we can
#     tf.get_variable_scope().reuse == True    #set forget bias
#     W = tf.get_variable('W',
#                         initializer=tf.random_uniform([n_input+n_hidden, 4*n_hidden],
#                                                       minval=-sn, maxval=sn), dtype='float')
#     init = tf.constant(forget_gate_init*np.ones((n_hidden)), dtype='float32')
#     f_bias = tf.get_variable('f_bias', initializer= init, dtype='float')
#     other_bias = tf.get_variable('other_bias',
#                                  initializer=tf.zeros([3*n_hidden]), dtype='float')
#     b = tf.get_variable('b', initializer=tf.concat([f_bias, other_bias],axis=0),
#                         dtype='float')

#Generate network
################################################################################
outputs = linear_surrogate_lstm(x, n_hidden, name='ls-lstm')

# Linear activation, using rnn inner loop last output
with tf.variable_scope(None, default_name='linear_layer'):
    pred = tf.matmul(outputs[-1], W1) + b1

#Evaluate network, run adam and clip gradients
################################################################################
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
gradients, _ = tf.clip_by_global_norm(raw_gradients, clip)
optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
Example #5
tf.get_variable_scope().reuse == True
W1 = tf.get_variable('W1',
                     initializer=tf.random_normal([n_hidden, n_classes]),
                     dtype='float')
b1 = tf.get_variable('b1', initializer=tf.zeros([n_classes]), dtype='float')

#Initialise all weights & biases for the plrlstm: set weights according to Glorot
#There are eight weights and 4 biases per layer in the LSTM. Described in
#http://docs.nvidia.com/deeplearning/sdk/cudnn-user-guide/index.html#cudnnRNNMode_t
#There are two biases which sum to give the biases in the canonical form of the LSTM

#Generate network
################################################################################

layer1 = linear_surrogate_lstm(x, n_hidden, name='ls-lstm')
outputs = linear_surrogate_lstm(layer1, n_hidden, name='ls-lstm2')
pred = tf.matmul(outputs[-1], W1) + b1

#Evaluate network, run adam and clip gradients
################################################################################
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer_0 = tf.train.AdamOptimizer(learning_rate=learning_rate)
raw_gradients, variables = zip(*optimizer_0.compute_gradients(cost))
gradients, _ = tf.clip_by_global_norm(raw_gradients, clip)
optimizer = optimizer_0.apply_gradients(zip(gradients, variables))
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
tf.summary.scalar('cost', cost)
tf.summary.scalar('acc', accuracy)
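
#Context sketch (assumption): the fragments in Examples #4 and #5 rely on
#surrounding definitions like the following; the concrete values here are
#illustrative only, and W1 / b1 used by Example #4 are as defined in Example #5.
import math
import tensorflow as tf
from layers_new import linear_surrogate_lstm

n_steps, batch_size, n_input = 1024, 8, 128
n_hidden, n_classes = 1024, 2
learning_rate = 0.0001
clip = 20  #gradient-clipping norm
sn = 1.0 / math.sqrt(n_hidden)  #Glorot-style scale for uniform init
forget_gate_init = 1.0  #initial forget-gate bias

x = tf.placeholder("float", [n_steps, batch_size, n_input])
y = tf.placeholder("float", [batch_size, n_classes])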