def main(model, T, n_iter, n_batch, n_hidden, capacity, fft, comp=False):
    """Train and test an RNN variant on the noise-padded recall task.

    Args:
        model: one of "LSTM", "GRU", "EUNN", "GORU".
        T: padding length; each input sequence has T + 11 steps.
        n_iter: number of training iterations (one fresh batch each).
        n_batch: minibatch size (also used as the test-set size).
        n_hidden: hidden-state dimension.
        capacity: capacity parameter for the EUNN/GORU cells.
        fft: whether EUNN/GORU use the FFT-style parameterisation.
        comp: use complex-valued EUNN dynamics. New keyword with a safe
            default: the original body referenced an undefined name
            ``comp``, which raised NameError whenever model == "EUNN".

    Raises:
        ValueError: if ``model`` is not a recognised cell type.
    """
    # --- Set data params ----------------
    n_input = 11
    n_sequence = 10
    n_test = n_batch
    n_steps = T + 11
    n_classes = 10

    # --- Create graph and compute gradients ----------------------
    x = tf.placeholder("int32", [None, n_steps])
    y = tf.placeholder("int64", [None, n_sequence])
    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # --- Input to hidden layer ----------------------
    if model == "LSTM":
        cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden,
                                            state_is_tuple=True,
                                            forget_bias=1)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "GRU":
        cell = tf.nn.rnn_cell.GRUCell(n_hidden)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, fft, comp)
        if comp:
            # Complex-valued cell: run in complex64, keep the real part.
            hidden_out_comp, _ = tf.nn.dynamic_rnn(cell, input_data,
                                                   dtype=tf.complex64)
            hidden_out = tf.real(hidden_out_comp)
        else:
            hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data,
                                              dtype=tf.float32)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, fft)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    else:
        # Previously an unknown model fell through silently and crashed
        # later with an unrelated NameError on `hidden_out`.
        raise ValueError("unknown model: %s" % model)

    # --- Hidden Layer to Output ----------------------
    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32)
    V_bias = tf.get_variable("V_bias", shape=[n_classes], dtype=tf.float32)
    # Only the last n_sequence timesteps carry the recall targets.
    hidden_out_list = tf.unstack(hidden_out, axis=1)[-n_sequence:]
    temp_out = tf.stack([tf.matmul(step_out, V_weights)
                         for step_out in hidden_out_list])
    # temp_out is (time, batch, classes); transpose back to batch-major.
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # --- evaluate process ----------------------
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.001,
                                          decay=0.9).minimize(cost)
    init = tf.global_variables_initializer()

    # --- Training Loop ----------------------
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        sess.run(init)
        while step < n_iter:
            batch_x, batch_y = noise_data(T, n_batch, n_sequence)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            acc, loss = sess.run([accuracy, cost],
                                 feed_dict={x: batch_x, y: batch_y})
            print("Iter " + str(step) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
            step += 1
        print("Optimization Finished!")

        # --- test ----------------------
        test_x, test_y = noise_data(T, n_test, n_sequence)
        # One combined run instead of two: same values, one forward pass.
        test_acc, test_loss = sess.run([accuracy, cost],
                                       feed_dict={x: test_x, y: test_y})
        print("Test result: Loss= " + "{:.6f}".format(test_loss) +
              ", Accuracy= " + "{:.5f}".format(test_acc))
def main(model, T, n_epochs, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay):
    """Train and test an RNN variant on text8 character-level modelling.

    Args:
        model: one of "LSTM", "GRU", "RNN", "EURNN", "GORU".
        T: sequence length (truncated-BPTT window).
        n_epochs: number of passes over the training data.
        n_batch: minibatch size.
        n_hidden: hidden-state dimension.
        capacity: capacity parameter for the EURNN/GORU cells.
        comp: use complex-valued dynamics for EURNN/GORU.
        FFT: whether EURNN/GORU use the FFT-style parameterisation.
        learning_rate: RMSProp learning rate.
        decay: RMSProp decay.

    Raises:
        ValueError: if ``model`` is not a recognised cell type.

    Side effects: writes validation/test losses to a file under
    ./output/character/text8/.
    """
    # BUGFIX: `notstates` was referenced below but never defined, raising
    # NameError on the first training batch. Default False so the recurrent
    # state is threaded between consecutive batches (the `states` fetch and
    # `h` feed exist exactly for that) — confirm against the original intent.
    notstates = False

    # --- Set data params ----------------
    # Create Data
    max_len_data = 100000000
    epoch_train, vocab_to_idx = file_data('train', n_batch, max_len_data, T,
                                          n_epochs, None)
    n_input = len(vocab_to_idx)
    epoch_val, _ = file_data('valid', n_batch, max_len_data, T, 10000,
                             vocab_to_idx)
    epoch_test, _ = file_data('test', n_batch, max_len_data, T, 1,
                              vocab_to_idx)
    n_output = n_input

    # --- Create graph and compute gradients ----------------------
    x = tf.placeholder("int32", [None, T])
    y = tf.placeholder("int64", [None, T])
    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # Input to hidden layer. `h` is the initial-state placeholder used to
    # feed the previous batch's final state back in.
    cell = None
    h = None
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
        if h is None:
            h = cell.zero_state(n_batch, tf.float32)
        hidden_out, states = tf.nn.dynamic_rnn(cell, input_data,
                                               dtype=tf.float32)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
        if h is None:
            h = cell.zero_state(n_batch, tf.float32)
        hidden_out, states = tf.nn.dynamic_rnn(cell, input_data,
                                               dtype=tf.float32)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
        if h is None:
            h = cell.zero_state(n_batch, tf.float32)
        hidden_out, states = tf.nn.dynamic_rnn(cell, input_data,
                                               dtype=tf.float32)
    elif model == "EURNN":
        cell = EURNNCell(n_hidden, capacity, FFT, comp)
        if h is None:
            h = cell.zero_state(n_batch, tf.float32)
        if comp:
            hidden_out_comp, states = tf.nn.dynamic_rnn(cell, input_data,
                                                        dtype=tf.complex64)
            hidden_out = tf.real(hidden_out_comp)
        else:
            hidden_out, states = tf.nn.dynamic_rnn(cell, input_data,
                                                   dtype=tf.float32)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT, comp)
        if h is None:
            h = cell.zero_state(n_batch, tf.float32)
        if comp:
            hidden_out_comp, states = tf.nn.dynamic_rnn(cell, input_data,
                                                        dtype=tf.complex64)
            hidden_out = tf.real(hidden_out_comp)
        else:
            hidden_out, states = tf.nn.dynamic_rnn(cell, input_data,
                                                   dtype=tf.float32)
    else:
        # Previously an unknown model fell through and crashed later with
        # an unrelated NameError on `hidden_out`.
        raise ValueError("unknown model: %s" % model)

    # Hidden Layer to Output (Glorot-style uniform init).
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable(
        "V_weights", shape=[n_hidden, n_output], dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-V_init_val, V_init_val))
    V_bias = tf.get_variable(
        "V_bias", shape=[n_output], dtype=tf.float32,
        initializer=tf.constant_initializer(0.01))
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(step_out, V_weights)
                         for step_out in hidden_out_list])
    # temp_out is (time, batch, vocab); transpose back to batch-major.
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # define evaluate process
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()
    for v in tf.global_variables():
        print(v.name)

    # --- save result ----------------------
    filename = "./output/character/text8/T=" + str(
        T) + "/" + model + "_N=" + str(n_hidden)
    if model == "EURNN" or model == "GORU":
        print(model)
        if FFT:
            filename += "_FFT"
        else:
            filename = filename + "_L=" + str(capacity)
    filename = filename + ".txt"
    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    f = open(filename, 'w')
    f.write("########\n\n")
    f.write("## \tModel: %s with N=%d" % (model, n_hidden))
    if model == "EURNN" or model == "GORU":
        if FFT:
            f.write(" FFT")
        else:
            f.write(" L=%d" % (capacity))
    f.write("\n\n")
    f.write("########\n\n")

    def do_validation():
        # Average the loss over one pass of the validation set and log it.
        # Closes over sess / t / validation_losses / f from the training
        # loop below, so it may only be called from inside the session.
        j = 0
        val_losses = []
        for val in epoch_val:
            j += 1
            if j >= 2:
                break  # only the first validation epoch is used
            print("Running validation...")
            val_state = None
            for stepb, (X_val, Y_val) in enumerate(val):
                val_dict = {x: X_val, y: Y_val}
                if val_state is not None:
                    # Thread the recurrent state between validation batches.
                    val_dict[h] = val_state
                if notstates:
                    val_acc, val_loss = sess.run([accuracy, cost],
                                                 feed_dict=val_dict)
                else:
                    val_acc, val_loss, val_state = sess.run(
                        [accuracy, cost, states], feed_dict=val_dict)
                val_losses.append(val_loss)
        print("Validations:", )
        validation_losses.append(sum(val_losses) / len(val_losses))
        print("Validation Loss= " +
              "{:.6f}".format(validation_losses[-1]))
        f.write("%d\t%f\n" % (t, validation_losses[-1]))
        f.flush()

    # --- Training Loop -----------------------------------------------------
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        print("Session Created")
        steps = []
        losses = []
        accs = []
        validation_losses = []
        sess.run(init)
        training_state = None
        i = 0
        t = 0
        for epoch in epoch_train:
            print("Epoch: ", i)
            for step, (X, Y) in enumerate(epoch):
                myfeed_dict = {x: X, y: Y}
                if training_state is not None:
                    # Carry the final state of the previous batch forward.
                    myfeed_dict[h] = training_state
                if notstates:
                    _, acc, loss = sess.run([optimizer, accuracy, cost],
                                            feed_dict=myfeed_dict)
                else:
                    _, acc, loss, training_state = sess.run(
                        [optimizer, accuracy, cost, states],
                        feed_dict=myfeed_dict)
                print("Iter " + str(step) + ", Minibatch Loss= " +
                      "{:.6f}".format(loss) + ", Training Accuracy= " +
                      "{:.5f}".format(acc))
                steps.append(t)
                losses.append(loss)
                accs.append(acc)
                t += 1
                if step % 5000 == 4999:
                    do_validation()
            i += 1
        print("Optimization Finished!")

        # --- test ----------------------
        j = 0
        test_losses = []
        for test in epoch_test:
            j += 1
            if j >= 2:
                break  # only the first test epoch is used
            print("Running validation...")
            for stepb, (X_test, Y_test) in enumerate(test):
                test_dict = {x: X_test, y: Y_test}
                test_acc, test_loss = sess.run([accuracy, cost],
                                               feed_dict=test_dict)
                test_losses.append(test_loss)
        print("test:", )
        # BUGFIX: the mean was previously appended into test_losses itself,
        # polluting the list it was computed from.
        mean_test_loss = sum(test_losses) / len(test_losses)
        print("test Loss= " + "{:.6f}".format(mean_test_loss))
        f.write("Test result: %d\t%f\n" % (t, mean_test_loss))
    f.close()  # previously leaked
def main(model, T, n_iter, n_batch, n_hidden, capacity, complex, fft):
    """Train and test an RNN variant on the nested-parentheses task.

    Args:
        model: one of "LSTM", "GRU", "EUNN", "GORU", "Orthogonal_LSTM".
        T: sequence length.
        n_iter: number of training iterations (one fresh batch each).
        n_batch: minibatch size.
        n_hidden: hidden-state dimension.
        capacity: capacity parameter for EUNN/GORU/Orthogonal_LSTM cells.
        complex: use complex-valued EUNN dynamics. NOTE(review): this
            parameter shadows the builtin ``complex``; kept as-is because
            renaming would break keyword-argument callers.
        fft: whether EUNN/GORU use the FFT-style parameterisation.

    Raises:
        ValueError: if ``model`` is not a recognised cell type.

    Side effect: calls plot_log_loss(...) with the training curve.
    """
    # --- Set data params ----------------
    n_input = 30
    n_output = 10
    n_test = 10000
    n_steps = T
    n_classes = 21

    # --- Create graph and compute gradients ----------------------
    x = tf.placeholder("int32", [None, n_steps])
    y = tf.placeholder("int64", [None, n_steps, n_output])
    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # --- Input to hidden layer ----------------------
    if model == "LSTM":
        cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden,
                                            state_is_tuple=True,
                                            forget_bias=1)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "GRU":
        cell = tf.nn.rnn_cell.GRUCell(n_hidden)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, fft, complex)
        if complex:
            # Complex-valued cell: run in complex64, keep the real part.
            hidden_out_comp, _ = tf.nn.dynamic_rnn(cell, input_data,
                                                   dtype=tf.complex64)
            hidden_out = tf.real(hidden_out_comp)
        else:
            hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data,
                                              dtype=tf.float32)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, fft)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "Orthogonal_LSTM":
        cell = Orthogonal_LSTM_Cell(n_hidden, capacity, fft)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    else:
        # Previously an unknown model fell through silently and crashed
        # later with an unrelated NameError on `hidden_out`.
        raise ValueError("unknown model: %s" % model)

    # --- Hidden Layer to Output (Glorot-style uniform init) --------------
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable(
        "V_weights", shape=[n_hidden, n_classes * n_output],
        dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-V_init_val, V_init_val))
    V_bias = tf.get_variable(
        "V_bias", shape=[n_classes * n_output], dtype=tf.float32,
        initializer=tf.constant_initializer(0.01))
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(step_out, V_weights)
                         for step_out in hidden_out_list])
    # Per-step logits over n_output parallel 21-way classification heads.
    output_data = tf.reshape(
        tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias),
        [-1, n_steps, n_output, n_classes])

    # --- evaluate process ----------------------
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    correct_pred = tf.equal(tf.argmax(output_data, 3), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(cost)
    init = tf.global_variables_initializer()

    # --- Training Loop ----------------------
    # Create some arrays to store training data
    iterations = []
    losses = []
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        sess.run(init)
        while step < n_iter:
            batch_x, batch_y = paren_data(T, n_batch)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            acc, loss = sess.run([accuracy, cost],
                                 feed_dict={x: batch_x, y: batch_y})
            print("Iter " + str(step) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
            iterations.append(step)
            losses.append(loss)
            step += 1
        print("Optimization Finished!")

        # --- test ----------------------
        test_x, test_y = paren_data(T, n_test)
        # One combined run instead of two: same values, one forward pass.
        test_acc, test_loss = sess.run([accuracy, cost],
                                       feed_dict={x: test_x, y: test_y})
        print("Test result: Loss= " + "{:.6f}".format(test_loss) +
              ", Accuracy= " + "{:.5f}".format(test_acc))
    plot_log_loss(iterations, losses, model)
def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay, ismatrix):
    """Train and test an RNN variant on the copying-memory task.

    Args:
        model: one of "LSTM", "LSTSM", "LSTUM", "LSTRM", "GRU", "RNN",
            "EURNN", "GORU".
        T: delay length; each sequence has T + 20 steps.
        n_iter: number of training iterations (one pre-generated batch each).
        n_batch: minibatch size (also used as the test-set size).
        n_hidden: hidden-state dimension.
        capacity: capacity parameter for the EURNN/GORU cells.
        comp: use complex-valued EURNN dynamics.
        FFT: whether EURNN/GORU use the FFT-style parameterisation.
        learning_rate, decay: RMSProp hyper-parameters (coerced to float).
        ismatrix: LSTRM only — use the matrix-state variant.

    Raises:
        ValueError: if ``model`` is not a recognised cell type.

    Side effects: writes the training curve and test result to a file
    under ./output/copying/.
    """
    learning_rate = float(learning_rate)
    decay = float(decay)

    # --- Set data params ----------------
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch
    n_steps = T + 20
    n_classes = 9

    # --- Create data --------------------
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # --- Create graph and compute gradients ----------------------
    x = tf.placeholder("int32", [None, n_steps])
    y = tf.placeholder("int64", [None, n_steps])
    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # --- Input to hidden layer ----------------------
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "LSTSM":
        cell = BasicLSTSMCell(n_hidden, forget_bias=1)
        # These cells start from a small random state rather than zeros.
        hidden_out, _ = tf.nn.dynamic_rnn(
            cell, input_data,
            initial_state=(LSTMStateTuple(
                random_variable([n_batch, n_hidden], 0.1),
                random_variable([n_batch, n_hidden], 0.1))),
            dtype=tf.float32)
    elif model == "LSTUM":
        cell = BasicLSTUMCell(n_hidden, size_batch=n_batch, forget_bias=1)
        # Matrix-valued cell state: n_hidden**2 entries per sample.
        hidden_out, _ = tf.nn.dynamic_rnn(
            cell, input_data,
            initial_state=(LSTMStateTuple(
                random_variable([n_batch, n_hidden**2], 0.1),
                random_variable([n_batch, n_hidden], 0.1))),
            dtype=tf.float32)
    elif model == "LSTRM":
        if ismatrix:
            cell = BasicLSTRMCell(n_hidden, size_batch=n_batch,
                                  forget_bias=1, isMatrix=True)
            hidden_out, _ = tf.nn.dynamic_rnn(
                cell, input_data,
                initial_state=(LSTMStateTuple(
                    random_variable([n_batch, n_hidden**2], 0.1),
                    random_variable([n_batch, n_hidden], 0.1))),
                dtype=tf.float32)
        else:
            cell = BasicLSTRMCell(n_hidden, size_batch=n_batch,
                                  forget_bias=1, isMatrix=False)
            hidden_out, _ = tf.nn.dynamic_rnn(
                cell, input_data,
                initial_state=(LSTMStateTuple(
                    random_variable([n_batch, n_hidden], 0.1),
                    random_variable([n_batch, n_hidden], 0.1))),
                dtype=tf.float32)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "EURNN":
        cell = EURNNCell(n_hidden, capacity, FFT, comp)
        if comp:
            # Complex-valued cell: run in complex64, keep the real part.
            hidden_out_comp, _ = tf.nn.dynamic_rnn(cell, input_data,
                                                   dtype=tf.complex64)
            hidden_out = tf.real(hidden_out_comp)
        else:
            hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data,
                                              dtype=tf.float32)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    else:
        # Previously an unknown model fell through silently and crashed
        # later with an unrelated NameError on `hidden_out`.
        raise ValueError("unknown model: %s" % model)

    # --- Hidden Layer to Output (Glorot-style uniform init) --------------
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable(
        "V_weights", shape=[n_hidden, n_classes], dtype=tf.float32,
        initializer=tf.random_uniform_initializer(-V_init_val, V_init_val))
    V_bias = tf.get_variable(
        "V_bias", shape=[n_classes], dtype=tf.float32,
        initializer=tf.constant_initializer(0.01))
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(step_out, V_weights)
                         for step_out in hidden_out_list])
    # temp_out is (time, batch, classes); transpose back to batch-major.
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # --- evaluate process ----------------------
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()
    for v in tf.global_variables():
        print(v.name)

    # --- save result ----------------------
    filename = "./output/copying/T=" + str(T) + "/" + model + "_N=" + str(
        n_hidden) + "_lambda=" + str(learning_rate) + "_ismatrix=" + str(
            ismatrix)
    if model == "EURNN" or model == "GORU":
        print(model)
        if FFT:
            filename += "_FFT"
        else:
            filename = filename + "_L=" + str(capacity)
    filename = filename + ".txt"
    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    f = open(filename, 'w')
    f.write("########\n\n")
    f.write("## \tModel: %s with N=%d" % (model, n_hidden))
    if model == "EURNN" or model == "GORU":
        if FFT:
            f.write(" FFT")
        else:
            f.write(" L=%d" % (capacity))
    f.write("\n\n")
    f.write("########\n\n")

    # --- Training Loop ----------------------
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        sess.run(init)
        steps = []
        losses = []
        accs = []
        while step < n_iter:
            batch_x = train_x[step * n_batch:(step + 1) * n_batch]
            batch_y = train_y[step * n_batch:(step + 1) * n_batch]
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            # One combined run instead of two: same values, one forward pass.
            acc, loss = sess.run([accuracy, cost],
                                 feed_dict={x: batch_x, y: batch_y})
            print("Iter " + str(step) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            step += 1
            # NOTE: logged after the increment, matching the original order.
            f.write("%d\t%f\t%f\n" % (step, loss, acc))
        print("Optimization Finished!")

        # --- test ----------------------
        test_acc, test_loss = sess.run([accuracy, cost],
                                       feed_dict={x: test_x, y: test_y})
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}".format(test_acc))
    f.close()  # previously leaked