def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT, learning_rate,
         norm, update_gate, activation, lambd, layer_norm, zoneout,
         visualization_experiment):
    learning_rate = float(learning_rate)

    # data params
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch
    n_steps = T + 20
    n_classes = 9

    # create data
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # graph and gradients
    x = tf.placeholder("int32", [None, n_steps])
    y = tf.placeholder("int64", [None, n_steps])
    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # input to hidden
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden, kernel_initializer=tf.orthogonal_initializer())
    elif model == "RUM":
        # activation
        if activation == "relu":
            act = tf.nn.relu
        elif activation == "sigmoid":
            act = tf.nn.sigmoid
        elif activation == "tanh":
            act = tf.nn.tanh
        elif activation == "softsign":
            act = tf.nn.softsign
        if visualization_experiment:
            # placeholders for the weights probed during visualization
            temp_target = tf.placeholder("float32", [n_hidden + 10, n_hidden])
            temp_target_bias = tf.placeholder("float32", [n_hidden])
            temp_embed = tf.placeholder("float32", [10, n_hidden])
        cell = RUMCell(
            n_hidden,
            eta_=norm,
            update_gate=update_gate,
            lambda_=lambd,
            activation=act,
            use_layer_norm=layer_norm,
            use_zoneout=zoneout,
            visualization=visualization_experiment,
            temp_target=temp_target if visualization_experiment else None,
            temp_target_bias=temp_target_bias if visualization_experiment else None,
            temp_embed=temp_embed if visualization_experiment else None)
    elif model == "EUNN":
        if visualization_experiment:
            # placeholders for the rotation angles probed during visualization
            temp_theta0 = tf.placeholder("float32", [n_hidden // 2])
            temp_theta1 = tf.placeholder("float32", [n_hidden // 2 - 1])
        cell = EUNNCell(n_hidden, capacity, FFT, comp, name="eunn")
    elif model == "GORU":
        if visualization_experiment:
            # placeholders for the rotation angles probed during visualization
            temp_theta0 = tf.placeholder("float32", [n_hidden // 2])
            temp_theta1 = tf.placeholder("float32", [n_hidden // 2 - 1])
        cell = GORUCell(n_hidden, capacity, FFT,
                        temp_theta0=temp_theta0 if visualization_experiment else None,
                        temp_theta1=temp_theta1 if visualization_experiment else None)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)

    # hidden to output
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(i, V_weights) for i in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # evaluate process
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    tf.summary.scalar('cost', cost)
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # initialization
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # save
    filename = model + "_H" + str(n_hidden) + "_" + \
        ("L" + str(lambd) + "_" if lambd else "") + \
        ("E" + str(norm) + "_" if norm else "") + \
        ("A" + activation + "_" if activation else "") + \
        ("U_" if update_gate else "") + \
        ("Z_" if zoneout and model == "RUM" else "") + \
        ("ln_" if layer_norm and model == "RUM" else "") + \
        (str(capacity) if model in ["EUNN", "GORU"] else "") + \
        ("FFT_" if model in ["EUNN", "GORU"] and FFT else "") + \
        ("VE_" if model in ["EUNN", "GORU", "RUM"] and visualization_experiment else "") + \
        "B" + str(n_batch)
    save_path = os.path.join('../../train_log', 'copying', 'T' + str(T), filename)
    file_manager(save_path)

    # what follows is task specific
    filepath = os.path.join(save_path, "eval.txt")
    if not os.path.exists(os.path.dirname(filepath)):
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filepath, 'w')
    f.write("accuracies \n")
    log(kwargs, save_path)  # `kwargs` is assumed to be a module-level record of the run arguments
    merged_summary = tf.summary.merge_all()
    saver = tf.train.Saver()
    parameters_profiler()

    # train
    step = 0
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(save_path, sess.graph)
        steps = []
        losses = []
        accs = []
        while step < n_iter:
            batch_x = train_x[step * n_batch:(step + 1) * n_batch]
            batch_y = train_y[step * n_batch:(step + 1) * n_batch]
            if visualization_experiment:
                # visualization experiment: probe the loss along a line and on a
                # 2-D grid in weight space, then exit without training
                if model == "RUM":
                    number_of_weights = (n_hidden + 10) * \
                        n_hidden + n_hidden + 10 * n_hidden
                elif model in ["GORU", "EUNN"]:
                    # assuming that n_hidden is even
                    number_of_weights = n_hidden - 1
                print(col("starting linear visualization", 'b'))
                num_points = 200
                coord, weights = generate_points_for_visualization(
                    number_of_weights, num_points)
                processed_placeholders = process_vis(weights,
                                                     num_points,
                                                     n_hidden=n_hidden,
                                                     cell=model)
                if model == "RUM":
                    feed_temp_target, feed_temp_target_bias, feed_temp_embed = processed_placeholders
                else:
                    feed_temp_theta0, feed_temp_theta1 = processed_placeholders
                collect_losses = []
                for i in range(num_points):
                    if model == "RUM":
                        loss = sess.run(cost,
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y,
                                            temp_target: feed_temp_target[i],
                                            temp_target_bias: feed_temp_target_bias[i],
                                            temp_embed: feed_temp_embed[i]
                                        })
                    elif model in ["EUNN", "GORU"]:
                        loss = sess.run(cost,
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y,
                                            temp_theta0: feed_temp_theta0[i],
                                            temp_theta1: feed_temp_theta1[i]
                                        })
                    print(col("iter: " + str(i) + " loss: " + str(loss), 'y'))
                    collect_losses.append(loss)
                np.save(os.path.join(save_path, "linear_height"),
                        np.array(collect_losses))
                np.save(os.path.join(save_path, "linear_coord"), np.array(coord))
                print(col("done with linear visualization", 'b'))
                #####################
                print(col("starting contour visualization", 'b'))
                num_points = 20
                coord, weights = generate_points_for_visualization(
                    number_of_weights, num_points, type_vis="contour")
                np.save(os.path.join(save_path, "contour_coord"), np.array(coord))
                processed_placeholders = process_vis(weights,
                                                     num_points ** 2,
                                                     n_hidden=n_hidden,
                                                     cell=model)
                if model == "RUM":
                    feed_temp_target, feed_temp_target_bias, feed_temp_embed = processed_placeholders
                else:
                    feed_temp_theta0, feed_temp_theta1 = processed_placeholders
                collect_contour = np.empty((num_points, num_points))
                for i in range(num_points):
                    for j in range(num_points):
                        if model == "RUM":
                            loss = sess.run(
                                cost,
                                feed_dict={
                                    x: batch_x,
                                    y: batch_y,
                                    temp_target: feed_temp_target[i * num_points + j],
                                    temp_target_bias: feed_temp_target_bias[i * num_points + j],
                                    temp_embed: feed_temp_embed[i * num_points + j]
                                })
                        elif model in ["GORU", "EUNN"]:
                            loss = sess.run(
                                cost,
                                feed_dict={
                                    x: batch_x,
                                    y: batch_y,
                                    temp_theta0: feed_temp_theta0[i * num_points + j],
                                    temp_theta1: feed_temp_theta1[i * num_points + j]
                                })
                        collect_contour[i, j] = loss
                        print(col("iter: " + str(i) + "," + str(j) +
                                  " loss: " + str(loss), 'y'))
                np.save(os.path.join(save_path, "contour_height"),
                        np.array(collect_contour))
                print(col("exiting visualization experiment", 'r'))
                exit()
            summ, acc, loss = sess.run([merged_summary, accuracy, cost],
                                       feed_dict={x: batch_x, y: batch_y})
            train_writer.add_summary(summ, step)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            print(col("Iter " + str(step) +
                      ", Minibatch Loss: " + "{:.6f}".format(loss) +
                      ", Training Accuracy: " + "{:.5f}".format(acc), 'g'))
            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            if step % 200 == 0:
                f.write(col("%d\t%f\t%f\n" % (step, loss, acc), 'y'))
                f.flush()
            if step % 1000 == 0:
                print(col("saving graph and metadata in " + save_path, "b"))
                saver.save(sess, os.path.join(save_path, "model"))
            step += 1
        print(col("Optimization Finished!", 'b'))

        # test
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        test_loss = sess.run(cost, feed_dict={x: test_x, y: test_y})
        f.write(col("Test result: Loss= " + "{:.6f}".format(test_loss) +
                    ", Accuracy= " + "{:.5f}".format(test_acc), 'g'))
        f.close()
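# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the copying-memory data layout that the
# `copying_data(T, n_data, n_sequence)` calls above assume.  This illustrates
# the standard task construction (not necessarily the repo's own
# implementation): the input carries `n_sequence` symbols from {1..8}, a blank
# stretch of zeros, the delimiter `9`, and `n_sequence` trailing blanks; the
# target is blank except for the last `n_sequence` positions, which must
# reproduce the initial symbols.
def copying_data_sketch(T, n_data, n_sequence):
    seq = np.random.randint(1, 9, size=(n_data, n_sequence))      # symbols 1..8
    blank = np.zeros((n_data, T - 1), dtype=np.int64)             # long blank delay
    marker = np.full((n_data, 1), 9, dtype=np.int64)              # "recall now" delimiter
    tail = np.zeros((n_data, n_sequence), dtype=np.int64)         # room for the answer
    data_x = np.concatenate([seq, blank, marker, tail], axis=1)   # length T + 2 * n_sequence
    data_y = np.concatenate(
        [np.zeros((n_data, T + n_sequence), dtype=np.int64), seq], axis=1)
    return data_x, data_y
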
def main(model, qid, data_path, level, attention, n_iter, n_batch, n_hidden,
         n_embed, capacity, comp, FFT, learning_rate, norm, update_gate,
         activation, lambd, layer_norm, zoneout, attn_rum):
    """ assembles the model, trains and then evaluates. """
    # preprocessing
    learning_rate = float(learning_rate)
    tar = tarfile.open(data_path)
    name_str = [
        'single-supporting-fact',
        'two-supporting-facts',
        'three-supporting-facts',
        'two-arg-relations',
        'three-arg-relations',
        'yes-no-questions',
        'counting',
        'lists-sets',
        'simple-negation',
        'indefinite-knowledge',
        'basic-coreference',
        'conjunction',
        'compound-coreference',
        'time-reasoning',
        'basic-deduction',
        'basic-induction',
        'positional-reasoning',
        'size-reasoning',
        'path-finding',
        'agents-motivations',
    ]
    challenge = 'tasks_1-20_v1-2/en-10k/qa' + \
        str(qid) + '_' + name_str[qid - 1] + '_{}.txt'
    train = get_stories(level, tar.extractfile(challenge.format('train')))
    test = get_stories(level, tar.extractfile(challenge.format('test')))

    # gets the vocabulary
    vocab = set()
    for story, q, answer in train + test:
        if level == "word":
            vocab |= set(story + q + [answer])
        elif level == "sentence":
            vocab |= set([item for sublist in story for item in sublist] + q + [answer])
        else:
            raise ValueError("`level` must be either 'word' or 'sentence'")
    vocab = sorted(vocab)
    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train + test))) if level == "word" else None
    train_x, train_q, train_y, train_x_len, train_q_len = vectorize_stories(
        train, word_idx, story_maxlen, query_maxlen, attention, level)
    test_x, test_q, test_y, test_x_len, test_q_len = vectorize_stories(
        test, word_idx, story_maxlen, query_maxlen, attention, level)
    # notes: `query_maxlen` will be `None` if `level == "sentence"`;
    # moreover, we added the `attention` and `level` arguments.

    # number of data points
    n_data = len(train_x)
    n_val = int(0.1 * n_data)
    # val data
    val_x = train_x[-n_val:]
    val_q = train_q[-n_val:]
    val_y = train_y[-n_val:]
    val_x_len = train_x_len[-n_val:]
    val_q_len = train_q_len[-n_val:] if level == "word" else None
    # train data
    train_x = train_x[:-n_val]
    train_q = train_q[:-n_val]
    train_y = train_y[:-n_val]
    train_q_len = train_q_len[:-n_val] if level == "word" else None
    train_x_len = train_x_len[:-n_val]
    n_train = len(train_x)

    # profiler printing
    print(col('level: ' + level, 'y'))
    print(col('attention: ' + str(attention), 'y'))
    print(col('qid: ' + str(qid), 'y'))
    print(col('vocab = {}'.format(vocab), 'y'))
    print(col('x.shape = {}'.format(np.array(train_x).shape), 'y'))
    print(col('xq.shape = {}'.format(np.array(train_q).shape), 'y'))
    print(col('y.shape = {}'.format(np.array(train_y).shape), 'y'))
    print(col('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen), 'y'))
    print(col("building model", "b"))

    # defines the rnn cell
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
    elif model == "RUM":
        if activation == "relu":
            act = tf.nn.relu
        elif activation == "sigmoid":
            act = tf.nn.sigmoid
        elif activation == "tanh":
            act = tf.nn.tanh
        elif activation == "softsign":
            act = tf.nn.softsign
        cell = RUMCell(n_hidden,
                       eta_=norm,
                       update_gate=update_gate,
                       lambda_=lambd,
                       activation=act,
                       use_layer_norm=layer_norm,
                       use_zoneout=zoneout)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp, name="eunn")
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    cost, accuracy, input_story, question, answer_holder = nn_model(
        cell, level, attention, n_hidden, n_embed, vocab_size, story_maxlen,
        query_maxlen, attn_rum)

    # initialization
    tf.summary.scalar('cost', cost)
    if not (level == "word" and attention):
        tf.summary.scalar('accuracy', accuracy)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # save
    filename = ("attn" if attention else "") + \
        model + "_H" + str(n_hidden) + "_" + \
        ("L" + str(lambd) + "_" if lambd else "") + \
        ("E" + str(norm) + "_" if norm else "") + \
        ("A" + activation + "_" if activation else "") + \
        ("U_" if update_gate and model == "RUM" else "") + \
        ("Z_" if zoneout and model == "RUM" else "") + \
        ("RA_" if attn_rum and model == "RUM" else "") + \
        ("ln_" if layer_norm and model == "RUM" else "") + \
        (str(capacity) if model in ["EUNN", "GORU"] else "") + \
        ("FFT_" if model in ["EUNN", "GORU"] and FFT else "") + \
        ("NE" + str(n_embed) + "_") + \
        "B" + str(n_batch)
    save_dir = os.path.join('../../train_log', 'babi', level)
    save_path = os.path.join(save_dir, str(qid), filename)
    print(col("file managing: " + save_path, "b"))
    file_manager(save_path)

    # what follows is task specific
    filepath = os.path.join(save_path, "eval.txt")
    if not os.path.exists(os.path.dirname(filepath)):
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filepath, 'w')
    f.write("validation\n")
    log(kwargs, save_path)  # `kwargs` is assumed to be a module-level record of the run arguments

    # training loop
    merged_summary = tf.summary.merge_all()
    saver = tf.train.Saver()
    parameters_profiler()
    # early stop: training halts after 10 validation checks with no improvement
    ultimate_accuracy = -1.0
    ultimate_steps = 0
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        print(col("saving summary data in " + save_path, "b"))
        train_writer = tf.summary.FileWriter(save_path, sess.graph)
        sess.run(init)
        steps = []
        losses = []
        accs = []

        # prepare the validation/test dictionaries
        # validation
        val_dict = {input_story: val_x, question: val_q, answer_holder: val_y}
        # test
        test_dict = {
            input_story: test_x,
            question: test_q,
            answer_holder: test_y
        }

        # the factor of 10 is tentative [experimental]
        while step < 10 * n_iter:
            a = int(step % (n_train / n_batch))
            batch_x = train_x[a * n_batch:(a + 1) * n_batch]
            batch_q = train_q[a * n_batch:(a + 1) * n_batch]
            batch_y = train_y[a * n_batch:(a + 1) * n_batch]
            train_dict = {
                input_story: batch_x,
                question: batch_q,
                answer_holder: batch_y
            }
            summ, loss = sess.run([merged_summary, cost], feed_dict=train_dict)
            train_writer.add_summary(summ, step)
            sess.run(optimizer, feed_dict=train_dict)
            if not (level == "word" and attention):
                acc = sess.run(accuracy, feed_dict=train_dict)
                if step % 100 == 0:
                    print(col("Iter " + str(step) +
                              ", Minibatch Loss= " + "{:.6f}".format(loss) +
                              ", Training Accuracy= " + "{:.5f}".format(acc), 'g'))
            else:
                if step % 100 == 0:
                    print(col("Iter " + str(step) +
                              ", Minibatch Loss= " + "{:.6f}".format(loss), 'g'))
            steps.append(step)
            losses.append(loss)
            if not (level == "word" and attention):
                accs.append(acc)
            step += 1
            if step % 500 == 1:
                val_loss, val_acc = sess.run([cost, accuracy], feed_dict=val_dict)
                print(col("Validation Loss= " + "{:.6f}".format(val_loss) +
                          ", Validation Accuracy= " + "{:.5f}".format(val_acc), "g"))
                if val_acc > ultimate_accuracy:
                    ultimate_accuracy = val_acc
                    print(col("saving graph and metadata in " + save_path, "b"))
                    saver.save(sess, os.path.join(save_path, "model"))
                    ultimate_steps = 0
                else:
                    ultimate_steps += 1
                if ultimate_steps == 10:
                    print(col("Early stop!", 'r'))
                    break
                print(col((ultimate_accuracy, ultimate_steps), 'r'))
        print(col("Optimization Finished!", 'b'))

        # test
        print(col("restoring from " + save_path + "/model", "b"))
        saver.restore(sess, save_path + "/model")
        print(col("restored the best model on the validation data", "b"))
        test_acc, test_loss = sess.run([accuracy, cost], feed_dict=test_dict)
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}\n".format(test_acc))
        print(col("Test result: Loss= " + "{:.6f}".format(test_loss) +
                  ", Accuracy= " + "{:.5f}".format(test_acc), "g"))
        f.close()

        # what follows is for the single-pass mode
        global sp
        global g
        if sp:
            single_pass_path = os.path.join(save_dir,
                                            "summary_eval_" + filename + ".txt")
            if g is None:
                g = open(single_pass_path, 'w')
            if not os.path.exists(single_pass_path):
                try:
                    os.makedirs(os.path.dirname(single_pass_path))
                except OSError as exc:
                    if exc.errno != errno.EEXIST:
                        raise
            g.write(col(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + "\n", 'r'))
            g.write(col("id " + str(qid) + ": " + "{:.5f}".format(test_acc) + "\n", "y"))
            g.flush()
        return test_acc  # the test accuracy lets the caller compute an average over tasks
def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay, learning_rate_decay, norm, grid_name):
    learning_rate = float(learning_rate)
    decay = float(decay)

    # --- Set data params ----------------
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch
    n_steps = T + 20
    n_classes = 9

    # --- Create data --------------------
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # --- Create graph and compute gradients ----------------------
    with tf.name_scope('inputs'):
        x = tf.placeholder("int32", [None, n_steps], name='x_input')
        y = tf.placeholder("int64", [None, n_steps], name='y_input')
        input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # --- Input to hidden layer ----------------------
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden, kernel_initializer=tf.orthogonal_initializer())
    elif model == "RUM":
        cell = RUMCell(n_hidden, T_norm=norm)
    elif model == "ARUM":
        cell = ARUMCell(n_hidden, T_norm=norm)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)

    # --- Hidden Layer to Output ----------------------
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))
    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(i, V_weights) for i in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # --- evaluate process ----------------------
    with tf.name_scope('evaluate'):
        with tf.name_scope('cost'):
            cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=output_data, labels=y))
            tf.summary.scalar('cost', cost)
        with tf.name_scope('correct_pred'):
            correct_pred = tf.equal(tf.argmax(output_data, 2), y)
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()

    # --- parameter count ----------------------
    print("\n###")
    sumz = 0
    for i in tf.global_variables():
        print(i.name, i.shape, np.prod(np.array(i.get_shape().as_list())))
        sumz += np.prod(np.array(i.get_shape().as_list()))
    print("# parameters: ", sumz)
    print("###\n")

    # --- save result ----------------------
    filename = "./output/copying/"
    if grid_name is not None:
        filename += grid_name + "/"
    filename += "T=" + str(T) + "/"
    research_filename = filename + "researchModels" + "/" + model + "_N=" + str(
        n_hidden) + "_lambda=" + str(learning_rate) + "_decay=" + str(decay) + "/"
    filename += model + "_N=" + str(n_hidden) + "_lambda=" + str(
        learning_rate) + "_decay=" + str(decay)
    if norm is not None:
        filename += "_norm=" + str(norm)
    filename = filename + ".txt"
    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    if not os.path.exists(os.path.dirname(research_filename)):
        try:
            os.makedirs(os.path.dirname(research_filename))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    if not os.path.exists(os.path.dirname(research_filename + "/modelCheckpoint/")):
        try:
            os.makedirs(os.path.dirname(research_filename + "/modelCheckpoint/"))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filename, 'w')
    f.write("########\n\n")
    f.write("## \tModel: %s with N=%d" % (model, n_hidden))
    f.write("\n\n")
    f.write("########\n\n")

    # --- Training Loop ----------------------
    saver = tf.train.Saver()
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter("./logs/", sess.graph)
        sess.run(init)
        steps = []
        losses = []
        accs = []
        while step < n_iter:
            batch_x = train_x[step * n_batch:(step + 1) * n_batch]
            batch_y = train_y[step * n_batch:(step + 1) * n_batch]
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            # evaluate the merged summaries, accuracy and loss in a single run;
            # no summary ops or writers are created inside the loop
            result, acc, loss = sess.run([merged, accuracy, cost],
                                         feed_dict={x: batch_x, y: batch_y})
            writer.add_summary(result, step)
            print("Iter " + str(step) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            if step == 0:
                f.write("%d\t%f\t%f\n" % (step, loss, acc))
            step += 1
            if step % 200 == 199:
                f.write("%d\t%f\t%f\n" % (step, loss, acc))
            if step % 10000 == 0:
                saver.save(sess, research_filename + "/modelCheckpoint/")
            if step % 1000 == 0:
                # periodically snapshot the gate / candidate weights for later analysis
                if model == "GRU":
                    tmp = "gru"
                if model == "RUM":
                    tmp = "rum"
                if model == "ARUM":
                    tmp = "arum"
                if model in ("GRU", "RUM", "ARUM"):
                    kernel = [v for v in tf.global_variables()
                              if v.name == "rnn/" + tmp + "_cell/gates/kernel:0"][0]
                    bias = [v for v in tf.global_variables()
                            if v.name == "rnn/" + tmp + "_cell/gates/bias:0"][0]
                    k, b = sess.run([kernel, bias])
                    np.save(research_filename + "/kernel_" + str(step), k)
                    np.save(research_filename + "/bias_" + str(step), b)
                if model in ("RUM", "ARUM"):
                    kernel_emb = [v for v in tf.global_variables()
                                  if v.name == "rnn/" + tmp + "_cell/candidate/kernel:0"][0]
                    bias_emb = [v for v in tf.global_variables()
                                if v.name == "rnn/" + tmp + "_cell/candidate/bias:0"][0]
                    k_emb, b_emb = sess.run([kernel_emb, bias_emb])
                    np.save(research_filename + "/kernel_emb_" + str(step), k_emb)
                    np.save(research_filename + "/bias_emb_" + str(step), b_emb)
        print("Optimization Finished!")

        # --- test ----------------------
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        test_loss = sess.run(cost, feed_dict={x: test_x, y: test_y})
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}".format(test_acc))
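# ---------------------------------------------------------------------------
# A small, hypothetical helper for loading the .npy weight snapshots saved in
# the loop above (np.save appends the ".npy" extension automatically; what
# analysis one runs on the returned arrays is up to the experiment):
def load_gate_snapshots_sketch(research_filename, snapshot_steps):
    kernels = {s: np.load(research_filename + "/kernel_" + str(s) + ".npy")
               for s in snapshot_steps}
    biases = {s: np.load(research_filename + "/bias_" + str(s) + ".npy")
              for s in snapshot_steps}
    return kernels, biases
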
def main(model, qid, n_iter, n_batch, n_hidden, n_embed, capacity, comp, FFT,
         learning_rate, norm, grid_name):
    learning_rate = float(learning_rate)
    path = './data/tasks_1-20_v1-2.tar.gz'
    tar = tarfile.open(path)
    name_str = [
        'single-supporting-fact',
        'two-supporting-facts',
        'three-supporting-facts',
        'two-arg-relations',
        'three-arg-relations',
        'yes-no-questions',
        'counting',
        'lists-sets',
        'simple-negation',
        'indefinite-knowledge',
        'basic-coreference',
        'conjunction',
        'compound-coreference',
        'time-reasoning',
        'basic-deduction',
        'basic-induction',
        'positional-reasoning',
        'size-reasoning',
        'path-finding',
        'agents-motivations',
    ]
    challenge = 'tasks_1-20_v1-2/en-10k/qa' + str(qid) + '_' + name_str[
        qid - 1] + '_{}.txt'
    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

    vocab = set()
    for story, q, answer in train + test:
        vocab |= set(story + q + [answer])
    vocab = sorted(vocab)
    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train + test)))
    train_x, train_q, train_y, train_x_len, train_q_len = vectorize_stories(
        train, word_idx, story_maxlen, query_maxlen)
    test_x, test_q, test_y, test_x_len, test_q_len = vectorize_stories(
        test, word_idx, story_maxlen, query_maxlen)

    # hold out 10% of the training data for validation
    n_data = len(train_x)
    n_val = int(0.1 * n_data)
    val_x = train_x[-n_val:]
    val_q = train_q[-n_val:]
    val_y = train_y[-n_val:]
    val_x_len = train_x_len[-n_val:]
    val_q_len = train_q_len[-n_val:]
    train_x = train_x[:-n_val]
    train_q = train_q[:-n_val]
    train_y = train_y[:-n_val]
    train_q_len = train_q_len[:-n_val]
    train_x_len = train_x_len[:-n_val]
    n_train = len(train_x)

    print('vocab = {}'.format(vocab))
    print('x.shape = {}'.format(np.array(train_x).shape))
    print('xq.shape = {}'.format(np.array(train_q).shape))
    print('y.shape = {}'.format(np.array(train_y).shape))
    print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen))
    print('Build model...')

    sentence = tf.placeholder("int32", [None, story_maxlen])
    n_output = n_hidden
    n_input = n_embed
    n_classes = vocab_size

    # Glorot-style uniform initialization for the embedding
    embed_init_val = np.sqrt(6.) / np.sqrt(vocab_size)
    embed = tf.get_variable('Embedding', [vocab_size, n_embed],
                            initializer=tf.random_uniform_initializer(
                                -embed_init_val, embed_init_val),
                            dtype=tf.float32)
    encoded_sentence = tf.nn.embedding_lookup(embed, sentence)
    question = tf.placeholder("int32", [None, query_maxlen])
    encoded_question = tf.nn.embedding_lookup(embed, question)
    merged = tf.concat([encoded_sentence, encoded_question], axis=1)
    print(encoded_sentence, encoded_question, merged)

    # the concatenated story and question are fed through a single RNN
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
    elif model == "RUM":
        cell = RUMCell(n_hidden, T_norm=norm)
    elif model == "ARUM":
        cell = ARUMCell(n_hidden, T_norm=norm)
    elif model == "ARUM2":
        cell = ARUM2Cell(n_hidden, T_norm=norm)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
    merged, _ = tf.nn.dynamic_rnn(cell, merged, dtype=tf.float32)

    # --- Hidden Layer to Output ----------------------
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))
    # only the last time step is used to predict the answer
    merged_list = tf.unstack(merged, axis=1)[-1]
    temp_out = tf.matmul(merged_list, V_weights)
    final_out = tf.nn.bias_add(temp_out, V_bias)
    answer_holder = tf.placeholder("int64", [None])

    # --- evaluate process ----------------------
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=final_out,
                                                       labels=answer_holder))
    correct_pred = tf.equal(tf.argmax(final_out, 1), answer_holder)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # --- save result ----------------------
    folder = "./output/babi/" + str(qid) + '/' + model
    filename = folder + "_h=" + str(n_hidden)
    filename = filename + "_lr=" + str(learning_rate)
    filename = filename + "_norm=" + str(norm)
    filename = filename + ".txt"
    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    if not os.path.exists(os.path.dirname(folder + "/modelCheckpoint/")):
        try:
            print(folder + "/modelCheckpoint/")
            os.makedirs(os.path.dirname(folder + "/modelCheckpoint/"))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filename, 'w')
    f.write("########\n\n")
    f.write("## \tModel: %s with N=%d" % (model, n_hidden))
    f.write("########\n\n")

    # --- Training Loop ----------------------
    saver = tf.train.Saver()
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        sess.run(init)
        steps = []
        losses = []
        accs = []
        while step < n_iter:
            a = int(step % (n_train / n_batch))
            batch_x = train_x[a * n_batch:(a + 1) * n_batch]
            batch_q = train_q[a * n_batch:(a + 1) * n_batch]
            batch_y = train_y[a * n_batch:(a + 1) * n_batch]
            train_dict = {
                sentence: batch_x,
                question: batch_q,
                answer_holder: batch_y
            }
            sess.run(optimizer, feed_dict=train_dict)
            acc = sess.run(accuracy, feed_dict=train_dict)
            loss = sess.run(cost, feed_dict=train_dict)
            print("Iter " + str(step) + ", Minibatch Loss= " +
                  "{:.6f}".format(loss) + ", Training Accuracy= " +
                  "{:.5f}".format(acc))
            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            step += 1
            if step % 200 == 1:
                saver.save(sess, folder + "/modelCheckpoint/step=" + str(step))
                val_dict = {
                    sentence: val_x,
                    question: val_q,
                    answer_holder: val_y
                }
                val_acc = sess.run(accuracy, feed_dict=val_dict)
                val_loss = sess.run(cost, feed_dict=val_dict)
                print("Validation Loss= " + "{:.6f}".format(val_loss) +
                      ", Validation Accuracy= " + "{:.5f}".format(val_acc))
                f.write("%d\t%f\t%f\n" % (step, val_loss, val_acc))
        print("Optimization Finished!")

        # --- test ----------------------
        test_dict = {sentence: test_x, question: test_q, answer_holder: test_y}
        test_acc = sess.run(accuracy, feed_dict=test_dict)
        test_loss = sess.run(cost, feed_dict=test_dict)
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}".format(test_acc))
        print("Test result: Loss= " + "{:.6f}".format(test_loss) +
              ", Accuracy= " + "{:.5f}".format(test_acc))
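# ---------------------------------------------------------------------------
# Example invocation of the baseline bAbI `main` above (illustrative values
# only; the repo presumably parses these from the command line):
if __name__ == "__main__":
    main(model="RUM", qid=1, n_iter=10000, n_batch=32, n_hidden=256, n_embed=64,
         capacity=2, comp=False, FFT=False, learning_rate=0.001, norm=1.0,
         grid_name=None)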