def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, norm, update_gate, activation, lambd, layer_norm,
         zoneout, visualization_experiment):

    learning_rate = float(learning_rate)

    # data params
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch

    n_steps = T + 20
    n_classes = 9
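    # copying-task layout (assuming the standard setup): n_sequence symbols to
    # memorize, T blank steps, a delimiter, then n_sequence output steps,
    # hence n_steps = T + 2 * n_sequence = T + 20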

    # create data
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # graph and gradients
    x = tf.placeholder("int32", [None, n_steps])
    y = tf.placeholder("int64", [None, n_steps])

    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # input to hidden
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden,
                       kernel_initializer=tf.orthogonal_initializer())
    elif model == "RUM":
        # activation
        if activation == "relu":
            act = tf.nn.relu
        elif activation == "sigmoid":
            act = tf.nn.sigmoid
        elif activation == "tanh":
            act = tf.nn.tanh
        elif activation == "softsign":
            act = tf.nn.softsign
        if visualization_experiment:
            # placeholder
            temp_target = tf.placeholder("float32", [n_hidden + 10, n_hidden])
            temp_target_bias = tf.placeholder("float32", [n_hidden])
            temp_embed = tf.placeholder("float32", [10, n_hidden])
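            # these placeholders presumably let the visualization feed fixed
            # target-kernel, bias and embedding values into the RUM cell while
            # the loss surface is being swept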

        cell = RUMCell(
            n_hidden,
            eta_=norm,
            update_gate=update_gate,
            lambda_=lambd,
            activation=act,
            use_layer_norm=layer_norm,
            use_zoneout=zoneout,
            visualization=visualization_experiment,
            temp_target=temp_target if visualization_experiment else None,
            temp_target_bias=temp_target_bias
            if visualization_experiment else None,
            temp_embed=temp_embed if visualization_experiment else None)
    elif model == "EUNN":
        if visualization_experiment:
            # placeholder
            temp_theta0 = tf.placeholder("float32", [n_hidden // 2])
            temp_theta1 = tf.placeholder("float32", [n_hidden // 2 - 1])
        cell = EUNNCell(n_hidden, capacity, FFT, comp, name="eunn")
    elif model == "GORU":
        if visualization_experiment:
            # placeholder
            temp_theta0 = tf.placeholder("float32", [n_hidden // 2])
            temp_theta1 = tf.placeholder("float32", [n_hidden // 2 - 1])
        cell = GORUCell(n_hidden,
                        capacity,
                        FFT,
                        temp_theta0=temp_theta0,
                        temp_theta1=temp_theta1)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)

    hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)

    # hidden to output
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)
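    # Glorot-style uniform bound; note it is computed from n_input/n_output
    # rather than from the [n_hidden, n_classes] shape of V_weights below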
    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))

    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(i, V_weights) for i in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)
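    # shared per-timestep readout: output_data has shape
    # [n_batch, n_steps, n_classes]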

    # evaluate process
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output_data,
                                                       labels=y))
    tf.summary.scalar('cost', cost)
    correct_pred = tf.equal(tf.argmax(output_data, 2), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # initialization
    optimizer = tf.train.RMSPropOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # save
    filename = model + "_H" + str(n_hidden) + "_" + \
        ("L" + str(lambd) + "_" if lambd else "") + \
        ("E" + str(eta) + "_" if norm else "") + \
        ("A" + activation + "_" if activation else "") + \
        ("U_" if update_gate else "") + \
        ("Z_" if zoneout and model == "RUM" else "") + \
        ("ln_" if layer_norm and model == "RUM" else "") + \
        (str(capacity) if model in ["EUNN", "GORU"] else "") + \
        ("FFT_" if model in ["EUNN", "GORU"] and FFT else "") + \
        ("VE_" if model in ["EUNN", "GORU", "RUM"] and visualization_experiment else "") + \
        "B" + str(n_batch)
    save_path = os.path.join('../../train_log', 'copying', 'T' + str(T),
                             filename)

    file_manager(save_path)

    # what follows is task specific
    filepath = os.path.join(save_path, "eval.txt")
    if not os.path.exists(os.path.dirname(filepath)):
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filepath, 'w')
    f.write("accuracies \n")

    log(kwargs, save_path)

    merged_summary = tf.summary.merge_all()
    saver = tf.train.Saver()

    parameters_profiler()

    # train
    step = 0
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(save_path, sess.graph)

        steps = []
        losses = []
        accs = []

        while step < n_iter:
            batch_x = train_x[step * n_batch:(step + 1) * n_batch]
            batch_y = train_y[step * n_batch:(step + 1) * n_batch]
            if visualization_experiment:
                """ initiative to write simpler code """

                if model == "RUM":
                    number_of_weights = (n_hidden + 10) * \
                        n_hidden + n_hidden + 10 * n_hidden
                elif model in ["GORU", "EUNN"]:
                    # assuming that n_hidden is even.
                    number_of_weights = n_hidden - 1

                print(col("strating linear visualization", 'b'))
                num_points = 200

                coord, weights = generate_points_for_visualization(
                    number_of_weights, num_points)
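                # presumably samples num_points weight settings along one
                # random direction in weight space (a 1-D slice of the loss)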

                processed_placeholders = process_vis(weights,
                                                     num_points,
                                                     n_hidden=n_hidden,
                                                     cell=model)
                if model == "RUM":
                    feed_temp_target, feed_temp_target_bias, feed_temp_embed = processed_placeholders

                else:
                    feed_temp_theta0, feed_temp_theta1 = processed_placeholders

                collect_losses = []
                for i in range(num_points):
                    if model == "RUM":
                        loss = sess.run(cost,
                                        feed_dict={
                                            x:
                                            batch_x,
                                            y:
                                            batch_y,
                                            temp_target:
                                            feed_temp_target[i],
                                            temp_target_bias:
                                            feed_temp_target_bias[i],
                                            temp_embed:
                                            feed_temp_embed[i]
                                        })
                    elif model in ["EUNN", "GORU"]:
                        loss = sess.run(cost,
                                        feed_dict={
                                            x: batch_x,
                                            y: batch_y,
                                            temp_theta0: feed_temp_theta0[i],
                                            temp_theta1: feed_temp_theta1[i]
                                        })

                    print(col("iter: " + str(i) + " loss: " + str(loss), 'y'))
                    collect_losses.append(loss)
                np.save(os.path.join(save_path, "linear_height"),
                        np.array(collect_losses))
                np.save(os.path.join(save_path, "linear_coord"),
                        np.array(coord))
                print(col("done with linear visualization", 'b'))

                #####################

                print(col("strating contour visualization", 'b'))
                num_points = 20
                coord, weights = generate_points_for_visualization(
                    number_of_weights, num_points, type_vis="contour")
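                # presumably a num_points x num_points grid spanned by two
                # random directions in weight space (a 2-D loss surface)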
                np.save(os.path.join(save_path, "contour_coord"),
                        np.array(coord))
                processed_placeholders = process_vis(weights,
                                                     num_points**2,
                                                     n_hidden=n_hidden,
                                                     cell=model)
                if model == "RUM":
                    feed_temp_target, feed_temp_target_bias, feed_temp_embed = processed_placeholders
                else:
                    feed_temp_theta0, feed_temp_theta1 = processed_placeholders

                collect_contour = np.empty((num_points, num_points))
                for i in range(num_points):
                    for j in range(num_points):
                        if model == "RUM":
                            loss = sess.run(
                                cost,
                                feed_dict={
                                    x:
                                    batch_x,
                                    y:
                                    batch_y,
                                    temp_target:
                                    feed_temp_target[i * num_points + j],
                                    temp_target_bias:
                                    feed_temp_target_bias[i * num_points + j],
                                    temp_embed:
                                    feed_temp_embed[i * num_points + j]
                                })
                        elif model in ["GORU", "EUNN"]:
                            loss = sess.run(
                                cost,
                                feed_dict={
                                    x:
                                    batch_x,
                                    y:
                                    batch_y,
                                    temp_theta0:
                                    feed_temp_theta0[i * num_points + j],
                                    temp_theta1:
                                    feed_temp_theta1[i * num_points + j]
                                })
                        collect_contour[i, j] = loss
                        print(
                            col(
                                "iter: " + str(i) + "," + str(j) + " loss: " +
                                str(loss), 'y'))
                np.save(os.path.join(save_path, "contour_height"),
                        np.array(collect_contour))

                print(col("exiting visualization experiment", 'r'))
                exit()

            summ, acc, loss = sess.run([merged_summary, accuracy, cost],
                                       feed_dict={
                                           x: batch_x,
                                           y: batch_y
                                       })
            train_writer.add_summary(summ, step)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
            print(
                col(
                    "Iter " + str(step) + ", Minibatch Loss: " +
                    "{:.6f}".format(loss) + ", Training Accuracy: " +
                    "{:.5f}".format(acc), 'g'))
            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            if step % 200 == 0:
                f.write(col("%d\t%f\t%f\n" % (step, loss, acc), 'y'))
                f.flush()

            if step % 1000 == 0:
                print(col("saving graph and metadata in " + save_path, "b"))
                saver.save(sess, os.path.join(save_path, "model"))

            step += 1

        print(col("Optimization Finished!", 'b'))

        # test
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        test_loss = sess.run(cost, feed_dict={x: test_x, y: test_y})
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}".format(test_acc))

        f.close()
Example #2
def main(model, qid, data_path, level, attention, n_iter, n_batch, n_hidden,
         n_embed, capacity, comp, FFT, learning_rate, norm, update_gate,
         activation, lambd, layer_norm, zoneout, attn_rum):
    """ assembles the model, trains and then evaluates. """

    # preprocessing
    learning_rate = float(learning_rate)
    tar = tarfile.open(data_path)
    name_str = [
        'single-supporting-fact',
        'two-supporting-facts',
        'three-supporting-facts',
        'two-arg-relations',
        'three-arg-relations',
        'yes-no-questions',
        'counting',
        'lists-sets',
        'simple-negation',
        'indefinite-knowledge',
        'basic-coreference',
        'conjunction',
        'compound-coreference',
        'time-reasoning',
        'basic-deduction',
        'basic-induction',
        'positional-reasoning',
        'size-reasoning',
        'path-finding',
        'agents-motivations',
    ]
    challenge = 'tasks_1-20_v1-2/en-10k/qa' + \
        str(qid) + '_' + name_str[qid - 1] + '_{}.txt'
    train = get_stories(level, tar.extractfile(challenge.format('train')))
    test = get_stories(level, tar.extractfile(challenge.format('test')))

    # gets vocabulary
    vocab = set()
    for story, q, answer in train + test:
        if level == "word":
            vocab |= set(story + q + [answer])
        elif level == "sentence":
            vocab |= set([item for sublist in story
                          for item in sublist] + q + [answer])
        else:
            raise ValueError("unknown level: " + level)
    vocab = sorted(vocab)

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train +
                                 test))) if level == "word" else None

    train_x, train_q, train_y, train_x_len, train_q_len = vectorize_stories(
        train, word_idx, story_maxlen, query_maxlen, attention, level)
    test_x, test_q, test_y, test_x_len, test_q_len = vectorize_stories(
        test, word_idx, story_maxlen, query_maxlen, attention, level)
    # notes: query_maxlen will be `None` if `level == sentence`;
    # moreover we added the `attention` and `level` arguments.

    # number of data points
    n_data = len(train_x)
    n_val = int(0.1 * n_data)
    # val data
    val_x = train_x[-n_val:]
    val_q = train_q[-n_val:]
    val_y = train_y[-n_val:]
    val_x_len = train_x_len[-n_val:]
    val_q_len = train_q_len[-n_val:] if level == "word" else None
    # train data
    train_x = train_x[:-n_val]
    train_q = train_q[:-n_val]
    train_y = train_y[:-n_val]
    train_q_len = train_q_len[:-n_val] if level == "word" else None
    train_x_len = train_x_len[:-n_val]
    n_train = len(train_x)

    # profiler printing
    print(col('level: ' + level, 'y'))
    print(col('attention: ' + str(attention), 'y'))
    print(col('qid: ' + str(qid), 'y'))
    print(col('vocab = {}'.format(vocab), 'y'))
    print(col('x.shape = {}'.format(np.array(train_x).shape), 'y'))
    print(col('xq.shape = {}'.format(np.array(train_q).shape), 'y'))
    print(col('y.shape = {}'.format(np.array(train_y).shape), 'y'))
    print(
        col(
            'story_maxlen, query_maxlen = {}, {}'.format(
                story_maxlen, query_maxlen), 'y'))
    print(col("building model", "b"))

    # defines the rnn cell
    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
    elif model == "RUM":
        if activation == "relu":
            act = tf.nn.relu
        elif activation == "sigmoid":
            act = tf.nn.sigmoid
        elif activation == "tanh":
            act = tf.nn.tanh
        elif activation == "softsign":
            act = tf.nn.softsign
        cell = RUMCell(n_hidden,
                       eta_=norm,
                       update_gate=update_gate,
                       lambda_=lambd,
                       activation=act,
                       use_layer_norm=layer_norm,
                       use_zoneout=zoneout)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp, name="eunn")
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)

    cost, accuracy, input_story, question, answer_holder = nn_model(
        cell, level, attention, n_hidden, n_embed, vocab_size, story_maxlen,
        query_maxlen, attn_rum)
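    # nn_model is assumed to assemble embedding, recurrence and readout and to
    # return the loss/accuracy ops together with the three input placeholders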

    # initialization
    tf.summary.scalar('cost', cost)
    if not (level == "word" and attention):
        tf.summary.scalar('accuracy', accuracy)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # save
    filename = ("attn" if attention else "") + \
        model + "_H" + str(n_hidden) + "_" + \
        ("L" + str(lambd) + "_" if lambd else "") + \
        ("E" + str(norm) + "_" if norm else "") + \
        ("A" + activation + "_" if activation else "") + \
        ("U_" if update_gate and model == "RUM" else "") + \
        ("Z_" if zoneout and model == "RUM" else "") + \
        ("RA_" if attn_rum and model == "RUM" else "") + \
        ("ln_" if layer_norm and model == "RUM" else "") + \
        (str(capacity) if model in ["EUNN", "GORU"] else "") + \
        ("FFT_" if model in ["EUNN", "GORU"] and FFT else "") + \
        ("NE" + str(n_embed) + "_") + \
        "B" + str(n_batch)
    save_dir = os.path.join('../../train_log', 'babi', level)
    save_path = os.path.join(save_dir, str(qid), filename)

    print(col("file managing: " + save_path, "b"))
    file_manager(save_path)

    # what follows is task specific
    filepath = os.path.join(save_path, "eval.txt")
    if not os.path.exists(os.path.dirname(filepath)):
        try:
            os.makedirs(os.path.dirname(filepath))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filepath, 'w')
    f.write("validation\n")

    log(kwargs, save_path)

    # training loop
    merged_summary = tf.summary.merge_all()
    saver = tf.train.Saver()
    parameters_profiler()

    # early stop
    ultimate_accuracy = -1.0
    ultimate_steps = 0  # if 10 validation checks pass with no improvement, we stop training

    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:

        print(col("saving summary data in " + save_path, "b"))
        train_writer = tf.summary.FileWriter(save_path, sess.graph)
        sess.run(init)

        steps = []
        losses = []
        accs = []

        # prepare validation/test dictionaries
        # validation
        val_dict = {input_story: val_x, question: val_q, answer_holder: val_y}
        # test
        test_dict = {
            input_story: test_x,
            question: test_q,
            answer_holder: test_y
        }

        # the factor of 10 is tentative [experimental]
        while step < 10 * n_iter:
            a = int(step % (n_train / n_batch))
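            # cycle through the training set; `a` is the minibatch index
            # within the current pass over the data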
            batch_x = train_x[a * n_batch:(a + 1) * n_batch]
            batch_q = train_q[a * n_batch:(a + 1) * n_batch]
            batch_y = train_y[a * n_batch:(a + 1) * n_batch]

            train_dict = {
                input_story: batch_x,
                question: batch_q,
                answer_holder: batch_y
            }
            summ, loss = sess.run([merged_summary, cost], feed_dict=train_dict)
            train_writer.add_summary(summ, step)
            sess.run(optimizer, feed_dict=train_dict)

            if not (level == "word" and attention):
                acc = sess.run(accuracy, feed_dict=train_dict)
                if step % 100 == 0:
                    print(
                        col(
                            "Iter " + str(step) + ", Minibatch Loss= " +
                            "{:.6f}".format(loss) + ", Training Accuracy= " +
                            "{:.5f}".format(acc), 'g'))
            else:
                if step % 100 == 0:
                    print(
                        col(
                            "Iter " + str(step) + ", Minibatch Loss= " +
                            "{:.6f}".format(loss), 'g'))
            steps.append(step)
            losses.append(loss)
            if not (level == "word" and attention):
                accs.append(acc)
            step += 1

            if step % 500 == 1:
                val_loss, val_acc = sess.run([cost, accuracy],
                                             feed_dict=val_dict)
                print(
                    col(
                        "Validation Loss= " + "{:.6f}".format(val_loss) +
                        ", Validation Accuracy= " + "{:.5f}".format(val_acc),
                        "g"))
                if val_acc > ultimate_accuracy:
                    ultimate_accuracy = val_acc
                    print(col("saving graph and metadata in " + save_path,
                              "b"))
                    saver.save(sess, os.path.join(save_path, "model"))
                    ultimate_steps = 0
                else:
                    ultimate_steps += 1
                if ultimate_steps == 10:
                    print(col("Early stop!", 'r'))
                    break
                print(col((ultimate_accuracy, ultimate_steps), 'r'))

        print(col("Optimization Finished!", 'b'))

        # test
        print(col("restoring from " + save_path + "/model", "b"))
        saver.restore(sess, save_path + "/model")
        print(col("restored the best model on the validation data", "b"))
        test_acc, test_loss = sess.run([accuracy, cost], feed_dict=test_dict)
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}\n".format(test_acc))
        print(
            col(
                "Test result: Loss= " + "{:.6f}".format(test_loss) +
                ", Accuracy= " + "{:.5f}".format(test_acc), "g"))
        f.close()

    # what follows is for the single pass
    global sp
    global g
    if sp:
        single_pass_path = os.path.join(save_dir,
                                        "summary_eval_" + filename + ".txt")
        if g is None:
            if not os.path.exists(os.path.dirname(single_pass_path)):
                try:
                    os.makedirs(os.path.dirname(single_pass_path))
                except OSError as exc:
                    if exc.errno != errno.EEXIST:
                        raise
            g = open(single_pass_path, 'w')
            g.write(
                col(
                    datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
                    "\n", 'r'))
        g.write(
            col("id " + str(qid) + ": " + "{:.5f}".format(test_acc) + "\n",
                "y"))
        g.flush()
    return test_acc  # returns the test accuracy to calculate the average accuracy
Example #3
def main(model, T, n_iter, n_batch, n_hidden, capacity, comp, FFT,
         learning_rate, decay, learning_rate_decay, norm, grid_name):
    learning_rate = float(learning_rate)
    decay = float(decay)

    # --- Set data params ----------------
    n_input = 10
    n_output = 9
    n_sequence = 10
    n_train = n_iter * n_batch
    n_test = n_batch

    n_steps = T + 20
    n_classes = 9

    # --- Create data --------------------
    train_x, train_y = copying_data(T, n_train, n_sequence)
    test_x, test_y = copying_data(T, n_test, n_sequence)

    # --- Create graph and compute gradients ----------------------
    with tf.name_scope('inputs'):
        x = tf.placeholder("int32", [None, n_steps], name='x_input')
        y = tf.placeholder("int64", [None, n_steps], name='y_input')

    input_data = tf.one_hot(x, n_input, dtype=tf.float32)

    # --- Input to hidden layer ----------------------
    #with tf.name_scope('layer'):

    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "GRU":
        cell = GRUCell(n_hidden,
                       kernel_initializer=tf.orthogonal_initializer())
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "RUM":
        cell = RUMCell(n_hidden, T_norm=norm)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "ARUM":
        cell = ARUMCell(n_hidden, T_norm=norm)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
        hidden_out, _ = tf.nn.dynamic_rnn(cell, input_data, dtype=tf.float32)

    # --- Hidden Layer to Output ----------------------

    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)

    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))

    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))

    hidden_out_list = tf.unstack(hidden_out, axis=1)
    temp_out = tf.stack([tf.matmul(i, V_weights) for i in hidden_out_list])
    output_data = tf.nn.bias_add(tf.transpose(temp_out, [1, 0, 2]), V_bias)

    # --- evaluate process ----------------------
    with tf.name_scope('evaluate'):
        with tf.name_scope('cost'):
            cost = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=output_data, labels=y))
            tf.summary.scalar('cost', cost)
        with tf.name_scope('correct_pred'):
            correct_pred = tf.equal(tf.argmax(output_data, 2), y)
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
            tf.summary.scalar('accuracy', accuracy)

    # --- Initialization ----------------------
    optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                          decay=decay).minimize(cost)
    init = tf.global_variables_initializer()

    print("\n###")
    sumz = 0
    for i in tf.global_variables():
        print(i.name, i.shape, np.prod(np.array(i.get_shape().as_list())))
        sumz += np.prod(np.array(i.get_shape().as_list()))
    print("# parameters: ", sumz)
    print("###\n")

    # --- save result ----------------------
    filename = "./output/copying/"
    if grid_name is not None:
        filename += grid_name + "/"
    filename += "T=" + str(T) + "/"
    research_filename = filename + "researchModels" + "/" + model + "_N=" + str(
        n_hidden) + "_lambda=" + str(learning_rate) + "_decay=" + str(
            decay) + "/"
    filename += model + "_N=" + str(n_hidden) + "_lambda=" + str(
        learning_rate) + "_decay=" + str(decay)
    if norm is not None:
        filename += "_norm=" + str(norm)
    filename = filename + ".txt"

    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    if not os.path.exists(os.path.dirname(research_filename)):
        try:
            os.makedirs(os.path.dirname(research_filename))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    if not os.path.exists(
            os.path.dirname(research_filename + "/modelCheckpoint/")):
        try:
            os.makedirs(
                os.path.dirname(research_filename + "/modelCheckpoint/"))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filename, 'w')
    f.write("########\n\n")
    f.write("## \tModel: %s with N=%d" % (model, n_hidden))
    f.write("\n\n")
    f.write("########\n\n")

    # --- Training Loop ----------------------
    saver = tf.train.Saver()
    mx2 = 0
    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter("./logs/", sess.graph)

        sess.run(init)

        steps = []
        losses = []
        accs = []

        while step < n_iter:

            batch_x = train_x[step * n_batch:(step + 1) * n_batch]
            batch_y = train_y[step * n_batch:(step + 1) * n_batch]

            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

            # evaluate the summaries, accuracy and loss once per step
            result, acc, loss = sess.run([merged, accuracy, cost],
                                         feed_dict={x: batch_x, y: batch_y})
            writer.add_summary(result, step)

            print("Iter " + str(step) + ", Minibatch Loss= " + \
               "{:.6f}".format(loss) + ", Training Accuracy= " + \
               "{:.5f}".format(acc))

            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            if step == 0:
                f.write("%d\t%f\t%f\n" % (step, loss, acc))
            step += 1
            if step % 200 == 199:
                f.write("%d\t%f\t%f\n" % (step, loss, acc))

            if step % 10000 == 0:
                saver.save(sess, research_filename + "/modelCheckpoint/")

            if step % 1000 == 0:
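                # periodically dump the gate (and, for RUM/ARUM, candidate)
                # kernels and biases by variable name for offline analysis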
                if model == "GRU": tmp = "gru"
                if model == "RUM": tmp = "rum"
                if model == "ARUM": tmp = "arum"
                if model == "GRU" or model == "RUM" or model == "ARUM":
                    kernel = [
                        v for v in tf.global_variables()
                        if v.name == "rnn/" + tmp + "_cell/gates/kernel:0"
                    ][0]
                    bias = [
                        v for v in tf.global_variables()
                        if v.name == "rnn/" + tmp + "_cell/gates/bias:0"
                    ][0]
                    k, b = sess.run([kernel, bias])
                    np.save(research_filename + "/kernel_" + str(step), k)
                    np.save(research_filename + "/bias_" + str(step), b)
                if model == "RUM" or model == "ARUM":
                    kernel_emb = [
                        v for v in tf.global_variables()
                        if v.name == "rnn/" + tmp + "_cell/candidate/kernel:0"
                    ][0]
                    bias_emb = [
                        v for v in tf.global_variables()
                        if v.name == "rnn/" + tmp + "_cell/candidate/bias:0"
                    ][0]
                    k_emb, b_emb = sess.run([kernel_emb, bias_emb])
                    np.save(research_filename + "/kernel_emb_" + str(step),
                            k_emb)
                    np.save(research_filename + "/bias_emb_" + str(step),
                            b_emb)

                    #result = sess.run(merged,feed_dict={x: batch_x, y: batch_y})
                    #writer.add_summary(result, step)
        print("Optimization Finished!")

        # --- test ----------------------
        test_acc = sess.run(accuracy, feed_dict={x: test_x, y: test_y})
        test_loss = sess.run(cost, feed_dict={x: test_x, y: test_y})
        #tf.scalar_summary('test_loss',test_loss)
        #result = sess.run(merged,feed_dict={x: batch_x, y: batch_y})
        #writer.add_summary(result, step)
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) + \
           ", Accuracy= " + "{:.5f}".format(test_acc))
Example #4
def main(model, qid, n_iter, n_batch, n_hidden, n_embed, capacity, comp, FFT,
         learning_rate, norm, grid_name):

    learning_rate = float(learning_rate)

    path = './data/tasks_1-20_v1-2.tar.gz'
    tar = tarfile.open(path)

    name_str = [
        'single-supporting-fact',
        'two-supporting-facts',
        'three-supporting-facts',
        'two-arg-relations',
        'three-arg-relations',
        'yes-no-questions',
        'counting',
        'lists-sets',
        'simple-negation',
        'indefinite-knowledge',
        'basic-coreference',
        'conjunction',
        'compound-coreference',
        'time-reasoning',
        'basic-deduction',
        'basic-induction',
        'positional-reasoning',
        'size-reasoning',
        'path-finding',
        'agents-motivations',
    ]

    challenge = 'tasks_1-20_v1-2/en-10k/qa' + str(qid) + '_' + name_str[
        qid - 1] + '_{}.txt'

    train = get_stories(tar.extractfile(challenge.format('train')))
    test = get_stories(tar.extractfile(challenge.format('test')))

    vocab = set()
    for story, q, answer in train + test:
        vocab |= set(story + q + [answer])
    vocab = sorted(vocab)

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
    story_maxlen = max(map(len, (x for x, _, _ in train + test)))
    query_maxlen = max(map(len, (x for _, x, _ in train + test)))

    train_x, train_q, train_y, train_x_len, train_q_len = vectorize_stories(
        train, word_idx, story_maxlen, query_maxlen)
    test_x, test_q, test_y, test_x_len, test_q_len = vectorize_stories(
        test, word_idx, story_maxlen, query_maxlen)

    n_data = len(train_x)
    n_val = int(0.1 * n_data)

    val_x = train_x[-n_val:]
    val_q = train_q[-n_val:]
    val_y = train_y[-n_val:]
    val_x_len = train_x_len[-n_val:]
    val_q_len = train_q_len[-n_val:]
    train_x = train_x[:-n_val]
    train_q = train_q[:-n_val]
    train_y = train_y[:-n_val]
    train_q_len = train_q_len[:-n_val]
    train_x_len = train_x_len[:-n_val]

    n_train = len(train_x)

    print('vocab = {}'.format(vocab))
    print('x.shape = {}'.format(np.array(train_x).shape))
    print('xq.shape = {}'.format(np.array(train_q).shape))
    print('y.shape = {}'.format(np.array(train_y).shape))
    print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen,
                                                       query_maxlen))

    print('Build model...')

    # sentence = layers.Input(shape=(story_maxlen,), dtype='int32')
    sentence = tf.placeholder("int32", [None, story_maxlen])

    n_output = n_hidden
    n_input = n_embed
    n_classes = vocab_size

    embed_init_val = np.sqrt(6.) / np.sqrt(vocab_size)
    embed = tf.get_variable('Embedding', [vocab_size, n_embed],
                            initializer=tf.random_uniform_initializer(
                                -embed_init_val, embed_init_val),
                            dtype=tf.float32)
    encoded_sentence = tf.nn.embedding_lookup(embed, sentence)
    question = tf.placeholder("int32", [None, query_maxlen])
    encoded_question = tf.nn.embedding_lookup(embed, question)
    merged = tf.concat([encoded_sentence, encoded_question], axis=1)
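    # story and question embeddings are concatenated along the time axis, so
    # the recurrent cell reads the story followed by the question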
    print(encoded_sentence, encoded_question, merged)

    if model == "LSTM":
        cell = BasicLSTMCell(n_hidden, state_is_tuple=True, forget_bias=1)
    elif model == "GRU":
        cell = GRUCell(n_hidden)
    elif model == "RUM":
        cell = RUMCell(n_hidden, T_norm=norm)
    elif model == "ARUM":
        cell = ARUMCell(n_hidden, T_norm=norm)
    elif model == "ARUM2":
        cell = ARUM2Cell(n_hidden, T_norm=norm)
    elif model == "RNN":
        cell = BasicRNNCell(n_hidden)
    elif model == "EUNN":
        cell = EUNNCell(n_hidden, capacity, FFT, comp)
    elif model == "GORU":
        cell = GORUCell(n_hidden, capacity, FFT)

    # run the recurrent cell once over the concatenated story/question sequence
    merged, _ = tf.nn.dynamic_rnn(cell, merged, dtype=tf.float32)

    # --- Hidden Layer to Output ----------------------
    V_init_val = np.sqrt(6.) / np.sqrt(n_output + n_input)

    V_weights = tf.get_variable("V_weights",
                                shape=[n_hidden, n_classes],
                                dtype=tf.float32,
                                initializer=tf.random_uniform_initializer(
                                    -V_init_val, V_init_val))
    V_bias = tf.get_variable("V_bias",
                             shape=[n_classes],
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(0.01))

    merged_list = tf.unstack(merged, axis=1)[-1]
    temp_out = tf.matmul(merged_list, V_weights)
    final_out = tf.nn.bias_add(temp_out, V_bias)
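    # only the hidden state at the final time step is projected to vocabulary
    # logits to predict the answer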

    answer_holder = tf.placeholder("int64", [None])

    # --- evaluate process ----------------------
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=final_out,
                                                       labels=answer_holder))
    correct_pred = tf.equal(tf.argmax(final_out, 1), answer_holder)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # --- Initialization ----------------------
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()

    # --- save result ----------------------
    folder = "./output/babi/" + str(
        qid
    ) + '/' + model  # + "_lambda=" + str(learning_rate) + "_beta=" + str(decay)
    filename = folder + "_h=" + str(n_hidden)
    filename = filename + "_lr=" + str(learning_rate)
    filename = filename + "_norm=" + str(norm)
    filename = filename + ".txt"
    if not os.path.exists(os.path.dirname(filename)):
        try:
            os.makedirs(os.path.dirname(filename))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    if not os.path.exists(os.path.dirname(folder + "/modelCheckpoint/")):
        try:
            print(folder + "/modelCheckpoint/")
            os.makedirs(os.path.dirname(folder + "/modelCheckpoint/"))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
    f = open(filename, 'w')
    f.write("########\n\n")
    f.write("## \tModel: %s with N=%d" % (model, n_hidden))
    f.write("########\n\n")

    # --- Training Loop ----------------------
    saver = tf.train.Saver()

    step = 0
    with tf.Session(config=tf.ConfigProto(log_device_placement=False,
                                          allow_soft_placement=False)) as sess:

        sess.run(init)

        steps = []
        losses = []
        accs = []

        while step < n_iter:
            a = int(step % (n_train / n_batch))
            batch_x = train_x[a * n_batch:(a + 1) * n_batch]
            batch_q = train_q[a * n_batch:(a + 1) * n_batch]
            batch_y = train_y[a * n_batch:(a + 1) * n_batch]

            train_dict = {
                sentence: batch_x,
                question: batch_q,
                answer_holder: batch_y
            }
            sess.run(optimizer, feed_dict=train_dict)
            acc = sess.run(accuracy, feed_dict=train_dict)
            loss = sess.run(cost, feed_dict=train_dict)

            print("Iter " + str(step) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))

            steps.append(step)
            losses.append(loss)
            accs.append(acc)
            step += 1

            if step % 200 == 1:

                saver.save(sess, folder + "/modelCheckpoint/step=" + str(step))

                val_dict = {
                    sentence: val_x,
                    question: val_q,
                    answer_holder: val_y
                }
                val_acc = sess.run(accuracy, feed_dict=val_dict)
                val_loss = sess.run(cost, feed_dict=val_dict)

                print("Validation Loss= " + \
                      "{:.6f}".format(val_loss) + ", Validation Accuracy= " + \
                      "{:.5f}".format(val_acc))
                f.write("%d\t%f\t%f\n" % (step, val_loss, val_acc))

        print("Optimization Finished!")

        # --- test ----------------------
        test_dict = {sentence: test_x, question: test_q, answer_holder: test_y}
        test_acc = sess.run(accuracy, feed_dict=test_dict)
        test_loss = sess.run(cost, feed_dict=test_dict)
        f.write("Test result: Loss= " + "{:.6f}".format(test_loss) + \
                    ", Accuracy= " + "{:.5f}".format(test_acc))
        print("Test result: Loss= " + "{:.6f}".format(test_loss) + \
                    ", Accuracy= " + "{:.5f}".format(test_acc))