Example no. 1
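The excerpts below omit their import preamble; a plausible reconstruction, based on the names they use, is:

import os
import sys
import csv
import math
import time
import datetime

import numpy as np
import pandas as pd
import progressbar                      # used by Example no. 5
import matplotlib.pyplot as plt
import tensorflow as tf

# RNN, LSTMCell, AttentionLSTMCell, TensorFlowTrainable and Batcher are
# project-local classes, not shown on this page.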
def train(word2vec, dataset, parameters, class_weights):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    # logdir_dev = os.path.join(logdir, "dev")
    # if not os.path.exists(logdir_dev):
    #     os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    #device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    device_string = "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        headline_ph = tf.placeholder(tf.float32,
                                     shape=[
                                         parameters["sequence_length"], None,
                                         parameters["embedding_dim"]
                                     ],
                                     name="headline")
        body_ph = tf.placeholder(tf.float32,
                                 shape=[
                                     parameters["sequence_length"], None,
                                     parameters["embedding_dim"]
                                 ],
                                 name="body")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(
            filter_height=1,
            filter_width=parameters["embedding_dim"],
            in_channels=1,
            out_channels=parameters["num_units"],
            name="projecter")

        with tf.variable_scope(name_or_scope="headline"):
            headline = RNN(cell=LSTMCell,
                           num_units=parameters["num_units"],
                           embedding_dim=parameters["embedding_dim"],
                           projecter=projecter,
                           keep_prob=keep_prob_ph,
                           class_weights=class_weights)
            headline.process(sequence=headline_ph)

        with tf.variable_scope(name_or_scope="body"):
            body = RNN(cell=AttentionLSTMCell,
                       num_units=parameters["num_units"],
                       embedding_dim=parameters["embedding_dim"],
                       hiddens=headline.hiddens,
                       states=headline.states,
                       projecter=projecter,
                       keep_prob=keep_prob_ph,
                       class_weights=class_weights)
            body.process(sequence=body_ph)

        loss, loss_summary, accuracy, accuracy_summary = body.loss(
            targets=targets_ph)

        # NOTE: this penalty sums the raw parameter values; a conventional L2
        # weight decay would sum tf.square(parameter) instead.
        weight_decay = tf.reduce_sum([
            tf.reduce_sum(parameter)
            for parameter in headline.parameters + body.parameters
        ])

        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        test_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        test_summary_writer = tf.summary.FileWriter(logdir_test)

        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.summary.FileWriter(logdir)
        tf.train.write_graph(sess.graph_def,
                             modeldir,
                             "graph.pb",
                             as_text=False)
        loader = tf.train.Saver(tf.global_variables())

        optimizer = tf.train.AdamOptimizer(
            learning_rate=parameters["learning_rate"],
            name="ADAM",
            beta1=0.9,
            beta2=0.999)
        train_op = optimizer.minimize(global_loss)

        sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(
            dataset=dataset["train"],
            num_epochs=parameters["num_epochs"],
            batch_size=parameters["batch_size"]["train"],
            sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(
            math.ceil(
                float(len(dataset["train"]["targets"])) /
                parameters["batch_size"]["train"]))
        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                headline_ph: np.transpose(train_batch["headline"], (1, 0, 2)),
                body_ph: np.transpose(train_batch["body"], (1, 0, 2)),
                targets_ph: train_batch["targets"],
                keep_prob_ph: parameters["keep_prob"],
            }

            _, summary_str, train_loss, train_accuracy = sess.run(
                [train_op, train_summary_op, loss, accuracy],
                feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 10 == 0:
                sys.stdout.write(
                    "\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}%   "
                    .format(epoch + 1, parameters["num_epochs"],
                            train_step % num_step_by_epoch, num_step_by_epoch,
                            train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 500 == 0:
                test_batches = batcher.batch_generator(
                    dataset=dataset["test"],
                    num_epochs=1,
                    batch_size=parameters["batch_size"]["test"],
                    sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                        headline_ph:
                        np.transpose(test_batch["headline"], (1, 0, 2)),
                        body_ph:
                        np.transpose(test_batch["body"], (1, 0, 2)),
                        targets_ph:
                        test_batch["targets"],
                        keep_prob_ph:
                        1.,
                    }

                    summary_str, test_loss, test_accuracy = sess.run(
                        [test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print "\nTEST | loss={0:.2f}, accuracy={1:.2f}%   ".format(
                        test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
def model_load(word2vec, dataset, data_ids, parameters):
    model_number = raw_input("model num:")  # raw_input: input() would eval the string in Python 2
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
         saver = tf.train.import_meta_graph("../runs/attention_lstm/save-{}.meta".format(model_number))
         saver.restore(sess, "../runs/attention_lstm/save-{}".format(model_number))
         graph = tf.get_default_graph()
         headline = graph.get_tensor_by_name("headline:0")
         body = graph.get_tensor_by_name("body:0")
         targets = graph.get_tensor_by_name("targets:0")
         keep_prob = graph.get_tensor_by_name("keep_prob:0")

         batcher = Batcher(word2vec=word2vec)
         test_batches = batcher.batch_generator(dataset=dataset["test"], data_ids=data_ids["test"], num_epochs=1, batch_size=1000, sequence_length=parameters["sequence_length"], return_body=True)
         for test_step, (test_batch,_) in enumerate(test_batches):
             feed_dict = {
                           headline: np.transpose(test_batch["headline"], (1, 0, 2)),
                           body: np.transpose(test_batch["body"], (1, 0, 2)),
                           targets: test_batch["targets"],
                           keep_prob: 1.,
                          }
             accuracy_op = graph.get_tensor_by_name("accuracy/Mean:0")
             predictions_op = graph.get_tensor_by_name("accuracy/ToInt32:0")
             test_accuracy, predictions = sess.run([accuracy_op, predictions_op], feed_dict=feed_dict)
             print"\nTEST | accuracy={0:.2f}%   ".format(100.*test_accuracy)
             print "prediction={0}".format(predictions)
             predictions_name = []
             test_predictions = map(str, predictions)
             for predictions_num in range(len(test_predictions)):
                 #if test_predictions[predictions_num] == "0":
                    #predictions_name.append(["neutral"])
                 #if test_predictions[predictions_num] == "1":
                    #predictions_name.append(["entailment"])
                 #if test_predictions[predictions_num] == "2":
                    #predictions_name.append(["contradiction"])
                 #if test_predictions[predictions_num] == "3":
                    #predictions_name.append(["unrelated"])
                 if test_predictions[predictions_num] == "0":
                    predictions_name.append(["True"])
                 if test_predictions[predictions_num] == "1":
                    predictions_name.append(["False"])
             with open('../runs/attention_lstm/log/predictions.csv', "w") as f_predictions:
                  writer_predictions = csv.writer(f_predictions, lineterminator="\n")
                  writer_predictions.writerow(["0"])  # header row
                  writer_predictions.writerows(predictions_name)

             # Fetch the attention weights through auto-generated op names; the
             # "3*i+5" offset depends on the exact graph-construction order and
             # breaks if the graph changes (see the sketch after this function).
             alphas_values = []
             for i in range(parameters["sequence_length"] - 1):
                 num = str(3 * i + 5)
                 alphas_op = graph.get_tensor_by_name("body_1/ExpandDims_{}:0".format(num))
                 alphas_values.append(sess.run(alphas_op, feed_dict=feed_dict))
             for j in range(parameters["sequence_length"]-1):
                 if j==0:
                     attention_matrix = np.array(alphas_values[j])
                 else:
                     attention_matrix = np.append(attention_matrix, np.array(alphas_values[j]),axis = 0)
             attention_matrix = np.matrix(attention_matrix)

             df_headlines = pd.read_csv(os.path.join("../runs/attention_lstm/log/", "headlines.csv"), delimiter=",")
             df_bodies = pd.read_csv(os.path.join("../runs/attention_lstm/log/", "bodies.csv"), delimiter=",")
             df_headlines = df_headlines.dropna(axis=1)
             df_bodies = df_bodies.dropna(axis=1)
             header_headlines = range(len(df_headlines.columns))
             header_headlines = map(str, header_headlines)
             header_bodies = range(len(df_bodies.columns))
             header_bodies = map(str, header_bodies)
             headlines = df_headlines[header_headlines]
             bodies = df_bodies[header_bodies]
             resize_row_attention_matrix = np.delete(attention_matrix, np.s_[len(df_headlines.columns):parameters["sequence_length"]], 1)
             resize_line_attention_matrix = np.delete(resize_row_attention_matrix, np.s_[len(df_bodies.columns):parameters["sequence_length"]-1], 0)
             fig = plt.figure()
             ax = fig.add_subplot(1,1,1)
             ax.set_aspect('equal')
             plt.imshow(resize_line_attention_matrix, interpolation='nearest', cmap=plt.cm.Greys)
             plt.yticks(np.arange(0, len(bodies.iloc[0])), bodies.iloc[0])
             plt.xticks(np.arange(0, len(headlines.iloc[0])), headlines.iloc[0])
             plt.ylabel('articlebody', fontsize=18)
             plt.xlabel('headline', fontsize=18)
             plt.colorbar()
             plt.show()
             break
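The lookup above leans on brittle auto-generated names (`accuracy/Mean:0`, `body_1/ExpandDims_{3*i+5}:0`). A sturdier pattern, sketched below under the assumption that the attention vectors are available as a list `alphas` inside the `body` scope at graph-construction time, is to name them explicitly and fetch them all in one `sess.run`:

# Sketch only: `alphas` stands in for the per-step attention weights the
# AttentionLSTMCell computes; naming them makes the loader independent of
# op-creation order.
alphas_named = [tf.identity(a, name="alphas_{}".format(t))
                for t, a in enumerate(alphas)]

# Later, in model_load, the ExpandDims lookup becomes a single fetch:
alphas_ops = [graph.get_tensor_by_name("body/alphas_{}:0".format(t))
              for t in range(parameters["sequence_length"] - 1)]
alphas_values = sess.run(alphas_ops, feed_dict=feed_dict)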
Example no. 3
def test(word2vec, dataset, parameters, loadpath):
    print "1"
    device_string = "/gpu:{}".format(
        parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        print "2"
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[
                                         parameters["sequence_length"], None,
                                         parameters["embedding_dim"]
                                     ],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[
                                           parameters["sequence_length"], None,
                                           parameters["embedding_dim"]
                                       ],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(
            filter_height=1,
            filter_width=parameters["embedding_dim"],
            in_channels=1,
            out_channels=parameters["num_units"],
            name="projecter")

        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell,
                          num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter,
                          keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell,
                             num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens,
                             states=premise.states,
                             projecter=projecter,
                             keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(
            targets=targets_ph)

        loader = tf.train.Saver()
        loader.restore(sess, loadpath)

        batcher = Batcher(word2vec=word2vec, settings=parameters)
        test_batches = batcher.batch_generator(
            dataset=dataset["test"],
            num_epochs=1,
            batch_size=parameters["batch_size"]["test"],
            sequence_length=parameters["sequence_length"])
        print "2.5"
        for test_step, (test_batch, _) in enumerate(test_batches):
            print "3"
            feed_dict = {
                premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                hypothesis_ph: np.transpose(test_batch["hypothesis"],
                                            (1, 0, 2)),
                targets_ph: test_batch["targets"],
                keep_prob_ph: 1.,
            }

            test_loss, test_accuracy = sess.run([loss, accuracy],
                                                feed_dict=feed_dict)
            print "\nTEST | loss={0:.2f}, accuracy={1:.2f}%   ".format(
                test_loss, 100. * test_accuracy)
            print ""
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    device_string = "/gpu:{}".format(parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32, shape=[parameters["sequence_length"], None, parameters["embedding_dim"]], name="premises")
        hypothesis_ph = tf.placeholder(tf.float32, shape=[parameters["sequence_length"], None, parameters["embedding_dim"]], name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(filter_height=1, filter_width=parameters["embedding_dim"], in_channels=1, out_channels=parameters["num_units"], name="projecter")

        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell, num_units=parameters["num_units"], embedding_dim=parameters["embedding_dim"], projecter=projecter, keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell, num_units=parameters["num_units"], embedding_dim=parameters["embedding_dim"], hiddens=premise.hiddens, states=premise.states, projecter=projecter, keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(targets=targets_ph)

        weight_decay = tf.reduce_sum([tf.reduce_sum(parameter) for parameter in premise.parameters + hypothesis.parameters])

        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        test_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        test_summary_writer = tf.summary.FileWriter(logdir_test)

        saver = tf.train.Saver(max_to_keep=10)
        summary_writer = tf.summary.FileWriter(logdir)
        tf.train.write_graph(sess.graph_def, modeldir, "graph.pb", as_text=False)
        loader = tf.train.Saver(tf.global_variables())

        optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)
        train_op = optimizer.minimize(global_loss)

        sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec)
        train_batches = batcher.batch_generator(dataset=dataset["train"], num_epochs=parameters["num_epochs"], batch_size=parameters["batch_size"]["train"], sequence_length=parameters["sequence_length"])
        num_step_by_epoch = int(math.ceil(float(len(dataset["train"]["targets"])) / parameters["batch_size"]["train"]))
        for train_step, (train_batch, epoch) in enumerate(train_batches):
            feed_dict = {
                            premises_ph: np.transpose(train_batch["premises"], (1, 0, 2)),
                            hypothesis_ph: np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                            targets_ph: train_batch["targets"],
                            keep_prob_ph: parameters["keep_prob"],
                        }

            _, summary_str, train_loss, train_accuracy = sess.run([train_op, train_summary_op, loss, accuracy], feed_dict=feed_dict)
            train_summary_writer.add_summary(summary_str, train_step)
            if train_step % 100 == 0:
                sys.stdout.write("\rTRAIN | epoch={0}/{1}, step={2}/{3} | loss={4:.2f}, accuracy={5:.2f}%   ".format(epoch + 1, parameters["num_epochs"], train_step % num_step_by_epoch, num_step_by_epoch, train_loss, 100. * train_accuracy))
                sys.stdout.flush()
            if train_step % 5000 == 0:
                test_batches = batcher.batch_generator(dataset=dataset["test"], num_epochs=1, batch_size=parameters["batch_size"]["test"], sequence_length=parameters["sequence_length"])
                for test_step, (test_batch, _) in enumerate(test_batches):
                    feed_dict = {
                                    premises_ph: np.transpose(test_batch["premises"], (1, 0, 2)),
                                    hypothesis_ph: np.transpose(test_batch["hypothesis"], (1, 0, 2)),
                                    targets_ph: test_batch["targets"],
                                    keep_prob_ph: 1.,
                                }

                    summary_str, test_loss, test_accuracy = sess.run([test_summary_op, loss, accuracy], feed_dict=feed_dict)
                    print"\nTEST | loss={0:.2f}, accuracy={1:.2f}%   ".format(test_loss, 100. * test_accuracy)
                    print ""
                    test_summary_writer.add_summary(summary_str, train_step)
                    break
            if train_step % 5000 == 0:
                saver.save(sess, save_path=savepath, global_step=train_step)
        print ""
Example no. 5
def train(word2vec, dataset, parameters):
    modeldir = os.path.join(parameters["runs_dir"], parameters["model_name"])
    if not os.path.exists(modeldir):
        os.mkdir(modeldir)
    logdir = os.path.join(modeldir, "log")
    if not os.path.exists(logdir):
        os.mkdir(logdir)
    logdir_train = os.path.join(logdir, "train")
    if not os.path.exists(logdir_train):
        os.mkdir(logdir_train)
    logdir_test = os.path.join(logdir, "test")
    if not os.path.exists(logdir_test):
        os.mkdir(logdir_test)
    logdir_dev = os.path.join(logdir, "dev")
    if not os.path.exists(logdir_dev):
        os.mkdir(logdir_dev)
    savepath = os.path.join(modeldir, "save")

    device_string = "/gpu:{}".format(
        parameters["gpu"]) if parameters["gpu"] else "/cpu:0"
    with tf.device(device_string):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
        config_proto = tf.ConfigProto(allow_soft_placement=True,
                                      gpu_options=gpu_options)

        sess = tf.Session(config=config_proto)

        premises_ph = tf.placeholder(tf.float32,
                                     shape=[
                                         parameters["sequence_length"], None,
                                         parameters["embedding_dim"]
                                     ],
                                     name="premises")
        hypothesis_ph = tf.placeholder(tf.float32,
                                       shape=[
                                           parameters["sequence_length"], None,
                                           parameters["embedding_dim"]
                                       ],
                                       name="hypothesis")
        targets_ph = tf.placeholder(tf.int32, shape=[None], name="targets")
        keep_prob_ph = tf.placeholder(tf.float32, name="keep_prob")

        _projecter = TensorFlowTrainable()
        projecter = _projecter.get_4Dweights(
            filter_height=1,
            filter_width=parameters["embedding_dim"],
            in_channels=1,
            out_channels=parameters["num_units"],
            name="projecter")

        # optimizer = tf.train.AdamOptimizer(learning_rate=parameters["learning_rate"], name="ADAM", beta1=0.9, beta2=0.999)
        with tf.variable_scope(name_or_scope="premise"):
            premise = RNN(cell=LSTMCell,
                          num_units=parameters["num_units"],
                          embedding_dim=parameters["embedding_dim"],
                          projecter=projecter,
                          keep_prob=keep_prob_ph)
            premise.process(sequence=premises_ph)

        with tf.variable_scope(name_or_scope="hypothesis"):
            hypothesis = RNN(cell=AttentionLSTMCell,
                             num_units=parameters["num_units"],
                             embedding_dim=parameters["embedding_dim"],
                             hiddens=premise.hiddens,
                             states=premise.states,
                             projecter=projecter,
                             keep_prob=keep_prob_ph)
            hypothesis.process(sequence=hypothesis_ph)

        loss, loss_summary, accuracy, accuracy_summary = hypothesis.loss(
            targets=targets_ph)

        weight_decay = tf.reduce_sum([
            tf.reduce_sum(parameter)
            for parameter in premise.parameters + hypothesis.parameters
        ])

        global_loss = loss + parameters["weight_decay"] * weight_decay

        train_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        train_summary_writer = tf.summary.FileWriter(logdir_train, sess.graph)
        dev_summary_op = tf.summary.merge([loss_summary, accuracy_summary])
        dev_summary_writer = tf.summary.FileWriter(logdir_dev)

        saver = tf.train.Saver(max_to_keep=10)
        # summary_writer = tf.summary.FileWriter(logdir)
        tf.train.write_graph(sess.graph_def,
                             modeldir,
                             "graph.pb",
                             as_text=False)

        optimizer = tf.train.AdamOptimizer(
            learning_rate=parameters["learning_rate"],
            name="ADAM",
            beta1=0.9,
            beta2=0.999)
        train_op = optimizer.minimize(global_loss)

        sess.run(tf.global_variables_initializer())

        batcher = Batcher(word2vec=word2vec, settings=parameters)
        #train_split = "train"
        #train_batches = batcher.batch_generator(dataset=dataset[train_split], num_epochs=parameters["num_epochs"],
        # batch_size=parameters["batch_size"]["train"],
        # sequence_length=parameters["sequence_length"])
        #print("train data size: %d" % len(dataset["train"]["targets"]))
        #num_step_by_epoch = int(math.ceil(len(dataset[train_split]["targets"]) / parameters["batch_size"]["train"]))
        #best_dev_accuracy = 0
        print("train data size: %d" % len(dataset["train"]["targets"]))
        best_dev_accuracy = 0.0
        total_loss = 0.0
        timestamp = time.time()
        for epoch in range(parameters["num_epochs"]):
            print("epoch %d" % epoch)
            train_batches = batcher.batch_generator(
                dataset=dataset["train"],
                num_epochs=1,
                batch_size=parameters["batch_size"]["train"],
                sequence_length=parameters["sequence_length"])
            steps = len(dataset["train"]
                        ["targets"]) / parameters["batch_size"]["train"]

            # progress bar http://stackoverflow.com/a/3002114
            bar = progressbar.ProgressBar(maxval=steps // 10 + 1,
                                          widgets=[
                                              progressbar.Bar('=', '[', ']'),
                                              ' ',
                                              progressbar.Percentage()
                                          ])
            bar.start()
            for step, (train_batch, train_epoch) in enumerate(train_batches):
                feed_dict = {
                    premises_ph:
                    np.transpose(train_batch["premises"], (1, 0, 2)),
                    hypothesis_ph:
                    np.transpose(train_batch["hypothesis"], (1, 0, 2)),
                    targets_ph:
                    train_batch["targets"],
                    keep_prob_ph:
                    parameters["keep_prob"],
                }
                _, summary_str, train_loss, train_accuracy = sess.run(
                    [train_op, train_summary_op, loss, accuracy],
                    feed_dict=feed_dict)
                total_loss += train_loss
                train_summary_writer.add_summary(summary_str, step)
                if step % 100 == 0:  # eval 1 random dev batch
                    dev_batches = batcher.batch_generator(
                        dataset=dataset["dev"],
                        num_epochs=1,
                        batch_size=parameters["batch_size"]["dev"],
                        sequence_length=parameters["sequence_length"])
                    for dev_step, (dev_batch, _) in enumerate(dev_batches):
                        feed_dict = {
                            premises_ph:
                            np.transpose(dev_batch["premises"], (1, 0, 2)),
                            hypothesis_ph:
                            np.transpose(dev_batch["hypothesis"], (1, 0, 2)),
                            targets_ph:
                            dev_batch["targets"],
                            keep_prob_ph:
                            1.,
                        }

                        summary_str, dev_loss, dev_accuracy = sess.run(
                            [dev_summary_op, loss, accuracy],
                            feed_dict=feed_dict)
                        dev_summary_writer.add_summary(summary_str, step)
                        break
                    bar.update(step // 10 + 1)
            bar.finish()
            # eval on all dev
            dev_batches = batcher.batch_generator(
                dataset=dataset["dev"],
                num_epochs=1,
                batch_size=len(dataset["dev"]["targets"]),
                sequence_length=parameters["sequence_length"])
            dev_accuracy = 0
            for dev_step, (dev_batch, _) in enumerate(dev_batches):
                feed_dict = {
                    premises_ph: np.transpose(dev_batch["premises"],
                                              (1, 0, 2)),
                    hypothesis_ph: np.transpose(dev_batch["hypothesis"],
                                                (1, 0, 2)),
                    targets_ph: dev_batch["targets"],
                    keep_prob_ph: 1.,
                }
                summary_str, dev_loss, dev_accuracy = sess.run(
                    [dev_summary_op, loss, accuracy], feed_dict=feed_dict)
                print "\nDEV full | loss={0:.2f}, accuracy={1:.2f}%   ".format(
                    dev_loss, 100. * dev_accuracy)
                print ""
                if dev_accuracy > best_dev_accuracy:
                    best_dev_accuracy = dev_accuracy
                    saver.save(sess,
                               save_path=savepath + '_best',
                               global_step=(epoch + 1) * steps)
                break
            saver.save(sess,
                       save_path=savepath,
                       global_step=(epoch + 1) * steps)
            current_time = time.time()
            print("Iter %3d  Loss %-8.3f  Dev Acc %-6.2f  Time %-5.2f at %s" %
                  (epoch, total_loss, dev_accuracy,
                   (current_time - timestamp) / 60.0,
                   str(datetime.datetime.now())))
            total_loss = 0.0
        print ""