Example #1
def evaluate(sess, x_, y_):
    """评估在某一数据上的准确率和损失"""
    data_len = len(x_)
    batch_eval = batch_iter(x_, y_, 128)
    total_loss = 0.0
    total_acc = 0.0
    for x_batch, y_batch in batch_eval:
        batch_len = len(x_batch)
        feed_dict = feed_data(x_batch, y_batch, 1.0)
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len

    return total_loss / data_len, total_acc / data_len
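
# The evaluate() helper above also relies on batch_iter() and feed_data(), which are
# not shown in this listing. A minimal sketch of what they are assumed to look like,
# inferred from how they are called (the model placeholder names input_x / input_y
# are assumptions; only keep_prob is confirmed by the training code further below):
def batch_iter(x, y, batch_size=64):
    """Yield successive (x_batch, y_batch) slices of at most batch_size examples."""
    data_len = len(x)
    num_batch = int((data_len - 1) / batch_size) + 1
    for i in range(num_batch):
        start = i * batch_size
        end = min((i + 1) * batch_size, data_len)
        yield x[start:end], y[start:end]


def feed_data(x_batch, y_batch, keep_prob):
    """Build the feed_dict for the module-level `model`, as used by evaluate() and train()."""
    return {
        model.input_x: x_batch,   # assumed placeholder name
        model.input_y: y_batch,   # assumed placeholder name
        model.keep_prob: keep_prob,
    }
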
def evalute_dev_set(run_number, model_number):
    checkpoint_file = cwd + "/runs/{}/checkpoints/model-{}".format(
        run_number, model_number)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            # Get the placeholders from the graph by name
            input = graph.get_operation_by_name("true_images").outputs[0]
            # Tensors we want to evaluate
            # NOTE: this has to be changed to "output/score_pred" with the new version of the model
            scores = graph.get_operation_by_name(
                "discr/output/score_pred").outputs[0]
            # Generate batches for evaluation
            batches = preprocessing.batch_iter(scored_data,
                                               FLAGS.batch_size,
                                               1,
                                               shuffle=False)
            # Predicting the score from the model
            # Running sum of absolute errors and example count (acc / n gives the MAE)
            acc = 0
            n = 0
            for test_batch in batches:
                batch_imgs = np.reshape(np.concatenate(test_batch[:, 0]),
                                        (-1, 1000, 1000))
                batch_score = np.reshape(test_batch[:, 1], (-1))
                batch_pred_scores = sess.run(scores, {input: batch_imgs})
                for i in range(len(batch_pred_scores)):
                    # Clip the prediction to the valid score range [0, 8]
                    tmp = max(0.0, min(8.0, batch_pred_scores[i]))
                    acc += abs(tmp - batch_score[i])
                    n += 1
            # Return the MAE on the dev set.
            return (acc / n)
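
The preprocessing.batch_iter generator used above (and in several of the later examples) is not shown in this listing either. A minimal sketch, assuming the common pattern of yielding slices of a NumPy array for a given number of epochs, with optional shuffling; the real implementation may differ:

import numpy as np

def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """Yield batches of rows from `data` for `num_epochs` epochs."""
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((data_size - 1) / batch_size) + 1
    for epoch in range(num_epochs):
        # Optionally reshuffle the data at the start of every epoch
        if shuffle:
            data = data[np.random.permutation(np.arange(data_size))]
        for batch_num in range(num_batches_per_epoch):
            start = batch_num * batch_size
            end = min((batch_num + 1) * batch_size, data_size)
            yield data[start:end]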
Example #3
    saver = tf.train.import_meta_graph(meta_graph_path)
    saver.restore(sess, checkpoint_folder)


    graph = tf.get_default_graph()
    graph_input_y = graph.get_tensor_by_name("input_y:0")
    graph_input_x = graph.get_tensor_by_name("input_x:0")
    graph_batch_size = graph.get_tensor_by_name("batch_size:0")

    graph_predictions = graph.get_tensor_by_name("output_layer/predictions:0")
    graph_losses = graph.get_tensor_by_name("output_layer/losses/Reshape_2:0")

    nlpdata = datasets.NLUProject1Dataset()
    nlpdatatest = datasets.NLUProject1TestDataset()
    print(len(nlpdatatest.test_x))
    batches = prep.batch_iter(list(zip(nlpdatatest.test_x, nlpdatatest.test_y)), con.BATCH_SIZE, con.n_epochs)
    #print(len(batches))
    for batch in batches:
        x_batch, y_batch = zip(*batch)

        feed_dict = {
            graph_input_x: x_batch,
            graph_input_y: y_batch,
            graph_batch_size: len(x_batch)
        }

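        # Run the restored per-batch loss tensor; graph_input_y is simply fetched back as fed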
        losses, input_y = sess.run(
            [graph_losses, graph_input_y],
            feed_dict)
        time_str = datetime.datetime.now().isoformat()
Example #4
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                acc.append(accuracy)
                losses.append(loss)
                time_str = datetime.datetime.now().isoformat()
                print("batch " + str(i + 1) + " in dev >>" +
                      " {}: loss {:g}, acc {:g}".format(
                          time_str, loss, accuracy))
                if writer:
                    writer.add_summary(summaries, step)
            print("\nMean accuracy=" + str(sum(acc) / len(acc)))
            print("Mean loss=" + str(sum(losses) / len(losses)))

        # Generate batches in one-hot-encoding format
        batches = preprocessing.batch_iter(x_train, y_train, FLAGS.batch_size,
                                           FLAGS.num_epochs)
        # Training loop. For each batch...
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            train_step(x_batch, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(x_dev, y_dev, writer=dev_summary_writer)
                print("")
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess,
                                  checkpoint_prefix,
                                  global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
Example #5
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # Load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Get the placeholders from the graph by name
        input = graph.get_operation_by_name("true_images").outputs[0]

        # Tensors we want to evaluate
        scores = graph.get_operation_by_name(
            "discr/output/score_pred").outputs[0]

        # Generate batches from the query dataset for one epoch
        batches = preprocessing.batch_iter(query_data,
                                           FLAGS.batch_size,
                                           1,
                                           shuffle=False)

        # Create the predictions directory if it does not exist yet
        timestamp = str(run_number)
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "predictions"))
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        # Defining the csv file name
        out_file = os.path.abspath(
            os.path.join(out_dir, "{}".format(timestamp)))
        print("Writing to {}\n".format(out_file))

        with open("{}.csv".format(out_file), "w") as file:
            # Create the required header

    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        # load the saved meta graph and restore variables
        saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
        saver.restore(sess, checkpoint_file)

        # Get the placeholders from the graph by name
        x = graph.get_operation_by_name("X").outputs[0]

        dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

        # Tensors we want to evaluate
        predictions = graph.get_operation_by_name("accuracy/predictions").outputs[0]

        # Generate batches for one epoch
        batches = preprocessing.batch_iter(x_test, FLAGS.batch_size, 1, shuffle=False)

        # Collect the predictions here
        all_predictions = []

        for x_test_batch in batches:
            batch_pred = sess.run(predictions, {x: x_test_batch, dropout_keep_prob: 1.0})
            all_predictions = np.concatenate([all_predictions, batch_pred])

correct_pred = float(sum(all_predictions == y_test))
print("Total number of text examples: {}".format(len(y_test)))
print("Accuracy: {:g}".format(correct_pred/float(len(y_test))))

                        sess.run(assign_op)
                        W_fc = cnn.W_fc.eval()
                        l2_norm = np.linalg.norm(W_fc)
                        # print(">>>>>>> NEW L2 NORM IS:" + str(l2_norm))

                def validation_step(x_batch, y_batch, writer=None):
                    feed_dict = {cnn.x: x_batch, cnn.y: y_batch, cnn.dropout_keep_prob: 1.0}
                    step, summaries, loss, accuracy = sess.run([global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                                                               feed_dict=feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                    if writer:
                        writer.add_summary(summaries, step)

                # Generate batches
                batches = preprocessing.batch_iter(list(zip(x_train, y_train)), tf.flags.FLAGS.batch_size,
                                                   tf.flags.FLAGS.n_epochs)

                # Training loop. For each batch...
                for batch in batches:
                    x_batch, y_batch = zip(*batch)
                    train_step(x_batch, y_batch)
                    current_step = tf.train.global_step(sess, global_step)
                    # if current_step % tf.flags.FLAGS.evaluate_every == 0:
                    #     print("\nEvaluation on Validation set:")
                    #     validation_step(x_valid, y_valid, writer=dev_summary_writer)
                    #     print("")
                    # if current_step % tf.flags.FLAGS.checkpoint_every == 0:
                    #     path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                    #     print("Saved model checkpoint to {}\n".format(path))
                print("\nEvaluation on Validation set:")
                validation_step(x_valid, y_valid, writer=dev_summary_writer)
Example #8
        # Testing step
        def test_step(x_batch, y_batch, writer=None):
            feed_dict = {
                cnn.input_x:x_batch,
                cnn.input_y:y_batch,
                cnn.dropout_keep_prob:1.0
            }
            step, summaries, loss, accuracy = sess.run([global_step, test_summary_op, cnn.loss, cnn.accuracy], feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)
            return loss, accuracy

        batches = preprocessing.batch_iter(list(zip(x_train, y_train)), batch_size, num_epochs)

        # Store the loss and accuracy on the training and test sets for plotting later
        train_loss_all = []
        train_accuracy_all = []
        test_loss_all = []
        test_accuracy_all = []
        # Run training: the training loop processes one batch per step
        for batch in batches:
            x_batch, y_batch = zip(*batch)
            loss_train, accuracy_train = train_step(x_batch, y_batch)
            train_loss_all.append(loss_train)
            train_accuracy_all.append(accuracy_train)
            current_step = tf.train.global_step(sess, global_step)  # read the current global step from the session
            # Run a test evaluation every evaluate_every steps
            if current_step % evaluate_every == 0:
Example #9
def train():
    print("Configuring TensorBoard and Saver...")

    # Configure TensorBoard. When retraining, delete the tensorboard folder first, otherwise the graphs will be overlaid on each other.
    if os.path.exists('tensorboard'):
        shutil.rmtree('tensorboard')

    tensorboard_dir = 'tensorboard/textcnn'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Configure the Saver
    saver = tf.train.Saver()
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Loading training and validation data...")
    # Load the training and validation sets
    start_time = time.time()
    x_train, y_train = process_file(train_dir, word_to_id, cat_to_id,
                                    config.seq_length)
    x_val, y_val = process_file(val_dir, word_to_id, cat_to_id,
                                config.seq_length)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # Create the session
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    print('Training and evaluating...')
    start_time = time.time()
    total_batch = 0  # total number of batches processed
    best_acc_val = 0.0  # best validation accuracy so far
    last_improved = 0  # batch at which the last improvement occurred
    require_improvement = 1000  # stop training early if there is no improvement for more than 1000 batches

    flag = False
    for epoch in range(config.num_epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter(x_train, y_train, config.batch_size)
        for x_batch, y_batch in batch_train:
            feed_dict = feed_data(x_batch, y_batch, config.dropout_keep_prob)

            if total_batch % config.save_per_batch == 0:
                # Every save_per_batch batches, write the training summaries to TensorBoard scalars
                s = session.run(merged_summary, feed_dict=feed_dict)
                writer.add_summary(s, total_batch)

            if total_batch % config.print_per_batch == 0:
                # Every print_per_batch batches, report performance on the training and validation sets
                feed_dict[model.keep_prob] = 1.0
                loss_train, acc_train = session.run([model.loss, model.acc],
                                                    feed_dict=feed_dict)
                loss_val, acc_val = evaluate(session, x_val, y_val)  # todo

                if acc_val > best_acc_val:
                    # Save the best result so far
                    best_acc_val = acc_val
                    last_improved = total_batch
                    saver.save(sess=session, save_path=save_path)
                    improved_str = '*'
                else:
                    improved_str = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss:{1:>6.2}, Train Acc:{2:>7.2%},' \
                      + ' Val Loss:{3:>6.2}, Val Acc:{4:>7.2%}, Time:{5} {6}'
                print(
                    msg.format(total_batch, loss_train, acc_train, loss_val,
                               acc_val, time_dif, improved_str))

            feed_dict[model.keep_prob] = config.dropout_keep_prob
            session.run(model.optim, feed_dict=feed_dict)  # run the optimization step
            total_batch += 1

            if total_batch - last_improved > require_improvement:
                # Validation accuracy has not improved for a long time; stop training early
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break  # break out of the batch loop
        if flag:  # same early stop: also break out of the epoch loop
            break
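
process_file, feed_data and get_time_dif used in train() above are project helpers that are not included in this listing. As an illustration, get_time_dif is assumed to be a small timing utility along these lines (a sketch, not the original implementation):

import time
from datetime import timedelta

def get_time_dif(start_time):
    """Return the elapsed wall-clock time since start_time, rounded to whole seconds."""
    end_time = time.time()
    return timedelta(seconds=int(round(end_time - start_time)))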
Example #10
##### TRAIN / TEST SPLIT #####
# Randomly shuffle data
np.random.seed(10)
# Train/test split for score
shuffled_indices = np.random.permutation(len(scored_data[:,0]))
dev_sample_index = int(FLAGS.dev_sample_percentage * float(len(shuffled_indices)))
test_indices = shuffled_indices[:dev_sample_index]
train_indices = shuffled_indices[dev_sample_index:]
train_score = scored_data[train_indices,:]
test_scored = scored_data[test_indices,:]
# Needed for the dev step - but we actually only evaluated on the first 64 images (not the full 96-image test set) for TensorBoard.
test_score_imgs = np.reshape(np.concatenate(test_scored[:, 0]), (-1,1000,1000))
test_score = np.reshape(test_scored[:, 1], (-1))
# Generate training batches for scored images
batches_scored = preprocessing.batch_iter(train_score, FLAGS.batch_size, FLAGS.num_epochs)

# Train/test split for labels
shuffled_indices = np.random.permutation(len(labeled_data[:,0]))
dev_sample_index = int(FLAGS.dev_sample_percentage * float(len(shuffled_indices)))
test_indices = shuffled_indices[:dev_sample_index]
train_indices = shuffled_indices[dev_sample_index:]
train_label = labeled_data[train_indices,:]
test_labeled = labeled_data[test_indices,:]
# Generate training batches for labeled images
batches_labeled = preprocessing.batch_iter(train_label, FLAGS.batch_size, FLAGS.num_epochs)


##### PRINTING THE USED PARAMETERS TO THE LOG FILE #####
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):