FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()
print('All related parameters:')
for attr, value in sorted(FLAGS.__flags.items()):
    print('{}={}'.format(attr.upper(), value))

print('Finished printing parameters.')

# Load the data
train_x, train_y, dev_x, dev_y = data_helper.load_dataset(FLAGS.raw_file)
print('load data finished!')

with tf.Session() as sess:

    han = HAN_model.HAN(FLAGS.vocab_size, FLAGS.num_classes,
                        FLAGS.embedding_size, FLAGS.hidden_size)

    with tf.name_scope('loss'):
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=han.input_y,
                                                    logits=han.out,
                                                    name='loss'))

    with tf.name_scope('accuracy'):
        predict = tf.argmax(han.out, axis=1, name='predict')
        label = tf.argmax(han.input_y, axis=1, name='label')
        acc = tf.reduce_mean(tf.cast(tf.equal(predict, label), tf.float32))

    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))
def evaluate(evalDataSet, ckpt_value, eval_value, time_list):
    with tf.Graph().as_default() as g, tf.device('/gpu:0'):

        # Placeholders for input, output and dropout
        feature_size = OPTION.SENT_HIDDEN_SIZE * 2
        input_x = tf.placeholder(tf.int32,
                                 [None, OPTION.SEQUENCE_LEN, OPTION.SENT_LEN],
                                 name="input_x")
        input_y = tf.placeholder(tf.int32, [None, OPTION.NUM_CLASSES],
                                 name="input_y")

        feature_size = feature_size * min(time_list[-1], OPTION.DP_DEPTH)
        if feature_size > 0:
            features_before = tf.placeholder(tf.float32, [None, feature_size],
                                             name="features_before")
        else:
            features_before = None

        han = HAN_model.Model(sequence_length=OPTION.SEQUENCE_LEN,
                              sent_length=OPTION.SENT_LEN,
                              num_classes=OPTION.NUM_CLASSES,
                              vocab_size=None,
                              embedding_size=OPTION.EMEBEDDING_DIMENSION,
                              Word2vec=True,
                              Trainable=False)

        # inference model.
        logits, _ = han.inference(input_x, features_before, eval_data=True)

        # Calculate loss.
        loss = myTF.calculate_cross_entropy_loss(logits, input_y)
        logits = tf.nn.softmax(logits)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            OPTION.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(OPTION.EVAL_DIR, g)

        last_eval_ckpt = ckpt_value
        best_eval_value = eval_value

        config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        config.gpu_options.per_process_gpu_memory_fraction = 0.5  # limit this process to at most 50% of the GPU's memory
        config.gpu_options.allow_growth = True  # allocate GPU memory on demand

        while True:
            # Start running operations on the Graph. allow_soft_placement must be set to
            # True to build towers on GPU, as some of the ops do not have GPU implementations.
            with tf.Session(config=config) as sess:
                ckpt = tf.train.get_checkpoint_state(OPTION.CHECKPOINT_DIR)
                if ckpt and ckpt.model_checkpoint_path:
                    # extract global_step from the checkpoint filename
                    global_step_for_restore = int(
                        ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
                    if global_step_for_restore > last_eval_ckpt:
                        # Restores from checkpoint
                        saver.restore(sess, ckpt.model_checkpoint_path)
                    else:
                        if tf.gfile.Exists("TRAIN_SUCCEED"):
                            print("Train terminated, eval terminating...")
                            return
                else:
                    print('No checkpoint file found')
                    time.sleep(FLAGS.eval_interval_secs)
                    continue

                if global_step_for_restore > last_eval_ckpt:
                    max_steps_per_epoch = int(
                        math.ceil(evalDataSet.get_dataset_size() /
                                  float(OPTION.EVAL_BATCH_SIZE)))
                    start_time = time.time()
                    total_predicted_value = []
                    total_true_value = []
                    total_loss = []
                    for step in range(max_steps_per_epoch):
                        test_data, test_label, test_features = evalDataSet.next_batch(
                            OPTION.EVAL_BATCH_SIZE)
                        if feature_size > 0:
                            feed_dict = {
                                input_x: test_data,
                                input_y: test_label,
                                features_before: test_features
                            }
                        else:
                            feed_dict = {
                                input_x: test_data,
                                input_y: test_label
                            }
                        predicted_value, true_value, loss_value = sess.run(
                            [logits, input_y, loss], feed_dict=feed_dict)
                        total_predicted_value.append(predicted_value)
                        total_true_value.append(true_value)
                        total_loss.append(loss_value)
                    duration = time.time() - start_time

                    # test_data, test_label = evalDataSet.next_batch(OPTION.EVAL_BATCH_SIZE)
                    summary = tf.Summary()
                    # summary.ParseFromString(sess.run(summary_op, feed_dict={input_x: test_data, input_y: test_label}))

                    total_predicted_value = np.concatenate(
                        total_predicted_value, axis=0)
                    total_true_value = np.concatenate(total_true_value, axis=0)
                    total_loss = np.concatenate(total_loss, axis=0)

                    dataset_size = evalDataSet.get_dataset_size()
                    total_predicted_value = total_predicted_value[0:dataset_size]
                    total_true_value = total_true_value[0:dataset_size]
                    total_loss = total_loss[0:dataset_size]

                    assert dataset_size == total_predicted_value.shape[0], \
                        'sample_count error!'

                    best_eval_value = evaluation_result(
                        OPTION.EVAL_DIR, total_predicted_value,
                        total_true_value, total_loss, global_step_for_restore,
                        best_eval_value, summary)
                    summary_writer.add_summary(summary,
                                               global_step_for_restore)

                    last_eval_ckpt = global_step_for_restore

            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
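# --- A hedged illustration of what the `evaluation_result` helper used above
# --- typically does (the real implementation is not shown here): compute
# --- accuracy from the softmax outputs and one-hot labels, attach the metrics
# --- to the summary, and track the best score seen so far.
def evaluation_result_sketch(eval_dir, predicted_value, true_value, losses,
                             global_step, best_eval_value, summary):
    accuracy = float(np.mean(
        np.argmax(predicted_value, axis=1) == np.argmax(true_value, axis=1)))
    mean_loss = float(np.mean(losses))
    summary.value.add(tag='eval/accuracy', simple_value=accuracy)
    summary.value.add(tag='eval/loss', simple_value=mean_loss)
    print('step %d: eval accuracy=%.4f, loss=%.4f' %
          (global_step, accuracy, mean_loss))
    if accuracy > best_eval_value:
        best_eval_value = accuracy
        # a real helper could copy the current checkpoint to 'model.ckpt-best' here
    return best_eval_value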
def train(newTrain, checkpoint, trainDataSet):
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Placeholders for input, output and dropout
        input_x = tf.placeholder(tf.int32,
                                 [None, OPTION.SEQUENCE_LEN, OPTION.SENT_LEN],
                                 name="input_x")
        input_y = tf.placeholder(tf.int32, [None, OPTION.NUM_CLASSES],
                                 name="input_y")

        han = HAN_model.Model(sequence_length=OPTION.SEQUENCE_LEN,
                              sent_length=OPTION.SENT_LEN,
                              num_classes=OPTION.NUM_CLASSES,
                              vocab_size=None,
                              embedding_size=OPTION.EMEBEDDING_DIMENSION,
                              Word2vec=True,
                              Trainable=False)

        # inference model.
        logits, _ = han.inference(input_x)

        # Calculate loss.
        loss = HAN_model.calculate_loss(logits, input_y)

        # Calculate accuracy
        accuracy = myTF.calculate_accuracy(logits, input_y)

        # updates the model parameters.
        train_op = myTF.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5  # limit this process to at most 50% of the GPU's memory
        config.gpu_options.allow_growth = OPTION.MEMORY_ALLOW_GROWTH  # allocate GPU memory on demand
        sess = tf.Session(config=config)

        first_step = 0
        if not newTrain:
            print('restoring...')
            if checkpoint == '0':  # choose the latest one
                ckpt = tf.train.get_checkpoint_state(OPTION.TRAIN_DIR)
                if ckpt and ckpt.model_checkpoint_path:
                    # new_saver = tf.train.import_meta_graph(ckpt.model_checkpoint_path+'.meta')
                    # Restores from checkpoint
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    global_step_for_restore = ckpt.model_checkpoint_path.split(
                        '/')[-1].split('-')[-1]
                    first_step = int(global_step_for_restore) + 1
                else:
                    print('No checkpoint file found')
                    return
            else:  #
                if os.path.exists(
                        os.path.join(OPTION.TRAIN_DIR,
                                     'model.ckpt-' + checkpoint + '.index')):
                    # new_saver = tf.train.import_meta_graph(
                    #     os.path.join(OPTION.TRAIN_DIR, 'model.ckpt-' + checkpoint + '.meta'))
                    saver.restore(
                        sess,
                        os.path.join(OPTION.TRAIN_DIR,
                                     'model.ckpt-' + checkpoint))
                    first_step = int(checkpoint) + 1
                else:
                    print('No checkpoint file found')
                    return
        else:
            sess.run(init)
            if os.path.exists(
                    os.path.join(OPTION.PRE_TRAIN_MODEL,
                                 'model.ckpt-pretrain.index')):
                # saver_load = tf.train.Saver(var_list=tf.get_collection('pretrained_variables'))
                saver_load = tf.train.Saver(var_list=tf.trainable_variables())
                print('load pretrained variables...')
                saver_load.restore(
                    sess,
                    os.path.join(OPTION.PRE_TRAIN_MODEL,
                                 'model.ckpt-pretrain'))

        summary_writer = tf.summary.FileWriter(OPTION.TRAIN_DIR, sess.graph)

        filename_train_log = os.path.join(OPTION.TRAIN_DIR, 'log_train')
        if os.path.exists(filename_train_log):
            file_train_log = open(filename_train_log, 'a')
        else:
            file_train_log = open(filename_train_log, 'w')

        max_steps_per_epoch = int(
            math.ceil(trainDataSet.get_dataset_size() /
                      float(OPTION.BATCH_SIZE)))
        max_steps = max_steps_per_epoch * OPTION.NUM_EPOCHS

        # ckpt_period = max_steps_per_epoch // OPTION.MIN_CKPTS
        # if ckpt_period > OPTION.MAX_CKPT_PERIOD:
        #     ckpt_period = OPTION.MAX_CKPT_PERIOD
        ckpt_period = OPTION.MAX_CKPT_PERIOD
        for step in range(first_step, max_steps):
            train_data, train_label = trainDataSet.next_batch(
                OPTION.BATCH_SIZE)
            start_time = time.time()
            _, loss_value, accuracy_value, current_global_step = sess.run(
                [train_op, loss, accuracy, global_step],
                feed_dict={
                    input_x: train_data,
                    input_y: train_label
                })
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
            assert step + 1 == current_global_step, 'step:%d, current_global_step:%d' % (
                step, current_global_step)

            current_epoch = int(
                current_global_step / float(max_steps_per_epoch)) + 1
            current_step = current_global_step % max_steps_per_epoch

            if current_global_step % 10 == 0:
                sec_per_batch = float(duration)
                format_str = '%s: step=%d(%d/%d), loss=%.4f, acc=%.4f; %.3f sec/batch' % (
                    datetime.now(), current_global_step, current_step,
                    current_epoch, loss_value, accuracy_value, sec_per_batch)
                print(format_str, file=file_train_log)
                print(format_str)

            if current_global_step % OPTION.SUMMARY_PERIOD == 0:
                summary_str = sess.run(summary_op,
                                       feed_dict={
                                           input_x: train_data,
                                           input_y: train_label
                                       })
                summary_writer.add_summary(summary_str, current_global_step)

            # Save the model checkpoint periodically. (named 'model.ckpt-global_step.meta')
            if current_global_step % ckpt_period == 0 or (current_global_step +
                                                          1) == max_steps:
                checkpoint_path = os.path.join(OPTION.TRAIN_DIR, 'model.ckpt')
                saver.save(sess,
                           checkpoint_path,
                           global_step=current_global_step)
        file_train_log.close()
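# --- A minimal, hypothetical entry point showing how train() above might be
# --- launched; `dataLoader.DataSet` and OPTION.TRAIN_DATA_DIR are illustrative
# --- assumptions, not part of the original code.
if __name__ == '__main__':
    trainDataSet = dataLoader.DataSet(OPTION.TRAIN_DATA_DIR)
    train(newTrain=True, checkpoint='0', trainDataSet=trainDataSet)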
def evaluate(trainDataSet, time, model_time):
    # NOTE: the `time` parameter shadows the `time` module inside this function.

    with tf.Graph().as_default() as g, tf.device('/cpu:0'):

        # Placeholders for input, output and dropout
        input_x = tf.placeholder(tf.int32, [None, OPTION.SEQUENCE_LEN, OPTION.SENT_LEN], name="input_x")
        input_y = tf.placeholder(tf.int32, [None, OPTION.NUM_CLASSES], name="input_y")

        han = MODEL.Model(sequence_length=OPTION.SEQUENCE_LEN,
                          sent_length=OPTION.SENT_LEN,
                          num_classes=OPTION.NUM_CLASSES,
                          vocab_size=None,
                          embedding_size=OPTION.EMEBEDDING_DIMENSION,
                          Word2vec=True,
                          Trainable=False)

        # inference model.
        logits, _ = han.inference(input_x, eval_data=True)

        # Calculate loss.
        loss = myTF.calculate_cross_entropy_loss(logits, input_y)

        # get model parameters
        # paramaters_list_reshape = han.get_paramaters_list_reshape()

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(OPTION.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(OPTION.EVAL_DIR, g)


        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        with tf.Session(config=tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)) as sess:

            if os.path.exists(os.path.join(OPTION.EVAL_DIR, 'model.ckpt-best.index')):
                # new_saver = tf.train.import_meta_graph(
                #     os.path.join(OPTION.TRAIN_DIR, 'model.ckpt-' + checkpoint + '.meta'))
                saver.restore(sess,
                              os.path.join(OPTION.EVAL_DIR, 'model.ckpt-best'))
            else:
                print('No checkpoint file found')
                return

            max_steps_per_epoch = int(
                math.ceil(trainDataSet.get_dataset_size() / float(OPTION.EVAL_BATCH_SIZE)))
            total_predicted_value = []
            for step in range(max_steps_per_epoch):
                train_data, train_label = trainDataSet.next_batch(OPTION.EVAL_BATCH_SIZE)
                # note: despite the variable name, this runs the per-example loss
                predicted_value = sess.run(loss,
                                           feed_dict={input_x: train_data, input_y: train_label})
                total_predicted_value.append(predicted_value)

            # test_data, test_label = evalDataSet.next_batch(OPTION.EVAL_BATCH_SIZE)
            summary = tf.Summary()
            # summary.ParseFromString(sess.run(summary_op, feed_dict={input_x: test_data, input_y: test_label}))

            total_predicted_value = np.concatenate(total_predicted_value, axis=0)

            assert trainDataSet.get_dataset_size() == total_predicted_value.shape[0], 'sample_count error!'

            detail_filename = os.path.join(OPTION.MODELPARA_DIR, 'loss_%d_%d' % (time, model_time))
            if os.path.exists(detail_filename):
                os.remove(detail_filename)
            np.savetxt(detail_filename, total_predicted_value, fmt='%f')
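# --- Hedged follow-up: the per-sample losses written above can be read back
# --- with NumPy, e.g. to rank hard examples; this helper is illustrative only
# --- and not part of the original code.
def load_per_sample_loss(time, model_time):
    detail_filename = os.path.join(OPTION.MODELPARA_DIR,
                                   'loss_%d_%d' % (time, model_time))
    per_sample_loss = np.loadtxt(detail_filename)
    # indices of samples sorted from highest to lowest loss, plus the raw losses
    return np.argsort(per_sample_loss)[::-1], per_sample_loss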
Example #5
def evaluate(evalDataSet, time, model_time, name='train'):
    with tf.Graph().as_default() as g, tf.device('/gpu:0'):

        feature_size = OPTION.SENT_HIDDEN_SIZE * 2
        # Placeholders for input, output and dropout
        input_x = tf.placeholder(tf.int32,
                                 [None, OPTION.SEQUENCE_LEN, OPTION.SENT_LEN],
                                 name="input_x")

        feature_size = feature_size * min(model_time, OPTION.DP_DEPTH)
        if feature_size > 0:
            features_before = tf.placeholder(tf.float32, [None, feature_size],
                                             name="features_before")
        else:
            features_before = None

        han = HAN_model.Model(sequence_length=OPTION.SEQUENCE_LEN,
                              sent_length=OPTION.SENT_LEN,
                              num_classes=OPTION.NUM_CLASSES,
                              vocab_size=None,
                              embedding_size=OPTION.EMEBEDDING_DIMENSION,
                              Word2vec=True,
                              Trainable=False)

        # inference model.
        _, features = han.inference(input_x, features_before, eval_data=True)

        # get model parameters
        # paramaters_list_reshape = han.get_paramaters_list_reshape()

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            OPTION.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(OPTION.EVAL_DIR, g)

        config = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        # config.gpu_options.per_process_gpu_memory_fraction = 0.5  # limit this process to at most 50% of the GPU's memory
        config.gpu_options.allow_growth = OPTION.MEMORY_ALLOW_GROWTH  # allocate GPU memory on demand

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU implementations.
        with tf.Session(config=config) as sess:

            if os.path.exists(
                    os.path.join(OPTION.EVAL_DIR, 'model.ckpt-best.index')):
                # new_saver = tf.train.import_meta_graph(
                #     os.path.join(OPTION.TRAIN_DIR, 'model.ckpt-' + checkpoint + '.meta'))
                saver.restore(sess,
                              os.path.join(OPTION.EVAL_DIR, 'model.ckpt-best'))
            else:
                print('No checkpoint file found')
                return

            max_steps_per_epoch = int(
                math.ceil(evalDataSet.get_dataset_size() /
                          float(OPTION.EVAL_BATCH_SIZE)))
            total_predicted_value = []
            for step in range(max_steps_per_epoch):
                test_data, test_features = evalDataSet.next_batch(
                    OPTION.EVAL_BATCH_SIZE)
                if feature_size > 0:
                    feed_dict = {
                        input_x: test_data,
                        features_before: test_features
                    }
                else:
                    feed_dict = {input_x: test_data}
                predicted_value = sess.run(features, feed_dict=feed_dict)
                total_predicted_value.append(predicted_value)

            # test_data, test_label = evalDataSet.next_batch(OPTION.EVAL_BATCH_SIZE)
            summary = tf.Summary()
            # summary.ParseFromString(sess.run(summary_op, feed_dict={input_x: test_data, input_y: test_label}))

            total_predicted_value = np.concatenate(total_predicted_value,
                                                   axis=0)

            total_predicted_value = total_predicted_value[
                0:evalDataSet.get_dataset_size()]

            assert evalDataSet.get_dataset_size() == total_predicted_value.shape[0], \
                'sample_count error!'

            detail_filename = os.path.join(
                OPTION.MODELPARA_DIR,
                'features_%s_%d_%d' % (name, time, model_time))
            if os.path.exists(detail_filename):
                os.remove(detail_filename)
            np.savetxt(detail_filename, total_predicted_value, fmt='%.4f')
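# --- Hedged follow-up: a sketch of how the feature file saved above could be
# --- loaded again and fed as the `features_before` placeholder of a later
# --- model; the helper name and arguments are illustrative only.
def load_features_before(name, time, model_time):
    detail_filename = os.path.join(
        OPTION.MODELPARA_DIR, 'features_%s_%d_%d' % (name, time, model_time))
    # one row per sample, feature_size columns, as written by np.savetxt above
    return np.loadtxt(detail_filename, dtype=np.float32)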