Example 1
def train_and_eval(model):
    for n in range(FLAGS.train_epochs):
        tf.compat.v1.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) +
                                  '=' * 30 + '\n')
        train_data_list = list_files(FLAGS.train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.compat.v1.logging.info('<EPOCH {}>: Start training {}'.format(
                n + 1, f))
            model.train(input_fn=lambda: input_fn(f, FLAGS.image_train_data,
                                                  'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    n + 1, f, elapse_time(t0)))
            print('-' * 80)
            tf.compat.v1.logging.info('<EPOCH {}>: Start evaluating {}'.format(
                n + 1, FLAGS.eval_data))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(FLAGS.eval_data, FLAGS.image_eval_data,
                                          'eval', FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish evaluation {}, take {} mins'.format(
                    n + 1, FLAGS.eval_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
        # every epochs_per_eval epochs, evaluate the model on the larger test dataset
        if (n + 1) % FLAGS.epochs_per_eval == 0:
            tf.compat.v1.logging.info('<EPOCH {}>: Start testing {}'.format(
                n + 1, FLAGS.test_data))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(FLAGS.test_data, FLAGS.image_test_data,
                                          'eval', FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # If None, the latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish testing {}, take {} mins'.format(
                    n + 1, FLAGS.test_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
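All of these examples rely on the project's `list_files` and `elapse_time` utilities (plus an `input_fn` that builds the dataset), which are not shown here. Below is a minimal, hypothetical sketch of the two small helpers, assuming `list_files` expands a directory into its files (per the "dir to file list" comment) and `elapse_time` reports elapsed minutes (per the "take {} mins" log messages); the real implementations live in the project's utility module.

# Hypothetical sketches only; not the project's actual helpers.
import os
import time

def list_files(data_dir):
    """Expand a directory into a list of file paths (or wrap a single file)."""
    if os.path.isdir(data_dir):
        return [os.path.join(data_dir, f) for f in sorted(os.listdir(data_dir))]
    return [data_dir]

def elapse_time(start_time):
    """Minutes elapsed since start_time, rounded for logging."""
    return round((time.time() - start_time) / 60, 2)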
Example 2
def dynamic_train(model):
    """Dynamic train mode.
    For example:
        train_data_files: [0301, 0302, 0303, ...]
        train mode:
            first take 0301 as train data, 0302 as test data;
            then keep training take 0302 as train data, 0303 as test data ...
    """
    data_files = list_files(FLAGS.train_data)
    data_files.sort()
    assert len(data_files) > 1, 'Dynamic train mode needs more than 1 data file'

    for i in range(len(data_files) - 1):
        train_data = data_files[i]
        test_data = data_files[i + 1]
        tf.compat.v1.logging.info(
            '=' * 30 + ' START TRAINING DATA: {} '.format(train_data) +
            '=' * 30 + '\n')
        for n in range(FLAGS.train_epochs):
            t0 = time.time()
            tf.compat.v1.logging.info(
                'START TRAIN DATA <{}> <EPOCH {}>'.format(train_data, n + 1))
            model.train(input_fn=lambda: input_fn(
                train_data, FLAGS.image_train_data, 'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                'FINISH TRAIN DATA <{}> <EPOCH {}> take {} mins'.format(
                    train_data, n + 1, elapse_time(t0)))
            print('-' * 80)
            tf.compat.v1.logging.info(
                'START EVALUATE TEST DATA <{}> <EPOCH {}>'.format(
                    test_data, n + 1))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(test_data, FLAGS.image_eval_data,
                                          'eval', FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                'FINISH EVALUATE TEST DATA <{}> <EPOCH {}>: take {} mins'.format(
                    test_data, n + 1, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
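The pairing described in the docstring simply walks the sorted file list and uses each file's successor as the held-out set. A tiny standalone illustration of that rolling pairing, with hypothetical file names:

# Hypothetical file names, just to show the rolling train/test pairing.
data_files = sorted(['0301', '0302', '0303', '0304'])
for i in range(len(data_files) - 1):
    print('train on {}, evaluate on {}'.format(data_files[i], data_files[i + 1]))
# train on 0301, evaluate on 0302
# train on 0302, evaluate on 0303
# train on 0303, evaluate on 0304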
Example 3
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    # if FLAGS.is_distribution:
    #     print("Using distribution tensoflow. Job_name:{} Task_index:{}"
    #           .format(CONFIG.distribution["job_name"], CONFIG.distribution["task_index"]))
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))
    checkpoint_path = FLAGS.checkpoint_path or model.latest_checkpoint()
    if checkpoint_path is None:
        raise ValueError(
            'No model checkpoint found, please check the model dir.')
    tf.logging.info('Using model checkpoint: {}'.format(checkpoint_path))

    print('\n')
    tf.logging.info('=' * 30 + ' START TESTING ' + '=' * 30)
    s_time = time.time()
    results = model.evaluate(
        input_fn=lambda: input_fn(FLAGS.test_data, FLAGS.image_test_data,
                                  'eval', FLAGS.batch_size),
        steps=None,  # Number of steps for which to evaluate model.
        hooks=None,
        checkpoint_path=checkpoint_path,  # resolved above; latest checkpoint if the flag is unset.
        name=None)
    tf.logging.info('=' * 30 +
                    ' FINISH TESTING, TAKE {} mins '.format(elapse_time(s_time)) +
                    '=' * 30)
    # Display evaluation metrics
    print('-' * 80)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
Example 4
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))

    tf.logging.info('=' * 30 + ' START PREDICTION ' + '=' * 30)
    t0 = time.time()
    predictions = model.predict(
        input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir,
                                  'pred', FLAGS.batch_size),
        predict_keys=None,
        hooks=None,
        checkpoint_path=FLAGS.checkpoint_path)  # None means use the latest checkpoint.
    tf.logging.info('=' * 30 +
                    ' FINISH PREDICTION, TAKE {} mins '.format(elapse_time(t0)) +
                    '=' * 30)

    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id,
                                                      100 * probability))
Example 5
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    # if FLAGS.is_distribution:
    #     print("Using distribution tensoflow. Job_name:{} Task_index:{}"
    #           .format(CONFIG.distribution["job_name"], CONFIG.distribution["task_index"]))
    # model info
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))

    checkpoint_path = FLAGS.checkpoint_path or model.latest_checkpoint()
    if checkpoint_path is None:
        raise ValueError('No model checkpoint found, please check the model dir.')
    tf.logging.info('Using model checkpoint: {}'.format(checkpoint_path))

    print('-' * 80)
    tf.logging.info('=' * 30 + ' START PREDICTION ' + '=' * 30)
    t0 = time.time()
    predictions = model.predict(
        input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir,
                                  'pred', FLAGS.batch_size),
        predict_keys=None,
        hooks=None,
        checkpoint_path=checkpoint_path)  # resolved above; latest checkpoint if the flag is unset.
    tf.logging.info('=' * 30 +
                    ' FINISH PREDICTION, TAKE {} mins '.format(elapse_time(t0)) +
                    '=' * 30)

    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id, 100 * probability))
Example 6
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    # assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    # model = build_estimator(model_dir, FLAGS.model_type)
    model = build_custom_estimator(model_dir, FLAGS.model_type)
    tf.compat.v1.logging.info('Build estimator: {}'.format(model))

    # weights and other parameters (e.g. Adagrad) of the model
    name_ls = model.get_variable_names()
    print_shape = True
    total_linear_weights = 0
    for name in name_ls:
        if print_shape:
            shape = model.get_variable_value(name).shape
            print(name, "\t", shape)
            if name.startswith("linear") and \
                    (name.endswith("weights") or name.endswith("bias")):
                total_linear_weights += np.prod(shape)
        else:
            print(name)
    if print_shape:
        print("Total parameters in linear model: {}".format(
            total_linear_weights))

    # embedding layer lookup
    sample_embedding = model.get_variable_value(
        'dnn/input_from_feature_columns/input_layer/ad_cates_embedding/embedding_weights'
    )
    ids = [10, 20, 30]
    with tf.compat.v1.Session() as sess:
        lookup = tf.nn.embedding_lookup(params=sample_embedding,
                                        ids=ids).eval()
        print(lookup)
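    # Note (assumption): Estimator.get_variable_value() returns a NumPy array,
    # so the same rows could also be read without a Session by plain indexing,
    # e.g. print(sample_embedding[ids]).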

    # predictions
    tf.compat.v1.logging.info('=' * 30 + ' START PREDICTION ' + '=' * 30)
    t0 = time.time()

    predictions = model.predict(
        input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir,
                                  'pred', FLAGS.batch_size),
        predict_keys=None,
        hooks=None,
        checkpoint_path=FLAGS.checkpoint_path)  # None means use the latest checkpoint.

    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id,
                                                      100 * probability))

    tf.compat.v1.logging.info(
        '=' * 30 + ' FINISH PREDICTION, TAKE {} mins '.format(elapse_time(t0)) +
        '=' * 30)
Example 7
def train(model):
    for n in range(FLAGS.train_epochs):
        tf.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) +
                        '=' * 30 + '\n')
        train_data_list = list_files(FLAGS.train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.logging.info('<EPOCH {}>: Start training {}'.format(n + 1, f))
            model.train(input_fn=lambda: input_fn(f, FLAGS.image_train_data,
                                                  'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    n + 1, f, elapse_time(t0)))