Example #1
    def build_and_test_estimator(self, model_type):
        """Ensure that model trains and minimizes loss."""
        model = build_estimator(self.temp_dir, model_type)

        # Train for 1 step to initialize model and evaluate initial loss
        model.train(
            input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=1),
            steps=1)
        initial_results = model.evaluate(
            input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=1))

        # Train further at batch size 8 and evaluate the final loss
        model.train(
            input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=8))
        final_results = model.evaluate(
            input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=1))

        logging.info('\n%s initial results: %s', model_type, initial_results)
        logging.info('\n%s final results: %s', model_type, final_results)

        # Ensure loss has decreased, while accuracy and both AUCs have increased.
        self.assertLess(final_results['loss'], initial_results['loss'])
        self.assertGreaterEqual(final_results['auc'], initial_results['auc'])
        self.assertGreaterEqual(final_results['auc_precision_recall'],
                                initial_results['auc_precision_recall'])
        self.assertGreaterEqual(final_results['accuracy'],
                                initial_results['accuracy'])
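
Note: every snippet on this page calls a project-specific input_fn. Below is a minimal sketch of the four-argument variant used above (csv_data_file, img_data_file, mode, batch_size); the column schema and the decision to ignore img_data_file are assumptions, not the project's actual implementation.

import tensorflow as tf

# Hypothetical CSV schema; the real project defines its own columns and defaults.
_CSV_COLUMNS = ['feature_a', 'feature_b', 'label']
_CSV_DEFAULTS = [[''], [0.0], [0]]


def input_fn(csv_data_file, img_data_file, mode, batch_size):
    """Return batched (features, labels) tensors for an Estimator."""

    def _parse_line(line):
        fields = tf.io.decode_csv(line, record_defaults=_CSV_DEFAULTS)
        features = dict(zip(_CSV_COLUMNS[:-1], fields[:-1]))
        return features, fields[-1]

    dataset = tf.data.TextLineDataset(csv_data_file).map(_parse_line)
    if mode == 'train':
        dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.batch(batch_size)
    # img_data_file handling is omitted in this sketch.
    return tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()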
Example #2
def train_and_eval_api(model):
    train_spec = tf.estimator.TrainSpec(input_fn=lambda: input_fn(
        FLAGS.train_data, FLAGS.image_train_data, FLAGS.batch_size),
                                        max_steps=10000)
    eval_spec = tf.estimator.EvalSpec(input_fn=lambda: input_fn(
        FLAGS.eval_data, FLAGS.image_eval_data, FLAGS.batch_size))
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
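
The TrainSpec/EvalSpec pattern above also accepts an evaluation cadence. Here is a sketch of the same function using the real steps, start_delay_secs and throttle_secs arguments of tf.estimator.EvalSpec; the 120/600 values are arbitrary placeholders.

def train_and_eval_api(model):
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn(FLAGS.train_data, FLAGS.image_train_data,
                                  FLAGS.batch_size),
        max_steps=10000)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn(FLAGS.eval_data, FLAGS.image_eval_data,
                                  FLAGS.batch_size),
        steps=None,            # evaluate on the full eval set
        start_delay_secs=120,  # wait before the first evaluation
        throttle_secs=600)     # at most one evaluation every 10 minutes
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)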
Example #3
def train_and_eval(model):
    for n in range(FLAGS.train_epochs):
        tf.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) +
                        '=' * 30 + '\n')
        train_data = FLAGS.train_data
        t0 = time.time()
        tf.logging.info('<EPOCH {}>: Start training'.format(n + 1))
        model.train(
            input_fn=lambda: input_fn(train_data, 'train', FLAGS.batch_size),
            hooks=None,
            steps=None,
            max_steps=None,
            saving_listeners=None)
        tf.logging.info('<EPOCH {}>: Finish training {}, take {} mins'.format(
            n + 1, train_data, elapse_time(t0)))
        print('-' * 80)
        tf.logging.info('<EPOCH {}>: Start evaluating {}'.format(
            n + 1, FLAGS.eval_data))
        t0 = time.time()
        results = model.evaluate(
            input_fn=lambda: input_fn(FLAGS.eval_data, 'eval',
                                      FLAGS.batch_size),
            steps=None,  # Number of steps for which to evaluate model.
            hooks=None,
            checkpoint_path=None,  # latest checkpoint in model_dir is used.
            name=None)
        tf.logging.info(
            '<EPOCH {}>: Finish evaluation {}, take {} mins'.format(
                n + 1, FLAGS.eval_data, elapse_time(t0)))
        print('-' * 80)
        # Display evaluation metrics
        for key in sorted(results):
            print('{}: {}'.format(key, results[key]))
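
The timing logs in this and the following examples assume an elapse_time helper that reports minutes since a start timestamp. A plausible minimal sketch (the exact rounding/formatting is a guess):

import time


def elapse_time(start_time):
    """Minutes elapsed since start_time, rounded to two decimals."""
    return round((time.time() - start_time) / 60.0, 2)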
Example #4
def train_and_eval(model):
    for n in range(FLAGS.train_epochs):
        tf.compat.v1.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) +
                                  '=' * 30 + '\n')
        train_data_list = list_files(FLAGS.train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.compat.v1.logging.info('<EPOCH {}>: Start training {}'.format(
                n + 1, f))
            model.train(input_fn=lambda: input_fn(f, FLAGS.image_train_data,
                                                  'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    n + 1, f, elapse_time(t0)))
            print('-' * 80)
            tf.compat.v1.logging.info('<EPOCH {}>: Start evaluating {}'.format(
                n + 1, FLAGS.eval_data))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(FLAGS.eval_data,
                                          FLAGS.image_eval_data, 'eval',
                                          FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish evaluation {}, take {} mins'.format(
                    n + 1, FLAGS.eval_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
        # Every epochs_per_eval epochs, test the model on the larger test dataset.
        if (n + 1) % FLAGS.epochs_per_eval == 0:
            tf.compat.v1.logging.info('<EPOCH {}>: Start testing {}'.format(
                n + 1, FLAGS.test_data))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(FLAGS.test_data,
                                          FLAGS.image_test_data, 'pred',
                                          FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # If None, the latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish testing {}, take {} mins'.format(
                    n + 1, FLAGS.test_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
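
list_files above is another assumed helper, which expands a data directory into the files inside it (and presumably passes a plain file path through unchanged). A minimal sketch; the hidden-file filter and sorting are assumptions:

import os


def list_files(data_path):
    """Return a sorted list of data files under data_path."""
    if os.path.isdir(data_path):
        return sorted(
            os.path.join(data_path, name)
            for name in os.listdir(data_path)
            if not name.startswith('.'))
    return [data_path]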
Example #5
    def __init__(self, mode='wide and deep'):
        self.mode = mode
        self.x_train_y_train = input_fn(
            csv_data_file="../data/train", img_data_file=None, mode="train", batch_size=1)
        self.x_test_y_test = input_fn(
            csv_data_file="../data/eval", img_data_file=None, mode="eval", batch_size=1)
        self.categ_inputs = None
        self.conti_input = None
        self.deep_component_outlayer = None
        self.logistic_input = None
        self.model = None

        (self.wide_columns, self.wide_dim), (self.deep_columns, self.deep_dim) = _build_model_columns()
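
_build_model_columns is assumed to return ((wide_columns, wide_dim), (deep_columns, deep_dim)). A toy sketch with made-up feature names, only to illustrate the expected shape of the return value, not the project's real column definitions:

def _build_model_columns():
    # Hypothetical features; the real project builds these from its config.
    cat = tf.feature_column.categorical_column_with_hash_bucket(
        'feature_a', hash_bucket_size=100)
    num = tf.feature_column.numeric_column('feature_b')
    wide_columns, wide_dim = [cat], 100
    deep_columns = [tf.feature_column.embedding_column(cat, dimension=8), num]
    deep_dim = 8 + 1
    return (wide_columns, wide_dim), (deep_columns, deep_dim)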
Example #6
def dynamic_train(model):
    """Dynamic train mode.
    For example:
        train_data_files: [0301, 0302, 0303, ...]
        train mode:
            first take 0301 as train data, 0302 as test data;
            then keep training take 0302 as train data, 0303 as test data ...
    """
    data_files = list_files(FLAGS.train_data)
    data_files.sort()
    assert len(data_files) > 1, 'Dynamic train mode needs more than 1 data file'

    for i in range(len(data_files) - 1):
        train_data = data_files[i]
        test_data = data_files[i + 1]
        tf.compat.v1.logging.info(
            '=' * 30 + ' START TRAINING DATA: {} '.format(train_data) +
            '=' * 30 + '\n')
        for n in range(FLAGS.train_epochs):
            t0 = time.time()
            tf.compat.v1.logging.info(
                'START TRAIN DATA <{}> <EPOCH {}>'.format(train_data, n + 1))
            model.train(input_fn=lambda: input_fn(
                train_data, FLAGS.image_train_data, 'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                'FINISH TRAIN DATA <{}> <EPOCH {}> take {} mins'.format(
                    train_data, n + 1, elapse_time(t0)))
            print('-' * 80)
            tf.compat.v1.logging.info(
                'START EVALUATE TEST DATA <{}> <EPOCH {}>'.format(
                    test_data, n + 1))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(test_data, FLAGS.image_eval_data,
                                          'eval', FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                'FINISH EVALUATE TEST DATA <{}> <EPOCH {}>: take {} mins'.
                format(test_data, n + 1, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
Example #7
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    # if FLAGS.is_distribution:
    #     print("Using distribution tensoflow. Job_name:{} Task_index:{}"
    #           .format(CONFIG.distribution["job_name"], CONFIG.distribution["task_index"]))
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))
    checkpoint_path = FLAGS.checkpoint_path or model.latest_checkpoint()
    if checkpoint_path is None:
        raise ValueError(
            'No model checkpoint found, please check the model dir.')
    tf.logging.info('Using model checkpoint: {}'.format(checkpoint_path))

    print('\n')
    tf.logging.info('=' * 30 + ' START TESTING' + '=' * 30)
    s_time = time.time()
    results = model.evaluate(
        input_fn=lambda: input_fn(FLAGS.test_data, FLAGS.image_test_data,
                                  'eval', FLAGS.batch_size),
        steps=None,  # Number of steps for which to evaluate model.
        hooks=None,
        checkpoint_path=checkpoint_path,  # resolved above; None means the latest checkpoint
        name=None)
    tf.logging.info('=' * 30 +
                    'FINISH TESTING, TAKE {}'.format(elapse_time(s_time)) +
                    '=' * 30)
    # Display evaluation metrics
    print('-' * 80)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
Example #8
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_custom_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))

    tf.logging.info('='*30+'START PREDICTION'+'='*30)
    t0 = time.time()
    predictions = model.predict(input_fn=lambda: input_fn(FLAGS.data_dir, 'pred', FLAGS.batch_size),
                                predict_keys=None,
                                hooks=None,
                                checkpoint_path=FLAGS.checkpoint_path)  # defaults None to use latest_checkpoint
    tf.logging.info('='*30+'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0))+'='*30)
    
    class_list = []
    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        prob_list = pred_dict['probabilities']
        # Keep every (class_index, probability) pair; optionally filter,
        # e.g. keep only x > sorted(prob_list)[-4] for the top-3 classes.
        max_prob_list = [(i, x) for i, x in enumerate(prob_list)]
        class_list.append(str(max_prob_list))
    pd.DataFrame(class_list, columns=['prob_dict']).to_csv('final_prob.csv')
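
If only the most likely classes matter, the same post-processing could instead keep a top-k slice per prediction rather than stringifying every probability. A sketch under the assumption that pred_dict has the same keys as above; top-3 and the output filename are arbitrary:

import numpy as np
import pandas as pd

rows = []
for pred_dict in predictions:
    probs = np.asarray(pred_dict['probabilities'])
    top = probs.argsort()[-3:][::-1]  # indices of the 3 largest probabilities
    rows.append({'top_class_ids': top.tolist(),
                 'top_probabilities': probs[top].tolist()})
pd.DataFrame(rows).to_csv('final_prob_topk.csv', index=False)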
Example #9
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))

    tf.logging.info('=' * 30 + 'START PREDICTION' + '=' * 30)
    t0 = time.time()
    predictions = model.predict(
        input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir,
                                  'pred', FLAGS.batch_size),
        predict_keys=None,
        hooks=None,
        checkpoint_path=FLAGS.checkpoint_path)  # None means use the latest checkpoint
    tf.logging.info('=' * 30 +
                    'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0)) +
                    '=' * 30)

    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id,
                                                      100 * probability))
Example #10
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    # if FLAGS.is_distribution:
    #     print("Using distribution tensoflow. Job_name:{} Task_index:{}"
    #           .format(CONFIG.distribution["job_name"], CONFIG.distribution["task_index"]))
    # model info
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))

    checkpoint_path = FLAGS.checkpoint_path or model.latest_checkpoint()
    if checkpoint_path is None:
        raise ValueError('No model checkpoint found, please check the model dir.')
    tf.logging.info('Using model checkpoint: {}'.format(checkpoint_path))

    print('-' * 80)
    tf.logging.info('='*30+' START PREDICTION'+'='*30)
    t0 = time.time()
    predictions = model.predict(input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir, 'pred', FLAGS.batch_size),
                                predict_keys=None,
                                hooks=None,
                                checkpoint_path=checkpoint_path)  # defaults None to use latest_checkpoint
    tf.logging.info('='*30+'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0))+'='*30)

    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id, 100 * probability))
Example #11
def main(unused_argv):
    print("Using TensorFlow version %s" % tf.__version__)
    # assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    # model = build_estimator(model_dir, FLAGS.model_type)
    model = build_custom_estimator(model_dir, FLAGS.model_type)
    tf.compat.v1.logging.info('Build estimator: {}'.format(model))

    # weights and other parameters (e.g. Adagrad) of the model
    name_ls = model.get_variable_names()
    print_shape = True
    total_linear_weights = 0
    for name in name_ls:
        if print_shape:
            shape = model.get_variable_value(name).shape
            print(name, "\t", shape)
            if name[:6] == "linear" and \
                    (name[-7:] == "weights"or name[-4:] == "bias"):
                total_linear_weights += np.prod(shape)
        else:
            print(name)
    if print_shape:
        print("Total parameters in linear model: {}".format(
            total_linear_weights))

    # embedding layer look up
    sample_embedding = model.get_variable_value(
        'dnn/input_from_feature_columns/input_layer/ad_cates_embedding/embedding_weights'
    )
    ids = [10, 20, 30]
    with tf.compat.v1.Session() as sess:
        lookup = tf.nn.embedding_lookup(params=sample_embedding,
                                        ids=ids).eval()
        print(lookup)

    # predictions
    tf.compat.v1.logging.info('=' * 30 + 'START PREDICTION' + '=' * 30)
    t0 = time.time()

    predictions = model.predict(
        input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir,
                                  'pred', FLAGS.batch_size),
        predict_keys=None,
        hooks=None,
        checkpoint_path=FLAGS.checkpoint_path)  # None means use the latest checkpoint

    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id,
                                                      100 * probability))

    tf.compat.v1.logging.info(
        '=' * 30 + 'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0)) +
        '=' * 30)
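
Since get_variable_value already returns a NumPy array, the session-based lookup earlier in this example can also be written as plain array indexing, which gathers the same rows:

lookup = sample_embedding[ids]  # equivalent to tf.nn.embedding_lookup for integer ids
print(lookup)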
Example #12
def train(model):
    for n in range(FLAGS.train_epochs):
        tf.compat.v1.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) + '=' * 30 + '\n')
        train_data_list = list_files(FLAGS.train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.compat.v1.logging.info('<EPOCH {}>: Start training {}'.format(n + 1, f))
            model.train(
                input_fn=lambda: input_fn(f, FLAGS.image_train_data, 'train', FLAGS.batch_size),
                hooks=None,
                steps=None,
                max_steps=None,
                saving_listeners=None)
            tf.compat.v1.logging.info('<EPOCH {}>: Finish training {}, take {} mins'.format(n + 1, f, elapse_time(t0)))
Example #13
    def test_input_fn(self):
        features, labels = input_fn(self.input_csv, 'eval', batch_size=1)
        with tf.Session() as sess:
            features, labels = sess.run((features, labels))
        # Compare the two features dictionaries.
        for key in USED_FEATURE_KEY:
            self.assertTrue(key in features)
            self.assertEqual(len(features[key]), 1)

            feature_value = features[key][0]
            # Convert from bytes to string for Python 3.
            if isinstance(feature_value, bytes):
                feature_value = feature_value.decode()
            self.assertEqual(TEST_INPUT[key], feature_value)
        self.assertFalse(labels)
Example #14
    def test_input_fn(self):
        tf.compat.v1.enable_eager_execution()
        features, labels = input_fn(self.input_csv,
                                    None,
                                    mode='eval',
                                    batch_size=1)
        # Compare the two features dictionaries.
        for KEY in USED_FEATURE_KEY:
            self.assertTrue(KEY in features)
            self.assertEqual(len(features[KEY][0]), len(TEST_INPUT[KEY]))

            feature_values = features[KEY][0].numpy()
            print(KEY, TEST_INPUT[KEY], feature_values)
            # Normalize value types before comparison (bytes to str, numeric casts).
            for i in range(len(TEST_INPUT[KEY])):
                feature_value = feature_values[i]
                if isinstance(feature_value, bytes):
                    feature_value = feature_value.decode()
                if isinstance(feature_value, np.int32):
                    feature_value = str(feature_value)
                if isinstance(feature_value, np.float32):
                    TEST_INPUT[KEY][i] = np.float32(TEST_INPUT[KEY][i])
                self.assertEqual(TEST_INPUT[KEY][i], feature_value)
        self.assertFalse(labels)
Example #15
def train_and_eval_api(model):
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: input_fn(FLAGS.train_data, 'train', FLAGS.batch_size))
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: input_fn(FLAGS.eval_data, 'eval', FLAGS.batch_size))
    tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
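
A hypothetical main() that drives train_and_eval_api, mirroring the main() functions in Examples #7 to #11; the flag and builder names are reused from those snippets and this wiring is not confirmed by the source:

def main(unused_argv):
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    model = build_estimator(model_dir, FLAGS.model_type)
    train_and_eval_api(model)


if __name__ == '__main__':
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run(main=main)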