def build_and_test_estimator(self, model_type):
    """Ensure that model trains and minimizes loss."""
    model = build_estimator(self.temp_dir, model_type)
    # Train for 1 step to initialize model and evaluate initial loss
    model.train(
        input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=1),
        steps=1)
    initial_results = model.evaluate(
        input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=1))
    # Train further at batch size 8 (runs until input_fn's dataset is
    # exhausted, since no steps/max_steps are given) and evaluate final loss.
    # NOTE(review): a previous comment said "100 epochs at batch size 3",
    # which does not match the code; also mode 'eval' is passed for a
    # training pass — confirm against input_fn's mode semantics.
    model.train(
        input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=8))
    final_results = model.evaluate(
        input_fn=lambda: input_fn(TEST_CSV, None, 'eval', batch_size=1))
    logging.info('\n%s initial results: %s', model_type, initial_results)
    logging.info('\n%s final results: %s', model_type, final_results)
    # Ensure loss has decreased, while accuracy and both AUCs have increased.
    self.assertLess(final_results['loss'], initial_results['loss'])
    self.assertGreaterEqual(final_results['auc'], initial_results['auc'])
    self.assertGreaterEqual(final_results['auc_precision_recall'],
                            initial_results['auc_precision_recall'])
    self.assertGreaterEqual(final_results['accuracy'],
                            initial_results['accuracy'])
def train_and_eval_api(model):
    """Train and evaluate `model` via the tf.estimator high-level API."""
    # Named input callables instead of inline lambdas, so the spec
    # construction below reads cleanly.
    def _train_input():
        return input_fn(FLAGS.train_data, FLAGS.image_train_data,
                        FLAGS.batch_size)

    def _eval_input():
        return input_fn(FLAGS.eval_data, FLAGS.image_eval_data,
                        FLAGS.batch_size)

    spec_train = tf.estimator.TrainSpec(input_fn=_train_input,
                                        max_steps=10000)
    spec_eval = tf.estimator.EvalSpec(input_fn=_eval_input)
    tf.estimator.train_and_evaluate(model, spec_train, spec_eval)
def train_and_eval(model):
    """Alternately train on FLAGS.train_data and evaluate on FLAGS.eval_data
    for FLAGS.train_epochs epochs, logging elapsed time for each phase."""
    for n in range(FLAGS.train_epochs):
        tf.logging.info('=' * 30 + ' START EPOCH {} '.format(n + 1) +
                        '=' * 30 + '\n')
        train_data = FLAGS.train_data  # dir to file list
        t0 = time.time()
        tf.logging.info('<EPOCH {}>: Start training'.format(n + 1))
        # steps/max_steps are both None, so training runs until the input
        # dataset raised by input_fn is exhausted (one pass).
        model.train(
            input_fn=lambda: input_fn(train_data, 'train', FLAGS.batch_size),
            hooks=None,
            steps=None,
            max_steps=None,
            saving_listeners=None)
        tf.logging.info('<EPOCH {}>: Finish training {}, take {} mins'.format(
            n + 1, train_data, elapse_time(t0)))
        print('-' * 80)
        tf.logging.info('<EPOCH {}>: Start evaluating {}'.format(
            n + 1, FLAGS.eval_data))
        t0 = time.time()  # timer restarted for the evaluation phase
        results = model.evaluate(
            input_fn=lambda: input_fn(FLAGS.eval_data, 'eval', FLAGS.batch_size
                                      ),
            steps=None,  # Number of steps for which to evaluate model.
            hooks=None,
            checkpoint_path=None,  # latest checkpoint in model_dir is used.
            name=None)
        tf.logging.info(
            '<EPOCH {}>: Finish evaluation {}, take {} mins'.format(
                n + 1, FLAGS.eval_data, elapse_time(t0)))
        print('-' * 80)
        # Display evaluation metrics
        for key in sorted(results):
            print('{}: {}'.format(key, results[key]))
def train_and_eval(model):
    """Train and evaluate `model` epoch by epoch over listed data files.

    Each epoch trains on every file under FLAGS.train_data and evaluates
    on FLAGS.eval_data after each file.  Every FLAGS.epochs_per_eval
    epochs, the model is additionally scored on the (larger)
    FLAGS.test_data set, and all metrics are printed.
    """
    for n in range(FLAGS.train_epochs):
        tf.compat.v1.logging.info('=' * 30 +
                                  ' START EPOCH {} '.format(n + 1) +
                                  '=' * 30 + '\n')
        train_data_list = list_files(FLAGS.train_data)  # dir to file list
        for f in train_data_list:
            t0 = time.time()
            tf.compat.v1.logging.info('<EPOCH {}>: Start training {}'.format(
                n + 1, f))
            model.train(input_fn=lambda: input_fn(f, FLAGS.image_train_data,
                                                  'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    n + 1, f, elapse_time(t0)))
            print('-' * 80)
            tf.compat.v1.logging.info('<EPOCH {}>: Start evaluating {}'.format(
                n + 1, FLAGS.eval_data))
            t0 = time.time()
            results = model.evaluate(
                input_fn=lambda: input_fn(FLAGS.eval_data,
                                          FLAGS.image_eval_data, 'eval',
                                          FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish evaluation {}, take {} mins'.format(
                    n + 1, FLAGS.eval_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
        # every epochs_per_eval test the model (use larger test dataset)
        if (n + 1) % FLAGS.epochs_per_eval == 0:
            tf.compat.v1.logging.info('<EPOCH {}>: Start testing {}'.format(
                n + 1, FLAGS.test_data))
            # BUG FIX: t0 was not reset here, so the "Finish testing"
            # elapsed time also included the previous evaluation phase.
            t0 = time.time()
            # NOTE(review): mode 'pred' is passed to evaluate() here while
            # the per-file evaluation above uses 'eval' — confirm input_fn
            # still yields labels under 'pred'.
            results = model.evaluate(
                input_fn=lambda: input_fn(FLAGS.test_data,
                                          FLAGS.image_test_data, 'pred',
                                          FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=
                None,  # If None, the latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish testing {}, take {} mins'.format(
                    n + 1, FLAGS.test_data, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
def __init__(self, mode='wide and deep'):
    """Build input pipelines and initialize model-construction attributes."""
    self.mode = mode
    # Batched (features, labels) pipelines for the train and eval splits.
    self.x_train_y_train = input_fn(csv_data_file="../data/train",
                                    img_data_file=None,
                                    mode="train",
                                    batch_size=1)
    self.x_test_y_test = input_fn(csv_data_file="../data/eval",
                                  img_data_file=None,
                                  mode="eval",
                                  batch_size=1)
    # Model plumbing — populated later by the build methods.
    self.categ_inputs = None
    self.conti_input = None
    self.deep_component_outlayer = None
    self.logistic_input = None
    self.model = None
    # Feature columns and their dimensions for the wide and deep parts.
    wide_part, deep_part = _build_model_columns()
    self.wide_columns, self.wide_dim = wide_part
    self.deep_columns, self.deep_dim = deep_part
def dynamic_train(model):
    """Dynamic train mode.

    For example:
        train_data_files: [0301, 0302, 0303, ...]
        train mode: first take 0301 as train data, 0302 as test data;
        then keep training, take 0302 as train data, 0303 as test data ...
    """
    data_files = list_files(FLAGS.train_data)
    # The rolling train/test window relies on lexicographic (date) order.
    data_files.sort()
    assert len(data_files) > 1, 'Dynamic train mode need more than 1 data file'
    for i in range(len(data_files) - 1):
        train_data = data_files[i]
        test_data = data_files[i + 1]  # next file doubles as held-out test set
        tf.compat.v1.logging.info(
            '=' * 30 + ' START TRAINING DATA: {} '.format(train_data) +
            '=' * 30 + '\n')
        for n in range(FLAGS.train_epochs):
            t0 = time.time()
            tf.compat.v1.logging.info(
                'START TRAIN DATA <{}> <EPOCH {}>'.format(train_data, n + 1))
            # The lambda closes over train_data, but it is invoked within the
            # same loop iteration, so late binding is not an issue here.
            model.train(input_fn=lambda: input_fn(
                train_data, FLAGS.image_train_data, 'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                'FINISH TRAIN DATA <{}> <EPOCH {}> take {} mins'.format(
                    train_data, n + 1, elapse_time(t0)))
            print('-' * 80)
            tf.compat.v1.logging.info(
                'START EVALUATE TEST DATA <{}> <EPOCH {}>'.format(
                    test_data, n + 1))
            t0 = time.time()  # timer restarted for the evaluation phase
            results = model.evaluate(
                input_fn=lambda: input_fn(test_data, FLAGS.image_eval_data,
                                          'eval', FLAGS.batch_size),
                steps=None,  # Number of steps for which to evaluate model.
                hooks=None,
                checkpoint_path=None,  # latest checkpoint in model_dir is used.
                name=None)
            tf.compat.v1.logging.info(
                'FINISH EVALUATE TEST DATA <{}> <EPOCH {}>: take {} mins'.
                format(test_data, n + 1, elapse_time(t0)))
            print('-' * 80)
            # Display evaluation metrics
            for key in sorted(results):
                print('{}: {}'.format(key, results[key]))
def main(unused_argv):
    """Evaluate a trained estimator on FLAGS.test_data and print metrics.

    Resolves the checkpoint (flag override or latest in model_dir), fails
    fast if none exists, then runs a full-dataset evaluation.
    """
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))
    # Resolve the checkpoint once so validation and evaluation agree.
    checkpoint_path = FLAGS.checkpoint_path or model.latest_checkpoint()
    if checkpoint_path is None:
        raise ValueError(
            'No model checkpoint found, please check the model dir.')
    tf.logging.info('Using model checkpoint: {}'.format(checkpoint_path))
    print('\n')
    tf.logging.info('=' * 30 + ' START TESTING' + '=' * 30)
    s_time = time.time()
    results = model.evaluate(
        input_fn=lambda: input_fn(FLAGS.test_data, FLAGS.image_test_data,
                                  'eval', FLAGS.batch_size),
        steps=None,  # Number of steps for which to evaluate model.
        hooks=None,
        # BUG FIX: previously passed FLAGS.checkpoint_path here, ignoring
        # the resolved-and-validated path computed above.
        checkpoint_path=checkpoint_path,
        name=None)
    tf.logging.info('=' * 30 +
                    'FINISH TESTING, TAKE {}'.format(elapse_time(s_time)) +
                    '=' * 30)
    # Display evaluation metrics
    print('-' * 80)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
def main(unused_argv):
    """Run batch prediction and dump per-example class probabilities to CSV."""
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_custom_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))
    tf.logging.info('='*30+'START PREDICTION'+'='*30)
    t0 = time.time()
    predictions = model.predict(input_fn=lambda: input_fn(FLAGS.data_dir, 'pred', FLAGS.batch_size),
                                predict_keys=None,
                                hooks=None,
                                checkpoint_path=FLAGS.checkpoint_path)  # defaults None to use latest_checkpoint
    # NOTE(review): predict() returns a lazy generator, so the elapsed time
    # logged below excludes the actual inference performed in the loop.
    tf.logging.info('='*30+'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0))+'='*30)
    class_list = []
    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        # class_id = pred_dict['class_ids'][0]
        # probability = pred_dict['probabilities'][class_id]
        # class_list.append([class_id,probability])
        # pd.DataFrame(class_list,columns=['class_ids','probabilities']).to_csv('final_prob_train.csv')
        prob_list = pred_dict['probabilities']
        # One (index, probability) pair per class for this example.
        max_prob_list = [(i,x) for i,x in enumerate(prob_list)]  #if x>sorted(prob_list)[-4]
        class_list.append(str(max_prob_list))
    # Write one stringified probability list per example.
    # NOTE(review): assumes the CSV is written once after the loop — confirm
    # against the original (non-collapsed) source layout.
    pd.DataFrame(class_list,columns=['prob_dict']).to_csv('final_prob.csv')
def main(unused_argv):
    """Predict with a trained estimator and print the top class per example."""
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))

    banner = '=' * 30
    tf.logging.info(banner + 'START PREDICTION' + banner)
    start = time.time()

    # Named input callable instead of an inline lambda.
    def _pred_input():
        return input_fn(FLAGS.data_dir, FLAGS.image_data_dir, 'pred',
                        FLAGS.batch_size)

    predictions = model.predict(
        input_fn=_pred_input,
        predict_keys=None,
        hooks=None,
        checkpoint_path=FLAGS.checkpoint_path
    )  # defaults None to use latest_checkpoint
    tf.logging.info(
        banner + 'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(start)) +
        banner)

    for pred in predictions:  # dict{probabilities, classes, class_ids}
        top_class = pred['class_ids'][0]
        top_prob = pred['probabilities'][top_class]
        print('\nPrediction is "{}" ({:.1f}%)'.format(top_class,
                                                      100 * top_prob))
def main(unused_argv):
    """Resolve a checkpoint, run prediction, and print the top class per example."""
    print("Using TensorFlow version %s" % tf.__version__)
    assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    # model info
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_estimator(model_dir, FLAGS.model_type)
    tf.logging.info('Build estimator: {}'.format(model))
    # Fail fast if neither --checkpoint_path nor a saved checkpoint exists.
    checkpoint_path = FLAGS.checkpoint_path or model.latest_checkpoint()
    if checkpoint_path is None:
        raise ValueError('No model checkpoint found, please check the model dir.')
    tf.logging.info('Using model checkpoint: {}'.format(checkpoint_path))
    print('-' * 80)
    tf.logging.info('='*30+' START PREDICTION'+'='*30)
    t0 = time.time()
    predictions = model.predict(input_fn=lambda: input_fn(FLAGS.data_dir, FLAGS.image_data_dir, 'pred', FLAGS.batch_size),
                                predict_keys=None,
                                hooks=None,
                                checkpoint_path=checkpoint_path)  # defaults None to use latest_checkpoint
    # NOTE(review): predict() is lazy, so the elapsed time logged below does
    # not include the inference work done while iterating `predictions`.
    tf.logging.info('='*30+'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0))+'='*30)
    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id, 100 * probability))
def main(unused_argv):
    """Inspect model variables, demo an embedding lookup, then run prediction.

    Prints every variable name (and shape), totals the linear-part
    parameters, evaluates an example embedding lookup for a few ids, and
    finally streams predictions, printing the top class per example.
    """
    print("Using TensorFlow version %s" % tf.__version__)
    # assert "1.4" <= tf.__version__, "TensorFlow r1.4 or later is needed"
    if FLAGS.data_dir is None:
        raise ValueError("Must specify prediction data_file by --data_dir")
    print('Model type: {}'.format(FLAGS.model_type))
    model_dir = os.path.join(FLAGS.model_dir, FLAGS.model_type)
    print('Model directory: {}'.format(model_dir))
    model = build_custom_estimator(model_dir, FLAGS.model_type)
    tf.compat.v1.logging.info('Build estimator: {}'.format(model))

    # Weights and other parameters (e.g. Adagrad slots) of the model.
    name_ls = model.get_variable_names()
    print_shape = True
    total_linear_weights = 0
    for name in name_ls:
        if print_shape:
            shape = model.get_variable_value(name).shape
            print(name, "\t", shape)
            # Count only the linear part's weights and bias parameters.
            if name[:6] == "linear" and \
                    (name[-7:] == "weights" or name[-4:] == "bias"):
                total_linear_weights += np.prod(shape)
        else:
            print(name)
    if print_shape:
        print("Total parameters in linear model: {}".format(
            total_linear_weights))

    # Embedding-layer lookup example for a few hard-coded ids.
    sample_embedding = model.get_variable_value(
        'dnn/input_from_feature_columns/input_layer/ad_cates_embedding/embedding_weights'
    )
    ids = [10, 20, 30]
    with tf.compat.v1.Session() as sess:
        lookup = tf.nn.embedding_lookup(params=sample_embedding,
                                        ids=ids).eval()
        print(lookup)

    # Predictions (predict() is lazy; inference happens in the loop below).
    tf.compat.v1.logging.info('=' * 30 + 'START PREDICTION' + '=' * 30)
    t0 = time.time()
    predictions = model.predict(input_fn=lambda: input_fn(
        FLAGS.data_dir, FLAGS.image_data_dir, 'pred', FLAGS.batch_size),
                                predict_keys=None,
                                hooks=None,
                                checkpoint_path=FLAGS.checkpoint_path
                                )  # defaults None to use latest_checkpoint
    for pred_dict in predictions:  # dict{probabilities, classes, class_ids}
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print('\nPrediction is "{}" ({:.1f}%)'.format(class_id,
                                                      100 * probability))
    # BUG FIX: this closing log statement's string literal was split across
    # two source lines (an unterminated literal); rejoined into one literal.
    tf.compat.v1.logging.info(
        '=' * 30 +
        'FINISH PREDICTION, TAKE {} mins'.format(elapse_time(t0)) + '=' * 30)
def train(model):
    """Train `model` for FLAGS.train_epochs passes over the training files."""
    for epoch in range(FLAGS.train_epochs):
        tf.compat.v1.logging.info('=' * 30 +
                                  ' START EPOCH {} '.format(epoch + 1) +
                                  '=' * 30 + '\n')
        # Expand the training directory into its file list each epoch.
        for data_file in list_files(FLAGS.train_data):
            started = time.time()
            tf.compat.v1.logging.info('<EPOCH {}>: Start training {}'.format(
                epoch + 1, data_file))
            model.train(input_fn=lambda: input_fn(data_file,
                                                  FLAGS.image_train_data,
                                                  'train', FLAGS.batch_size),
                        hooks=None,
                        steps=None,
                        max_steps=None,
                        saving_listeners=None)
            tf.compat.v1.logging.info(
                '<EPOCH {}>: Finish training {}, take {} mins'.format(
                    epoch + 1, data_file, elapse_time(started)))
def test_input_fn(self):
    """input_fn should yield one example matching TEST_INPUT, with no labels."""
    feature_ops, label_ops = input_fn(self.input_csv, 'eval', batch_size=1)
    with tf.Session() as sess:
        feature_vals, label_vals = sess.run((feature_ops, label_ops))
    # Compare the evaluated features against the expected fixture.
    for key in USED_FEATURE_KEY:
        self.assertTrue(key in feature_vals)
        self.assertEqual(len(feature_vals[key]), 1)
        value = feature_vals[key][0]
        # Convert from bytes to string for Python 3.
        if isinstance(value, bytes):
            value = value.decode()
        self.assertEqual(TEST_INPUT[key], value)
    self.assertFalse(label_vals)
def test_input_fn(self):
    """Eagerly read one batch and check every feature value against TEST_INPUT."""
    tf.compat.v1.enable_eager_execution()
    features, labels = input_fn(self.input_csv, None, mode='eval', batch_size=1)
    # Compare the two features dictionaries.
    for KEY in USED_FEATURE_KEY:
        self.assertTrue(KEY in features)
        self.assertEqual(len(features[KEY][0]), len(TEST_INPUT[KEY]))
        feature_values = features[KEY][0].numpy()
        print(KEY, TEST_INPUT[KEY], feature_values)
        # Convert from bytes to string for Python 3.
        for i in range(len(TEST_INPUT[KEY])):
            feature_value = feature_values[i]
            if isinstance(feature_value, bytes):
                feature_value = feature_value.decode()
            if isinstance(feature_value, np.int32):
                # int features are compared as their string representation.
                feature_value = str(feature_value)
            if isinstance(feature_value, np.float32):
                # NOTE(review): this mutates the shared TEST_INPUT fixture
                # in place (casts the expected value to float32), which may
                # leak into other tests — confirm this is intended.
                TEST_INPUT[KEY][i] = np.float32(TEST_INPUT[KEY][i])
            self.assertEqual(TEST_INPUT[KEY][i], feature_value)
    self.assertFalse(labels)
def train_and_eval_api(model):
    """Drive training and evaluation through tf.estimator.train_and_evaluate."""
    # Named input callables instead of inline lambdas.
    def _train_input():
        return input_fn(FLAGS.train_data, 'train', FLAGS.batch_size)

    def _eval_input():
        return input_fn(FLAGS.eval_data, 'eval', FLAGS.batch_size)

    tf.estimator.train_and_evaluate(
        model,
        tf.estimator.TrainSpec(input_fn=_train_input),
        tf.estimator.EvalSpec(input_fn=_eval_input))