def _test_build_parsing_transforming_serving_input_fn(self, shape):
  basedir = tempfile.mkdtemp()
  raw_metadata = dataset_metadata.DatasetMetadata(
      schema=_make_raw_schema(shape, should_add_unused_feature=True))
  transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
  _write_transform_savedmodel(
      transform_savedmodel_dir, should_add_unused_feature=True)
  serving_input_fn = (
      input_fn_maker.build_parsing_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          transform_savedmodel_dir=transform_savedmodel_dir,
          raw_label_keys=['raw_label'],  # Labels are excluded
          raw_feature_keys=['raw_a', 'raw_b'],
          convert_scalars_to_vectors=True))
  examples = [_create_serialized_example(d)
              for d in [{'raw_a': 15, 'raw_b': 6},
                        {'raw_a': 12, 'raw_b': 17}]]

  with tf.Graph().as_default():
    with tf.Session().as_default() as session:
      outputs, labels, inputs = serving_input_fn()
      self.assertItemsEqual(
          set(outputs.keys()),
          {'transformed_a', 'transformed_b', 'transformed_label'})
      self.assertEqual(labels, None)
      self.assertEqual(set(inputs.keys()), {'examples'})

      feed_inputs = {inputs['examples']: examples}
      transformed_a, transformed_b = session.run(
          [outputs['transformed_a'], outputs['transformed_b']],
          feed_dict=feed_inputs)

      batch_shape = (len(examples), 1)
      sparse_batch_shape = batch_shape
      if not shape:
        # transformed_b is sparse so _convert_scalars_to_vectors did not fix it
        sparse_batch_shape = sparse_batch_shape[:1]
        transformed_b_dict = dict(
            zip([tuple(x + [0]) for x in transformed_b.indices.tolist()],
                transformed_b.values.tolist()))
      else:
        transformed_b_dict = dict(
            zip([tuple(x) for x in transformed_b.indices.tolist()],
                transformed_b.values.tolist()))

      self.assertEqual(batch_shape, tuple(transformed_a.shape))
      self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))
      self.assertEqual(21, transformed_a[0][0])
      self.assertEqual(9, transformed_b_dict[(0, 0)])
      self.assertEqual(29, transformed_a[1][0])
      self.assertEqual(-5, transformed_b_dict[(1, 0)])
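# The tests in this section rely on a `_create_serialized_example` helper
# whose body is not shown here. A minimal sketch, assuming each value is
# encoded as a single int64 feature of a tf.train.Example:
def _create_serialized_example(feature_dict):
  # Wrap each scalar in an Int64List feature and serialize the proto.
  features = {
      name: tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
      for name, value in feature_dict.items()
  }
  return tf.train.Example(
      features=tf.train.Features(feature=features)).SerializeToString()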
def make_experiment(output_dir):
  """Creates an experiment (see http://goo.gl/HcKHlT).

  Args:
    output_dir: The directory where the training output should be written.

  Returns:
    A `tf.contrib.learn.Experiment`.
  """
  estimator = tf.contrib.learn.Estimator(
      model_fn=model_builder(hparams=args), model_dir=output_dir)
  train_input_fn = make_input_fn(
      mode=tf.contrib.learn.ModeKeys.TRAIN,
      eval_type=args.eval_type,
      data_file_pattern=args.train_data_paths,
      randomize_input=args.randomize_input,
      batch_size=args.batch_size,
      queue_capacity=4 * args.batch_size)
  eval_input_fn = make_input_fn(
      mode=tf.contrib.learn.ModeKeys.EVAL,
      eval_type=args.eval_type,
      data_file_pattern=args.eval_data_paths,
      batch_size=args.eval_batch_size,
      queue_capacity=4 * args.eval_batch_size)
  raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)

  # Neither ratings nor candidate features are needed for serving.
  raw_label_keys = [LABEL_RATING_SCORE]
  # For serving, we only need query features.
  raw_feature_keys = [
      QUERY_RATED_MOVIE_IDS, QUERY_RATED_MOVIE_SCORES, QUERY_RATED_GENRE_IDS,
      QUERY_RATED_GENRE_FREQS, QUERY_RATED_GENRE_AVG_SCORES
  ]
  serving_input_fn = (
      input_fn_maker.build_parsing_transforming_serving_input_fn(
          raw_metadata,
          args.transform_savedmodel,
          raw_label_keys=raw_label_keys,
          raw_feature_keys=raw_feature_keys))
  export_strategy = tf.contrib.learn.utils.make_export_strategy(
      serving_input_fn,
      default_output_alternative_key=DEFAULT_OUTPUT_ALTERNATIVE)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_steps=(args.train_steps or
                   args.num_epochs * args.train_set_size // args.batch_size),
      eval_steps=args.eval_steps,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      eval_metrics=create_evaluation_metrics(args.eval_type),
      export_strategies=[export_strategy],
      # Do not remove: this is needed until b/36498507 is fixed.
      min_eval_frequency=1000)
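# `make_experiment` is typically handed to learn_runner, which drives
# training and evaluation and passes in the output directory. A usage
# sketch, assuming `args.output_dir` holds the output path (the CLI wiring
# is not shown in the snippet above):
from tensorflow.contrib.learn.python.learn import learn_runner

learn_runner.run(experiment_fn=make_experiment, output_dir=args.output_dir)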
def _test_build_parsing_transforming_serving_input_fn_with_label(
    self, raw_schema):
  basedir = tempfile.mkdtemp()
  raw_metadata = dataset_metadata.DatasetMetadata(schema=raw_schema)
  transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
  _write_transform_savedmodel(transform_savedmodel_dir)
  serving_input_fn = (
      input_fn_maker.build_parsing_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          transform_savedmodel_dir=transform_savedmodel_dir,
          raw_label_keys=[],  # Test labels are in output
          raw_feature_keys=None))
  examples = [_create_serialized_example(d)
              for d in [{'raw_a': 15, 'raw_b': 6, 'raw_label': 1},
                        {'raw_a': 12, 'raw_b': 17, 'raw_label': 2}]]

  with tf.Graph().as_default():
    with tf.Session().as_default() as session:
      outputs, labels, inputs = serving_input_fn()
      feed_inputs = {inputs['examples']: examples}
      transformed_a, transformed_b, transformed_label = session.run(
          [outputs['transformed_a'], outputs['transformed_b'],
           outputs['transformed_label']],
          feed_dict=feed_inputs)

      self.assertEqual(21, transformed_a[0][0])
      self.assertEqual(9, transformed_b[0][0])
      self.assertEqual(1000, transformed_label[0][0])
      self.assertEqual(29, transformed_a[1][0])
      self.assertEqual(-5, transformed_b[1][0])
      self.assertEqual(2000, transformed_label[1][0])
      self.assertEqual(
          set(outputs.keys()),
          {'transformed_a', 'transformed_b', 'transformed_label'})
      self.assertEqual(labels, None)
      self.assertEqual(set(inputs.keys()), {'examples'})
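# The transform SavedModel written by `_write_transform_savedmodel` is not
# shown, but the values asserted above (15+6=21, 15-6=9, labels scaled to
# 1000 and 2000) are consistent with a preprocessing_fn along these lines.
# This is an inference from the expected outputs, not the helper's actual
# body:
def preprocessing_fn(inputs):
  return {
      'transformed_a': inputs['raw_a'] + inputs['raw_b'],
      'transformed_b': inputs['raw_a'] - inputs['raw_b'],
      'transformed_label': inputs['raw_label'] * 1000,
  }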
def get_serving_input_fn(input_dir):
  """Creates operations to ingest data for inference.

  Args:
    input_dir: Directory containing tf.Transform metadata and transform_fn.

  Returns:
    A serving input function.
  """
  raw_metadata = metadata_io.read_metadata(
      posixpath.join(input_dir, constants.RAW_METADATA_DIR))
  transform_fn_path = posixpath.join(input_dir, constants.TRANSFORM_FN_DIR)
  return input_fn_maker.build_parsing_transforming_serving_input_fn(
      raw_metadata=raw_metadata,
      transform_savedmodel_dir=transform_fn_path,
      raw_label_keys=[constants.LABEL_COLUMN])
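# A usage sketch for `get_serving_input_fn`, mirroring the export wiring in
# the experiment functions below; `input_dir` stands for whatever directory
# the tf.Transform pipeline wrote its metadata and transform_fn to:
serving_input_fn = get_serving_input_fn(input_dir)
export_strategy = tf.contrib.learn.utils.make_export_strategy(
    serving_input_fn, exports_to_keep=5)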
def test_build_parsing_transforming_serving_input_fn(self):
  basedir = tempfile.mkdtemp()
  raw_metadata = dataset_metadata.DatasetMetadata(schema=_make_raw_schema())
  transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
  _write_transform_savedmodel(transform_savedmodel_dir)
  serving_input_fn = (
      input_fn_maker.build_parsing_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          transform_savedmodel_dir=transform_savedmodel_dir,
          raw_label_keys=['raw_label'],
          raw_feature_keys=['raw_a', 'raw_b']))
  examples = [_create_serialized_example(d)
              for d in [{'raw_a': 15, 'raw_b': 5},
                        {'raw_a': 12, 'raw_b': 17}]]

  with tf.Graph().as_default():
    with tf.Session().as_default() as session:
      outputs, _, inputs = serving_input_fn()
      feed_inputs = {inputs['examples']: examples}
      transformed_a, transformed_b = session.run(
          [outputs['transformed_a'], outputs['transformed_b']],
          feed_dict=feed_inputs)

      self.assertEqual(20, transformed_a[0][0])
      self.assertEqual(10, transformed_b[0][0])
      self.assertEqual(29, transformed_a[1][0])
      self.assertEqual(-5, transformed_b[1][0])
def get_experiment(output_dir):
  """Creates an experiment (see http://goo.gl/HcKHlT).

  Args:
    output_dir: The directory where the training output should be written.

  Returns:
    A `tf.contrib.learn.Experiment`.
  """
  columns = feature_columns(args.model_type, vocab_sizes, use_crosses)
  runconfig = tf.contrib.learn.RunConfig()
  cluster = runconfig.cluster_spec
  num_table_shards = max(1, runconfig.num_ps_replicas * 3)
  num_partitions = max(
      1, 1 + cluster.num_tasks('worker')
      if cluster and 'worker' in cluster.jobs else 0)
  model_dir = os.path.join(output_dir, MODEL_DIR)

  if args.model_type == LINEAR:
    estimator = tf.contrib.learn.LinearRegressor(
        model_dir=model_dir,
        feature_columns=columns,
        optimizer=tf.contrib.linear_optimizer.SDCAOptimizer(
            example_id_column=KEY_FEATURE_COLUMN,
            symmetric_l2_regularization=args.l2_regularization,
            num_loss_partitions=num_partitions,  # workers
            num_table_shards=num_table_shards))  # ps
  elif args.model_type == DEEP:
    estimator = tf.contrib.learn.DNNRegressor(
        hidden_units=args.hidden_units,
        feature_columns=columns,
        model_dir=model_dir)

  transformed_metadata = metadata_io.read_metadata(
      args.transformed_metadata_path)
  raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
  serving_input_fn = (
      input_fn_maker.build_parsing_transforming_serving_input_fn(
          raw_metadata,
          args.transform_savedmodel,
          raw_label_keys=[TARGET_FEATURE_COLUMN]))
  export_strategy = tf.contrib.learn.utils.make_export_strategy(
      serving_input_fn,
      exports_to_keep=5,
      default_output_alternative_key=None)

  train_input_fn = get_transformed_reader_input_fn(
      transformed_metadata, args.train_data_paths, args.batch_size,
      tf.contrib.learn.ModeKeys.TRAIN)
  eval_input_fn = get_transformed_reader_input_fn(
      transformed_metadata, args.eval_data_paths, args.batch_size,
      tf.contrib.learn.ModeKeys.EVAL)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_steps=(args.train_steps or
                   args.num_epochs * args.train_set_size // args.batch_size),
      eval_steps=args.eval_steps,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      export_strategies=export_strategy,
      min_eval_frequency=500)
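# `get_transformed_reader_input_fn` is called above but not defined in these
# snippets. A plausible minimal sketch, assuming tf.Transform's
# input_fn_maker.build_training_input_fn and a LABEL_COLUMN constant; the
# parameter names are assumptions:
def get_transformed_reader_input_fn(transformed_metadata, data_paths,
                                    batch_size, mode):
  # Read materialized (already-transformed) examples and split off the label.
  return input_fn_maker.build_training_input_fn(
      metadata=transformed_metadata,
      file_pattern=data_paths,
      training_batch_size=batch_size,
      label_keys=[LABEL_COLUMN],
      randomize_input=(mode == tf.contrib.learn.ModeKeys.TRAIN),
      num_epochs=(None if mode == tf.contrib.learn.ModeKeys.TRAIN else 1))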
def get_experiment(output_dir):
  """Creates an experiment (see http://goo.gl/HcKHlT).

  Args:
    output_dir: The directory where the training output should be written.

  Returns:
    A `tf.contrib.learn.Experiment`.
  """
  wide_columns, deep_columns = get_feature_columns(
      args.model_type, linear_use_crosses, deep_embedding_size_factor)
  runconfig = tf.contrib.learn.RunConfig()
  cluster = runconfig.cluster_spec
  num_table_shards = max(1, runconfig.num_ps_replicas * 3)
  num_partitions = max(
      1, 1 + cluster.num_tasks('worker')
      if cluster and 'worker' in cluster.jobs else 0)
  deep_hidden_units = [int(n) for n in args.deep_hidden_units.split(' ')]

  if args.model_type == WIDE:
    estimator = tf.contrib.learn.LinearClassifier(
        model_dir=output_dir,
        feature_columns=wide_columns,
        optimizer=tf.train.FtrlOptimizer(
            learning_rate=args.linear_learning_rate,
            l1_regularization_strength=args.linear_l1_regularization,
            l2_regularization_strength=args.linear_l2_regularization))
  elif args.model_type == DEEP:
    estimator = tf.contrib.learn.DNNClassifier(
        hidden_units=deep_hidden_units,
        feature_columns=deep_columns,
        model_dir=output_dir,
        dropout=args.deep_dropout,
        optimizer=tf.train.ProximalAdagradOptimizer(
            learning_rate=args.deep_learning_rate,
            initial_accumulator_value=0.1,
            l1_regularization_strength=args.deep_l1_regularization,
            l2_regularization_strength=args.deep_l2_regularization,
            use_locking=False))
  elif args.model_type == WIDE_N_DEEP:
    estimator = tf.contrib.learn.DNNLinearCombinedClassifier(
        model_dir=output_dir,
        linear_feature_columns=wide_columns,
        linear_optimizer=tf.train.FtrlOptimizer(
            learning_rate=args.linear_learning_rate,
            l1_regularization_strength=args.linear_l1_regularization,
            l2_regularization_strength=args.linear_l2_regularization),
        dnn_feature_columns=deep_columns,
        dnn_hidden_units=deep_hidden_units,
        dnn_dropout=args.deep_dropout,
        dnn_optimizer=tf.train.ProximalAdagradOptimizer(
            learning_rate=args.deep_learning_rate,
            initial_accumulator_value=0.1,
            l1_regularization_strength=args.deep_l1_regularization,
            l2_regularization_strength=args.deep_l2_regularization,
            use_locking=False))

  transformed_metadata = metadata_io.read_metadata(
      args.transformed_metadata_path)
  raw_metadata = metadata_io.read_metadata(args.raw_metadata_path)
  serving_input_fn = (
      input_fn_maker.build_parsing_transforming_serving_input_fn(
          raw_metadata,
          args.transform_savedmodel,
          raw_label_keys=[LABEL_COLUMN]))
  export_strategy = tf.contrib.learn.utils.make_export_strategy(
      serving_input_fn,
      exports_to_keep=5,
      default_output_alternative_key=None)

  train_input_fn = get_transformed_reader_input_fn(
      transformed_metadata, args.train_data_paths, args.train_batch_size,
      tf.contrib.learn.ModeKeys.TRAIN)
  eval_input_fn = get_transformed_reader_input_fn(
      transformed_metadata, args.eval_data_paths, args.eval_batch_size,
      tf.contrib.learn.ModeKeys.EVAL)

  train_set_size = args.train_set_size
  eval_metrics = {
      'MAP': metric_spec.MetricSpec(
          metric_fn=map_custom_metric,
          prediction_key='logistic',
          weight_key=DISPLAY_ID_COLUMN)
  }
  if args.full_evaluation_after_training:
    eval_steps = int(
        math.ceil(args.eval_set_size / float(args.eval_batch_size)))
    min_eval_frequency = None
    # Add a metric that computes the MAP over the predictions, considering
    # leaked clicks.
    eval_metrics['MAP_with_Leaked_Clicks'] = metric_spec.MetricSpec(
        metric_fn=map_with_leak_custom_metric,
        prediction_key='logistic',
        weight_key=DISPLAY_ID_AND_IS_LEAK_ENCODED_COLUMN)
  else:
    eval_steps = args.eval_steps
    min_eval_frequency = 2000

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_steps=(args.train_steps or
                   args.num_epochs * train_set_size // args.train_batch_size),
      eval_steps=eval_steps,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      min_eval_frequency=min_eval_frequency,
      export_strategies=export_strategy,
      eval_metrics=eval_metrics)