def _test_build_default_transforming_serving_input_fn(
    self, shape, feed_input_values):
  basedir = tempfile.mkdtemp()

  raw_metadata = dataset_metadata.DatasetMetadata(
      schema=_make_raw_schema(shape))

  transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
  _write_transform_savedmodel(transform_savedmodel_dir)

  serving_input_fn = (
      input_fn_maker.build_default_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          raw_label_keys=['raw_label'],
          raw_feature_keys=['raw_a', 'raw_b'],
          transform_savedmodel_dir=transform_savedmodel_dir))

  with tf.Graph().as_default():
    with tf.Session().as_default() as session:
      outputs, _, inputs = serving_input_fn()
      feed_inputs = {
          inputs['raw_a']: feed_input_values[0],
          inputs['raw_b']: feed_input_values[1]
      }
      transformed_a, transformed_b = session.run(
          [outputs['transformed_a'], outputs['transformed_b']],
          feed_dict=feed_inputs)

      self.assertEqual(21, transformed_a[0][0])
      self.assertEqual(9, transformed_b[0][0])
      self.assertEqual(29, transformed_a[1][0])
      self.assertEqual(-5, transformed_b[1][0])
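# A minimal sketch of the transform the assertions above imply. The real
# graph comes from _write_transform_savedmodel, whose implementation is not
# shown here; back-deriving from the expected values (15 + 6 = 21,
# 15 - 6 = 9, 12 + 17 = 29, 12 - 17 = -5, and the 1000/2000 label values in
# the with-label test below), the saved transform plausibly computes:
def _hypothetical_preprocessing_fn(inputs):
  return {
      'transformed_a': inputs['raw_a'] + inputs['raw_b'],
      'transformed_b': inputs['raw_a'] - inputs['raw_b'],
      'transformed_label': inputs['raw_label'] * 1000,
  }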
def _test_build_default_transforming_serving_input_fn(
    self, shape, feed_input_values):
  basedir = tempfile.mkdtemp()

  raw_metadata = dataset_metadata.DatasetMetadata(
      schema=_make_raw_schema(shape, should_add_unused_feature=True))

  transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
  _write_transform_savedmodel(
      transform_savedmodel_dir, should_add_unused_feature=True)

  serving_input_fn = (
      input_fn_maker.build_default_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          raw_label_keys=['raw_label'],
          raw_feature_keys=['raw_a', 'raw_b'],
          transform_savedmodel_dir=transform_savedmodel_dir,
          convert_scalars_to_vectors=True))

  with tf.Graph().as_default():
    with tf.Session().as_default() as session:
      outputs, labels, inputs = serving_input_fn()

      self.assertItemsEqual(
          set(outputs.keys()),
          {'transformed_a', 'transformed_b', 'transformed_label'})
      self.assertEqual(labels, None)
      self.assertEqual(set(inputs.keys()), {'raw_a', 'raw_b'})

      feed_inputs = {
          inputs['raw_a']: feed_input_values[0],
          inputs['raw_b']: feed_input_values[1]
      }
      transformed_a, transformed_b = session.run(
          [outputs['transformed_a'], outputs['transformed_b']],
          feed_dict=feed_inputs)

      # transformed_label appears in outputs but is not computable from the
      # feature placeholders alone, so fetching it must fail.
      with self.assertRaises(Exception):
        session.run(outputs['transformed_label'])

      batch_shape = (len(feed_input_values[0]), 1)
      sparse_batch_shape = batch_shape

      if not shape:
        # transformed_b is sparse, so _convert_scalars_to_vectors did not
        # fix it.
        sparse_batch_shape = sparse_batch_shape[:1]
        transformed_b_dict = dict(
            zip([tuple(x + [0]) for x in transformed_b.indices.tolist()],
                transformed_b.values.tolist()))
      else:
        transformed_b_dict = dict(
            zip([tuple(x) for x in transformed_b.indices.tolist()],
                transformed_b.values.tolist()))

      self.assertEqual(batch_shape, tuple(transformed_a.shape))
      self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))

      self.assertEqual(21, transformed_a[0][0])
      self.assertEqual(9, transformed_b_dict[(0, 0)])
      self.assertEqual(29, transformed_a[1][0])
      self.assertEqual(-5, transformed_b_dict[(1, 0)])
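# Hypothetical invocation of the helper above; the feed values are
# back-derived from the assertions (raw_a = [15, 12], raw_b = [6, 17]), and
# an empty shape is assumed to exercise the scalar branch guarded by
# `if not shape:`:
def test_serving_input_fn_scalars(self):
  self._test_build_default_transforming_serving_input_fn(
      [], [[15, 12], [6, 17]])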
def train_and_evaluate(output_dir):
  review_column = feature_column.sparse_column_with_integerized_feature(
      const.REVIEW_COLUMN, bucket_size=vocab_size + 1, combiner='sum')
  weighted_reviews = feature_column.weighted_sparse_column(
      review_column, const.REVIEW_WEIGHT)

  estimator = learn.LinearClassifier(
      feature_columns=[weighted_reviews],
      n_classes=2,
      model_dir=output_dir,
      config=tf.contrib.learn.RunConfig(save_checkpoints_secs=30))

  transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
  raw_metadata = metadata_io.read_metadata(raw_metadata_dir)

  train_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_train_file_pattern,
      training_batch_size=train_batch_size,
      label_keys=[const.LABEL_COLUMN])
  eval_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_test_file_pattern,
      training_batch_size=1,
      label_keys=[const.LABEL_COLUMN])
  serving_input_fn = (
      input_fn_maker.build_default_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          transform_savedmodel_dir=output_dir + '/transform_fn',
          raw_label_keys=[],
          raw_feature_keys=[const.REVIEW_COLUMN]))

  export_strategy = saved_model_export_utils.make_export_strategy(
      serving_input_fn,
      exports_to_keep=5,
      default_output_alternative_key=None)

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_steps=train_num_epochs * num_train_instances / train_batch_size,
      eval_steps=num_test_instances,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      export_strategies=export_strategy,
      min_eval_frequency=500)
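# train_and_evaluate matches the experiment_fn contract used by
# tf.contrib.learn's learn_runner (take an output_dir, return an
# Experiment), so a plausible driver looks like this; the output path is
# illustrative:
from tensorflow.contrib.learn.python.learn import learn_runner

learn_runner.run(experiment_fn=train_and_evaluate,
                 output_dir='/tmp/sentiment_model')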
def train_and_evaluate(transformed_train_filepattern,
                       transformed_test_filepattern,
                       transformed_metadata_dir,
                       serving_graph_dir):
  """Train the model on training data and evaluate on test data.

  Args:
    transformed_train_filepattern: File pattern for transformed training data
      shards.
    transformed_test_filepattern: File pattern for transformed test data
      shards.
    transformed_metadata_dir: Directory containing transformed data metadata.
    serving_graph_dir: Directory to save the serving graph.

  Returns:
    The results from the estimator's 'evaluate' method.
  """
  # Wrap scalars as real valued columns.
  real_valued_columns = [
      feature_column.real_valued_column(key) for key in NUMERIC_COLUMNS
  ]

  # Wrap categorical columns. Note the combiner is irrelevant since the input
  # only has one value set per feature per instance.
  one_hot_columns = [
      feature_column.sparse_column_with_integerized_feature(
          key, bucket_size=bucket_size, combiner='sum')
      for key, bucket_size in zip(CATEGORICAL_COLUMNS, BUCKET_SIZES)
  ]

  estimator = learn.LinearClassifier(real_valued_columns + one_hot_columns)

  transformed_metadata = metadata_io.read_metadata(transformed_metadata_dir)
  train_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_train_filepattern,
      training_batch_size=TRAIN_BATCH_SIZE,
      label_keys=[LABEL_COLUMN])

  # Estimate the model using the default optimizer.
  estimator.fit(
      input_fn=train_input_fn,
      max_steps=TRAIN_NUM_EPOCHS * NUM_TRAIN_INSTANCES / TRAIN_BATCH_SIZE)

  # Write the serving graph to disk for use in tf.serving.
  in_columns = [
      'age', 'workclass', 'education', 'education-num', 'marital-status',
      'occupation', 'relationship', 'race', 'sex', 'capital-gain',
      'capital-loss', 'hours-per-week', 'native-country'
  ]
  if serving_graph_dir is not None:
    # raw_data_metadata is assumed to be defined at module scope, alongside
    # the NUMERIC_COLUMNS / CATEGORICAL_COLUMNS constants used above.
    serving_input_fn = (
        input_fn_maker.build_default_transforming_serving_input_fn(
            raw_metadata=raw_data_metadata,
            transform_savedmodel_dir=serving_graph_dir + '/transform_fn',
            raw_label_keys=[],
            raw_feature_keys=in_columns))
    estimator.export_savedmodel(serving_graph_dir, serving_input_fn)

  # Evaluate model on test dataset.
  eval_input_fn = input_fn_maker.build_training_input_fn(
      transformed_metadata,
      transformed_test_filepattern,
      training_batch_size=1,
      label_keys=[LABEL_COLUMN])

  return estimator.evaluate(input_fn=eval_input_fn, steps=NUM_TEST_INSTANCES)
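# Illustrative call with hypothetical paths; the real file patterns come
# from wherever the preprocessing pipeline wrote the transformed data:
results = train_and_evaluate(
    transformed_train_filepattern='/tmp/census/train*',
    transformed_test_filepattern='/tmp/census/test*',
    transformed_metadata_dir='/tmp/census/transformed_metadata',
    serving_graph_dir='/tmp/census/serving_graph')
print(results)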
def _test_build_default_transforming_serving_input_fn_with_label(
    self, shape, feed_input_values):
  # TODO(b/123241798): use TEST_TMPDIR
  basedir = tempfile.mkdtemp()

  raw_metadata = dataset_metadata.DatasetMetadata(
      schema=_make_raw_schema(shape))

  transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
  _write_transform_savedmodel(transform_savedmodel_dir)

  serving_input_fn = (
      input_fn_maker.build_default_transforming_serving_input_fn(
          raw_metadata=raw_metadata,
          raw_label_keys=[],  # Test labels are in output
          raw_feature_keys=None,
          transform_savedmodel_dir=transform_savedmodel_dir,
          convert_scalars_to_vectors=True))

  with tf.Graph().as_default():
    with tf.compat.v1.Session().as_default() as session:
      outputs, labels, inputs = serving_input_fn()

      self.assertCountEqual(
          set(outputs.keys()),
          {'transformed_a', 'transformed_b', 'transformed_label'})
      self.assertEqual(labels, None)
      self.assertEqual(set(inputs.keys()), {'raw_a', 'raw_b', 'raw_label'})

      feed_inputs = {
          inputs['raw_a']: feed_input_values[0],
          inputs['raw_b']: feed_input_values[1],
          inputs['raw_label']: feed_input_values[2]
      }
      transformed_a, transformed_b, transformed_label = session.run(
          [
              outputs['transformed_a'], outputs['transformed_b'],
              outputs['transformed_label']
          ],
          feed_dict=feed_inputs)

      batch_shape = (len(feed_input_values[0]), 1)
      sparse_batch_shape = batch_shape

      if not shape:
        # transformed_b is sparse so _convert_scalars_to_vectors did not
        # fix it
        sparse_batch_shape = sparse_batch_shape[:1]
        transformed_b_dict = dict(
            zip([tuple(x + [0]) for x in transformed_b.indices.tolist()],
                transformed_b.values.tolist()))
      else:
        transformed_b_dict = dict(
            zip([tuple(x) for x in transformed_b.indices.tolist()],
                transformed_b.values.tolist()))

      self.assertEqual(batch_shape, tuple(transformed_a.shape))
      self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))
      self.assertEqual(batch_shape, tuple(transformed_label.shape))

      self.assertEqual(21, transformed_a[0][0])
      self.assertEqual(9, transformed_b_dict[(0, 0)])
      self.assertEqual(1000, transformed_label[0][0])
      self.assertEqual(29, transformed_a[1][0])
      self.assertEqual(-5, transformed_b_dict[(1, 0)])
      self.assertEqual(2000, transformed_label[1][0])
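# Hypothetical invocation of the with-label variant; the feed values are
# back-derived from the assertions (raw_a = [15, 12], raw_b = [6, 17], and
# raw_label = [1, 2] assuming the transform scales labels by 1000):
def test_serving_input_fn_with_label(self):
  self._test_build_default_transforming_serving_input_fn_with_label(
      [], [[15, 12], [6, 17], [1, 2]])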
def export_fn(estimator, export_dir_base, checkpoint_path=None,
              eval_result=None):
  # args, raw_metadata, csv_header, target_name, features, keep_target and
  # assets_extra are assumed to be captured from an enclosing scope.
  with ops.Graph().as_default() as g:
    contrib_variables.create_global_step(g)

    input_ops = input_fn_maker.build_default_transforming_serving_input_fn(
        raw_metadata=raw_metadata,
        transform_savedmodel_dir=os.path.join(args.analysis_output_dir,
                                              TRANSFORM_FN_DIR),
        raw_label_keys=[target_name],
        raw_feature_keys=csv_header,
        convert_scalars_to_vectors=True)()
    model_fn_ops = estimator._call_model_fn(
        input_ops.features, None, model_fn_lib.ModeKeys.INFER)
    output_fetch_tensors = make_prediction_output_tensors(
        args=args,
        features=features,
        input_ops=input_ops,
        model_fn_ops=model_fn_ops,
        keep_target=keep_target)

    signature_def_map = {
        'serving_default':
            signature_def_utils.predict_signature_def(
                input_ops.default_inputs, output_fetch_tensors)
    }

    if not checkpoint_path:
      # Locate the latest checkpoint.
      checkpoint_path = saver.latest_checkpoint(estimator._model_dir)
    if not checkpoint_path:
      raise ValueError("Couldn't find trained model at %s." %
                       estimator._model_dir)

    export_dir = saved_model_export_utils.get_timestamped_export_dir(
        export_dir_base)

    with tf_session.Session('') as session:
      saver_for_restore = saver.Saver(
          variables.global_variables(), sharded=True)
      saver_for_restore.restore(session, checkpoint_path)

      # These ops run when the SavedModel is loaded, via legacy_init_op.
      init_op = control_flow_ops.group(
          variables.local_variables_initializer(),
          data_flow_ops.tables_initializer())

      # Perform the export.
      builder = saved_model_builder.SavedModelBuilder(export_dir)
      builder.add_meta_graph_and_variables(
          session, [tag_constants.SERVING],
          signature_def_map=signature_def_map,
          assets_collection=ops.get_collection(
              ops.GraphKeys.ASSET_FILEPATHS),
          legacy_init_op=init_op)
      builder.save(False)

    # Add the extra assets.
    if assets_extra:
      assets_extra_path = os.path.join(
          compat.as_bytes(export_dir), compat.as_bytes('assets.extra'))
      for dest_relative, source in assets_extra.items():
        dest_absolute = os.path.join(
            compat.as_bytes(assets_extra_path),
            compat.as_bytes(dest_relative))
        dest_path = os.path.dirname(dest_absolute)
        file_io.recursive_create_dir(dest_path)
        file_io.copy(source, dest_absolute)

  # Only keep the last 3 models.
  saved_model_export_utils.garbage_collect_exports(
      export_dir_base, exports_to_keep=3)

  # Save the last model to the model folder.
  # export_dir_base = A/B/intermediate_models/
  if keep_target:
    final_dir = os.path.join(args.job_dir, 'evaluation_model')
  else:
    final_dir = os.path.join(args.job_dir, 'model')
  if file_io.is_directory(final_dir):
    file_io.delete_recursively(final_dir)
  file_io.recursive_create_dir(final_dir)
  recursive_copy(export_dir, final_dir)

  return export_dir
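# export_fn has the signature tf.contrib.learn expects of an ExportStrategy's
# export function, so a plausible (hypothetical) wiring is the following; the
# resulting strategy can then be passed to an Experiment via
# export_strategies=[...]:
from tensorflow.contrib.learn.python.learn import export_strategy

strategy = export_strategy.ExportStrategy('Servo', export_fn)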