def test_first_time_chief_generate_nonadaptive(self, import_fn):
  import_fn.side_effect = _fake_import_fn
  generator = replay_generator.ReplayGenerator(
      phoenix_spec=_create_spec('nonadaptive'),
      metadata=ml_metadata_db.MLMetaData(
          _create_spec('nonadaptive'), study_owner='test', study_name='test'))
  call_args = generator.first_time_chief_generate(
      features='features',
      input_layer_fn='input_layer',
      trial_mode='mode',
      shared_input_tensor='input',
      shared_lengths='input2',
      logits_dimension=5,
      hparams=None,
      run_config=tf.estimator.RunConfig(model_dir='mydir/5'),
      is_training=True,
      trials=[])
  self.assertEqual(
      call_args, {
          'features': 'features',
          'input_layer_fn': 'input_layer',
          'phoenix_spec': _create_spec('nonadaptive'),
          'shared_input_tensor': 'input',
          'shared_lengths': 'input2',
          'is_training': True,
          'logits_dimension': 5,
          'previous_model_dirs': ['mydir/1', 'mydir/2', 'mydir/3', 'mydir/4'],
          'force_freeze': True,
          'allow_auxiliary_head': False,
          'caller_generator': 'replay_generator',
          'my_model_dir': 'mydir/5'
      })
def test_generator_with_dropouts(self):
  # Force graph mode
  with tf.compat.v1.Graph().as_default():
    spec = phoenix_spec_pb2.PhoenixSpec(
        problem_type=phoenix_spec_pb2.PhoenixSpec.DNN)
    spec.search_type = phoenix_spec_pb2.PhoenixSpec.NONADAPTIVE_RANDOM_SEARCH
    spec.is_input_shared = True
    generator = search_candidate_generator.SearchCandidateGenerator(
        phoenix_spec=spec,
        metadata=ml_metadata_db.MLMetaData(
            phoenix_spec=spec, study_name='', study_owner=''))
    input_tensor = tf.zeros([20, 32, 32, 3])
    fake_config = collections.namedtuple('RunConfig',
                                         ['model_dir', 'is_chief'])
    run_config = fake_config(
        model_dir=flags.FLAGS.test_tmpdir + '/1', is_chief=True)
    _ = generator.generate(
        features={},
        input_layer_fn=lambda: None,
        trial_mode=trial_utils.TrialMode.NO_PRIOR,
        shared_input_tensor=input_tensor,
        shared_lengths=None,
        logits_dimension=10,
        hparams=hp.HParams(
            initial_architecture=['CONVOLUTION_3X3'], dropout_rate=0.3),
        run_config=run_config,
        is_training=True,
        trials=[])
    all_nodes = [
        node.name
        for node in tf.compat.v1.get_default_graph().as_graph_def().node
    ]
    self.assertAllInSet(_DROPOUT_GRAPH_NODE, all_nodes)
def test_controller(self, spec, my_id, trials, expected_output):
  controller_ = controller.InProcessController(
      phoenix_spec=spec,
      metadata=ml_metadata_db.MLMetaData(
          phoenix_spec=spec, study_name="", study_owner=""))
  generators = controller_.get_generators(my_id, trials)
  logging.info(generators)
  self.assertEqual(len(expected_output.keys()), len(generators.keys()))
  for k, v in generators.items():
    self.assertEqual(k, v.instance.generator_name())
    self.assertIn(k, expected_output.keys())
    self.assertEqual(v.relevant_trials, expected_output[k])
def test_report(self):
  connection_config = metadata_store_pb2.ConnectionConfig()
  connection_config.sqlite.filename_uri = os.path.join(FLAGS.test_tmpdir, "2")
  connection_config.sqlite.connection_mode = 3
  handler = ml_metadata_db.MLMetaData(
      None, None, None, connection_config=connection_config)
  handler.before_generating_trial_model(trial_id=1, model_dir="/tmp/1")
  handler.report(eval_dictionary={"loss": 0.5}, model_dir="/tmp/1")
  output = handler.get_completed_trials()
  self.assertLen(output, 1)
  output = output[0]
  self.assertEqual(output.id, 1)
  self.assertEqual(output.status, "COMPLETED")
  self.assertEqual(output.model_dir, "/tmp/1")
  self.assertEqual(output.final_measurement.objective_value, 0.5)
def test_before_generating_trial_model(self):
  connection_config = metadata_store_pb2.ConnectionConfig()
  connection_config.sqlite.filename_uri = os.path.join(FLAGS.test_tmpdir, "1")
  connection_config.sqlite.connection_mode = 3
  handler = ml_metadata_db.MLMetaData(
      None, None, None, connection_config=connection_config)
  handler.before_generating_trial_model(trial_id=1, model_dir="/tmp/1")
  output = handler._store.get_executions_by_type("Trial")
  self.assertLen(output, 1)
  output = output[0]
  self.assertEqual(output.properties["id"].int_value, 1)
  self.assertEqual(output.properties["state"].string_value, "RUNNING")
  self.assertEqual(output.properties["serialized_data"].string_value, "")
  self.assertEqual(output.properties["model_dir"].string_value, "/tmp/1")
  self.assertEqual(output.properties["evaluation"].string_value, "")
def test_intermixed_prior_graph(self):
  # Force graph mode
  with tf.compat.v1.Graph().as_default():
    spec = phoenix_spec_pb2.PhoenixSpec(
        problem_type=phoenix_spec_pb2.PhoenixSpec.DNN)
    spec.ensemble_spec.ensemble_search_type = (
        ensembling_spec_pb2.EnsemblingSpec
        .INTERMIXED_NONADAPTIVE_ENSEMBLE_SEARCH)
    spec.ensemble_spec.intermixed_search.width = 2
    spec.ensemble_spec.intermixed_search.try_ensembling_every = 4
    spec.ensemble_spec.intermixed_search.num_trials_to_consider = 3
    spec.is_input_shared = True
    generator = prior_generator.PriorGenerator(
        phoenix_spec=spec,
        metadata=ml_metadata_db.MLMetaData(
            phoenix_spec=spec, study_name='', study_owner=''))
    fake_config = collections.namedtuple('RunConfig', ['model_dir'])
    # The trial id must be a multiple of 4 (try_ensembling_every).
    run_config = fake_config(model_dir=flags.FLAGS.test_tmpdir + '/10000')
    tf.io.gfile.makedirs(run_config.model_dir)
    # Checkpoints are generated for the best three trials. If the generator
    # chooses the suboptimal (wrong) trials, the test will fail.
    self._create_checkpoint(['search_generator'], 2)
    self._create_checkpoint(['search_generator'], 3)
    self._create_checkpoint(['search_generator'], 5)
    logits, _ = generator.first_time_chief_generate(
        features={},
        input_layer_fn=lambda: None,
        trial_mode=trial_utils.TrialMode.ENSEMBLE_SEARCH,
        shared_input_tensor=tf.zeros([100, 32, 32, 3]),
        shared_lengths=None,
        logits_dimension=10,
        hparams={},
        run_config=run_config,
        is_training=True,
        trials=trial_utils.create_test_trials_intermixed(
            flags.FLAGS.test_tmpdir))
    self.assertLen(logits, 2)
    all_nodes = [
        node.name
        for node in tf.compat.v1.get_default_graph().as_graph_def().node
    ]
    self.assertAllInSet(_NONADAPTIVE_GRAPH_NODES, all_nodes)
def test_generator_with_distillation_and_intermixed(self):
  # Force graph mode
  with tf.compat.v1.Graph().as_default():
    spec = phoenix_spec_pb2.PhoenixSpec(
        problem_type=phoenix_spec_pb2.PhoenixSpec.CNN)
    spec.is_input_shared = True
    spec.search_type = phoenix_spec_pb2.PhoenixSpec.NONADAPTIVE_RANDOM_SEARCH
    spec.ensemble_spec.ensemble_search_type = (
        ensembling_spec_pb2.EnsemblingSpec
        .INTERMIXED_NONADAPTIVE_ENSEMBLE_SEARCH)
    spec.ensemble_spec.intermixed_search.width = 2
    spec.ensemble_spec.intermixed_search.try_ensembling_every = 4
    spec.ensemble_spec.intermixed_search.num_trials_to_consider = 3
    spec.distillation_spec.distillation_type = (
        distillation_spec_pb2.DistillationSpec.DistillationType.MSE_LOGITS)
    generator = search_candidate_generator.SearchCandidateGenerator(
        phoenix_spec=spec,
        metadata=ml_metadata_db.MLMetaData(
            phoenix_spec=spec, study_name='', study_owner=''))
    fake_config = collections.namedtuple('RunConfig',
                                         ['model_dir', 'is_chief'])
    run_config = fake_config(
        model_dir=flags.FLAGS.test_tmpdir + '/10000', is_chief=True)
    self._create_checkpoint(['search_generator'], 2)
    self._create_checkpoint(['search_generator'], 3)
    self._create_checkpoint(['search_generator'], 5)
    input_tensor = tf.zeros([20, 32, 32, 3])
    _ = generator.generate(
        features={},
        input_layer_fn=lambda: None,
        trial_mode=trial_utils.TrialMode.DISTILLATION,
        shared_input_tensor=input_tensor,
        shared_lengths=None,
        logits_dimension=10,
        hparams=hp.HParams(initial_architecture=['CONVOLUTION_3X3']),
        run_config=run_config,
        is_training=True,
        trials=trial_utils.create_test_trials_intermixed(
            flags.FLAGS.test_tmpdir))
def test_get_completed_trials(self):
  connection_config = metadata_store_pb2.ConnectionConfig()
  connection_config.sqlite.filename_uri = os.path.join(FLAGS.test_tmpdir, "3")
  connection_config.sqlite.connection_mode = 3
  handler = ml_metadata_db.MLMetaData(
      None, None, None, connection_config=connection_config)
  handler.before_generating_trial_model(trial_id=1, model_dir="/tmp/1")
  handler.before_generating_trial_model(trial_id=2, model_dir="/tmp/2")
  handler.report(eval_dictionary={"loss": 0.1}, model_dir="/tmp/1")
  handler.before_generating_trial_model(trial_id=3, model_dir="/tmp/3")
  handler.report(eval_dictionary={"loss": 0.3}, model_dir="/tmp/3")
  handler.report(eval_dictionary={"loss": 0.2}, model_dir="/tmp/2")
  output = handler.get_completed_trials()
  self.assertLen(output, 3)
  for i in range(3):
    self.assertEqual(output[i].status, "COMPLETED")
    self.assertEqual(output[i].model_dir, "/tmp/" + str(output[i].id))
    self.assertEqual(output[i].final_measurement.objective_value,
                     float(output[i].id) / 10)
def test_generator_with_snapshot(self):
  # Force graph mode
  with tf.compat.v1.Graph().as_default():
    spec = phoenix_spec_pb2.PhoenixSpec(
        problem_type=phoenix_spec_pb2.PhoenixSpec.CNN)
    spec.search_type = phoenix_spec_pb2.PhoenixSpec.ADAPTIVE_COORDINATE_DESCENT
    spec.transfer_learning_spec.transfer_learning_type = (
        transfer_learning_spec_pb2.TransferLearningSpec
        .SNAPSHOT_TRANSFER_LEARNING)
    spec.is_input_shared = True
    generator = search_candidate_generator.SearchCandidateGenerator(
        phoenix_spec=spec,
        metadata=ml_metadata_db.MLMetaData(
            phoenix_spec=spec, study_name='', study_owner=''))
    input_tensor = tf.zeros([20, 32, 32, 3])
    fake_config = collections.namedtuple('RunConfig',
                                         ['model_dir', 'is_chief'])
    tf.io.gfile.makedirs(flags.FLAGS.test_tmpdir + '/3')
    run_config = fake_config(
        model_dir=flags.FLAGS.test_tmpdir + '/3', is_chief=True)
    self._create_checkpoint(['search_generator'], 2)
    _ = generator.generate(
        features={},
        input_layer_fn=lambda: None,
        trial_mode=trial_utils.TrialMode.ENSEMBLE_SEARCH,
        shared_input_tensor=input_tensor,
        shared_lengths=None,
        logits_dimension=10,
        hparams=hp.HParams(
            initial_architecture=['CONVOLUTION_3X3'],
            dropout_rate=0.3,
            new_block_type='CONVOLUTION_3X3'),
        run_config=run_config,
        is_training=True,
        trials=_create_trials(flags.FLAGS.test_tmpdir))
    all_nodes = [
        node.name
        for node in tf.compat.v1.get_default_graph().as_graph_def().node
    ]
    self.assertAllInSet(_DROPOUT_GRAPH_NODE, all_nodes)
def test_nonadaptive_prior(self, width, consider):
  # Force graph mode
  with tf.compat.v1.Graph().as_default():
    spec = phoenix_spec_pb2.PhoenixSpec(
        problem_type=phoenix_spec_pb2.PhoenixSpec.DNN)
    spec.ensemble_spec.ensemble_search_type = (
        ensembling_spec_pb2.EnsemblingSpec.NONADAPTIVE_ENSEMBLE_SEARCH)
    spec.ensemble_spec.nonadaptive_search.width = width
    spec.ensemble_spec.nonadaptive_search.num_trials_to_consider = consider
    spec.is_input_shared = True
    generator = prior_generator.PriorGenerator(
        phoenix_spec=spec,
        metadata=ml_metadata_db.MLMetaData(
            phoenix_spec=spec, study_name='', study_owner=''))
    fake_config = collections.namedtuple('RunConfig', ['model_dir'])
    run_config = fake_config(model_dir=flags.FLAGS.test_tmpdir + '/10000')
    tf.io.gfile.makedirs(run_config.model_dir)
    # Checkpoints are generated for the best three trials. If the generator
    # chooses the suboptimal (wrong) trials, the test will fail.
    self._create_checkpoint(['search_generator'], 3)
    self._create_checkpoint(['search_generator'], 4)
    self._create_checkpoint(['search_generator'], 5)
    logits, _ = generator.first_time_chief_generate(
        features={},
        input_layer_fn=lambda: None,
        trial_mode=trial_utils.TrialMode.ENSEMBLE_SEARCH,
        shared_input_tensor=tf.zeros([100, 32, 32, 3]),
        shared_lengths=None,
        logits_dimension=10,
        hparams={},
        run_config=run_config,
        is_training=True,
        trials=_create_trials(flags.FLAGS.test_tmpdir))
    self.assertLen(logits, min(width, consider))
def setUp(self):
  super(CoordinateDescentTest, self).setUp()
  self._metadata = ml_metadata_db.MLMetaData(None, None, None)
def main(unused_argv):
  filename = FLAGS.phoenix_spec_filename
  spec = phoenix_spec_pb2.PhoenixSpec()
  with tf.io.gfile.GFile(filename, "r") as f:
    text_format.Merge(f.read(), spec)
  dataset_provider = get_dataset_provider()
  loss_fn, metric_fn, predictions_fn = (
      loss_and_metric_and_predictions_fn(dataset_provider))
  metadata = None
  if (FLAGS.optimization_goal != "minimize" or
      FLAGS.optimization_metric != "loss"):
    metadata = ml_metadata_db.MLMetaData(
        phoenix_spec=spec,
        study_name=FLAGS.experiment_name,
        study_owner=FLAGS.experiment_owner,
        optimization_goal=FLAGS.optimization_goal,
        optimization_metric=FLAGS.optimization_metric)
  phoenix_instance = phoenix.Phoenix(
      phoenix_spec=spec,
      input_layer_fn=dataset_provider.get_input_layer_fn(spec.problem_type),
      logits_dimension=dataset_provider.number_of_classes(),
      study_name=FLAGS.experiment_name,
      study_owner=FLAGS.experiment_owner,
      loss_fn=loss_fn,
      metric_fn=metric_fn,
      predictions_fn=predictions_fn,
      metadata=metadata)

  # Replay only!!
  if spec.HasField("replay"):
    hparams = hp.HParams.from_proto(spec.replay.towers[0].hparams)
    run_train_and_eval(
        hparams,
        FLAGS.model_dir,
        phoenix_instance,
        dataset_provider,
        train_steps=FLAGS.phoenix_train_steps,
        eval_steps=FLAGS.phoenix_eval_steps,
        batch_size=FLAGS.phoenix_batch_size)
    return 0

  tuner_id = FLAGS.tuner_id or "phoenix-tuner-%d" % random.randint(0, 10000000)
  hyperparameters = phoenix.Phoenix.get_keras_hyperparameters_space(
      spec, FLAGS.phoenix_train_steps)

  if FLAGS.hypertuning_method == "random":
    oracle = kerastuner.tuners.randomsearch.RandomSearchOracle(
        objective="loss",
        max_trials=FLAGS.experiment_max_num_trials,
        seed=73,
        hyperparameters=hyperparameters,
        allow_new_entries=True,
        tune_new_entries=True)
  else:
    oracle = kerastuner.tuners.bayesian.BayesianOptimizationOracle(
        objective="loss",
        hyperparameters=hyperparameters,
        max_trials=FLAGS.experiment_max_num_trials)
  # pylint: disable=protected-access
  oracle._set_project_dir(
      FLAGS.model_dir, FLAGS.experiment_name, overwrite=True)
  # pylint: enable=protected-access
  data_provider = get_dataset_provider()
  while run_parameterized_train_and_eval(
      phoenix_instance=phoenix_instance,
      oracle=oracle,
      tuner_id=tuner_id,
      root_dir=FLAGS.model_dir,
      max_trials=FLAGS.experiment_max_num_trials,
      data_provider=data_provider,
      train_steps=FLAGS.phoenix_train_steps,
      eval_steps=FLAGS.phoenix_eval_steps,
      batch_size=FLAGS.phoenix_batch_size):
    pass
def setUp(self):
  super(ConstrainedDescentTest, self).setUp()
  self._metadata = ml_metadata_db.MLMetaData(None, None, None)
def __init__(self,
             phoenix_spec,
             input_layer_fn,
             study_owner,
             study_name,
             head=None,
             logits_dimension=None,
             label_vocabulary=None,
             loss_fn=None,
             metric_fn=None,
             predictions_fn=None,
             metadata=None):
  """Constructs a Phoenix instance.

  Args:
    phoenix_spec: A `PhoenixSpec` proto with the spec for the run.
    input_layer_fn: A function that converts feature Tensors to an input
      layer. See
      learning.autolx.model_search.data.Provider.get_input_layer_fn for
      details.
    study_owner: A string holding the ldap of the study owner. We use tuner
      platforms to conduct the training of the various architectures. This
      field specifies the study owner.
    study_name: A string holding the study name.
    head: A head to use with Phoenix for creating the loss and eval metrics.
      If no head is given, Phoenix falls back to using the loss_fn and
      metric_fn. N.B.: Phoenix creates its own EstimatorSpec, so everything
      besides the loss and eval metrics returned by head will be ignored.
    logits_dimension: An int holding the dimension of the output. Must be
      provided if head is None. Will be ignored if head is not None.
    label_vocabulary: List or tuple with labels vocabulary. Needed only if
      the labels are of type string. This list is used by the loss function
      if loss_fn is not provided. It is also used in the metric function to
      create the accuracy metric ops. Use only with multiclass classification
      problems.
    loss_fn: A function to compute the loss. Ignored if `head` is not None.
      Must accept as inputs a `labels` Tensor, a `logits` Tensor, and
      optionally a `weights` Tensor. `weights` must either be rank 0 or have
      the same rank as labels. If None, Phoenix defaults to using softmax
      cross-entropy.
    metric_fn: Metrics for TensorBoard. Ignored if `head` is not None.
      metric_fn takes `label` and `predictions` as input, and outputs a
      dictionary of (tensor, update_op) tuples. `label` is a Tensor (in the
      single-task case) or a dict of Tensors (in the multi-task case, where
      the keys of the dict correspond to the task names). `predictions` is a
      dict of Tensors. In the single-task case, it consists of `predictions`,
      `probabilities`, and `log_probabilities`. In the multi-task case, it
      consists of the same keys as the single-task case, plus those
      corresponding to each task (e.g., predictions/task_name_1). See
      `metric_fns` for more detail. If `metric_fn` is None, it will include a
      metric for the number of parameters, accuracy (if logits_dimension >=
      2), and AUC metrics (if logits_dimension == 2).
    predictions_fn: A function to convert eval logits to the `predictions`
      dictionary passed to metric_fn. If `None`, defaults to computing
      'predictions', 'probabilities', and 'log_probabilities'.
    metadata: An object that implements the metadata api in
      learning.adanets.phoenix.metadata.Metadata.
  """
  # Check Phoenix preconditions and fail early if any of them are broken.
  if phoenix_spec.multi_task_spec:
    # TODO(b/172564129): Add support for head and custom loss_fns in
    # multi-task.
    assert not head, "head is not supported for multi-task."
  if head:
    msg = "Do not specify {} when using head as head already contains it."
    assert not logits_dimension, msg.format("logits_dimension")
    assert not label_vocabulary, msg.format("label_vocabulary")
    assert not loss_fn, msg.format("loss_fn")
    assert not metric_fn, msg.format("metric_fn")

  # Check ensemble search / distillation preconditions.
  ensemble_spec = phoenix_spec.ensemble_spec
  distillation_spec = phoenix_spec.distillation_spec
  if (trial_utils.has_distillation(distillation_spec) and
      trial_utils.has_ensemble_search(ensemble_spec) and
      not trial_utils.is_intermixed_ensemble_search(ensemble_spec)):
    ensemble_search_spec = (
        ensemble_spec.nonadaptive_search
        if trial_utils.is_nonadaptive_ensemble_search(ensemble_spec) else
        ensemble_spec.adaptive_search)
    if (distillation_spec.minimal_pool_size ==
        ensemble_search_spec.minimal_pool_size):
      logging.warning("minimal_pool_size is the same for ensemble spec and "
                      "distillation spec, so distillation will be ignored.")

  self._phoenix_spec = phoenix_spec
  self._input_layer_fn = input_layer_fn
  self._ensembler = ensembler.Ensembler(phoenix_spec)
  self._distiller = distillation.Distiller(phoenix_spec.distillation_spec)
  self._study_owner = study_owner
  self._study_name = study_name
  self._head = head
  self._logits_dimension = (
      self._head.logits_dimension if head else logits_dimension)
  self._label_vocabulary = label_vocabulary
  if self._label_vocabulary:
    assert self._logits_dimension == len(self._label_vocabulary)
  self._loss_fn = loss_fn or loss_fns.make_multi_class_loss_fn(
      label_vocabulary=label_vocabulary)
  self._user_specified_metric_fn = metric_fn
  self._predictions_fn = (predictions_fn or _default_predictions_fn)
  if metadata is None:
    self._metadata = ml_metadata_db.MLMetaData(phoenix_spec, study_name,
                                               study_owner)
  else:
    self._metadata = metadata
  self._task_manager = task_manager.TaskManager(phoenix_spec)
  self._controller = controller.InProcessController(
      phoenix_spec=phoenix_spec, metadata=self._metadata)
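
# Illustrative sketch, not part of the original source: a minimal custom
# `metric_fn` matching the single-task contract documented in the constructor
# above. It receives the label Tensor and the `predictions` dict and returns
# a dictionary of (tensor, update_op) tuples; the name `example_metric_fn`
# and the metric key "example_accuracy" are made up for illustration.
# Assumes `import tensorflow as tf`, as elsewhere in this file.
def example_metric_fn(label, predictions):
  # tf.compat.v1.metrics.accuracy already returns an (accuracy, update_op)
  # tuple, so it can be placed in the returned dict directly.
  return {
      "example_accuracy":
          tf.compat.v1.metrics.accuracy(
              labels=label, predictions=predictions["predictions"])
  }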