def create_experiment(self):
    """Create an Experiment object packaging Estimator and Specs.

    Returns
    -------
    Experiment (NamedTuple)
        estimator : tf.estimator.Estimator
        train_spec : tf.estimator.TrainSpec
        eval_spec : tf.estimator.EvalSpec
    """
    tf.set_random_seed(self.random_seed)

    # Create Estimator
    LOGGER.info("Converting Keras model to Estimator.")
    model_dir = self.path_model + "/checkpoints"
    estimator = tf.keras.estimator.model_to_estimator(self.model, model_dir=model_dir)

    # Create Hooks
    estimator_train_hooks = [hook(estimator) for hook in self.train_hooks if isinstance(hook, EstimatorHookFactory)]
    estimator_eval_hooks = [hook(estimator) for hook in self.eval_hooks if isinstance(hook, EstimatorHookFactory)]
    train_hooks = [hk for hk in self.train_hooks if not isinstance(hk, (TensorHookFactory, EstimatorHookFactory))]
    eval_hooks = [hk for hk in self.eval_hooks if not isinstance(hk, (TensorHookFactory, EstimatorHookFactory))]

    # Create train specs
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: self.prepro_fn(self.train_input_fn(), tf.estimator.ModeKeys.TRAIN),
        hooks=estimator_train_hooks + train_hooks,
        **self.train_spec,
    )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: self.prepro_fn(self.eval_input_fn(), tf.estimator.ModeKeys.EVAL),
        hooks=estimator_eval_hooks + eval_hooks,
        **self.eval_spec,
    )

    return Experiment(estimator=estimator, train_spec=train_spec, eval_spec=eval_spec)
def experiment_fn() -> Experiment:
    # To mitigate issue https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def train_input_fn():
        dataset = winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
        return dataset.shuffle(1000).batch(128).repeat()

    def eval_input_fn():
        dataset = winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
        return dataset.shuffle(1000).batch(128)

    estimator = tf.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes(),
        optimizer=lambda: hvd.DistributedOptimizer(tf.train.AdamOptimizer()))

    return Experiment(
        estimator,
        tf.estimator.TrainSpec(
            train_input_fn,
            max_steps=10,
            hooks=[hvd.BroadcastGlobalVariablesHook(0)]),
        tf.estimator.EvalSpec(
            eval_input_fn,
            steps=10,
            start_delay_secs=0,
            throttle_secs=30))
def experiment_fn(hdfs_dir: str) -> Experiment:
    def convert_to_tensor(x, y):
        return (tf.convert_to_tensor(value=list(x.values()), dtype=tf.float32),
                tf.convert_to_tensor(value=y, dtype=tf.int32))

    def train_input_fn():
        dataset = winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
        return (dataset.map(convert_to_tensor)
                .shuffle(1000)
                .batch(128)
                .repeat())

    def eval_input_fn():
        dataset = winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
        return (dataset.map(convert_to_tensor)
                .shuffle(1000)
                .batch(128))

    model = keras.Sequential()
    model.add(keras.layers.Dense(units=300, activation="relu", input_shape=(11,)))
    model.add(keras.layers.Dense(units=100, activation="relu"))
    model.add(keras.layers.Dense(units=10, activation="softmax"))
    model.summary()
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer="sgd",
                  metrics=["accuracy"])

    config = tf.estimator.RunConfig(model_dir=hdfs_dir)
    estimator = tf.keras.estimator.model_to_estimator(model, config=config)

    return Experiment(
        estimator,
        tf.estimator.TrainSpec(train_input_fn, max_steps=100),
        tf.estimator.EvalSpec(
            eval_input_fn,
            steps=10,
            start_delay_secs=0,
            throttle_secs=30))
def experiment_fn() -> Experiment:
    train_data, test_data = winequality.get_train_eval_datasets(WINE_EQUALITY_FILE)

    def train_input_fn():
        return (train_data.shuffle(1000)
                .batch(128)
                .repeat()
                .make_one_shot_iterator()
                .get_next())

    def eval_input_fn():
        return (test_data.shuffle(1000)
                .batch(128)
                .make_one_shot_iterator()
                .get_next())

    estimator = tf.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes())

    return Experiment(
        estimator,
        tf.estimator.TrainSpec(train_input_fn, max_steps=10),
        tf.estimator.EvalSpec(
            eval_input_fn,
            steps=10,
            start_delay_secs=0,
            throttle_secs=30))
def experiment_fn(dataset_path: str) -> Experiment:
    train_data, test_data = winequality.get_train_eval_datasets(dataset_path)

    def train_input_fn():
        return (train_data.shuffle(1000)
                .batch(128)
                .repeat()
                .make_one_shot_iterator()
                .get_next())

    def eval_input_fn():
        return (test_data.shuffle(1000)
                .batch(128)
                .make_one_shot_iterator()
                .get_next())

    fs = check_output(
        "hdfs getconf -confKey fs.defaultFS".split()).strip().decode()
    user = pwd.getpwuid(os.getuid()).pw_name
    config = tf.estimator.RunConfig(
        tf_random_seed=42,
        model_dir=f"{fs}/user/{user}/examples/{run_id}")
    estimator = tf.estimator.LinearClassifier(
        winequality.get_feature_columns(),
        n_classes=winequality.get_n_classes(),
        config=config)

    return Experiment(
        estimator,
        tf.estimator.TrainSpec(train_input_fn, max_steps=10),
        tf.estimator.EvalSpec(
            eval_input_fn,
            steps=10,
            start_delay_secs=0,
            throttle_secs=30))
def test_retry_run_on_yarn(nb_retries, nb_failures):
    cpt = 0

    def fail(*args, **kwargs):
        # Fail the first `nb_failures` calls, then succeed.
        nonlocal cpt
        if cpt < nb_failures:
            cpt += 1
            raise Exception("")

    with mock.patch('tf_yarn._setup_pyenvs'), \
            mock.patch('tf_yarn._setup_skein_cluster') as mock_setup_skein_cluster, \
            mock.patch('tf_yarn._run_on_cluster') as mock_run_on_cluster:
        mock_run_on_cluster.side_effect = fail

        gb = 2**10
        try:
            run_on_yarn(
                "path/to/env",
                lambda: Experiment(None, None, None),
                task_specs={
                    "chief": TaskSpec(memory=16 * gb, vcores=16),
                    "worker": TaskSpec(memory=16 * gb, vcores=16, instances=1),
                    "ps": TaskSpec(memory=16 * gb, vcores=16, instances=1)
                },
                nb_retries=nb_retries)
        except Exception:
            pass

        nb_calls = min(nb_retries, nb_failures) + 1
        assert mock_run_on_cluster.call_count == nb_calls
        assert mock_setup_skein_cluster.call_count == nb_calls
def experiment_fn() -> Experiment:
    def input_fn():
        x = tf.constant([[1.0], [2.0], [3.0], [4.0]])
        return {"x": x}, x

    estimator = tf.estimator.Estimator(model_fn=model_fn)
    train_spec = tf.estimator.TrainSpec(input_fn, max_steps=1)
    eval_spec = tf.estimator.EvalSpec(input_fn, steps=1)
    return Experiment(estimator, train_spec, eval_spec)
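The Experiment NamedTuple returned above maps directly onto tf.estimator.train_and_evaluate. Below is a minimal, hypothetical sketch of driving such an experiment_fn locally; it assumes the estimator/train_spec/eval_spec field names documented in the create_experiment docstring above.

# Hedged sketch: run the experiment locally with the standard Estimator loop.
experiment = experiment_fn()
tf.estimator.train_and_evaluate(
    experiment.estimator,   # tf.estimator.Estimator
    experiment.train_spec,  # tf.estimator.TrainSpec
    experiment.eval_spec)   # tf.estimator.EvalSpec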
def experiment_fn() -> Experiment:
    # To mitigate issue https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def input_fn():
        x = tf.constant([[1.0], [2.0], [3.0], [4.0]])
        return {"x": x}, x

    estimator = tf.estimator.Estimator(model_fn=model_fn)
    train_spec = tf.estimator.TrainSpec(input_fn, max_steps=1)
    eval_spec = tf.estimator.EvalSpec(input_fn, steps=1)
    return Experiment(estimator, train_spec, eval_spec)
def create_experiment(self):
    """Create an Experiment object packaging Estimator and Specs.

    Returns
    -------
    Experiment (NamedTuple)
        estimator : tf.estimator.Estimator
        train_spec : tf.estimator.TrainSpec
        eval_spec : tf.estimator.EvalSpec
    """
    tf.set_random_seed(self.random_seed)

    # Create Estimator
    model_dir = self.path_model + "/checkpoints"
    estimator = tf.estimator.Estimator(
        functools.partial(
            model_fn,
            pred_fn=self.pred_fn,
            loss_fn=self.loss_fn,
            optimizer_fn=self.optimizer_fn,
            initializer_fn=self.initializer_fn,
            train_metrics=self.train_metrics,
            eval_metrics=self.eval_metrics,
            train_hooks=[hook for hook in self.train_hooks if isinstance(hook, TensorHookFactory)],
            eval_hooks=[hook for hook in self.eval_hooks if isinstance(hook, TensorHookFactory)],
        ),
        model_dir=model_dir,
        config=tf.estimator.RunConfig(
            session_config=tf.ConfigProto(**self.config_proto),
            model_dir=model_dir,
            **self.run_config
        ),
    )

    # Create Hooks
    estimator_train_hooks = [hook(estimator) for hook in self.train_hooks if isinstance(hook, EstimatorHookFactory)]
    estimator_eval_hooks = [hook(estimator) for hook in self.eval_hooks if isinstance(hook, EstimatorHookFactory)]
    train_hooks = [hk for hk in self.train_hooks if not isinstance(hk, (TensorHookFactory, EstimatorHookFactory))]
    eval_hooks = [hk for hk in self.eval_hooks if not isinstance(hk, (TensorHookFactory, EstimatorHookFactory))]

    # Create train specs
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: self.prepro_fn(self.train_input_fn(), tf.estimator.ModeKeys.TRAIN),
        hooks=estimator_train_hooks + train_hooks,
        **self.train_spec,
    )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: self.prepro_fn(self.eval_input_fn(), tf.estimator.ModeKeys.EVAL),
        hooks=estimator_eval_hooks + eval_hooks,
        **self.eval_spec,
    )

    return Experiment(estimator=estimator, train_spec=train_spec, eval_spec=eval_spec)
def _experiment_fn(model_dir):
    print(f"create experiment with model_dir={model_dir}")

    def model_fn():
        return tf.estimator.EstimatorSpec()

    def train_fn():
        return None

    def eval_fn():
        return None

    return Experiment(
        tf.estimator.LinearClassifier(feature_columns=[], model_dir=model_dir),
        tf.estimator.TrainSpec(train_fn),
        tf.estimator.EvalSpec(eval_fn))
def experiment_fn() -> Experiment:
    def train_input_fn():
        train_data, test_data = winequality.get_train_eval_datasets(
            WINE_EQUALITY_FILE)
        return (train_data.shuffle(1000).batch(128).repeat())

    estimator = tf.estimator.LinearClassifier(
        optimizer=DistributedOptimizer(
            tf.train.FtrlOptimizer(learning_rate=0.1)),
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes())

    train_spec = tf.estimator.TrainSpec(
        train_input_fn,
        max_steps=1000,
        hooks=[BroadcastGlobalVariablesHook()])

    return Experiment(estimator, train_spec, tf.estimator.EvalSpec(lambda: True))
def experiment_fn() -> Experiment:
    # To mitigate issue https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def train_input_fn():
        dataset = winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
        return (dataset.shuffle(1000).batch(128).repeat())

    def eval_input_fn():
        dataset = winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
        return (dataset.shuffle(1000).batch(128))

    estimator = tf.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=HDFS_DIR,
        n_classes=winequality.get_n_classes())

    return Experiment(
        estimator,
        tf.estimator.TrainSpec(train_input_fn, max_steps=100),
        tf.estimator.EvalSpec(
            eval_input_fn,
            steps=10,
            start_delay_secs=0,
            throttle_secs=30))
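For completeness, here is a hedged sketch of submitting such an experiment_fn to YARN, reusing only the run_on_yarn/TaskSpec call shape that appears in the retry test above; the pyenv path, task names, and resource figures are illustrative placeholders, not recommended settings.

# Hedged sketch: submit experiment_fn with tf_yarn's run_on_yarn, mirroring
# the call pattern from test_retry_run_on_yarn.
gb = 2**10
run_on_yarn(
    "path/to/env",          # placeholder pyenv archive path
    experiment_fn,          # callable returning an Experiment
    task_specs={
        "chief": TaskSpec(memory=16 * gb, vcores=16),
        "worker": TaskSpec(memory=16 * gb, vcores=16, instances=1),
        "ps": TaskSpec(memory=16 * gb, vcores=16, instances=1),
    })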