def experiment_fn() -> Experiment:
    """Assemble the Horovod-backed wine-quality ``Experiment``.

    The estimator is a ``LinearClassifier`` writing to ``HDFS_DIR`` whose
    optimizer factory wraps ``AdamOptimizer`` in ``hvd.DistributedOptimizer``.
    Training runs for at most 10 steps with a
    ``hvd.BroadcastGlobalVariablesHook(0)`` attached; evaluation runs 10 steps.
    """
    # To mitigate issue https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def train_input_fn():
        # Shuffled, batched, endlessly repeated "train" split.
        train_ds = winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
        train_ds = train_ds.shuffle(1000)
        train_ds = train_ds.batch(128)
        return train_ds.repeat()

    def eval_input_fn():
        # Shuffled, batched single pass over the "test" split.
        test_ds = winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
        test_ds = test_ds.shuffle(1000)
        return test_ds.batch(128)

    def _make_optimizer():
        # Passed as a callable so the optimizer is only built when the
        # estimator invokes it.
        return hvd.DistributedOptimizer(tf.train.AdamOptimizer())

    estimator = tf.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes(),
        optimizer=_make_optimizer,
    )

    train_spec = tf.estimator.TrainSpec(
        train_input_fn,
        max_steps=10,
        hooks=[hvd.BroadcastGlobalVariablesHook(0)],
    )
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=10,
        start_delay_secs=0,
        throttle_secs=30,
    )
    return Experiment(estimator, train_spec, eval_spec)
def experiment_fn() -> Experiment:
    """Assemble the wine-quality ``Experiment`` using one-shot iterators.

    Both datasets are loaded once from ``WINE_EQUALITY_FILE``; the nested
    input functions close over them and return the ``get_next()`` result of a
    one-shot iterator. Training is capped at 10 steps; evaluation runs 10
    steps.
    """
    train_data, test_data = winequality.get_train_eval_datasets(
        WINE_EQUALITY_FILE)

    def train_input_fn():
        # Shuffle -> batch -> repeat, then hand back the iterator's next element.
        pipeline = train_data.shuffle(1000)
        pipeline = pipeline.batch(128)
        pipeline = pipeline.repeat()
        iterator = pipeline.make_one_shot_iterator()
        return iterator.get_next()

    def eval_input_fn():
        # Same as training but without repeat(): a single pass over the data.
        pipeline = test_data.shuffle(1000)
        pipeline = pipeline.batch(128)
        iterator = pipeline.make_one_shot_iterator()
        return iterator.get_next()

    estimator = tf.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes(),
    )

    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=10)
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=10,
        start_delay_secs=0,
        throttle_secs=30,
    )
    return Experiment(estimator, train_spec, eval_spec)
def experiment_fn(dataset_path: str) -> Experiment:
    """Assemble the wine-quality ``Experiment`` for the dataset at *dataset_path*.

    The model directory is ``{fs}/user/{user}/examples/{run_id}``, where
    ``fs`` is read from ``hdfs getconf -confKey fs.defaultFS`` and ``user`` is
    the name of the current OS user. ``run_id`` is resolved from the
    enclosing scope.

    Args:
        dataset_path: Passed to ``winequality.get_train_eval_datasets``.

    Returns:
        An ``Experiment`` bundling the estimator with its train and eval specs.
    """
    train_data, test_data = winequality.get_train_eval_datasets(dataset_path)

    def train_input_fn():
        batches = train_data.shuffle(1000).batch(128).repeat()
        return batches.make_one_shot_iterator().get_next()

    def eval_input_fn():
        batches = test_data.shuffle(1000).batch(128)
        return batches.make_one_shot_iterator().get_next()

    # Ask the local Hadoop configuration for the default filesystem URI.
    getconf_cmd = "hdfs getconf -confKey fs.defaultFS".split()
    fs = check_output(getconf_cmd).strip().decode()
    user = pwd.getpwuid(os.getuid()).pw_name

    config = tf.estimator.RunConfig(
        tf_random_seed=42,
        model_dir=f"{fs}/user/{user}/examples/{run_id}",
    )
    estimator = tf.estimator.LinearClassifier(
        winequality.get_feature_columns(),
        n_classes=winequality.get_n_classes(),
        config=config,
    )

    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=10)
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=10,
        start_delay_secs=0,
        throttle_secs=30,
    )
    return Experiment(estimator, train_spec, eval_spec)
def experiment_fn() -> Experiment:
    """Assemble a training-focused wine-quality ``Experiment``.

    The ``LinearClassifier`` uses an ``FtrlOptimizer`` (learning rate 0.1)
    wrapped in ``DistributedOptimizer``, and training (max 1000 steps) carries
    a ``BroadcastGlobalVariablesHook``. The ``EvalSpec`` is built from a
    placeholder ``lambda: True`` input function.
    """

    def train_input_fn():
        # Only the training half of the pair is used here.
        train_data, _ = winequality.get_train_eval_datasets(
            WINE_EQUALITY_FILE)
        shuffled = train_data.shuffle(1000)
        batched = shuffled.batch(128)
        return batched.repeat()

    optimizer = DistributedOptimizer(
        tf.train.FtrlOptimizer(learning_rate=0.1))
    estimator = tf.estimator.LinearClassifier(
        optimizer=optimizer,
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes(),
    )

    train_spec = tf.estimator.TrainSpec(
        train_input_fn,
        max_steps=1000,
        hooks=[BroadcastGlobalVariablesHook()],
    )
    eval_spec = tf.estimator.EvalSpec(lambda: True)
    return Experiment(estimator, train_spec, eval_spec)
def experiment_fn() -> Experiment:
    """Assemble the plain wine-quality ``Experiment``.

    A ``LinearClassifier`` with ``model_dir=HDFS_DIR`` is trained for at most
    100 steps on the "train" split and evaluated for 10 steps on the "test"
    split of ``WINE_EQUALITY_FILE``.
    """
    # To mitigate issue https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def train_input_fn():
        # Shuffled, batched, endlessly repeated "train" split.
        train_ds = winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
        train_ds = train_ds.shuffle(1000)
        train_ds = train_ds.batch(128)
        return train_ds.repeat()

    def eval_input_fn():
        # Shuffled, batched single pass over the "test" split.
        test_ds = winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
        test_ds = test_ds.shuffle(1000)
        return test_ds.batch(128)

    estimator = tf.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=HDFS_DIR,
        n_classes=winequality.get_n_classes(),
    )

    train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=100)
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=10,
        start_delay_secs=0,
        throttle_secs=30,
    )
    return Experiment(estimator, train_spec, eval_spec)
def main():
    """Train and evaluate a custom estimator on tiled wine-quality features.

    Builds the train/eval dataset functions, derives tilings from the feature
    range, wraps both dataset functions with ``get_input_fn``, and finally
    calls ``tf.estimator.train_and_evaluate`` with checkpoints written to
    ``"model_dir"``.
    """
    model_dir = "model_dir"
    num_tilings = 10
    num_buckets = 10
    batch_size = 32

    # build input and evaluation functions
    train_fn, evaluation_fn = winequality.get_train_eval_datasets_fn(
        WINE_EQUALITY_FILE)
    feature_range = winequality.get_feature_range()

    # Uniform bucket boundaries over the feature range, shared by all tilings.
    tile_strategy = TileStrategy(feature_range)
    tile_strategy_boundaries = tile_strategy.uniform(num_buckets)
    tilings = Tilings(tile_strategy_boundaries, num_tilings)

    input_fn_train = get_input_fn(train_fn, batch_size, tilings)
    input_fn_eval = get_input_fn(evaluation_fn, batch_size, tilings)

    # build model function and its necessary params
    tiled_feature_column_list = TiledFeatureColumns(tilings).get_list()
    params = dict(
        feature_columns=tiled_feature_column_list,
        hidden_units=None,
        num_classes=winequality.get_n_classes(),
    )

    # Final training and evaluation.
    # call tensorboard separately to see how loss function evolves
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        params=params,
        model_dir=model_dir,
    )
    train_spec = tf.estimator.TrainSpec(
        input_fn=input_fn_train,
        max_steps=40000,
    )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=input_fn_eval,
        steps=100,
        start_delay_secs=0,
        throttle_secs=30,
    )
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
pyenv_zip_path, task_specs={ "worker": TaskSpec(memory="2 GiB", vcores=4, instances=2) }, files=editable_requirements, acls=skein.model.ACLs(enable=True, view_users=['*'])) as cluster_spec: distrib_config = tf.contrib.distribute.DistributeConfig( train_distribute=tf.contrib.distribute.CollectiveAllReduceStrategy( ), eval_distribute=tf.contrib.distribute.CollectiveAllReduceStrategy( ), remote_cluster=cluster_spec) run_config = tf.estimator.RunConfig( experimental_distribute=distrib_config) estimator = tf.estimator.LinearClassifier( feature_columns=winequality.get_feature_columns(), model_dir=f"{HDFS_DIR}", n_classes=winequality.get_n_classes(), optimizer='Adam', config=run_config) tf.estimator.train_and_evaluate( estimator, tf.estimator.TrainSpec(train_input_fn, max_steps=1000), tf.estimator.EvalSpec(eval_input_fn, steps=10, start_delay_secs=0, throttle_secs=30))