def test_tf_column_filter(self):
    """InputMode.TENSORFLOW TFEstimator saving temporary TFRecords, filtered by input_mapping columns"""
    # create a Spark DataFrame of training examples (features, labels)
    trainDF = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

    # and add some extra columns
    df = trainDF.withColumn('extra1', trainDF.col1)
    df = df.withColumn('extra2', trainDF.col2)
    self.assertEqual(len(df.columns), 4)

    # train model
    args = {}
    estimator = TFEstimator(self.get_function('tf/train'), args, export_fn=self.get_function('tf/export')) \
        .setInputMapping({'col1': 'x', 'col2': 'y_'}) \
        .setInputMode(TFCluster.InputMode.TENSORFLOW) \
        .setModelDir(self.model_dir) \
        .setExportDir(self.export_dir) \
        .setTFRecordDir(self.tfrecord_dir) \
        .setClusterSize(self.num_workers) \
        .setNumPS(1) \
        .setBatchSize(10)
    estimator.fit(df)
    self.assertTrue(os.path.isdir(self.model_dir))
    self.assertTrue(os.path.isdir(self.tfrecord_dir))

    # verify that the temporary TFRecords contain only the input_mapping columns
    df_tmp = dfutil.loadTFRecords(self.sc, self.tfrecord_dir)
    self.assertEqual(df_tmp.columns, ['col1', 'col2'])
def test_tf_checkpoint_with_export_fn(self):
    """InputMode.TENSORFLOW TFEstimator w/ a separate saved_model export function to add placeholders for InputMode.SPARK TFModel inferencing"""
    # create a Spark DataFrame of training examples (features, labels)
    trainDF = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

    # train model
    args = {}
    estimator = TFEstimator(self.get_function('tf/train'), args, export_fn=self.get_function('tf/export')) \
        .setInputMapping({'col1': 'x', 'col2': 'y_'}) \
        .setInputMode(TFCluster.InputMode.TENSORFLOW) \
        .setModelDir(self.model_dir) \
        .setExportDir(self.export_dir) \
        .setTFRecordDir(self.tfrecord_dir) \
        .setClusterSize(self.num_workers) \
        .setNumPS(1) \
        .setBatchSize(10)
    model = estimator.fit(trainDF)
    self.assertTrue(os.path.isdir(self.model_dir))

    # create a Spark DataFrame of test examples (features, labels)
    testDF = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

    # test model from checkpoint, referencing tensors directly
    model.setTagSet('test_tag') \
        .setInputMapping({'c1': 'x'}) \
        .setOutputMapping({'y': 'cout1', 'y2': 'cout2'})
    preds = model.transform(testDF).head()  # take first/only result, e.g. [Row(cout=[4.758000373840332])]
    pred1, pred2 = preds.cout1[0], preds.cout2[0]
    self.assertAlmostEqual(pred1, np.sum(self.weights), 5)
    self.assertAlmostEqual(pred2, np.sum(self.weights)**2, 5)
def test_spark_checkpoint(self):
    """InputMode.SPARK TFEstimator w/ TFModel inferencing directly from model checkpoint"""
    # create a Spark DataFrame of training examples (features, labels)
    trainDF = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

    # train model
    args = {}
    estimator = TFEstimator(self.get_function('spark/train'), args) \
        .setInputMapping({'col1': 'x', 'col2': 'y_'}) \
        .setModelDir(self.model_dir) \
        .setClusterSize(self.num_workers) \
        .setNumPS(1) \
        .setBatchSize(10) \
        .setEpochs(2)
    model = estimator.fit(trainDF)
    self.assertTrue(os.path.isdir(self.model_dir))

    # create a Spark DataFrame of test examples (features, labels)
    testDF = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

    # test model from checkpoint, referencing tensors directly
    model.setInputMapping({'c1': 'x'}) \
        .setOutputMapping({'y': 'cout'})
    preds = model.transform(testDF).head()  # take first/only result, e.g. [Row(cout=[4.758000373840332])]
    pred = preds.cout[0]  # unpack scalar from tensor
    self.assertAlmostEqual(pred, np.sum(self.weights), 5)
def test_spark_saved_model(self):
    """InputMode.SPARK TFEstimator w/ explicit saved_model export for TFModel inferencing"""
    # create a Spark DataFrame of training examples (features, labels)
    trainDF = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

    # train and export model
    args = {}
    estimator = TFEstimator(self.get_function('spark/train'), args) \
        .setInputMapping({'col1': 'x', 'col2': 'y_'}) \
        .setModelDir(self.model_dir) \
        .setExportDir(self.export_dir) \
        .setClusterSize(self.num_workers) \
        .setNumPS(1) \
        .setBatchSize(10) \
        .setEpochs(2)
    model = estimator.fit(trainDF)
    self.assertTrue(os.path.isdir(self.export_dir))

    # create a Spark DataFrame of test examples (features, labels)
    testDF = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

    # test saved_model using exported signature
    model.setTagSet('test_tag') \
        .setSignatureDefKey('test_key') \
        .setInputMapping({'c1': 'features'}) \
        .setOutputMapping({'prediction': 'cout'})
    preds = model.transform(testDF).head()  # take first/only result
    pred = preds.cout[0]  # unpack scalar from tensor
    expected = np.sum(self.weights)
    self.assertAlmostEqual(pred, expected, 5)

    # test saved_model using custom/direct tensor mapping
    model.setTagSet('test_tag') \
        .setSignatureDefKey(None) \
        .setInputMapping({'c1': 'x'}) \
        .setOutputMapping({'y': 'cout1', 'y2': 'cout2'})
    preds = model.transform(testDF).head()  # take first/only result
    pred = preds.cout1[0]  # unpack pred scalar from tensor
    squared_pred = preds.cout2[0]  # unpack squared pred from tensor
    self.assertAlmostEqual(pred, expected, 5)
    self.assertAlmostEqual(squared_pred, expected * expected, 5)
def test_spark_saved_model(self):
    """InputMode.SPARK TFEstimator w/ explicit saved_model export for TFModel inferencing"""

    def _spark_train(args, ctx):
        """Basic linear regression in a distributed TF cluster using InputMode.SPARK"""
        import tensorflow as tf
        from tensorflow.keras import Sequential
        from tensorflow.keras.layers import Dense
        from tensorflowonspark import TFNode

        tf.compat.v1.reset_default_graph()
        strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
        with strategy.scope():
            model = Sequential()
            model.add(Dense(1, activation='linear', input_shape=[2]))
            model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.2), loss='mse', metrics=['mse'])
            model.summary()

        tf_feed = TFNode.DataFeed(ctx.mgr, input_mapping=args.input_mapping)

        def rdd_generator():
            while not tf_feed.should_stop():
                batch = tf_feed.next_batch(1)
                if len(batch['x']) > 0:
                    features = batch['x'][0]
                    label = batch['y_'][0]
                    yield (features, label)
                else:
                    return

        ds = tf.data.Dataset.from_generator(rdd_generator,
                                            (tf.float32, tf.float32),
                                            (tf.TensorShape([2]), tf.TensorShape([1])))
        ds = ds.batch(args.batch_size)

        # disable dataset auto-sharding
        options = tf.data.Options()
        options.experimental_distribute.auto_shard = False
        ds = ds.with_options(options)

        # only train 90% of each epoch to account for uneven RDD partition sizes
        steps_per_epoch = 1000 * 0.9 // (args.batch_size * ctx.num_workers)

        tf.io.gfile.makedirs(args.model_dir)
        filepath = args.model_dir + "/weights-{epoch:04d}"
        callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath=filepath,
                                                        verbose=1,
                                                        load_weights_on_restart=True,
                                                        save_weights_only=True)]

        model.fit(ds, epochs=args.epochs, steps_per_epoch=steps_per_epoch, callbacks=callbacks)
        # This fails with: "NotImplementedError: `fit_generator` is not supported for models compiled with tf.distribute.Strategy"
        # model.fit_generator(ds, epochs=args.epochs, steps_per_epoch=steps_per_epoch, callbacks=callbacks)

        if ctx.job_name == 'chief' and args.export_dir:
            print("exporting model to: {}".format(args.export_dir))
            tf.keras.experimental.export_saved_model(model, args.export_dir)

        tf_feed.terminate()

    # create a Spark DataFrame of training examples (features, labels)
    rdd = self.sc.parallelize(self.train_examples, 2)
    trainDF = rdd.toDF(['col1', 'col2'])

    # train and export model
    args = {}
    estimator = TFEstimator(_spark_train, args) \
        .setInputMapping({'col1': 'x', 'col2': 'y_'}) \
        .setModelDir(self.model_dir) \
        .setExportDir(self.export_dir) \
        .setClusterSize(self.num_workers) \
        .setNumPS(0) \
        .setBatchSize(1) \
        .setEpochs(1)
    model = estimator.fit(trainDF)
    self.assertTrue(os.path.isdir(self.export_dir))

    # create a Spark DataFrame of test examples (features, labels)
    testDF = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

    # test saved_model using exported signature
    model.setTagSet('serve') \
        .setSignatureDefKey('serving_default') \
        .setInputMapping({'c1': 'dense_input'}) \
        .setOutputMapping({'dense': 'cout'})
    preds = model.transform(testDF).head()  # take first/only result
    pred = preds.cout[0]  # unpack scalar from tensor
    expected = np.sum(self.weights)
    self.assertAlmostEqual(pred, expected, 2)
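    # Optional spot-check (sketch, not exercised by the test): the exported
    # saved_model can also be reloaded outside of Spark with the TF 1.x-era
    # keras loader that pairs with export_saved_model above:
    # reloaded = tf.keras.experimental.load_from_saved_model(self.export_dir)
    # print(reloaded.predict(np.array([[1.0, 2.0]])))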
images_labels = sc.textFile(args.images_labels).map(parse)
df = spark.createDataFrame(images_labels, ['image', 'label'])
df.show()

if args.mode == 'train':
    estimator = TFEstimator(main_fun, args) \
        .setInputMapping({'image': 'image', 'label': 'label'}) \
        .setModelDir(args.model_dir) \
        .setExportDir(args.export_dir) \
        .setClusterSize(args.cluster_size) \
        .setTensorboard(args.tensorboard) \
        .setEpochs(args.epochs) \
        .setBatchSize(args.batch_size) \
        .setGraceSecs(60)
    model = estimator.fit(df)
else:  # args.mode == 'inference'
    # using a trained/exported model
    model = TFModel(args) \
        .setInputMapping({'image': 'conv2d_input'}) \
        .setOutputMapping({'dense_1': 'prediction'}) \
        .setSignatureDefKey('serving_default') \
        .setExportDir(args.export_dir) \
        .setBatchSize(args.batch_size)

    def argmax_fn(l):
        return max(range(len(l)), key=lambda i: l[i])

    argmax = udf(argmax_fn, IntegerType())
    preds = model.transform(df).withColumn('argmax', argmax('prediction'))
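    # A minimal accuracy check (sketch): assumes the 'label' column produced by
    # parse() holds the true digit as an integer-comparable value.
    from pyspark.sql.functions import col

    total = preds.count()
    correct = preds.filter(col('argmax') == col('label')).count()
    print("accuracy: {}".format(correct / total))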
def test_spark_sparse_tensor(self):
    """InputMode.SPARK feeding sparse tensors"""
    def sparse_train(args, ctx):
        import tensorflow as tf

        # reset graph in case we're re-using a Spark python worker (during tests)
        tf.reset_default_graph()

        cluster, server = ctx.start_cluster_server(ctx)
        if ctx.job_name == "ps":
            server.join()
        elif ctx.job_name == "worker":
            with tf.device(tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % ctx.task_index,
                    cluster=cluster)):
                y_ = tf.placeholder(tf.float32, name='y_label')
                label = tf.identity(y_, name='label')

                row_indices = tf.placeholder(tf.int64, name='x_row_indices')
                col_indices = tf.placeholder(tf.int64, name='x_col_indices')
                values = tf.placeholder(tf.float32, name='x_values')

                # reconstruct a SparseTensor from the fed indices/values
                indices = tf.stack([row_indices[0], col_indices[0]], axis=1)
                data = values[0]
                x = tf.SparseTensor(indices=indices, values=data, dense_shape=[args.batch_size, 10])

                w = tf.Variable(tf.truncated_normal([10, 1]), name='w')
                y = tf.sparse_tensor_dense_matmul(x, w, name='y')
                global_step = tf.train.get_or_create_global_step()

                cost = tf.reduce_mean(tf.square(y_ - y), name='cost')
                optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(cost, global_step)

            with tf.train.MonitoredTrainingSession(master=server.target,
                                                   is_chief=(ctx.task_index == 0),
                                                   checkpoint_dir=args.model_dir,
                                                   save_checkpoint_steps=20) as sess:
                tf_feed = ctx.get_data_feed(input_mapping=args.input_mapping)
                while not sess.should_stop() and not tf_feed.should_stop():
                    batch = tf_feed.next_batch(args.batch_size)
                    if len(batch['y_label']) > 0:
                        print("batch: {}".format(batch))
                        feed = {y_: batch['y_label'],
                                row_indices: batch['x_row_indices'],
                                col_indices: batch['x_col_indices'],
                                values: batch['x_values']}
                        _, pred, trained_weights = sess.run([optimizer, y, w], feed_dict=feed)
                        print("trained_weights: {}".format(trained_weights))

            # wait for MonitoredTrainingSession to save last checkpoint
            time.sleep(10)

    args = {}
    estimator = TFEstimator(sparse_train, args) \
        .setInputMapping({'labels': 'y_label',
                          'row_indices': 'x_row_indices',
                          'col_indices': 'x_col_indices',
                          'values': 'x_values'}) \
        .setInputMode(TFCluster.InputMode.SPARK) \
        .setModelDir(self.model_dir) \
        .setClusterSize(self.num_workers) \
        .setNumPS(1) \
        .setBatchSize(1)

    # generate labeled training examples from random sparse vectors: label = x . w
    model_weights = np.array([[1.0, 1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0]]).T
    examples = [scipy.sparse.random(1, 10, density=0.5) for i in range(200)]
    rdd = self.sc.parallelize(examples).map(
        lambda e: ((e * model_weights).tolist()[0][0], e.row.tolist(), e.col.tolist(), e.data.tolist()))
    df = rdd.toDF(["labels", "row_indices", "col_indices", "values"])
    df.show(5)

    model = estimator.fit(df)

    model.setOutputMapping({'label': 'label', 'y/SparseTensorDenseMatMul': 'predictions'})
    test_examples = [scipy.sparse.random(1, 10, density=0.5) for i in range(50)]
    test_rdd = self.sc.parallelize(test_examples).map(
        lambda e: ((e * model_weights).tolist()[0][0], e.row.tolist(), e.col.tolist(), e.data.tolist()))
    test_df = test_rdd.toDF(["labels", "row_indices", "col_indices", "values"])
    test_df.show(5)

    preds = model.transform(test_df)
    preds.show(5)
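    # Rough sanity check (sketch): compare predictions against the fed labels,
    # assuming each output column unpacks to a single-element array per row.
    rows = preds.select('label', 'predictions').collect()
    errors = [abs(r.label[0] - r.predictions[0]) for r in rows]
    print("mean abs error: {}".format(sum(errors) / len(errors)))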