Esempio n. 1
0
    def test_tf_column_filter(self):
        """Verify that InputMode.TENSORFLOW training only writes the input_mapping columns to the temporary TFRecords."""

        # Start from the (features, labels) training examples ...
        base_df = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

        # ... and pad the DataFrame with two columns that the input mapping
        # never mentions, so there is something to filter out.
        padded_df = base_df.withColumn('extra1', base_df.col1)
        padded_df = padded_df.withColumn('extra2', base_df.col2)
        self.assertEqual(len(padded_df.columns), 4)

        # Configure and run training; only 'col1' and 'col2' are mapped.
        train_args = {}
        estimator = (
            TFEstimator(self.get_function('tf/train'), train_args,
                        export_fn=self.get_function('tf/export'))
            .setInputMapping({'col1': 'x', 'col2': 'y_'})
            .setInputMode(TFCluster.InputMode.TENSORFLOW)
            .setModelDir(self.model_dir)
            .setExportDir(self.export_dir)
            .setTFRecordDir(self.tfrecord_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(1)
            .setBatchSize(10)
        )
        estimator.fit(padded_df)

        # Both the model directory and the TFRecord directory must now exist.
        for expected_dir in (self.model_dir, self.tfrecord_dir):
            self.assertTrue(os.path.isdir(expected_dir))

        # Reload the saved TFRecords: the extra columns must be gone.
        saved_df = dfutil.loadTFRecords(self.sc, self.tfrecord_dir)
        self.assertEqual(saved_df.columns, ['col1', 'col2'])
Esempio n. 2
0
    def test_tf_checkpoint_with_export_fn(self):
        """Train with InputMode.TENSORFLOW and a separate saved_model export function, then run TFModel inference over a Spark DataFrame."""

        # Training data: (features, labels) examples as a Spark DataFrame.
        train_df = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

        # Fit the estimator; the export function is supplied separately from
        # the training function.
        train_args = {}
        estimator = (
            TFEstimator(self.get_function('tf/train'), train_args,
                        export_fn=self.get_function('tf/export'))
            .setInputMapping({'col1': 'x', 'col2': 'y_'})
            .setInputMode(TFCluster.InputMode.TENSORFLOW)
            .setModelDir(self.model_dir)
            .setExportDir(self.export_dir)
            .setTFRecordDir(self.tfrecord_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(1)
            .setBatchSize(10)
        )
        model = estimator.fit(train_df)
        self.assertTrue(os.path.isdir(self.model_dir))

        # Test data: (features, labels) examples under different column names.
        test_df = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

        # Run inference, addressing the input/output tensors directly by name.
        (model.setTagSet('test_tag')
              .setInputMapping({'c1': 'x'})
              .setOutputMapping({'y': 'cout1', 'y2': 'cout2'}))

        # Only the first result row is inspected,
        # e.g. [ Row(cout=[4.758000373840332])]
        first_row = model.transform(test_df).head()
        linear_pred, squared_pred = first_row.cout1[0], first_row.cout2[0]
        self.assertAlmostEqual(linear_pred, np.sum(self.weights), 5)
        self.assertAlmostEqual(squared_pred, np.sum(self.weights)**2, 5)
Esempio n. 3
0
    def test_spark_checkpoint(self):
        """Train with InputMode.SPARK, then run TFModel inference straight from the model checkpoint."""

        # Training data: (features, labels) examples as a Spark DataFrame.
        train_df = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

        # Fit the estimator; no export directory is configured for this test.
        train_args = {}
        estimator = (
            TFEstimator(self.get_function('spark/train'), train_args)
            .setInputMapping({'col1': 'x', 'col2': 'y_'})
            .setModelDir(self.model_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(1)
            .setBatchSize(10)
            .setEpochs(2)
        )
        model = estimator.fit(train_df)
        self.assertTrue(os.path.isdir(self.model_dir))

        # Test data: (features, labels) examples under different column names.
        test_df = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

        # Run inference from the checkpoint, addressing tensors by name.
        (model.setInputMapping({'c1': 'x'})
              .setOutputMapping({'y': 'cout'}))

        # Only the first result row is inspected,
        # e.g. [ Row(cout=[4.758000373840332])]
        first_row = model.transform(test_df).head()
        prediction = first_row.cout[0]  # scalar pulled out of the tensor
        self.assertAlmostEqual(prediction, np.sum(self.weights), 5)
Esempio n. 4
0
    def test_spark_saved_model(self):
        """Train with InputMode.SPARK, export a saved_model, and run TFModel inference via the signature and via direct tensor names."""

        # Training data: (features, labels) examples as a Spark DataFrame.
        train_df = self.spark.createDataFrame(self.train_examples, ['col1', 'col2'])

        # Fit the estimator with an export directory configured.
        train_args = {}
        estimator = (
            TFEstimator(self.get_function('spark/train'), train_args)
            .setInputMapping({'col1': 'x', 'col2': 'y_'})
            .setModelDir(self.model_dir)
            .setExportDir(self.export_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(1)
            .setBatchSize(10)
            .setEpochs(2)
        )
        model = estimator.fit(train_df)
        self.assertTrue(os.path.isdir(self.export_dir))

        # Test data: (features, labels) examples under different column names.
        test_df = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

        # Pass 1: inference through the exported signature.
        (model.setTagSet('test_tag')
              .setSignatureDefKey('test_key')
              .setInputMapping({'c1': 'features'})
              .setOutputMapping({'prediction': 'cout'}))
        first_row = model.transform(test_df).head()  # first/only result
        pred = first_row.cout[0]  # scalar pulled out of the tensor
        expected = np.sum(self.weights)
        self.assertAlmostEqual(pred, expected, 5)

        # Pass 2: no signature key; tensors are mapped directly by name.
        (model.setTagSet('test_tag')
              .setSignatureDefKey(None)
              .setInputMapping({'c1': 'x'})
              .setOutputMapping({'y': 'cout1', 'y2': 'cout2'}))
        first_row = model.transform(test_df).head()  # first/only result
        pred = first_row.cout1[0]  # prediction scalar
        squared_pred = first_row.cout2[0]  # squared-prediction scalar

        self.assertAlmostEqual(pred, expected, 5)
        self.assertAlmostEqual(squared_pred, expected * expected, 5)
Esempio n. 5
0
    def test_spark_saved_model(self):
        """Train a Keras model with InputMode.SPARK, export a saved_model, and check TFModel inference through the exported signature."""
        def _spark_train(args, ctx):
            """Fit a single-unit linear Keras model on data fed from Spark (InputMode.SPARK)."""
            import tensorflow as tf
            from tensorflowonspark import TFNode

            tf.compat.v1.reset_default_graph()
            strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

            # The model must be created and compiled inside the strategy scope.
            with strategy.scope():
                net = Sequential()
                net.add(Dense(1, activation='linear', input_shape=[2]))
                net.compile(optimizer=tf.keras.optimizers.Adam(lr=0.2),
                            loss='mse',
                            metrics=['mse'])
                net.summary()

            feed = TFNode.DataFeed(ctx.mgr, input_mapping=args.input_mapping)

            def rdd_generator():
                # Pull one record at a time from the Spark feed; an empty
                # batch ends the generator.
                while not feed.should_stop():
                    batch = feed.next_batch(1)
                    if len(batch['x']) == 0:
                        return
                    yield (batch['x'][0], batch['y_'][0])

            output_types = (tf.float32, tf.float32)
            output_shapes = (tf.TensorShape([2]), tf.TensorShape([1]))
            dataset = tf.data.Dataset.from_generator(
                rdd_generator, output_types, output_shapes)
            dataset = dataset.batch(args.batch_size)

            # Turn off dataset auto-sharding.
            ds_options = tf.data.Options()
            ds_options.experimental_distribute.auto_shard = False
            dataset = dataset.with_options(ds_options)

            # Train on only 90% of each epoch to allow for uneven RDD
            # partition sizes.
            steps_per_epoch = 1000 * 0.9 // (args.batch_size * ctx.num_workers)

            tf.io.gfile.makedirs(args.model_dir)
            checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
                filepath=args.model_dir + "/weights-{epoch:04d}",
                verbose=1,
                load_weights_on_restart=True,
                save_weights_only=True)

            net.fit(dataset,
                    epochs=args.epochs,
                    steps_per_epoch=steps_per_epoch,
                    callbacks=[checkpoint_cb])

            # NOTE: fit_generator is not an option here; it fails with
            # "NotImplementedError: `fit_generator` is not supported for models compiled with tf.distribute.Strategy"

            # Only the chief exports, and only when an export dir is set.
            if ctx.job_name == 'chief' and args.export_dir:
                print("exporting model to: {}".format(args.export_dir))
                tf.keras.experimental.export_saved_model(net, args.export_dir)

            feed.terminate()

        # Training data: (features, labels) examples spread over 2 partitions.
        train_df = self.sc.parallelize(self.train_examples, 2).toDF(['col1', 'col2'])

        # Fit the estimator with an export directory configured.
        train_args = {}
        estimator = (
            TFEstimator(_spark_train, train_args)
            .setInputMapping({'col1': 'x', 'col2': 'y_'})
            .setModelDir(self.model_dir)
            .setExportDir(self.export_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(0)
            .setBatchSize(1)
            .setEpochs(1)
        )
        model = estimator.fit(train_df)
        self.assertTrue(os.path.isdir(self.export_dir))

        # Test data: (features, labels) examples under different column names.
        test_df = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

        # Inference through the exported signature.
        (model.setTagSet('serve')
              .setSignatureDefKey('serving_default')
              .setInputMapping({'c1': 'dense_input'})
              .setOutputMapping({'dense': 'cout'}))
        first_row = model.transform(test_df).head()  # first/only result
        prediction = first_row.cout[0]  # scalar pulled out of the tensor
        expected = np.sum(self.weights)
        self.assertAlmostEqual(prediction, expected, 2)
        images_labels = sc.textFile(args.images_labels).map(parse)
        df = spark.createDataFrame(images_labels, ['image', 'label'])

    df.show()

    if args.mode == 'train':
        estimator = TFEstimator(main_fun, args) \
            .setInputMapping({'image': 'image', 'label': 'label'}) \
            .setModelDir(args.model_dir) \
            .setExportDir(args.export_dir) \
            .setClusterSize(args.cluster_size) \
            .setTensorboard(args.tensorboard) \
            .setEpochs(args.epochs) \
            .setBatchSize(args.batch_size) \
            .setGraceSecs(60)
        model = estimator.fit(df)
    else:  # args.mode == 'inference':
        # using a trained/exported model
        model = TFModel(args) \
            .setInputMapping({'image': 'conv2d_input'}) \
            .setOutputMapping({'dense_1': 'prediction'}) \
            .setSignatureDefKey('serving_default') \
            .setExportDir(args.export_dir) \
            .setBatchSize(args.batch_size)

        def argmax_fn(l):
            return max(range(len(l)), key=lambda i: l[i])

        argmax = udf(argmax_fn, IntegerType())

        preds = model.transform(df).withColumn('argmax', argmax('prediction'))
Esempio n. 7
0
    def test_spark_sparse_tensor(self):
        """InputMode.SPARK feeding sparse tensors.

        Trains a linear model whose input arrives as sparse-tensor components
        (row indices, column indices, values) taken from a Spark DataFrame,
        then runs the fitted model over a second DataFrame of randomly
        generated examples. The visible code makes no assertions on the
        predictions; it only displays them.
        """
        def sparse_train(args, ctx):
            """Build and train the sparse linear-regression graph on one cluster node.

            Args:
                args: training arguments; this function reads ``batch_size``,
                    ``model_dir`` and ``input_mapping`` from it.
                ctx: node context; this function reads ``job_name`` and
                    ``task_index`` and calls ``start_cluster_server`` and
                    ``get_data_feed`` on it.
            """
            import tensorflow as tf

            # reset graph in case we're re-using a Spark python worker (during tests)
            tf.reset_default_graph()

            cluster, server = ctx.start_cluster_server(ctx)
            if ctx.job_name == "ps":
                # parameter-server nodes just block on the server
                server.join()
            elif ctx.job_name == "worker":
                with tf.device(
                        tf.train.replica_device_setter(
                            worker_device="/job:worker/task:%d" %
                            ctx.task_index,
                            cluster=cluster)):
                    # label input, re-exposed under the name 'label' so it can
                    # be referenced by name in the output mapping below
                    y_ = tf.placeholder(tf.float32, name='y_label')
                    label = tf.identity(y_, name='label')

                    # the three components of the sparse input
                    row_indices = tf.placeholder(tf.int64,
                                                 name='x_row_indices')
                    col_indices = tf.placeholder(tf.int64,
                                                 name='x_col_indices')
                    values = tf.placeholder(tf.float32, name='x_values')
                    # NOTE(review): only element [0] of each fed component is
                    # used, which appears to rely on the batch size of 1 set
                    # on the estimator below -- confirm before changing it.
                    indices = tf.stack([row_indices[0], col_indices[0]],
                                       axis=1)
                    data = values[0]

                    # linear model: y = x (sparse) * w (10 x 1 dense)
                    x = tf.SparseTensor(indices=indices,
                                        values=data,
                                        dense_shape=[args.batch_size, 10])
                    w = tf.Variable(tf.truncated_normal([10, 1]), name='w')
                    y = tf.sparse_tensor_dense_matmul(x, w, name='y')

                    # mean-squared-error cost minimized by gradient descent
                    global_step = tf.train.get_or_create_global_step()
                    cost = tf.reduce_mean(tf.square(y_ - y), name='cost')
                    optimizer = tf.train.GradientDescentOptimizer(
                        0.1).minimize(cost, global_step)

                # task 0 acts as chief; checkpoints are written to
                # args.model_dir every 20 steps
                with tf.train.MonitoredTrainingSession(
                        master=server.target,
                        is_chief=(ctx.task_index == 0),
                        checkpoint_dir=args.model_dir,
                        save_checkpoint_steps=20) as sess:
                    tf_feed = ctx.get_data_feed(
                        input_mapping=args.input_mapping)
                    # keep training until either the session or the feed stops
                    while not sess.should_stop() and not tf_feed.should_stop():
                        batch = tf_feed.next_batch(args.batch_size)
                        # empty batches are skipped rather than fed
                        if len(batch['y_label']) > 0:
                            print("batch: {}".format(batch))
                            feed = {
                                y_: batch['y_label'],
                                row_indices: batch['x_row_indices'],
                                col_indices: batch['x_col_indices'],
                                values: batch['x_values']
                            }
                            _, pred, trained_weights = sess.run(
                                [optimizer, y, w], feed_dict=feed)
                            print(
                                "trained_weights: {}".format(trained_weights))

                # wait for MonitoredTrainingSession to save last checkpoint
                time.sleep(10)

        # map DataFrame columns to the placeholder names used in sparse_train
        args = {}
        estimator = TFEstimator(sparse_train, args) \
                  .setInputMapping({'labels': 'y_label', 'row_indices': 'x_row_indices', 'col_indices': 'x_col_indices', 'values': 'x_values'}) \
                  .setInputMode(TFCluster.InputMode.SPARK) \
                  .setModelDir(self.model_dir) \
                  .setClusterSize(self.num_workers) \
                  .setNumPS(1) \
                  .setBatchSize(1)

        # ground-truth weights used to compute each example's label:
        # +1 for the first five features, -1 for the last five (10 x 1 column)
        model_weights = np.array(
            [[1.0, 1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0]]).T
        # 200 random 1 x 10 sparse training examples
        examples = [
            scipy.sparse.random(
                1,
                10,
                density=0.5,
            ) for i in range(200)
        ]
        # each row: (label, row indices, column indices, non-zero values)
        rdd = self.sc.parallelize(examples).map(
            lambda e: ((e * model_weights).tolist()[0][0], e.row.tolist(),
                       e.col.tolist(), e.data.tolist()))
        df = rdd.toDF(["labels", "row_indices", "col_indices", "values"])
        df.show(5)
        model = estimator.fit(df)

        # expose the 'label' and 'y/SparseTensorDenseMatMul' tensors as the
        # 'label' and 'predictions' output columns
        model.setOutputMapping({
            'label': 'label',
            'y/SparseTensorDenseMatMul': 'predictions'
        })
        # 50 random test examples, converted the same way as the training data
        test_examples = [
            scipy.sparse.random(
                1,
                10,
                density=0.5,
            ) for i in range(50)
        ]
        test_rdd = self.sc.parallelize(test_examples).map(
            lambda e: ((e * model_weights).tolist()[0][0], e.row.tolist(),
                       e.col.tolist(), e.data.tolist()))
        test_df = test_rdd.toDF(
            ["labels", "row_indices", "col_indices", "values"])
        test_df.show(5)
        preds = model.transform(test_df)
        preds.show(5)