def test_spark_saved_model(self):
    """Train with InputMode.SPARK, export a saved_model, and infer via TFModel.

    Fits a TFEstimator on ``self.train_examples``, asserts that the export
    directory exists afterwards, and then checks the first prediction row
    for ``self.test_examples`` twice: once through the exported signature
    and once through direct tensor-name mappings.
    """
    # Training DataFrame; its columns are mapped to the 'x' / 'y_' tensors.
    train_df = self.spark.createDataFrame(
        self.train_examples, ['col1', 'col2'])

    # Train the model and export it.
    args = {}
    estimator = (
        TFEstimator(self.get_function('spark/train'), args)
        .setInputMapping({'col1': 'x', 'col2': 'y_'})
        .setModelDir(self.model_dir)
        .setExportDir(self.export_dir)
        .setClusterSize(self.num_workers)
        .setNumPS(1)
        .setBatchSize(10)
        .setEpochs(2)
    )
    model = estimator.fit(train_df)
    self.assertTrue(os.path.isdir(self.export_dir))

    # Test DataFrame of (features, labels) examples.
    test_df = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

    # Case 1: inference through the exported signature.
    (
        model.setTagSet('test_tag')
        .setSignatureDefKey('test_key')
        .setInputMapping({'c1': 'features'})
        .setOutputMapping({'prediction': 'cout'})
    )
    first_row = model.transform(test_df).head()  # first/only result
    prediction = first_row.cout[0]  # scalar unpacked from the tensor
    expected = np.sum(self.weights)
    self.assertAlmostEqual(prediction, expected, 5)

    # Case 2: inference through custom/direct tensor mappings
    # (no signature key).
    (
        model.setTagSet('test_tag')
        .setSignatureDefKey(None)
        .setInputMapping({'c1': 'x'})
        .setOutputMapping({'y': 'cout1', 'y2': 'cout2'})
    )
    first_row = model.transform(test_df).head()  # first/only result
    prediction = first_row.cout1[0]  # prediction scalar
    squared_prediction = first_row.cout2[0]  # squared-prediction scalar

    self.assertAlmostEqual(prediction, expected, 5)
    self.assertAlmostEqual(squared_prediction, expected * expected, 5)
Example #2
0
def get_tf_estimator(args):
    """Return a TensorFlow estimator configured from ``args``.

    The feature and label columns (``args.feature_alias`` and
    ``args.label_name``) are each mapped to a tensor of the same name; the
    remaining settings are copied straight from the matching ``args``
    attributes.
    """
    return (
        TFEstimator(softmax_dist.map_fun, args)
        .setInputMapping({
            args.feature_alias: args.feature_alias,
            args.label_name: args.label_name,
        })
        .setModelDir(args.model_dir)
        .setExportDir(args.export_dir)
        .setClusterSize(args.cluster_size)
        .setNumPS(args.num_ps)
        .setProtocol(args.protocol)
        .setTensorboard(args.tensorboard)
        .setEpochs(args.epochs)
        .setBatchSize(args.batch_size)
        .setSteps(args.steps)
    )
Example #3
0
    def test_spark_saved_model(self):
        """Train a Keras model via InputMode.SPARK, export it, and infer with TFModel.

        Fits a TFEstimator on ``self.train_examples``, asserts that the export
        directory exists afterwards, and compares the first prediction for
        ``self.test_examples`` against the sum of ``self.weights``.
        """

        def _spark_train(args, ctx):
            """Fit a single-unit linear Keras model on rows fed from Spark.

            Runs under ``MultiWorkerMirroredStrategy``, writes weights-only
            checkpoints under ``args.model_dir`` and, on the chief node only,
            exports a saved_model to ``args.export_dir`` when one is set.
            """
            import tensorflow as tf
            from tensorflowonspark import TFNode

            tf.compat.v1.reset_default_graph()
            strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

            with strategy.scope():
                keras_model = Sequential()
                keras_model.add(
                    Dense(1, activation='linear', input_shape=[2]))
                keras_model.compile(
                    optimizer=tf.keras.optimizers.Adam(lr=0.2),
                    loss='mse',
                    metrics=['mse'],
                )
                keras_model.summary()

            feed = TFNode.DataFeed(ctx.mgr, input_mapping=args.input_mapping)

            def rdd_generator():
                """Yield one (features, label) pair at a time from the feed."""
                while not feed.should_stop():
                    batch = feed.next_batch(1)
                    # An empty batch ends the generator.
                    if len(batch['x']) == 0:
                        return
                    features = batch['x'][0]
                    label = batch['y_'][0]
                    yield (features, label)

            dataset = tf.data.Dataset.from_generator(
                rdd_generator,
                output_types=(tf.float32, tf.float32),
                output_shapes=(tf.TensorShape([2]), tf.TensorShape([1])),
            )
            dataset = dataset.batch(args.batch_size)

            # Turn off dataset auto-sharding.
            ds_options = tf.data.Options()
            ds_options.experimental_distribute.auto_shard = False
            dataset = dataset.with_options(ds_options)

            # Train on only 90% of each epoch to allow for uneven RDD
            # partition sizes (1000 is presumably the number of training
            # examples -- TODO confirm).
            steps_per_epoch = 1000 * 0.9 // (args.batch_size * ctx.num_workers)

            tf.io.gfile.makedirs(args.model_dir)
            checkpoint_pattern = args.model_dir + "/weights-{epoch:04d}"
            checkpointer = tf.keras.callbacks.ModelCheckpoint(
                filepath=checkpoint_pattern,
                verbose=1,
                load_weights_on_restart=True,
                save_weights_only=True,
            )

            # NOTE: model.fit_generator() is not an option here; it fails with
            # "NotImplementedError: `fit_generator` is not supported for
            # models compiled with tf.distribute.Strategy".
            keras_model.fit(
                dataset,
                epochs=args.epochs,
                steps_per_epoch=steps_per_epoch,
                callbacks=[checkpointer],
            )

            if ctx.job_name == 'chief' and args.export_dir:
                print("exporting model to: {}".format(args.export_dir))
                tf.keras.experimental.export_saved_model(
                    keras_model, args.export_dir)

            feed.terminate()

        # Training DataFrame built from a two-partition RDD of examples.
        train_rdd = self.sc.parallelize(self.train_examples, 2)
        train_df = train_rdd.toDF(['col1', 'col2'])

        # Train the model and export it.
        args = {}
        estimator = (
            TFEstimator(_spark_train, args)
            .setInputMapping({'col1': 'x', 'col2': 'y_'})
            .setModelDir(self.model_dir)
            .setExportDir(self.export_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(0)
            .setBatchSize(1)
            .setEpochs(1)
        )
        model = estimator.fit(train_df)
        self.assertTrue(os.path.isdir(self.export_dir))

        # Test DataFrame of (features, labels) examples.
        test_df = self.spark.createDataFrame(self.test_examples, ['c1', 'c2'])

        # Inference through the exported serving signature.
        (
            model.setTagSet('serve')
            .setSignatureDefKey('serving_default')
            .setInputMapping({'c1': 'dense_input'})
            .setOutputMapping({'dense': 'cout'})
        )
        first_row = model.transform(test_df).head()  # first/only result
        prediction = first_row.cout[0]  # scalar unpacked from the tensor
        expected = np.sum(self.weights)
        self.assertAlmostEqual(prediction, expected, 2)
        # create RDD of input data
        def parse(ln):
            """Split a comma-separated line of integers into (rest, first).

            The first field is returned as the second tuple element and all
            remaining fields, in order, as a list in the first element.
            """
            fields = ln.split(',')
            head = int(fields[0])
            tail = [int(token) for token in fields[1:]]
            return (tail, head)

        images_labels = sc.textFile(args.images_labels).map(parse)
        df = spark.createDataFrame(images_labels, ['image', 'label'])

    df.show()

    if args.mode == 'train':
        estimator = TFEstimator(main_fun, args) \
            .setInputMapping({'image': 'image', 'label': 'label'}) \
            .setModelDir(args.model_dir) \
            .setExportDir(args.export_dir) \
            .setClusterSize(args.cluster_size) \
            .setTensorboard(args.tensorboard) \
            .setEpochs(args.epochs) \
            .setBatchSize(args.batch_size) \
            .setGraceSecs(60)
        model = estimator.fit(df)
    else:  # args.mode == 'inference':
        # using a trained/exported model
        model = TFModel(args) \
            .setInputMapping({'image': 'conv2d_input'}) \
            .setOutputMapping({'dense_1': 'prediction'}) \
            .setSignatureDefKey('serving_default') \
            .setExportDir(args.export_dir) \
            .setBatchSize(args.batch_size)

        def argmax_fn(l):
Example #5
0
    def test_spark_sparse_tensor(self):
        """Feed sparse tensors through InputMode.SPARK for training and inference.

        Trains a 10x1 weight matrix on randomly generated sparse rows whose
        labels come from a fixed weight vector, then runs the fitted model
        over a second random set and shows the resulting predictions.
        """

        def sparse_train(args, ctx):
            """Build and train the sparse linear model on a 'worker' node."""
            import tensorflow as tf

            # Reset the graph in case a Spark python worker is being re-used
            # (this happens during tests).
            tf.reset_default_graph()

            # NOTE(review): ctx is passed as an explicit argument to its own
            # method here -- confirm this matches start_cluster_server's
            # signature.
            cluster, server = ctx.start_cluster_server(ctx)
            if ctx.job_name == "ps":
                server.join()
                return
            if ctx.job_name != "worker":
                return

            worker_device = "/job:worker/task:%d" % ctx.task_index
            device_setter = tf.train.replica_device_setter(
                worker_device=worker_device, cluster=cluster)
            with tf.device(device_setter):
                y_ph = tf.placeholder(tf.float32, name='y_label')
                # The 'label' op is referenced by name in the output mapping
                # used at inference time.
                label = tf.identity(y_ph, name='label')

                row_ph = tf.placeholder(tf.int64, name='x_row_indices')
                col_ph = tf.placeholder(tf.int64, name='x_col_indices')
                values_ph = tf.placeholder(tf.float32, name='x_values')
                indices = tf.stack([row_ph[0], col_ph[0]], axis=1)
                data = values_ph[0]

                x = tf.SparseTensor(
                    indices=indices,
                    values=data,
                    dense_shape=[args.batch_size, 10],
                )
                w = tf.Variable(tf.truncated_normal([10, 1]), name='w')
                y = tf.sparse_tensor_dense_matmul(x, w, name='y')

                global_step = tf.train.get_or_create_global_step()
                cost = tf.reduce_mean(tf.square(y_ph - y), name='cost')
                optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(
                    cost, global_step)

            with tf.train.MonitoredTrainingSession(
                    master=server.target,
                    is_chief=(ctx.task_index == 0),
                    checkpoint_dir=args.model_dir,
                    save_checkpoint_steps=20) as sess:
                tf_feed = ctx.get_data_feed(input_mapping=args.input_mapping)
                while not (sess.should_stop() or tf_feed.should_stop()):
                    batch = tf_feed.next_batch(args.batch_size)
                    if len(batch['y_label']) == 0:
                        continue
                    print("batch: {}".format(batch))
                    feed = {
                        y_ph: batch['y_label'],
                        row_ph: batch['x_row_indices'],
                        col_ph: batch['x_col_indices'],
                        values_ph: batch['x_values'],
                    }
                    _, pred, trained_weights = sess.run(
                        [optimizer, y, w], feed_dict=feed)
                    print("trained_weights: {}".format(trained_weights))

            # Give MonitoredTrainingSession time to save its last checkpoint.
            time.sleep(10)

        args = {}
        estimator = (
            TFEstimator(sparse_train, args)
            .setInputMapping({
                'labels': 'y_label',
                'row_indices': 'x_row_indices',
                'col_indices': 'x_col_indices',
                'values': 'x_values',
            })
            .setInputMode(TFCluster.InputMode.SPARK)
            .setModelDir(self.model_dir)
            .setClusterSize(self.num_workers)
            .setNumPS(1)
            .setBatchSize(1)
        )

        model_weights = np.array(
            [[1.0, 1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, -1.0]]).T

        def to_row(e):
            """Turn one sparse example into (label, rows, cols, values)."""
            return ((e * model_weights).tolist()[0][0], e.row.tolist(),
                    e.col.tolist(), e.data.tolist())

        column_names = ["labels", "row_indices", "col_indices", "values"]

        # Training set: 200 random 1x10 sparse rows.
        examples = [
            scipy.sparse.random(1, 10, density=0.5) for _ in range(200)
        ]
        rdd = self.sc.parallelize(examples).map(to_row)
        df = rdd.toDF(column_names)
        df.show(5)
        model = estimator.fit(df)

        model.setOutputMapping({
            'label': 'label',
            'y/SparseTensorDenseMatMul': 'predictions',
        })

        # Test set: 50 random 1x10 sparse rows.
        test_examples = [
            scipy.sparse.random(1, 10, density=0.5) for _ in range(50)
        ]
        test_rdd = self.sc.parallelize(test_examples).map(to_row)
        test_df = test_rdd.toDF(column_names)
        test_df.show(5)
        preds = model.transform(test_df)
        preds.show(5)
Example #6
0
                        help="HDFS path to validation data",
                        type=str)

    (args, rem) = parser.parse_known_args()

    input_mode = TFCluster.InputMode.SPARK if args.input_mode == 'spark' else TFCluster.InputMode.TENSORFLOW

    print("{0} ===== Start".format(datetime.now().isoformat()))

    df = dfutil.loadTFRecords(sc,
                              args.train_data,
                              binary_features=['image/encoded'])
    estimator = TFEstimator(main_fun, sys.argv, export_fn=inception_export.export) \
            .setModelDir(args.train_dir) \
            .setExportDir(args.export_dir) \
            .setTFRecordDir(args.tfrecord_dir) \
            .setClusterSize(args.cluster_size) \
            .setNumPS(args.num_ps) \
            .setInputMode(TFCluster.InputMode.TENSORFLOW) \
            .setTensorboard(args.tensorboard) \

    print("{0} ===== Train".format(datetime.now().isoformat()))
    model = estimator.fit(df)

    print("{0} ===== Inference".format(datetime.now().isoformat()))
    df = dfutil.loadTFRecords(sc,
                              args.validation_data,
                              binary_features=['image/encoded'])
    preds = model.setTagSet(tf.saved_model.tag_constants.SERVING) \
                .setSignatureDefKey(tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY) \
                .setInputMapping({'image/encoded': 'jpegs', 'image/class/label': 'labels'}) \
                .setOutputMapping({'top_5_acc': 'output'}) \
Example #7
0
if args.train:
    # Fit a Spark Estimator on the DataFrame to train a model.
    print("{0} ===== Estimator.fit()".format(datetime.now().isoformat()))
    # Dummy tf args (from the imagenet/inception example).
    # NOTE(review): tf_args is built here, but `args` is what gets passed to
    # TFEstimator below -- confirm that is intended.
    tf_args = {'initial_learning_rate': 0.045, 'num_epochs_per_decay': 2.0, 'learning_rate_decay_factor': 0.94}
    estimator = (
        TFEstimator(mnist_dist_pipeline.map_fun, args,
                    export_fn=mnist_dist_pipeline.export_fun)
        .setModelDir(args.model_dir)
        .setExportDir(args.export_dir)
        .setClusterSize(args.cluster_size)
        .setNumPS(args.num_ps)
        .setInputMode(TFCluster.InputMode.TENSORFLOW)
        .setTFRecordDir(args.tfrecord_dir)
        .setProtocol(args.protocol)
        .setReaders(args.readers)
        .setTensorboard(args.tensorboard)
        .setEpochs(args.epochs)
        .setBatchSize(args.batch_size)
        .setSteps(args.steps)
    )
    model = estimator.fit(df)
else:
    # Load a previously trained/exported model instead of training.
    model = (
        TFModel(args)
        .setExportDir(args.export_dir)
        .setBatchSize(args.batch_size)
    )

# NO INFERENCING
if args.inference_mode == 'none':
else:
  raise Exception("Unsupported format: {}".format(args.format))

# Pipeline API

if args.train:
  # Fit a Spark Estimator on the DataFrame to train a model.
  print("{0} ===== Estimator.fit()".format(datetime.now().isoformat()))
  # Dummy tf args (from the imagenet/inception example).
  tf_args = {
      'initial_learning_rate': 0.045,
      'num_epochs_per_decay': 2.0,
      'learning_rate_decay_factor': 0.94,
  }
  estimator = (
      TFEstimator(mnist_dist_pipeline.map_fun, tf_args)
      .setInputMapping({'image': 'image', 'label': 'label'})
      .setModelDir(args.model_dir)
      .setExportDir(args.export_dir)
      .setClusterSize(args.cluster_size)
      .setNumPS(args.num_ps)
      .setProtocol(args.protocol)
      .setTensorboard(args.tensorboard)
      .setEpochs(args.epochs)
      .setBatchSize(args.batch_size)
      .setSteps(args.steps)
  )
  model = estimator.fit(df)
else:
  # Load a previously trained/exported model instead of training.
  model = (
      TFModel(args)
      .setExportDir(args.export_dir)
      .setBatchSize(args.batch_size)
  )

# No inferencing requested: stop here with a success exit code.
if args.inference_mode == 'none':
  sys.exit(0)