# NOTE(review): this chunk begins inside a truncated `if` branch — the dangling
# `else:` below proves the matching `if` header (presumably a check like
# `if args.format == "tfr":`) is outside this view. Indentation of the first
# section is reconstructed accordingly; confirm against the full file.
    def toNumpy(bytestr):
        """Deserialize one serialized tf.train.Example into (image, label) numpy arrays.

        Both features are read from the example's int64_list values; shapes are
        whatever the writer stored — not validated here.
        """
        example = tf.train.Example()
        example.ParseFromString(bytestr)
        features = example.features.feature
        image = numpy.array(features['image'].int64_list.value)
        label = numpy.array(features['label'].int64_list.value)
        return (image, label)

    # Each RDD record is a (bytes, _) pair; x[0] holds the serialized Example.
    dataRDD = images.map(lambda x: toNumpy(str(x[0])))
else:
    if args.format == "csv":
        # CSV: one image per line (ints), one label per line (floats),
        # images and labels in separate, line-aligned files.
        images = sc.textFile(args.images).map(lambda ln: [int(x) for x in ln.split(',')])
        labels = sc.textFile(args.labels).map(lambda ln: [float(x) for x in ln.split(',')])
    else:  # args.format == "pickle":
        images = sc.pickleFile(args.images)
        labels = sc.pickleFile(args.labels)
    print("zipping images and labels")
    # zip() pairs images with labels positionally — requires identical
    # partitioning/ordering of the two RDDs.
    dataRDD = images.zip(labels)

# Reserve one TF task per requested node, feeding data from Spark RDDs.
cluster = TFCluster.reserve(sc, args.cluster_size, num_ps, args.tensorboard, TFCluster.InputMode.SPARK)
cluster.start(mnist_dist.map_fun, args)
if args.mode == "train":
    cluster.train(dataRDD, args.epochs)
else:
    # Inference: feed the data through the cluster and persist predictions.
    labelRDD = cluster.inference(dataRDD)
    labelRDD.saveAsTextFile(args.output)
cluster.shutdown()
print("{0} ===== Stop".format(datetime.now().isoformat()))
# NOTE(review): this chunk begins inside a truncated function body — presumably
# the tail of a `def main_fun(argv, ctx):` executor entry point (the TFoS
# pattern of importing TF-heavy modules inside the function). Indentation of the
# first section is reconstructed accordingly; confirm against the full file.
    from inception import inception_eval
    from inception.imagenet_data import ImagenetData
    print("argv:", argv)
    # Replace sys.argv so tf.app.flags parses the executor-supplied arguments.
    sys.argv = argv
    FLAGS = tf.app.flags.FLAGS
    FLAGS._parse_flags()
    print("FLAGS:", FLAGS.__dict__['__flags'])
    dataset = ImagenetData(subset=FLAGS.subset)
    assert dataset.data_files()
    # Start each evaluation from a clean eval directory.
    if tf.gfile.Exists(FLAGS.eval_dir):
        tf.gfile.DeleteRecursively(FLAGS.eval_dir)
    tf.gfile.MakeDirs(FLAGS.eval_dir)
    # Bring up this node's TF server within the reserved cluster (1 input queue).
    cluster_spec, server = TFNode.start_cluster_server(ctx, 1, FLAGS.rdma)
    inception_eval.evaluate(dataset)


if __name__ == '__main__':
    sc = SparkContext(conf=SparkConf().setAppName("grid_imagenet_eval"))
    num_executors = int(sc._conf.get("spark.executor.instances"))
    # Evaluation only: no parameter servers, no tensorboard, TF reads its own data.
    num_ps = 0
    cluster = TFCluster.reserve(sc, num_executors, num_ps, False, TFCluster.InputMode.TENSORFLOW)
    cluster.start(main_fun, sys.argv)
    cluster.shutdown()
# NOTE(review): this chunk begins mid-argument-parsing — the
# `parser = argparse.ArgumentParser()` line and any earlier arguments are
# outside this view.
parser.add_argument("--input_mode", help="method to ingest data: (spark|tf)", choices=["spark", "tf"], default="tf")
parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true")
(args, rem) = parser.parse_known_args()

# Map the CLI choice onto TFCluster's ingestion mode: Spark-fed RDDs vs.
# TensorFlow reading its own input files.
input_mode = TFCluster.InputMode.SPARK if args.input_mode == 'spark' else TFCluster.InputMode.TENSORFLOW

print("{0} ===== Start".format(datetime.now().isoformat()))
sc = SparkContext(conf=SparkConf().setAppName('imagenet_distributed_train'))
num_executors = int(sc._conf.get("spark.executor.instances"))
num_ps = 1

cluster = TFCluster.reserve(sc, num_executors, num_ps, args.tensorboard, input_mode)
cluster.start(main_fun, sys.argv)
if input_mode == TFCluster.InputMode.SPARK:
    # Spark mode: read TFRecords via the Hadoop input format and push the
    # (bytes, null) records into the cluster for `args.epochs` epochs.
    dataRDD = sc.newAPIHadoopFile(args.input_data, "org.tensorflow.hadoop.io.TFRecordFileInputFormat",
                                  keyClass="org.apache.hadoop.io.BytesWritable",
                                  valueClass="org.apache.hadoop.io.NullWritable")
    cluster.train(dataRDD, args.epochs)
cluster.shutdown()
print("{0} ===== Stop".format(datetime.now().isoformat()))
helper.display_image_predictions(random_test_features, random_test_labels, random_test_predictions) test_model() # def main(): def main_fun(argv, ctx): pass if __name__ == '__main__': # tf.app.run() import argparse parser = argparse.ArgumentParser() parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true") args, rem = parser.parse_known_args() sc = SparkContext(conf=SparkConf().setAppName("your_app_name")) num_executors = int(sc._conf.get("spark.executor.instances")) num_ps = 1 tensorboard = True cluster = TFCluster.reserve(sc, num_executors, num_ps, tensorboard, TFCluster.InputMode.TENSORFLOW) cluster.start(main_fun, sys.argv) cluster.shutdown()
sync_optimizer=optimizer if FLAGS.sync_replicas else None) if __name__ == '__main__': import argparse sc = SparkContext(conf=SparkConf().setAppName("train_image_classifier")) executors = sc._conf.get("spark.executor.instances") num_executors = int(executors) if executors is not None else 1 parser = argparse.ArgumentParser() parser.add_argument("--num_ps_tasks", help="number of PS nodes", type=int, default=0) parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true") parser.add_argument("--cluster_size", help="number of nodes in the cluster", type=int, default=num_executors) (args, rem) = parser.parse_known_args() assert (num_executors > args.num_ps_tasks) cluster = TFCluster.reserve(sc, args.cluster_size, args.num_ps_tasks, args.tensorboard, TFCluster.InputMode.TENSORFLOW) cluster.start(main_fun, sys.argv) cluster.shutdown()