Esempio n. 1
0
def main():
    """Train, evaluate and predict with a LeNet model wrapped in ``TFEstimator``.

    The cluster mode is taken from the parsed command-line arguments. Any
    value starting with ``"yarn"`` requires the ``HADOOP_CONF_DIR``
    environment variable and starts the context with a fixed Spark
    configuration; every other value starts a default context.

    The context is always stopped before returning, including when
    training, evaluation or prediction raises.
    """
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
                "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.driver.memory", "2g")
        # Every "yarn*" value other than exactly "yarn-client" runs as yarn-cluster.
        yarn_mode = "yarn-client" if cluster_mode == "yarn-client" else "yarn-cluster"
        sc = init_nncontext(spark_conf, cluster_mode=yarn_mode, hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    def model_fn(features, labels, mode):
        """Build the LeNet graph and return the ``EstimatorSpec`` for ``mode``."""
        from nets import lenet
        slim = tf.contrib.slim
        # Only enable training-time behaviour while actually training, so
        # that evaluation and prediction run the network in inference mode.
        is_training = mode == tf.estimator.ModeKeys.TRAIN
        with slim.arg_scope(lenet.lenet_arg_scope()):
            logits, _ = lenet.lenet(features, num_classes=10, is_training=is_training)

        if mode == tf.estimator.ModeKeys.EVAL or mode == tf.estimator.ModeKeys.TRAIN:
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

            optimizer = ZooOptimizer(tf.train.AdamOptimizer())
            train_op = optimizer.minimize(loss)
            return tf.estimator.EstimatorSpec(mode, predictions=logits,
                                              loss=loss, train_op=train_op)
        else:
            return tf.estimator.EstimatorSpec(mode, predictions=logits)

    def input_fn(mode):
        """Return the ``TFDataset`` matching ``mode`` (train / eval / predict)."""
        if mode == tf.estimator.ModeKeys.TRAIN:
            training_data = get_data("train")
            dataset = TFDataset.from_ndarrays(training_data, batch_size=320)
        elif mode == tf.estimator.ModeKeys.EVAL:
            testing_data = get_data("test")
            dataset = TFDataset.from_ndarrays(testing_data, batch_per_thread=80)
        else:
            # Prediction only needs the images; the labels are discarded.
            images, _ = get_data("test")
            dataset = TFDataset.from_ndarrays(images, batch_per_thread=80)

        return dataset

    try:
        estimator = TFEstimator.from_model_fn(model_fn, model_dir="/tmp/estimator")

        estimator.train(input_fn, steps=10)

        metrics = estimator.evaluate(input_fn, ["acc"])
        print(metrics)

        predictions = estimator.predict(input_fn)

        print(predictions.first())
        print("finished...")
    finally:
        # Release the context even if one of the estimator steps failed.
        sc.stop()
Esempio n. 2
0
    def test_estimator_for_imageset(self):
        """Train, evaluate and predict using the image-set input function.

        Passes when the second element of every collected prediction
        record is not ``None``.
        """
        build_model = self.create_model_fn()
        imageset_input = self.create_imageset_input_fn()
        est = TFEstimator.from_model_fn(build_model)

        est.train(imageset_input, steps=1)
        est.evaluate(imageset_input, ["acc"])

        predicted = est.predict(imageset_input).get_predict()
        records = predicted.collect()
        assert not any(record[1] is None for record in records)
Esempio n. 3
0
    def test_estimator_without_batch(self):
        """Check estimator behaviour when the dataset has no batch size.

        Training is expected to be rejected with an explicit message, while
        evaluation and prediction are expected to run.
        """
        supervised_modes = (tf.estimator.ModeKeys.EVAL,
                            tf.estimator.ModeKeys.TRAIN)

        def model_fn(features, labels, mode):
            # Without batching each sample arrives as a rank-1 feature
            # vector and, when present, a scalar label.
            assert features.shape.ndims == 1
            if labels is not None:
                assert labels.shape.ndims == 0

            # Add a leading axis of size one so the dense layers get a
            # rank-2 input.
            hidden = tf.expand_dims(features, axis=0)
            for _ in range(2):
                hidden = tf.layers.dense(hidden, 64, activation=tf.nn.relu)
            logits = tf.layers.dense(hidden, 10)

            if mode not in supervised_modes:
                return tf.estimator.EstimatorSpec(mode, predictions=logits)

            expanded_labels = tf.expand_dims(labels, axis=0)
            cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                logits=logits, labels=expanded_labels)
            loss = tf.reduce_mean(cross_entropy)
            optimizer = ZooOptimizer(tf.train.AdamOptimizer())
            train_op = optimizer.minimize(loss)
            return tf.estimator.EstimatorSpec(mode,
                                              train_op=train_op,
                                              predictions=logits,
                                              loss=loss)

        def input_fn(mode):
            # Fixed seed so every call produces the same samples.
            np.random.seed(20)
            samples = np.random.rand(20, 10)
            targets = np.random.randint(0, 10, 20)

            sample_rdd = self.sc.parallelize(samples)
            target_rdd = self.sc.parallelize(targets)
            paired_rdd = sample_rdd.zip(target_rdd)

            if mode in supervised_modes:
                return TFDataset.from_rdd(paired_rdd,
                                          features=(tf.float32, [10]),
                                          labels=(tf.int32, []))
            return TFDataset.from_rdd(sample_rdd,
                                      features=(tf.float32, [10]))

        estimator = TFEstimator.from_model_fn(model_fn)

        def run_training():
            return estimator.train(input_fn, steps=1)

        self.intercept(
            run_training,
            "The batch_size of TFDataset must be specified when used for training."
        )

        estimator.evaluate(input_fn, ["acc"])
        estimator.predict(input_fn).collect()
Esempio n. 4
0
    def test_init_TFDataset_from_ndarrays(self):
        """Run train/evaluate/predict on datasets built from ndarrays."""
        model_fn = self.create_model_fn()

        def input_fn(mode):
            # Fresh random data on every call: 20 rows of 10 features and
            # 20 integer labels drawn from [0, 10).
            feature_array = np.random.rand(20, 10)
            label_array = np.random.randint(0, 10, (20, ))

            if mode == tf.estimator.ModeKeys.TRAIN:
                return TFDataset.from_ndarrays((feature_array, label_array),
                                               batch_size=8)
            if mode == tf.estimator.ModeKeys.EVAL:
                return TFDataset.from_ndarrays((feature_array, label_array),
                                               batch_per_thread=1)
            # Any other mode gets the features only.
            return TFDataset.from_ndarrays(feature_array, batch_per_thread=1)

        est = TFEstimator.from_model_fn(model_fn)
        est.train(input_fn, 10)
        est.evaluate(input_fn, ["acc"])
        est.predict(input_fn)
def main(option):
    """Train Inception v1 through ``TFEstimator`` on images from ``option.image_path``.

    Args:
        option: parsed command-line options. This function reads its
            ``batch_size``, ``cluster_mode``, ``image_path`` and
            ``num_classes`` attributes. A falsy ``batch_size`` falls back
            to 16.

    Only training is implemented; both ``input_fn`` and ``model_fn`` raise
    ``NotImplementedError`` for any other mode. The context is stopped
    before returning, including when training raises.
    """
    batch_size = 16 if not option.batch_size else int(option.batch_size)
    # Use the ``option`` argument. The previous code read a name ``options``
    # that is not defined in this function, so it only worked when a global
    # of that name happened to exist.
    cluster_mode = option.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
                "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.driver.memory", "2g")
        # Every "yarn*" value other than exactly "yarn-client" runs as yarn-cluster.
        yarn_mode = "yarn-client" if cluster_mode == "yarn-client" else "yarn-cluster"
        sc = init_nncontext(spark_conf,
                            cluster_mode=yarn_mode,
                            hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    def input_fn(mode, params):
        """Build the training ``TFDataset`` from ``params["image_path"]``."""
        if mode != tf.estimator.ModeKeys.TRAIN:
            raise NotImplementedError

        image_set = ImageSet.read(params["image_path"],
                                  sc=sc,
                                  with_label=True,
                                  one_based_label=False)
        train_transformer = ChainedPreprocessing([
            ImageBytesToMat(),
            ImageResize(256, 256),
            ImageRandomCrop(224, 224),
            ImageRandomPreprocessing(ImageHFlip(), 0.5),
            ImageChannelNormalize(0.485, 0.456, 0.406, 0.229, 0.224,
                                  0.225),
            ImageMatToTensor(to_RGB=True, format="NHWC"),
            ImageSetToSample(input_keys=["imageTensor"],
                             target_keys=["label"])
        ])
        feature_set = FeatureSet.image_frame(image_set.to_image_frame())
        feature_set = feature_set.transform(train_transformer)
        feature_set = feature_set.transform(ImageFeatureToSample())
        # Labels are declared with shape [1]; model_fn squeezes that axis.
        return TFDataset.from_feature_set(feature_set,
                                          features=(tf.float32,
                                                    [224, 224, 3]),
                                          labels=(tf.int32, [1]),
                                          batch_size=batch_size)

    def model_fn(features, labels, mode, params):
        """Build Inception v1 and return the training ``EstimatorSpec``."""
        from nets import inception
        slim = tf.contrib.slim
        labels = tf.squeeze(labels, axis=1)
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            logits, _ = inception.inception_v1(
                features,
                num_classes=int(params["num_classes"]),
                is_training=True)

        if mode == tf.estimator.ModeKeys.TRAIN:
            loss = tf.reduce_mean(
                tf.losses.sparse_softmax_cross_entropy(logits=logits,
                                                       labels=labels))
            train_op = ZooOptimizer(tf.train.AdamOptimizer()).minimize(loss)
            return tf.estimator.EstimatorSpec(mode,
                                              train_op=train_op,
                                              predictions=logits,
                                              loss=loss)
        else:
            raise NotImplementedError

    try:
        estimator = TFEstimator.from_model_fn(model_fn,
                                              params={
                                                  "image_path": option.image_path,
                                                  "num_classes":
                                                  option.num_classes,
                                                  "batch_size": option.batch_size
                                              })

        estimator.train(input_fn, steps=100)
        print("finished...")
    finally:
        # Release the context even if training failed.
        sc.stop()
Esempio n. 6
0
    def test_estimator_for_feature_set(self):
        """Run a single training step using the feature-set input function."""
        build_model = self.create_model_fn()
        feature_set_input = self.create_train_feature_set_input_fn()

        est = TFEstimator.from_model_fn(build_model)
        est.train(feature_set_input, steps=1)
Esempio n. 7
0
 def test_predict(self):
     """Run prediction and collect the results; passes if nothing raises."""
     build_model = self.create_model_fn()
     predict_input = self.create_input_fn()
     est = TFEstimator.from_model_fn(build_model)
     # The collected values are not inspected; the call itself is the check.
     est.predict(predict_input).collect()
Esempio n. 8
0
 def test_evaluating(self):
     """Evaluate with the "acc" metric and require a non-empty result."""
     build_model = self.create_model_fn()
     eval_input = self.create_input_fn()
     est = TFEstimator.from_model_fn(build_model)
     metrics = est.evaluate(eval_input, ["acc"])
     assert len(metrics) >= 1
Esempio n. 9
0
 def test_training(self):
     """Train the estimator for ``60000 // 320`` steps."""
     build_model = self.create_model_fn()
     train_input = self.create_input_fn()
     est = TFEstimator.from_model_fn(build_model)
     # NOTE(review): presumably dataset size // batch size -- confirm
     # against create_input_fn.
     step_count = 60000 // 320
     est.train(train_input, steps=step_count)
Esempio n. 10
0
        vocabulary = dftrain[feature_name].unique()
        feature_columns.append(
            tf.feature_column.categorical_column_with_vocabulary_list(
                feature_name, vocabulary))

    for feature_name in NUMERIC_COLUMNS:
        feature_columns.append(
            tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

    sc = init_nncontext()

    linear_est = tf.estimator.LinearClassifier(
        feature_columns=feature_columns,
        optimizer=ZooOptimizer(tf.train.FtrlOptimizer(0.2)),
        model_dir="/tmp/estimator/linear")
    zoo_est = TFEstimator(linear_est)
    train_input_fn = make_input_fn(dftrain,
                                   y_train,
                                   mode=tf.estimator.ModeKeys.TRAIN,
                                   batch_size=32)
    zoo_est.train(train_input_fn, steps=200)

    eval_input_fn = make_input_fn(dfeval,
                                  y_eval,
                                  mode=tf.estimator.ModeKeys.EVAL,
                                  batch_per_thread=8)
    eval_result = zoo_est.evaluate(eval_input_fn, ["acc"])
    print(eval_result)

    pred_input_fn = make_input_fn(dfeval,
                                  y_eval,