Example 1: fitting a KerasModel on a tf.data.Dataset that requires a lookup table
    def test_tfdataset_with_tf_data_dataset_which_requires_table(self):

        keys = [1, 0, -1]
        dataset = tf.data.Dataset.from_tensor_slices([1, 2, -1, 5] * 40)
        table = tf.contrib.lookup.HashTable(
            initializer=tf.contrib.lookup.KeyValueTensorInitializer(
                keys=keys, values=list(reversed(keys))),
            default_value=100)
        dataset = dataset.map(table.lookup)

        def transform(x):
            # cast the looked-up value to float and attach a constant label
            float_x = tf.to_float(x)
            return float_x, 1

        dataset = dataset.map(transform)
        dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)
        seq = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=()),
            tf.keras.layers.Dense(10, activation="softmax")
        ])
        seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
        model = KerasModel(seq)
        model.fit(dataset)
Example 2: distributed training with NumPy ndarrays
    def test_training_with_ndarray_distributed(self):
        keras_model = self.create_model()
        model = KerasModel(keras_model)

        x, y = self.create_training_data()

        model.fit(x, y, batch_size=4, distributed=True)
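The create_model and create_training_data helpers used throughout these tests are not shown on this page. A minimal sketch of what they might look like (an assumption, not the library's actual code), using 10-dimensional features and binary labels to match the shapes in Example 14:

    def create_model(self):
        # Assumed helper: a small classifier compiled with a Keras optimizer.
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(10, input_shape=(10,), activation="relu"),
            tf.keras.layers.Dense(2, activation="softmax")
        ])
        model.compile(optimizer="rmsprop",
                      loss="sparse_categorical_crossentropy",
                      metrics=["accuracy"])
        return model

    def create_training_data(self):
        # Assumed helper: random features paired with integer class labels.
        np.random.seed(20)
        x = np.random.rand(20, 10)
        y = np.random.randint(0, 2, (20,))
        return x, y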
Example 3: training and validation with a TFDataset
    def test_training_and_validation_with_dataset(self):
        keras_model = self.create_model()
        model = KerasModel(keras_model)

        dataset = self.create_training_dataset()

        model.fit(dataset)
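The create_training_dataset helper is also undefined here. One plausible implementation (an assumption) wraps the same kind of ndarray data in a distributed TFDataset with a fixed batch size, which would also explain the "5 iterations" comment in Example 13 (20 samples / batch_size 4 = 5 steps per epoch):

    def create_training_dataset(self):
        # Assumed helper: builds a TFDataset from an RDD of (x, y) pairs.
        np.random.seed(20)
        x = np.random.rand(20, 10)
        y = np.random.randint(0, 2, (20,))
        rdd_x = self.sc.parallelize(x)
        rdd_y = self.sc.parallelize(y)
        return TFDataset.from_rdd(rdd_x.zip(rdd_y),
                                  features=(tf.float32, [10]),
                                  labels=(tf.int32, []),
                                  batch_size=4)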
Example 4: local training with NumPy ndarrays
    def test_training_with_ndarray(self):

        keras_model = self.create_model()
        model = KerasModel(keras_model)

        x, y = self.create_training_data()

        model.fit(x, y, batch_size=2)
Example 5: distributed training with validation data
    def test_training_with_validation_data_distributed(self):

        keras_model = self.create_model()
        model = KerasModel(keras_model)

        x, y = self.create_training_data()

        val_x, val_y = self.create_training_data()

        model.fit(x, y, validation_data=(val_x, val_y), batch_size=4, distributed=True)
Example 6: distributed training of a multi-input, multi-output model
    def test_training_with_validation_data_distributed_multi_heads(self):

        keras_model = self.create_multi_input_output_model()
        model = KerasModel(keras_model)

        x, y = self.create_training_data()

        val_x, val_y = self.create_training_data()
        model.fit([x, x], [y, y],
                  validation_data=([val_x, val_x], [val_y, val_y]),
                  batch_size=4,
                  distributed=True)
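create_multi_input_output_model is not defined on this page either. For fit([x, x], [y, y]) to work it needs two inputs and two outputs; a hedged sketch under that assumption:

    def create_multi_input_output_model(self):
        # Assumed helper: two 10-dim inputs feeding two softmax heads, so
        # fit() can take [x, x] / [y, y] as in the example above.
        input_1 = tf.keras.layers.Input(shape=(10,))
        input_2 = tf.keras.layers.Input(shape=(10,))
        out_1 = tf.keras.layers.Dense(2, activation="softmax")(input_1)
        out_2 = tf.keras.layers.Dense(2, activation="softmax")(input_2)
        model = tf.keras.models.Model(inputs=[input_1, input_2],
                                      outputs=[out_1, out_2])
        model.compile(optimizer="rmsprop",
                      loss="sparse_categorical_crossentropy",
                      metrics=["accuracy"])
        return model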
Example 7: distributed evaluation with ndarrays
    def test_evaluate_with_ndarray_distributed(self):

        keras_model = self.create_model()
        model = KerasModel(keras_model)

        x, y = self.create_training_data()

        results_pre = model.evaluate(x, y, batch_per_thread=1)

        model.fit(x, y, batch_size=4, epochs=10)

        results_after = model.evaluate(x, y, distributed=True, batch_per_thread=1)

        assert results_pre["loss"] > results_after["loss"]
Example 8: local evaluation with ndarrays
    def test_evaluate_with_ndarray(self):

        keras_model = self.create_model()
        model = KerasModel(keras_model)

        x, y = self.create_training_data()

        results_pre = model.evaluate(x, y)

        model.fit(x, y, batch_size=4, epochs=10)

        results_after = model.evaluate(x, y)

        assert results_pre["loss"] > results_after["loss"]
Example 9: a shared helper exercising fit, predict, and evaluate on a dataset
    def check_dataset(self, create_ds):

        seq = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(20, )),
            tf.keras.layers.Dense(10, activation="softmax")
        ])

        seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
        model = KerasModel(seq)

        model.fit(create_ds("train"))
        model.predict(create_ds("predict")).collect()
        model.evaluate(create_ds("evaluate"))
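check_dataset expects a create_ds factory that returns a dataset configured for the requested mode. One way such a factory could look (an assumption; note the 20-dim features matching Flatten(input_shape=(20,)) above, and that the predict dataset carries no labels):

    def create_ds(self, mode):
        # Assumed factory: batch_size for training, batch_per_thread for
        # inference, and no labels in predict mode.
        x = np.random.rand(100, 20)
        y = np.random.randint(0, 10, (100,))
        if mode == "train":
            rdd = self.sc.parallelize(x).zip(self.sc.parallelize(y))
            return TFDataset.from_rdd(rdd,
                                      features=(tf.float32, [20]),
                                      labels=(tf.int32, []),
                                      batch_size=16)
        elif mode == "evaluate":
            rdd = self.sc.parallelize(x).zip(self.sc.parallelize(y))
            return TFDataset.from_rdd(rdd,
                                      features=(tf.float32, [20]),
                                      labels=(tf.int32, []),
                                      batch_per_thread=16)
        else:  # "predict"
            return TFDataset.from_rdd(self.sc.parallelize(x),
                                      features=(tf.float32, [20]),
                                      batch_per_thread=16)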
Example 10: using a native TensorFlow optimizer
    def test_tensorflow_optimizer(self):
        data = tf.keras.layers.Input(shape=[10])

        x = tf.keras.layers.Flatten()(data)
        x = tf.keras.layers.Dense(10, activation='relu')(x)
        predictions = tf.keras.layers.Dense(2, activation='softmax')(x)

        model = tf.keras.models.Model(inputs=data, outputs=predictions)
        model.compile(optimizer=tf.train.AdamOptimizer(),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        keras_model = KerasModel(model)

        x, y = self.create_training_data()

        keras_model.fit(x, y, batch_size=4, distributed=True)
Example 11: error handling for mismatched or misnamed inputs
    def test_invalid_data_handling(self):
        keras_model = self.create_multi_input_output_model()
        model = KerasModel(keras_model)
        x, y = self.create_training_data()
        val_x, val_y = self.create_training_data()

        # Number doesn't match
        with pytest.raises(AssertionError) as excinfo:
            model.fit([x, x], [y, y, y], batch_size=4, distributed=True)

        assert "model_target number does not match data number" in str(
            excinfo.value)

        # Dict as input
        with pytest.raises(AssertionError) as excinfo:
            model.fit({"input_1": x}, [y, y], batch_size=4, distributed=True)

        assert "all model_input names should exist in data" in str(
            excinfo.value)
Example 12: fit and evaluate with tf.data datasets
    def test_tfdataset_with_tf_data_dataset(self):
        dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randn(102, 28, 28, 1),
             np.random.randint(0, 10, size=(102,))))
        dataset = dataset.map(lambda feature, label:
                              (tf.to_float(feature), label))
        dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)
        seq = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(10, activation="softmax")
        ])

        seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                    loss='sparse_categorical_crossentropy',
                    metrics=['accuracy'])
        model = KerasModel(seq)
        model.fit(dataset)
        dataset = tf.data.Dataset.from_tensor_slices(
            (np.random.randn(102, 28, 28, 1),
             np.random.randint(0, 10, size=(102,))))
        dataset = dataset.map(lambda feature, label:
                              (tf.to_float(feature), label))
        dataset = TFDataset.from_tf_data_dataset(dataset, batch_per_thread=16)
        model.evaluate(dataset)
Example 13: gradient clipping
    def test_gradient_clipping(self):

        data = tf.keras.layers.Input(shape=[10])

        x = tf.keras.layers.Flatten()(data)
        x = tf.keras.layers.Dense(10, activation='relu')(x)
        predictions = tf.keras.layers.Dense(2, activation='softmax')(x)

        model = tf.keras.models.Model(inputs=data, outputs=predictions)
        model.compile(optimizer=tf.keras.optimizers.SGD(lr=1, clipvalue=1e-8),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model = KerasModel(model)

        pre_weights = model.get_weights()

        dataset = self.create_training_dataset()

        # 5 iterations
        model.fit(dataset)

        current_weights = model.get_weights()

        # with clipvalue=1e-8, each update is capped at 1e-8 per weight,
        # so the weights should barely move after 5 iterations
        assert np.all(np.abs(current_weights[0] - pre_weights[0]) < 1e-7)
Example 14: errors for datasets created without batch settings
    def test_dataset_without_batch(self):
        x = np.random.rand(20, 10)
        y = np.random.randint(0, 2, (20))

        rdd_x = self.sc.parallelize(x)
        rdd_y = self.sc.parallelize(y)

        rdd = rdd_x.zip(rdd_y)

        dataset = TFDataset.from_rdd(rdd,
                                     features=(tf.float32, [10]),
                                     labels=(tf.int32, []),
                                     names=["features", "labels"],
                                     val_rdd=rdd)

        keras_model = self.create_model()
        model = KerasModel(keras_model)
        self.intercept(
            lambda: model.fit(dataset), "The batch_size of TFDataset must be" +
            " specified when used in KerasModel fit.")

        dataset = TFDataset.from_rdd(
            rdd,
            features=(tf.float32, [10]),
            labels=(tf.int32, []),
            names=["features", "labels"],
        )
        self.intercept(
            lambda: model.evaluate(dataset),
            "The batch_per_thread of TFDataset must be " +
            "specified when used in KerasModel evaluate.")

        dataset = TFDataset.from_rdd(
            rdd_x,
            features=(tf.float32, [10]),
            names=["features", "labels"],
        )
        self.intercept(
            lambda: model.predict(dataset),
            "The batch_per_thread of TFDataset must be" +
            " specified when used in KerasModel predict.")
Example 15: end-to-end MNIST training from ndarrays
def main(max_epoch):
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
                "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.driver.memory", "2g")
        if cluster_mode == "yarn-client":
            _ = init_nncontext(spark_conf,
                               cluster_mode="yarn-client",
                               hadoop_conf=hadoop_conf)
        else:
            _ = init_nncontext(spark_conf,
                               cluster_mode="yarn-cluster",
                               hadoop_conf=hadoop_conf)
    else:
        _ = init_nncontext()

    (training_images_data,
     training_labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    (testing_images_data,
     testing_labels_data) = mnist.read_data_sets("/tmp/mnist", "test")

    # normalize both splits with the training-set statistics
    training_images_data = (training_images_data -
                            mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    testing_images_data = (testing_images_data -
                           mnist.TRAIN_MEAN) / mnist.TRAIN_STD

    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    keras_model = KerasModel(model)

    keras_model.fit(training_images_data,
                    training_labels_data,
                    validation_data=(testing_images_data, testing_labels_data),
                    epochs=max_epoch,
                    batch_size=320,
                    distributed=True)

    result = keras_model.evaluate(testing_images_data,
                                  testing_labels_data,
                                  distributed=True,
                                  batch_per_thread=80)

    print(result)
    # >> [0.08865142822265625, 0.9722]

    # the following assert is used for internal testing
    assert result['acc Top1Accuracy'] > 0.95

    keras_model.save_weights("/tmp/mnist_keras.h5")
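Assuming KerasModel.save_weights writes a standard tf.keras HDF5 weights file (not confirmed by this page), the checkpoint could later be restored into a model with the same architecture inside main:

    # Assumption: the file is a regular tf.keras HDF5 weights checkpoint.
    model.load_weights("/tmp/mnist_keras.h5")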
Example 16: end-to-end MNIST training from a TFDataset
def main(max_epoch):
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
                "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.driver.memory", "2g")
        if cluster_mode == "yarn-client":
            sc = init_nncontext(spark_conf,
                                cluster_mode="yarn-client",
                                hadoop_conf=hadoop_conf)
        else:
            sc = init_nncontext(spark_conf,
                                cluster_mode="yarn-cluster",
                                hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    training_rdd = get_data_rdd("train", sc)
    testing_rdd = get_data_rdd("test", sc)

    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.float32, [28, 28, 1]),
                                 labels=(tf.int32, []),
                                 batch_size=320,
                                 val_rdd=testing_rdd)

    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    keras_model = KerasModel(model)

    keras_model.fit(dataset, epochs=max_epoch, distributed=True)

    eval_dataset = TFDataset.from_rdd(testing_rdd,
                                      features=(tf.float32, [28, 28, 1]),
                                      labels=(tf.int32, []),
                                      batch_per_thread=80)
    result = keras_model.evaluate(eval_dataset)

    print(result)
    # >> [0.08865142822265625, 0.9722]

    # the following assert is used for internal testing
    assert result['acc Top1Accuracy'] > 0.95

    model.save_weights("/tmp/mnist_keras.h5")
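get_data_rdd is assumed to load an MNIST split and return an RDD of [image, label] records. A plausible version (an assumption), mirroring the normalization in Example 15:

def get_data_rdd(dataset_type, sc):
    # Assumed helper: parallelize MNIST images/labels and normalize the
    # images with the training-set statistics, as in Example 15.
    images, labels = mnist.read_data_sets("/tmp/mnist", dataset_type)
    image_rdd = sc.parallelize(images)
    label_rdd = sc.parallelize(labels)
    return image_rdd.zip(label_rdd).map(
        lambda rec: [(rec[0] - mnist.TRAIN_MEAN) / mnist.TRAIN_STD,
                     np.array(rec[1])])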