def test_estimator_graph_pandas_dataframe(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            feature_cols=['user', 'item'],
            label_cols=['label'],
            validation_data=data_shard)

    result = est.evaluate(data_shard, feature_cols=['user', 'item'], label_cols=['label'])
    assert "loss" in result
    print(result)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               outputs=[model.logits])
    predictions = est.predict(data_shard, feature_cols=['user', 'item']).collect()
    print(predictions)
def test_estimator_keras_tensorboard(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    temp = tempfile.mkdtemp()
    model_dir = os.path.join(temp, "test_model")

    est = Estimator.from_keras(keras_model=model, model_dir=model_dir)

    assert est.get_train_summary("Loss") is None
    assert est.get_validation_summary("Top1Accuracy") is None

    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)

    train_loss = est.get_train_summary("Loss")
    assert len(train_loss) > 0
    val_scores = est.get_validation_summary("Top1Accuracy")
    assert len(val_scores) > 0

    tf.reset_default_graph()
    # no model dir
    model = self.create_model()
    est = Estimator.from_keras(keras_model=model)
    log_dir = os.path.join(temp, "log")
    est.set_tensorboard(log_dir, "test")

    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)

    assert os.path.exists(os.path.join(log_dir, "test/train"))
    assert os.path.exists(os.path.join(log_dir, "test/validation"))

    train_loss = est.get_train_summary("Loss")
    val_scores = est.get_validation_summary("Loss")
    assert len(train_loss) > 0
    assert len(val_scores) > 0
    shutil.rmtree(temp)
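# A minimal sketch (not part of the tests above) of inspecting the summary
# records returned by get_train_summary/get_validation_summary after fit.
# Assumption: each record is roughly an (iteration, value, timestamp) triple,
# as BigDL scalar summaries are usually read back; adjust the unpacking if
# your version returns a different shape. `print_last_summary_record` is a
# hypothetical helper name, not an API of the estimator.
def print_last_summary_record(est, tag="Loss"):
    records = est.get_train_summary(tag)
    if records is not None and len(records) > 0:
        last = records[-1]
        step, value = last[0], last[1]
        print("last {} at iteration {}: {}".format(tag, step, value))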
def test_estimator_graph_checkpoint(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    temp = tempfile.mkdtemp()
    model_dir = os.path.join(temp, "test_model")

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss},
                               model_dir=model_dir)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=6,
            validation_data=data_shard,
            checkpoint_trigger=SeveralIteration(4))

    est.sess.close()

    tf.reset_default_graph()

    model = SimpleModel()

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss},
                               model_dir=model_dir)
    est.load_orca_checkpoint(model_dir)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)

    result = est.evaluate(data_shard)
    assert "loss" in result
    print(result)

    shutil.rmtree(temp)
def test_estimator_keras_weights_save_load(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)

    eval_result = est.evaluate(data_shard)
    print(eval_result)

    temp = tempfile.mkdtemp()
    model_path = os.path.join(temp, 'test.h5')
    est.save_keras_weights(model_path)

    tf.reset_default_graph()

    model = self.create_model()
    est = Estimator.from_keras(model)
    est.load_keras_weights(model_path)

    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
        }
        return result

    data_shard = data_shard.transform_shard(transform)
    predictions = est.predict(data_shard).collect()
    assert predictions[0]['prediction'].shape[1] == 2
    shutil.rmtree(temp)
def test_estimator_keras_xshards_options(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_keras(keras_model=model)
    # train with no validation
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10)
    # train with different optimizer
    est = Estimator.from_keras(keras_model=model)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10)
    # train with session config
    tf_session_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                       intra_op_parallelism_threads=1)
    est = Estimator.from_keras(keras_model=model)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            session_config=tf_session_config)
    # train with model dir
    temp = tempfile.mkdtemp()
    model_dir = os.path.join(temp, "model")
    est = Estimator.from_keras(keras_model=model, model_dir=model_dir)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)
    assert len(os.listdir(model_dir)) > 0
    shutil.rmtree(temp)
def test_estimator_keras_dataframe_mem_type(self):
    tf.reset_default_graph()

    model = self.create_model()
    sc = init_nncontext()
    sqlcontext = SQLContext(sc)
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)
    from pyspark.sql.functions import array
    df = df.withColumn('user', array('user')) \
        .withColumn('item', array('item'))

    est = Estimator.from_keras(keras_model=model)
    OrcaContext.train_data_store = "DISK_2"
    est.fit(data=df,
            batch_size=4,
            epochs=4,
            feature_cols=['user', 'item'],
            label_cols=['label'],
            validation_data=df)

    eval_result = est.evaluate(df, feature_cols=['user', 'item'], label_cols=['label'])
    assert 'acc Top1Accuracy' in eval_result

    prediction_df = est.predict(df, batch_size=4, feature_cols=['user', 'item'])
    assert 'prediction' in prediction_df.columns
    predictions = prediction_df.collect()
    assert len(predictions) == 48
    OrcaContext.train_data_store = "DRAM"
def test_estimator_keras_xshards_with_mem_type(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_keras(keras_model=model)
    OrcaContext.train_data_store = "DISK_2"
    est.fit(data=data_shard,
            batch_size=4,
            epochs=10,
            validation_data=data_shard)

    eval_result = est.evaluate(data_shard)
    print(eval_result)
    OrcaContext.train_data_store = "DRAM"
def test_estimator_graph_dataframe(self):
    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")

    sc = init_nncontext()
    sqlcontext = SQLContext(sc)
    df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               outputs=[model.logits],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    est.fit(data=df,
            batch_size=8,
            epochs=10,
            feature_cols=['user', 'item'],
            label_cols=['label'],
            validation_data=df)

    result = est.evaluate(df, batch_size=4, feature_cols=['user', 'item'],
                          label_cols=['label'])
    print(result)

    prediction_df = est.predict(df, batch_size=4, feature_cols=['user', 'item'])
    assert 'prediction' in prediction_df.columns
    predictions = prediction_df.collect()
    assert len(predictions) == 48
def test_estimator_graph_fit(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)
def test_estimator_keras_xshards_checkpoint(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    temp = tempfile.mkdtemp()
    model_dir = os.path.join(temp, "test_model")

    est = Estimator.from_keras(keras_model=model, model_dir=model_dir)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=6,
            validation_data=data_shard,
            checkpoint_trigger=SeveralIteration(4))

    eval_result = est.evaluate(data_shard)
    print(eval_result)

    tf.reset_default_graph()

    model = self.create_model()

    est = Estimator.from_keras(keras_model=model, model_dir=model_dir)
    est.load_orca_checkpoint(model_dir)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard,
            checkpoint_trigger=SeveralIteration(4))

    eval_result = est.evaluate(data_shard)
    print(eval_result)
    shutil.rmtree(temp)
def test_train_simple(orca_context_fixture):
    sc = orca_context_fixture
    temp_dir = tempfile.mkdtemp()

    try:
        _write_ndarrays(images=np.random.randn(500, 28, 28, 1).astype(np.float32),
                        labels=np.random.randint(0, 10, (500,)).astype(np.int32),
                        output_path="file://" + temp_dir)
        dataset = ParquetDataset.read_as_tf("file://" + temp_dir)

        def preprocess(data):
            return data['image'], data["label"]

        dataset = dataset.map(preprocess)

        import tensorflow as tf

        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(20, kernel_size=(5, 5), strides=(1, 1),
                                   activation='tanh',
                                   input_shape=(28, 28, 1),
                                   padding='valid'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                         padding='valid'),
            tf.keras.layers.Conv2D(50, kernel_size=(5, 5), strides=(1, 1),
                                   activation='tanh',
                                   padding='valid'),
            tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                         padding='valid'),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(500, activation='tanh'),
            tf.keras.layers.Dense(10, activation='softmax'),
        ])

        model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        est = Estimator.from_keras(keras_model=model)
        est.fit(data=dataset,
                batch_size=100,
                epochs=1)
    finally:
        shutil.rmtree(temp_dir)
def test_estimator_graph_dataframe_exception(self):
    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")

    sc = init_nncontext()
    sqlcontext = SQLContext(sc)
    df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               outputs=[model.logits],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})

    with self.assertRaises(Exception) as context:
        est.fit(data=df,
                batch_size=8,
                epochs=10,
                feature_cols=['user', 'item'],
                validation_data=df)
    self.assertTrue(
        'label columns is None; it should not be None in training' in str(
            context.exception))

    est.fit(data=df,
            batch_size=8,
            epochs=10,
            feature_cols=['user', 'item'],
            label_cols=['label'])

    with self.assertRaises(Exception) as context:
        predictions = est.predict(df, batch_size=4).collect()
    self.assertTrue(
        'feature columns is None; it should not be None in prediction'
        in str(context.exception))

    with self.assertRaises(Exception) as context:
        est.fit(data=df,
                batch_size=8,
                epochs=10,
                feature_cols=['user', 'item'],
                label_cols=['label'],
                validation_data=[1, 2, 3])
    self.assertTrue(
        'train data and validation data should be both Spark DataFrame'
        in str(context.exception))
def _test_estimator_graph_tf_dataset(self, dataset_creator):
    tf.reset_default_graph()

    model = SimpleModel()

    dataset = dataset_creator()

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               outputs=[model.logits],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss})
    est.fit(data=dataset,
            batch_size=8,
            epochs=10,
            validation_data=dataset)

    result = est.evaluate(dataset, batch_size=4)
    assert 'loss' in result
def test_estimator_keras_tf_dataset(self):
    tf.reset_default_graph()

    model = self.create_model()

    dataset = tf.data.Dataset.from_tensor_slices((np.random.randint(0, 200, size=(100, 1)),
                                                  np.random.randint(0, 50, size=(100, 1)),
                                                  np.ones(shape=(100,), dtype=np.int32)))
    dataset = dataset.map(lambda user, item, label: [(user, item), label])

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=dataset,
            batch_size=8,
            epochs=10,
            validation_data=dataset)

    eval_result = est.evaluate(dataset)
    assert 'acc Top1Accuracy' in eval_result
def test_estimator_keras_get_model(self):
    tf.reset_default_graph()

    model = self.create_model()
    sc = init_nncontext()
    sqlcontext = SQLContext(sc)
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    df = sqlcontext.read.csv(file_path, header=True, inferSchema=True)
    from pyspark.sql.functions import array
    df = df.withColumn('user', array('user')) \
        .withColumn('item', array('item'))

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=df,
            batch_size=4,
            epochs=4,
            feature_cols=['user', 'item'],
            label_cols=['label'],
            validation_data=df)
    assert est.get_model() is model
def test_estimator_graph_predict_dataset(self):
    # import added so read_csv resolves, matching the sibling tests above
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               outputs=[model.logits])

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
        }
        return result

    data_shard = data_shard.transform_shard(transform)
    dataset = Dataset.from_tensor_slices(data_shard)
    predictions = est.predict(dataset).collect()
    assert len(predictions) == 48
def test_estimator_keras_with_bigdl_optim_method(self):
    tf.reset_default_graph()

    model = self.create_model()

    dataset = tf.data.Dataset.from_tensor_slices((np.random.randint(0, 200, size=(100, 1)),
                                                  np.random.randint(0, 50, size=(100, 1)),
                                                  np.ones(shape=(100,), dtype=np.int32)))
    dataset = dataset.map(lambda user, item, label: [(user, item), label])

    from bigdl.orca.learn.optimizers import SGD
    from bigdl.orca.learn.optimizers.schedule import Plateau
    sgd = SGD(learningrate=0.1,
              learningrate_schedule=Plateau("score",
                                            factor=0.1,
                                            patience=10,
                                            mode="min"))
    est = Estimator.from_keras(keras_model=model, optimizer=sgd)
    est.fit(data=dataset,
            batch_size=8,
            epochs=10,
            validation_data=dataset)
def test_submodel_in_keras_sequential(self):
    mnet = tf.keras.applications.MobileNetV2(input_shape=(160, 160, 3),
                                             include_top=False,
                                             weights='imagenet')
    model = tf.keras.Sequential([
        mnet,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    dataset = tf.data.Dataset.from_tensor_slices((np.random.randn(16, 160, 160, 3),
                                                  np.random.randint(0, 1000, (16, 1))))
    est = Estimator.from_keras(keras_model=model)
    est.fit(data=dataset,
            batch_size=4,
            epochs=1,
            validation_data=dataset)
def test_estimator_keras_xshards_disk_featureset_trigger(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    from bigdl.dllib.optim.optimizer import SeveralIteration
    from bigdl.dllib.utils.triggers import SeveralIteration as ZSeveralIteration
    from bigdl.dllib.utils.triggers import MinLoss as ZMinLoss
    from bigdl.dllib.utils.triggers import TriggerAnd as ZTriggerAnd

    est = Estimator.from_keras(keras_model=model)
    OrcaContext.train_data_store = "DISK_2"

    with self.assertRaises(Exception) as context:
        est.fit(data=data_shard,
                batch_size=4,
                epochs=10,
                validation_data=data_shard,
                checkpoint_trigger=SeveralIteration(2))
    self.assertTrue('Please use a trigger defined in bigdl.dllib.utils.triggers'
                    in str(context.exception))

    est.fit(data=data_shard,
            batch_size=4,
            epochs=10,
            validation_data=data_shard,
            checkpoint_trigger=ZTriggerAnd(ZSeveralIteration(2), ZMinLoss(0.2)))
    OrcaContext.train_data_store = "DRAM"
def test_estimator_keras_xshards_clip(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = self.create_model_with_clip()
    file_path = os.path.join(self.resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy().reshape([-1, 1]),
                  df['item'].to_numpy().reshape([-1, 1])),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)
def test_estimator_graph_with_bigdl_optim_method(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    from bigdl.orca.learn.optimizers import SGD
    from bigdl.orca.learn.optimizers.schedule import Plateau
    sgd = SGD(learningrate=0.1,
              learningrate_schedule=Plateau("score",
                                            factor=0.1,
                                            patience=10,
                                            mode="min"))
    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               outputs=[model.logits],
                               loss=model.loss,
                               optimizer=sgd,
                               metrics={"loss": model.loss})
    est.fit(data=data_shard,
            batch_size=8,
            epochs=10,
            validation_data=data_shard)
def test_estimator_keras_learning_rate_schedule(self):
    tf.reset_default_graph()

    # loss = reduce_sum(w)
    # dloss/dw = 1
    model = self.create_model_lr_schedule(0.1, 1, 0.1)

    dataset = tf.data.Dataset.from_tensor_slices((np.ones((16, 8)),
                                                  np.zeros((16, 1))))
    est = Estimator.from_keras(keras_model=model)
    weights_before = model.get_weights()[0]
    est.fit(data=dataset,
            batch_size=8,
            epochs=1,
            validation_data=dataset)
    sess = tf.keras.backend.get_session()
    iteration = sess.run(model.optimizer.iterations)
    weights_after = model.get_weights()[0]
    # With a gradient of 1, the first update moves the weights by the initial
    # learning rate (0.1) and the second by the decayed learning rate (0.01).
    first_step = weights_before - 0.1
    second_step = first_step - 0.01

    assert iteration == 2
    assert np.allclose(second_step, weights_after)
base_model.trainable = False
base_model.summary()

model = tf.keras.Sequential([
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer=tf.keras.optimizers.RMSprop(lr=0.0001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()

len(model.trainable_variables)

epochs = args.epochs

est = Estimator.from_keras(keras_model=model)

est.fit(train_dataset,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=validation_dataset)

result = est.evaluate(validation_dataset)
print(result)

base_model.trainable = True

# Let's take a look to see how many layers are in the base model
print("Number of layers in the base model: ", len(base_model.layers))

# Fine tune from this layer onwards
fine_tune_at = 100

# Freeze all the layers before the `fine_tune_at` layer
with tf.name_scope("optimization"):
    self.optim = tf.train.AdamOptimizer(1e-3, name='Adam')
    self.optimizer = self.optim.minimize(self.loss)

embedding_size = 16
model = NCF(embedding_size, max_user_id, max_item_id)
print("INFO: NCF model defined successfully!")

batch_size = 1280
epochs = 1
model_dir = './logs-ncf'

# create an Estimator.
estimator = Estimator.from_graph(
    inputs=[model.user, model.item],
    outputs=[model.class_number],
    labels=[model.label],
    loss=model.loss,
    optimizer=model.optim,
    model_dir=model_dir,
    metrics={"loss": model.loss})
print("INFO: Estimator created successfully!")

estimator.fit(data=train_data,
              batch_size=batch_size,
              epochs=epochs,
              feature_cols=['user', 'item'],
              label_cols=['label'],
              validation_data=test_data)
print("INFO: Estimator fit finished successfully!")

checkpoint_path = os.path.join(model_dir, "NCF.ckpt")
estimator.save_tf_checkpoint(checkpoint_path)
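# A minimal sketch (not part of the NCF example above) of resuming training
# from the Orca checkpoints written under model_dir, using only calls that
# appear elsewhere in this file (Estimator.from_graph, load_orca_checkpoint,
# fit). It assumes the NCF model and train_data are re-created exactly as in
# the example above; treat it as an illustration, not the example's actual
# resume logic. `resume_ncf_training` is a hypothetical helper name.
def resume_ncf_training(model, train_data, model_dir='./logs-ncf'):
    est = Estimator.from_graph(
        inputs=[model.user, model.item],
        outputs=[model.class_number],
        labels=[model.label],
        loss=model.loss,
        optimizer=model.optim,
        model_dir=model_dir,
        metrics={"loss": model.loss})
    # restore the latest Orca checkpoint saved under model_dir
    est.load_orca_checkpoint(model_dir)
    est.fit(data=train_data,
            batch_size=1280,
            epochs=1,
            feature_cols=['user', 'item'],
            label_cols=['label'])
    return est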
                      init_ray_on_spark=False)
elif args.cluster_mode == "yarn":
    init_orca_context("yarn-client", cores=args.executor_cores,
                      num_nodes=args.num_executor, memory=args.executor_memory,
                      driver_cores=args.driver_cores, driver_memory=args.driver_memory,
                      init_ray_on_spark=False)
elif args.cluster_mode == "spark-submit":
    init_orca_context("spark-submit")

train_data, test_data, n_uid, n_mid, n_cat = load_dien_data(args.data_dir)

model = build_model(args.model_type, n_uid, n_mid, n_cat, args.lr, args.data_type)
[inputs, feature_cols] = align_input_features(model)

estimator = Estimator.from_graph(inputs=inputs, outputs=[model.y_hat],
                                 labels=[model.target_ph], loss=model.loss,
                                 optimizer=model.optim, model_dir=args.model_dir,
                                 metrics={'loss': model.loss, 'accuracy': model.accuracy})
estimator.fit(train_data.df, epochs=args.epochs, batch_size=args.batch_size,
              feature_cols=feature_cols, label_cols=['label'],
              validation_data=test_data.df)

ckpts_dir = os.path.join(args.model_dir, 'ckpts/')
if not exists(ckpts_dir):
    makedirs(ckpts_dir)
snapshot_path = ckpts_dir + "ckpt_" + args.model_type
estimator.save_tf_checkpoint(snapshot_path)
time_train = time.time()
print(f"perf training time: {(time_train - time_start):.2f}")

result = estimator.evaluate(test_data.df, args.batch_size,
                            feature_cols=feature_cols, label_cols=['label'])
def test_estimator_save_load(self):
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    # save
    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               outputs=[model.logits],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss},
                               sess=None)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=5,
            validation_data=data_shard)

    temp = tempfile.mkdtemp()
    model_checkpoint = os.path.join(temp, 'tmp.ckpt')
    est.save(model_checkpoint)
    est.shutdown()

    tf.reset_default_graph()

    # load
    with tf.Session() as sess:
        model = SimpleModel()
        est = Estimator.from_graph(inputs=[model.user, model.item],
                                   labels=[model.label],
                                   outputs=[model.logits],
                                   loss=model.loss,
                                   metrics={"loss": model.loss},
                                   sess=sess)
        est.load(model_checkpoint)

        data_shard = bigdl.orca.data.pandas.read_csv(file_path)

        def transform(df):
            result = {
                "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            }
            return result

        data_shard = data_shard.transform_shard(transform)
        predictions = est.predict(data_shard).collect()
        assert 'prediction' in predictions[0]
        print(predictions)

    shutil.rmtree(temp)
def test_estimator_graph_tensorboard(self):
    # import added so read_csv resolves, matching the sibling tests above
    import bigdl.orca.data.pandas

    tf.reset_default_graph()

    model = SimpleModel()
    file_path = os.path.join(resource_path, "orca/learn/ncf.csv")
    data_shard = bigdl.orca.data.pandas.read_csv(file_path)

    def transform(df):
        result = {
            "x": (df['user'].to_numpy(), df['item'].to_numpy()),
            "y": df['label'].to_numpy()
        }
        return result

    data_shard = data_shard.transform_shard(transform)

    temp = tempfile.mkdtemp()
    # only set model dir, summary generated under model dir
    model_dir = os.path.join(temp, "test_model")

    est = Estimator.from_graph(inputs=[model.user, model.item],
                               labels=[model.label],
                               loss=model.loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"loss": model.loss},
                               model_dir=model_dir)
    est.fit(data=data_shard,
            batch_size=8,
            epochs=5,
            validation_data=data_shard)

    train_tp = est.get_train_summary("Throughput")
    val_scores = est.get_validation_summary("loss")
    assert len(train_tp) > 0
    assert len(val_scores) > 0

    # set tensorboard dir to different directory
    est.set_tensorboard("model", "test")
    est.fit(data=data_shard,
            batch_size=8,
            epochs=5,
            validation_data=data_shard)

    train_tp = est.get_train_summary("Throughput")
    val_scores = est.get_validation_summary("loss")
    assert len(train_tp) > 0
    assert len(val_scores) > 0

    # no model dir, no tensorboard dir, no summary saved
    est2 = Estimator.from_graph(inputs=[model.user, model.item],
                                labels=[model.label],
                                loss=model.loss,
                                optimizer=tf.train.AdamOptimizer(),
                                metrics={"loss": model.loss})
    est2.fit(data=data_shard,
             batch_size=8,
             epochs=5,
             validation_data=data_shard)

    train_tp = est2.get_train_summary("Throughput")
    val_scores = est2.get_validation_summary("loss")
    assert train_tp is None
    assert val_scores is None

    shutil.rmtree(temp)
else:
    checkpoint_trigger = SeveralIteration(options.checkpointIteration)

def calculate_top_k_accuracy(logits, targets, k=1):
    values, indices = tf.math.top_k(logits, k=k, sorted=True)
    y = tf.reshape(targets, [-1, 1])
    correct = tf.cast(tf.equal(y, indices), tf.float32)
    top_k_accuracy = tf.reduce_mean(correct) * k
    return top_k_accuracy

acc = calculate_top_k_accuracy(logits, targets=labels)

est = Estimator.from_graph(inputs=images,
                           outputs=logits,
                           labels=labels,
                           loss=loss,
                           optimizer=optim,
                           model_dir="/tmp/logs",
                           metrics={"acc": acc})

if options.resumeTrainingCheckpoint is not None:
    assert options.resumeTrainingVersion is not None, \
        "--resumeTrainingVersion must be specified when --resumeTrainingCheckpoint is."
    est.load_orca_checkpoint(options.resumeTrainingCheckpoint,
                             options.resumeTrainingVersion)

est.fit(data=train_data,
        batch_size=options.batchSize,
        epochs=options.maxEpoch,
        validation_data=val_data,
        feed_dict={is_training: [True, False]},
def main(cluster_mode, max_epoch, file_path, batch_size, platform, non_interactive):
    import matplotlib
    if not non_interactive and platform == "mac":
        matplotlib.use('qt5agg')

    if cluster_mode == "local":
        init_orca_context(cluster_mode="local", cores=4, memory="3g")
    elif cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2,
                          driver_memory="3g")
    elif cluster_mode == "spark-submit":
        init_orca_context(cluster_mode="spark-submit")

    load_data(file_path)
    img_dir = os.path.join(file_path, "train")
    label_dir = os.path.join(file_path, "train_masks")

    # Here we only take the first 1000 files for simplicity
    df_train = pd.read_csv(os.path.join(file_path, 'train_masks.csv'))
    ids_train = df_train['img'].map(lambda s: s.split('.')[0])
    ids_train = ids_train[:1000]

    x_train_filenames = []
    y_train_filenames = []
    for img_id in ids_train:
        x_train_filenames.append(os.path.join(img_dir, "{}.jpg".format(img_id)))
        y_train_filenames.append(
            os.path.join(label_dir, "{}_mask.gif".format(img_id)))

    x_train_filenames, x_val_filenames, y_train_filenames, y_val_filenames = \
        train_test_split(x_train_filenames, y_train_filenames,
                         test_size=0.2, random_state=42)

    def load_and_process_image(path):
        array = mpimg.imread(path)
        result = np.array(Image.fromarray(array).resize(size=(128, 128)))
        result = result.astype(float)
        result /= 255.0
        return result

    def load_and_process_image_label(path):
        array = mpimg.imread(path)
        result = np.array(Image.fromarray(array).resize(size=(128, 128)))
        result = np.expand_dims(result[:, :, 1], axis=-1)
        result = result.astype(float)
        result /= 255.0
        return result

    train_images = np.stack(
        [load_and_process_image(filepath) for filepath in x_train_filenames])
    train_label_images = np.stack([
        load_and_process_image_label(filepath)
        for filepath in y_train_filenames
    ])
    val_images = np.stack(
        [load_and_process_image(filepath) for filepath in x_val_filenames])
    val_label_images = np.stack([
        load_and_process_image_label(filepath)
        for filepath in y_val_filenames
    ])
    train_shards = XShards.partition({
        "x": train_images,
        "y": train_label_images
    })
    val_shards = XShards.partition({"x": val_images, "y": val_label_images})

    # Build the U-Net model
    def conv_block(input_tensor, num_filters):
        encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)
        encoder = layers.Activation('relu')(encoder)
        encoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)
        encoder = layers.Activation('relu')(encoder)
        return encoder

    def encoder_block(input_tensor, num_filters):
        encoder = conv_block(input_tensor, num_filters)
        encoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)
        return encoder_pool, encoder

    def decoder_block(input_tensor, concat_tensor, num_filters):
        decoder = layers.Conv2DTranspose(num_filters, (2, 2),
                                         strides=(2, 2),
                                         padding='same')(input_tensor)
        decoder = layers.concatenate([concat_tensor, decoder], axis=-1)
        decoder = layers.Activation('relu')(decoder)
        decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
        decoder = layers.Activation('relu')(decoder)
        decoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)
        decoder = layers.Activation('relu')(decoder)
        return decoder

    inputs = layers.Input(shape=(128, 128, 3))  # 128
    encoder0_pool, encoder0 = encoder_block(inputs, 16)  # 64
    encoder1_pool, encoder1 = encoder_block(encoder0_pool, 32)  # 32
    encoder2_pool, encoder2 = encoder_block(encoder1_pool, 64)  # 16
    encoder3_pool, encoder3 = encoder_block(encoder2_pool, 128)  # 8
    center = conv_block(encoder3_pool, 256)  # center
    decoder3 = decoder_block(center, encoder3, 128)  # 16
    decoder2 = decoder_block(decoder3, encoder2, 64)  # 32
    decoder1 = decoder_block(decoder2, encoder1, 32)  # 64
    decoder0 = decoder_block(decoder1, encoder0, 16)  # 128
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)

    net = models.Model(inputs=[inputs], outputs=[outputs])

    # Define custom metrics
    def dice_coeff(y_true, y_pred):
        smooth = 1.
        # Flatten
        y_true_f = tf.reshape(y_true, [-1])
        y_pred_f = tf.reshape(y_pred, [-1])
        intersection = tf.reduce_sum(y_true_f * y_pred_f)
        score = (2. * intersection + smooth) / \
            (tf.reduce_sum(y_true_f) + tf.reduce_sum(y_pred_f) + smooth)
        return score

    # Define custom loss function
    def dice_loss(y_true, y_pred):
        loss = 1 - dice_coeff(y_true, y_pred)
        return loss

    def bce_dice_loss(y_true, y_pred):
        loss = losses.binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
        return loss

    # compile model
    net.compile(optimizer=tf.keras.optimizers.Adam(2e-3), loss=bce_dice_loss)
    print(net.summary())

    # create an estimator from keras model
    est = Estimator.from_keras(keras_model=net)
    # fit with estimator
    est.fit(data=train_shards,
            batch_size=batch_size,
            epochs=max_epoch)
    # evaluate with estimator
    result = est.evaluate(val_shards)
    print(result)
    # predict with estimator
    val_shards.cache()
    val_image_shards = val_shards.transform_shard(
        lambda val_dict: {"x": val_dict["x"]})
    pred_shards = est.predict(data=val_image_shards, batch_size=batch_size)
    pred = pred_shards.collect()[0]["prediction"]
    val_image_label = val_shards.collect()[0]
    val_image = val_image_label["x"]
    val_label = val_image_label["y"]
    if not non_interactive:
        # visualize 5 predicted results
        plt.figure(figsize=(10, 20))
        for i in range(5):
            img = val_image[i]
            label = val_label[i]
            predicted_label = pred[i]
            plt.subplot(5, 3, 3 * i + 1)
            plt.imshow(img)
            plt.title("Input image")
            plt.subplot(5, 3, 3 * i + 2)
            plt.imshow(label[:, :, 0], cmap='gray')
            plt.title("Actual Mask")
            plt.subplot(5, 3, 3 * i + 3)
            plt.imshow(predicted_label, cmap='gray')
            plt.title("Predicted Mask")
        plt.suptitle("Examples of Input Image, Label, and Prediction")
        plt.show()

    stop_orca_context()