def test_lstm_forecaster_xshard_input(self):
    # renamed from test_tcn_forecaster_xshard_input: the body tests LSTMForecaster
    train_data, val_data, test_data = create_data()
    print("original", train_data[0].dtype)
    init_orca_context(cores=4, memory="2g")
    from zoo.orca.data import XShards

    def transform_to_dict(data):
        return {'x': data[0], 'y': data[1]}

    def transform_to_dict_x(data):
        return {'x': data[0]}

    train_data = XShards.partition(train_data).transform_shard(transform_to_dict)
    val_data = XShards.partition(val_data).transform_shard(transform_to_dict)
    test_data = XShards.partition(test_data).transform_shard(transform_to_dict_x)
    for distributed in [True, False]:
        forecaster = LSTMForecaster(past_seq_len=24,
                                    input_feature_num=2,
                                    output_feature_num=2,
                                    loss="mae",
                                    lr=0.01,
                                    distributed=distributed)
        forecaster.fit(train_data, epochs=2)
        distributed_pred = forecaster.predict(test_data)
        distributed_eval = forecaster.evaluate(val_data)
    stop_orca_context()
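# `create_data()` is referenced by several forecaster tests in this section but its
# definition is not shown. A minimal sketch of such a helper, assuming each split is
# an (x, y) tuple of random numpy arrays; the shapes and feature counts below are
# illustrative assumptions and differ per test file:
import numpy as np

def create_data(num_train=100, num_val=20, num_test=20,
                past_seq_len=24, future_seq_len=5,
                input_feature_num=2, output_feature_num=2):
    def gen(n):
        # x: (samples, past_seq_len, input features), y: (samples, future_seq_len, output features)
        x = np.random.randn(n, past_seq_len, input_feature_num).astype(np.float32)
        y = np.random.randn(n, future_seq_len, output_feature_num).astype(np.float32)
        return x, y
    return gen(num_train), gen(num_val), gen(num_test)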
def orca_context_fixture():
    from zoo.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=8, init_ray_on_spark=True,
                      object_store_memory="1g")
    yield
    stop_orca_context()
def test_forecast_tcmf_distributed(self):
    input = dict({'id': self.id, 'y': self.data})
    from zoo.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=4, spark_log_level="INFO", init_ray_on_spark=True,
                      object_store_memory="1g")
    self.model.fit(input, num_workers=4, **self.fit_params)
    with tempfile.TemporaryDirectory() as tempdirname:
        self.model.save(tempdirname)
        loaded_model = TCMFForecaster.load(tempdirname, is_xshards_distributed=False)
    yhat = self.model.predict(horizon=self.horizon, num_workers=4)
    yhat_loaded = loaded_model.predict(horizon=self.horizon, num_workers=4)
    yhat_id = yhat_loaded["id"]
    np.testing.assert_equal(yhat_id, self.id)
    yhat = yhat["prediction"]
    yhat_loaded = yhat_loaded["prediction"]
    assert yhat.shape == (self.num_samples, self.horizon)
    np.testing.assert_equal(yhat, yhat_loaded)
    self.model.fit_incremental({'y': self.data_new})
    yhat_incr = self.model.predict(horizon=self.horizon)
    yhat_incr = yhat_incr["prediction"]
    assert yhat_incr.shape == (self.num_samples, self.horizon)
    np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                             yhat, yhat_incr)
    target_value = dict({"y": self.data_new})
    assert self.model.evaluate(target_value=target_value, metric=['mse'])
    stop_orca_context()
def test_forecast_tcmf_distributed(self):
    model = TCMFForecaster(y_iters=1,
                           init_FX_epoch=1,
                           max_FX_epoch=1,
                           max_TCN_epoch=1,
                           alt_iters=2)
    horizon = np.random.randint(1, 50)
    # construct data
    id = np.arange(300)
    data = np.random.rand(300, 480)
    input = dict({'id': id, 'y': data})
    from zoo.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=4, spark_log_level="INFO", init_ray_on_spark=True,
                      object_store_memory="1g")
    model.fit(input, num_workers=4)
    with tempfile.TemporaryDirectory() as tempdirname:
        model.save(tempdirname)
        loaded_model = TCMFForecaster.load(tempdirname, distributed=False)
    yhat = model.predict(x=None, horizon=horizon, num_workers=4)
    yhat_loaded = loaded_model.predict(x=None, horizon=horizon, num_workers=4)
    yhat_id = yhat_loaded["id"]
    assert (yhat_id == id).all()
    yhat = yhat["prediction"]
    yhat_loaded = yhat_loaded["prediction"]
    assert yhat.shape == (300, horizon)
    np.testing.assert_equal(yhat, yhat_loaded)
    target_value = np.random.rand(300, horizon)
    target_value = dict({"y": target_value})
    assert model.evaluate(x=None, target_value=target_value, metric=['mse'])
    stop_orca_context()
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dir', default='/tmp/data', metavar='N',
                        help='the folder to store the MNIST data')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training per executor (default: 256)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing per executor (default: 1000)')
    parser.add_argument('--epochs', type=int, default=2, metavar='N',
                        help='number of epochs to train (default: 2)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='for saving the current model')
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The mode for the Spark cluster. local or yarn.')
    args = parser.parse_args()
    torch.manual_seed(args.seed)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=False)

    if args.cluster_mode == "local":
        init_orca_context(cores=1, memory="2g")
    elif args.cluster_mode == "yarn":
        init_orca_context(
            cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g",
            driver_memory="10g", driver_cores=1,
            conf={"spark.rpc.message.maxSize": "1024",
                  "spark.task.maxFailures": "1",
                  "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"})

    model = LeNet()
    model.train()
    criterion = nn.NLLLoss()
    adam = torch.optim.Adam(model.parameters(), args.lr)
    est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion)
    est.fit(data=train_loader, epochs=args.epochs, validation_data=test_loader,
            validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
    result = est.evaluate(data=test_loader, validation_metrics=[Accuracy()])
    for r in result:
        print(str(r))
    stop_orca_context()
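# `LeNet` is imported elsewhere in this example and its definition is not shown
# here. A minimal sketch of a compatible model, assuming a standard LeNet-style
# MNIST network; the final log_softmax matches the NLLLoss used above (the layer
# sizes are an assumption, not the original definition):
import torch
import torch.nn as nn
import torch.nn.functional as F

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4 * 4 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        # 28x28 -> conv/pool -> 12x12 -> conv/pool -> 4x4x50
        x = F.max_pool2d(F.relu(self.conv1(x)), 2, 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2, 2)
        x = x.view(-1, 4 * 4 * 50)
        x = F.relu(self.fc1(x))
        # log-probabilities, as expected by NLLLoss
        return F.log_softmax(self.fc2(x), dim=1)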
def tf2_estimator():
    from zoo.orca.learn.tf2.estimator import Estimator
    # import ray
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
    print("running tf2 estimator")
    imdb = keras.datasets.imdb
    (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=1000)
    # print(train_data)

    word_index = imdb.get_word_index()
    word_index = {k: (v + 3) for k, v in word_index.items()}
    word_index["<PAD>"] = 0
    word_index["<START>"] = 1
    word_index["<UNK>"] = 2  # unknown
    word_index["<UNUSED>"] = 3

    train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                            value=word_index["<PAD>"],
                                                            padding='post',
                                                            maxlen=256)
    test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                           value=word_index["<PAD>"],
                                                           padding='post',
                                                           maxlen=256)

    # The tf2 Estimator.from_keras expects a model *creator* function rather than a
    # compiled model instance, so build and compile the model inside a creator.
    def model_creator(config):
        model = keras.Sequential()
        model.add(keras.layers.Embedding(1000, 16))
        model.add(keras.layers.GlobalAveragePooling1D())
        model.add(keras.layers.Dense(16, activation=tf.nn.relu))
        model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))
        model.summary()
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['acc'])
        return model

    x_val = train_data[:1000]
    partial_x_train = train_data[1000:]
    y_val = train_labels[:1000]
    partial_y_train = train_labels[1000:]
    train_dataset = tf.data.Dataset.from_tensor_slices((partial_x_train, partial_y_train))
    validation_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))

    est = Estimator.from_keras(model_creator=model_creator)
    est.fit(data=train_dataset,
            batch_size=512,
            epochs=100,
            validation_data=validation_dataset)
    results = est.evaluate(validation_dataset)
    print(results)
    est.save('work/saved_model')
    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')
    stop_orca_context()
def main():
    parser = argparse.ArgumentParser(description='PyTorch Tensorboard Example')
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The cluster mode, such as local, yarn or k8s.')
    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context()
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode=args.cluster_mode, cores=4, num_nodes=2)

    writer = SummaryWriter('runs/fashion_mnist_experiment_1')
    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # plot some random training images
    dataiter = iter(train_data_creator(config={}))
    images, labels = next(dataiter)

    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid, one_channel=True)

    # write to tensorboard
    writer.add_image('four_fashion_mnist_images', img_grid)

    # inspect the model using tensorboard
    writer.add_graph(model_creator(config={}), images)
    writer.close()

    # training loss vs. epochs
    criterion = nn.CrossEntropyLoss()
    orca_estimator = Estimator.from_torch(model=model_creator,
                                          optimizer=optimizer_creator,
                                          loss=criterion,
                                          backend="torch_distributed")
    stats = orca_estimator.fit(train_data_creator, epochs=5, batch_size=4)
    for stat in stats:
        writer.add_scalar("training_loss", stat['train_loss'], stat['epoch'])
    print("Train stats: {}".format(stats))
    val_stats = orca_estimator.evaluate(validation_data_creator)
    print("Validation stats: {}".format(val_stats))
    orca_estimator.shutdown()
    stop_orca_context()
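# `model_creator`, `optimizer_creator` and the data creators used above are defined
# elsewhere in this example. A minimal sketch of compatible creators, assuming
# FashionMNIST and a small feed-forward net; the architecture, learning rate and
# creator signatures are illustrative assumptions:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

def model_creator(config):
    return nn.Sequential(nn.Flatten(),
                         nn.Linear(28 * 28, 128),
                         nn.ReLU(),
                         nn.Linear(128, 10))

def optimizer_creator(model, config):
    # the backend passes the created model into this function
    return torch.optim.SGD(model.parameters(), lr=config.get("lr", 0.001))

def train_data_creator(config, batch_size=4):
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST('./data', train=True,
                                                 download=True, transform=transform)
    return torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)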
def main(max_epoch):
    sc = init_orca_context(cores=4, memory="2g")

    # get DataSet
    # as_supervised returns tuple (img, label) instead of dict {'image': img, 'label': label}
    mnist_train = tfds.load(name="mnist", split="train", as_supervised=True)
    mnist_test = tfds.load(name="mnist", split="test", as_supervised=True)

    # Normalizes images, uint8 -> float32
    def normalize_img(image, label):
        return tf.cast(image, tf.float32) / 255., label

    mnist_train = mnist_train.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    mnist_test = mnist_test.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(20, kernel_size=(5, 5), strides=(1, 1),
                               activation='tanh',
                               input_shape=(28, 28, 1), padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Conv2D(50, kernel_size=(5, 5), strides=(1, 1),
                               activation='tanh', padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='tanh'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_keras_model("/tmp/mnist_keras.h5")
    stop_orca_context()
def test_s2s_forecaster_distributed(self):
    # renamed from test_tcn_forecaster_distributed: the body tests Seq2SeqForecaster
    train_data, val_data, test_data = create_data()
    init_orca_context(cores=4, memory="2g")
    forecaster = Seq2SeqForecaster(past_seq_len=24,
                                   future_seq_len=5,
                                   input_feature_num=1,
                                   output_feature_num=1,
                                   loss="mae",
                                   lr=0.01,
                                   distributed=True)
    forecaster.fit(train_data, epochs=2)
    distributed_pred = forecaster.predict(test_data[0])
    distributed_eval = forecaster.evaluate(val_data)
    model = forecaster.get_model()
    assert isinstance(model, torch.nn.Module)
    forecaster.to_local()
    local_pred = forecaster.predict(test_data[0])
    local_eval = forecaster.evaluate(val_data)
    np.testing.assert_almost_equal(distributed_pred, local_pred, decimal=5)
    try:
        import onnx
        import onnxruntime
        local_pred_onnx = forecaster.predict_with_onnx(test_data[0])
        local_eval_onnx = forecaster.evaluate_with_onnx(val_data)
        np.testing.assert_almost_equal(distributed_pred, local_pred_onnx, decimal=5)
    except ImportError:
        pass
    model = forecaster.get_model()
    assert isinstance(model, torch.nn.Module)
    stop_orca_context()
def main(max_epoch):
    sc = init_orca_context(cores=4, memory="2g")

    # get DataSet
    mnist_train = tfds.load(name="mnist", split="train")
    mnist_test = tfds.load(name="mnist", split="test")

    # Normalizes images
    def normalize_img(data):
        data['image'] = tf.cast(data["image"], tf.float32) / 255.
        return data

    mnist_train = mnist_train.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    mnist_test = mnist_test.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # tensorflow inputs
    images = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28, 1))
    # tensorflow labels
    labels = tf.placeholder(dtype=tf.int32, shape=(None,))

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10, is_training=True)

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
    acc = accuracy(logits, labels)

    # create an estimator
    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=labels,
                               loss=loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"acc": acc})
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_tf_checkpoint("/tmp/lenet/model")
    stop_orca_context()
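# `accuracy` is imported from elsewhere in this example. A minimal TF1-style sketch
# of a compatible metric op, assuming integer labels and unscaled logits; this is an
# illustration, not the helper the example actually imports:
import tensorflow as tf

def accuracy(logits, labels):
    # fraction of samples whose argmax prediction matches the label
    predictions = tf.argmax(logits, axis=1, output_type=labels.dtype)
    correct = tf.cast(tf.equal(predictions, labels), tf.float32)
    return tf.reduce_mean(correct)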
def orca_context_fixture(request):
    import os
    from zoo.orca import OrcaContext, init_orca_context, stop_orca_context
    OrcaContext._eager_mode = True
    access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
    secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    if access_key_id is not None and secret_access_key is not None:
        env = {"AWS_ACCESS_KEY_ID": access_key_id,
               "AWS_SECRET_ACCESS_KEY": secret_access_key}
    else:
        env = None
    sc = init_orca_context(cores=4, spark_log_level="INFO",
                           env=env, object_store_memory="1g")
    yield sc
    stop_orca_context()
def test_tcn_forecaster_distributed(self):
    train_data, val_data, test_data = create_data()
    from zoo.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=4, memory="2g")
    forecaster = TCNForecaster(past_seq_len=24,
                               future_seq_len=5,
                               input_feature_num=1,
                               output_feature_num=1,
                               kernel_size=3,
                               lr=0.01,
                               distributed=True)
    forecaster.fit(train_data[0], train_data[1], epochs=2)
    distributed_pred = forecaster.predict(test_data[0])
    distributed_eval = forecaster.evaluate(val_data[0], val_data[1])
    forecaster.to_local()
    local_pred = forecaster.predict(test_data[0])
    local_eval = forecaster.evaluate(val_data[0], val_data[1])
    np.testing.assert_almost_equal(distributed_pred, local_pred, decimal=5)
    try:
        import onnx
        import onnxruntime
        local_pred_onnx = forecaster.predict_with_onnx(test_data[0])
        local_eval_onnx = forecaster.evaluate_with_onnx(val_data[0], val_data[1])
        np.testing.assert_almost_equal(distributed_pred, local_pred_onnx, decimal=5)
    except ImportError:
        pass
    stop_orca_context()
def orca_context_fixture():
    sc = init_orca_context(cores=8)

    def to_array_(v):
        return v.toArray().tolist()

    def flatten_(v):
        result = []
        for elem in v:
            result.extend(elem.toArray().tolist())
        return result

    spark = SparkSession(sc)
    spark.udf.register("to_array", to_array_, ArrayType(DoubleType()))
    spark.udf.register("flatten", flatten_, ArrayType(DoubleType()))
    yield
    stop_orca_context()
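# A short usage sketch for the UDFs this fixture registers: `to_array` converts a
# Spark ML vector column into a plain double array, and `flatten` concatenates a
# column of vectors into one array. The DataFrame below is a hypothetical
# illustration, not part of the original tests:
from pyspark.ml.linalg import Vectors
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()  # the session the fixture configured
df = spark.createDataFrame([(Vectors.dense([1.0, 2.0]),)], ["feature"])
df.createOrReplaceTempView("vec_table")
spark.sql("SELECT to_array(feature) AS feature_array FROM vec_table").show()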
def setUp(self):
    """
    Set up any state tied to the execution of the given method in a class.
    setup_method is invoked for every test method of a class.
    """
    self.sc = init_orca_context(cores=4)

    def to_array_(v):
        return v.toArray().tolist()

    def flatten_(v):
        result = []
        for elem in v:
            result.extend(elem.toArray().tolist())
        return result

    self.spark = SparkSession(self.sc)
    self.spark.udf.register("to_array", to_array_, ArrayType(DoubleType()))
    self.spark.udf.register("flatten", flatten_, ArrayType(DoubleType()))
def setUp(self) -> None:
    from zoo.orca import init_orca_context
    init_orca_context(cores=8, init_ray_on_spark=True)
import torch.optim as optim
from zoo.orca import init_orca_context, stop_orca_context
from zoo.orca.learn.pytorch import Estimator
from zoo.orca.learn.metrics import Accuracy
from zoo.orca.learn.trigger import EveryEpoch

parser = argparse.ArgumentParser(description='PyTorch Cifar10 Example')
parser.add_argument('--cluster_mode', type=str, default="local",
                    help='The cluster mode, such as local, yarn or k8s.')
args = parser.parse_args()

if args.cluster_mode == "local":
    init_orca_context(memory="4g")
elif args.cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, driver_memory="4g",
                      conf={"spark.rpc.message.maxSize": "1024",
                            "spark.task.maxFailures": "1",
                            "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"})

transform = transforms.Compose([
    transforms.ToTensor(),
def tf_estimator():
    from zoo.orca.learn.tf.estimator import Estimator
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
    os.environ["HDF5_USE_FILE_LOCKING"] = 'FALSE'
    print("running tf estimator")
    imdb = keras.datasets.imdb
    (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=1000)
    # print(train_data)

    word_index = imdb.get_word_index()
    word_index = {k: (v + 3) for k, v in word_index.items()}
    word_index["<PAD>"] = 0
    word_index["<START>"] = 1
    word_index["<UNK>"] = 2  # unknown
    word_index["<UNUSED>"] = 3

    train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                            value=word_index["<PAD>"],
                                                            padding='post',
                                                            maxlen=256)
    test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                           value=word_index["<PAD>"],
                                                           padding='post',
                                                           maxlen=256)

    model = keras.Sequential()
    model.add(keras.layers.Embedding(1000, 16))
    model.add(keras.layers.GlobalAveragePooling1D())
    model.add(keras.layers.Dense(16, activation=tf.nn.relu))
    model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))
    model.summary()

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])

    x_val = train_data[:1000]
    partial_x_train = train_data[1000:]
    y_val = train_labels[:1000]
    partial_y_train = train_labels[1000:]
    train_dataset = tf.data.Dataset.from_tensor_slices((partial_x_train, partial_y_train))
    validation_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))

    est = Estimator.from_keras(keras_model=model)
    est.set_constant_gradient_clipping(0.1, 0.2)
    est.fit(data=train_dataset,
            batch_size=512,
            epochs=5,
            validation_data=validation_dataset)
    results = est.evaluate(validation_dataset)
    print(results)

    est.clear_gradient_clipping()
    est.set_l2_norm_gradient_clipping(0.1)
    est.fit(data=train_dataset,
            batch_size=512,
            epochs=5,
            validation_data=validation_dataset)
    results = est.evaluate(validation_dataset)
    print(results)

    est.save('work/saved_model')
    print("save API finished")
    # est.save_tf_checkpoint('work/checkpoint')
    # est.load_tf_checkpoint('work/checkpoint')
    print("checkpoint save and load API finished")
    est.save_keras_model('work/keras_model')
    est.save_keras_weights('work/keras_weights')
    print("keras model and weights save API finished")
    # est.load_keras_model('work/keras_model')
    # est.load_keras_weights('work')
    print("keras model and weights load API finished")
    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')
    # Estimator.load(est, model_path='work/')  # has not been implemented
    # results = est.predict(validation_dataset)
    # print(results)
    stop_orca_context()
help="The number of cores you want to use for prediction on local." "You should only parse this arg if you set predict_local to true.") parser.add_argument( "--num_predict_workers", type=int, default=4, help="The number of workers you want to use for prediction on local. " "You should only parse this arg if you set predict_local to true.") if __name__ == "__main__": args = parser.parse_args() num_nodes = 1 if args.cluster_mode == "local" else args.num_workers init_orca_context(cluster_mode=args.cluster_mode, cores=args.cores, num_nodes=num_nodes, memory=args.memory, init_ray_on_spark=True) if not args.use_dummy_data: assert args.data_dir is not None, "--data_dir must be provided if not using dummy data" logger.info('Initalizing TCMFForecaster.') model = TCMFForecaster( vbsize=128, hbsize=256, num_channels_X=[32, 32, 32, 32, 32, 1], num_channels_Y=[32, 32, 32, 32, 32, 1], kernel_size=7, dropout=0.2, rank=64,
    # create an estimator
    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=labels,
                               loss=loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"acc": acc})
    est.fit(data=train_dataset,
            batch_size=320,
            epochs=max_epoch,
            validation_data=val_dataset)

    result = est.evaluate(val_dataset)
    print(result)

    est.save_tf_checkpoint("/tmp/lenet/model")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The mode for the Spark cluster. local or yarn.')
    args = parser.parse_args()

    if args.cluster_mode == "local":
        init_orca_context(cluster_mode="local", cores=4)
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2,
                          driver_memory="6g")
    main(5)
    stop_orca_context()
parser.add_argument('-l', '--learning_rate', type=float, default=0.02,
                    help='Learning rate for the LeNet model.')
parser.add_argument('--log_interval', type=int, default=20,
                    help='The number of batches to wait before logging throughput and '
                         'metrics information during the training process.')
opt = parser.parse_args()

num_nodes = 1 if opt.cluster_mode == "local" else opt.num_workers
init_orca_context(cluster_mode=opt.cluster_mode, cores=opt.cores,
                  num_nodes=num_nodes)

config = create_config(optimizer="sgd",
                       optimizer_params={'learning_rate': opt.learning_rate},
                       log_interval=opt.log_interval,
                       seed=42)
estimator = Estimator.from_mxnet(config=config,
                                 model_creator=get_model,
                                 loss_creator=get_loss,
                                 validation_metrics_creator=get_metrics,
                                 num_workers=opt.num_workers,
                                 num_servers=opt.num_servers,
                                 eval_metrics_creator=get_metrics)
estimator.fit(data=get_train_data_iter,
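# `get_model`, `get_loss` and `get_metrics` are defined elsewhere in this example.
# A minimal sketch of what compatible MXNet creators could look like, assuming a
# Gluon LeNet-style network; the exact definitions here are assumptions for
# illustration, not the originals:
import mxnet as mx
from mxnet import gluon

def get_model(config):
    net = gluon.nn.Sequential()
    net.add(gluon.nn.Conv2D(channels=20, kernel_size=5, activation='tanh'),
            gluon.nn.MaxPool2D(pool_size=2, strides=2),
            gluon.nn.Conv2D(channels=50, kernel_size=5, activation='tanh'),
            gluon.nn.MaxPool2D(pool_size=2, strides=2),
            gluon.nn.Flatten(),
            gluon.nn.Dense(500, activation='tanh'),
            gluon.nn.Dense(10))
    net.initialize(mx.init.Xavier(), ctx=mx.cpu())
    return net

def get_loss(config):
    return gluon.loss.SoftmaxCrossEntropyLoss()

def get_metrics(config):
    return mx.metric.Accuracy()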
from tensorflow import keras
import argparse
import numpy as np

from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras.preprocessing import sequence
from zoo.orca import init_orca_context, stop_orca_context
# from zoo.orca.learn.tf2.estimator import Estimator

parser = argparse.ArgumentParser()
parser.add_argument('--cluster_mode', type=str, default="local",
                    help='The mode for the Spark cluster. local or yarn.')
args = parser.parse_args()
cluster_mode = args.cluster_mode

if cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
elif cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2,
                      driver_memory="3g",
                      conf={"spark.executor.extraJavaOptions": "-Xss512m",
                            "spark.driver.extraJavaOptions": "-Xss512m"})

max_features = 20000
max_len = 200

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
def orca_context_fixture():
    from zoo.orca import init_orca_context, stop_orca_context
    sc = init_orca_context(cores=8)
    yield
    stop_orca_context()
parser.add_argument("--use_dummy_data", action='store_true', default=False, help="Whether to use dummy data") parser.add_argument("--benchmark", action='store_true', default=False) parser.add_argument("--enable_numa_binding", action='store_true', default=False) if __name__ == "__main__": args = parser.parse_args() num_nodes = 1 if args.cluster_mode == "local" else args.worker_num init_orca_context(cluster_mode=args.cluster_mode, cores=args.cores, num_nodes=num_nodes, memory=args.memory, init_ray_on_spark=True, enable_numa_binding=args.enable_numa_binding) if not args.use_dummy_data: assert args.data_dir is not None, "--data_dir must be provided if not using dummy data" if not os.path.exists(args.log_dir): os.mkdir(args.log_dir) from zoo.orca.learn.tf2 import Estimator import tensorflow as tf global_batch_size = args.worker_num * args.batch_size_per_worker base_batch_size = 256
raw_df = pd.read_csv("data/data.csv")
df = pd.DataFrame(pd.to_datetime(raw_df.StartTime))
df['AvgRate'] = raw_df.AvgRate.apply(
    lambda x: float(x[:-4]) if x.endswith("Mbps") else float(x[:-4]) * 1000)
df["total"] = raw_df["total"]
df.set_index("StartTime", inplace=True)
full_idx = pd.date_range(start=df.index.min(), end=df.index.max(), freq='2H')
df = df.reindex(full_idx)
drop_dts, drop_len = get_drop_dates_and_len(df)
df = rm_missing_weeks(drop_dts, drop_len, df)
df.ffill(inplace=True)
df.index.name = "datetime"
df = df.reset_index()

init_orca_context(cores=4, memory="4g", init_ray_on_spark=True)

from zoo.zouwu.autots.forecast import AutoTSTrainer
from zoo.automl.config.recipe import *

trainer = AutoTSTrainer(dt_col="datetime",
                        target_col=["AvgRate", "total"],
                        horizon=1,
                        extra_features_col=None)

look_back = (36, 84)
from zoo.automl.common.util import train_val_test_split
train_df, val_df, test_df = train_val_test_split(df,
                                                 val_ratio=0.1,
                                                 test_ratio=0.1,
                                                 look_back=look_back[0])
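# A possible next step (hedged sketch): hand the splits to the trainer to search
# for a pipeline. The recipe name and its parameters below are illustrative
# assumptions; consult the zouwu AutoTS docs for the recipes actually available:
ts_pipeline = trainer.fit(train_df, val_df,
                          recipe=LSTMGridRandomRecipe(num_rand_samples=1,
                                                      epochs=1,
                                                      look_back=look_back,
                                                      batch_size=[64]),
                          metric="mse")
pred_df = ts_pipeline.predict(test_df)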
def compute_gradients(self, weights):
    self.net.variables.set_flat(weights)
    xs, ys = self.mnist.train.next_batch(self.batch_size)
    return self.net.compute_gradients(xs, ys)


if __name__ == "__main__":
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode == "yarn":
        sc = init_orca_context(
            cluster_mode=cluster_mode,
            cores=args.executor_cores,
            memory=args.executor_memory,
            init_ray_on_spark=True,
            num_executors=args.num_workers,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray,
            object_store_memory=args.object_store_memory,
            additional_archive="MNIST_data.zip#MNIST_data")
        ray_ctx = OrcaContext.get_ray_context()
    elif cluster_mode == "local":
        sc = init_orca_context(cores=args.driver_cores)
        ray_ctx = OrcaContext.get_ray_context()
    else:
        print("init_orca_context failed. cluster_mode should be either 'local' "
              "or 'yarn' but got " + cluster_mode)

    # Create a parameter server.
def setUp(self):
    """
    Set up any state tied to the execution of the given method in a class.
    setup_method is invoked for every test method of a class.
    """
    self.sc = init_orca_context(cores=4)
help="The number of workers to run on each node") parser.add_argument('--k8s_master', type=str, default="", help="The k8s master. " "It should be k8s://https://<k8s-apiserver-host>: " "<k8s-apiserver-port>.") parser.add_argument("--container_image", type=str, default="", help="The runtime k8s image. " "You can change it with your k8s image.") parser.add_argument('--k8s_driver_host', type=str, default="", help="The k8s driver localhost.") parser.add_argument('--k8s_driver_port', type=str, default="", help="The k8s driver port.") args = parser.parse_args() if args.cluster_mode == "local": init_orca_context(cluster_mode="local", cores=args.cores, num_nodes=args.num_nodes, memory=args.memory) elif args.cluster_mode == "yarn": init_orca_context(cluster_mode="yarn-client", cores=args.cores, num_nodes=args.num_nodes, memory=args.memory) elif args.cluster_mode == "k8s": if not args.k8s_master or not args.container_image \ or not args.k8s_driver_host or not args.k8s_driver_port: parser.print_help() parser.error('k8s_master, container_image,' 'k8s_driver_host/port are required not to be empty') init_orca_context(cluster_mode="k8s", master=args.k8s_master, container_image=args.container_image, num_nodes=args.num_nodes, cores=args.cores, conf={"spark.driver.host": args.k8s_driver_host, "spark.driver.port": args.k8s_driver_port}) train_example(workers_per_node=args.workers_per_node)
              steps_per_epoch=60000 // batch_size,
              validation_data_creator=val_data_creator,
              validation_steps=10000 // batch_size)
    print(stats)
    est.save("/tmp/mnist_keras.ckpt")
    est.restore("/tmp/mnist_keras.ckpt")
    stats = est.evaluate(val_data_creator, steps=10000 // batch_size)
    print(stats)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The mode for the Spark cluster. local or yarn.')
    parser.add_argument('--max_epoch', type=int, default=5, help='max epoch')
    args = parser.parse_args()

    if args.cluster_mode == "local":
        init_orca_context(cluster_mode="local", cores=4, init_ray_on_spark=True)
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2,
                          init_ray_on_spark=True, driver_memory="6g")
    main(args.max_epoch)
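# `val_data_creator` (and the matching train creator) are defined earlier in this
# example. A hedged sketch of a compatible creator, assuming the tf2 backend calls
# it with a config dict and a batch size; check the Estimator docs for the exact
# signature in your version:
import tensorflow as tf

def val_data_creator(config, batch_size):
    (_, _), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    dataset = tf.data.Dataset.from_tensor_slices(
        (x_test[..., tf.newaxis] / 255.0, y_test))
    return dataset.batch(batch_size)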
def get_data():
    def get_linear_data(a, b, size):
        x = np.arange(0, 10, 10 / size, dtype=np.float32)
        y = a * x + b
        return x, y
    train_x, train_y = get_linear_data(2, 5, 1000)
    val_x, val_y = get_linear_data(2, 5, 400)
    data = (train_x, train_y)
    validation_data = (val_x, val_y)
    return data, validation_data


if __name__ == "__main__":
    # 1. the way to enable auto tuning of a model from creators.
    init_orca_context(init_ray_on_spark=True)
    modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)
    searcher = SearchEngineFactory.create_engine(backend="ray",
                                                 logs_dir="~/zoo_automl_logs",
                                                 resources_per_trial={"cpu": 2},
                                                 name="demo")
    # Pass input data, model builder and recipe into searcher.compile. Note that if
    # the user doesn't pass a feature transformer, the default identity feature
    # transformer will be used.
    data, validation_data = get_data()
    searcher.compile(data=data,
                     validation_data=validation_data,
                     model_builder=modelBuilder,
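# `model_creator`, `optimizer_creator` and `loss_creator` are defined elsewhere in
# this demo. A minimal sketch of compatible creators for the linear data above,
# assuming each creator receives a config dict; the exact signatures are
# assumptions for illustration:
import torch
import torch.nn as nn

def model_creator(config):
    # single-feature linear regression matches y = a*x + b
    return nn.Linear(1, 1)

def optimizer_creator(model, config):
    return torch.optim.SGD(model.parameters(), lr=config.get("lr", 1e-2))

def loss_creator(config):
    return nn.MSELoss()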
import sys
import time
import pickle as pkl
from optparse import OptionParser

from pyspark import StorageLevel
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType, IntegerType, ArrayType, FloatType
from zoo.orca import init_orca_context, stop_orca_context, OrcaContext
from zoo.friesian.feature import FeatureTable, StringIndex

if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("--meta", dest="meta_file")
    parser.add_option("--review", dest="review_file")
    parser.add_option("--output", dest="output")
    (options, args) = parser.parse_args(sys.argv)
    begin = time.time()
    sc = init_orca_context("local")
    spark = OrcaContext.get_spark_session()

    # read review data
    transaction_df = spark.read.json(options.review_file).select(
        ['reviewerID', 'asin', 'unixReviewTime']) \
        .withColumnRenamed('reviewerID', 'user') \
        .withColumnRenamed('asin', 'item') \
        .withColumnRenamed('unixReviewTime', 'time') \
        .dropna("any").persist(storageLevel=StorageLevel.DISK_ONLY)
    transaction_tbl = FeatureTable(transaction_df)
    print("review_tbl, ", transaction_tbl.size())

    # read meta data
    def get_category(x):
        cat = x[0][-1] if x[0][-1] is not None else "default"