def test_s2s_forecaster_xshard_input(self):
    train_data, val_data, test_data = create_data()
    print("original", train_data[0].dtype)
    init_orca_context(cores=4, memory="2g")
    from bigdl.orca.data import XShards

    def transform_to_dict(data):
        return {'x': data[0], 'y': data[1]}

    def transform_to_dict_x(data):
        return {'x': data[0]}

    train_data = XShards.partition(train_data).transform_shard(transform_to_dict)
    val_data = XShards.partition(val_data).transform_shard(transform_to_dict)
    test_data = XShards.partition(test_data).transform_shard(transform_to_dict_x)
    for distributed in [True, False]:
        forecaster = Seq2SeqForecaster(past_seq_len=24,
                                       future_seq_len=5,
                                       input_feature_num=1,
                                       output_feature_num=1,
                                       loss="mae",
                                       lr=0.01,
                                       distributed=distributed)
        forecaster.fit(train_data, epochs=2)
        distributed_pred = forecaster.predict(test_data)
        distributed_eval = forecaster.evaluate(val_data)
    stop_orca_context()
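# A hedged illustration, not part of the test above: after transform_shard each
# element of the SparkXShards is a dict of numpy arrays, which can be inspected
# by collecting the shards on the driver:
first_shard = train_data.collect()[0]
assert set(first_shard.keys()) == {"x", "y"}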
def test_forecast_tcmf_distributed(self):
    input = dict({'id': self.id, 'y': self.data})
    from bigdl.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=4, spark_log_level="INFO",
                      init_ray_on_spark=True, object_store_memory="1g")
    self.model.fit(input, num_workers=4, **self.fit_params)
    with tempfile.TemporaryDirectory() as tempdirname:
        self.model.save(tempdirname)
        loaded_model = TCMFForecaster.load(tempdirname,
                                           is_xshards_distributed=False)
    yhat = self.model.predict(horizon=self.horizon, num_workers=4)
    yhat_loaded = loaded_model.predict(horizon=self.horizon, num_workers=4)
    yhat_id = yhat_loaded["id"]
    np.testing.assert_equal(yhat_id, self.id)
    yhat = yhat["prediction"]
    yhat_loaded = yhat_loaded["prediction"]
    assert yhat.shape == (self.num_samples, self.horizon)
    np.testing.assert_equal(yhat, yhat_loaded)
    self.model.fit_incremental({'y': self.data_new})
    yhat_incr = self.model.predict(horizon=self.horizon)
    yhat_incr = yhat_incr["prediction"]
    assert yhat_incr.shape == (self.num_samples, self.horizon)
    np.testing.assert_raises(AssertionError, np.testing.assert_array_equal,
                             yhat, yhat_incr)
    target_value = dict({"y": self.data_new})
    assert self.model.evaluate(target_value=target_value, metric=['mse'])
    stop_orca_context()
def test_s2s_forecaster_distributed(self):
    train_data, val_data, test_data = create_data()
    init_orca_context(cores=4, memory="2g")
    forecaster = Seq2SeqForecaster(past_seq_len=24,
                                   future_seq_len=5,
                                   input_feature_num=1,
                                   output_feature_num=1,
                                   loss="mae",
                                   lr=0.01,
                                   distributed=True)
    forecaster.fit(train_data, epochs=2)
    distributed_pred = forecaster.predict(test_data[0])
    distributed_eval = forecaster.evaluate(val_data)
    model = forecaster.get_model()
    assert isinstance(model, torch.nn.Module)
    forecaster.to_local()
    local_pred = forecaster.predict(test_data[0])
    local_eval = forecaster.evaluate(val_data)
    np.testing.assert_almost_equal(distributed_pred, local_pred, decimal=5)
    try:
        import onnx
        import onnxruntime
        local_pred_onnx = forecaster.predict_with_onnx(test_data[0])
        local_eval_onnx = forecaster.evaluate_with_onnx(val_data)
        np.testing.assert_almost_equal(distributed_pred, local_pred_onnx,
                                       decimal=5)
    except ImportError:
        pass
    model = forecaster.get_model()
    assert isinstance(model, torch.nn.Module)
    stop_orca_context()
def friesian_context_fixture(request):
    import os
    from bigdl.orca import OrcaContext, init_orca_context, stop_orca_context
    OrcaContext._eager_mode = True
    access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
    secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    if access_key_id is not None and secret_access_key is not None:
        env = {"AWS_ACCESS_KEY_ID": access_key_id,
               "AWS_SECRET_ACCESS_KEY": secret_access_key}
    else:
        env = None
    sc = init_orca_context(cores=4, spark_log_level="INFO", env=env)
    yield sc
    stop_orca_context()
def orca_context_fixture():
    sc = init_orca_context(cores=8)

    def to_array_(v):
        return v.toArray().tolist()

    def flatten_(v):
        result = []
        for elem in v:
            result.extend(elem.toArray().tolist())
        return result

    spark = SparkSession(sc)
    spark.udf.register("to_array", to_array_, ArrayType(DoubleType()))
    spark.udf.register("flatten", flatten_, ArrayType(DoubleType()))
    yield
    stop_orca_context()
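# A hedged usage sketch, not in the original fixture: once registered, the UDFs
# can be called from Spark SQL expressions. `df` and its vector columns
# "features" and "nested_features" are hypothetical names for illustration:
arr_df = df.selectExpr("to_array(features) AS features_arr",
                       "flatten(nested_features) AS flat_arr")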
def setUp(self):
    """
    setup any state tied to the execution of the given method in a class.
    setup_method is invoked for every test method of a class.
    """
    self.sc = init_orca_context(cores=4)

    def to_array_(v):
        return v.toArray().tolist()

    def flatten_(v):
        result = []
        for elem in v:
            result.extend(elem.toArray().tolist())
        return result

    self.spark = SparkSession(self.sc)
    self.spark.udf.register("to_array", to_array_, ArrayType(DoubleType()))
    self.spark.udf.register("flatten", flatten_, ArrayType(DoubleType()))
parser.add_argument('--data_num', type=int, default=12,
                    help="The number of dummy data samples.")
parser.add_argument('--batch_size', type=int, default=4,
                    help="The batch size of inference.")
parser.add_argument('--memory', type=str, default="2g",
                    help="The executor memory size.")
args = parser.parse_args()

if args.cluster_mode == "local":
    init_orca_context(cores=args.core_num, memory=args.memory)
elif args.cluster_mode.startswith("yarn"):
    init_orca_context(cluster_mode=args.cluster_mode, cores=args.core_num,
                      num_nodes=args.executor_num, memory=args.memory)
elif args.cluster_mode == "spark-submit":
    init_orca_context(cluster_mode=args.cluster_mode)

images = [cv2.imread(file) for file in glob.glob(args.image_folder + "/*.jpg")]
images = [crop(img, 416, 416) for img in images]
image_num = len(images)
copy_time = math.ceil(args.data_num / image_num)
images = images * copy_time
def main():
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", dest="data_dir",
                        help="Required. The path where data locates.")
    parser.add_argument("--output_data", dest="output_data",
                        default=tempfile.mkdtemp(),
                        help="Required. The path where voc parquet data locates.")
    parser.add_argument("--data_year", dest="data_year", default="2009",
                        help="Required. The voc data date.")
    parser.add_argument("--split_name_train", dest="split_name_train",
                        default="train", help="Required. Split name.")
    parser.add_argument("--split_name_test", dest="split_name_test",
                        default="val", help="Required. Split name.")
    parser.add_argument("--names", dest="names",
                        help="Required. The path where class names locates.")
    parser.add_argument("--weights", dest="weights",
                        default="./checkpoints/yolov3.weights",
                        help="Required. The path where weights locates.")
    parser.add_argument("--checkpoint", dest="checkpoint",
                        default="./checkpoints/yolov3.tf",
                        help="Required. The path where checkpoint locates.")
    parser.add_argument("--checkpoint_folder", dest="checkpoint_folder",
                        default="./checkpoints",
                        help="Required. The path where saved checkpoint locates.")
    parser.add_argument("--epochs", dest="epochs", type=int, default=2,
                        help="Required. epochs.")
    parser.add_argument("--batch_size", dest="batch_size", type=int, default=16,
                        help="Required. batch size.")
    parser.add_argument("--cluster_mode", dest="cluster_mode", default="local",
                        help="Required. Run on local/yarn/k8s/spark-submit mode.")
    parser.add_argument("--class_num", dest="class_num", type=int, default=20,
                        help="Required. class num.")
    parser.add_argument("--worker_num", type=int, default=1,
                        help="The number of slave nodes to be used in the cluster. "
                             "You can change it depending on your own cluster setting.")
    parser.add_argument("--cores", type=int, default=4,
                        help="The number of cpu cores you want to use on each node. "
                             "You can change it depending on your own cluster setting.")
    parser.add_argument("--memory", type=str, default="20g",
                        help="The memory you want to use on each node. "
                             "You can change it depending on your own cluster setting.")
    parser.add_argument("--object_store_memory", type=str, default="10g",
                        help="The object store memory you want to use on each node. "
                             "You can change it depending on your own cluster setting.")
    parser.add_argument("--enable_numa_binding", dest="enable_numa_binding",
                        default=False, help="enable_numa_binding")
    parser.add_argument('--k8s_master', type=str, default="",
                        help="The k8s master. "
                             "It should be k8s://https://<k8s-apiserver-host>:"
                             "<k8s-apiserver-port>.")
    parser.add_argument("--container_image", type=str, default="",
                        help="The runtime k8s image.")
    parser.add_argument('--k8s_driver_host', type=str, default="",
                        help="The k8s driver host.")
    parser.add_argument('--k8s_driver_port', type=str, default="",
                        help="The k8s driver port.")
    parser.add_argument('--nfs_mount_path', type=str, default="",
                        help="nfs mount path")

    options = parser.parse_args()

    if options.cluster_mode == "local":
        init_orca_context(cluster_mode="local", cores=options.cores,
                          num_nodes=options.worker_num, memory=options.memory,
                          init_ray_on_spark=True,
                          object_store_memory=options.object_store_memory)
    elif options.cluster_mode == "k8s":
        init_orca_context(
            cluster_mode="k8s", master=options.k8s_master,
            container_image=options.container_image,
            init_ray_on_spark=True,
            enable_numa_binding=options.enable_numa_binding,
            num_nodes=options.worker_num, cores=options.cores,
            memory=options.memory,
            object_store_memory=options.object_store_memory,
            conf={
                # The parser defines --k8s_driver_host/--k8s_driver_port,
                # so those are the attribute names available on options.
                "spark.driver.host": options.k8s_driver_host,
                "spark.driver.port": options.k8s_driver_port,
                "spark.kubernetes.executor.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.options.claimName": "nfsvolumeclaim",
                "spark.kubernetes.executor.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.mount.path": options.nfs_mount_path,
                "spark.kubernetes.driver.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.options.claimName": "nfsvolumeclaim",
                "spark.kubernetes.driver.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.mount.path": options.nfs_mount_path
            })
    elif options.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client", cores=options.cores,
                          num_nodes=options.worker_num, memory=options.memory,
                          init_ray_on_spark=True,
                          enable_numa_binding=options.enable_numa_binding,
                          object_store_memory=options.object_store_memory)
    elif options.cluster_mode == "spark-submit":
        init_orca_context(cluster_mode="spark-submit")

    # convert yolov3 weights
    yolo = YoloV3(classes=80)
    load_darknet_weights(yolo, options.weights)
    yolo.save_weights(options.checkpoint)

    def model_creator(config):
        model = YoloV3(DEFAULT_IMAGE_SIZE, training=True,
                       classes=options.class_num)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

        model_pretrained = YoloV3(DEFAULT_IMAGE_SIZE, training=True, classes=80)
        model_pretrained.load_weights(options.checkpoint)
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

        optimizer = tf.keras.optimizers.Adam(lr=1e-3)
        loss = [YoloLoss(anchors[mask], classes=options.class_num)
                for mask in anchor_masks]
        model.compile(optimizer=optimizer, loss=loss, run_eagerly=False)
        return model

    # prepare data
    class_map = {name: idx for idx, name in
                 enumerate(open(options.names).read().splitlines())}
    dataset_path = os.path.join(options.data_dir, "VOCdevkit")
    voc_train_path = os.path.join(options.output_data, "train_dataset")
    voc_val_path = os.path.join(options.output_data, "val_dataset")
    write_parquet(format="voc", voc_root_path=dataset_path,
                  output_path="file://" + voc_train_path,
                  splits_names=[(options.data_year, options.split_name_train)],
                  classes=class_map)
    write_parquet(format="voc", voc_root_path=dataset_path,
                  output_path="file://" + voc_val_path,
                  splits_names=[(options.data_year, options.split_name_test)],
                  classes=class_map)

    output_types = {"image": tf.string, "label": tf.float32,
                    "image_id": tf.string}
    output_shapes = {"image": (), "label": (None, 5), "image_id": ()}

    def train_data_creator(config, batch_size):
        train_dataset = read_parquet(format="tf_dataset", path=voc_train_path,
                                     output_types=output_types,
                                     output_shapes=output_shapes)
        train_dataset = train_dataset.map(
            lambda data_dict: (data_dict["image"], data_dict["label"]))
        train_dataset = train_dataset.map(parse_data_train)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            transform_images(x, DEFAULT_IMAGE_SIZE),
            transform_targets(y, anchors, anchor_masks, DEFAULT_IMAGE_SIZE)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)
        return train_dataset

    def val_data_creator(config, batch_size):
        val_dataset = read_parquet(format="tf_dataset", path=voc_val_path,
                                   output_types=output_types,
                                   output_shapes=output_shapes)
        val_dataset = val_dataset.map(
            lambda data_dict: (data_dict["image"], data_dict["label"]))
        val_dataset = val_dataset.map(parse_data_train)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            transform_images(x, DEFAULT_IMAGE_SIZE),
            transform_targets(y, anchors, anchor_masks, DEFAULT_IMAGE_SIZE)))
        return val_dataset

    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint(options.checkpoint_folder + '/yolov3_train_{epoch}.tf',
                        verbose=1, save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    trainer = Estimator.from_keras(model_creator=model_creator)
    trainer.fit(train_data_creator,
                epochs=options.epochs,
                batch_size=options.batch_size,
                steps_per_epoch=3473 // options.batch_size,
                callbacks=callbacks,
                validation_data=val_data_creator,
                validation_steps=3581 // options.batch_size)
    stop_orca_context()
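# A hedged usage note (the script name and paths below are illustrative, not
# from the original source):
#     python yolov3_train.py --data_dir /path/to/VOC --names voc.names \
#         --weights ./checkpoints/yolov3.weights --cluster_mode local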
raw_df = pd.read_csv("data/data.csv")
df = pd.DataFrame(pd.to_datetime(raw_df.StartTime))
df['AvgRate'] = raw_df.AvgRate.apply(
    lambda x: float(x[:-4]) if x.endswith("Mbps") else float(x[:-4]) * 1000)
df["total"] = raw_df["total"]
df.set_index("StartTime", inplace=True)
full_idx = pd.date_range(start=df.index.min(), end=df.index.max(), freq='2H')
df = df.reindex(full_idx)
drop_dts, drop_len = get_drop_dates_and_len(df)
df = rm_missing_weeks(drop_dts, drop_len, df)
df.ffill(inplace=True)
df.index.name = "datetime"
df = df.reset_index()

init_orca_context(cores=4, memory="4g", init_ray_on_spark=True)

from bigdl.chronos.autots.deprecated.forecast import AutoTSTrainer
from bigdl.chronos.autots.deprecated.config.recipe import *

trainer = AutoTSTrainer(dt_col="datetime",
                        target_col=["AvgRate", "total"],
                        horizon=1,
                        extra_features_col=None)

look_back = (36, 84)
from bigdl.chronos.autots.deprecated.preprocessing.utils import train_val_test_split
train_df, val_df, test_df = train_val_test_split(df,
                                                 val_ratio=0.1,
                                                 test_ratio=0.1,
def transform_label(tbl):
    gen_label = lambda x: 1 if max(x) > 0 else 0
    tbl = tbl.apply(in_col=timestamp_cols, out_col="label",
                    func=gen_label, dtype="int")
    return tbl


if __name__ == '__main__':
    args = _parse_args()
    if args.cluster_mode == "local":
        init_orca_context("local", cores=args.executor_cores,
                          memory=args.executor_memory)
    elif args.cluster_mode == "standalone":
        init_orca_context("standalone", master=args.master,
                          cores=args.executor_cores,
                          num_nodes=args.num_executor,
                          memory=args.executor_memory,
                          driver_cores=args.driver_cores,
                          driver_memory=args.driver_memory,
                          conf=conf)
    elif args.cluster_mode == "yarn":
        init_orca_context("yarn-client", cores=args.executor_cores,
                          num_nodes=args.num_executor,
                          memory=args.executor_memory,
parser = config_option_parser()
(options, args) = parser.parse_args(sys.argv)

if options.folder:
    write_tfrecord(format="imagenet", imagenet_path=options.folder,
                   output_path=options.imagenet)

train_data = train_data_creator(
    config={"data_dir": os.path.join(options.imagenet, "train")})
val_data = val_data_creator(
    config={"data_dir": os.path.join(options.imagenet, "validation")})

num_nodes = 1 if options.cluster_mode == "local" else options.worker_num
init_orca_context(cluster_mode=options.cluster_mode, cores=options.cores,
                  num_nodes=num_nodes, memory=options.memory)

images = tf.placeholder(dtype=tf.float32, shape=(None, 224, 224, 3))
labels = tf.placeholder(dtype=tf.int32, shape=(None,))
is_training = tf.placeholder(dtype=tf.bool, shape=())

with slim.arg_scope(inception_v1.inception_v1_arg_scope(weight_decay=0.0,
                                                        use_batch_norm=False)):
    logits, end_points = inception_v1.inception_v1(images,
                                                   dropout_keep_prob=0.6,
                                                   num_classes=1000,
                                                   is_training=is_training)
probabilities = tf.nn.softmax(logits)
print("probabilities", probabilities)
args = parser.parse_args()
cluster_mode = args.cluster_mode

dataset_dir = args.data_dir
if not exists(dataset_dir):
    makedirs(dataset_dir)

zip_file = tf.keras.utils.get_file(
    origin="https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip",
    fname="cats_and_dogs_filtered.zip",
    extract=True,
    cache_dir=dataset_dir)
base_dir, _ = os.path.splitext(zip_file)

if cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
elif cluster_mode == "yarn":
    additional = "datasets/cats_and_dogs_filtered.zip#" + dataset_dir
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2,
                      driver_memory="3g", additional_archive=additional)
elif cluster_mode == "spark-submit":
    init_orca_context(cluster_mode="spark-submit")
else:
    print("init_orca_context failed. cluster_mode should be one of 'local', "
          "'yarn' and 'spark-submit' but got " + cluster_mode)

train_dir = os.path.join(base_dir, 'train')
parser.add_argument("--epochs", type=int, default=2, help="Max number of epochs to train in each trial.") parser.add_argument( "--n_sampling", type=int, default=1, help="Number of times to sample from the search_space.") args = parser.parse_args() # init_orca_context num_nodes = 1 if args.cluster_mode == "local" else args.num_workers init_orca_context(cluster_mode=args.cluster_mode, cores=args.cores, memory=args.memory, num_nodes=num_nodes, init_ray_on_spark=True) tsdata_train, tsdata_val, tsdata_test = get_tsdata() autoest = AutoTSEstimator(model='lstm', search_space="normal", past_seq_len=40, future_seq_len=1, cpus_per_trial=2, metric='mse', name='auto_lstm') tsppl = autoest.fit(data=tsdata_train, validation_data=tsdata_val, epochs=args.epochs,
def orca_context_fixture():
    from bigdl.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=8, init_ray_on_spark=True,
                      object_store_memory="1g")
    yield
    stop_orca_context()
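# A minimal usage sketch, assuming the fixture above carries a @pytest.fixture
# decorator just above this excerpt and that init_ray_on_spark=True has
# connected the driver to the Ray cluster:
def test_with_orca_context(orca_context_fixture):
    import ray
    assert ray.is_initialized()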
parser.add_argument('-n', '--num_workers', type=int, default=2,
                    help='The number of MXNet workers to be launched.')
parser.add_argument('-s', '--num_servers', type=int,
                    help='The number of MXNet servers to be launched. If not '
                         'specified, default to be equal to the number of workers.')
parser.add_argument('-b', '--batch_size', type=int, default=100,
                    help='The number of samples per gradient update for each worker.')
parser.add_argument('-e', '--epochs', type=int, default=10,
                    help='The number of epochs to train the model.')
parser.add_argument('-l', '--learning_rate', type=float, default=0.02,
                    help='Learning rate for the LeNet model.')
parser.add_argument('--log_interval', type=int, default=20,
                    help='The number of batches to wait before logging throughput '
                         'and metrics information during the training process.')
opt = parser.parse_args()

num_nodes = 1 if opt.cluster_mode == "local" else opt.num_workers
init_orca_context(cluster_mode=opt.cluster_mode, cores=opt.cores,
                  num_nodes=num_nodes)

config = create_config(optimizer="sgd",
                       optimizer_params={'learning_rate': opt.learning_rate},
                       log_interval=opt.log_interval, seed=42)
estimator = Estimator.from_mxnet(config=config,
                                 model_creator=get_model,
                                 loss_creator=get_loss,
                                 validation_metrics_creator=get_metrics,
                                 num_workers=opt.num_workers,
                                 num_servers=opt.num_servers,
                                 eval_metrics_creator=get_metrics)
estimator.fit(data=get_train_data_iter,
              validation_data=get_test_data_iter,
              epochs=opt.epochs,
              batch_size=opt.batch_size)
estimator.shutdown()
stop_orca_context()
default="local", help='The mode for the Spark cluster. local, yarn or spark-submit.') (options, args) = parser.parse_args(sys.argv) if not options.model_path: parser.print_help() parser.error('model_path is required') if not options.image_path: parser.print_help() parser.error('image_path is required') cluster_mode = options.cluster_mode if cluster_mode == "local": sc = init_orca_context(memory="3g") elif cluster_mode.startswith("yarn"): if cluster_mode == "yarn-client": sc = init_orca_context(cluster_mode="yarn-client", num_nodes=2, memory="3g") else: sc = init_orca_context(cluster_mode="yarn-cluster", num_nodes=2, memory="3g") elif cluster_mode == "spark-submit": sc = init_orca_context(cluster_mode="spark-submit") else: print( "init_orca_context failed. cluster_mode should be one of 'local', 'yarn' and 'spark-submit' but got " + cluster_mode)
                    type=int,
                    default=2,
                    help='The number of workers to be launched.')
parser.add_argument('-m', '--mode', type=str, default='gridrandom',
                    choices=['gridrandom', 'skopt', 'sigopt'],
                    help='Search algorithms')
opt = parser.parse_args()

if opt.cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", num_nodes=opt.num_workers,
                      cores=opt.cores, init_ray_on_spark=True)
else:
    init_orca_context(cluster_mode=opt.cluster_mode, cores=opt.cores,
                      init_ray_on_spark=True)

import pandas as pd
df = pd.read_csv(opt.path, encoding='latin-1')
df.rename(columns={
    " FIPS": "FIPS",
    "Age-Adjusted Incidence Rate(Ê) - cases per 100,000": "Age-Adjusted Incidence Rate",
    "Recent 5-Year Trend () in Incidence Rates": "Recent 5-Year Trend"
                    help='The mode of spark cluster.')
parser.add_argument('--backend', type=str, default="bigdl",
                    help='The backend of PyTorch Estimator; '
                         'bigdl, torch_distributed and spark are supported.')
parser.add_argument('--data_dir', type=str, default="./dataset",
                    help='The path of datasets.')
opt = parser.parse_args()
print(opt)

if opt.cluster_mode == "local":
    init_orca_context()
elif opt.cluster_mode == "yarn":
    additional = None if not exists("dataset/BSDS300.zip") \
        else "dataset/BSDS300.zip#dataset"
    init_orca_context(cluster_mode="yarn-client", cores=4, num_nodes=2,
                      additional_archive=additional)
elif opt.cluster_mode == "spark-submit":
    init_orca_context(cluster_mode="spark-submit")
else:
    print("init_orca_context failed. cluster_mode should be one of 'local', "
          "'yarn' and 'spark-submit' but got " + opt.cluster_mode)
# Compute an update and push it to the parameter server.
xs, ys = mnist.train.next_batch(batch_size)
gradients = net.compute_update(xs, ys)
ps.push.remote(keys, gradients)


if __name__ == "__main__":
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode == "yarn":
        sc = init_orca_context(
            cluster_mode=cluster_mode,
            cores=args.executor_cores,
            memory=args.executor_memory,
            init_ray_on_spark=True,
            num_executors=args.num_workers,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray,
            extra_python_lib=args.extra_python_lib,
            object_store_memory=args.object_store_memory,
            additional_archive="MNIST_data.zip#MNIST_data")
        ray_ctx = OrcaContext.get_ray_context()
    elif cluster_mode == "local":
        sc = init_orca_context(cores=args.driver_cores)
        ray_ctx = OrcaContext.get_ray_context()
    elif cluster_mode == "spark-submit":
        sc = init_orca_context(cluster_mode=cluster_mode)
        ray_ctx = OrcaContext.get_ray_context()
    else:
        print(
            "init_orca_context failed. cluster_mode should be one of 'local', "
            "'yarn' and 'spark-submit' but got "
                    default=2,
                    help="The number of cores you want to allocate for each trial.")
parser.add_argument('--epochs', type=int, default=1,
                    help="The number of epochs in each trial.")
parser.add_argument('--trials', type=int, default=4,
                    help="The number of searching trials.")
args = parser.parse_args()

if args.cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=args.cores,
                      num_nodes=args.num_nodes, memory=args.memory,
                      init_ray_on_spark=True)
elif args.cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", cores=args.cores,
                      memory=args.memory, init_ray_on_spark=True)
elif args.cluster_mode == "k8s":
    if not args.k8s_master or not args.container_image \
            or not args.k8s_driver_host or not args.k8s_driver_port:
        parser.print_help()
        parser.error('k8s_master, container_image, '
                     'k8s_driver_host/port are required not to be empty')
    init_orca_context(cluster_mode="k8s", master=args.k8s_master,
# define arg parser object
parser = argparse.ArgumentParser(description='ncf dataframe programming')
parser.add_argument('--cluster_mode', type=str, default='local',
                    help='Optional values: local, yarn, k8s.')
parser.add_argument('--master', type=str, default='master',
                    help='In k8s mode, the parameter master must be passed in.')
parser.add_argument('--image_name_k8s', type=str, default='image_name_k8s',
                    help='In k8s mode, the parameter image_name_k8s must be passed in.')
args = parser.parse_args()
cluster_mode = args.cluster_mode
master = args.master
image_name_k8s = args.image_name_k8s

# Recommended to set it to True when running Analytics Zoo in a Jupyter notebook
# (this will display the terminal's stdout and stderr in the Jupyter notebook).
OrcaContext.log_output = True

if cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=1)  # run in local mode
elif cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2,
                      driver_memory="6g")  # run on Hadoop YARN cluster
elif cluster_mode == "k8s":
    init_orca_context(cluster_mode="k8s", master=master,
                      container_image=image_name_k8s,
                      num_nodes=1, memory="128g", cores=4)  # run on K8s cluster
print("INFO 1 cluster_mode_init_success!")

# Read in the dataset, and do a little preprocessing
new_rating_files = "/ppml/trusted-big-data-ml/work/data/ml-1m/ratings_new.dat.2"
if not os.path.exists(new_rating_files):
    print("INFO ERROR ratings_new.dat does not exist")
    exit(1)

# read csv
                    type=str,
                    default="local",
                    help='The mode for the Spark cluster. local, yarn or spark-submit.')
args = parser.parse_args()
cluster_mode = args.cluster_mode

conf = {"spark.executor.extraJavaOptions": "-Xss512m",
        "spark.driver.extraJavaOptions": "-Xss512m"}

max_features = 2000
max_len = 200

if cluster_mode == "local":
    sc = init_orca_context(cluster_mode="local", cores=8, memory="100g",
                           driver_memory="20g", conf=conf)
elif cluster_mode.startswith("yarn"):
    if cluster_mode == "yarn-client":
        sc = init_orca_context(cluster_mode="yarn-client", num_nodes=8,
                               cores=8, memory="100g",
                               driver_memory="20g", conf=conf)
    else:
        sc = init_orca_context(cluster_mode="yarn-cluster", num_nodes=8,
                               cores=8, memory="100g",
parser.add_option('--early_stopping', type=int, default=3,
                  dest="early_stopping")
parser.add_option('--hidden_units', dest="hidden_units", type=str,
                  help='hidden units for deep mlp', default="1024, 1024")
(options, args) = parser.parse_args(sys.argv)
options.hidden_units = [int(x) for x in options.hidden_units.split(',')]

if options.cluster_mode == "local":
    init_orca_context("local", cores=options.executor_cores,
                      memory=options.executor_memory,
                      init_ray_on_spark=True)
elif options.cluster_mode == "standalone":
    init_orca_context("standalone", master=options.master,
                      cores=options.executor_cores,
                      num_nodes=options.num_executor,
                      memory=options.executor_memory,
                      driver_cores=options.driver_cores,
                      driver_memory=options.driver_memory,
                      conf=conf,
                      init_ray_on_spark=True)
elif options.cluster_mode == "yarn":
    init_orca_context("yarn-client", cores=options.executor_cores,
                      num_nodes=options.num_executor,
help="The extra executor memory to store some data." "You can change it depending on your own cluster setting.") parser.add_argument("--object_store_memory", type=str, default="4g", help="The memory to store data on local." "You can change it depending on your own cluster setting.") if __name__ == "__main__": args = parser.parse_args() cluster_mode = args.cluster_mode if cluster_mode.startswith("yarn"): if cluster_mode == "yarn-client": sc = init_orca_context(cluster_mode="yarn-client", cores=args.executor_cores, memory=args.executor_memory, init_ray_on_spark=True, driver_memory=args.driver_memory, driver_cores=args.driver_cores, num_executors=args.slave_num, extra_executor_memory_for_ray=args.extra_executor_memory_for_ray, object_store_memory=args.object_store_memory) else: sc = init_orca_context(cluster_mode="yarn-cluster", cores=args.executor_cores, memory=args.executor_memory, init_ray_on_spark=True, driver_memory=args.driver_memory, driver_cores=args.driver_cores, num_executors=args.slave_num, extra_executor_memory_for_ray=args.extra_executor_memory_for_ray, object_store_memory=args.object_store_memory) ray_ctx = OrcaContext.get_ray_context()
parser.add_argument("--use_dummy_data", action='store_true', default=False, help="Whether to use dummy data") parser.add_argument("--benchmark", action='store_true', default=False) parser.add_argument("--enable_numa_binding", action='store_true', default=False) if __name__ == "__main__": args = parser.parse_args() num_nodes = 1 if args.cluster_mode == "local" else args.worker_num init_orca_context(cluster_mode=args.cluster_mode, cores=args.cores, num_nodes=num_nodes, memory=args.memory, init_ray_on_spark=True, enable_numa_binding=args.enable_numa_binding) if not args.use_dummy_data: assert args.data_dir is not None, "--data_dir must be provided if not using dummy data" if not os.path.exists(args.log_dir): os.mkdir(args.log_dir) from bigdl.orca.learn.tf2 import Estimator import tensorflow as tf global_batch_size = args.worker_num * args.batch_size_per_worker base_batch_size = 256
def setUp(self) -> None:
    from bigdl.orca import init_orca_context
    init_orca_context(cores=4, init_ray_on_spark=True)
def setUp(self):
    """
    setup any state tied to the execution of the given method in a class.
    setup_method is invoked for every test method of a class.
    """
    self.sc = init_orca_context(cores=4)
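# A hedged companion sketch, not shown in the original excerpt: test classes
# that create an Orca context in setUp typically stop it after each method:
def tearDown(self):
    stop_orca_context()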
                    help='The executor memory.')
parser.add_argument('--num_executor', type=int, default=8,
                    help='The number of executors.')
parser.add_argument('--driver_cores', type=int, default=4,
                    help='The number of driver cores.')
parser.add_argument('--driver_memory', type=str, default="36g",
                    help='The driver memory.')
parser.add_argument('--lr', default=0.001, type=float, help='learning rate')
parser.add_argument('--epochs', default=5, type=int,
                    help='number of training epochs')
parser.add_argument('--batch_size', default=8000, type=int, help='batch size')
parser.add_argument('--model_dir', default='snapshot', type=str,
                    help='snapshot directory name (default: snapshot)')
args = parser.parse_args()

if args.cluster_mode == "local":
    sc = init_orca_context("local", init_ray_on_spark=True)
elif args.cluster_mode == "standalone":
    sc = init_orca_context("standalone", master=args.master,
                           cores=args.executor_cores,
                           num_nodes=args.num_executor,
                           memory=args.executor_memory,
                           driver_cores=args.driver_cores,
                           driver_memory=args.driver_memory,
                           init_ray_on_spark=True)
elif args.cluster_mode == "yarn":
    sc = init_orca_context("yarn-client", cores=args.executor_cores,
                           num_nodes=args.num_executor,
                           memory=args.executor_memory,
                           driver_cores=args.driver_cores,
                           driver_memory=args.driver_memory,
                           object_store_memory="10g",
                           init_ray_on_spark=True)
elif args.cluster_mode == "spark-submit":
    sc = init_orca_context("spark-submit")
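# Not in the original excerpt: other scripts in this collection close the
# dispatch with an else branch reporting unsupported modes; a matching sketch:
else:
    print("init_orca_context failed. cluster_mode should be one of 'local', "
          "'standalone', 'yarn' and 'spark-submit' but got " + args.cluster_mode)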
            save_path = os.path.join(save_path, "saved_data")
        elif mode == "test":
            save_path = os.path.join(save_path, "saved_data_test")
        else:
            raise ValueError("mode should be either train or test")
        print("Saving {} data files to {}".format(mode, save_path))
        data_tbl.write_parquet(save_path)
    else:
        data_tbl.compute()
    return data_tbl


if __name__ == "__main__":
    args = _parse_args()
    if args.cluster_mode == "local":
        init_orca_context("local", cores=args.cores, memory=args.memory)
    elif args.cluster_mode == "standalone":
        init_orca_context("standalone", master=args.master,
                          cores=args.cores, num_nodes=args.num_nodes,
                          memory=args.memory,
                          driver_cores=args.driver_cores,
                          driver_memory=args.driver_memory,
                          conf=conf)
    elif args.cluster_mode == "yarn":
        init_orca_context("yarn-client", cores=args.cores,
                          num_nodes=args.num_nodes, memory=args.memory,
                          driver_cores=args.driver_cores,
parser.add_argument('--epochs', type=int, default=2,
                    help="Max number of epochs to train in each trial.")
parser.add_argument('--workers_per_node', type=int, default=1,
                    help="The number of workers you want to use. "
                         "The value defaults to 1. The param is only effective "
                         "when distributed is set to True.")
args = parser.parse_args()

num_nodes = 1 if args.cluster_mode == 'local' else args.num_workers
init_orca_context(cluster_mode=args.cluster_mode, cores=args.cores,
                  memory=args.memory, num_nodes=num_nodes)

tsdata_train, tsdata_test = get_tsdata()
x_train, y_train = tsdata_train.to_numpy()
x_test, y_test = tsdata_test.to_numpy()

forecaster = Seq2SeqForecaster(past_seq_len=100,
                               future_seq_len=10,
                               input_feature_num=x_train.shape[-1],
                               output_feature_num=2,
                               metrics=['mse'],
                               distributed=True,
                               workers_per_node=args.workers_per_node,
                               seed=0)
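# A hedged continuation, not in the original excerpt: the forecaster tests
# earlier in this section fit on (x, y) numpy tuples, so the next steps would
# plausibly look like:
forecaster.fit((x_train, y_train), epochs=args.epochs)
pred = forecaster.predict(x_test)
eval_result = forecaster.evaluate((x_test, y_test))
stop_orca_context()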