def test_s2s_forecaster_xshard_input(self):
        """Smoke-test Seq2SeqForecaster fit/predict/evaluate on XShards input.

        The arrays returned by ``create_data()`` are partitioned into XShards
        and reshaped into ``{'x': ..., 'y': ...}`` dicts (``{'x': ...}`` only
        for the prediction set). The same shards are then fed to one
        forecaster built with ``distributed=True`` and one with
        ``distributed=False``. No numerical assertion is made; the test only
        checks that the calls complete.
        """
        train_data, val_data, test_data = create_data()
        print("original", train_data[0].dtype)
        init_orca_context(cores=4, memory="2g")
        # Always tear the context down, even when a step below raises, so a
        # failure here cannot leak a live context into later tests.
        try:
            from bigdl.orca.data import XShards

            def transform_to_dict(data):
                return {'x': data[0], 'y': data[1]}

            def transform_to_dict_x(data):
                return {'x': data[0]}

            train_data = XShards.partition(train_data).transform_shard(
                transform_to_dict)
            val_data = XShards.partition(val_data).transform_shard(
                transform_to_dict)
            test_data = XShards.partition(test_data).transform_shard(
                transform_to_dict_x)
            for distributed in [True, False]:
                forecaster = Seq2SeqForecaster(past_seq_len=24,
                                               future_seq_len=5,
                                               input_feature_num=1,
                                               output_feature_num=1,
                                               loss="mae",
                                               lr=0.01,
                                               distributed=distributed)
                forecaster.fit(train_data, epochs=2)
                # Results are intentionally discarded: this is a smoke test.
                forecaster.predict(test_data)
                forecaster.evaluate(val_data)
        finally:
            stop_orca_context()
예제 #2
0
    def test_forecast_tcmf_distributed(self):
        """Exercise TCMFForecaster fit/save/load/predict with 4 workers.

        Checks that:
        * a model reloaded from disk returns the same ids and predictions as
          the in-memory model;
        * predictions have shape ``(self.num_samples, self.horizon)``;
        * predictions change after ``fit_incremental`` on ``self.data_new``;
        * ``evaluate`` with the ``mse`` metric returns a truthy result.
        """
        from bigdl.orca import init_orca_context, stop_orca_context

        # Named ``input_data`` rather than ``input`` to avoid shadowing the
        # builtin.
        input_data = {'id': self.id, 'y': self.data}

        init_orca_context(cores=4,
                          spark_log_level="INFO",
                          init_ray_on_spark=True,
                          object_store_memory="1g")
        # Guarantee the context is stopped even if an assertion below fails,
        # so later tests do not inherit a stale context.
        try:
            self.model.fit(input_data, num_workers=4, **self.fit_params)

            with tempfile.TemporaryDirectory() as tempdirname:
                self.model.save(tempdirname)
                loaded_model = TCMFForecaster.load(
                    tempdirname, is_xshards_distributed=False)
            yhat = self.model.predict(horizon=self.horizon, num_workers=4)
            yhat_loaded = loaded_model.predict(horizon=self.horizon,
                                               num_workers=4)
            yhat_id = yhat_loaded["id"]
            np.testing.assert_equal(yhat_id, self.id)
            yhat = yhat["prediction"]
            yhat_loaded = yhat_loaded["prediction"]
            assert yhat.shape == (self.num_samples, self.horizon)
            np.testing.assert_equal(yhat, yhat_loaded)

            self.model.fit_incremental({'y': self.data_new})
            yhat_incr = self.model.predict(horizon=self.horizon)
            yhat_incr = yhat_incr["prediction"]
            assert yhat_incr.shape == (self.num_samples, self.horizon)
            # The incremental fit must change the forecast: assert that the
            # equality check itself fails.
            np.testing.assert_raises(AssertionError,
                                     np.testing.assert_array_equal,
                                     yhat, yhat_incr)

            target_value = {"y": self.data_new}
            assert self.model.evaluate(target_value=target_value,
                                       metric=['mse'])
        finally:
            stop_orca_context()
    def test_s2s_forecaster_distributed(self):
        """Check distributed and local Seq2SeqForecaster predictions agree.

        A forecaster is trained with ``distributed=True``, used to predict and
        evaluate, converted with ``to_local()``, and used again. The two sets
        of predictions must match to 5 decimals. When ``onnx`` and
        ``onnxruntime`` can be imported, the ONNX prediction path is compared
        against the distributed prediction as well.
        """
        train_data, val_data, test_data = create_data()

        init_orca_context(cores=4, memory="2g")
        # Stop the context even when a step or assertion below fails, so the
        # failure does not leak a live context into later tests.
        try:
            forecaster = Seq2SeqForecaster(past_seq_len=24,
                                           future_seq_len=5,
                                           input_feature_num=1,
                                           output_feature_num=1,
                                           loss="mae",
                                           lr=0.01,
                                           distributed=True)

            forecaster.fit(train_data, epochs=2)
            distributed_pred = forecaster.predict(test_data[0])
            # Evaluation results are not asserted on; the calls only need to
            # complete.
            forecaster.evaluate(val_data)

            model = forecaster.get_model()
            assert isinstance(model, torch.nn.Module)

            forecaster.to_local()
            local_pred = forecaster.predict(test_data[0])
            forecaster.evaluate(val_data)

            np.testing.assert_almost_equal(distributed_pred, local_pred,
                                           decimal=5)

            # Best-effort ONNX check: silently skipped when the optional
            # packages are missing. The scope of the try is kept as in the
            # original so an ImportError raised anywhere in the ONNX path is
            # still treated as "not available".
            try:
                import onnx
                import onnxruntime
                local_pred_onnx = forecaster.predict_with_onnx(test_data[0])
                forecaster.evaluate_with_onnx(val_data)
                np.testing.assert_almost_equal(distributed_pred,
                                               local_pred_onnx,
                                               decimal=5)
            except ImportError:
                pass

            # get_model() must still return a torch module after to_local().
            model = forecaster.get_model()
            assert isinstance(model, torch.nn.Module)
        finally:
            stop_orca_context()
예제 #4
0
def friesian_context_fixture(request):
    """Start a 4-core Orca context, yield it, then stop it.

    Sets ``OrcaContext._eager_mode`` to True. If both ``AWS_ACCESS_KEY_ID``
    and ``AWS_SECRET_ACCESS_KEY`` are set in the environment they are passed
    to ``init_orca_context`` through ``env``; otherwise ``env`` is ``None``.
    The value returned by ``init_orca_context`` is what gets yielded.
    """
    import os
    from bigdl.orca import OrcaContext, init_orca_context, stop_orca_context

    OrcaContext._eager_mode = True

    credentials = {
        name: os.getenv(name)
        for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    }
    # Credentials are only passed on when both of them are present.
    env = None if None in credentials.values() else credentials

    sc = init_orca_context(cores=4, spark_log_level="INFO", env=env)
    yield sc
    stop_orca_context()
예제 #5
0
def orca_context_fixture():
    """Start an 8-core Orca context with two SQL UDFs registered.

    Registers ``to_array`` and ``flatten`` on a SparkSession built from the
    context, yields nothing, and stops the Orca context afterwards.
    """
    sc = init_orca_context(cores=8)

    def vector_to_list(vec):
        # Relies on the value exposing ``toArray()``.
        return vec.toArray().tolist()

    def concat_vectors(vectors):
        # Concatenate the elements of every vector into one flat list.
        return [
            value
            for vec in vectors
            for value in vec.toArray().tolist()
        ]

    spark = SparkSession(sc)
    registrations = (("to_array", vector_to_list),
                     ("flatten", concat_vectors))
    for udf_name, udf_func in registrations:
        spark.udf.register(udf_name, udf_func, ArrayType(DoubleType()))
    yield
    stop_orca_context()
예제 #6
0
    def setUp(self):
        """Create the per-test Orca context and SparkSession.

        Stores the context on ``self.sc`` and the session on ``self.spark``,
        and registers the ``to_array`` and ``flatten`` SQL UDFs on the
        session.
        """
        self.sc = init_orca_context(cores=4)
        self.spark = SparkSession(self.sc)

        def as_list(vector):
            # Relies on the value exposing ``toArray()``.
            return vector.toArray().tolist()

        def as_flat_list(vectors):
            # Concatenate the elements of every vector into one flat list.
            flat = []
            for vector in vectors:
                flat += vector.toArray().tolist()
            return flat

        register = self.spark.udf.register
        register("to_array", as_list, ArrayType(DoubleType()))
        register("flatten", as_flat_list, ArrayType(DoubleType()))
예제 #7
0
    parser.add_argument('--data_num',
                        type=int,
                        default=12,
                        help="The number of dummy data.")
    parser.add_argument('--batch_size',
                        type=int,
                        default=4,
                        help="The batch size of inference.")
    parser.add_argument('--memory',
                        type=str,
                        default="2g",
                        help="The executor memory size.")
    args = parser.parse_args()

    if args.cluster_mode == "local":
        init_orca_context(cores=args.core_num, memory=args.memory)
    elif args.cluster_mode.startswith("yarn"):
        init_orca_context(cluster_mode=args.cluster_mode,
                          cores=args.core_num,
                          num_nodes=args.executor_num,
                          memory=args.memory)
    elif args.cluster_mode == "spark-submit":
        init_orca_context(cluster_mode=args.cluster_mode)

    images = [
        cv2.imread(file) for file in glob.glob(args.image_folder + "/*.jpg")
    ]
    images = [crop(img, 416, 416) for img in images]
    image_num = len(images)
    copy_time = math.ceil(args.data_num / image_num)
    images = images * copy_time
예제 #8
0
def main():
    """Fine-tune YoloV3 on Pascal VOC data with an Orca Keras Estimator.

    Steps, in order:

    1. parse the command-line options;
    2. start the Orca context for the selected ``--cluster_mode``
       (``local``, ``k8s``, ``yarn`` or ``spark-submit``);
    3. convert the darknet weights file into a TensorFlow checkpoint;
    4. write the VOC train and validation splits to parquet;
    5. fit the model with validation, then stop the Orca context.
    """
    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir",
                        dest="data_dir",
                        help="Required. The path where data locates.")
    # The default is resolved after parsing (see below) so that a temporary
    # directory is only created when --output_data is not supplied.
    parser.add_argument(
        "--output_data",
        dest="output_data",
        default=None,
        help="Required. The path where voc parquet data locates.")
    parser.add_argument("--data_year",
                        dest="data_year",
                        default="2009",
                        help="Required. The voc data date.")
    parser.add_argument("--split_name_train",
                        dest="split_name_train",
                        default="train",
                        help="Required. Split name.")
    parser.add_argument("--split_name_test",
                        dest="split_name_test",
                        default="val",
                        help="Required. Split name.")
    parser.add_argument("--names",
                        dest="names",
                        help="Required. The path where class names locates.")
    parser.add_argument("--weights",
                        dest="weights",
                        default="./checkpoints/yolov3.weights",
                        help="Required. The path where weights locates.")
    parser.add_argument("--checkpoint",
                        dest="checkpoint",
                        default="./checkpoints/yolov3.tf",
                        help="Required. The path where checkpoint locates.")
    parser.add_argument(
        "--checkpoint_folder",
        dest="checkpoint_folder",
        default="./checkpoints",
        help="Required. The path where saved checkpoint locates.")
    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        default=2,
                        help="Required. epochs.")
    parser.add_argument("--batch_size",
                        dest="batch_size",
                        type=int,
                        default=16,
                        help="Required. batch size.")
    parser.add_argument(
        "--cluster_mode",
        dest="cluster_mode",
        default="local",
        help="Required. Run on local/yarn/k8s/spark-submit mode.")
    parser.add_argument("--class_num",
                        dest="class_num",
                        type=int,
                        default=20,
                        help="Required. class num.")
    parser.add_argument(
        "--worker_num",
        type=int,
        default=1,
        help="The number of slave nodes to be used in the cluster."
        "You can change it depending on your own cluster setting.")
    parser.add_argument(
        "--cores",
        type=int,
        default=4,
        help="The number of cpu cores you want to use on each node. "
        "You can change it depending on your own cluster setting.")
    parser.add_argument(
        "--memory",
        type=str,
        default="20g",
        help="The memory you want to use on each node. "
        "You can change it depending on your own cluster setting.")
    parser.add_argument(
        "--object_store_memory",
        type=str,
        default="10g",
        help="The memory you want to use on each node. "
        "You can change it depending on your own cluster setting.")
    # NOTE(review): no ``type`` is given, so any value passed on the command
    # line (including the string "False") arrives as a non-empty, truthy
    # string. Confirm the intended CLI contract before changing it.
    parser.add_argument("--enable_numa_binding",
                        dest="enable_numa_binding",
                        default=False,
                        help="enable_numa_binding")
    parser.add_argument('--k8s_master',
                        type=str,
                        default="",
                        help="The k8s master. "
                        "It should be k8s://https://<k8s-apiserver-host>: "
                        "<k8s-apiserver-port>.")
    parser.add_argument("--container_image",
                        type=str,
                        default="",
                        help="The runtime k8s image. ")
    parser.add_argument('--k8s_driver_host',
                        type=str,
                        default="",
                        help="The k8s driver localhost.")
    parser.add_argument('--k8s_driver_port',
                        type=str,
                        default="",
                        help="The k8s driver port.")
    parser.add_argument('--nfs_mount_path',
                        type=str,
                        default="",
                        help="nfs mount path")

    options = parser.parse_args()

    if options.output_data is None:
        # Created lazily so no stray temp dir is left behind when the caller
        # supplies --output_data explicitly.
        options.output_data = tempfile.mkdtemp()

    if options.cluster_mode == "local":
        init_orca_context(cluster_mode="local",
                          cores=options.cores,
                          num_nodes=options.worker_num,
                          memory=options.memory,
                          init_ray_on_spark=True,
                          object_store_memory=options.object_store_memory)
    elif options.cluster_mode == "k8s":
        init_orca_context(
            cluster_mode="k8s",
            master=options.k8s_master,
            container_image=options.container_image,
            init_ray_on_spark=True,
            enable_numa_binding=options.enable_numa_binding,
            num_nodes=options.worker_num,
            cores=options.cores,
            memory=options.memory,
            object_store_memory=options.object_store_memory,
            conf={
                # The flags are --k8s_driver_host / --k8s_driver_port, so the
                # parsed attributes carry the ``k8s_`` prefix.
                "spark.driver.host":
                options.k8s_driver_host,
                "spark.driver.port":
                options.k8s_driver_port,
                "spark.kubernetes.executor.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.options.claimName":
                "nfsvolumeclaim",
                "spark.kubernetes.executor.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.mount.path":
                options.nfs_mount_path,
                "spark.kubernetes.driver.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.options.claimName":
                "nfsvolumeclaim",
                "spark.kubernetes.driver.volumes.persistentVolumeClaim."
                "nfsvolumeclaim.mount.path":
                options.nfs_mount_path
            })
    elif options.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client",
                          cores=options.cores,
                          num_nodes=options.worker_num,
                          memory=options.memory,
                          init_ray_on_spark=True,
                          enable_numa_binding=options.enable_numa_binding,
                          object_store_memory=options.object_store_memory)
    elif options.cluster_mode == "spark-submit":
        init_orca_context(cluster_mode="spark-submit")
    # NOTE(review): any other cluster_mode falls through without starting a
    # context; behaviour kept as in the original.

    # convert yolov3 weights
    yolo = YoloV3(classes=80)
    load_darknet_weights(yolo, options.weights)
    yolo.save_weights(options.checkpoint)

    def model_creator(config):
        """Build the trainable model, seeded from the converted checkpoint."""
        model = YoloV3(DEFAULT_IMAGE_SIZE,
                       training=True,
                       classes=options.class_num)
        anchors = yolo_anchors
        anchor_masks = yolo_anchor_masks

        # Load the 80-class checkpoint into a second model so that only the
        # 'yolo_darknet' layer weights are copied into the new model.
        model_pretrained = YoloV3(DEFAULT_IMAGE_SIZE,
                                  training=True,
                                  classes=80)
        model_pretrained.load_weights(options.checkpoint)

        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

        # ``learning_rate`` is the supported keyword; ``lr`` is a deprecated
        # alias.
        optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
        loss = [
            YoloLoss(anchors[mask], classes=options.class_num)
            for mask in anchor_masks
        ]
        model.compile(optimizer=optimizer, loss=loss, run_eagerly=False)
        return model

    # prepare data
    # One class name per line; the line index becomes the class id. The file
    # is closed as soon as it has been read.
    with open(options.names) as names_file:
        class_map = {
            name: idx
            for idx, name in enumerate(names_file.read().splitlines())
        }
    dataset_path = os.path.join(options.data_dir, "VOCdevkit")
    voc_train_path = os.path.join(options.output_data, "train_dataset")
    voc_val_path = os.path.join(options.output_data, "val_dataset")

    write_parquet(format="voc",
                  voc_root_path=dataset_path,
                  output_path="file://" + voc_train_path,
                  splits_names=[(options.data_year, options.split_name_train)],
                  classes=class_map)
    write_parquet(format="voc",
                  voc_root_path=dataset_path,
                  output_path="file://" + voc_val_path,
                  splits_names=[(options.data_year, options.split_name_test)],
                  classes=class_map)

    output_types = {
        "image": tf.string,
        "label": tf.float32,
        "image_id": tf.string
    }
    output_shapes = {"image": (), "label": (None, 5), "image_id": ()}

    def train_data_creator(config, batch_size):
        """Return the shuffled, batched and prefetched training dataset."""
        train_dataset = read_parquet(format="tf_dataset",
                                     path=voc_train_path,
                                     output_types=output_types,
                                     output_shapes=output_shapes)
        train_dataset = train_dataset.map(
            lambda data_dict: (data_dict["image"], data_dict["label"]))
        train_dataset = train_dataset.map(parse_data_train)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            transform_images(x, DEFAULT_IMAGE_SIZE),
            transform_targets(y, anchors, anchor_masks, DEFAULT_IMAGE_SIZE)))
        train_dataset = train_dataset.prefetch(
            buffer_size=tf.data.experimental.AUTOTUNE)
        return train_dataset

    def val_data_creator(config, batch_size):
        """Return the batched validation dataset (no shuffle, no prefetch)."""
        val_dataset = read_parquet(format="tf_dataset",
                                   path=voc_val_path,
                                   output_types=output_types,
                                   output_shapes=output_shapes)
        val_dataset = val_dataset.map(lambda data_dict:
                                      (data_dict["image"], data_dict["label"]))
        val_dataset = val_dataset.map(parse_data_train)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            transform_images(x, DEFAULT_IMAGE_SIZE),
            transform_targets(y, anchors, anchor_masks, DEFAULT_IMAGE_SIZE)))
        return val_dataset

    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint(options.checkpoint_folder + '/yolov3_train_{epoch}.tf',
                        verbose=1,
                        save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    trainer = Estimator.from_keras(model_creator=model_creator)

    # NOTE(review): 3473 and 3581 are presumably the sample counts of the
    # default VOC 2009 train/val splits -- confirm if other splits are used.
    trainer.fit(train_data_creator,
                epochs=options.epochs,
                batch_size=options.batch_size,
                steps_per_epoch=3473 // options.batch_size,
                callbacks=callbacks,
                validation_data=val_data_creator,
                validation_steps=3581 // options.batch_size)
    stop_orca_context()
예제 #9
0
raw_df = pd.read_csv("data/data.csv")

df = pd.DataFrame(pd.to_datetime(raw_df.StartTime))
df['AvgRate'] = \
    raw_df.AvgRate.apply(lambda x: float(x[:-4]) if x.endswith("Mbps") else float(x[:-4]) * 1000)
df["total"] = raw_df["total"]
df.set_index("StartTime", inplace=True)
full_idx = pd.date_range(start=df.index.min(), end=df.index.max(), freq='2H')
df = df.reindex(full_idx)
drop_dts, drop_len = get_drop_dates_and_len(df)
df = rm_missing_weeks(drop_dts, drop_len, df)
df.ffill(inplace=True)
df.index.name = "datetime"
df = df.reset_index()

init_orca_context(cores=4, memory="4g", init_ray_on_spark=True)

from bigdl.chronos.autots.deprecated.forecast import AutoTSTrainer
from bigdl.chronos.autots.deprecated.config.recipe import *

trainer = AutoTSTrainer(dt_col="datetime",
                        target_col=["AvgRate", "total"],
                        horizon=1,
                        extra_features_col=None)

look_back = (36, 84)
from bigdl.chronos.autots.deprecated.preprocessing.utils import train_val_test_split

train_df, val_df, test_df = train_val_test_split(df,
                                                 val_ratio=0.1,
                                                 test_ratio=0.1,
예제 #10
0

def transform_label(tbl):
    """Add an int ``label`` column derived from ``timestamp_cols``.

    The label is 1 when the largest value handed to the row function is
    greater than 0, and 0 otherwise. Returns the table produced by
    ``tbl.apply``.
    """

    def gen_label(values):
        # presumably ``values`` holds one row's timestamp_cols values --
        # verify against the table's ``apply`` implementation
        if max(values) > 0:
            return 1
        return 0

    return tbl.apply(in_col=timestamp_cols,
                     out_col="label",
                     func=gen_label,
                     dtype="int")


if __name__ == '__main__':
    args = _parse_args()
    if args.cluster_mode == "local":
        init_orca_context("local",
                          cores=args.executor_cores,
                          memory=args.executor_memory)
    elif args.cluster_mode == "standalone":
        init_orca_context("standalone",
                          master=args.master,
                          cores=args.executor_cores,
                          num_nodes=args.num_executor,
                          memory=args.executor_memory,
                          driver_cores=args.driver_cores,
                          driver_memory=args.driver_memory,
                          conf=conf)
    elif args.cluster_mode == "yarn":
        init_orca_context("yarn-client",
                          cores=args.executor_cores,
                          num_nodes=args.num_executor,
                          memory=args.executor_memory,
예제 #11
0
    parser = config_option_parser()
    (options, args) = parser.parse_args(sys.argv)

    if options.folder:
        write_tfrecord(format="imagenet",
                       imagenet_path=options.folder,
                       output_path=options.imagenet)

    train_data = train_data_creator(
        config={"data_dir": os.path.join(options.imagenet, "train")})
    val_data = val_data_creator(
        config={"data_dir": os.path.join(options.imagenet, "validation")})

    num_nodes = 1 if options.cluster_mode == "local" else options.worker_num
    init_orca_context(cluster_mode=options.cluster_mode,
                      cores=options.cores,
                      num_nodes=num_nodes,
                      memory=options.memory)

    images = tf.placeholder(dtype=tf.float32, shape=(None, 224, 224, 3))
    labels = tf.placeholder(dtype=tf.int32, shape=(None))
    is_training = tf.placeholder(dtype=tf.bool, shape=())

    with slim.arg_scope(
            inception_v1.inception_v1_arg_scope(weight_decay=0.0,
                                                use_batch_norm=False)):
        logits, end_points = inception_v1.inception_v1(images,
                                                       dropout_keep_prob=0.6,
                                                       num_classes=1000,
                                                       is_training=is_training)
        probabilities = tf.nn.softmax(logits)
        print("probabilities", probabilities)
예제 #12
0
args = parser.parse_args()
cluster_mode = args.cluster_mode

dataset_dir = args.data_dir
if not exists(dataset_dir):
    makedirs(dataset_dir)
zip_file = tf.keras.utils.get_file(
    origin=
    "https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip",
    fname="cats_and_dogs_filtered.zip",
    extract=True,
    cache_dir=dataset_dir)
base_dir, _ = os.path.splitext(zip_file)

if cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
elif cluster_mode == "yarn":
    additional = "datasets/cats_and_dogs_filtered.zip#" + dataset_dir
    init_orca_context(cluster_mode="yarn-client",
                      num_nodes=2,
                      cores=2,
                      driver_memory="3g",
                      additional_archive=additional)
elif cluster_mode == "spark-submit":
    init_orca_context(cluster_mode="spark-submit")
else:
    print(
        "init_orca_context failed. cluster_mode should be one of 'local', 'yarn' and 'spark-submit' but got "
        + cluster_mode)

train_dir = os.path.join(base_dir, 'train')
    parser.add_argument("--epochs",
                        type=int,
                        default=2,
                        help="Max number of epochs to train in each trial.")
    parser.add_argument(
        "--n_sampling",
        type=int,
        default=1,
        help="Number of times to sample from the search_space.")
    args = parser.parse_args()

    # init_orca_context
    num_nodes = 1 if args.cluster_mode == "local" else args.num_workers
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      memory=args.memory,
                      num_nodes=num_nodes,
                      init_ray_on_spark=True)
    tsdata_train, tsdata_val, tsdata_test = get_tsdata()

    autoest = AutoTSEstimator(model='lstm',
                              search_space="normal",
                              past_seq_len=40,
                              future_seq_len=1,
                              cpus_per_trial=2,
                              metric='mse',
                              name='auto_lstm')

    tsppl = autoest.fit(data=tsdata_train,
                        validation_data=tsdata_val,
                        epochs=args.epochs,
예제 #14
0
def orca_context_fixture():
    """Run the dependent test inside an 8-core Orca context with Ray enabled.

    The context is started with ``init_ray_on_spark=True`` and a ``"1g"``
    object store, and stopped once the generator is resumed after the yield.
    """
    from bigdl.orca import init_orca_context, stop_orca_context

    context_options = {
        "cores": 8,
        "init_ray_on_spark": True,
        "object_store_memory": "1g",
    }
    init_orca_context(**context_options)
    yield
    stop_orca_context()
예제 #15
0
    parser.add_argument('-n', '--num_workers', type=int, default=2,
                        help='The number of MXNet workers to be launched.')
    parser.add_argument('-s', '--num_servers', type=int,
                        help='The number of MXNet servers to be launched. If not specified, '
                        'default to be equal to the number of workers.')
    parser.add_argument('-b', '--batch_size', type=int, default=100,
                        help='The number of samples per gradient update for each worker.')
    parser.add_argument('-e', '--epochs', type=int, default=10,
                        help='The number of epochs to train the model.')
    parser.add_argument('-l', '--learning_rate', type=float, default=0.02,
                        help='Learning rate for the LeNet model.')
    parser.add_argument('--log_interval', type=int, default=20,
                        help='The number of batches to wait before logging throughput and '
                             'metrics information during the training process.')
    opt = parser.parse_args()

    num_nodes = 1 if opt.cluster_mode == "local" else opt.num_workers
    init_orca_context(cluster_mode=opt.cluster_mode, cores=opt.cores, num_nodes=num_nodes)

    config = create_config(optimizer="sgd",
                           optimizer_params={'learning_rate': opt.learning_rate},
                           log_interval=opt.log_interval, seed=42)
    estimator = Estimator.from_mxnet(config=config, model_creator=get_model,
                                     loss_creator=get_loss, validation_metrics_creator=get_metrics,
                                     num_workers=opt.num_workers, num_servers=opt.num_servers,
                                     eval_metrics_creator=get_metrics)
    estimator.fit(data=get_train_data_iter, validation_data=get_test_data_iter,
                  epochs=opt.epochs, batch_size=opt.batch_size)
    estimator.shutdown()
    stop_orca_context()
    
예제 #16
0
        default="local",
        help='The mode for the Spark cluster. local, yarn or spark-submit.')

    (options, args) = parser.parse_args(sys.argv)

    if not options.model_path:
        parser.print_help()
        parser.error('model_path is required')

    if not options.image_path:
        parser.print_help()
        parser.error('image_path is required')

    cluster_mode = options.cluster_mode
    if cluster_mode == "local":
        sc = init_orca_context(memory="3g")
    elif cluster_mode.startswith("yarn"):
        if cluster_mode == "yarn-client":
            sc = init_orca_context(cluster_mode="yarn-client",
                                   num_nodes=2,
                                   memory="3g")
        else:
            sc = init_orca_context(cluster_mode="yarn-cluster",
                                   num_nodes=2,
                                   memory="3g")
    elif cluster_mode == "spark-submit":
        sc = init_orca_context(cluster_mode="spark-submit")
    else:
        print(
            "init_orca_context failed. cluster_mode should be one of 'local', 'yarn' and 'spark-submit' but got "
            + cluster_mode)
예제 #17
0
                        type=int,
                        default=2,
                        help='The number of workers to be launched.')
    parser.add_argument(
        '-m',
        '--mode',
        type=str,
        default='gridrandom',
        choices=['gridrandom', 'skopt', 'sigopt'],
        help='Search algorithms',
    )

    opt = parser.parse_args()
    if opt.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client",
                          num_nodes=opt.num_workers,
                          cores=opt.cores,
                          init_ray_on_spark=True)
    else:
        init_orca_context(cluster_mode=opt.cluster_mode,
                          cores=opt.cores,
                          init_ray_on_spark=True)

    import pandas as pd
    df = pd.read_csv(opt.path, encoding='latin-1')
    df.rename(columns={
        " FIPS":
        "FIPS",
        "Age-Adjusted Incidence Rate(Ê) - cases per 100,000":
        "Age-Adjusted Incidence Rate",
        "Recent 5-Year Trend (ˆ) in Incidence Rates":
        "Recent 5-Year Trend"
예제 #18
0
                    help='The mode of spark cluster.')
parser.add_argument('--backend',
                    type=str,
                    default="bigdl",
                    help='The backend of PyTorch Estimator; '
                    'bigdl, torch_distributed and spark are supported.')
parser.add_argument('--data_dir',
                    type=str,
                    default="./dataset",
                    help='The path of datesets.')
opt = parser.parse_args()

print(opt)

if opt.cluster_mode == "local":
    init_orca_context()
elif opt.cluster_mode == "yarn":
    additional = None if not exists(
        "dataset/BSDS300.zip") else "dataset/BSDS300.zip#dataset"
    init_orca_context(cluster_mode="yarn-client",
                      cores=4,
                      num_nodes=2,
                      additional_archive=additional)
elif opt.cluster_mode == "spark-submit":
    init_orca_context(cluster_mode="spark-submit")
else:
    print(
        "init_orca_context failed. cluster_mode should be one of 'local', 'yarn' and 'spark-submit' but got "
        + opt.cluster_mode)

        # Compute an update and push it to the parameter server.
        xs, ys = mnist.train.next_batch(batch_size)
        gradients = net.compute_update(xs, ys)
        ps.push.remote(keys, gradients)


if __name__ == "__main__":
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode == "yarn":
        sc = init_orca_context(
            cluster_mode=cluster_mode,
            cores=args.executor_cores,
            memory=args.executor_memory,
            init_ray_on_spark=True,
            num_executors=args.num_workers,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray,
            extra_python_lib=args.extra_python_lib,
            object_store_memory=args.object_store_memory,
            additional_archive="MNIST_data.zip#MNIST_data")
        ray_ctx = OrcaContext.get_ray_context()
    elif cluster_mode == "local":
        sc = init_orca_context(cores=args.driver_cores)
        ray_ctx = OrcaContext.get_ray_context()
    elif cluster_mode == "spark-submit":
        sc = init_orca_context(cluster_mode=cluster_mode)
        ray_ctx = OrcaContext.get_ray_context()
    else:
        print(
            "init_orca_context failed. cluster_mode should be one of 'local', 'yarn' and 'spark-submit' but got "
예제 #20
0
        default=2,
        help="The number of cores you want to allocate for each trial.")
    parser.add_argument('--epochs',
                        type=int,
                        default=1,
                        help="The number of epochs in each trial.")
    parser.add_argument('--trials',
                        type=int,
                        default=4,
                        help="The number of searching trials.")

    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context(cluster_mode="local",
                          cores=args.cores,
                          num_nodes=args.num_nodes,
                          memory=args.memory,
                          init_ray_on_spark=True)
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client",
                          cores=args.cores,
                          memory=args.memory,
                          init_ray_on_spark=True)
    elif args.cluster_mode == "k8s":
        if not args.k8s_master or not args.container_image \
                or not args.k8s_driver_host or not args.k8s_driver_port:
            parser.print_help()
            parser.error('k8s_master, container_image,'
                         'k8s_driver_host/port are required not to be empty')
        init_orca_context(cluster_mode="k8s",
                          master=args.k8s_master,
예제 #21
0
# define arg parser object
parser = argparse.ArgumentParser(description='ncf dataframe programming')
parser.add_argument('--cluster_mode', type=str, default='local',
                    help='Optional values: local, yarn, k8s.')
parser.add_argument('--master', type=str, default='master',
                    help='In k8s mode, the parameter master must be passed in.')
parser.add_argument('--image_name_k8s', type=str, default='image_name_k8s',
                    help='In k8s mode, the parameter image_name_k8s must be passed in.')

args = parser.parse_args()
cluster_mode = args.cluster_mode
master = args.master
image_name_k8s = args.image_name_k8s

# recommended to set it to True when running Analytics Zoo in Jupyter notebook
# (this will display terminal's stdout and stderr in the Jupyter notebook).
OrcaContext.log_output = True

if cluster_mode == "local":
    # run in local mode
    init_orca_context(cluster_mode="local", cores=1)
elif cluster_mode == "yarn":
    # run on Hadoop YARN cluster
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2, driver_memory="6g")
elif cluster_mode == "k8s":
    # run on a Kubernetes cluster
    init_orca_context(cluster_mode="k8s", master=master,
                      container_image=image_name_k8s, num_nodes=1, memory="128g", cores=4)
else:
    # An unrecognised mode used to fall through without creating a context
    # and still print the success marker below; stop here instead.
    print("INFO ERROR unsupported cluster_mode: " + cluster_mode)
    exit(1)

print("INFO 1 cluster_mode_init_success!")

# Read in the dataset, and do a little preprocessing
new_rating_files = "/ppml/trusted-big-data-ml/work/data/ml-1m/ratings_new.dat.2"
if not os.path.exists(new_rating_files):
    print("INFO ERROR ratings_new.dat does not exist")
    exit(1)

# read csv
예제 #22
0
    type=str,
    default="local",
    help='The mode for the Spark cluster. local, yarn or spark-submit.')
# Parse the command line and remember the requested cluster mode.
args = parser.parse_args()
cluster_mode = args.cluster_mode
# Extra Spark settings handed to init_orca_context via `conf`.
# `-Xss512m` is the JVM option for a 512 MB thread stack size; it is set for
# both the executor and the driver JVMs.
conf = {
    "spark.executor.extraJavaOptions": "-Xss512m",
    "spark.driver.extraJavaOptions": "-Xss512m"
}
# NOTE(review): presumably vocabulary size and sequence length limits used
# further down the script; their use is not visible here, so confirm.
max_features = 2000
max_len = 200
if cluster_mode == "local":
    sc = init_orca_context(cluster_mode="local",
                           cores=8,
                           memory="100g",
                           driver_memory="20g",
                           conf=conf)
elif cluster_mode.startswith("yarn"):
    if cluster_mode == "yarn_client":
        sc = init_orca_context(cluster_mode="yarn-client",
                               num_nodes=8,
                               cores=8,
                               memory="100g",
                               driver_memory="20g",
                               conf=conf)
    else:
        sc = init_orca_context(cluster_mode="yarn-cluster",
                               num_nodes=8,
                               cores=8,
                               memory="100g",
예제 #23
0
    parser.add_option('--early_stopping',
                      type=int,
                      default=3,
                      dest="early_stopping")
    parser.add_option('--hidden_units',
                      dest="hidden_units",
                      type=str,
                      help='hidden units for deep mlp',
                      default="1024, 1024")

    (options, args) = parser.parse_args(sys.argv)
    options.hidden_units = [int(x) for x in options.hidden_units.split(',')]

    if options.cluster_mode == "local":
        init_orca_context("local",
                          cores=options.executor_cores,
                          memory=options.executor_memory,
                          init_ray_on_spark=True)
    elif options.cluster_mode == "standalone":
        init_orca_context("standalone",
                          master=options.master,
                          cores=options.executor_cores,
                          num_nodes=options.num_executor,
                          memory=options.executor_memory,
                          driver_cores=options.driver_cores,
                          driver_memory=options.driver_memory,
                          conf=conf,
                          init_ray_on_spark=True)
    elif options.cluster_mode == "yarn":
        init_orca_context("yarn-client",
                          cores=options.executor_cores,
                          num_nodes=options.num_executor,
                    help="The extra executor memory to store some data."
                         "You can change it depending on your own cluster setting.")
# Adjacent string literals are joined with no separator, so the first
# sentence needs its own trailing space to keep the help text readable.
parser.add_argument("--object_store_memory", type=str, default="4g",
                    help="The memory to store data on local. "
                         "You can change it depending on your own cluster setting.")

if __name__ == "__main__":
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        # Exactly "yarn-client" runs in client mode; any other "yarn*" value
        # runs as yarn-cluster. Every other argument is the same for both.
        sc = init_orca_context(
            cluster_mode=("yarn-client" if cluster_mode == "yarn-client"
                          else "yarn-cluster"),
            cores=args.executor_cores,
            memory=args.executor_memory,
            init_ray_on_spark=True,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            num_executors=args.slave_num,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray,
            object_store_memory=args.object_store_memory,
        )
        ray_ctx = OrcaContext.get_ray_context()
예제 #25
0
parser.add_argument("--use_dummy_data",
                    action='store_true',
                    default=False,
                    help="Whether to use dummy data")
parser.add_argument("--benchmark", action='store_true', default=False)
parser.add_argument("--enable_numa_binding",
                    action='store_true',
                    default=False)

if __name__ == "__main__":

    args = parser.parse_args()
    # A local run always uses a single node; otherwise use --worker_num.
    num_nodes = 1 if args.cluster_mode == "local" else args.worker_num
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      num_nodes=num_nodes,
                      memory=args.memory,
                      init_ray_on_spark=True,
                      enable_numa_binding=args.enable_numa_binding)

    # Validate with an explicit raise: an `assert` is stripped under
    # `python -O`, which would let a missing --data_dir slip through.
    if not args.use_dummy_data and args.data_dir is None:
        raise ValueError("--data_dir must be provided if not using dummy data")

    # makedirs(exist_ok=True) avoids the check-then-create race and also
    # creates missing parent directories of the log directory.
    os.makedirs(args.log_dir, exist_ok=True)

    from bigdl.orca.learn.tf2 import Estimator
    import tensorflow as tf

    # Global batch size is the per-worker batch size times the worker count.
    global_batch_size = args.worker_num * args.batch_size_per_worker

    base_batch_size = 256
 def setUp(self) -> None:
     """Initialise an Orca context with 4 cores and ``init_ray_on_spark=True``."""
     # Imported locally, inside the method, rather than at module level.
     from bigdl.orca import init_orca_context
     init_orca_context(cores=4, init_ray_on_spark=True)
예제 #27
0
 def setUp(self):
     """Initialise an Orca context with 4 cores for the test.

     The value returned by ``init_orca_context`` is kept on ``self.sc``.
     """
     self.sc = init_orca_context(cores=4)
예제 #28
0
                        help='The executor memory.')
    # Cluster sizing options.
    parser.add_argument(
        '--num_executor',
        type=int,
        default=8,
        help='The number of executor.',
    )
    parser.add_argument(
        '--driver_cores',
        type=int,
        default=4,
        help='The driver core number.',
    )
    parser.add_argument(
        '--driver_memory',
        type=str,
        default="36g",
        help='The driver memory.',
    )
    # Training options.
    parser.add_argument(
        '--lr',
        default=0.001,
        type=float,
        help='learning rate',
    )
    parser.add_argument(
        '--epochs',
        default=5,
        type=int,
        help='train epoch',
    )
    parser.add_argument(
        '--batch_size',
        default=8000,
        type=int,
        help='batch size',
    )
    parser.add_argument(
        '--model_dir',
        default='snapshot',
        type=str,
        help='snapshot directory name (default: snapshot)',
    )
    args = parser.parse_args()

    # Create the Orca context for the requested cluster mode; the result is
    # kept in `sc`.
    if args.cluster_mode == "local":
        sc = init_orca_context("local", init_ray_on_spark=True)
    elif args.cluster_mode == "standalone":
        sc = init_orca_context(
            "standalone",
            master=args.master,
            cores=args.executor_cores,
            num_nodes=args.num_executor,
            memory=args.executor_memory,
            driver_cores=args.driver_cores,
            driver_memory=args.driver_memory,
            init_ray_on_spark=True,
        )
    elif args.cluster_mode == "yarn":
        # The "yarn" option maps to yarn-client with a fixed 10g object store.
        sc = init_orca_context(
            "yarn-client",
            cores=args.executor_cores,
            num_nodes=args.num_executor,
            memory=args.executor_memory,
            driver_cores=args.driver_cores,
            driver_memory=args.driver_memory,
            object_store_memory="10g",
            init_ray_on_spark=True,
        )
    elif args.cluster_mode == "spark-submit":
        sc = init_orca_context("spark-submit")
예제 #29
0
            save_path = os.path.join(save_path, "saved_data")
        elif mode == "test":
            save_path = os.path.join(save_path, "saved_data_test")
        else:
            raise ValueError("mode should be either train or test")
        print("Saving {} data files to {}".format(mode, save_path))
        data_tbl.write_parquet(save_path)
    else:
        data_tbl.compute()
    return data_tbl


if __name__ == "__main__":
    args = _parse_args()
    if args.cluster_mode == "local":
        init_orca_context("local", cores=args.cores, memory=args.memory)
    elif args.cluster_mode == "standalone":
        init_orca_context("standalone",
                          master=args.master,
                          cores=args.cores,
                          num_nodes=args.num_nodes,
                          memory=args.memory,
                          driver_cores=args.driver_cores,
                          driver_memory=args.driver_memory,
                          conf=conf)
    elif args.cluster_mode == "yarn":
        init_orca_context("yarn-client",
                          cores=args.cores,
                          num_nodes=args.num_nodes,
                          memory=args.memory,
                          driver_cores=args.driver_cores,
예제 #30
0
    parser.add_argument('--epochs',
                        type=int,
                        default=2,
                        help="Max number of epochs to train in each trial.")
    # Adjacent string literals are joined with no separator, so each piece
    # ends with a space to keep the sentences from running together.
    parser.add_argument('--workers_per_node',
                        type=int,
                        default=1,
                        help="the number of worker you want to use. "
                        "The value defaults to 1. The param is only effective "
                        "when distributed is set to True.")

    args = parser.parse_args()

    # A local run always uses one node; otherwise use the requested workers.
    if args.cluster_mode == 'local':
        num_nodes = 1
    else:
        num_nodes = args.num_workers
    init_orca_context(
        cluster_mode=args.cluster_mode,
        cores=args.cores,
        memory=args.memory,
        num_nodes=num_nodes,
    )

    # Load the train/test series and unpack each into its (x, y) arrays.
    tsdata_train, tsdata_test = get_tsdata()
    x_train, y_train = tsdata_train.to_numpy()
    x_test, y_test = tsdata_test.to_numpy()

    # The input feature count comes from the last axis of the training inputs.
    forecaster = Seq2SeqForecaster(
        past_seq_len=100,
        future_seq_len=10,
        input_feature_num=x_train.shape[-1],
        output_feature_num=2,
        metrics=['mse'],
        distributed=True,
        workers_per_node=args.workers_per_node,
        seed=0,
    )