def test_tcn_forecaster_xshard_input(self):
        train_data, val_data, test_data = create_data()
        print("original", train_data[0].dtype)
        init_orca_context(cores=4, memory="2g")
        from zoo.orca.data import XShards

        def transform_to_dict(data):
            return {'x': data[0], 'y': data[1]}

        def transform_to_dict_x(data):
            return {'x': data[0]}

        train_data = XShards.partition(train_data).transform_shard(
            transform_to_dict)
        val_data = XShards.partition(val_data).transform_shard(
            transform_to_dict)
        test_data = XShards.partition(test_data).transform_shard(
            transform_to_dict_x)
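        # Run fit/predict/evaluate on the XShards input both in distributed and in local mode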
        for distributed in [True, False]:
            forecaster = LSTMForecaster(past_seq_len=24,
                                        input_feature_num=2,
                                        output_feature_num=2,
                                        loss="mae",
                                        lr=0.01,
                                        distributed=distributed)
            forecaster.fit(train_data, epochs=2)
            distributed_pred = forecaster.predict(test_data)
            distributed_eval = forecaster.evaluate(val_data)
        stop_orca_context()
Example #2
def orca_context_fixture():
    from zoo.orca import init_orca_context, stop_orca_context
    init_orca_context(cores=8,
                      init_ray_on_spark=True,
                      object_store_memory="1g")
    yield
    stop_orca_context()
    def test_forecast_tcmf_distributed(self):
        input = dict({'id': self.id, 'y': self.data})

        from zoo.orca import init_orca_context, stop_orca_context

        init_orca_context(cores=4, spark_log_level="INFO", init_ray_on_spark=True,
                          object_store_memory="1g")
        self.model.fit(input, num_workers=4, **self.fit_params)

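        # Save the fitted model, reload it, and check that the reloaded model gives identical predictions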
        with tempfile.TemporaryDirectory() as tempdirname:
            self.model.save(tempdirname)
            loaded_model = TCMFForecaster.load(tempdirname, is_xshards_distributed=False)
        yhat = self.model.predict(horizon=self.horizon, num_workers=4)
        yhat_loaded = loaded_model.predict(horizon=self.horizon, num_workers=4)
        yhat_id = yhat_loaded["id"]
        np.testing.assert_equal(yhat_id, self.id)
        yhat = yhat["prediction"]
        yhat_loaded = yhat_loaded["prediction"]
        assert yhat.shape == (self.num_samples, self.horizon)
        np.testing.assert_equal(yhat, yhat_loaded)

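        # Incremental fitting on new data should produce a forecast that differs from the original one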
        self.model.fit_incremental({'y': self.data_new})
        yhat_incr = self.model.predict(horizon=self.horizon)
        yhat_incr = yhat_incr["prediction"]
        assert yhat_incr.shape == (self.num_samples, self.horizon)
        np.testing.assert_raises(AssertionError, np.testing.assert_array_equal, yhat, yhat_incr)

        target_value = dict({"y": self.data_new})
        assert self.model.evaluate(target_value=target_value, metric=['mse'])
        stop_orca_context()
    def test_forecast_tcmf_distributed(self):
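        # Use minimal training iterations so this end-to-end test runs quickly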
        model = TCMFForecaster(y_iters=1,
                               init_FX_epoch=1,
                               max_FX_epoch=1,
                               max_TCN_epoch=1,
                               alt_iters=2)
        horizon = np.random.randint(1, 50)
        # construct data
        id = np.arange(300)
        data = np.random.rand(300, 480)
        input = dict({'id': id, 'y': data})

        from zoo.orca import init_orca_context, stop_orca_context

        init_orca_context(cores=4, spark_log_level="INFO", init_ray_on_spark=True,
                          object_store_memory="1g")
        model.fit(input, num_workers=4)

        with tempfile.TemporaryDirectory() as tempdirname:
            model.save(tempdirname)
            loaded_model = TCMFForecaster.load(tempdirname, distributed=False)
        yhat = model.predict(x=None, horizon=horizon, num_workers=4)
        yhat_loaded = loaded_model.predict(x=None, horizon=horizon, num_workers=4)
        yhat_id = yhat_loaded["id"]
        assert (yhat_id == id).all()
        yhat = yhat["prediction"]
        yhat_loaded = yhat_loaded["prediction"]
        assert yhat.shape == (300, horizon)
        np.testing.assert_equal(yhat, yhat_loaded)
        target_value = np.random.rand(300, horizon)
        target_value = dict({"y": target_value})
        assert model.evaluate(x=None, target_value=target_value, metric=['mse'])
        stop_orca_context()
Example #5
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--dir', default='/tmp/data', metavar='N',
                        help='the folder to store the MNIST data')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training per executor (default: 256)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing per executor (default: 1000)')
    parser.add_argument('--epochs', type=int, default=2, metavar='N',
                        help='number of epochs to train (default: 2)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The mode for the Spark cluster. local or yarn.')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(args.dir, train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=False)

    if args.cluster_mode == "local":
        init_orca_context(cores=1, memory="2g")
    elif args.cluster_mode == "yarn":
        init_orca_context(
            cluster_mode="yarn-client", cores=4, num_nodes=2, memory="2g",
            driver_memory="10g", driver_cores=1,
            conf={"spark.rpc.message.maxSize": "1024",
                  "spark.task.maxFailures": "1",
                  "spark.driver.extraJavaOptions": "-Dbigdl.failure.retryTimes=1"})

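    # Build the LeNet model, NLL loss and Adam optimizer, then wrap them in an Orca PyTorch Estimator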
    model = LeNet()
    model.train()
    criterion = nn.NLLLoss()

    adam = torch.optim.Adam(model.parameters(), args.lr)
    est = Estimator.from_torch(model=model, optimizer=adam, loss=criterion)
    est.fit(data=train_loader, epochs=args.epochs, validation_data=test_loader,
            validation_metrics=[Accuracy()], checkpoint_trigger=EveryEpoch())
    result = est.evaluate(data=test_loader, validation_metrics=[Accuracy()])
    for r in result:
        print(str(r))
    stop_orca_context()
Example #6
def tf2_estimator():
    
    from zoo.orca.learn.tf2.estimator import Estimator
    # import ray
    init_orca_context(cluster_mode="local", cores=4, memory="3g")

    print("running tf2 estimator")
    
    imdb = keras.datasets.imdb
    (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=1000)
    # print(train_data)
    word_index = imdb.get_word_index()
    word_index = {k: (v + 3) for k, v in word_index.items()}
    word_index["<PAD>"] = 0
    word_index["<START>"] = 1
    word_index["<UNK>"] = 2  # unknown
    word_index["<UNUSED>"] = 3

    train_data = keras.preprocessing.sequence.pad_sequences(train_data, value=word_index["<PAD>"], padding='post',
                                                            maxlen=256)

    test_data = keras.preprocessing.sequence.pad_sequences(test_data, value=word_index["<PAD>"], padding='post',
                                                            maxlen=256)

    model = keras.Sequential()
    model.add(keras.layers.Embedding(1000, 16))
    model.add(keras.layers.GlobalAveragePooling1D())
    model.add(keras.layers.Dense(16, activation=tf.nn.relu))
    model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))

    model.summary()

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])
    
    x_val = train_data[:1000]
    partial_x_train = train_data[1000:]

    y_val = train_labels[:1000]
    partial_y_train = train_labels[1000:]

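    # Wrap the training and validation splits in tf.data Datasets for the Orca Estimator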
    train_dataset = tf.data.Dataset.from_tensor_slices((partial_x_train, partial_y_train))
    validation_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
    est = Estimator.from_keras(model_creator=model)
    est.fit(data=train_dataset, batch_size=512, epochs=100, validation_data=validation_dataset)
    results = est.evaluate(validation_dataset)
    print(results)
    est.save('work/saved_model')
    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')


    stop_orca_context()
Example #7
def main():
    parser = argparse.ArgumentParser(description='PyTorch Tensorboard Example')

    parser.add_argument('--cluster_mode',
                        type=str,
                        default="local",
                        help='The cluster mode, such as local, yarn or k8s.')
    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context()
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode=args.cluster_mode, cores=4, num_nodes=2)

    writer = SummaryWriter('runs/fashion_mnist_experiment_1')
    # constant for classes
    classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal',
               'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

    # plot some random training images
    dataiter = iter(train_data_creator(config={}))
    images, labels = dataiter.next()

    # create grid of images
    img_grid = torchvision.utils.make_grid(images)

    # show images
    matplotlib_imshow(img_grid, one_channel=True)

    # write to tensorboard
    writer.add_image('four_fashion_mnist_images', img_grid)

    # inspect the model using tensorboard
    writer.add_graph(model_creator(config={}), images)
    writer.close()

    # training loss vs. epochs
    criterion = nn.CrossEntropyLoss()
    orca_estimator = Estimator.from_torch(model=model_creator,
                                          optimizer=optimizer_creator,
                                          loss=criterion,
                                          backend="torch_distributed")
    stats = orca_estimator.fit(train_data_creator, epochs=5, batch_size=4)

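    # Log the per-epoch training loss returned by fit() to TensorBoard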
    for stat in stats:
        writer.add_scalar("training_loss", stat['train_loss'], stat['epoch'])
    print("Train stats: {}".format(stats))
    val_stats = orca_estimator.evaluate(validation_data_creator)
    print("Validation stats: {}".format(val_stats))
    orca_estimator.shutdown()

    stop_orca_context()
def main(max_epoch):
    sc = init_orca_context(cores=4, memory="2g")

    # get DataSet
    # as_supervised returns tuple (img, label) instead of dict {'image': img, 'label':label}
    mnist_train = tfds.load(name="mnist", split="train", as_supervised=True)
    mnist_test = tfds.load(name="mnist", split="test", as_supervised=True)

    # Normalizes images, uint8 -> float32
    def normalize_img(image, label):
        return tf.cast(image, tf.float32) / 255., label

    mnist_train = mnist_train.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    mnist_test = mnist_test.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

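    # LeNet-style CNN: two conv/pooling blocks followed by two dense layers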
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(20,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               input_shape=(28, 28, 1),
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Conv2D(50,
                               kernel_size=(5, 5),
                               strides=(1, 1),
                               activation='tanh',
                               padding='valid'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding='valid'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(500, activation='tanh'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    est = Estimator.from_keras(keras_model=model)
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_keras_model("/tmp/mnist_keras.h5")
    stop_orca_context()
    def test_tcn_forecaster_distributed(self):
        train_data, val_data, test_data = create_data()

        init_orca_context(cores=4, memory="2g")

        forecaster = Seq2SeqForecaster(past_seq_len=24,
                                       future_seq_len=5,
                                       input_feature_num=1,
                                       output_feature_num=1,
                                       loss="mae",
                                       lr=0.01,
                                       distributed=True)

        forecaster.fit(train_data, epochs=2)
        distributed_pred = forecaster.predict(test_data[0])
        distributed_eval = forecaster.evaluate(val_data)

        model = forecaster.get_model()
        assert isinstance(model, torch.nn.Module)

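        # Switch the forecaster to local mode and check its predictions match the distributed ones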
        forecaster.to_local()
        local_pred = forecaster.predict(test_data[0])
        local_eval = forecaster.evaluate(val_data)

        np.testing.assert_almost_equal(distributed_pred, local_pred, decimal=5)

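        # ONNX inference is optional: skip the comparison when onnx/onnxruntime is not installed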
        try:
            import onnx
            import onnxruntime
            local_pred_onnx = forecaster.predict_with_onnx(test_data[0])
            local_eval_onnx = forecaster.evaluate_with_onnx(val_data)
            np.testing.assert_almost_equal(distributed_pred,
                                           local_pred_onnx,
                                           decimal=5)
        except ImportError:
            pass

        model = forecaster.get_model()
        assert isinstance(model, torch.nn.Module)

        stop_orca_context()
Example #10
def main(max_epoch):
    sc = init_orca_context(cores=4, memory="2g")

    # get DataSet
    mnist_train = tfds.load(name="mnist", split="train")
    mnist_test = tfds.load(name="mnist", split="test")

    # Normalizes images
    def normalize_img(data):
        data['image'] = tf.cast(data["image"], tf.float32) / 255.
        return data

    mnist_train = mnist_train.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    mnist_test = mnist_test.map(
        normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # tensorflow inputs
    images = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28, 1))
    # tensorflow labels
    labels = tf.placeholder(dtype=tf.int32, shape=(None, ))

    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images,
                                         num_classes=10,
                                         is_training=True)

    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    acc = accuracy(logits, labels)

    # create an estimator
    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=labels,
                               loss=loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"acc": acc})
    est.fit(data=mnist_train,
            batch_size=320,
            epochs=max_epoch,
            validation_data=mnist_test)

    result = est.evaluate(mnist_test)
    print(result)

    est.save_tf_checkpoint("/tmp/lenet/model")
    stop_orca_context()
Example #11
def orca_context_fixture(request):
    import os
    from zoo.orca import OrcaContext, init_orca_context, stop_orca_context
    OrcaContext._eager_mode = True
    access_key_id = os.getenv("AWS_ACCESS_KEY_ID")
    secret_access_key = os.getenv("AWS_SECRET_ACCESS_KEY")
    if access_key_id is not None and secret_access_key is not None:
        env = {"AWS_ACCESS_KEY_ID": access_key_id,
               "AWS_SECRET_ACCESS_KEY": secret_access_key}
    else:
        env = None
    sc = init_orca_context(cores=4, spark_log_level="INFO",
                           env=env, object_store_memory="1g")
    yield sc
    stop_orca_context()
    def test_tcn_forecaster_distributed(self):
        train_data, val_data, test_data = create_data()
        from zoo.orca import init_orca_context, stop_orca_context
        init_orca_context(cores=4, memory="2g")

        forecaster = TCNForecaster(past_seq_len=24,
                                   future_seq_len=5,
                                   input_feature_num=1,
                                   output_feature_num=1,
                                   kernel_size=3,
                                   lr=0.01,
                                   distributed=True)

        forecaster.fit(train_data[0], train_data[1], epochs=2)
        distributed_pred = forecaster.predict(test_data[0])
        distributed_eval = forecaster.evaluate(val_data[0], val_data[1])

        forecaster.to_local()
        local_pred = forecaster.predict(test_data[0])
        local_eval = forecaster.evaluate(val_data[0], val_data[1])

        np.testing.assert_almost_equal(distributed_pred, local_pred, decimal=5)

        try:
            import onnx
            import onnxruntime
            local_pred_onnx = forecaster.predict_with_onnx(test_data[0])
            local_eval_onnx = forecaster.evaluate_with_onnx(
                val_data[0], val_data[1])
            np.testing.assert_almost_equal(distributed_pred,
                                           local_pred_onnx,
                                           decimal=5)
        except ImportError:
            pass

        stop_orca_context()
Example #13
def orca_context_fixture():
    sc = init_orca_context(cores=8)

    def to_array_(v):
        return v.toArray().tolist()

    def flatten_(v):
        result = []
        for elem in v:
            result.extend(elem.toArray().tolist())
        return result

    spark = SparkSession(sc)
    spark.udf.register("to_array", to_array_, ArrayType(DoubleType()))
    spark.udf.register("flatten", flatten_, ArrayType(DoubleType()))
    yield
    stop_orca_context()
    def setUp(self):
        """ setup any state tied to the execution of the given method in a
        class.  setup_method is invoked for every test method of a class.
        """
        self.sc = init_orca_context(cores=4)

        def to_array_(v):
            return v.toArray().tolist()

        def flatten_(v):
            result = []
            for elem in v:
                result.extend(elem.toArray().tolist())
            return result

        self.spark = SparkSession(self.sc)
        self.spark.udf.register("to_array", to_array_, ArrayType(DoubleType()))
        self.spark.udf.register("flatten", flatten_, ArrayType(DoubleType()))
Example #15
    def setUp(self) -> None:
        from zoo.orca import init_orca_context
        init_orca_context(cores=8, init_ray_on_spark=True)
Example #16
import torch.optim as optim

from zoo.orca import init_orca_context, stop_orca_context
from zoo.orca.learn.pytorch import Estimator
from zoo.orca.learn.metrics import Accuracy
from zoo.orca.learn.trigger import EveryEpoch

parser = argparse.ArgumentParser(description='PyTorch Cifar10 Example')
parser.add_argument('--cluster_mode',
                    type=str,
                    default="local",
                    help='The cluster mode, such as local, yarn or k8s.')
args = parser.parse_args()

if args.cluster_mode == "local":
    init_orca_context(memory="4g")
elif args.cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client",
                      num_nodes=2,
                      driver_memory="4g",
                      conf={
                          "spark.rpc.message.maxSize":
                          "1024",
                          "spark.task.maxFailures":
                          "1",
                          "spark.driver.extraJavaOptions":
                          "-Dbigdl.failure.retryTimes=1"
                      })

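# Torchvision preprocessing applied to the CIFAR-10 images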
transform = transforms.Compose([
    transforms.ToTensor(),
Example #17
def tf_estimator():
    from zoo.orca.learn.tf.estimator import Estimator
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
    
    os.environ["HDF5_USE_FILE_LOCKING"] = 'FALSE'

    print("running tf estimator")
    
    imdb = keras.datasets.imdb
    (train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=1000)
    # print(train_data)
    word_index = imdb.get_word_index()
    word_index = {k: (v + 3) for k, v in word_index.items()}
    word_index["<PAD>"] = 0
    word_index["<START>"] = 1
    word_index["<UNK>"] = 2  # unknown
    word_index["<UNUSED>"] = 3
    
    train_data = keras.preprocessing.sequence.pad_sequences(train_data, value=word_index["<PAD>"], padding='post', 
                                                            maxlen=256)

    test_data = keras.preprocessing.sequence.pad_sequences(test_data, value=word_index["<PAD>"], padding='post',
                                                            maxlen=256)
    
    model = keras.Sequential()
    model.add(keras.layers.Embedding(1000, 16))
    model.add(keras.layers.GlobalAveragePooling1D())
    model.add(keras.layers.Dense(16, activation=tf.nn.relu))
    model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))

    model.summary()
    
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['acc'])

    x_val = train_data[:1000]
    partial_x_train = train_data[1000:]

    y_val = train_labels[:1000]
    partial_y_train = train_labels[1000:]

    train_dataset = tf.data.Dataset.from_tensor_slices((partial_x_train, partial_y_train))
    validation_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
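    # Train once with constant gradient clipping, then again with L2-norm gradient clipping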
    est = Estimator.from_keras(keras_model=model)
    est.set_constant_gradient_clipping(0.1, 0.2)
    est.fit(data=train_dataset, batch_size=512, epochs=5, validation_data=validation_dataset)
    results = est.evaluate(validation_dataset)
    print(results)
    est.clear_gradient_clipping()
    est.set_l2_norm_gradient_clipping(0.1)
    est.fit(data=train_dataset, batch_size=512, epochs=5, validation_data=validation_dataset)
    results = est.evaluate(validation_dataset)
    print(results)
    est.save('work/saved_model')
    print("save API finished")
    # est.save_tf_checkpoint('work/checkpoint')
    # est.load_tf_checkpoint('work/checkpoint')
    print("checkpoint save and load API finished")
    est.save_keras_model('work/keras_model')
    est.save_keras_weights('work/keras_weights')
    print("keras model and weights save API finished")
    # est.load_keras_model('work/keras_model')
    # est.load_keras_weights('work')
    print("keras model and weights load API finished")
    est.get_train_summary(tag='Loss')
    est.get_validation_summary(tag='Top1Accuracy')
    # Estimator.load(est, model_path='work/') # Has not been implemented
    # results = est.predict(validation_dataset)
    # print(results)

    stop_orca_context()
Example #18
    help="The number of cores you want to use for prediction on local."
    "You should only parse this arg if you set predict_local to true.")
parser.add_argument(
    "--num_predict_workers",
    type=int,
    default=4,
    help="The number of workers you want to use for prediction on local. "
    "You should only parse this arg if you set predict_local to true.")

if __name__ == "__main__":

    args = parser.parse_args()
    num_nodes = 1 if args.cluster_mode == "local" else args.num_workers
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      num_nodes=num_nodes,
                      memory=args.memory,
                      init_ray_on_spark=True)

    if not args.use_dummy_data:
        assert args.data_dir is not None, "--data_dir must be provided if not using dummy data"

    logger.info('Initializing TCMFForecaster.')
    model = TCMFForecaster(
        vbsize=128,
        hbsize=256,
        num_channels_X=[32, 32, 32, 32, 32, 1],
        num_channels_Y=[32, 32, 32, 32, 32, 1],
        kernel_size=7,
        dropout=0.2,
        rank=64,
Example #19
    # create an estimator
    est = Estimator.from_graph(inputs=images,
                               outputs=logits,
                               labels=labels,
                               loss=loss,
                               optimizer=tf.train.AdamOptimizer(),
                               metrics={"acc": acc})
    est.fit(data=train_dataset,
            batch_size=320,
            epochs=max_epoch,
            validation_data=val_dataset)

    result = est.evaluate(val_dataset)
    print(result)

    est.save_tf_checkpoint("/tmp/lenet/model")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cluster_mode', type=str, default="local",
                        help='The mode for the Spark cluster. local or yarn.')

    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context(cluster_mode="local", cores=4)
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2, driver_memory="6g")
    main(5)
    stop_orca_context()
Example #20
    parser.add_argument('-l',
                        '--learning_rate',
                        type=float,
                        default=0.02,
                        help='Learning rate for the LeNet model.')
    parser.add_argument(
        '--log_interval',
        type=int,
        default=20,
        help='The number of batches to wait before logging throughput and '
        'metrics information during the training process.')
    opt = parser.parse_args()

    num_nodes = 1 if opt.cluster_mode == "local" else opt.num_workers
    init_orca_context(cluster_mode=opt.cluster_mode,
                      cores=opt.cores,
                      num_nodes=num_nodes)

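    # Build the MXNet training config and create a distributed MXNet Estimator with separate workers and servers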
    config = create_config(
        optimizer="sgd",
        optimizer_params={'learning_rate': opt.learning_rate},
        log_interval=opt.log_interval,
        seed=42)
    estimator = Estimator.from_mxnet(config=config,
                                     model_creator=get_model,
                                     loss_creator=get_loss,
                                     validation_metrics_creator=get_metrics,
                                     num_workers=opt.num_workers,
                                     num_servers=opt.num_servers,
                                     eval_metrics_creator=get_metrics)
    estimator.fit(data=get_train_data_iter,
Example #21
from tensorflow import keras

import argparse
import numpy as np
from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras.preprocessing import sequence
from zoo.orca import init_orca_context, stop_orca_context
# from zoo.orca.learn.tf2.estimator import Estimator

parser = argparse.ArgumentParser()
parser.add_argument('--cluster_mode', type=str, default="local",
                    help='The mode for the Spark cluster. local or yarn.')
args = parser.parse_args()
cluster_mode = args.cluster_mode
if cluster_mode == "local":
    init_orca_context(cluster_mode="local", cores=4, memory="3g")
elif cluster_mode == "yarn":
    init_orca_context(cluster_mode="yarn-client", num_nodes=2, cores=2, driver_memory="3g", \
                      conf={"spark.executor.extraJavaOptions": "-Xss512m",
                            "spark.driver.extraJavaOptions": "-Xss512m"})

max_features = 20000
max_len = 200

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
Example #22
def orca_context_fixture():
    from zoo.orca import init_orca_context, stop_orca_context
    sc = init_orca_context(cores=8)
    yield
    stop_orca_context()
Example #23
parser.add_argument("--use_dummy_data",
                    action='store_true',
                    default=False,
                    help="Whether to use dummy data")
parser.add_argument("--benchmark", action='store_true', default=False)
parser.add_argument("--enable_numa_binding",
                    action='store_true',
                    default=False)

if __name__ == "__main__":

    args = parser.parse_args()
    num_nodes = 1 if args.cluster_mode == "local" else args.worker_num
    init_orca_context(cluster_mode=args.cluster_mode,
                      cores=args.cores,
                      num_nodes=num_nodes,
                      memory=args.memory,
                      init_ray_on_spark=True,
                      enable_numa_binding=args.enable_numa_binding)

    if not args.use_dummy_data:
        assert args.data_dir is not None, "--data_dir must be provided if not using dummy data"

    if not os.path.exists(args.log_dir):
        os.mkdir(args.log_dir)

    from zoo.orca.learn.tf2 import Estimator
    import tensorflow as tf

    global_batch_size = args.worker_num * args.batch_size_per_worker

    base_batch_size = 256
Example #24
raw_df = pd.read_csv("data/data.csv")

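# Clean the raw traffic data: parse timestamps, normalize AvgRate to a single unit and fill gaps on a regular 2-hour index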
df = pd.DataFrame(pd.to_datetime(raw_df.StartTime))
df['AvgRate'] = \
    raw_df.AvgRate.apply(lambda x: float(x[:-4]) if x.endswith("Mbps") else float(x[:-4]) * 1000)
df["total"] = raw_df["total"]
df.set_index("StartTime", inplace=True)
full_idx = pd.date_range(start=df.index.min(), end=df.index.max(), freq='2H')
df = df.reindex(full_idx)
drop_dts, drop_len = get_drop_dates_and_len(df)
df = rm_missing_weeks(drop_dts, drop_len, df)
df.ffill(inplace=True)
df.index.name = "datetime"
df = df.reset_index()

init_orca_context(cores=4, memory="4g", init_ray_on_spark=True)

from zoo.zouwu.autots.forecast import AutoTSTrainer
from zoo.automl.config.recipe import *

trainer = AutoTSTrainer(dt_col="datetime",
                        target_col=["AvgRate", "total"],
                        horizon=1,
                        extra_features_col=None)

look_back = (36, 84)
from zoo.automl.common.util import train_val_test_split
train_df, val_df, test_df = train_val_test_split(df,
                                                 val_ratio=0.1,
                                                 test_ratio=0.1,
                                                 look_back=look_back[0])
Example #25
    def compute_gradients(self, weights):
        self.net.variables.set_flat(weights)
        xs, ys = self.mnist.train.next_batch(self.batch_size)
        return self.net.compute_gradients(xs, ys)


if __name__ == "__main__":
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode == "yarn":
        sc = init_orca_context(
            cluster_mode=cluster_mode,
            cores=args.executor_cores,
            memory=args.executor_memory,
            init_ray_on_spark=True,
            num_executors=args.num_workers,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray,
            object_store_memory=args.object_store_memory,
            additional_archive="MNIST_data.zip#MNIST_data")
        ray_ctx = OrcaContext.get_ray_context()
    elif cluster_mode == "local":
        sc = init_orca_context(cores=args.driver_cores)
        ray_ctx = OrcaContext.get_ray_context()
    else:
        print(
            "init_orca_context failed. cluster_mode should be either 'local' or 'yarn' but got "
            + cluster_mode)

    # Create a parameter server.
    def setUp(self):
        """ setup any state tied to the execution of the given method in a
        class.  setup_method is invoked for every test method of a class.
        """
        self.sc = init_orca_context(cores=4)
Example #27
                        help="The number of workers to run on each node")
    parser.add_argument('--k8s_master', type=str, default="",
                        help="The k8s master. "
                             "It should be k8s://https://<k8s-apiserver-host>: "
                             "<k8s-apiserver-port>.")
    parser.add_argument("--container_image", type=str, default="",
                        help="The runtime k8s image. "
                             "You can change it with your k8s image.")
    parser.add_argument('--k8s_driver_host', type=str, default="",
                        help="The k8s driver host.")
    parser.add_argument('--k8s_driver_port', type=str, default="",
                        help="The k8s driver port.")

    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context(cluster_mode="local", cores=args.cores,
                          num_nodes=args.num_nodes, memory=args.memory)
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client", cores=args.cores,
                          num_nodes=args.num_nodes, memory=args.memory)
    elif args.cluster_mode == "k8s":
        if not args.k8s_master or not args.container_image \
                or not args.k8s_driver_host or not args.k8s_driver_port:
            parser.print_help()
            parser.error('k8s_master, container_image, '
                         'k8s_driver_host/port are required not to be empty')
        init_orca_context(cluster_mode="k8s", master=args.k8s_master,
                          container_image=args.container_image,
                          num_nodes=args.num_nodes, cores=args.cores,
                          conf={"spark.driver.host": args.k8s_driver_host,
                                "spark.driver.port": args.k8s_driver_port})
    train_example(workers_per_node=args.workers_per_node)
Example #28
                    steps_per_epoch=60000 // batch_size,
                    validation_data_creator=val_data_creator,
                    validation_steps=10000 // batch_size)
    print(stats)
    est.save("/tmp/mnist_keras.ckpt")
    est.restore("/tmp/mnist_keras.ckpt")
    stats = est.evaluate(val_data_creator, steps=10000 // batch_size)
    print(stats)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--cluster_mode',
                        type=str,
                        default="local",
                        help='The mode for the Spark cluster. local or yarn.')
    parser.add_argument('--max_epoch', type=int, default=5, help='max epoch')

    args = parser.parse_args()
    if args.cluster_mode == "local":
        init_orca_context(cluster_mode="local",
                          cores=4,
                          init_ray_on_spark=True)
    elif args.cluster_mode == "yarn":
        init_orca_context(cluster_mode="yarn-client",
                          num_nodes=2,
                          cores=2,
                          init_ray_on_spark=True,
                          driver_memory="6g")
    main(args.max_epoch)
Example #29
def get_data():
    def get_linear_data(a, b, size):
        x = np.arange(0, 10, 10 / size, dtype=np.float32)
        y = a*x + b
        return x, y
    train_x, train_y = get_linear_data(2, 5, 1000)
    val_x, val_y = get_linear_data(2, 5, 400)
    data = (train_x, train_y)
    validation_data = (val_x, val_y)
    return data, validation_data


if __name__ == "__main__":
    # 1. Enable auto tuning of the model built from the creator functions.
    init_orca_context(init_ray_on_spark=True)
    modelBuilder = PytorchModelBuilder(model_creator=model_creator,
                                       optimizer_creator=optimizer_creator,
                                       loss_creator=loss_creator)

    searcher = SearchEngineFactory.create_engine(backend="ray",
                                                 logs_dir="~/zoo_automl_logs",
                                                 resources_per_trial={"cpu": 2},
                                                 name="demo")

    # Pass the input data, model builder and recipe into searcher.compile. Note that if the user
    # doesn't pass a feature transformer, the default identity feature transformer will be used.
    data, validation_data = get_data()
    searcher.compile(data=data,
                     validation_data=validation_data,
                     model_builder=modelBuilder,

import sys
import time
import pickle as pkl

from optparse import OptionParser
from pyspark import StorageLevel
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType, IntegerType, ArrayType, FloatType
from zoo.orca import init_orca_context, stop_orca_context, OrcaContext
from zoo.friesian.feature import FeatureTable, StringIndex

if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("--meta", dest="meta_file")
    parser.add_option("--review", dest="review_file")
    parser.add_option("--output", dest="output")
    (options, args) = parser.parse_args(sys.argv)
    begin = time.time()
    sc = init_orca_context("local")
    spark = OrcaContext.get_spark_session()

    # read review data
    transaction_df = spark.read.json(options.review_file).select(
        ['reviewerID', 'asin', 'unixReviewTime']) \
        .withColumnRenamed('reviewerID', 'user') \
        .withColumnRenamed('asin', 'item') \
        .withColumnRenamed('unixReviewTime', 'time')\
        .dropna("any").persist(storageLevel=StorageLevel.DISK_ONLY)
    transaction_tbl = FeatureTable(transaction_df)
    print("review_tbl, ", transaction_tbl.size())

    # read meta data
    def get_category(x):
        cat = x[0][-1] if x[0][-1] is not None else "default"