Beispiel #1
0
def main(args):
    """Train a Keras model under MultiWorkerMirroredStrategy and save it.

    Builds the distribution strategy, the training dataset and the model,
    runs ``model.fit``, and saves the result to ``/tmp/model``. On the worker
    whose ``TASK_INDEX`` is 0, Polyaxon/TensorBoard callbacks are added and
    the saved model is logged through ``tracking.log_model``.

    Args:
        args: Parsed command-line arguments. ``args.epochs`` is read here and
            the whole object is forwarded to ``get_model``.
    """
    # MultiWorkerMirroredStrategy creates copies of all variables in the model's
    # layers on each device across all workers.
    # If your GPUs don't support NCCL, replace the `communication` argument
    # with another CollectiveCommunication option.
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        communication=tf.distribute.experimental.CollectiveCommunication.NCCL)

    BATCH_SIZE_PER_REPLICA = 64
    # Global batch size: the per-replica size scaled by the replica count.
    BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

    with strategy.scope():
        ds_train = make_datasets_unbatched().batch(BATCH_SIZE).repeat()
        options = tf.data.Options()
        options.experimental_distribute.auto_shard_policy = \
            tf.data.experimental.AutoShardPolicy.DATA
        ds_train = ds_train.with_options(options)
        # Model building/compiling need to be within `strategy.scope()`.
        multi_worker_model = get_model(args)

    # Callback for printing the LR at the end of each epoch.
    class PrintLR(tf.keras.callbacks.Callback):

        def on_epoch_end(self, epoch, logs=None):
            print('\nLearning rate for epoch {} is {}'.format(
                epoch + 1, multi_worker_model.optimizer.lr.numpy()))

    # `decay` is the learning-rate schedule function; it is not defined in
    # this function and is expected to be available from the enclosing module.
    callbacks = [
        PrintLR(),
        tf.keras.callbacks.LearningRateScheduler(decay),
    ]

    # Polyaxon
    if TASK_INDEX == 0:
        plx_callback = PolyaxonKerasCallback()
        plx_model_callback = PolyaxonKerasModelCheckpoint(save_weights_only=True)
        log_dir = tracking.get_tensorboard_path()
        # Extend rather than replace the list: replacing it dropped PrintLR
        # and the LearningRateScheduler on this worker only, so it trained
        # without the LR schedule the other workers applied.
        callbacks += [
            tf.keras.callbacks.TensorBoard(log_dir=log_dir),
            plx_model_callback,
            plx_callback,
        ]

    # Keras' `model.fit()` trains the model with specified number of epochs and
    # number of steps per epoch. Note that the numbers here are for demonstration
    # purposes only and may not sufficiently produce a model with good quality.
    multi_worker_model.fit(ds_train,
                           epochs=args.epochs,
                           steps_per_epoch=70,
                           callbacks=callbacks)

    # Every worker saves; only the TASK_INDEX == 0 worker reports the saved
    # model to Polyaxon.
    multi_worker_model.save("/tmp/model")

    if TASK_INDEX == 0:
        tracking.log_model(path="/tmp/model", framework="tensorflow")
Beispiel #2
0
    # Load the features and target as two arrays, then hold out 20% of the
    # rows for testing with a fixed seed so the split is reproducible.
    # NOTE(review): `load_boston` was deprecated and later removed from
    # scikit-learn (1.2) — confirm the pinned scikit-learn version.
    X, y = datasets.load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.20,
                                                        random_state=1012)

    # Polyaxon: record a reference to each of the four data splits.
    tracking.log_data_ref(content=X_train, name='x_train')
    tracking.log_data_ref(content=y_train, name='y_train')
    tracking.log_data_ref(content=X_test, name='x_test')
    tracking.log_data_ref(content=y_test, name='y_test')

    # Hyperparameters come from the parsed command-line arguments.
    rfr = RandomForestRegressor(
        n_estimators=args.n_estimators,
        max_depth=args.max_depth,
        min_samples_split=args.min_samples_split,
    )
    rfr.fit(X_train, y_train)

    # Polyaxon: `log_regressor` is defined outside this view; presumably it
    # evaluates the fitted model on the test split and logs the results —
    # TODO confirm.
    log_regressor(rfr, X_test, y_test)

    # Logging the model as joblib.
    # The temporary directory is deleted when the `with` block exits, so the
    # dump and the `log_model` call both happen inside it.
    with tempfile.TemporaryDirectory() as d:
        model_path = os.path.join(d, "model.joblib")
        joblib.dump(rfr, model_path)
        tracking.log_model(model_path,
                           name="model",
                           framework="scikit-learn",
                           versioned=False)
Beispiel #3
0
        '--random_state',
        type=int,
        default=33,
    )
    args = parser.parse_args()

    # Polyaxon: start tracking before any metric or model is logged.
    tracking.init()

    # Train and eval the model with given parameters.
    # The same path is passed to `train_and_eval` and later to `log_model`;
    # presumably `train_and_eval` writes the fitted model there — TODO confirm.
    model_path = "model.joblib"
    metrics = train_and_eval(
        model_path=model_path,
        n_neighbors=args.n_neighbors,
        leaf_size=args.leaf_size,
        metric=args.metric,
        p=args.p,
        weights=args.weights,
        test_size=args.test_size,
        random_state=args.random_state,
    )

    # Print the metrics locally. The placeholder must be filled with
    # `.format`; passing `metrics` as a second `print` argument left a literal
    # "{}" in the output.
    print("Testing metrics: {}".format(metrics))
    # Polyaxon: `metrics` is unpacked as keyword arguments, so it must be a
    # mapping with string keys.
    tracking.log_metrics(**metrics)

    # Logging the model
    tracking.log_model(model_path, name="iris-model", framework="scikit-learn")