perturbation_interval=1,
    hyperparam_mutations={
        # distribution for resampling
        "lr": lambda: np.random.uniform(0.001, 1),
        # allow perturbations within this set of categorical values
        "momentum": [0.8, 0.9, 0.99],
    })

reporter = CLIReporter()
reporter.add_metric_column("val_loss", "loss")
reporter.add_metric_column("val_accuracy", "acc")

analysis = tune.run(
    NoFaultToleranceTrainable,
    num_samples=4,
    config={
        "lr": tune.choice([0.001, 0.01, 0.1]),
        "momentum": 0.8,
        "head_location": None,
        "worker_locations": None
    },
    max_failures=-1,  # used for fault tolerance
    checkpoint_freq=2,  # used for fault tolerance
    progress_reporter=reporter,
    scheduler=pbt_scheduler,
    callbacks=[FailureInjectorCallback()],
    queue_trials=True,
    stop={"training_iteration": 1} if args.smoke_test else None)

print(analysis.get_best_config(metric="val_loss", mode="min"))
        # distribution for resampling
        "lr": lambda: np.random.uniform(0.001, 1),
        # allow perturbations within this set of categorical values
        "momentum": [0.8, 0.9, 0.99],
    })

reporter = CLIReporter()
reporter.add_metric_column("val_loss", "loss")
reporter.add_metric_column("val_accuracy", "acc")

analysis = tune.run(
    TorchTrainable,
    num_samples=4,
    config={
        "lr": tune.choice([0.001, 0.01, 0.1]),
        "momentum": 0.8,
        "head_location": None,
        "worker_locations": None
    },
    max_failures=-1,  # used for fault tolerance
    checkpoint_freq=2,  # used for fault tolerance
    progress_reporter=reporter,
    scheduler=pbt_scheduler,
    callbacks=[
        FailureInjectorCallback(time_between_checks=90),
        ProgressCallback()
    ],
    stop={"training_iteration": 1} if args.smoke_test else None)

print(analysis.get_best_config(metric="val_loss", mode="min"))
Exemple #3
0
    hyperparam_mutations={
        # distribution for resampling
        "lr": lambda: np.random.uniform(0.001, 1),
        # allow perturbations within this set of categorical values
        "momentum": [0.8, 0.9, 0.99],
    },
)

reporter = CLIReporter()
reporter.add_metric_column("val_loss", "loss")
reporter.add_metric_column("val_accuracy", "acc")

analysis = tune.run(
    TorchTrainable,
    num_samples=4,
    config={
        "lr": tune.choice([0.001, 0.01, 0.1]),
        "momentum": 0.8,
        "head_location": None,
        "worker_locations": None,
    },
    max_failures=-1,  # used for fault tolerance
    checkpoint_freq=2,  # used for fault tolerance
    progress_reporter=reporter,
    scheduler=pbt_scheduler,
    callbacks=[FailureInjectorCallback(time_between_checks=90), ProgressCallback()],
    stop={"training_iteration": 1} if args.smoke_test else None,
)

print(analysis.get_best_config(metric="val_loss", mode="min"))