예제 #1
0
def submit_synthetic_local(c, epochs=1):
    """Submit a TensorFlow training job on synthetic imagenet data, run locally.

    Hands ``resnet_main.py`` (from the ``src`` directory under ``_BASE_PATH``)
    to ``TFExperimentCLI.submit_local`` under the experiment name
    ``synthetic_images_local`` and prints whatever that call returns.

    Args:
        c: Not used by this function. Presumably the task-runner context
            passed in by the caller -- TODO confirm.
        epochs (int, optional): Number of epochs to run training for; it is
            forwarded to the script as ``--epochs``. Defaults to 1.
    """
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("synthetic_images_local")

    # Everything the submission needs, named up front for readability.
    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {"--epochs": epochs}
    conda_file = os.path.join(_BASE_PATH, "environment_gpu.yml")

    result = experiment.submit_local(
        source_dir,
        "resnet_main.py",
        script_args,
        dependencies_file=conda_file,
        wait_for_completion=True,
    )
    print(result)
def submit_local(c):
    """Template command that is not implemented; modify it before use.

    As written, this function always raises. The statements after the
    ``raise`` are a template for submitting a job to execute locally on a GPU:
    fill in the placeholder values and remove the ``raise`` to enable them.

    Args:
        c: Not used by this function. Presumably the task-runner context
            passed in by the caller -- TODO confirm.

    Raises:
        NotImplementedError: Always, until the function is customised.
    """
    reason = "You need to modify this call before being able to use it"
    raise NotImplementedError(reason)

    # ---- Template below: unreachable until the raise above is removed ----
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("<YOUR-EXPERIMENT-NAME>")
    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {"YOUR": "ARGS"}
    result = experiment.submit_local(
        source_dir,
        "<YOUR-TRAINING-SCRIPT>",
        script_args,
        dependencies_file="TensorFlow/environment_gpu.yml",
        wait_for_completion=True,
    )
    print(result)
예제 #3
0
def submit_benchmark_local(c):
    """Submit a PyTorch synthetic-data benchmark job for local execution.

    Hands ``pytorch_synthetic_benchmark.py`` (from the ``src`` directory under
    ``_BASE_PATH``) to ``TFExperimentCLI.submit_local`` with
    ``--model resnet50`` and ``--batch-size 64``, then prints whatever that
    call returns.

    Args:
        c: Not used by this function. Presumably the task-runner context
            passed in by the caller -- TODO confirm.
    """
    # NOTE(review): the script is a PyTorch benchmark but the experiment is
    # created through TFExperimentCLI under the name "synthetic_images_local"
    # -- confirm this is intentional.
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("synthetic_images_local")

    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {"--model": "resnet50", "--batch-size": 64}
    conda_file = os.path.join(_BASE_PATH, "environment_gpu.yml")

    result = experiment.submit_local(
        source_dir,
        "pytorch_synthetic_benchmark.py",
        script_args,
        dependencies_file=conda_file,
        wait_for_completion=True,
    )
    print(result)
def submit_tf_benchmark_local(c):
    """Submit the TensorFlow benchmark job on synthetic data, run locally.

    Hands ``tf_cnn_benchmarks.py`` (from the ``src`` directory under
    ``_BASE_PATH``) to ``TFExperimentCLI.submit_local`` under the experiment
    name ``tf_benchmark`` and prints whatever that call returns.

    Note:
        Runs the ResNet 50 model with a batch size of 256 and mixed precision
        (``--use_fp16`` is passed with an empty value), using ``horovod`` for
        ``--variable_update``.

    Args:
        c: Not used by this function. Presumably the task-runner context
            passed in by the caller -- TODO confirm.
    """
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("tf_benchmark")

    source_dir = os.path.join(_BASE_PATH, "src")
    benchmark_args = {
        "--model": "resnet50",
        "--batch_size": 256,
        "--variable_update": "horovod",
        "--use_fp16": "",
    }
    conda_file = os.path.join(_BASE_PATH, "environment_gpu.yml")

    result = experiment.submit_local(
        source_dir,
        "tf_cnn_benchmarks.py",
        benchmark_args,
        dependencies_file=conda_file,
        wait_for_completion=True,
    )
    print(result)
예제 #5
0
def submit_images_local(c, epochs=1):
    """Submit a TensorFlow training job on real imagenet data, run locally.

    Hands ``resnet_main.py`` (from the ``src`` directory under ``_BASE_PATH``)
    to ``TFExperimentCLI.submit_local`` under the experiment name
    ``real_images_local``. The path held in ``env_values['DATA']`` is passed
    as a ``-v <path>:/data`` docker argument, and the script is pointed at
    ``/data/train`` and ``/data/validation``. Prints whatever the submit call
    returns.

    Args:
        c: Not used by this function. Presumably the task-runner context
            passed in by the caller -- TODO confirm.
        epochs (int, optional): Number of epochs to run training for; it is
            forwarded to the script as ``--epochs``. Defaults to 1.
    """
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("real_images_local")

    source_dir = os.path.join(_BASE_PATH, "src")
    # NOTE(review): "--data-format" is hyphenated while the other keys use
    # underscores -- confirm it matches what the training script expects.
    training_args = {
        "--training_data_path": "/data/train",
        "--validation_data_path": "/data/validation",
        "--epochs": epochs,
        "--data_type": "images",
        "--data-format": "channels_first",
    }
    conda_file = os.path.join(_BASE_PATH, "environment_gpu.yml")
    volume_mount = f"{env_values['DATA']}:/data"

    result = experiment.submit_local(
        source_dir,
        "resnet_main.py",
        training_args,
        dependencies_file=conda_file,
        docker_args=["-v", volume_mount],
        wait_for_completion=True,
    )
    print(result)