def submit_synthetic_local(c, epochs=1):
    """Launch a local TensorFlow training run on synthetic imagenet data.

    Submits ``resnet_main.py`` from the ``src`` directory under
    ``_BASE_PATH`` through ``TFExperimentCLI.submit_local`` and prints the
    object that call returns.

    Args:
        c: Context argument; not used by this function.
        epochs (int, optional): Forwarded to the training script as
            ``--epochs``. Defaults to 1.
    """
    # Imported lazily so the module can be loaded without aml_compute.
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("synthetic_images_local")

    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {"--epochs": epochs}
    deps_file = os.path.join(_BASE_PATH, "environment_gpu.yml")

    submitted = experiment.submit_local(
        source_dir,
        "resnet_main.py",
        script_args,
        dependencies_file=deps_file,
        wait_for_completion=True,
    )
    print(submitted)
def submit_local(c):
    """Template for submitting a custom job for local execution on a GPU.

    This command is intentionally not implemented: it always raises until
    it has been edited. The code after the ``raise`` is an example call to
    adapt (experiment name, training script and arguments are
    placeholders).

    Args:
        c: Context argument; not used by this function.

    Raises:
        NotImplementedError: Always, until the template is modified.
    """
    message = "You need to modify this call before being able to use it"
    raise NotImplementedError(message)

    # Everything below is unreachable until the raise above is removed;
    # it is kept as a ready-to-edit example.
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("<YOUR-EXPERIMENT-NAME>")

    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {"YOUR": "ARGS"}

    submitted = experiment.submit_local(
        source_dir,
        "<YOUR-TRAINING-SCRIPT>",
        script_args,
        dependencies_file="TensorFlow/environment_gpu.yml",
        wait_for_completion=True,
    )
    print(submitted)
def submit_benchmark_local(c):
    """Launch the PyTorch synthetic-data benchmark for local execution.

    Submits ``pytorch_synthetic_benchmark.py`` from the ``src`` directory
    under ``_BASE_PATH`` with ``--model resnet50`` and ``--batch-size 64``,
    then prints the object returned by ``submit_local``.

    Args:
        c: Context argument; not used by this function.
    """
    # NOTE(review): a PyTorch script is submitted through TFExperimentCLI
    # under the "synthetic_images_local" experiment name -- confirm this
    # is intended rather than a copy/paste leftover.
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("synthetic_images_local")

    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {
        "--model": "resnet50",
        "--batch-size": 64,
    }
    deps_file = os.path.join(_BASE_PATH, "environment_gpu.yml")

    submitted = experiment.submit_local(
        source_dir,
        "pytorch_synthetic_benchmark.py",
        script_args,
        dependencies_file=deps_file,
        wait_for_completion=True,
    )
    print(submitted)
def submit_tf_benchmark_local(c):
    """Launch the TensorFlow CNN benchmark on synthetic data, locally.

    Submits ``tf_cnn_benchmarks.py`` from the ``src`` directory under
    ``_BASE_PATH`` and prints the object returned by ``submit_local``.

    Note:
        Runs the ResNet 50 model with a batch size of 256, the ``horovod``
        variable-update mode and the ``--use_fp16`` flag (mixed precision).

    Args:
        c: Context argument; not used by this function.
    """
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("tf_benchmark")

    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {
        "--model": "resnet50",
        "--batch_size": 256,
        "--variable_update": "horovod",
        # Passed with an empty value, i.e. as a bare flag.
        "--use_fp16": "",
    }
    deps_file = os.path.join(_BASE_PATH, "environment_gpu.yml")

    submitted = experiment.submit_local(
        source_dir,
        "tf_cnn_benchmarks.py",
        script_args,
        dependencies_file=deps_file,
        wait_for_completion=True,
    )
    print(submitted)
def submit_images_local(c, epochs=1):
    """Launch a local TensorFlow training run on real imagenet images.

    Submits ``resnet_main.py`` from the ``src`` directory under
    ``_BASE_PATH``. The directory named by ``env_values['DATA']`` is passed
    to docker as a ``-v <DATA>:/data`` volume argument, and the script is
    pointed at ``/data/train`` and ``/data/validation``.

    Args:
        c: Context argument; not used by this function.
        epochs (int, optional): Forwarded to the training script as
            ``--epochs``. Defaults to 1.

    Raises:
        KeyError: If ``env_values`` has no ``'DATA'`` entry.
    """
    from aml_compute import TFExperimentCLI

    experiment = TFExperimentCLI("real_images_local")

    source_dir = os.path.join(_BASE_PATH, "src")
    script_args = {
        "--training_data_path": "/data/train",
        "--validation_data_path": "/data/validation",
        "--epochs": epochs,
        "--data_type": "images",
        "--data-format": "channels_first",
    }
    deps_file = os.path.join(_BASE_PATH, "environment_gpu.yml")
    # Volume argument for docker: <DATA> -> /data, matching the paths above.
    volume_args = ["-v", f"{env_values['DATA']}:/data"]

    submitted = experiment.submit_local(
        source_dir,
        "resnet_main.py",
        script_args,
        dependencies_file=deps_file,
        docker_args=volume_args,
        wait_for_completion=True,
    )
    print(submitted)