Example no. 1
 def docker_config_cache_from(self):
     ret_val = tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.ipynb"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(image_build_bucket=_TEST_BUCKET),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
         job_labels={
             "job": "docker_config_cache_from",
             "team": "on_notebook_tests",
         },
     )
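     # Second run: tag the rebuilt image with the URI returned by the first run
     # and pass the same URI as `cache_from` so cached layers can be reused.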
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.ipynb"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(image_build_bucket=_TEST_BUCKET,
                                        image=ret_val["docker_image"],
                                        cache_from=ret_val["docker_image"]),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
         job_labels={
             "job": "docker_config_cache_from",
             "team": "on_notebook_tests",
         },
     )
Example no. 2
 def auto_one_device_strategy_cloud_build(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(image_build_bucket=_TEST_BUCKET),
     )
Example no. 3
 def docker_config_cloud_build(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(image_build_bucket=_TEST_BUCKET),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
     )
Example no. 4
 def docker_config_parent_img(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(
             parent_image="gcr.io/deeplearning-platform-release"
             "/tf2-gpu.2-2:latest"),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
     )
Example no. 5
 def auto_dist_strat_mwms_with_parent_img(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         distribution_strategy="auto",
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(
             parent_image="gcr.io/deeplearning-platform-release"
             "/tf2-gpu.2-2:latest"),
     )
Example no. 6
 def auto_tpu_strategy(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements_tpu_strategy.txt"),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["CPU"],
         worker_count=1,
         worker_config=tfc.COMMON_MACHINE_CONFIGS["TPU"],
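         # The TPU example pins the parent image to TF 2.1.0, the release
         # these TPU examples are written against.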
         docker_config=tfc.DockerConfig(
             parent_image="tensorflow/tensorflow:2.1.0"),
     )
Example no. 7
 def auto_one_device_strategy_with_image(self):
     ret_val = tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
     )
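     # Second run: reuse the Docker image URI returned by the first run via
     # DockerConfig(image=...).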
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(image=ret_val["docker_image"]),
     )
Example no. 8
 def docker_config_parent_img(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.ipynb"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(
             parent_image="gcr.io/deeplearning-platform-release"
             "/tf2-gpu.2-2:latest"),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
         job_labels={
             "job": "docker_config_parent_img",
             "team": "on_notebook_tests",
         },
     )
Example no. 9
 def auto_tpu_strategy(self):
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.ipynb"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements_tpu_strategy.txt"),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["CPU"],
         worker_count=1,
         worker_config=tfc.COMMON_MACHINE_CONFIGS["TPU"],
         docker_config=tfc.DockerConfig(
             parent_image="tensorflow/tensorflow:2.1.0"),
         job_labels={
             "job": "auto_tpu_strategy",
             "team": "on_notebook_tests",
         },
     )
Example no. 10
 def docker_config_image(self):
     ret_val = tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
     )
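     # Second run: reuse the Docker image built by the first run instead of
     # building a new one.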
     return tfc.run(
         entry_point=os.path.join(self.test_data_path,
                                  "mnist_example_using_fit.py"),
         requirements_txt=os.path.join(self.test_data_path,
                                       "requirements.txt"),
         docker_config=tfc.DockerConfig(image=ret_val["docker_image"]),
         chief_config=tfc.COMMON_MACHINE_CONFIGS["P100_1X"],
     )
Example no. 11
callbacks = None

model.fit(train_data,
          epochs=epochs,
          callbacks=callbacks,
          validation_data=test_data,
          verbose=2)

# Calling `tfc.run` with the `auto` distribution strategy and a multi-GPU
# chief_config. This automatically applies TensorFlow's MirroredStrategy
# when training this model.
# Tip: Move this call to the top of this file if you do not want to
# train your model locally first.
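# No entry_point is given, so the calling script (or notebook) itself is
# packaged and used as the entry point.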
tfc.run(
    requirements_txt="tests/testdata/requirements.txt",
    chief_config=tfc.MachineConfig(
        cpu_cores=8,
        memory=30,
        accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
        accelerator_count=2,
    ),
    docker_config=tfc.DockerConfig(image_build_bucket=GCP_BUCKET),
)

# Save, load and evaluate the model
if tfc.remote():
    SAVE_PATH = os.path.join("gs://", GCP_BUCKET, MODEL_PATH)
    model.save(SAVE_PATH)
    model = tf.keras.models.load_model(SAVE_PATH)
model.evaluate(test_data)
Example no. 12
import argparse
import os

import autokeras as ak
import tensorflow_cloud as tfc
from tensorflow.keras.datasets import mnist

parser = argparse.ArgumentParser(description="Model save path arguments.")
parser.add_argument("--path",
                    required=True,
                    type=str,
                    help="Keras model save path")
args = parser.parse_args()

tfc.run(
    chief_config=tfc.COMMON_MACHINE_CONFIGS["V100_1X"],
    docker_config=tfc.DockerConfig(parent_image="haifengjin/autokeras:1.0.3"),
)
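# This script is the entry point, so the data preparation and AutoKeras search
# below run as part of the submitted cloud job.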

# Prepare the dataset.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
print(x_train.shape)  # (60000, 28, 28)
print(y_train.shape)  # (60000,)
print(y_train[:3])  # array([5, 0, 4], dtype=uint8)

# Initialize the ImageClassifier.
clf = ak.ImageClassifier(max_trials=2)
# Search for the best model.
clf.fit(x_train, y_train, epochs=10)
# Evaluate on the testing data.
print("Accuracy: {accuracy}".format(accuracy=clf.evaluate(x_test, y_test)[1]))
Example no. 13
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse

import tensorflow_cloud as tfc

parser = argparse.ArgumentParser(
    description="Model cloud bucket name argument.")
parser.add_argument("--bucket_name",
                    required=True,
                    type=str,
                    help="Cloud bucket name")
args = parser.parse_args()

# Automated MirroredStrategy: chief config with multiple GPUs
tfc.run(
    entry_point="tests/testdata/mnist_example_using_fit_no_reqs.py",
    distribution_strategy="auto",
    chief_config=tfc.MachineConfig(
        cpu_cores=8,
        memory=30,
        accelerator_type=tfc.AcceleratorType.NVIDIA_TESLA_T4,
        accelerator_count=2,
    ),
    worker_count=0,
    stream_logs=True,
    docker_config=tfc.DockerConfig(image_build_bucket=args.bucket_name),
)
Example no. 14
# Copyright 2020 Google LLC. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import tensorflow_cloud as tfc

gcp_bucket = "your-gcp-bucket"

tfc.run(
    entry_point="train_model.py",
    requirements_txt="requirements.txt",
    docker_config=tfc.DockerConfig(image_build_bucket=gcp_bucket),
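    # Stream logs from the remote job back to this session.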
    stream_logs=True,
)
# If you are using a custom image, you can install additional modules via a
# requirements.txt file.
with open("requirements.txt", "w") as f:
    f.write("tensorflow-cloud\n")

# Optional: some recommended base images. If you do not provide one, the
# system will choose a default for you.
TF_GPU_IMAGE = "gcr.io/deeplearning-platform-release/tf2-gpu.2-5"
TF_CPU_IMAGE = "gcr.io/deeplearning-platform-release/tf2-cpu.2-5"

# Submit a single node training job using GPU.
tfc.run(
    distribution_strategy="auto",
    requirements_txt="requirements.txt",
    docker_config=tfc.DockerConfig(parent_image=TF_GPU_IMAGE,
                                   image_build_bucket=GCS_BUCKET),
    chief_config=tfc.COMMON_MACHINE_CONFIGS["K80_1X"],
    job_labels={"job": JOB_NAME},
)
"""
## Training Results

### Reconnect your Colab instance

Most remote training jobs are long-running. If you are using Colab, it may time
out before the training results are available.

In that case, **rerun the following sections in order** to reconnect and
configure your Colab instance to access the training results.

1.   Import required modules