Python is_tf_v1の例、tensorflow_cloud.experimental.cloud_fit.utils.is_tf_v1 Pythonの例

コード例 #1

0

ファイルを表示

ファイル: client_test.py プロジェクト: debtaru/cloud

    def test_job_id(self, mock_serialize_assets, mock_submit_job):
        """Verifies that an explicit job_id reaches the submitted job spec.

        Under TF 1.x the only expectation is that cloud_fit raises
        RuntimeError.
        """
        # Keyword arguments shared by every cloud_fit call in this test.
        common_kwargs = dict(
            x=self._dataset,
            validation_data=self._dataset,
            remote_dir=self._remote_dir,
            job_spec=self._job_spec,
            batch_size=1,
            epochs=2,
            verbose=3,
        )

        # TF 1.x is not supported
        if utils.is_tf_v1():
            with self.assertRaises(RuntimeError):
                client.cloud_fit(self._model, **common_kwargs)
            return

        expected_id = "test_job_id"
        client.cloud_fit(self._model, job_id=expected_id, **common_kwargs)

        # The mocked submit call is unpacked as exactly two positional
        # arguments; the first one is the request body under inspection.
        positional_args, _ = mock_submit_job.call_args
        request_body, _ = positional_args
        self.assertDictContainsSubset({"job_id": expected_id}, request_body)

コード例 #2

0

ファイルを表示

ファイル: client_test.py プロジェクト: debtaru/cloud

    def test_distribution_strategy(self, mock_serialize_assets,
                                   mock_submit_job):
        """Checks how distribution_strategy is propagated into the job args.

        Covers the default strategy, an explicitly requested strategy, and an
        unknown strategy name (which must raise ValueError). Under TF 1.x the
        only expectation is that cloud_fit raises RuntimeError.
        """
        # TF 1.x is not supported
        if utils.is_tf_v1():
            with self.assertRaises(RuntimeError):
                client.cloud_fit(
                    self._model, x=self._dataset, remote_dir=self._remote_dir)
            return

        def last_training_input():
            # Body of the most recent mocked submit call, narrowed down to
            # its "trainingInput" section.
            positional_args, _ = mock_submit_job.call_args
            request_body, _ = positional_args
            return request_body["trainingInput"]

        def expected_args(strategy_name):
            # Entry point arguments expected for the given strategy name.
            return {
                "args": [
                    "--remote_dir",
                    self._remote_dir,
                    "--distribution_strategy",
                    strategy_name,
                ],
            }

        # No strategy passed: the default one must show up in the args.
        client.cloud_fit(
            self._model, x=self._dataset, remote_dir=self._remote_dir)
        self.assertDictContainsSubset(
            expected_args(MULTI_WORKER_MIRRORED_STRATEGY_NAME),
            last_training_input(),
        )

        # Explicit strategy combined with a custom job spec.
        client.cloud_fit(
            self._model,
            x=self._dataset,
            remote_dir=self._remote_dir,
            distribution_strategy=MIRRORED_STRATEGY_NAME,
            job_spec=self._job_spec,
        )
        self.assertDictContainsSubset(
            expected_args(MIRRORED_STRATEGY_NAME),
            last_training_input(),
        )

        # An unknown strategy name is rejected.
        with self.assertRaises(ValueError):
            client.cloud_fit(
                self._model,
                x=self._dataset,
                remote_dir=self._remote_dir,
                distribution_strategy="not_implemented_strategy",
                job_spec=self._job_spec,
            )

コード例 #3

0

ファイルを表示

ファイル: remote_test.py プロジェクト: xiaoral2/cloud

    def test_run(self):
        """Runs remote.run end to end and checks its outputs.

        Verifies that the output and logs directories are populated and that
        the saved model can be loaded and evaluated. Skipped under TF 1.x.
        """
        # TF 1.x is not supported. Report an explicit skip instead of
        # returning, so the test is not counted as a pass when nothing ran.
        if utils.is_tf_v1():
            self.skipTest("TF 1.x is not supported.")

        remote.run(self._remote_dir, MIRRORED_STRATEGY_NAME)
        self.assertGreaterEqual(len(tf.io.gfile.listdir(self._output_dir)), 1)
        self.assertGreaterEqual(len(tf.io.gfile.listdir(self._logs_dir)), 1)

        model = tf.keras.models.load_model(self._output_dir)

        # Test saved model load and works properly
        self.assertGreater(
            model.evaluate(self._x, self._y)[0],
            np.array([0.0], dtype=np.float32))

コード例 #4

0

ファイルを表示

ファイル: remote_test.py プロジェクト: xiaoral2/cloud

    def test_custom_callback(self):
        """Checks that a serialized custom callback fires during remote.run.

        The callback must not be invoked while the assets are serialized and
        must have been invoked exactly once after remote.run. Skipped under
        TF 1.x.
        """
        # TF 1.x is not supported. Report an explicit skip instead of
        # returning, so the test is not counted as a pass when nothing ran.
        if utils.is_tf_v1():
            self.skipTest("TF 1.x is not supported.")

        # Setting up custom callback with mock calls
        _MockCallable.reset()

        self._fit_kwargs["callbacks"] = [CustomCallbackExample()]
        client._serialize_assets(self._remote_dir, self._model,
                                 **self._fit_kwargs)

        # Verify callback function has not been called yet.
        _MockCallable.mock_callable.assert_not_called()

        remote.run(self._remote_dir, MIRRORED_STRATEGY_NAME)
        # Verifying callback functions triggered properly
        _MockCallable.mock_callable.assert_called_once_with()

コード例 #5

0

ファイルを表示

ファイル: client_test.py プロジェクト: debtaru/cloud

    def test_custom_job_spec(self, mock_submit_job):
        """Verifies that a caller-supplied job spec is what gets submitted.

        The submitted "trainingInput" must contain the expected master image
        and the entry point arguments. Under TF 1.x the only expectation is
        that cloud_fit raises RuntimeError.
        """
        # Keyword arguments shared by both cloud_fit calls in this test.
        common_kwargs = dict(
            x=self._dataset,
            validation_data=self._dataset,
            remote_dir=self._remote_dir,
            job_spec=self._job_spec,
            batch_size=1,
            epochs=2,
            verbose=3,
        )

        # TF 1.x is not supported
        if utils.is_tf_v1():
            with self.assertRaises(RuntimeError):
                client.cloud_fit(self._model, **common_kwargs)
            return

        client.cloud_fit(self._model, **common_kwargs)

        expected_training_input = {
            "masterConfig": {
                "imageUri": self._image_uri,
            },
            "args": [
                "--remote_dir",
                self._remote_dir,
                "--distribution_strategy",
                MULTI_WORKER_MIRRORED_STRATEGY_NAME,
            ],
        }

        # The mocked submit call is unpacked as exactly two positional
        # arguments; the first one is the request body under inspection.
        positional_args, _ = mock_submit_job.call_args
        request_body, _ = positional_args
        self.assertDictContainsSubset(
            expected_training_input, request_body["trainingInput"])

コード例 #6

0

ファイルを表示

ファイル: integration_test.py プロジェクト: dineshbhaisuchak/cloud

    def test_in_memory_data(self):
        """Runs cloud_fit with in-memory numpy data as an end-to-end job.

        Submits the job, waits for it to complete, then loads the trained
        model from the remote output folder and evaluates it on the same
        data. Skipped under TF 1.x.
        """
        # This test should only run in tf 2.x. Report an explicit skip
        # instead of returning, so the test is not counted as a pass when
        # nothing ran.
        if utils.is_tf_v1():
            self.skipTest("This test should only run in TF 2.x.")

        # Create a folder under remote dir for this test's data
        tmp_folder = str(uuid.uuid4())
        remote_dir = os.path.join(self._remote_dir, tmp_folder)

        # Keep track of test folders created for final clean up
        self._test_folders.append(remote_dir)

        x = np.random.random((2, 3))
        y = np.random.randint(0, 2, (2, 2))

        job_id = client.cloud_fit(
            self._model(),
            x=x,
            y=y,
            remote_dir=remote_dir,
            region=self._region,
            project_id=self._project_id,
            image_uri=self._image_uri,
            job_id="cloud_fit_e2e_test_{}_{}".format(
                _BUILD_ID.replace("-", "_"), "test_in_memory_data"
            ),
            epochs=2,
        )

        # TODO(b/169297404) Replace AIP job status logic with utils wrapper
        # Wait for AIP Training job to finish successfully
        self.assertTrue(
            google_api_client.wait_for_api_training_job_completion(
                job_id, self._project_id))

        # load model from remote dir
        trained_model = tf.keras.models.load_model(os.path.join(
            remote_dir, "output"))
        eval_results = trained_model.evaluate(x, y)

        # Accuracy should be better than zero
        self.assertListEqual(trained_model.metrics_names, ["loss", "accuracy"])
        self.assertGreater(eval_results[1], 0)

コード例 #7

0

ファイルを表示

    def test_client_with_tf_1x_raises_error(self):
        """Verifies that cloud_fit raises RuntimeError when run under TF 1.x.

        Skipped when running under any other TensorFlow version.
        """
        # This test is only applicable to TF 1.x. Report an explicit skip
        # instead of returning, so the test is not counted as a pass when
        # nothing ran.
        if not utils.is_tf_v1():
            self.skipTest("This test is only applicable to TF 1.x.")

        x = np.random.random((2, 3))
        y = np.random.randint(0, 2, (2, 2))

        # TF 1.x is not supported, verify proper error is raised for TF 1.x.
        with self.assertRaises(RuntimeError):
            client.cloud_fit(
                self._model(),
                x=x,
                y=y,
                remote_dir="gs://some_test_dir",
                region=self._region,
                project_id=self._project_id,
                image_uri=self._image_uri,
                epochs=2,
            )

コード例 #8

0

ファイルを表示

ファイル: client_test.py プロジェクト: debtaru/cloud

    def test_fit_kwargs(self, mock_submit_job):
        """Checks the scalar fit kwargs that cloud_fit serializes.

        After the call, the training assets saved under the submitted remote
        dir are loaded and their fit kwargs are compared with the values
        passed in. Under TF 1.x the only expectation is that cloud_fit raises
        RuntimeError.
        """
        # TF 1.x is not supported
        if utils.is_tf_v1():
            with self.assertRaises(RuntimeError):
                client.cloud_fit(
                    self._model,
                    x=self._dataset,
                    validation_data=self._dataset,
                    remote_dir=self._remote_dir,
                    job_spec=self._job_spec,
                    batch_size=1,
                    epochs=2,
                    verbose=3,
                )
            return

        scalar_kwargs = {"batch_size": 1, "epochs": 2, "verbose": 3}
        returned_job_id = client.cloud_fit(
            self._model,
            x=self._dataset,
            validation_data=self._dataset,
            remote_dir=self._remote_dir,
            region=self._region,
            project_id=self._project_id,
            image_uri=self._image_uri,
            **scalar_kwargs
        )

        # The mocked submit call is unpacked as exactly two positional
        # arguments; the first one is the request body under inspection.
        positional_args, _ = mock_submit_job.call_args
        request_body, _ = positional_args
        self.assertEqual(request_body["job_id"], returned_job_id)

        # The second entry point argument is the value of "--remote_dir".
        submitted_remote_dir = request_body["trainingInput"]["args"][1]
        assets_graph = tf.saved_model.load(
            os.path.join(submitted_remote_dir, "training_assets"))
        loaded_kwargs = tfds.as_numpy(assets_graph.fit_kwargs_fn())

        # NOTE(review): the loaded values are passed as the *subset* and the
        # expected values as the containing dict, so an empty loaded dict
        # would also pass -- confirm this argument order is intentional.
        self.assertDictContainsSubset(
            loaded_kwargs, {"batch_size": 1, "epochs": 2, "verbose": 3})

コード例 #9

0

ファイルを表示

ファイル: client_test.py プロジェクト: debtaru/cloud

    def test_serialize_assets(self):
        """Checks the on-disk layout produced by client._serialize_assets.

        Both the "training_assets" and "model" folders must be non-empty, and
        the pickled callbacks must deserialize back to a TensorBoard callback.
        Under TF 1.x the only expectation is that cloud_fit raises
        RuntimeError.
        """
        # TF 1.x is not supported
        if utils.is_tf_v1():
            with self.assertRaises(RuntimeError):
                client.cloud_fit(
                    self._model,
                    x=self._dataset,
                    validation_data=self._dataset,
                    remote_dir=self._remote_dir,
                    job_spec=self._job_spec,
                    batch_size=1,
                    epochs=2,
                    verbose=3,
                )
            return

        # The callback is added to the shared fixture dict in place.
        self._scalar_fit_kwargs["callbacks"] = [
            tf.keras.callbacks.TensorBoard(log_dir=self._remote_dir)
        ]
        client._serialize_assets(
            self._remote_dir, self._model, **self._scalar_fit_kwargs)

        assets_dir = os.path.join(self._remote_dir, "training_assets")
        self.assertGreaterEqual(len(tf.io.gfile.listdir(assets_dir)), 1)

        model_dir = os.path.join(self._remote_dir, "model")
        self.assertGreaterEqual(len(tf.io.gfile.listdir(model_dir)), 1)

        assets_graph = tf.saved_model.load(
            os.path.join(self._remote_dir, "training_assets"))

        # Callbacks are stored pickled; restore them and check the type of
        # the first entry.
        callbacks_blob = tfds.as_numpy(assets_graph.callbacks_fn())
        restored_callbacks = cloudpickle.loads(callbacks_blob)
        self.assertIsInstance(restored_callbacks[0],
                              tf.keras.callbacks.TensorBoard)

コード例 #10

0

ファイルを表示

def run(
    remote_dir: Text,
    distribution_strategy_text: Text
) -> None:
    """Loads the serialized model and training assets, fits, then saves.

    Training assets are read from "<remote_dir>/training_assets" and the model
    from "<remote_dir>/model". After fitting, the model is saved under
    "<remote_dir>/output", except on non-chief workers of a multi-worker
    mirrored run, which save to a throwaway folder that is deleted right away.

    Args:
        remote_dir: Temporary cloud storage folder that contains model and
            Dataset graph. This folder is also used for job output.
        distribution_strategy_text: Specifies the distribution strategy for
            remote execution when a jobspec is provided. Accepted values are
            strategy names as specified by 'tf.distribute.<strategy>.__name__'.
    """
    logging.info("Setting distribution strategy to %s",
                 distribution_strategy_text)

    uses_multi_worker = (
        distribution_strategy_text == MULTI_WORKER_MIRRORED_STRATEGY_NAME)
    strategy_factory = SUPPORTED_DISTRIBUTION_STRATEGIES[
        distribution_strategy_text]
    strategy = strategy_factory()

    assets_dir = os.path.join(remote_dir, "training_assets")
    model_dir = os.path.join(remote_dir, "model")

    with strategy.scope():
        if utils.is_tf_v1():
            assets = tf.compat.v2.saved_model.load(
                export_dir=assets_dir, tags=None)
        else:
            assets = tf.saved_model.load(assets_dir)

        # Start from the serialized scalar kwargs (if any), then layer the
        # data and callbacks on top of them.
        fit_kwargs = {}
        if hasattr(assets, "fit_kwargs_fn"):
            fit_kwargs = tfds.as_numpy(assets.fit_kwargs_fn())
            logging.info("fit_kwargs were loaded successfully.")

        if hasattr(assets, "x_fn"):
            fit_kwargs["x"] = assets.x_fn()
            logging.info("x was loaded successfully.")

        if hasattr(assets, "y_fn"):
            fit_kwargs["y"] = assets.y_fn()
            logging.info("y was loaded successfully.")

        if hasattr(assets, "validation_data_fn"):
            fit_kwargs["validation_data"] = assets.validation_data_fn()

        if hasattr(assets, "callbacks_fn"):
            # NOTE(review): unpickling executes arbitrary code; this assumes
            # the contents of remote_dir are trusted -- confirm.
            callbacks_blob = tfds.as_numpy(assets.callbacks_fn())
            fit_kwargs["callbacks"] = pickle.loads(callbacks_blob)
            logging.info("callbacks were loaded successfully.")

        model = tf.keras.models.load_model(model_dir)
        logging.info("Model was loaded from %s successfully.", model_dir)
        model.fit(**fit_kwargs)

    on_chief = _is_current_worker_chief()
    if on_chief or not uses_multi_worker:
        model.save(os.path.join(remote_dir, "output"))
        return

    # We need to set a different directory on workers when using MWMS since we
    # will run into errors due to concurrent writes to the same directory.
    # This is a workaround for the issue described in b/148619319.
    worker_suffix = str(uuid.uuid4())
    tmp_worker_dir = os.path.join(
        remote_dir, "output/tmp/workers_" + worker_suffix)
    logging.info("Saving model from worker in temporary folder %s.",
                 tmp_worker_dir)
    model.save(tmp_worker_dir)

    logging.info("Removing temporary folder %s.", tmp_worker_dir)
    _delete_dir(tmp_worker_dir)

コード例 #11

0

ファイルを表示

def cloud_fit(model,
              remote_dir,
              region=None,
              project_id=None,
              image_uri=None,
              distribution_strategy=DEFAULT_DISTRIBUTION_STRATEGY,
              job_spec=None,
              job_id=None,
              **fit_kwargs):
    """Facilitates remote execution of in memory Model and Dataset on AI Platform.

    The caller's `job_spec` is never modified: the function works on a deep
    copy, so the same spec can safely be reused for several calls.

    Args:
        model: A compiled Keras Model.
        remote_dir: Google Cloud Storage path for temporary assets and AI Platform
            training output. Will overwrite value in job_spec.
        region: Target region for running the AI Platform Training job.
        project_id: Project id where the training should be deployed to.
        image_uri: Base image to use for AI Platform Training.
        distribution_strategy: Specifies the distribution strategy for remote
            execution when a jobspec is provided. Accepted values are strategy names
            as specified by 'tf.distribute.<strategy>.__name__'.
        job_spec: AI Platform Training job_spec, will take precedence over all other
            provided values except for remote_dir. If none is provided a default
            cluster spec and distribution strategy will be used.
        job_id: A name to use for the AI Platform Training job (mixed-case letters,
            numbers, and underscores only, starting with a letter).
        **fit_kwargs: Args to pass to model.fit() including training and eval data.
            Only keyword arguments are supported. Callback functions will be
            serialized as is, they must be available in run time environment.

    Returns:
        AI Platform job ID

    Raises:
        ValueError: If distribution_strategy is not one of the supported
            strategies.
        RuntimeError: If executing in graph mode, eager execution is required for
            cloud_fit.
        NotImplementedError: Tensorflow v1.x is not supported. (This is a
            subclass of RuntimeError.)
    """
    # Imported locally because only this function needs it.
    import copy

    logging.set_verbosity(logging.INFO)

    if distribution_strategy not in SUPPORTED_DISTRIBUTION_STRATEGIES:
        raise ValueError(
            "{} is not supported. Supported Strategies are {}".format(
                distribution_strategy,
                list(SUPPORTED_DISTRIBUTION_STRATEGIES.keys()),
            ))

    if utils.is_tf_v1():
        raise NotImplementedError("Tensorflow v1.x is not supported.")

    # Can only export Datasets which were created executing eagerly
    # Raise an error if eager execution is not enabled.
    if not tf.executing_eagerly():
        raise RuntimeError("Eager execution is required for cloud_fit.")

    entry_point_args = [
        "--remote_dir",
        remote_dir,
        "--distribution_strategy",
        distribution_strategy,
    ]

    if job_spec:
        # Work on a private copy. Writing args, useChiefInTfConfig and job_id
        # into the caller's dict would leak state between calls, e.g. a
        # job_id from an earlier call being reused by a later one.
        job_spec = copy.deepcopy(job_spec)
        job_spec["trainingInput"]["args"] = entry_point_args
    else:
        job_spec = _default_job_spec(
            region=region,
            image_uri=image_uri,
            entry_point_args=entry_point_args,
        )

    _serialize_assets(remote_dir, model, **fit_kwargs)

    # Setting AI Platform Training to use chief in TF_CONFIG environment variable
    # https://cloud.google.com/ai-platform/training/docs/distributed-training-details#chief-versus-master
    job_spec["trainingInput"]["useChiefInTfConfig"] = "True"

    # If job_id is provided overwrite the job_id value.
    if job_id:
        job_spec["job_id"] = job_id

    _submit_job(job_spec, project_id)
    return job_spec["job_id"]