def testSuccessCreateTrainingJobWithOptionalArgs(self):
        """Optional operator arguments must appear in the submitted job.

        The expected request is ``TRAINING_INPUT`` with ``runtimeVersion``,
        ``pythonVersion`` and ``jobDir`` added to its ``trainingInput``
        section. The operator is executed against a patched ``MLEngineHook``
        and the test asserts that ``create_job`` is the only call made on the
        hook instance and that it receives the expected request.
        """
        # Deep copy: the nested 'trainingInput' dict is modified below and the
        # class-level fixture must stay untouched.
        training_input = copy.deepcopy(self.TRAINING_INPUT)
        training_input['trainingInput']['runtimeVersion'] = '1.6'
        training_input['trainingInput']['pythonVersion'] = '3.5'
        training_input['trainingInput'][
            'jobDir'] = 'gs://some-bucket/jobs/test_training'

        with patch('airflow.contrib.operators.mlengine_operator.MLEngineHook') \
                as mock_hook:
            # The mocked hook reports the job as finished successfully.
            success_response = self.TRAINING_INPUT.copy()
            success_response['state'] = 'SUCCEEDED'
            hook_instance = mock_hook.return_value
            hook_instance.create_job.return_value = success_response

            training_op = MLEngineTrainingOperator(
                runtime_version='1.6',
                python_version='3.5',
                job_dir='gs://some-bucket/jobs/test_training',
                **self.TRAINING_DEFAULT_ARGS)
            training_op.execute(None)

            mock_hook.assert_called_with(gcp_conn_id='google_cloud_default',
                                         delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance.
            # assertEqual is used because the assertEquals alias is
            # deprecated and no longer exists in Python 3.12+.
            self.assertEqual(len(hook_instance.mock_calls), 1)
            hook_instance.create_job.assert_called_with(
                'test-project', training_input, ANY)
    def testSuccessCreateTrainingJobWithOptionalArgs(self):
        """Run the operator with runtime/python version and job dir set.

        Asserts that the hook is built with the default connection, that
        ``create_job`` is the single call made on the hook instance, and that
        the request it receives is ``TRAINING_INPUT`` extended with the three
        optional ``trainingInput`` fields.
        """
        hook_path = 'airflow.contrib.operators.mlengine_operator.MLEngineHook'

        # Expected request: the base fixture plus the optional fields.
        expected_job = copy.deepcopy(self.TRAINING_INPUT)
        expected_job['trainingInput'].update({
            'runtimeVersion': '1.6',
            'pythonVersion': '3.5',
            'jobDir': 'gs://some-bucket/jobs/test_training',
        })

        with patch(hook_path) as hook_cls:
            hook = hook_cls.return_value
            # Mocked hook answers with a successfully finished job.
            hook.create_job.return_value = dict(
                self.TRAINING_INPUT, state='SUCCEEDED')

            operator = MLEngineTrainingOperator(
                job_dir='gs://some-bucket/jobs/test_training',
                python_version='3.5',
                runtime_version='1.6',
                **self.TRAINING_DEFAULT_ARGS)
            operator.execute(None)

            hook_cls.assert_called_with(
                gcp_conn_id='google_cloud_default', delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance
            self.assertEqual(len(hook.mock_calls), 1)
            hook.create_job.assert_called_with(
                'test-project', expected_job, ANY)
Exemple #3
0
def training_tasks(model, dag, PROJECT_ID, BUCKET, DATA_DIR, MODEL_NAME,
                   MODEL_VERSION, MODEL_LOCATION):
    """Build the training and saved-model hand-off tasks for one model.

    Three operators are created on ``dag`` and returned without any
    dependencies set between them.

    Args:
        model: Model identifier. Every "." is replaced by "_" before the
            value is used in task ids, the job id and storage paths.
        dag: DAG passed to each operator.
        PROJECT_ID: Passed to the training operator as ``project_id``.
        BUCKET: Prefix of the code package, job directory and trained-model
            output paths.
        DATA_DIR: Prefix of the train/eval CSV patterns. It is concatenated
            directly with the model name, so it presumably ends with "/" --
            verify against the caller.
        MODEL_NAME: Not used by this function.
        MODEL_VERSION: Not used by this function.
        MODEL_LOCATION: Prefix concatenated directly with the model name to
            form the location the exported model is copied to.

    Returns:
        Tuple ``(training_op, remove_old_saved_model_op,
        copy_new_saved_model_op)``.
    """
    # The code package name comes from the model code in the module directory
    region = "us-east1"
    package_uri = BUCKET + "/taxifare/code/taxifare-0.1.tar.gz"
    jobs_root = BUCKET + "/jobs"

    # Dots are replaced so the name can be reused in ids and paths.
    model_slug = model.replace(".", "_")

    # ML Engine training job; the id embeds the time this function runs.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    job_id = "taxifare_{}_{}".format(model_slug, timestamp)
    train_files = DATA_DIR + "{}/train-*.csv".format(model_slug)
    eval_files = DATA_DIR + "{}/eval-*.csv".format(model_slug)
    output_dir = BUCKET + "/taxifare/trained_model/{}".format(model_slug)
    job_dir = jobs_root + "/" + job_id

    # One flag/value pair per line.
    training_args = [
        "--job-dir", job_dir,
        "--train_data_paths", train_files,
        "--eval_data_paths", eval_files,
        "--output_dir", output_dir,
        "--train_steps", str(500),
        "--train_batch_size", str(32),
        "--eval_steps", str(500),
        "--eval_batch_size", str(32),
        "--nbuckets", str(8),
        "--hidden_units", "128,32,4",
    ]

    # Reference: https://airflow.apache.org/integration.html#cloud-ml-engine
    train_task_id = "ml_engine_training_{}_task".format(model_slug)
    ml_engine_training_op = MLEngineTrainingOperator(
        task_id=train_task_id,
        project_id=PROJECT_ID,
        job_id=job_id,
        package_uris=[package_uri],
        training_python_module="trainer.task",
        training_args=training_args,
        region=region,
        scale_tier="BASIC",
        runtime_version="1.13",
        python_version="3.5",
        dag=dag)

    saved_model_dir = MODEL_LOCATION + model_slug

    # Empties the saved-model location if `gsutil ls` can list it.
    remove_task_id = "bash_remove_old_saved_model_{}_task".format(model_slug)
    remove_command = (
        "if gsutil ls {0} 2> /dev/null; then gsutil -m rm -rf {0}/*; else true; fi"
        .format(saved_model_dir))
    bash_remove_old_saved_model_op = BashOperator(
        task_id=remove_task_id,
        bash_command=remove_command,
        dag=dag)

    # Syncs the last entry listed under export/exporter/ to the saved-model
    # location.
    copy_task_id = "bash_copy_new_saved_model_{}_task".format(model_slug)
    copy_command = (
        "gsutil -m rsync -d -r `gsutil ls {0}/export/exporter/ | tail -1` {1}"
        .format(output_dir, saved_model_dir))
    bash_copy_new_saved_model_op = BashOperator(
        task_id=copy_task_id,
        bash_command=copy_command,
        dag=dag)

    return (ml_engine_training_op, bash_remove_old_saved_model_op,
            bash_copy_new_saved_model_op)
    def testSuccessCreateTrainingJob(self):
        """A job reported as SUCCEEDED lets the operator finish normally.

        The operator is built from ``TRAINING_DEFAULT_ARGS`` only and run
        against a patched ``MLEngineHook``. The test asserts that
        ``create_job`` is the only call made on the hook instance and that it
        receives ``TRAINING_INPUT`` unchanged.
        """
        with patch('airflow.contrib.operators.mlengine_operator.MLEngineHook') \
                as mock_hook:
            # The mocked hook reports the job as finished successfully.
            success_response = self.TRAINING_INPUT.copy()
            success_response['state'] = 'SUCCEEDED'
            hook_instance = mock_hook.return_value
            hook_instance.create_job.return_value = success_response

            training_op = MLEngineTrainingOperator(**self.TRAINING_DEFAULT_ARGS)
            training_op.execute(None)

            mock_hook.assert_called_with(gcp_conn_id='google_cloud_default',
                                         delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance.
            # assertEqual is used because the assertEquals alias is
            # deprecated and no longer exists in Python 3.12+.
            self.assertEqual(len(hook_instance.mock_calls), 1)
            hook_instance.create_job.assert_called_with(
                'test-project', self.TRAINING_INPUT, ANY)
    def testSuccessCreateTrainingJob(self):
        """Execute the operator with default arguments and a successful job.

        ``MLEngineHook`` is patched so that ``create_job`` returns
        ``TRAINING_INPUT`` with ``state`` set to ``SUCCEEDED``. The test checks
        how the hook was constructed, that exactly one call was made on the
        hook instance, and that ``create_job`` received ``TRAINING_INPUT``.
        """
        with patch('airflow.contrib.operators.mlengine_operator.MLEngineHook') \
                as mock_hook:
            # Shallow copy is enough: only a top-level key is added.
            success_response = self.TRAINING_INPUT.copy()
            success_response['state'] = 'SUCCEEDED'
            hook_instance = mock_hook.return_value
            hook_instance.create_job.return_value = success_response

            training_op = MLEngineTrainingOperator(
                **self.TRAINING_DEFAULT_ARGS)
            training_op.execute(None)

            mock_hook.assert_called_with(gcp_conn_id='google_cloud_default',
                                         delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance.
            # assertEquals is a deprecated alias (absent from Python 3.12+),
            # so assertEqual is used.
            self.assertEqual(len(hook_instance.mock_calls), 1)
            hook_instance.create_job.assert_called_with(
                'test-project', self.TRAINING_INPUT, ANY)
    def testFailedJobError(self):
        """A job that comes back FAILED must raise RuntimeError.

        The patched hook returns ``TRAINING_INPUT`` with ``state`` set to
        ``FAILED`` and an ``errorMessage``. The test asserts that executing
        the operator raises ``RuntimeError`` carrying that message, and that
        ``create_job`` was the only call made on the hook instance.
        """
        hook_path = 'airflow.contrib.operators.mlengine_operator.MLEngineHook'
        error_text = 'A failure message'

        with patch(hook_path) as hook_cls:
            hook = hook_cls.return_value
            hook.create_job.return_value = dict(
                self.TRAINING_INPUT, state='FAILED', errorMessage=error_text)

            # Construction stays inside the context so any RuntimeError raised
            # while building the operator is captured as well.
            with self.assertRaises(RuntimeError) as raised:
                operator = MLEngineTrainingOperator(
                    **self.TRAINING_DEFAULT_ARGS)
                operator.execute(None)

            hook_cls.assert_called_with(gcp_conn_id='google_cloud_default',
                                        delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance
            self.assertEqual(len(hook.mock_calls), 1)
            hook.create_job.assert_called_with(
                'test-project', self.TRAINING_INPUT, ANY)
            self.assertEqual(error_text, str(raised.exception))
    def testFailedJobError(self):
        """Executing the operator raises when the job state is FAILED.

        ``create_job`` on the patched hook returns ``TRAINING_INPUT`` plus
        ``state='FAILED'`` and ``errorMessage='A failure message'``. The test
        expects a ``RuntimeError`` whose string form equals the error message
        and verifies that ``create_job`` was the only hook call.
        """
        with patch('airflow.contrib.operators.mlengine_operator.MLEngineHook') \
                as mock_hook:
            failure_response = self.TRAINING_INPUT.copy()
            failure_response['state'] = 'FAILED'
            failure_response['errorMessage'] = 'A failure message'
            hook_instance = mock_hook.return_value
            hook_instance.create_job.return_value = failure_response

            with self.assertRaises(RuntimeError) as context:
                training_op = MLEngineTrainingOperator(
                    **self.TRAINING_DEFAULT_ARGS)
                training_op.execute(None)

            mock_hook.assert_called_with(gcp_conn_id='google_cloud_default',
                                         delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance.
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists in Python 3.12+.
            self.assertEqual(len(hook_instance.mock_calls), 1)
            hook_instance.create_job.assert_called_with(
                'test-project', self.TRAINING_INPUT, ANY)
            self.assertEqual('A failure message', str(context.exception))
    def testHttpError(self):
        """An HttpError raised by the hook propagates out of the operator.

        ``create_job`` on the patched hook raises ``errors.HttpError`` built
        from a response with status 403. The test asserts that executing the
        operator raises that error type, that the status on the raised
        exception is 403, and that ``create_job`` was the only hook call.
        """
        http_error_code = 403
        with patch('airflow.contrib.operators.mlengine_operator.MLEngineHook') \
                as mock_hook:
            hook_instance = mock_hook.return_value
            hook_instance.create_job.side_effect = errors.HttpError(
                resp=httplib2.Response({'status': http_error_code}),
                content=b'Forbidden')

            with self.assertRaises(errors.HttpError) as context:
                training_op = MLEngineTrainingOperator(
                    **self.TRAINING_DEFAULT_ARGS)
                training_op.execute(None)

            mock_hook.assert_called_with(gcp_conn_id='google_cloud_default',
                                         delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance.
            # assertEqual replaces the deprecated assertEquals alias, which
            # no longer exists in Python 3.12+.
            self.assertEqual(len(hook_instance.mock_calls), 1)
            hook_instance.create_job.assert_called_with(
                'test-project', self.TRAINING_INPUT, ANY)
            self.assertEqual(http_error_code, context.exception.resp.status)
    def testHttpError(self):
        """The operator does not swallow an HttpError from ``create_job``.

        The patched hook's ``create_job`` is given an ``errors.HttpError``
        side effect with response status 403. The test checks that the error
        surfaces from ``execute`` with the same status and that no other
        method was called on the hook instance.
        """
        http_error_code = 403
        with patch('airflow.contrib.operators.mlengine_operator.MLEngineHook') \
                as mock_hook:
            hook_instance = mock_hook.return_value
            hook_instance.create_job.side_effect = errors.HttpError(
                resp=httplib2.Response({
                    'status': http_error_code
                }), content=b'Forbidden')

            with self.assertRaises(errors.HttpError) as context:
                training_op = MLEngineTrainingOperator(
                    **self.TRAINING_DEFAULT_ARGS)
                training_op.execute(None)

            mock_hook.assert_called_with(
                gcp_conn_id='google_cloud_default', delegate_to=None)
            # Make sure only 'create_job' is invoked on hook instance.
            # assertEquals is a deprecated alias (absent from Python 3.12+),
            # so assertEqual is used.
            self.assertEqual(len(hook_instance.mock_calls), 1)
            hook_instance.create_job.assert_called_with(
                'test-project', self.TRAINING_INPUT, ANY)
            self.assertEqual(http_error_code, context.exception.resp.status)
Exemple #10
0
import datetime
from airflow import DAG
from airflow.contrib.operators.mlengine_operator import MLEngineTrainingOperator

# Defaults applied to the DAG below.
default_args = {
    'start_date': datetime.datetime(2018, 1, 1),
}

# Unscheduled DAG ("iris") holding a single ML Engine training task.
with DAG(
        "iris",
        schedule_interval=None,
        default_args=default_args) as dag:

    task = MLEngineTrainingOperator(
        # Placeholder: replace with the real project ID. It is quoted so the
        # file parses before the value is filled in.
        project_id="<プロジェクトID>",
        # The job id embeds the time at which this file is evaluated.
        job_id="iris_" + datetime.datetime.now().strftime('%Y%m%d%H%M%S'),
        # Package locations are given as a list; the bucket part of the path
        # is a placeholder to replace.
        package_uris=["gs://<ストレージのパス>/iris-0.1.tar.gz"],
        # No extra command-line arguments; expressed as an empty list rather
        # than an empty string.
        training_args=[],
        region="asia-east1",
        training_python_module="trainer.task",
        task_id="iris",
        # Version is passed as a string, not a float.
        python_version="2.7",
    )
        "--eval_data_paths", eval_files, "--output_dir", output_dir,
        "--train_steps",
        str(500), "--train_batch_size",
        str(32), "--eval_steps",
        str(500), "--eval_batch_size",
        str(32), "--nbuckets",
        str(8), "--hidden_units", "128,32,4"
    ]

    # Reference: https://airflow.apache.org/integration.html#cloud-ml-engine
    ml_engine_training_op = MLEngineTrainingOperator(
        task_id="ml_engine_training_{}_task".format(model.replace(".", "_")),
        project_id=PROJECT_ID,
        job_id=job_id,
        package_uris=[PACKAGE_URI],
        training_python_module="trainer.task",
        training_args=training_args,
        region=REGION,
        scale_tier="BASIC",
        runtime_version="1.13",
        python_version="3.5",
        dag=dag)

    MODEL_NAME = "taxifare_"
    MODEL_VERSION = "v1"
    MODEL_LOCATION = BUCKET + "/taxifare/saved_model/"

    bash_remove_old_saved_model_op = BashOperator(
        task_id="bash_remove_old_saved_model_{}_task".format(
            model.replace(".", "_")),
        bash_command=
        "if gsutil ls {0} 2> /dev/null; then gsutil -m rm -rf {0}/*; else true; fi"
Exemple #12
0
# DAG for launching the training job; the schedule value is still a
# placeholder.
dag = DAG(
    'launch_training',
    schedule_interval='TODO',
    default_args=default_args,
)

# ----------------------------------
# Tasks Definitions Here
# ----------------------------------

# Task that echoes a greeting.
opr_hello = BashOperator(
    dag=dag,
    task_id='say_Hi',
    bash_command='echo "Hi!!"',
)

# Arguments handed to the training operator as training_args.
# check doc https://cloud.google.com/ai-platform/training/docs/machine-types
training_args = ["--scale-tier", "BASIC"]

# Training job submission; the job id embeds the time at which this file is
# evaluated.
launch_train = MLEngineTrainingOperator(
    dag=dag,
    task_id='ml_engine_training_op',
    job_id="taxi_fare_training_pipeline_{}".format(
        datetime.datetime.now().strftime('%Y%m%d_%H%M%S')),
    project_id=PROJECT_ID,
    region=REGION,
    runtime_version=RUNTIME_VERSION,
    python_version=PYTHON_VERSION,
    package_uris=PACKAGE_URI,
    training_python_module='{}.{}'.format(PACKAGE_NAME, FILENAME),
    training_args=training_args,
    job_dir="gs://{}/trainings".format(BUCKET_NAME),
)

# The greeting task runs before the training task.
opr_hello >> launch_train
    bash_remove_trained_model_op = BashOperator(
        task_id="bash_remove_old_trained_model_{}_task"
                .format(model.replace(".", "_")),
        bash_command=("if gsutil ls {0} 2> /dev/null;"
                      "then gsutil -m rm -rf {0}/*; else true; fi"
                      .format(output_dir + model.replace(".", "_"))),
        dag=dag)

    # Task to submit AI Platform training job
    ml_engine_training_op = MLEngineTrainingOperator(
        task_id="ml_engine_training_{}_task".format(model.replace(".", "_")),
        project_id=PROJECT_ID,
        job_id=job_id,
        package_uris=[PACKAGE_URI],
        training_python_module="trainer.task",
        training_args=training_args,
        region=REGION,
        scale_tier="BASIC",
        runtime_version="2.1",
        python_version="3.7",
        dag=dag
     )

    # SQL Query to check if new model metrics satisfy conditions. Condition is
    # set to be high to ensure that the model likely meets the conditions.
    model_check_sql = """
                SELECT
                    IF(rmse - 10.0 >= 0, rmse - 10.0, 0) AS rmse
                FROM
                    `{0}.{1}.{2}`
                WHERE