def testFailures(self):
    """create_evaluate_ops must reject bad model-origin and callable args."""
    dag = DAG(
        'test_dag',
        default_args={
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
            'end_date': DEFAULT_DATE,
            'project_id': 'test-project',
            'region': 'us-east1',
        },
        schedule_interval='@daily')

    # Shared keyword arguments for every failing invocation below.  Note
    # that none of them names a model, which the first cases rely on.
    missing_origin = self.INPUT_MISSING_ORIGIN.copy()
    base_kwargs = {
        'task_prefix': 'eval-test',
        'batch_prediction_job_id': 'eval-test-prediction',
        'data_format': missing_origin['dataFormat'],
        'input_paths': missing_origin['inputPaths'],
        'prediction_path': missing_origin['outputPath'],
        'metric_fn_and_keys': (self.metric_fn, ['err']),
        'validate_fn': (lambda x: 'err=%.1f' % x['err']),
        'dag': dag,
    }

    # No model origin supplied at all.
    with self.assertRaisesRegexp(AirflowException, 'Missing model origin'):
        create_evaluate_ops(**base_kwargs)

    # Supplying more than one origin is ambiguous.
    with self.assertRaisesRegexp(AirflowException, 'Ambiguous model origin'):
        create_evaluate_ops(model_uri='abc', model_name='cde', **base_kwargs)

    with self.assertRaisesRegexp(AirflowException, 'Ambiguous model origin'):
        create_evaluate_ops(model_uri='abc', version_name='vvv',
                            **base_kwargs)

    # metric_fn must be callable.
    with self.assertRaisesRegexp(AirflowException,
                                 '`metric_fn` param must be callable'):
        bad_kwargs = base_kwargs.copy()
        bad_kwargs['metric_fn_and_keys'] = (None, ['abc'])
        create_evaluate_ops(model_uri='gs://blah', **bad_kwargs)

    # validate_fn must be callable.
    with self.assertRaisesRegexp(AirflowException,
                                 '`validate_fn` param must be callable'):
        bad_kwargs = base_kwargs.copy()
        bad_kwargs['validate_fn'] = None
        create_evaluate_ops(model_uri='gs://blah', **bad_kwargs)
def testSuccessfulRun(self):
    """Run the prediction, summary and validation ops with mocked hooks."""
    missing_origin = self.INPUT_MISSING_ORIGIN.copy()
    pred, summary, validate = create_evaluate_ops(
        task_prefix='eval-test',
        batch_prediction_job_id='eval-test-prediction',
        data_format=missing_origin['dataFormat'],
        input_paths=missing_origin['inputPaths'],
        prediction_path=missing_origin['outputPath'],
        metric_fn_and_keys=(self.metric_fn, ['err']),
        validate_fn=(lambda x: 'err=%.1f' % x['err']),
        dag=self.dag)

    # Step 1: the prediction operator submits an MLEngine batch job.
    with patch('airflow.contrib.operators.mlengine_operator.'
               'MLEngineHook') as mock_mlengine_hook:
        success_message = self.SUCCESS_MESSAGE_MISSING_INPUT.copy()
        success_message['predictionInput'] = missing_origin
        hook_instance = mock_mlengine_hook.return_value
        hook_instance.create_job.return_value = success_message

        result = pred.execute(None)

        mock_mlengine_hook.assert_called_with('google_cloud_default', None)
        hook_instance.create_job.assert_called_once_with(
            'test-project',
            {
                'jobId': 'eval_test_prediction',
                'predictionInput': missing_origin,
            },
            ANY)
        self.assertEqual(success_message['predictionOutput'], result)

    # Step 2: the summary operator launches a Dataflow python pipeline.
    with patch('airflow.contrib.operators.dataflow_operator.'
               'DataFlowHook') as mock_dataflow_hook:
        hook_instance = mock_dataflow_hook.return_value
        hook_instance.start_python_dataflow.return_value = None

        summary.execute(None)

        mock_dataflow_hook.assert_called_with(
            gcp_conn_id='google_cloud_default',
            delegate_to=None,
            poll_sleep=10)
        hook_instance.start_python_dataflow.assert_called_once_with(
            'eval-test-summary',
            {
                'prediction_path': 'gs://legal-bucket/fake-output-path',
                'labels': {'airflow-version': TEST_VERSION},
                'metric_keys': 'err',
                'metric_fn_encoded': self.metric_fn_encoded,
            },
            'airflow.contrib.operators.mlengine_prediction_summary',
            ['-m'])

    # Step 3: validation downloads the summary JSON from GCS and feeds
    # it through validate_fn.
    with patch('airflow.contrib.operators.mlengine_operator_utils.'
               'GoogleCloudStorageHook') as mock_gcs_hook:
        hook_instance = mock_gcs_hook.return_value
        hook_instance.download.return_value = '{"err": 0.9, "count": 9}'

        result = validate.execute({})

        hook_instance.download.assert_called_once_with(
            'legal-bucket', 'fake-output-path/prediction.summary.json')
        self.assertEqual('err=0.9', result)
def testSuccessfulRun(self):
    """End-to-end success path: predict, summarize, then validate.

    NOTE(review): this redefines testSuccessfulRun from earlier in the
    class, so only this definition actually runs — looks like a merge
    duplicate; confirm and drop one copy.
    """
    input_copy = self.INPUT_MISSING_ORIGIN.copy()
    pred, summary, validate = create_evaluate_ops(
        task_prefix='eval-test',
        batch_prediction_job_id='eval-test-prediction',
        data_format=input_copy['dataFormat'],
        input_paths=input_copy['inputPaths'],
        prediction_path=input_copy['outputPath'],
        metric_fn_and_keys=(self.metric_fn, ['err']),
        validate_fn=(lambda x: 'err=%.1f' % x['err']),
        dag=self.dag)

    # Prediction: the operator should create an MLEngine job and return
    # its predictionOutput.
    with patch('airflow.contrib.operators.mlengine_operator.'
               'MLEngineHook') as mlengine_hook_cls:
        job_message = self.SUCCESS_MESSAGE_MISSING_INPUT.copy()
        job_message['predictionInput'] = input_copy
        mlengine_hook = mlengine_hook_cls.return_value
        mlengine_hook.create_job.return_value = job_message

        prediction_result = pred.execute(None)

        mlengine_hook_cls.assert_called_with('google_cloud_default', None)
        mlengine_hook.create_job.assert_called_once_with(
            'test-project',
            {
                'jobId': 'eval_test_prediction',
                'predictionInput': input_copy,
            },
            ANY)
        self.assertEqual(job_message['predictionOutput'], prediction_result)

    # Summary: the operator should start a python Dataflow job with the
    # encoded metric function and keys.
    with patch('airflow.contrib.operators.dataflow_operator.'
               'DataFlowHook') as dataflow_hook_cls:
        dataflow_hook = dataflow_hook_cls.return_value
        dataflow_hook.start_python_dataflow.return_value = None

        summary.execute(None)

        dataflow_hook_cls.assert_called_with(
            gcp_conn_id='google_cloud_default',
            delegate_to=None,
            poll_sleep=10)
        dataflow_hook.start_python_dataflow.assert_called_once_with(
            'eval-test-summary',
            {
                'prediction_path': 'gs://legal-bucket/fake-output-path',
                'labels': {'airflow-version': TEST_VERSION},
                'metric_keys': 'err',
                'metric_fn_encoded': self.metric_fn_encoded,
            },
            'airflow.contrib.operators.mlengine_prediction_summary',
            ['-m'])

    # Validation: the operator downloads the summary file and applies
    # validate_fn, here yielding 'err=0.9'.
    with patch('airflow.contrib.operators.mlengine_operator_utils.'
               'GoogleCloudStorageHook') as gcs_hook_cls:
        gcs_hook = gcs_hook_cls.return_value
        gcs_hook.download.return_value = '{"err": 0.9, "count": 9}'

        validation_result = validate.execute({})

        gcs_hook.download.assert_called_once_with(
            'legal-bucket', 'fake-output-path/prediction.summary.json')
        self.assertEqual('err=0.9', validation_result)
def testFailures(self):
    """Invalid argument combinations must raise AirflowException.

    NOTE(review): this redefines testFailures from earlier in the class,
    so only this definition actually runs — looks like a merge duplicate;
    confirm and drop one copy.
    """
    dag = DAG(
        'test_dag',
        default_args={
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
            'end_date': DEFAULT_DATE,
            'project_id': 'test-project',
            'region': 'us-east1',
        },
        schedule_interval='@daily')

    input_copy = self.INPUT_MISSING_ORIGIN.copy()
    # Everything create_evaluate_ops needs *except* a model origin.
    kwargs_without_model = {
        'task_prefix': 'eval-test',
        'batch_prediction_job_id': 'eval-test-prediction',
        'data_format': input_copy['dataFormat'],
        'input_paths': input_copy['inputPaths'],
        'prediction_path': input_copy['outputPath'],
        'metric_fn_and_keys': (self.metric_fn, ['err']),
        'validate_fn': (lambda x: 'err=%.1f' % x['err']),
        'dag': dag,
    }

    with self.assertRaisesRegexp(AirflowException, 'Missing model origin'):
        _ = create_evaluate_ops(**kwargs_without_model)

    with self.assertRaisesRegexp(AirflowException, 'Ambiguous model origin'):
        _ = create_evaluate_ops(model_uri='abc', model_name='cde',
                                **kwargs_without_model)

    with self.assertRaisesRegexp(AirflowException, 'Ambiguous model origin'):
        _ = create_evaluate_ops(model_uri='abc', version_name='vvv',
                                **kwargs_without_model)

    with self.assertRaisesRegexp(AirflowException,
                                 '`metric_fn` param must be callable'):
        params = kwargs_without_model.copy()
        params['metric_fn_and_keys'] = (None, ['abc'])
        _ = create_evaluate_ops(model_uri='gs://blah', **params)

    with self.assertRaisesRegexp(AirflowException,
                                 '`validate_fn` param must be callable'):
        params = kwargs_without_model.copy()
        params['validate_fn'] = None
        _ = create_evaluate_ops(model_uri='gs://blah', **params)