コード例 #1
0
    def testFailures(self):
        """Verify that create_evaluate_ops rejects invalid argument sets.

        Every case calls create_evaluate_ops with the same baseline keyword
        arguments plus case-specific overrides, and expects an
        AirflowException whose message matches the listed pattern.
        """
        test_dag = DAG(
            'test_dag',
            default_args=dict(
                owner='airflow',
                start_date=DEFAULT_DATE,
                end_date=DEFAULT_DATE,
                project_id='test-project',
                region='us-east1',
            ),
            schedule_interval='@daily')

        prediction_input = self.INPUT_MISSING_ORIGIN.copy()
        # Baseline arguments; none of them names a model origin.
        baseline_kwargs = dict(
            task_prefix='eval-test',
            batch_prediction_job_id='eval-test-prediction',
            data_format=prediction_input['dataFormat'],
            input_paths=prediction_input['inputPaths'],
            prediction_path=prediction_input['outputPath'],
            metric_fn_and_keys=(self.metric_fn, ['err']),
            validate_fn=(lambda x: 'err=%.1f' % x['err']),
            dag=test_dag,
        )

        # (expected message pattern, keyword overrides), in the order checked.
        failure_cases = (
            ('Missing model origin', {}),
            ('Ambiguous model origin',
             {'model_uri': 'abc', 'model_name': 'cde'}),
            ('Ambiguous model origin',
             {'model_uri': 'abc', 'version_name': 'vvv'}),
            ('`metric_fn` param must be callable',
             {'model_uri': 'gs://blah',
              'metric_fn_and_keys': (None, ['abc'])}),
            ('`validate_fn` param must be callable',
             {'model_uri': 'gs://blah', 'validate_fn': None}),
        )

        for message_pattern, overrides in failure_cases:
            # Overrides win over the baseline; the baseline dict is not mutated.
            call_kwargs = dict(baseline_kwargs, **overrides)
            with self.assertRaisesRegexp(AirflowException, message_pattern):
                create_evaluate_ops(**call_kwargs)
コード例 #2
0
    def testSuccessfulRun(self):
        """Execute each operator from create_evaluate_ops against mocked hooks.

        The three returned operators (prediction, summary, validation) are
        executed in turn. For each one the hook class it uses is patched, and
        the test checks how that hook was constructed and called, plus the
        value returned by ``execute`` where one is asserted.
        """
        prediction_input = self.INPUT_MISSING_ORIGIN.copy()

        prediction_op, summary_op, validation_op = create_evaluate_ops(
            task_prefix='eval-test',
            batch_prediction_job_id='eval-test-prediction',
            data_format=prediction_input['dataFormat'],
            input_paths=prediction_input['inputPaths'],
            prediction_path=prediction_input['outputPath'],
            metric_fn_and_keys=(self.metric_fn, ['err']),
            validate_fn=(lambda x: 'err=%.1f' % x['err']),
            dag=self.dag)

        # Step 1: the prediction operator submits a job via MLEngineHook.
        mlengine_hook_path = ('airflow.contrib.operators.mlengine_operator.'
                              'MLEngineHook')
        with patch(mlengine_hook_path) as mlengine_hook_cls:
            job_response = self.SUCCESS_MESSAGE_MISSING_INPUT.copy()
            job_response['predictionInput'] = prediction_input
            mlengine_hook = mlengine_hook_cls.return_value
            mlengine_hook.create_job.return_value = job_response

            prediction_output = prediction_op.execute(None)

            mlengine_hook_cls.assert_called_with('google_cloud_default', None)
            # The expected job id uses underscores where the configured
            # batch_prediction_job_id had dashes.
            expected_job_request = {
                'jobId': 'eval_test_prediction',
                'predictionInput': prediction_input,
            }
            mlengine_hook.create_job.assert_called_once_with(
                'test-project', expected_job_request, ANY)
            self.assertEqual(job_response['predictionOutput'],
                             prediction_output)

        # Step 2: the summary operator starts a Python Dataflow job.
        dataflow_hook_path = ('airflow.contrib.operators.dataflow_operator.'
                              'DataFlowHook')
        with patch(dataflow_hook_path) as dataflow_hook_cls:
            dataflow_hook = dataflow_hook_cls.return_value
            dataflow_hook.start_python_dataflow.return_value = None

            summary_op.execute(None)

            dataflow_hook_cls.assert_called_with(
                gcp_conn_id='google_cloud_default',
                delegate_to=None,
                poll_sleep=10)
            expected_dataflow_options = {
                'prediction_path': 'gs://legal-bucket/fake-output-path',
                'labels': {'airflow-version': TEST_VERSION},
                'metric_keys': 'err',
                'metric_fn_encoded': self.metric_fn_encoded,
            }
            dataflow_hook.start_python_dataflow.assert_called_once_with(
                'eval-test-summary',
                expected_dataflow_options,
                'airflow.contrib.operators.mlengine_prediction_summary',
                ['-m'])

        # Step 3: the validation operator downloads the summary from GCS.
        gcs_hook_path = ('airflow.contrib.operators.mlengine_operator_utils.'
                         'GoogleCloudStorageHook')
        with patch(gcs_hook_path) as gcs_hook_cls:
            gcs_hook = gcs_hook_cls.return_value
            gcs_hook.download.return_value = '{"err": 0.9, "count": 9}'

            validation_result = validation_op.execute({})

            gcs_hook.download.assert_called_once_with(
                'legal-bucket', 'fake-output-path/prediction.summary.json')
            # 'err=0.9' is what the validate_fn lambda yields for err == 0.9.
            self.assertEqual('err=0.9', validation_result)
    def testSuccessfulRun(self):
        """Check the happy path of the operators built by create_evaluate_ops.

        create_evaluate_ops returns three operators. Each is executed with the
        hook class it relies on replaced by a mock, and the mock is then
        inspected for the constructor arguments and the single call that the
        test expects.
        """
        job_input = self.INPUT_MISSING_ORIGIN.copy()
        evaluate_kwargs = dict(
            task_prefix='eval-test',
            batch_prediction_job_id='eval-test-prediction',
            data_format=job_input['dataFormat'],
            input_paths=job_input['inputPaths'],
            prediction_path=job_input['outputPath'],
            metric_fn_and_keys=(self.metric_fn, ['err']),
            validate_fn=(lambda x: 'err=%.1f' % x['err']),
            dag=self.dag,
        )
        predict_task, summarize_task, validate_task = create_evaluate_ops(
            **evaluate_kwargs)

        # --- Prediction: create_job on the mocked MLEngineHook -------------
        with patch('airflow.contrib.operators.mlengine_operator.'
                   'MLEngineHook') as hook_factory:
            finished_job = self.SUCCESS_MESSAGE_MISSING_INPUT.copy()
            finished_job['predictionInput'] = job_input
            hook = hook_factory.return_value
            hook.create_job.return_value = finished_job

            outcome = predict_task.execute(None)

            hook_factory.assert_called_with('google_cloud_default', None)
            expected_create_job_args = (
                'test-project',
                {
                    'jobId': 'eval_test_prediction',
                    'predictionInput': job_input,
                },
                ANY,
            )
            hook.create_job.assert_called_once_with(*expected_create_job_args)
            self.assertEqual(finished_job['predictionOutput'], outcome)

        # --- Summary: start_python_dataflow on the mocked DataFlowHook -----
        with patch('airflow.contrib.operators.dataflow_operator.'
                   'DataFlowHook') as hook_factory:
            hook = hook_factory.return_value
            hook.start_python_dataflow.return_value = None

            summarize_task.execute(None)

            expected_hook_kwargs = {
                'gcp_conn_id': 'google_cloud_default',
                'delegate_to': None,
                'poll_sleep': 10,
            }
            hook_factory.assert_called_with(**expected_hook_kwargs)
            expected_dataflow_args = (
                'eval-test-summary',
                {
                    'prediction_path': 'gs://legal-bucket/fake-output-path',
                    'labels': {'airflow-version': TEST_VERSION},
                    'metric_keys': 'err',
                    'metric_fn_encoded': self.metric_fn_encoded,
                },
                'airflow.contrib.operators.mlengine_prediction_summary',
                ['-m'],
            )
            hook.start_python_dataflow.assert_called_once_with(
                *expected_dataflow_args)

        # --- Validation: download on the mocked GoogleCloudStorageHook -----
        with patch('airflow.contrib.operators.mlengine_operator_utils.'
                   'GoogleCloudStorageHook') as hook_factory:
            hook = hook_factory.return_value
            hook.download.return_value = '{"err": 0.9, "count": 9}'

            outcome = validate_task.execute({})

            expected_bucket = 'legal-bucket'
            expected_object = 'fake-output-path/prediction.summary.json'
            hook.download.assert_called_once_with(expected_bucket,
                                                  expected_object)
            self.assertEqual('err=0.9', outcome)
コード例 #4
0
    def testFailures(self):
        """Check the error raised by create_evaluate_ops for bad arguments.

        Covers a missing model origin, two ambiguous model-origin
        combinations, a non-callable metric function and a non-callable
        validate function. Each is expected to raise AirflowException with a
        matching message.
        """
        dag_defaults = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE,
            'end_date': DEFAULT_DATE,
            'project_id': 'test-project',
            'region': 'us-east1',
        }
        dag = DAG('test_dag',
                  default_args=dag_defaults,
                  schedule_interval='@daily')

        origin_free_input = self.INPUT_MISSING_ORIGIN.copy()
        shared_kwargs = {
            'task_prefix': 'eval-test',
            'batch_prediction_job_id': 'eval-test-prediction',
            'data_format': origin_free_input['dataFormat'],
            'input_paths': origin_free_input['inputPaths'],
            'prediction_path': origin_free_input['outputPath'],
            'metric_fn_and_keys': (self.metric_fn, ['err']),
            'validate_fn': (lambda x: 'err=%.1f' % x['err']),
            'dag': dag,
        }

        def expect_rejection(message_pattern, **overrides):
            # Merge into a copy so one case never leaks into the next.
            merged_kwargs = shared_kwargs.copy()
            merged_kwargs.update(overrides)
            with self.assertRaisesRegexp(AirflowException, message_pattern):
                create_evaluate_ops(**merged_kwargs)

        expect_rejection('Missing model origin')

        expect_rejection('Ambiguous model origin',
                         model_uri='abc', model_name='cde')

        expect_rejection('Ambiguous model origin',
                         model_uri='abc', version_name='vvv')

        expect_rejection('`metric_fn` param must be callable',
                         model_uri='gs://blah',
                         metric_fn_and_keys=(None, ['abc']))

        expect_rejection('`validate_fn` param must be callable',
                         model_uri='gs://blah',
                         validate_fn=None)