def test_result_callback_no_get(mock_civis):
    """The completion callback fires even when `get` is never called."""
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock('spam')
    on_done = mock.MagicMock()

    finished = ContainerFuture(1, 2, client=mock.MagicMock())
    finished.set_result(Response({'state': 'success'}))

    # Only construct the result object; `.get()` is deliberately not called.
    civis.parallel._CivisBackendResult(finished, on_done)

    assert on_done.call_count == 1
def test_result_success(mock_civis):
    """A succeeded job hands its downloaded output back through `get`."""
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock('spam')

    finished = ContainerFuture(1, 2, client=mock.MagicMock())
    finished.set_result(Response({'state': 'success'}))
    on_done = mock.MagicMock()

    result = civis.parallel._CivisBackendResult(finished, on_done)

    assert result.get() == 'spam'
    assert on_done.call_count == 1
def test_result_exception(mock_civis):
    """An error from the job is re-raised when the result is fetched."""
    job_error = ZeroDivisionError()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock(job_error)
    on_done = mock.MagicMock()

    failed = ContainerFuture(1, 2, client=mock.MagicMock())
    failed._set_api_exception(Response({'state': 'failed'}))

    result = civis.parallel._CivisBackendResult(failed, on_done)

    with pytest.raises(ZeroDivisionError):
        result.get()

    # The callback is expected to stay untouched for a failed job.
    assert on_done.call_count == 0
def test_result_eventual_success(mock_civis):
    """A succeeded job's result comes back even if the download is flaky.

    The download mock is built with ``max_n_err=2`` connection errors, so
    the download has to be retried a few times before it succeeds.
    """
    connection_error = requests.ConnectionError()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock(
        'spam', max_n_err=2, exc=connection_error)
    on_done = mock.MagicMock()

    finished = ContainerFuture(1, 2, client=mock.MagicMock())
    finished.set_result(Response({'state': 'success'}))

    result = civis.parallel._CivisBackendResult(finished, on_done)

    assert result.get() == 'spam'
    assert on_done.call_count == 1
def test_result_eventual_failure(mock_civis):
    """A connection error that never clears is eventually raised by `get`.

    Connection errors are retried up to 5 times; the download mock is built
    with ``max_n_err=10`` so the retries run out before a download succeeds.
    """
    connection_error = requests.ConnectionError()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock(
        'spam', max_n_err=10, exc=connection_error)
    on_done = mock.MagicMock()

    finished = ContainerFuture(1, 2, client=mock.MagicMock())
    finished.set_result(Response({'state': 'success'}))

    result = civis.parallel._CivisBackendResult(finished, on_done)

    with pytest.raises(requests.ConnectionError):
        result.get()

    assert on_done.call_count == 0
def test_result_callback_exception(mock_civis):
    """An error hit while retrieving the result is raised by `.get`."""
    download_error = ZeroDivisionError()
    mock_civis.io.civis_to_file.side_effect = download_error
    on_done = mock.MagicMock()

    # Simulate a job which succeeded but whose outputs raise an
    # exception when we try to download them.
    future = ContainerFuture(1, 2, client=mock.MagicMock())
    future._set_api_exception(Response({'state': 'succeeded'}))

    result = civis.parallel._CivisBackendResult(future, on_done)

    with pytest.raises(ZeroDivisionError):
        result.get()

    assert on_done.call_count == 0
def test_civis_to_multifile_passes_client(
        self, m_sql_script, m_civis_to_file, m_CivisFuture, *mocks):
    """Check that `civis_to_multifile_csv` forwards its `client` kwarg."""

    def _write_empty_json(_, buf, *args, **kwargs):
        # Some JSON has to land in the buffer to avoid errors downstream.
        return buf.write(b'{}')

    m_civis_to_file.side_effect = _write_empty_json
    m_sql_script.return_value = (mock.MagicMock(), mock.MagicMock())
    api_client = mock.MagicMock()

    civis.io.civis_to_multifile_csv('sql', 'db', client=api_client)

    m_civis_to_file.assert_called_once_with(
        mock.ANY, mock.ANY, client=api_client)
def _make_paginated_response(path, params):
    """Build a `PaginatedResponse` backed by three mocked pages.

    The mock endpoint serves a page of three jobs, a page of two jobs and
    finally an empty page, in that order.

    Returns
    -------
    tuple
        ``(paginator, mock_endpoint)``
    """
    first_page = [
        {'id': 1, 'name': 'job_1'},
        {'id': 2, 'name': 'job_2'},
        {'id': 3, 'name': 'job_3'},
    ]
    second_page = [
        {'id': 4, 'name': 'job_4'},
        {'id': 5, 'name': 'job_5'},
    ]
    pages = [first_page, second_page, []]

    mock_endpoint = mock.MagicMock()
    # One mocked response per successive `_make_request` call.
    mock_endpoint._make_request.side_effect = [
        _create_mock_response(page, {}) for page in pages
    ]
    mock_endpoint._return_type = 'snake'

    return PaginatedResponse(path, params, mock_endpoint), mock_endpoint
def test_train_data_(mock_cio):
    """`training_metadata` returns the JSON read for the training run.

    Also checks that the file ID lookup is made for 'model_info.json' on
    the training job and run IDs, using the future's client.
    """
    data_location = '/green/eggs/and/ham'
    expected_meta = {
        'run': {
            'configuration': {'data': {'location': data_location}},
            'status': 'succeeded',
        },
    }
    mock_cio.file_to_json.return_value = expected_meta

    def _always_succeeded(*args, **kwargs):
        return Response({'state': 'succeeded'})

    api_client = mock.MagicMock()
    api_client.scripts.get_containers_runs = _always_succeeded

    model_future = _model.ModelFuture(
        job_id=1, run_id=2, train_job_id=11, train_run_id=13,
        client=api_client)

    assert model_future.training_metadata == expected_meta
    mock_cio.file_id_from_run_output.assert_called_with(
        'model_info.json', 11, 13, client=api_client)
def test_create_method_iterator_kwarg():
    """A method declaring the paging arguments accepts ``iterator=True``."""
    paging_names = ('limit', 'page_num', 'order', 'order_by')
    arg_specs = [
        {"name": name, "in": 'query', "required": False, "doc": ""}
        for name in paging_names
    ]
    generated = _resources.create_method(
        arg_specs, 'get', 'mock_name', '/objects', 'fake_doc')
    endpoint = mock.MagicMock()

    generated(endpoint, iterator=True)

    endpoint._call_api.assert_called_once_with(
        'get', '/objects', {}, {}, iterator=True)
def test_create_method_unexpected_kwargs():
    """Generated methods accept declared kwargs and reject unknown ones."""
    arg_specs = [
        {"name": 'foo', "in": 'query', "required": True, "doc": ""},
        {"name": 'bar', "in": 'query', "required": False, "doc": ""},
    ]
    generated = _resources.create_method(
        arg_specs, 'get', 'mock_name', '/objects', 'fake_doc')
    endpoint = mock.MagicMock()

    # Declared keyword arguments go straight through to the API call.
    generated(endpoint, foo=0, bar=0)
    endpoint._call_api.assert_called_once_with(
        'get', '/objects', {"foo": 0, "bar": 0}, {}, iterator=False)

    # The message embeds a set repr, which differs between Python 2 and 3.
    expected_msg = (
        ("mock_name() got an unexpected keyword argument(s) "
         "{'baz'}")
        if six.PY3 else
        ("mock_name() got an unexpected keyword argument(s) "
         "set(['baz'])")
    )

    # An undeclared keyword argument must raise TypeError.
    with pytest.raises(TypeError) as excinfo:
        generated(endpoint, foo=0, bar=0, baz=0)

    assert str(excinfo.value) == expected_msg
def test_infer_no_job_id_error(mock_make_factory, mock_job):
    """`infer_backend_factory` raises RuntimeError without CIVIS_JOB_ID."""
    api_client = mock.MagicMock()
    api_client.scripts.get_containers.return_value = mock_job

    # Run with a completely empty environment so no job ID can be found.
    empty_environ = mock.patch.dict('os.environ', {}, clear=True)
    with empty_environ, pytest.raises(RuntimeError):
        civis.parallel.infer_backend_factory(client=api_client)
def test_check_is_fit():
    """The decorated function runs normally when `train_result_` is set."""
    fitted_pipe = mock.MagicMock()
    fitted_pipe.train_result_ = True

    @_model._check_fit_initiated
    def always_seven(pipe):
        return 7

    assert always_seven(fitted_pipe) == 7
def test_result_exception_no_result():
    """A failed job with no outputs yields a generic TransportableException."""
    # The client mock is passed in explicitly rather than patched globally;
    # this verifies that _CivisBackendResult uses the client object
    # attached to the CivisFuture it is given.
    api_client = mock.MagicMock().APIClient()
    api_client.scripts.list_containers_runs_outputs.return_value = []
    on_done = mock.MagicMock()

    failed = ContainerFuture(1, 2, client=api_client)
    failed._set_api_exception(Response({'state': 'failed'}))

    result = civis.parallel._CivisBackendResult(failed, on_done)

    with pytest.raises(TransportableException) as exc:
        result.get()

    # The failed run's response should be visible in the error text.
    assert "{'state': 'failed'}" in str(exc.value)
    assert on_done.call_count == 0
def test_check_is_fit_exception():
    """The decorated function raises ValueError if `train_result_` is None."""
    unfitted_pipe = mock.MagicMock()
    unfitted_pipe.train_result_ = None

    @_model._check_fit_initiated
    def always_seven(pipe):
        return 7

    with pytest.raises(ValueError):
        always_seven(unfitted_pipe)
def test_pagination():
    """Iterating a `PaginatedResponse` requests pages lazily, in order."""
    pages = [
        [
            {'id': 1, 'name': 'job_1'},
            {'id': 2, 'name': 'job_2'},
            {'id': 3, 'name': 'job_3'},
        ],
        [
            {'id': 4, 'name': 'job_4'},
            {'id': 5, 'name': 'job_5'},
        ],
        [],
    ]
    path = '/objects'
    params = {'param': 'value'}

    mock_endpoint = mock.MagicMock()
    mock_endpoint._return_type = 'snake'
    mock_endpoint._make_request.side_effect = [
        _create_mock_response(page, {}) for page in pages
    ]
    make_request = mock_endpoint._make_request

    paginator = iter(PaginatedResponse(path, params, mock_endpoint))

    # Creating the iterator alone must not trigger any API call.
    make_request.assert_not_called()

    collected = []
    for position, item in enumerate(paginator):
        assert item['id'] == position + 1
        collected.append(item)

        # Lazy evaluation: exactly one request is made until the first
        # item of the second page is needed.
        if position < 3:
            make_request.assert_called_once_with(
                'GET', path, dict(params, page_num=1))
        else:
            make_request.assert_called_with(
                'GET', path, dict(params, page_num=2))

    # A third request is made; its empty response ends the pagination.
    assert make_request.call_count == 3
    assert len(collected) == 5
def test_modelpipeline_init_err():
    """An API error with an unknown status code raises NotImplementedError."""
    error_response = Response(
        {'content': None, 'status_code': 9999, 'reason': None})
    api_client = mock.MagicMock()
    api_client.templates.get_scripts.side_effect = CivisAPIError(
        error_response)

    with pytest.raises(NotImplementedError):
        _model.ModelPipeline(
            LogisticRegression(), 'test',
            etl=LogisticRegression(), client=api_client)

    # clean up the module-level attribute set during construction
    del _model._NEWEST_CIVISML_VERSION
def test_modelpipeline_init_newest():
    """A pipeline built against a mocked client keeps the `etl` it was given."""
    api_client = mock.MagicMock()
    api_client.templates.get_scripts.return_value = {}
    etl_step = LogisticRegression()

    pipeline = _model.ModelPipeline(
        LogisticRegression(), 'test', etl=etl_step, client=api_client)

    assert pipeline.etl == etl_step

    # clean up the module-level attribute set during construction
    del _model._NEWEST_CIVISML_VERSION
def test_infer_update_resources(mock_make_factory, mock_job):
    """Users can override the resources requested for jobs."""
    api_client = mock.MagicMock()
    api_client.scripts.get_containers.return_value = mock_job
    job_environ = {'CIVIS_JOB_ID': "test_job", 'CIVIS_RUN_ID': "test_run"}

    with mock.patch.dict('os.environ', job_environ):
        civis.parallel.infer_backend_factory(
            client=api_client, required_resources={'cpu': -11})

    # Index 1 of `call_args` holds the keyword arguments of the call.
    forwarded_kwargs = mock_make_factory.call_args[1]
    assert forwarded_kwargs['required_resources'] == {'cpu': -11}
def test_infer_new_params(mock_make_factory, mock_job):
    """New parameters overwrite the existing job's parameters."""
    api_client = mock.MagicMock()
    api_client.scripts.get_containers.return_value = mock_job
    job_environ = {'CIVIS_JOB_ID': "test_job", 'CIVIS_RUN_ID': "test_run"}
    new_params = [
        {'name': 'spam', 'type': 'fun'},
        {'name': 'foo', 'type': 'bar'},
    ]

    with mock.patch.dict('os.environ', job_environ):
        civis.parallel.infer_backend_factory(
            client=api_client, params=new_params)

    forwarded_kwargs = mock_make_factory.call_args[1]
    assert forwarded_kwargs['params'] == new_params
def test_infer_update_args(mock_make_factory, mock_job):
    """Users can modify the existing job's arguments for sub-processes.

    The expected result keeps ``'spam': 'eggs'`` (presumably supplied by the
    `mock_job` fixture -- confirm there) alongside the new argument.
    """
    api_client = mock.MagicMock()
    api_client.scripts.get_containers.return_value = mock_job
    job_environ = {'CIVIS_JOB_ID': "test_job", 'CIVIS_RUN_ID': "test_run"}

    with mock.patch.dict('os.environ', job_environ):
        civis.parallel.infer_backend_factory(
            client=api_client, arguments={'foo': 'bar'})

    forwarded_kwargs = mock_make_factory.call_args[1]
    assert forwarded_kwargs['arguments'] == {'spam': 'eggs', 'foo': 'bar'}
def test_infer_extra_param(mock_make_factory, mock_job):
    """Adding a new parameter leaves the existing parameter unchanged.

    The expected list starts with ``{'name': 'spam'}`` (presumably the
    parameter defined by the `mock_job` fixture -- confirm there).
    """
    api_client = mock.MagicMock()
    api_client.scripts.get_containers.return_value = mock_job
    job_environ = {'CIVIS_JOB_ID': "test_job", 'CIVIS_RUN_ID': "test_run"}
    extra_params = [{'name': 'foo', 'type': 'bar'}]

    with mock.patch.dict('os.environ', job_environ):
        civis.parallel.infer_backend_factory(
            client=api_client, params=extra_params)

    forwarded_kwargs = mock_make_factory.call_args[1]
    assert forwarded_kwargs['params'] == [
        {'name': 'spam'},
        {'name': 'foo', 'type': 'bar'},
    ]
def test_train_data_exc_handling(mock_load_table):
    """A 404 API error while loading training data becomes a ValueError."""

    def _always_succeeded(*args, **kwargs):
        return Response({'state': 'succeeded'})

    api_client = mock.MagicMock()
    api_client.scripts.get_containers_runs = _always_succeeded

    model_future = _model.ModelFuture(job_id=1, run_id=2, client=api_client)
    model_future._train_data_fname = 'placeholder.csv'

    # The table loader fails with a 404; the property should catch it and
    # raise something intelligible instead.
    not_found = Response(
        {'content': None, 'status_code': 404, 'reason': None})
    mock_load_table.side_effect = [CivisAPIError(not_found)]

    with pytest.raises(ValueError):
        model_future.train_data
def test_modelpipeline_init_newest():
    """A new pipeline keeps its `etl` and uses the latest train template.

    `_model._CIVISML_TEMPLATE` is module-level state (presumably a cache of
    the resolved template -- confirm in `_model`). It is reset before the
    test and, via ``finally``, afterwards as well, so that a failing
    assertion cannot leak the value into later tests.
    """
    _model._CIVISML_TEMPLATE = None
    try:
        mock_client = mock.MagicMock()
        mock_client.templates.get_scripts.return_value = {}
        etl = LogisticRegression()

        mp = _model.ModelPipeline(LogisticRegression(), 'test', etl=etl,
                                  client=mock_client)

        assert mp.etl == etl
        assert mp.train_template_id == LATEST_TRAIN_TEMPLATE
    finally:
        # clean up, even when construction or an assertion above fails
        _model._CIVISML_TEMPLATE = None
def test_train_data_fname():
    """`train_data_fname` is the last component of the data location."""

    # Replace the poller with a plain function that accepts anything and
    # always reports a successful run.
    def _always_succeeded(*args, **kwargs):
        return Response({'state': 'succeeded'})

    api_client = mock.MagicMock()
    api_client.scripts.get_containers_runs = _always_succeeded
    model_future = _model.ModelFuture(job_id=1, run_id=2, client=api_client)

    data_location = '/green/eggs/and/ham'
    model_future._train_metadata = {
        'run': {'configuration': {'data': {'location': data_location}}},
    }

    assert model_future.train_data_fname == 'ham'
def test_infer_from_custom_job(mock_make_factory):
    """`infer_backend_factory` works inside a custom job made from a template.

    Job 42 is the custom job we pretend to be running in; it was created
    from template 999, which is backed by script 171.
    """
    custom_job = Response(dict(from_template_id=999, id=42,
                               required_resources=None,
                               params=[{'name': 'spam'}],
                               arguments={'spam': 'eggs'},
                               docker_image_name='image_name',
                               docker_image_tag='tag',
                               repo_http_uri='cabbage',
                               repo_ref='servant'))
    backing_script = mock_job()
    template = Response(dict(id=999, script_id=171))

    def _get_container(job_id):
        numeric_id = int(job_id)
        if numeric_id == 42:
            return custom_job
        if numeric_id == 171:
            return backing_script
        raise ValueError("Got job_id {}".format(job_id))

    api_client = mock.MagicMock()
    api_client.scripts.get_containers.side_effect = _get_container
    api_client.templates.get_scripts.return_value = template
    job_environ = {'CIVIS_JOB_ID': "42", 'CIVIS_RUN_ID': "test_run"}

    with mock.patch.dict('os.environ', job_environ):
        civis.parallel.infer_backend_factory(client=api_client)

    # `get_containers` should have been called twice by now: first for the
    # container we are running in, then for the container backing the
    # template this job was created from. The backing script carries
    # settings which are not visible on the container created from it.
    assert api_client.scripts.get_containers.call_count == 2
    api_client.templates.get_scripts.assert_called_once_with(999)

    expected_kwargs = {
        'required_resources': {'cpu': 11},
        'params': [{'name': 'spam'}],
        'arguments': {'spam': 'eggs'},
        'client': mock.ANY,
        'polling_interval': mock.ANY,
        'setup_cmd': None,
        'max_submit_retries': mock.ANY,
        'max_job_retries': mock.ANY,
        'hidden': True,
        'remote_backend': 'civis',
    }
    # Inferred settings are expected to come from the backing script.
    expected_kwargs.update(
        {key: backing_script[key] for key in civis.parallel.KEYS_TO_INFER})

    mock_make_factory.assert_called_once_with(**expected_kwargs)
def test_create_method_no_iterator_kwarg():
    """Generated methods reject an unexpected ``iterator`` keyword.

    Required and optional arguments are handled through different code
    paths in the dynamic functions, so both are exercised.
    """
    # Path 1: the method only declares a required argument.
    required_spec = [
        {"name": 'id', "in": 'query', "required": True, "doc": ""},
    ]
    required_method = _resources.create_method(
        required_spec, 'get', 'mock_name', '/objects', 'fake_doc')
    required_endpoint = mock.MagicMock()

    with pytest.raises(TypeError) as excinfo:
        required_method(required_endpoint, id=202, iterator=True)

    assert 'keyword argument' in str(excinfo.value)

    # Path 2: the method only declares an optional argument.
    optional_spec = [
        {"name": 'foo', "in": 'query', "required": False, "doc": ""},
    ]
    optional_method = _resources.create_method(
        optional_spec, 'get', 'mock_name', '/objects', 'fake_doc')
    optional_endpoint = mock.MagicMock()

    with pytest.raises(TypeError) as excinfo:
        optional_method(optional_endpoint, iterator=True)

    assert 'keyword argument' in str(excinfo.value)
def _create_mock_endpoint():
    """Create a mock endpoint plus a generated GET method for '/objects'.

    The method declares a required query argument ``foo`` and an optional
    query argument ``bar``.

    Returns
    -------
    tuple
        ``(mock_endpoint, method)``
    """
    required_foo = {"name": 'foo', "in": 'query', "required": True, "doc": ""}
    optional_bar = {"name": 'bar', "in": 'query', "required": False, "doc": ""}

    generated = _resources.create_method(
        [required_foo, optional_bar],
        'get', 'mock_name', '/objects', 'fake_doc')

    return mock.MagicMock(), generated
def test_get_table_id(schema_tablename):
    """Check that get_table_id splits a quoted schema.tablename correctly."""
    client = civis.APIClient(local_api_spec=TEST_SPEC, api_key='none')
    client.get_database_id = mock.Mock(return_value=123)

    # The fake table listing only answers for index 0; any other index
    # raises KeyError through the dict's own __getitem__.
    listing = mock.MagicMock()
    listing.__getitem__.side_effect = {0: mock.Mock()}.__getitem__
    client.tables.list = mock.Mock(return_value=listing)

    client.get_table_id(table=schema_tablename, database=123)

    client.tables.list.assert_called_once_with(
        database_id=123, schema='foo', name='bar')
def mock_api_error(job_id, run_id):
    """Fake run lookup that fails a configured number of times first.

    Reads ``counter``, ``num_failures`` and ``failure_is_error`` from the
    enclosing scope. While ``counter['failures']`` is below
    ``num_failures`` each call counts one failure and either raises
    `CivisAPIError` (when ``failure_is_error`` is truthy) or returns a
    response in the 'failed' state. After that, every call returns a
    'succeeded' response.
    """
    if not (counter['failures'] < num_failures):
        return response.Response({
            'id': run_id,
            'container_id': job_id,
            'state': 'succeeded',
        })

    counter['failures'] += 1
    if failure_is_error:
        raise CivisAPIError(mock.MagicMock())
    return response.Response({
        'id': run_id,
        'container_id': job_id,
        'state': 'failed',
    })