def test_container_exception_no_result_logs(m_sleep):
    # If the job errored with no output but with logs,
    # we should return error logs with the future exception.
    mem_msg = ('Run used approximately 2 millicores '
               'of its 256 millicore CPU limit')
    failed_msg = 'Failed: The job container failed. Exit code 1'
    logs = [{'id': 111, 'created_at': 'abc', 'message': mem_msg,
             'level': 'info'},
            {'id': 222, 'created_at': 'def', 'message': failed_msg,
             'level': 'error'}]
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='failed', run_outputs=[], log_outputs=logs)
    fut = ContainerFuture(1, 2, client=mock_client)

    with pytest.raises(CivisJobFailure) as err:
        fut.result()
    expected_msg = (
        "(From job 1 / run 2) " + '\n'.join([failed_msg, mem_msg, '']))
    assert expected_msg == str(fut._exception.error_message)
    assert str(err.value) == expected_msg
def test_table_None(mock_res, mock_lt, mock_meta):
    mock_lt.side_effect = FileNotFoundError()
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    assert mf.table is None
    mock_lt.assert_called_once_with(3, 7, 'predictions.csv',
                                    index_col=0, client=c)
def test_set_job_exception_unknown_error(m_sleep):
    # If we really don't recognize the error, at least give the
    # user a few lines of logs so they can maybe figure it out themselves.
    logs = [{'created_at': '2017-05-10T12:00:00.000Z',
             'id': 10005,
             'level': 'error',
             'message': 'Failed'},
            {'created_at': '2017-05-10T12:00:00.000Z',
             'id': 10003,
             'level': 'error',
             'message': 'Error on job: Process ended with an '
                        'error, exiting: 137.'},
            {'created_at': '2017-05-10T12:00:00.000Z',
             'id': 10000,
             'level': 'error',
             'message': 'Oops'}]
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='failed', log_outputs=logs)
    err_msg = ("(From job 1 / run 2) "
               + '\n'.join([x['message'] for x in logs][::-1]))

    fut = _model.ModelFuture(1, 2, client=mock_client)
    with pytest.raises(CivisJobFailure) as err:
        fut.result()
    assert str(err.value).startswith(err_msg)
def test_set_job_exception_metadata_exception(m_sleep):
    """Test cases where accessing metadata throws exceptions."""
    # State "running" prevents termination when the object is created.
    mock_client = create_client_mock_for_container_tests(1, 2,
                                                         state='running')

    class ModelFutureRaiseExc(_model.ModelFuture):
        def __init__(self, exc, *args, **kwargs):
            self.__exc = exc
            super().__init__(*args, **kwargs)

        @property
        def metadata(self):
            raise self.__exc('What a spectacular failure, you say!')

    # These exception types get caught.
    for exc in [FileNotFoundError, CivisJobFailure, CancelledError]:
        fut = ModelFutureRaiseExc(exc, 1, 2, client=mock_client)
        _model.ModelFuture._set_job_exception(fut)

    # The KeyError is caught, but it sends a warning.
    with pytest.warns(UserWarning):
        fut = ModelFutureRaiseExc(KeyError, 1, 2, client=mock_client)
        _model.ModelFuture._set_job_exception(fut)

    fut = ModelFutureRaiseExc(RuntimeError, 1, 2, client=mock_client)
    with pytest.raises(RuntimeError):
        _model.ModelFuture._set_job_exception(fut)
def test_container_exception_memory_error(m_sleep):
    err_msg = ('Process ran out of its allowed 3000 MiB of '
               'memory and was killed.')
    logs = [{'created_at': '2017-05-10T12:00:00.000Z',
             'id': 10005,
             'level': 'error',
             'message': 'Failed'},
            {'created_at': '2017-05-10T12:00:00.000Z',
             'id': 10003,
             'level': 'error',
             'message': 'Error on job: Process ended with an '
                        'error, exiting: 137.'},
            {'created_at': '2017-05-10T12:00:00.000Z',
             'id': 10000,
             'level': 'error',
             'message': err_msg}]
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='failed', run_outputs=[], log_outputs=logs)
    fut = ContainerFuture(1, 2, client=mock_client)

    with pytest.raises(MemoryError) as err:
        fut.result()
    assert str(err.value) == f"(From job 1 / run 2) {err_msg}"
def test_metrics_prediction(mock_file_id_from_run_output):
    mock_file_id_from_run_output.return_value = 11
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(1, 2, 3, 7, client=c)
    assert mf.metrics == 'foo'
    mock_file_id_from_run_output.assert_called_with('metrics.json', 3, 7,
                                                    client=mock.ANY)
def test_result_callback_no_get(mock_civis):
    # Test that the completed callback happens even if we don't call `get`.
    callback = mock.MagicMock()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock('spam')
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='success', run_outputs=mock.MagicMock())
    fut = ContainerFuture(1, 2, client=mock_client)
    civis.parallel._CivisBackendResult(fut, callback)
    assert callback.call_count == 1
def test_container_future_job_id_run_id():
    job_id, run_id = 123, 456
    result = ContainerFuture(
        job_id=job_id,
        run_id=run_id,
        client=create_client_mock_for_container_tests(),
    )
    assert result.job_id == job_id
    assert result.run_id == run_id
def test_set_job_exception_no_exception(mock_f2j):
    # If nothing went wrong, we shouldn't set an exception.
    ro = [{'name': 'model_info.json', 'object_id': 137,
           'object_type': 'File'},
          {'name': 'metrics.json', 'object_id': 139,
           'object_type': 'File'}]
    ro = [Response(o) for o in ro]
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='succeeded', run_outputs=ro)
    fut = _model.ModelFuture(1, 2, client=mock_client)
    assert fut.exception() is None
def test_modelpipeline_classmethod_constructor_old_version(
        mock_future, train_id, predict_id):
    # Test that we select the correct prediction template for different
    # versions of a training job.
    mock_client = create_client_mock_for_container_tests()
    mock_client.scripts.get_containers.return_value = \
        _container_response_stub(from_template_id=train_id)
    mp = _model.ModelPipeline.from_existing(1, 1, client=mock_client)
    assert mp.predict_template_id == predict_id
def test_validation_metadata_prediction(mock_spe, mock_f2f,
                                        mock_file_id_from_run_output):
    mock_file_id_from_run_output.return_value = 11
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(1, 2, 3, 7, client=c)
    assert mf.validation_metadata == 'foo'
    mock_f2f.assert_called_once_with(11, client=c)
    mock_file_id_from_run_output.assert_called_with('metrics.json', 3, 7,
                                                    client=mock.ANY)
def test_getstate():
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    ret = mf.__getstate__()
    assert ret['_done_callbacks'] == []
    assert not ret['_self_polling_executor']
    assert 'client' not in ret
    assert 'poller' not in ret
    assert '_condition' not in ret
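# The assertions above pin down the picklable state. A minimal sketch of a
# `__getstate__` consistent with them (an assumption; the real
# `ModelFuture.__getstate__` lives in `_model` and may differ in detail):
# copy the instance dict, reset callback and executor state, and drop the
# unpicklable API client, poller, and threading condition.
def _example_getstate(self):
    state = self.__dict__.copy()
    state['_done_callbacks'] = []
    state['_self_polling_executor'] = None
    for key in ('client', 'poller', '_condition'):
        state.pop(key, None)
    return state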
def test_result_success(mock_civis):
    # Test that we can get a result back from a succeeded job.
    callback = mock.MagicMock()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock('spam')
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='success', run_outputs=mock.MagicMock())
    fut = ContainerFuture(1, 2, client=mock_client)
    res = civis.parallel._CivisBackendResult(fut, callback)
    assert res.get() == 'spam'
    assert callback.call_count == 1
def test_validation_metadata_missing(mock_spe, mock_f2f,
                                     mock_file_id_from_run_output):
    # Make sure that missing validation metadata doesn't cause an error.
    mock_file_id_from_run_output.side_effect = FileNotFoundError
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    assert mf.validation_metadata is None
    assert mf.metrics is None
    assert mock_f2f.call_count == 0
    assert mock_file_id_from_run_output.call_count == 1
def test_estimator(mock_le):
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)

    assert mock_le.call_count == 0, "Estimator retrieval is lazy."
    assert mf.estimator == 'spam'
    assert mock_le.call_count == 1

    assert mf.estimator == 'spam'
    assert mock_le.call_count == 1, \
        "The Estimator is only downloaded once and cached."
def test_state():
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    ret = mf.state
    assert ret == 'foo'

    c.scripts.get_containers_runs.return_value = Response(
        {'id': 7, 'container_id': 3, 'state': 'failed'})
    mf = _model.ModelFuture(3, 7, client=c)
    assert mf.state == 'failed'
def test_modelfuture_constructor(mock_adc, mock_spe):
    c = create_client_mock_for_container_tests(7, 17)

    mf = _model.ModelFuture(job_id=7, run_id=17, client=c)
    assert mf.is_training is True
    assert mf.train_run_id == 17
    assert mf.train_job_id == 7

    mf = _model.ModelFuture(job_id=7, run_id=17,
                            train_job_id=23, train_run_id=29, client=c)
    assert mf.is_training is False
    assert mf.train_run_id == 29
    assert mf.train_job_id == 23
def test_result_exception(m_sleep, mock_civis):
    # An error in the job should be raised by the result.
    callback = mock.MagicMock()
    exc = ZeroDivisionError()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock(exc)
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='failed', run_outputs=mock.MagicMock())
    fut = ContainerFuture(1, 2, client=mock_client)
    res = civis.parallel._CivisBackendResult(fut, callback)
    with pytest.raises(ZeroDivisionError):
        res.get()
    assert callback.call_count == 0
def test_result_eventual_failure(m_sleep, mock_civis):
    # We will retry a connection error up to 5 times. Make sure
    # that we get an error if it persists past that.
    callback = mock.MagicMock()
    exc = requests.ConnectionError()
    se = make_to_file_mock('spam', max_n_err=10, exc=exc)
    mock_civis.io.civis_to_file.side_effect = se
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='success', run_outputs=mock.MagicMock())
    fut = ContainerFuture(1, 2, client=mock_client)
    res = civis.parallel._CivisBackendResult(fut, callback)
    with pytest.raises(requests.ConnectionError):
        res.get()
    assert callback.call_count == 0
def test_result_eventual_success(m_sleep, mock_civis):
    # Test that we can get a result back from a succeeded job,
    # even if we need to retry a few times to succeed with the download.
    callback = mock.MagicMock()
    exc = requests.ConnectionError()
    se = make_to_file_mock('spam', max_n_err=2, exc=exc)
    mock_civis.io.civis_to_file.side_effect = se
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='success', run_outputs=mock.MagicMock())
    fut = ContainerFuture(1, 2, client=mock_client)
    res = civis.parallel._CivisBackendResult(fut, callback)
    assert res.get() == 'spam'
    assert callback.call_count == 1
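# The retry tests above all stub `civis.io.civis_to_file` with
# `make_to_file_mock`, whose definition is outside this section. A rough
# sketch of the assumed behavior (hypothetical name, not the real helper):
# the returned callable raises `exc` for the first `max_n_err` calls and
# afterwards writes the pickled `result` into the supplied buffer, which is
# what lets `_CivisBackendResult.get` recover 'spam' after transient errors.
def _example_make_to_file_mock(result, max_n_err=0, exc=None):
    counter = {'n_err': 0}

    def _to_file(file_id, buf, *args, **kwargs):
        if exc is not None and counter['n_err'] < max_n_err:
            counter['n_err'] += 1
            raise exc
        buf.write(pickle.dumps(result))

    return _to_file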
def test_modelpipeline_classmethod_constructor_nonint_id(m_sleep):
    # Verify that we can still JSON-serialize job and run IDs even
    # if they're entered in a non-JSON-able format.
    # We need to turn them into JSON to set them as script arguments.
    mock_client = create_client_mock_for_container_tests(1, 2)
    container_response_stub = _container_response_stub(TRAIN_ID_PROD)
    mock_client.scripts.get_containers.return_value = container_response_stub

    mp = _model.ModelPipeline.from_existing(np.int64(1), np.int64(2),
                                            client=mock_client)
    out = json.dumps({'job': mp.train_result_.job_id,
                      'run': mp.train_result_.run_id})
    assert out == '{"job": 1, "run": 2}' or out == '{"run": 2, "job": 1}'
def test_modelpipeline_train_custom_etl(mock_ccr, mock_f2c,
                                        mock_template_ids):
    # Provide a custom ETL estimator and make sure we can train.
    mock_api = create_client_mock_for_container_tests()
    # The training template ID (11111) must be >= 9968 for the etl arg
    # to work.
    mock_template_ids.return_value = 11111, 22222, 33333
    etl = LogisticRegression()
    mp = _model.ModelPipeline('wf', 'dv', client=mock_api, etl=etl)
    mock_f2c.return_value = -21
    mock1, mock2 = mock.Mock(), mock.Mock()
    mock_ccr.return_value = 'res', mock1, mock2

    assert 'res' == mp.train(file_id=7)
    assert mp.train_result_ == 'res'
    # Called once to store the input Estimator.
    assert mock_f2c.call_count == 1
def test_result_running_and_cancel_requested(mock_civis):
    # When scripts request cancellation, they remain in a running
    # state. Make sure these are treated as cancelled runs.
    response = Response({'is_cancel_requested': True, 'state': 'running'})
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='running', run_outputs=mock.MagicMock())
    mock_client.scripts.post_cancel.return_value = response
    fut = ContainerFuture(1, 2, client=mock_client)

    callback = mock.MagicMock()
    # When a _CivisBackendResult created by the Civis joblib backend
    # completes successfully, a callback is executed. When cancelled,
    # this callback shouldn't be run.
    civis.parallel._CivisBackendResult(fut, callback)

    fut.cancel()
    assert callback.call_count == 0
def test_result_callback_exception(mock_civis):
    # An error in the result retrieval should be raised by .get.
    callback = mock.MagicMock()
    exc = ZeroDivisionError()
    mock_civis.io.civis_to_file.side_effect = exc
    # We're simulating a job which succeeded but generated an
    # exception when we try to download the outputs.
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='succeeded', run_outputs=mock.MagicMock())
    fut = ContainerFuture(1, 2, client=mock_client)
    res = civis.parallel._CivisBackendResult(fut, callback)
    with pytest.raises(ZeroDivisionError):
        res.get()
    assert callback.call_count == 0
def test_metrics_training_None(mock_file_to_json,
                               mock_file_id_from_run_output):
    mock_file_to_json.return_value = mock.MagicMock(
        return_value={'metrics': 'foo', 'run': {'status': 'succeeded'}})
    mock_file_id_from_run_output.return_value = 11
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    # Override the validation metadata to be None, as though we ran
    # a training job without validation.
    mf._val_metadata = None
    mock_file_to_json.return_value = None
    assert mf.metrics is None
    mock_file_id_from_run_output.assert_called_with('metrics.json', 3, 7,
                                                    client=mock.ANY)
def test_result_exception_no_result(m_sleep):
    # If the job errored but didn't write an output, we should get
    # a generic TransportableException back.
    callback = mock.MagicMock()
    mock_client = create_client_mock_for_container_tests(
        1, 2, state='failed', run_outputs=[])
    fut = ContainerFuture(1, 2, client=mock_client)
    res = civis.parallel._CivisBackendResult(fut, callback)
    fut._set_api_exception(CivisJobFailure(Response({'state': 'failed'})))

    with pytest.raises(TransportableException) as exc:
        res.get()
    assert "{'state': 'failed'}" in str(exc.value)
    assert callback.call_count == 0
def mp_setup():
    mock_api = create_client_mock_for_container_tests()
    mock_api.aliases.list.return_value = TEST_TEMPLATE_ID_ALIAS_OBJECTS
    mp = _model.ModelPipeline('wf', 'dv', client=mock_api)
    return mp
def test_table_no_pkey(mock_res, mock_lt, mock_meta):
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    assert mf.table == 'bar'
    mock_lt.assert_called_once_with(3, 7, 'predictions.csv',
                                    index_col=False, client=c)
def test_metadata(mock_spec, mock_f2j):
    c = create_client_mock_for_container_tests(3, 7)
    mf = _model.ModelFuture(3, 7, client=c)
    assert mf.metadata == {'foo': 'bar'}
    mock_f2j.assert_called_once_with(11, client=c)
def test_modelfuture_pickle_smoke(mock_client):
    mf = _model.ModelFuture(job_id=7, run_id=13,
                            client=create_client_mock_for_container_tests())
    mf.result()
    mf_pickle = pickle.dumps(mf)
    pickle.loads(mf_pickle)
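# Every test in this module builds its API stub with
# `create_client_mock_for_container_tests`, defined elsewhere in the test
# suite. A minimal sketch of the assumed behavior (hypothetical name and
# simplified wiring, not the real helper): a MagicMock standing in for
# `civis.APIClient` whose `scripts.get_containers_runs` reports the
# requested job/run in the given state, with optional run outputs and log
# messages attached.
def _example_client_mock(job_id=1, run_id=2, state='succeeded',
                         run_outputs=None, log_outputs=None):
    c = mock.MagicMock()
    c.scripts.get_containers_runs.return_value = Response(
        {'id': run_id, 'container_id': job_id, 'state': state})
    if run_outputs is not None:
        c.scripts.list_containers_runs_outputs.return_value = run_outputs
    if log_outputs is not None:
        c.scripts.list_containers_runs_logs.return_value = log_outputs
    return c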