def test_save_state_discards_duplicates(self, test_client):
    """A task id referenced twice in the state is stored only once.

    Seeds redis with data entries for two task ids, POSTs a state that
    references the first id twice and the second id once, and checks that
    the persisted meta state lists two task ids and two descriptors.
    """
    first_id = str(uuid4())
    second_id = str(uuid4())
    # The first id is deliberately referenced twice.
    references = [
        '${}$'.format(task_id)
        for task_id in (first_id, first_id, second_id)
    ]
    payload = {
        'state': {'test': references},
        'handler': 'test',
        'server': 'localfoo',
    }
    # Each referenced task id needs a data entry carrying a descriptor.
    for task_id, descriptor in ((first_id, 'foo'), (second_id, 'bar')):
        redis.set(name='data:{}'.format(task_id),
                  value=json.dumps({'meta': {'descriptor': descriptor}}))

    response = test_client.post('/state', data=flask.json.dumps(payload))
    body = flask.json.loads(response.get_data())
    assert response.status_code == 201, body
    assert UUID(body['state_id'])

    stored = json.loads(redis.get('state:{}'.format(body['state_id'])))
    stored_ids = stored['task_ids']
    stored_descriptors = stored['descriptors']
    assert len(stored_ids) == 2
    assert len(stored_descriptors) == 2
    assert first_id in stored_ids
    assert second_id in stored_ids
    assert 'foo' in stored_descriptors
    assert 'bar' in stored_descriptors
def test_get_state_refuse_if_one_task_fails(self, test_client, monkeypatch):
    """GET /state/<id> answers 403 when the patched tasks report FAILURE."""

    class FakeAsyncResult:
        """Replacement for celery.AsyncResult whose state is 'FAILURE'."""

        def __init__(self, *args, **kwargs):
            self.id = args[0]
            self.state = 'FAILURE'

        def get(self, *args, **kwargs):
            pass

    state_id = str(uuid4())
    stored_state = {
        'state': {'foo': ['$123$', '$456$']},
        'task_ids': ['123', '456'],
    }
    redis.set(name='state:{}'.format(state_id),
              value=json.dumps(stored_state))
    # Register two task ids for this state in the client session.
    with test_client.session_transaction() as sess:
        sess['state_access'][state_id] = ['abc', 'efg']
    monkeypatch.setattr(celery, 'AsyncResult', FakeAsyncResult)

    response = test_client.get('/state/{}'.format(state_id))
    body = flask.json.loads(response.get_data())
    assert response.status_code == 403, body
    assert 'no access' in body.get('error')
def test_get_state_get_message_if_not_all_tasks_finished(
        self, test_client, monkeypatch):
    """GET /state/<id> answers 202 while the patched tasks are SUBMITTED."""

    class FakeAsyncResult:
        """Replacement for celery.AsyncResult whose state is 'SUBMITTED'."""

        def __init__(self, *args, **kwargs):
            self.id = args[0]
            self.state = 'SUBMITTED'

        def get(self, *args, **kwargs):
            pass

    state_id = str(uuid4())
    state_key = 'state:{}'.format(state_id)
    stored_state = {
        'state': {'foo': ['$123$', '$456$']},
        'task_ids': ['123', '456'],
    }
    redis.set(name=state_key, value=json.dumps(stored_state))
    with test_client.session_transaction() as sess:
        sess['state_access'][state_id] = ['abc', 'efg']
    monkeypatch.setattr(celery, 'AsyncResult', FakeAsyncResult)

    response = test_client.get('/state/{}'.format(state_id))
    body = flask.json.loads(response.get_data())
    assert response.status_code == 202, body
    assert 'still running' in body.get('message')
def test_get_state_with_replaced_ids_if_all_tasks_succeed(
        self, test_client, monkeypatch):
    """GET /state/<id> returns the state with the session's task ids.

    With every patched task reporting SUCCESS, the response is expected to
    be 200 and the '$123$' / '$456$' placeholders are expected to come back
    as '$abc$' / '$efg$', i.e. the ids registered in the session.
    """

    class FakeAsyncResult:
        """Replacement for celery.AsyncResult whose state is 'SUCCESS'."""

        def __init__(self, *args, **kwargs):
            self.id = args[0]
            self.state = 'SUCCESS'

        def get(self, *args, **kwargs):
            pass

    state_id = str(uuid4())
    stored_state = {
        'state': {'foo': ['$123$', '$456$']},
        'task_ids': ['123', '456'],
    }
    redis.set(name='state:{}'.format(state_id),
              value=json.dumps(stored_state))
    with test_client.session_transaction() as sess:
        sess['state_access'][state_id] = ['abc', 'efg']
    monkeypatch.setattr(celery, 'AsyncResult', FakeAsyncResult)

    response = test_client.get('/state/{}'.format(state_id))
    body = flask.json.loads(response.get_data())
    assert response.status_code == 200, body
    returned_refs = body['state']['foo']
    assert returned_refs[0] == '$abc$'
    assert returned_refs[1] == '$efg$'
def test_request_state_access_reuses_duplicate(self, test_client):
    """Two task ids with identical descriptors yield a single new task.

    Stores a state whose two task ids share the descriptor
    ``{'data_type': 'default'}``, requests access via POST /state/<id> and
    checks that the session ends up with exactly one data task, which is
    also the only entry granted for that state and differs from the
    original first task id.
    """
    first_id = str(uuid4())
    second_id = str(uuid4())
    state_id = str(uuid4())
    meta_state = {
        'state': {
            'foo': ['${}$'.format(first_id), '${}$'.format(second_id)]
        },
        'handler': 'test',
        'server': 'localfoo',
        'task_ids': [first_id, second_id],
        'descriptors': [{'data_type': 'default'}, {'data_type': 'default'}],
    }
    redis.set(name='state:{}'.format(state_id),
              value=json.dumps(meta_state))

    # The session must start out without any tasks or state grants.
    with test_client.session_transaction() as sess:
        assert not sess['data_tasks']
        assert not sess['state_access']

    request_body = flask.json.dumps({'auth': {'token': ''}})
    response = test_client.post('/state/{}'.format(state_id),
                                data=request_body)
    body = flask.json.loads(response.get_data())
    assert response.status_code == 202, body
    assert not body

    with test_client.session_transaction() as sess:
        assert len(sess['data_tasks']) == 1
        assert len(sess['state_access']) == 1
        key = list(sess['state_access'])[0]
        granted = sess['state_access'][key]
        assert len(granted) == 1
        assert sess['data_tasks'] == granted
        assert meta_state['task_ids'][0] != granted[0]
def test_request_state_reuses_previous_etls_but_only_in_own_scope(
        self, test_client, monkeypatch):
    """Only data tasks already in the session are reused for a state.

    Both task ids of the stored state get a redis entry, but only the
    second one is placed in the session beforehand. After requesting access
    the session is expected to hold two data tasks, with the second original
    id granted for the state and the first one not.
    """

    class FakeAsyncResult:
        """Replacement for celery.AsyncResult whose state is 'SUCCESS'."""

        def __init__(self, *args, **kwargs):
            self.id = args[0]
            self.state = 'SUCCESS'

        def get(self, *args, **kwargs):
            pass

    handler = 'test'
    server = 'localfoo'
    foreign_id = str(uuid4())
    owned_id = str(uuid4())
    foreign_descriptor = {'data_type': 'default', 'id': 1}
    owned_descriptor = {'data_type': 'default', 'id': 2}
    meta_state = {
        'state': {
            'foo': ['${}$'.format(foreign_id), '${}$'.format(owned_id)]
        },
        'handler': handler,
        'server': server,
        'task_ids': [foreign_id, owned_id],
        'descriptors': [foreign_descriptor, owned_descriptor],
    }
    state_id = str(uuid4())
    redis.set(name='state:{}'.format(state_id),
              value=json.dumps(meta_state))

    # Create redis entries for both tasks through the ETL handler.
    etl_handler = ETLHandler.factory(handler=handler, server=server, auth={})
    for task_id, descriptor in ((foreign_id, foreign_descriptor),
                                (owned_id, owned_descriptor)):
        etl_handler.create_redis_entry(task_id=task_id,
                                       file_path='',
                                       descriptor=descriptor,
                                       data_type='')

    # Only the second task is part of this client's session.
    with test_client.session_transaction() as sess:
        sess['data_tasks'] = [owned_id]
    monkeypatch.setattr(celery, 'AsyncResult', FakeAsyncResult)

    request_body = flask.json.dumps({'auth': {'token': ''}})
    response = test_client.post('/state/{}'.format(state_id),
                                data=request_body)
    body = flask.json.loads(response.get_data())
    assert response.status_code == 202, body
    assert not body

    with test_client.session_transaction() as sess:
        assert len(sess['data_tasks']) == 2
        key = list(sess['state_access'])[0]
        granted = sess['state_access'][key]
        assert len(granted) == 2
        assert meta_state['task_ids'][0] not in granted
        assert meta_state['task_ids'][1] in granted
def test_discard_expired_states(self, test_client):
    """GET /data drops session task ids that have no data entry in redis.

    Only 'data:456' is seeded by this test, while the session lists both
    '123' and '456'. The response is expected to contain just the one
    existing data state, and the session is expected to keep only '456'.
    """
    data_state = {
        'a': 'b',
        'file_path': '',
        'meta': ''
    }
    redis.set(name='data:456', value=json.dumps(data_state))
    with test_client.session_transaction() as sess:
        sess['data_tasks'] = ['123', '456']
    rv = test_client.get('/data?wait=1')
    body = flask.json.loads(rv.get_data())
    assert rv.status_code == 200, body
    assert len(body['data_states']) == 1
    assert body['data_states'][0]['a'] == 'b'
    with test_client.session_transaction() as sess:
        # This used to be an assignment, which verified nothing; the point
        # of the test is that the expired id was removed from the session.
        assert sess['data_tasks'] == ['456']
def test_update_redis(self):
    """update_redis writes the frame's features into the redis meta entry.

    Runs ``self.etl.update_redis`` with three frames in turn and checks
    ``meta.features`` of the 'data:123' entry after each call: a frame with
    a 'feature' column, a frame without one, and an empty frame.
    """
    with_feature = pd.DataFrame([[1, 2, 3]],
                                columns=['id', 'feature', 'value'])
    without_feature = pd.DataFrame([[1, 3]], columns=['id', 'value'])
    empty = pd.DataFrame([], columns=['id', 'feature', 'value'])
    redis.set('data:123', json.dumps({'meta': {}}))

    cases = (
        (with_feature, [2]),
        (without_feature, []),
        (empty, []),
    )
    for frame, expected_features in cases:
        self.etl.update_redis(data_frame=frame)
        data_state = json.loads(redis.get('data:123'))
        assert data_state['meta']['features'] == expected_features
def test_save_state_saves_and_returns(self, test_client):
    """POST /state stores the state and returns a UUID pointing to it.

    The payload holds one valid task reference and one malformed string
    ('${${}'); the stored meta state is expected to list only the valid
    task id and to keep the valid reference unchanged.
    """
    task_id = str(uuid4())
    reference = '${}$'.format(task_id)
    redis.set(name='data:{}'.format(task_id),
              value=json.dumps({'meta': {'descriptor': 'foo'}}))
    payload = {
        'state': {'test': [reference, '${${}']},
        'handler': 'test',
        'server': 'localfoo',
    }

    response = test_client.post('/state', data=flask.json.dumps(payload))
    body = flask.json.loads(response.get_data())
    assert response.status_code == 201, body
    assert UUID(body['state_id'])

    stored = json.loads(redis.get('state:{}'.format(body['state_id'])))
    assert stored['task_ids'] == [task_id]
    assert stored['state']['test'][0] == '${}$'.format(task_id)
def save_state() -> Tuple[Response, int]:
    """Save given payload to redis, so it can be accessed later on.

    The text form of the submitted state is scanned for ``$...$``
    placeholders. Each one is handed to ``AnalyticTask.parse_value`` and
    every non-None first element of the result is treated as a data task
    id. For each id the descriptor of its ``data:<id>`` redis entry is
    collected, and the state is stored under ``state:<uuid>`` together with
    server, handler, task ids and descriptors.

    :return: UUID linked to the saved state with status 201, or an error
        message with status 400 if the state contains no data task ids or
        one of them has no entry in redis.
    """
    logger.debug("Received POST request on /state.")
    payload = request.get_json(force=True)
    state = str(payload['state'])
    # Raw string: '\$' in a regular literal is an invalid escape sequence.
    matches = re.findall(r'\$.+?\$', state)
    parsed_ids = [AnalyticTask.parse_value(match)[0] for match in matches]
    # De-duplicate in order of first appearance, so the stored task id and
    # descriptor order is deterministic (iterating a set() is not).
    task_ids = []
    seen = set()
    for task_id in parsed_ids:
        if task_id is None or task_id in seen:
            continue
        seen.add(task_id)
        task_ids.append(task_id)
    if not task_ids:
        error = "This state cannot be saved because it contains no data " \
                "task ids. These are used to verify access to the state and " \
                "its potentially sensitive data."
        logger.error(error)
        return jsonify({'error': error}), 400
    # One descriptor is appended per task id, so both lists stay aligned.
    descriptors = []
    for task_id in task_ids:
        value = redis.get('data:{}'.format(task_id))
        if value is None:
            error = "Data task id is {} could not be found in redis. " \
                    "State cannot be saved".format(task_id)
            logger.error(error)
            return jsonify({'error': error}), 400
        data_state = json.loads(value)
        descriptors.append(data_state['meta']['descriptor'])
    meta_state = {
        # NOTE(review): str() -> ast.literal_eval round-trips the submitted
        # state; storing payload['state'] directly looks equivalent for
        # JSON objects. Confirm before simplifying.
        'state': ast.literal_eval(state),
        'server': payload['server'],
        'handler': payload['handler'],
        'task_ids': task_ids,
        'descriptors': descriptors
    }
    state_id = uuid4()
    redis.set(name='state:{}'.format(state_id), value=json.dumps(meta_state))
    logger.debug("Successfully saved data to redis. Sending response.")
    return jsonify({'state_id': state_id}), 201
def test_request_state_acces_works(self, test_client):
    """Requesting access to a state registers new data tasks in the session.

    Starting from an empty session, POST /state/<id> is expected to answer
    202 with an empty body, leave two data tasks in the session and grant
    exactly those two tasks for the state; neither of them may be one of
    the task ids originally stored with the state.
    """
    meta_state = {
        'state': {'foo': ['$123$', '$456$']},
        'handler': 'test',
        'server': 'localfoo',
        'task_ids': ['123', '456'],
        'descriptors': [
            {'data_type': 'default'},
            {'data_type': 'default', 'foo': 'bar'},
        ],
    }
    state_id = str(uuid4())
    redis.set(name='state:{}'.format(state_id),
              value=json.dumps(meta_state))

    # The session must start out without any tasks or state grants.
    with test_client.session_transaction() as sess:
        assert not sess['data_tasks']
        assert not sess['state_access']

    request_body = flask.json.dumps({'auth': {'token': ''}})
    response = test_client.post('/state/{}'.format(state_id),
                                data=request_body)
    body = flask.json.loads(response.get_data())
    assert response.status_code == 202, body
    assert not body

    with test_client.session_transaction() as sess:
        assert len(sess['data_tasks']) == 2
        assert len(sess['state_access']) == 1
        key = list(sess['state_access'])[0]
        granted = sess['state_access'][key]
        assert len(granted) == 2
        assert sess['data_tasks'][0] in granted
        assert sess['data_tasks'][1] in granted
        assert meta_state['task_ids'][0] not in granted
        assert meta_state['task_ids'][1] not in granted