def test_project_publish_with_workflow(api_app, users, api_project, es):
    """Test publish a project with a workflow.

    Registers a failing webhook receiver for ``video_1``, processes one
    event, and checks that task states propagate from the video up to the
    project, then walks the draft -> published transitions.

    NOTE(review): a later definition in this file reuses this exact test
    name, so under pytest collection only the later one runs — rename one
    of the two to avoid silently skipping this test. TODO confirm intent.
    """
    project, video_1, video_2 = api_project
    prepare_videos_for_publish([video_1, video_2])
    project_depid = project['_deposit']['id']
    project_id = str(project.id)
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    video_2_depid = video_2['_deposit']['id']
    receiver_id = 'test_project_publish_with_workflow'
    # `db` is presumably the module-level invenio_db session proxy — it is
    # not a fixture parameter here; verify it is imported at module scope.
    workflow_receiver_video_failing(api_app, db, video_1,
                                    receiver_id=receiver_id)
    headers = [('Content-Type', 'application/json')]
    payload = json.dumps(dict(somekey='somevalue'))
    with mock.patch('invenio_indexer.tasks.index_record.delay') \
            as mock_indexer, \
            api_app.test_request_context(headers=headers, data=payload):
        event = Event.create(receiver_id=receiver_id)
        db.session.add(event)
        event.process()

        # check video and project are indexed (video first, then project)
        assert mock_indexer.called is True
        ids = get_indexed_records_from_mock(mock_indexer)
        assert video_1_id == ids[0]
        assert project_id == ids[1]
    db.session.commit()

    # check tasks status is propagated to video and project
    video_1 = deposit_video_resolver(video_1_depid)
    expected = {u'add': u'SUCCESS', u'failing': u'FAILURE'}
    assert video_1['_cds']['state'] == expected
    assert video_1.project['_cds']['state'] == expected

    events = get_deposit_events(deposit_id=video_1_depid)
    assert len(events) == 1

    def check(project_status, video_1_status, video_2_status):
        # Re-resolve all three deposits so we assert on fresh DB state.
        project = deposit_project_resolver(project_depid)
        video_1 = deposit_video_resolver(video_1_depid)
        video_2 = deposit_video_resolver(video_2_depid)
        assert project.status == project_status
        assert video_1.status == video_1_status
        assert video_2.status == video_2_status

    check('draft', 'draft', 'draft')

    # Publishing a single video must not publish the project.
    login_user(User.query.get(users[0]))
    video_2 = deposit_video_resolver(video_2_depid)
    video_2.publish()
    check('draft', 'draft', 'published')

    # Publishing the project publishes the remaining draft video too.
    project = deposit_project_resolver(project_depid)
    project.publish()
    check('published', 'published', 'published')
def test_delete_not_existing_keywords(cern_keywords):
    """Verify keywords absent from the harvested feed are marked deleted."""
    fixtures = [
        # 1: unchanged
        {'key_id': '751', 'name': '13 TeV'},
        # 2: deleted
        {'key_id': '856', 'name': 'test-deleted'},
    ]
    keywords_db = [create_keyword(data=fixture) for fixture in fixtures]
    assert RecordMetadata.query.count() == 2

    # keyword harvested
    keywords_api = {'751': '13 TeV'}

    # Minimal stand-in for a RecordIndexer: only bulk_index is exercised.
    fake_indexer = type('indexer', (object, ), {})
    fake_indexer.bulk_index = mock.Mock()
    _delete_not_existing_keywords(indexer=fake_indexer,
                                  keywords_api=keywords_api,
                                  keywords_db=keywords_db)
    assert fake_indexer.bulk_index.called

    # 1 keyword deleted
    indexed_ids = get_indexed_records_from_mock(fake_indexer.bulk_index)
    assert len(indexed_ids) == 1
    deleted_rows = RecordMetadata.query.filter_by(id=indexed_ids[0]).all()
    assert len(deleted_rows) == 1
    assert deleted_rows[0].json['key_id'] == '856'
    assert deleted_rows[0].json['name'] == 'test-deleted'

    # 1 existing keyword only
    kept_rows = RecordMetadata.query.filter(
        RecordMetadata.id != indexed_ids[0]).all()
    assert len(kept_rows) == 1
    assert kept_rows[0].json['key_id'] == '751'
    assert kept_rows[0].json['name'] == '13 TeV'
def test_video_events_on_download_create(api_app, webhooks, db, api_project,
                                         access_token, json_headers):
    """Test deposit events.

    Fires two 'downloader' webhook events for the same video and checks
    that both events are recorded, the aggregated task status is SUCCESS,
    the deposit metadata mirrors that state, and both the video and its
    project are queued for reindexing.
    """
    (project, video_1, video_2) = api_project
    video_1_depid = video_1['_deposit']['id']
    project_id = str(project.id)
    video_1_id = str(video_1.id)
    bucket_id = video_1._bucket.id

    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='downloader',
                      access_token=access_token)

    with mock.patch('requests.get') as mock_request, \
            mock.patch('invenio_indexer.tasks.index_record.delay') \
            as mock_indexer, \
            api_app.test_client() as client:
        # Fake the remote file: requests.get returns an object exposing
        # only the attributes the receiver reads (raw stream + headers).
        file_size = 1024 * 1024
        mock_request.return_value = type(
            'Response', (object, ), {
                'raw': BytesIO(b'\x00' * file_size),
                'headers': {'Content-Length': file_size}
            })

        payload = dict(uri='http://example.com/test.pdf',
                       bucket_id=str(bucket_id),
                       deposit_id=video_1_depid,
                       key='test.pdf')
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))
        assert resp.status_code == 201

        # Second event with a larger (6 MB) fake file, same payload/key.
        file_size = 1024 * 1024 * 6
        mock_request.return_value = type(
            'Response', (object, ), {
                'raw': BytesIO(b'\x00' * file_size),
                'headers': {'Content-Length': file_size}
            })
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))
        assert resp.status_code == 201

        # Both events are attached to the same deposit.
        deposit = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(deposit['_deposit']['id'])
        assert len(events) == 2
        assert events[0].payload['deposit_id'] == video_1_depid
        assert events[1].payload['deposit_id'] == video_1_depid

        status = get_tasks_status_by_task(events)
        assert status == {'file_download': states.SUCCESS}

        # check if the states are inside the deposit
        res = client.get(
            url_for('invenio_deposit_rest.video_item',
                    pid_value=video_1_depid,
                    access_token=access_token),
            headers=json_headers)
        assert res.status_code == 200
        data = json.loads(res.data.decode('utf-8'))['metadata']
        assert data['_cds']['state']['file_download'] == states.SUCCESS
        assert deposit._get_files_dump() == data['_files']

        # check the record is inside the indexer queue
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert len(ids) == 2
        assert video_1_id in ids
        assert project_id in ids
def test_download_receiver(api_app, db, api_project, access_token, webhooks,
                           json_headers):
    """Test downloader receiver.

    Posts one download event and verifies: the created ObjectVersion and
    its tags, the exact SSE messages published during the download, the
    records sent to the indexer, and finally that cancelling the event
    cleans the bucket without firing SSE or reindexing.
    """
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='downloader',
                      access_token=access_token)

    with mock.patch('requests.get') as mock_request, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        sse_channel = 'mychannel'
        mock_sse.return_value = None

        # Fake remote file exposing only the attributes the receiver reads.
        file_size = 1024
        mock_request.return_value = type(
            'Response', (object, ), {
                'raw': BytesIO(b'\x00' * file_size),
                'headers': {'Content-Length': file_size}
            })

        payload = dict(uri='http://example.com/test.pdf',
                       deposit_id=video_1_depid,
                       key='test.pdf',
                       sse_channel=sse_channel)
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))

        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['tags']['uri_origin'] == 'http://example.com/test.pdf'
        assert data['key'] == 'test.pdf'
        assert 'version_id' in data
        assert 'links' in data  # TODO decide which links are needed
        assert all([link in data['links']
                    for link in ['self', 'version', 'cancel']])

        # Exactly one object version was created for the downloaded file.
        assert ObjectVersion.query.count() == 1
        obj = ObjectVersion.query.first()
        tags = obj.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert obj.key == data['key']
        assert str(obj.version_id) == data['version_id']
        assert obj.file
        assert obj.file.size == file_size

        # check sse is called
        assert mock_sse.called

        def set_data(state, message, size, total, percentage, type_):
            # Build the exact SSE payload the receiver is expected to
            # publish for a download-progress message.
            return {
                'state': state,
                'meta': {
                    'message': message,
                    'payload': {
                        'event_id': str(tags['_event_id']),
                        'key': u'test.pdf',
                        'tags': {
                            u'uri_origin': u'http://example.com/test.pdf',
                            u'_event_id': str(tags['_event_id']),
                            u'context_type': u'master',
                        },
                        'deposit_id': video_1_depid,
                        'percentage': percentage,
                        'version_id': str(obj.version_id),
                        'size': size,
                        'total': total,
                        'sse_channel': sse_channel,
                        'type': type_
                    }
                }
            }

        # 7 publications total; assert the STARTED/SUCCESS progress pair.
        assert mock_sse.call_count == 7
        mock_sse.assert_any_call(
            data=set_data(states.STARTED,
                          'Downloading {} of {}'.format(file_size, file_size),
                          file_size, file_size, 100, 'file_download'),
            channel=u'mychannel',
            type_='file_download')
        mock_sse.assert_any_call(
            data=set_data(states.SUCCESS, str(obj.version_id), file_size,
                          file_size, 100, 'file_download'),
            channel=u'mychannel',
            type_='file_download')

        # A final 'update_deposit' message carries the whole deposit.
        deposit = deposit_video_resolver(video_1_depid)
        mock_sse.assert_any_call(
            channel='mychannel',
            data={
                'state': states.SUCCESS,
                'meta': {
                    'payload': {
                        'event_id': str(tags['_event_id']),
                        'deposit_id': video_1_depid,
                        'deposit': deposit,
                    }
                }
            },
            type_='update_deposit',
        )

        # check ElasticSearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids
        assert deposit['_cds']['state'] == {u'file_download': states.SUCCESS}

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'], access_token)

    with mock.patch('requests.get') as mock_request, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        resp = client.delete(url, headers=json_headers)
        assert resp.status_code == 201
        # Original version + delete marker remain, but the bucket is empty.
        assert ObjectVersion.query.count() == 2
        bucket = Bucket.query.first()
        assert bucket.size == 0
        # Cleaning must be silent: no SSE messages, no reindexing.
        assert mock_sse.called is False
        assert mock_indexer.called is False
def test_avc_workflow_receiver_local_file_pass(api_app, db, api_project,
                                               access_token, json_headers,
                                               mock_sorenson, online_video,
                                               webhooks, local_file):
    """Test AVCWorkflow receiver.

    Variant using an already-uploaded local file (``local_file`` version
    id), so the workflow skips the download step: only metadata
    extraction, frame extraction and transcoding are run. Also verifies
    the cancel endpoint cleans everything up without SSE/reindexing.
    """
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    bucket_id = ObjectVersion.query.filter_by(
        version_id=local_file).one().bucket_id
    video_size = 5510872
    master_key = 'test.mp4'
    # Expected transcoded outputs; '1024p' is excluded (not applicable
    # for this source video, presumably due to its resolution — see the
    # REVOKED transcode statuses below).
    slave_keys = ['slave_{0}.mp4'.format(quality)
                  for quality in get_presets_applied().keys()
                  if quality != '1024p']
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='avc',
                      access_token=access_token)

    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(
            uri=online_video,
            deposit_id=video_1_depid,
            key=master_key,
            sse_channel=sse_channel,
            sleep_time=0,
            # Passing version_id marks the file as already local.
            version_id=str(local_file),
        )
        # [[ RUN WORKFLOW ]]
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))

        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['key'] == master_key
        assert 'version_id' in data
        assert data.get('presets') == get_available_preset_qualities()
        assert 'links' in data  # TODO decide which links are needed

        assert ObjectVersion.query.count() == get_object_count()

        # Master file
        master = ObjectVersion.get(bucket_id, master_key)
        tags = master.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert master.key == master_key
        assert str(master.version_id) == data['version_id']
        assert master.file
        assert master.file.size == video_size

        # Check metadata tags extracted by the metadata-extraction task
        metadata_keys = ['duration', 'bit_rate', 'size', 'avg_frame_rate',
                         'codec_name', 'codec_long_name', 'width', 'height',
                         'nb_frames', 'display_aspect_ratio', 'color_range']
        assert all([key in tags for key in metadata_keys])

        assert ObjectVersion.query.count() == get_object_count()
        assert ObjectVersionTag.query.count() == get_tag_count(is_local=True)

        # Check metadata patch applied to the record
        recid = PersistentIdentifier.get('depid', video_1_depid).object_uuid
        record = Record.get_record(recid)
        assert 'extracted_metadata' in record['_cds']
        assert all([key in str(record['_cds']['extracted_metadata'])
                    for key in metadata_keys])

        # Check slaves: each transcoded file points back to the master
        for slave_key in slave_keys:
            slave = ObjectVersion.get(bucket_id, slave_key)
            tags = slave.get_tags()
            assert slave.key == slave_key
            assert '_sorenson_job_id' in tags
            assert tags['_sorenson_job_id'] == '1234'
            assert 'master' in tags
            assert tags['master'] == str(master.version_id)
            assert master.file
            assert master.file.size == video_size

        video = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(video['_deposit']['id'])

        # check deposit tasks status: no 'file_download' for local files
        tasks_status = get_tasks_status_by_task(events)
        assert len(tasks_status) == 3
        assert 'file_transcode' in tasks_status
        assert 'file_video_extract_frames' in tasks_status
        assert 'file_video_metadata_extraction' in tasks_status

        # check single status
        collector = CollectInfoTasks()
        iterate_events_results(events=events, fun=collector)
        info = list(collector)
        assert len(info) == 11
        assert info[0][0] == 'file_video_metadata_extraction'
        assert info[0][1].status == states.SUCCESS
        assert info[1][0] == 'file_video_extract_frames'
        assert info[1][1].status == states.SUCCESS
        transocode_tasks = info[2:]
        statuses = [task[1].status for task in info[2:]]
        assert len(transocode_tasks) == len(statuses)
        # One transcode per preset; REVOKED entries are the qualities the
        # source resolution cannot produce — TODO confirm mapping.
        assert [states.SUCCESS, states.REVOKED, states.REVOKED,
                states.REVOKED, states.SUCCESS, states.REVOKED,
                states.REVOKED, states.REVOKED, states.REVOKED] == statuses

        # check tags (exclude 'uri-origin')
        assert ObjectVersionTag.query.count() == (get_tag_count() - 1)

        # check sse is called
        assert mock_sse.called

        # Deduplicated (channel, state, type) triples expected on the
        # SSE channel; note no 'file_download' messages in local mode.
        messages = [
            (sse_channel, states.SUCCESS, 'file_video_metadata_extraction'),
            (sse_channel, states.STARTED, 'file_transcode'),
            (sse_channel, states.SUCCESS, 'file_transcode'),
            (sse_channel, states.REVOKED, 'file_transcode'),  # ResolutionError
            (sse_channel, states.STARTED, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'update_deposit'),
        ]

        call_args = []
        for (_, kwargs) in mock_sse.call_args_list:
            type_ = kwargs['type_']
            state = kwargs['data']['state']
            channel = kwargs['channel']
            tuple_ = (channel, state, type_)
            if tuple_ not in call_args:
                call_args.append(tuple_)

        assert len(call_args) == len(messages)
        for message in messages:
            assert message in call_args

        deposit = deposit_video_resolver(video_1_depid)

        def filter_events(call_args):
            # Keep only the 'update_deposit' SSE publications.
            _, x = call_args
            return x['type_'] == 'update_deposit'

        list_kwargs = list(filter(filter_events, mock_sse.call_args_list))
        assert len(list_kwargs) == 10
        _, kwargs = list_kwargs[8]
        assert kwargs['type_'] == 'update_deposit'
        assert kwargs['channel'] == 'mychannel'
        assert kwargs['data']['state'] == states.SUCCESS
        assert kwargs['data']['meta']['payload'] == {
            'deposit_id': deposit['_deposit']['id'],
            'event_id': data['tags']['_event_id'],
            'deposit': deposit,
        }

        # check ElasticSearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids
        assert deposit['_cds']['state'] == {
            'file_video_metadata_extraction': states.SUCCESS,
            'file_video_extract_frames': states.SUCCESS,
            'file_transcode': states.SUCCESS,
        }

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'], access_token)

    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        # [[ DELETE WORKFLOW ]]
        resp = client.delete(url, headers=json_headers)
        assert resp.status_code == 201

        # check that object versions and tags are deleted
        # (Create + Delete) * Num Objs - 1 (because the file is local and will
        # be not touched)
        assert ObjectVersion.query.count() == 2 * get_object_count() - 1
        # Tags associated with the old version
        assert ObjectVersionTag.query.count() == get_tag_count(is_local=True)
        bucket = Bucket.query.first()
        # and bucket is empty
        assert bucket.size == 0

        record = RecordMetadata.query.filter_by(id=video_1_id).one()

        # check metadata patch are deleted
        assert 'extracted_metadata' not in record.json['_cds']

        # check the corresponding Event persisted after cleaning
        assert len(get_deposit_events(record.json['_deposit']['id'])) == 0
        assert len(get_deposit_events(record.json['_deposit']['id'],
                                      _deleted=True)) == 1

        # check no SSE message and reindexing is fired
        assert mock_sse.called is False
        assert mock_indexer.called is False
def test_avc_workflow_receiver_pass(api_app, db, api_project, access_token,
                                    json_headers, mock_sorenson, online_video,
                                    webhooks, users):
    """Test AVCWorkflow receiver.

    Full workflow from a remote URI: download + metadata extraction +
    frame extraction + transcoding. Also checks the per-user access
    rules of the event feedback endpoint and, finally, that cancelling
    the event cleans up silently.
    """
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    project_id = str(project.id)

    bucket_id = video_1['_buckets']['deposit']
    video_size = 5510872
    master_key = 'test.mp4'
    # '1024p' excluded — presumably not producible from this source
    # resolution (see the REVOKED transcode message below). TODO confirm.
    slave_keys = ['slave_{0}.mp4'.format(quality)
                  for quality in get_presets_applied()
                  if quality != '1024p']
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_list',
                      receiver_id='avc',
                      access_token=access_token)

    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(
            uri=online_video,
            deposit_id=video_1_depid,
            key=master_key,
            sse_channel=sse_channel,
            sleep_time=0,
        )
        resp = client.post(url, headers=json_headers, data=json.dumps(payload))

        assert resp.status_code == 201
        data = json.loads(resp.data.decode('utf-8'))

        assert '_tasks' in data
        assert data['tags']['uri_origin'] == online_video
        assert data['key'] == master_key
        assert 'version_id' in data
        assert data.get('presets') == get_available_preset_qualities()
        assert 'links' in data  # TODO decide which links are needed

        assert ObjectVersion.query.count() == get_object_count()

        # Master file
        master = ObjectVersion.get(bucket_id, master_key)
        tags = master.get_tags()
        assert tags['_event_id'] == data['tags']['_event_id']
        assert master.key == master_key
        assert str(master.version_id) == data['version_id']
        assert master.file
        assert master.file.size == video_size

        # Check metadata tags extracted by the metadata-extraction task
        metadata_keys = ['duration', 'bit_rate', 'size', 'avg_frame_rate',
                         'codec_name', 'codec_long_name', 'width', 'height',
                         'nb_frames', 'display_aspect_ratio', 'color_range']
        assert all([key in tags for key in metadata_keys])

        # Check metadata patch applied to the record
        recid = PersistentIdentifier.get('depid', video_1_depid).object_uuid
        record = Record.get_record(recid)
        assert 'extracted_metadata' in record['_cds']
        assert all([key in str(record['_cds']['extracted_metadata'])
                    for key in metadata_keys])

        # Check slaves: each transcoded file points back to the master
        for slave_key in slave_keys:
            slave = ObjectVersion.get(bucket_id, slave_key)
            tags = slave.get_tags()
            assert slave.key == slave_key
            assert '_sorenson_job_id' in tags
            assert tags['_sorenson_job_id'] == '1234'
            assert 'master' in tags
            assert tags['master'] == str(master.version_id)
            assert master.file
            assert master.file.size == video_size

        video = deposit_video_resolver(video_1_depid)
        events = get_deposit_events(video['_deposit']['id'])

        # check deposit tasks status: all four workflow tasks present
        tasks_status = get_tasks_status_by_task(events)
        assert len(tasks_status) == 4
        assert 'file_download' in tasks_status
        assert 'file_transcode' in tasks_status
        assert 'file_video_extract_frames' in tasks_status
        assert 'file_video_metadata_extraction' in tasks_status

        # check single status
        collector = CollectInfoTasks()
        iterate_events_results(events=events, fun=collector)
        info = list(collector)
        presets = get_presets_applied().keys()
        assert info[0][0] == 'file_download'
        assert info[0][1].status == states.SUCCESS
        assert info[1][0] == 'file_video_metadata_extraction'
        assert info[1][1].status == states.SUCCESS
        assert info[2][0] == 'file_video_extract_frames'
        assert info[2][1].status == states.SUCCESS
        for i in info[3:]:
            assert i[0] == 'file_transcode'
            if i[1].status == states.SUCCESS:
                assert i[1].result['payload']['preset_quality'] in presets

        # check tags
        assert ObjectVersionTag.query.count() == get_tag_count()

        # check sse is called
        assert mock_sse.called

        # Deduplicated (channel, state, type) triples expected on SSE.
        messages = [
            (sse_channel, states.STARTED, 'file_download'),
            (sse_channel, states.SUCCESS, 'file_download'),
            (sse_channel, states.SUCCESS, 'file_video_metadata_extraction'),
            (sse_channel, states.STARTED, 'file_transcode'),
            (sse_channel, states.SUCCESS, 'file_transcode'),
            (sse_channel, states.REVOKED, 'file_transcode'),  # ResolutionError
            (sse_channel, states.STARTED, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'file_video_extract_frames'),
            (sse_channel, states.SUCCESS, 'update_deposit'),
        ]

        call_args = []
        for (_, kwargs) in mock_sse.call_args_list:
            type_ = kwargs['type_']
            state = kwargs['data']['state']
            channel = kwargs['channel']
            tuple_ = (channel, state, type_)
            if tuple_ not in call_args:
                call_args.append(tuple_)

        assert len(call_args) == len(messages)
        for message in messages:
            assert message in call_args

        deposit = deposit_video_resolver(video_1_depid)

        def filter_events(call_args):
            # Keep only the 'update_deposit' SSE publications.
            _, x = call_args
            return x['type_'] == 'update_deposit'

        list_kwargs = list(filter(filter_events, mock_sse.call_args_list))
        assert len(list_kwargs) == 12
        _, kwargs = list_kwargs[10]
        assert kwargs['type_'] == 'update_deposit'
        assert kwargs['channel'] == 'mychannel'
        assert kwargs['data']['state'] == states.SUCCESS
        assert kwargs['data']['meta']['payload'] == {
            'deposit_id': deposit['_deposit']['id'],
            'event_id': data['tags']['_event_id'],
            'deposit': deposit,
        }

        # check ElasticSearch is called
        ids = set(get_indexed_records_from_mock(mock_indexer))
        assert video_1_id in ids
        assert project_id in ids
        assert deposit['_cds']['state'] == {
            'file_download': states.SUCCESS,
            'file_video_metadata_extraction': states.SUCCESS,
            'file_video_extract_frames': states.SUCCESS,
            'file_transcode': states.SUCCESS,
        }

    # check feedback from anonymous user
    event_id = data['tags']['_event_id']
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 401
    # check feedback from owner
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        login_user_via_session(client, email=User.query.get(users[0]).email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 200
    # check feedback from another user without access
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        login_user_via_session(client, email=User.query.get(users[1]).email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 403
    # check feedback from another user with access
    user_2 = User.query.get(users[1])
    user_2_id = str(user_2.id)
    user_2_email = user_2.email
    # Grant user_2 update rights on the project via '_access'.
    project = deposit_project_resolver(project['_deposit']['id'])
    project['_access'] = {'update': [user_2_email]}
    project = project.commit()
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_feedback_item',
                      event_id=event_id,
                      receiver_id='avc')
    with api_app.test_client() as client:
        @identity_loaded.connect
        def load_email(sender, identity):
            # Inject the email-based need so '_access' matching works.
            if current_user.get_id() == user_2_id:
                identity.provides.update([UserNeed(user_2_email)])
        login_user_via_session(client, email=user_2_email)
        resp = client.get(url, headers=json_headers)
        assert resp.status_code == 200

    # Test cleaning!
    url = '{0}?access_token={1}'.format(data['links']['cancel'], access_token)

    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_client() as client:
        resp = client.delete(url, headers=json_headers)
        assert resp.status_code == 201

        # check that object versions and tags are deleted
        # (Create + Delete) * Num Objs
        assert ObjectVersion.query.count() == 2 * get_object_count()
        # Tags connected with the old version
        assert ObjectVersionTag.query.count() == get_tag_count()
        bucket = Bucket.query.first()
        # and bucket is empty
        assert bucket.size == 0

        record = RecordMetadata.query.filter_by(id=video_1_id).one()

        # check metadata patch are deleted
        assert 'extracted_metadata' not in record.json['_cds']

        # check the corresponding Event persisted after cleaning
        assert len(get_deposit_events(record.json['_deposit']['id'])) == 0
        assert len(get_deposit_events(record.json['_deposit']['id'],
                                      _deleted=True)) == 1

        # check no SSE message and reindexing is fired
        assert mock_sse.called is False
        assert mock_indexer.called is False
def test_project_publish_with_workflow(api_app, users, api_project, es):
    """Test publish a project with a workflow.

    Same scenario as the earlier test of this name but additionally
    patching SSE and asserting the 'update_deposit' messages published
    for both the video and the project.

    NOTE(review): this redefinition shadows the earlier test with the
    same name, so pytest only collects this one — consider renaming.
    """
    project, video_1, video_2 = api_project
    prepare_videos_for_publish([video_1, video_2])
    project_depid = project['_deposit']['id']
    project_id = str(project.id)
    video_1_depid = video_1['_deposit']['id']
    video_1_id = str(video_1.id)
    video_2_depid = video_2['_deposit']['id']

    sse_channel = 'mychannel'
    receiver_id = 'test_project_publish_with_workflow'
    # `db` is presumably the module-level invenio_db session proxy — it is
    # not a fixture parameter here; verify it is imported at module scope.
    workflow_receiver_video_failing(api_app, db, video_1,
                                    receiver_id=receiver_id,
                                    sse_channel=sse_channel)

    headers = [('Content-Type', 'application/json')]
    payload = json.dumps(dict(somekey='somevalue'))
    with mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer, \
            api_app.test_request_context(headers=headers, data=payload):
        event = Event.create(receiver_id=receiver_id)
        db.session.add(event)
        event.process()

        # check messages are sent to the sse channel: first for the
        # video, then for the project (mock_calls[i][2] is the kwargs)
        assert mock_sse.called is True
        args = list(mock_sse.mock_calls[0])[2]
        assert args['channel'] == sse_channel
        assert args['type_'] == 'update_deposit'
        assert args['data']['meta']['payload']['deposit_id'] == video_1_depid
        args = list(mock_sse.mock_calls[1])[2]
        assert args['channel'] == sse_channel
        assert args['type_'] == 'update_deposit'
        assert args['data']['meta']['payload']['deposit_id'] == project_depid

        # check video and project are indexed (video first, then project)
        assert mock_indexer.called is True
        ids = get_indexed_records_from_mock(mock_indexer)
        assert video_1_id == ids[0]
        assert project_id == ids[1]
    db.session.commit()

    # check tasks status is propagated to video and project
    video_1 = deposit_video_resolver(video_1_depid)
    expected = {u'add': u'SUCCESS', u'failing': u'FAILURE'}
    assert video_1['_cds']['state'] == expected
    assert video_1.project['_cds']['state'] == expected

    events = get_deposit_events(deposit_id=video_1_depid)
    assert len(events) == 1

    def check(project_status, video_1_status, video_2_status):
        # Re-resolve all three deposits so we assert on fresh DB state.
        project = deposit_project_resolver(project_depid)
        video_1 = deposit_video_resolver(video_1_depid)
        video_2 = deposit_video_resolver(video_2_depid)
        assert project.status == project_status
        assert video_1.status == video_1_status
        assert video_2.status == video_2_status

    check('draft', 'draft', 'draft')

    # Publishing a single video must not publish the project.
    video_2 = deposit_video_resolver(video_2_depid)
    video_2.publish()
    check('draft', 'draft', 'published')

    # Publishing the project publishes the remaining draft video too.
    project = deposit_project_resolver(project_depid)
    project.publish()
    check('published', 'published', 'published')
def check_restart_avc_workflow(api_app, event_id, access_token, json_headers,
                               data, video_1_id, video_1_depid, users):
    """Try to restart AVC workflow via REST API.

    Helper (not collected by pytest): restarts the event with a valid
    token and checks the resulting object/tag counts and reindexing,
    then verifies the permission matrix — anonymous 401, unrelated user
    403, user granted '_access' update rights 201.
    """
    with api_app.test_request_context():
        url = url_for('invenio_webhooks.event_item',
                      receiver_id='avc',
                      event_id=event_id,
                      access_token=access_token)
    with api_app.test_client() as client, \
            mock.patch('invenio_sse.ext._SSEState.publish') as mock_sse, \
            mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_indexer:
        sse_channel = 'mychannel'
        payload = dict(sse_channel=sse_channel)
        resp = client.put(url, headers=json_headers, data=json.dumps(payload))
        assert resp.status_code == 201

        # (Create + Clean + Create) * Objects Count
        assert ObjectVersion.query.count() == 3 * get_object_count()
        # (Version 1 + Version 2) * Tags Count
        assert ObjectVersionTag.query.count() == 2 * get_tag_count()

        # check extracted metadata is there
        record = RecordMetadata.query.get(video_1_id)
        assert 'extracted_metadata' in record.json['_cds']

        # check SSE
        assert mock_sse.called is True

        # check elasticsearch: video + project reindexed
        assert mock_indexer.called is True
        ids = get_indexed_records_from_mock(mock_indexer)
        setids = set(ids)
        assert len(setids) == 2
        assert video_1_id in setids

    # check restart from anonymous user (no access_token in the URL)
    with api_app.test_request_context():
        url = url_for(
            'invenio_webhooks.event_item',
            receiver_id='avc',
            event_id=event_id,
        )
    with api_app.test_client() as client:
        sse_channel = 'mychannel'
        payload = dict(sse_channel=sse_channel)
        resp = client.put(url, headers=json_headers, data=json.dumps(payload))
        assert resp.status_code == 401

    # check feedback from another user without access
    user_2 = User.query.get(users[1])
    user_2_email = user_2.email
    with api_app.test_request_context():
        url = url_for(
            'invenio_webhooks.event_item',
            receiver_id='avc',
            event_id=event_id,
        )
    with api_app.test_client() as client:
        sse_channel = 'mychannel'
        payload = dict(sse_channel=sse_channel)
        login_user_via_session(client, email=user_2_email)
        resp = client.put(url, headers=json_headers, data=json.dumps(payload))
        assert resp.status_code == 403

    # check feedback from another user with access
    user_2 = User.query.get(users[1])
    user_2_id = str(user_2.id)
    user_2_email = user_2.email
    # Grant user_2 update rights on the project via '_access'.
    project = deposit_video_resolver(video_1_depid).project
    project['_access'] = {'update': [user_2_email]}
    project.commit()
    with api_app.test_request_context():
        url = url_for(
            'invenio_webhooks.event_item',
            receiver_id='avc',
            event_id=event_id,
        )
    with api_app.test_client() as client:
        @identity_loaded.connect
        def load_email(sender, identity):
            # Inject the email-based need so '_access' matching works.
            if current_user.get_id() == user_2_id:
                identity.provides.update([UserNeed(user_2_email)])
        sse_channel = 'mychannel'
        payload = dict(sse_channel=sse_channel)
        login_user_via_session(client, email=user_2_email)
        resp = client.put(url, headers=json_headers, data=json.dumps(payload))
        assert resp.status_code == 201
def test_update_existing_keywords(cern_keywords):
    """Check DB keywords are updated, created and restored from the feed."""
    fixtures = [
        # 1: unchanged
        {'key_id': '751', 'name': '13 TeV',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 2: changed
        {'key_id': '856', 'name': 'test-changed',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 3: already deleted
        {'key_id': '21', 'name': 'ACE', 'deleted': True,
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 4: restored
        {'key_id': '14', 'name': 'AEGIS', 'deleted': True,
         'provenance': 'http://home.cern/api/tags-json-feed'},
    ]
    keywords_db = [create_keyword(data=fixture) for fixture in fixtures]
    assert RecordMetadata.query.count() == 4

    # keyword harvested
    keywords_api = {
        '751': dict(name='13 TeV',
                    provenance='http://home.cern/api/tags-json-feed'),
        '856': dict(name='Accelerating News',
                    provenance='http://home.cern/api/tags-json-feed'),
        '97': dict(name='accelerator',
                   provenance='http://home.cern/api/tags-json-feed'),
        '14': dict(name='AEGIS',
                   provenance='http://home.cern/api/tags-json-feed'),
    }

    # Minimal stand-in for a RecordIndexer: only bulk_index is exercised.
    fake_indexer = type('indexer', (object, ), {})
    fake_indexer.bulk_index = mock.Mock()
    _update_existing_keywords(indexer=fake_indexer,
                              keywords_api=keywords_api,
                              keywords_db=keywords_db)
    assert fake_indexer.bulk_index.called

    # 1 modified + 1 created + 1 restored
    indexed_ids = get_indexed_records_from_mock(fake_indexer.bulk_index)
    assert len(indexed_ids) == 3

    # 2 existing + 1 created + 1 deleted + 1 restored
    rows = RecordMetadata.query.all()
    assert len(rows) == 5

    # This test becomes hackish, so I would remove it in the future
    stored = {
        row.json['key_id']: dict(name=row.json['name'],
                                 provenance=row.json['provenance'])
        for row in rows
    }
    # count also the deleted key
    keywords_api['21'] = {
        'name': 'ACE',
        'provenance': 'http://home.cern/api/tags-json-feed'
    }
    assert keywords_api == stored
def test_keyword_harvesting_deleted_keywords(db, es, cern_keywords):
    """Test keyword harvesting.

    First harvest marks exactly one DB keyword as deleted (it is missing
    from the feed); then the feed is patched to re-add it and a second
    harvest restores it, after which ES and DB must agree.
    """
    keywords = [
        # 1: unchanged
        {'key_id': '751', 'name': '13 TeV',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 2: unchanged
        {'key_id': '856', 'name': 'Accelerating News',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 3: deleted
        {'key_id': '532', 'name': 'ACCU',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 4: unchanged
        {'key_id': '97', 'name': 'accelerator',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 6: unchanged
        {'key_id': '14', 'name': 'AEGIS',
         'provenance': 'http://home.cern/api/tags-json-feed'},
    ]
    keywords_db = []
    for keyword in keywords:
        keywords_db.append(create_keyword(data=keyword))
    assert RecordMetadata.query.count() == 5
    # sleep()s presumably wait for ES index refresh — TODO confirm; they
    # make this test slow and potentially flaky.
    sleep(2)

    # Fake requests.get response carrying the harvested feed as JSON text.
    return_value = type('test', (object, ),
                        {'text': json.dumps(cern_keywords)})
    with mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_bulk_index, \
            mock.patch('requests.get', return_value=return_value):
        keywords_harvesting.s().apply()

        # check if 1 keyword is deleted
        assert mock_bulk_index.called
        ids = get_indexed_records_from_mock(mock_bulk_index)
        assert len(ids) == 1
        deleted = Record.get_record(ids[0])
        # deleted = RecordMetadata.query.filter_by(id=ids[0]).first()
        # bulk_index was mocked above, so index the record for real here.
        RecordIndexer().index(deleted)
        sleep(2)

    # restore a key
    cern_keywords['tags'].append({
        'id': deleted['key_id'],
        'name': deleted['name']
    })

    return_value = type('test', (object, ),
                        {'text': json.dumps(cern_keywords)})
    with mock.patch('invenio_indexer.api.RecordIndexer.bulk_index'), \
            mock.patch('requests.get', return_value=return_value):
        # run again
        keywords_harvesting.s().apply()
        sleep(1)

        # ES and DB must now hold the same set of keywords.
        keywords_es = query_to_objects(
            query=KeywordSearch().params(version=True), cls=Keyword)
        keywords_db = [k.json for k in RecordMetadata.query.all()]

        sorted_db = sorted(keywords_db, key=lambda x: x['key_id'])
        sorted_es = sorted(keywords_es, key=lambda x: x['key_id'])
        assert sorted_es == sorted_db
def test_keyword_harvesting_one_time(db, es, cern_keywords):
    """Test keyword harvesting.

    A single harvesting run against a seeded DB must update a renamed
    keyword, create a new one, restore a previously-deleted one and mark
    a missing one as deleted.
    """
    keywords = [
        # 1: unchanged
        {'key_id': '751', 'name': '13 TeV',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 2: changed
        {'key_id': '856', 'name': 'test-changed',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 3: deleted
        {'key_id': '532', 'name': 'ACCU',
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 4: already deleted
        {'key_id': '21', 'name': 'ACE', 'deleted': True,
         'provenance': 'http://home.cern/api/tags-json-feed'},
        # 5: restored
        {'key_id': '14', 'name': 'AEGIS', 'deleted': True,
         'provenance': 'http://home.cern/api/tags-json-feed'},
    ]
    keywords_db = []
    for keyword in keywords:
        keywords_db.append(create_keyword(data=keyword))
    assert RecordMetadata.query.count() == 5
    # sleep() presumably waits for ES index refresh — TODO confirm.
    sleep(2)

    # Fake requests.get response carrying the harvested feed as JSON text.
    return_value = type('test', (object, ),
                        {'text': json.dumps(cern_keywords)})
    with mock.patch('invenio_indexer.api.RecordIndexer.bulk_index') \
            as mock_bulk_index, \
            mock.patch('requests.get', return_value=return_value):
        keywords_harvesting.s().apply()

        # assert 4 keywords are updated(1 update + 1 new + 1 restored + 1 deleted)
        assert mock_bulk_index.called
        ids = get_indexed_records_from_mock(mock_bulk_index)
        records = RecordMetadata.query.filter(
            RecordMetadata.id.in_(ids)).all()
        jsons = {record.json['key_id']: record.json for record in records}
        keys = jsons.keys()
        assert len(keys) == 4
        # updated name
        assert '856' in keys
        assert jsons['856']['name'] == 'Accelerating News'
        assert jsons['856']['deleted'] is False
        # newly created
        assert '97' in keys
        assert jsons['97']['name'] == 'accelerator'
        assert jsons['97']['deleted'] is False
        # restored
        assert '14' in keys
        assert jsons['14']['name'] == 'AEGIS'
        assert jsons['14']['deleted'] is False
        # marked deleted (missing from the feed)
        assert jsons['532']['key_id'] == '532'
        assert jsons['532']['name'] == 'ACCU'
        assert jsons['532']['deleted'] is True