def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Re-run a rejected ``q-bio.GN`` record and expect no auto-approval.

    A workflow object for the record is first stored as COMPLETED with
    ``approved`` set to False. The "article" workflow is then started on a
    freshly built workflow for the same record. The processed object must
    not be auto-approved, must list at least one previously rejected match
    and must end up COMPLETED.
    """
    # NOTE(review): a function with this exact name is defined again later
    # in this file; the later definition replaces this one at module level,
    # so this body is presumably never collected by pytest -- confirm.
    core, harvested_categories = core_record()
    core["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    rejected = workflow_object_class.create(
        data=core,
        status=ObjectStatus.COMPLETED,
        data_type="hep",
    )
    rejected.extra_data["approved"] = False  # reject it
    rejected.save()
    es.indices.refresh("holdingpen-hep")

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": harvested_categories,
    }

    with workflow_app.app_context(), \
            mock.patch.dict(workflow_app.config, config_overrides):
        built_id = build_workflow(core).id
        engine_uuid = start("article", object_id=built_id)
        engine = WorkflowEngine.from_uuid(engine_uuid)
        processed = engine.processed_objects[0]

        assert not processed.extra_data["auto-approved"]
        assert len(processed.extra_data["previously_rejected_matches"]) > 0
        assert processed.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Run the "article" workflow on a core record and expect auto-acceptance.

    The processed object must carry ``approved`` and ``auto-approved`` set
    to True in its extra data, and ``core`` set to True in its data.
    """
    # NOTE(review): a function with this exact name is defined again later
    # in this file; the later definition replaces this one at module level,
    # so this body is presumably never collected by pytest -- confirm.
    core, harvested_categories = core_record()
    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": harvested_categories,
    }

    with workflow_app.app_context():
        # The workflow is built before the config overrides are in effect.
        built_id = build_workflow(core).id

        with mock.patch.dict(workflow_app.config, config_overrides):
            engine_uuid = start("article", object_id=built_id)
            engine = WorkflowEngine.from_uuid(engine_uuid)
            processed = engine.processed_objects[0]

            assert processed.extra_data["approved"] is True
            assert processed.extra_data["auto-approved"] is True
            assert processed.data["core"] is True
def test_keep_previously_rejected_from_fully_harvested_category_is_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Re-run a previously rejected core record and expect auto-approval.

    A workflow object for the record is first stored as COMPLETED with
    ``approved`` set to False. The "article" workflow is then started on the
    same record with ``ARXIV_CATEGORIES`` set to the categories returned by
    ``core_record()``. The processed object must be auto-approved, must list
    at least one previously rejected match and must end up COMPLETED.
    """
    record, categories = core_record()

    obj = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type="hep",
    )
    obj.extra_data["approved"] = False  # reject it
    obj.save()
    es.indices.refresh("holdingpen-hep")

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_uuid = start("article", [record])
            eng = WorkflowEngine.from_uuid(workflow_uuid)
            obj2 = eng.processed_objects[0]

            assert obj2.extra_data["auto-approved"]
            assert len(obj2.extra_data["previously_rejected_matches"]) > 0
            # Check the object produced by this run. ``obj`` was created as
            # COMPLETED by this test and is never reloaded, so asserting on
            # its status could not fail on workflow behaviour.
            assert obj2.status == ObjectStatus.COMPLETED
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check that a rejected ``q-bio.GN`` record is not auto-approved again.

    Steps: store a COMPLETED workflow object for the record with
    ``approved`` set to False, refresh the ``holdingpen-hep`` index, then
    build and start a new "article" workflow for the same record. The
    processed object is expected to have a falsy ``auto-approved`` flag, a
    non-empty ``previously_rejected_matches`` list and status COMPLETED.
    """
    payload, arxiv_categories = core_record()
    first_eprint = payload["arxiv_eprints"][0]
    first_eprint["categories"] = ["q-bio.GN"]

    earlier_obj = workflow_object_class.create(
        data=payload, status=ObjectStatus.COMPLETED, data_type="hep"
    )
    earlier_obj.extra_data["approved"] = False  # reject it
    earlier_obj.save()
    es.indices.refresh("holdingpen-hep")

    patched_settings = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": arxiv_categories,
    }

    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, patched_settings):
            new_workflow = build_workflow(payload)
            run_uuid = start("article", object_id=new_workflow.id)
            result = WorkflowEngine.from_uuid(run_uuid).processed_objects[0]

            assert not result.extra_data["auto-approved"]
            assert len(result.extra_data["previously_rejected_matches"]) > 0
            assert result.status == ObjectStatus.COMPLETED
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_download,
    mocked_is_pdf,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    workflow_app,
    mocked_external_services,
):
    """Start the "article" workflow on a core record and expect auto-acceptance.

    The record is passed to ``start`` directly as a one-element list. The
    first processed object must have ``approved`` and ``auto-approved`` set
    to True in its extra data and ``core`` set to True in its data.
    """
    payload, arxiv_categories = core_record()
    patched_settings = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": arxiv_categories,
    }

    with workflow_app.app_context(), \
            mock.patch.dict(workflow_app.config, patched_settings):
        run_uuid = start("article", [payload])
        result = WorkflowEngine.from_uuid(run_uuid).processed_objects[0]

        assert result.extra_data["approved"] is True
        assert result.extra_data["auto-approved"] is True
        assert result.data["core"] is True