コード例 #1
0
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A holdingpen match coming from another source must not stop the workflow.

    Starts one workflow and asserts it is HALTED, then starts a second one
    from the same record with a changed title and acquisition source, and
    checks the matching marks stored in the second workflow's extra data.
    """
    record = generate_record()

    # First submission: ends up pending in the holdingpen.
    pending_wf_id = build_workflow(record).id
    pending_eng_uuid = start('article', object_id=pending_wf_id)
    es.indices.refresh('holdingpen-hep')
    wf_to_match = WorkflowEngine.from_uuid(pending_eng_uuid).objects[0].id
    assert workflow_object_class.get(wf_to_match).status == ObjectStatus.HALTED

    # Second submission: matches the pending one in the holdingpen, but is
    # expected to continue because its source differs.
    record['titles'][0]['title'] = (
        'This is an update that will match the wf in the holdingpen'
    )
    record['acquisition_source']['source'] = 'but not the source'
    update_wf_id = build_workflow(record).id
    update_eng_uuid = start('article', object_id=update_wf_id)
    update_obj = WorkflowEngine.from_uuid(update_eng_uuid).objects[0]
    update_extra_data = update_obj.extra_data

    assert update_extra_data['already-in-holding-pen'] is True
    assert update_extra_data['holdingpen_matches'] == [wf_to_match]
    assert update_extra_data['previously_rejected'] is False
    assert not update_extra_data.get('stopped-matched-holdingpen-wf')
コード例 #2
0
def test_match_in_holdingpen_different_sources_continues(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Verify a workflow keeps going when its holdingpen match has another source.

    The first workflow started from the generated record is asserted to be
    HALTED; the second one, started after editing the title and the
    acquisition source, must list the first as a holdingpen match and must
    not carry the ``stopped-matched-holdingpen-wf`` flag.
    """
    record = generate_record()

    halted_uuid = start('article', object_id=build_workflow(record).id)
    es.indices.refresh('holdingpen-hep')
    halted_engine = WorkflowEngine.from_uuid(halted_uuid)
    wf_to_match = halted_engine.objects[0].id
    halted_obj = workflow_object_class.get(wf_to_match)
    # The generated workflow is now pending in the holdingpen.
    assert halted_obj.status == ObjectStatus.HALTED

    new_title = 'This is an update that will match the wf in the holdingpen'
    record['titles'][0]['title'] = new_title
    record['acquisition_source']['source'] = 'but not the source'

    # This one matches in the holdingpen, yet continues because its source
    # is different.
    continued_uuid = start('article', object_id=build_workflow(record).id)
    continued_obj = WorkflowEngine.from_uuid(continued_uuid).objects[0]

    assert continued_obj.extra_data['already-in-holding-pen'] is True
    assert continued_obj.extra_data['holdingpen_matches'] == [wf_to_match]
    assert continued_obj.extra_data['previously_rejected'] is False
    assert not continued_obj.extra_data.get('stopped-matched-holdingpen-wf')
コード例 #3
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A workflow matching a previously rejected one is flagged as such.

    The first workflow is rejected (``approved`` set to False) and continued
    until COMPLETED. A second workflow from the same record with a changed
    title must then report the first one in ``previously_rejected_matches``.
    """
    record = generate_record()

    # Run a first workflow and reject its record.
    first_eng_uuid = start("article", object_id=build_workflow(record).id)
    obj_id = WorkflowEngine.from_uuid(first_eng_uuid).objects[0].id
    rejected = workflow_object_class.get(obj_id)
    rejected.extra_data["approved"] = False
    rejected.continue_workflow()

    # Fetch it again to inspect the state after continuing.
    rejected = workflow_object_class.get(obj_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    # This update matches in the holdingpen and stops because the matched
    # workflow was rejected.
    record["titles"][0]["title"] = (
        "This is an update that will match the wf in the holdingpen"
    )
    second_eng_uuid = start("article", object_id=build_workflow(record).id)
    obj2 = WorkflowEngine.from_uuid(second_eng_uuid).objects[0]

    assert obj2.extra_data["previously_rejected"] is True
    assert obj2.extra_data["previously_rejected_matches"] == [obj_id]
コード例 #4
0
def test_cli_restart_by_error_restarts_one_wf_from_current_step(app_cli_runner):
    """Check that ``restart_by_error`` only picks workflows whose error matches.

    Two workflows are put in ERROR state with different ``_error_msg``
    values. The command is invoked with ``RobotUpload`` and its output is
    expected to report exactly one workflow to restart from the current step.
    """
    obj_1 = build_workflow({}, data_type='hep')
    obj_1.status = ObjectStatus.ERROR
    obj_1.extra_data["_error_msg"] = "Error in SendRobotUpload"
    obj_1.save()

    obj_2 = build_workflow({}, data_type='hep')
    obj_2.status = ObjectStatus.ERROR
    obj_2.extra_data["_error_msg"] = "Error in WebColl"
    # Save the second workflow; the original saved obj_1 twice, so the
    # non-matching workflow was never explicitly saved.
    obj_2.save()

    result = app_cli_runner.invoke(workflows, ['restart_by_error', 'RobotUpload'])
    assert "Found 1 workflows to restart from current step" in result.output_bytes
コード例 #5
0
ファイル: test_views.py プロジェクト: inspirehep/inspire-next
def test_responses_with_etag(workflow_app):
    """Check conditional GET handling of the holdingpen API endpoint.

    A plain GET must return 200 and expose ``ETag`` / ``Last-Modified``
    headers. Replaying them as ``If-None-Match`` / ``If-Modified-Since``
    must return 304, while a non-matching ``If-None-Match`` returns 200.
    """
    factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )

    obj = workflow_object_class.get(
        build_workflow(factory.record_metadata.json).id
    )
    obj.save()
    db.session.commit()

    workflow_url = '/api/holdingpen/{}'.format(obj.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')

        initial_response = client.get(workflow_url)
        assert initial_response.status_code == 200

        etag = initial_response.headers['ETag']
        last_modified = initial_response.headers['Last-Modified']

        # (request headers, expected status code), issued in this order.
        conditional_cases = (
            ({'If-Modified-Since': last_modified}, 304),
            ({'If-None-Match': etag}, 304),
            ({'If-None-Match': 'Jessica Jones'}, 200),
        )
        for request_headers, expected_status in conditional_cases:
            response = client.get(workflow_url, headers=request_headers)
            assert response.status_code == expected_status
コード例 #6
0
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """The validation callback answers 404 when called with workflow id 1111.

    A workflow is started only to obtain ``data`` and ``extra_data`` for the
    callback payload; the id sent to the callback is the literal 1111.
    """
    record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    eng_uuid = start('article', object_id=build_workflow(record).id)
    obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    response = do_validation_callback(
        workflow_app, 1111, obj.data, obj.extra_data)
    data = json.loads(response.get_data())

    assert response.status_code == 404
    assert 'WORKFLOW_NOT_FOUND' == data['error_code']
    assert 'The workflow with id "1111" was not found.' == data['message']
コード例 #7
0
def test_validation_error_callback_with_a_valid(workflow_app):
    """The validation callback rejects a workflow that is not in ERROR state.

    The workflow is asserted not to be in ERROR after starting; calling the
    callback for it must give a 400 with ``WORKFLOW_NOT_IN_ERROR_STATE``.
    """
    record = {
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }

    eng_uuid = start('article', object_id=build_workflow(record).id)
    obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]
    assert obj.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, obj.id, obj.data, obj.extra_data)
    data = json.loads(response.get_data())

    assert response.status_code == 400
    assert 'WORKFLOW_NOT_IN_ERROR_STATE' == data['error_code']
コード例 #8
0
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """Starting the article workflow on an invalid record raises and records the error.

    The record only has ``document_type`` and ``titles``. ``start`` must
    raise ``ValidationError``, and the workflow object must be left in ERROR
    state with an error message, a callback URL and validation errors.
    """
    record = {
        'document_type': ['article'],
        'titles': [{'title': 'A title'}],
    }
    workflow_id = build_workflow(record).id

    with pytest.raises(ValidationError):
        start('article', object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)
    assert obj.status == ObjectStatus.ERROR

    extra_data = obj.extra_data
    assert '_error_msg' in extra_data
    assert 'required' in extra_data['_error_msg']

    assert (
        'http://localhost:5000/callback/workflows/resolve_validation_errors'
        == extra_data['callback_url']
    )

    assert extra_data['validation_errors']
    first_error = extra_data['validation_errors'][0]
    assert 'message' in first_error
    assert 'path' in first_error
コード例 #9
0
def test_conflict_creates_ticket(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """A merge conflict opens a ticket, and continuing the workflow resolves it.

    The first recorded external request must start with the expected ticket
    creation payload. After ``continue_workflow`` the second recorded request
    must target the ticket edit URL with a ``resolved`` status.
    """
    filters_target = (
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    )
    with patch(filters_target, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        start('article', object_id=update_workflow_id)

        wf = workflow_object_class.get(update_workflow_id)
        ticket_template = u'content=Queue%3A+HEP_conflicts%0AText%3A+Merge+conflict+needs+to+be+resolved.%0A++%0A++https%3A%2F%2Flocalhost%3A5000%2Feditor%2Fholdingpen%2F{wf_id}%0ASubject%3A+arXiv%3A1703.04802+%28%23None%29%0Aid%3A+ticket%2Fnew%0ACF'
        expected_ticket = ticket_template.format(wf_id=wf.id)

        creation_request = mocked_external_services.request_history[0]
        assert creation_request.text.startswith(expected_ticket)
        assert wf.extra_data['conflict-ticket-id']

        close_url_template = 'http://rt.inspire/ticket/{ticket_id}/edit'
        expected_ticket_close_url = close_url_template.format(
            ticket_id=wf.extra_data['conflict-ticket-id'])

        wf.continue_workflow()

        assert (
            mocked_external_services.request_history[1].url
            == expected_ticket_close_url
        )
        assert (
            mocked_external_services.request_history[1].text
            == u'content=Status%3A+resolved'
        )
コード例 #10
0
def test_cli_restart_by_error_restarts_one_wf_from_beginning(app_cli_runner):
    """Check ``restart_by_error --from-beginning`` reports every matching workflow.

    Two workflows are put in ERROR state, both with a ``WebColl`` error
    message. Invoking the command with ``WebColl`` and ``--from-beginning``
    is expected to report two workflows to restart from the first step.
    """
    obj_1 = build_workflow({}, data_type='hep')
    obj_1.status = ObjectStatus.ERROR
    obj_1.extra_data["_error_msg"] = "Error in WebColl number 1"
    obj_1.save()

    obj_2 = build_workflow({}, data_type='hep')
    obj_2.status = ObjectStatus.ERROR
    obj_2.extra_data["_error_msg"] = "Error in WebColl number 2"
    # Save the second workflow; the original saved obj_1 twice, so the test
    # did not explicitly save the second workflow it expects to be found.
    obj_2.save()

    result = app_cli_runner.invoke(
        workflows, ['restart_by_error', 'WebColl', '--from-beginning'])
    output = result.output_bytes

    assert 'Found 2 workflows to restart from first step\n' in output
コード例 #11
0
def test_merge_with_conflicts_callback_url(
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
    disable_file_upload,
    enable_merge_on_update,
):
    """Exercise the merge-conflict callback of a halted update workflow.

    The update must halt with one conflict and expose the
    ``resolve_merge_conflicts`` callback URL. PUTting the workflow back to
    that URL unchanged must answer 200 with a "saved with conflicts" message,
    leave the workflow HALTED and leave no ``arxiv`` root for the record.
    """
    filters_target = (
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    )
    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.HALTED
        assert (
            'http://localhost:5000/callback/workflows/resolve_merge_conflicts'
            == obj.extra_data.get('callback_url')
        )
        assert len(conflicts) == 1

        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # Send the workflow back as-is through its own callback URL.
        request_body = json.dumps({
            'id': obj.id,
            'metadata': obj.data,
            '_extra_data': obj.extra_data
        })
        with workflow_app.test_client() as client:
            response = client.put(
                obj.extra_data.get('callback_url'),
                data=request_body,
                content_type='application/json',
            )

        data = json.loads(response.get_data())
        expected_message = 'Workflow {} has been saved with conflicts.'.format(
            obj.id)

        assert response.status_code == 200
        assert expected_message == data['message']

        # Reload the workflow object to check its state after the callback.
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]
        assert obj.status == ObjectStatus.HALTED

        updated_root = read_wf_record_source(
            factory.record_metadata.id, 'arxiv')
        assert updated_root is None
コード例 #12
0
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A record matching a previously rejected workflow is not auto-approved.

    A COMPLETED workflow object with ``approved`` set to False is stored for
    the record (arXiv category ``q-bio.GN``). A new workflow for the same
    record must complete without ``auto-approved`` and with at least one
    entry in ``previously_rejected_matches``.
    """
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    # Store an already completed workflow for this record and reject it.
    rejected = workflow_object_class.create(
        data=record, status=ObjectStatus.COMPLETED, data_type="hep"
    )
    rejected.extra_data["approved"] = False
    rejected.save()
    es.indices.refresh("holdingpen-hep")

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }
    with workflow_app.app_context(), \
            mock.patch.dict(workflow_app.config, extra_config):
        eng_uuid = start("article", object_id=build_workflow(record).id)
        obj2 = WorkflowEngine.from_uuid(eng_uuid).processed_objects[0]

        assert not obj2.extra_data["auto-approved"]
        assert len(obj2.extra_data["previously_rejected_matches"]) > 0
        assert obj2.status == ObjectStatus.COMPLETED
コード例 #13
0
def test_workflow_checks_affiliations_if_record_is_not_important(
    mocked_is_auto_rejected,
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the collections sent out for a record with specific raw affiliations.

    Authors get the raw affiliations IN2P3, Cern and Fermilab. The first
    recorded external request must carry ``_collections`` containing
    "CDS Hidden", "HAL Hidden" and "Fermilab", and not "Literature".
    """
    record = generate_record()
    first_author, second_author = record['authors'][0], record['authors'][1]
    first_author['raw_affiliations'] = [{"value": "IN2P3"}, {"value": "Cern"}]
    second_author['raw_affiliations'] = [{"value": "Fermilab"}]

    workflow_id = build_workflow(record).id
    config_overrides = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000"
    }
    with patch.dict(workflow_app.config, config_overrides):
        start("article", object_id=workflow_id)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    assert "CDS Hidden" in collections_in_record
    assert "HAL Hidden" in collections_in_record
    assert "Fermilab" in collections_in_record
    assert "Literature" not in collections_in_record
コード例 #14
0
def test_previously_rejected_from_not_fully_harvested_category_is_not_auto_approved(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Ensure a rejected earlier workflow prevents auto-approval of a new one.

    First a COMPLETED, rejected (``approved`` False) workflow object is
    created for a record in category ``q-bio.GN``. Then the article workflow
    is run on the same record and must end COMPLETED, not auto-approved, and
    with a non-empty ``previously_rejected_matches`` list.
    """
    record, categories = core_record()
    record["arxiv_eprints"][0]["categories"] = ["q-bio.GN"]

    previous_wf = workflow_object_class.create(
        data=record,
        status=ObjectStatus.COMPLETED,
        data_type="hep",
    )
    previous_wf.extra_data["approved"] = False  # mark it as rejected
    previous_wf.save()
    es.indices.refresh("holdingpen-hep")

    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }
    with workflow_app.app_context():
        with mock.patch.dict(workflow_app.config, extra_config):
            new_wf_id = build_workflow(record).id
            engine = WorkflowEngine.from_uuid(
                start("article", object_id=new_wf_id)
            )
            obj2 = engine.processed_objects[0]
            obj2_extra = obj2.extra_data

            assert not obj2_extra["auto-approved"]
            assert len(obj2_extra["previously_rejected_matches"]) > 0
            assert obj2.status == ObjectStatus.COMPLETED
コード例 #15
0
def test_workflow_without_validation_error(
    fake_validation,
    mocked_match,
    mocked_magpie_json_api_request,
    mocked_beard_json_api_request,
    workflow_app,
    mocked_external_services,
):
    """Run the article workflow on a record and check validation call count.

    After starting the workflow, ``fake_validation`` must have been called
    twice and the workflow object's status must be WAITING.
    """
    record_without_validation_error = dict([
        ("$schema", "https://labs.inspirehep.net/schemas/records/hep.json"),
        ("titles", [{"title": "Update without conflicts title."}]),
        ("arxiv_eprints", [
            {"categories": ["hep-lat", "hep-th"], "value": "1703.04802"},
        ]),
        ("document_type", ["article"]),
        ("_collections", ["Literature"]),
        ("acquisition_source", {"source": "arXiv"}),
    ])

    workflow = build_workflow(record_without_validation_error)
    start("article", object_id=workflow.id)

    assert fake_validation.call_count == 2
    assert workflow.status == ObjectStatus.WAITING
コード例 #16
0
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """An update matching two existing records exactly must halt the workflow.

    Two records are created from fixture files, then an update loaded from
    ``fixtures/multiple_matches_arxiv_update.json`` is run through the
    article workflow. The workflow must report two distinct exact matches and
    halt on the ``resolve_multiple_exact_matches`` action.
    """
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(__name__,
                                        "multiple_matches_arxiv.json",
                                        index_name="records-hep")

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(__name__,
                                        "multiple_matches_publisher.json",
                                        index_name="records-hep")

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json")
    # Use a context manager so the fixture file handle is always closed.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    assert len(set(obj.extra_data["matches"]["exact"])) == 2

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
コード例 #17
0
def test_workflow_do_not_changes_to_hidden_if_record_authors_do_not_have_interesting_affiliations(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_beard_api,
    mocked_actions_download,
    mocked_is_pdf_link,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check an approved, unmodified generated record is sent as plain Literature.

    The workflow is started, approved and continued synchronously. The first
    recorded external request must carry ``_collections`` equal to
    ``["Literature"]``, with none of the hidden or Fermilab collections.
    """
    record = generate_record()
    workflow_id = build_workflow(record).id

    config_overrides = {
        'FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT': True,
        'INSPIREHEP_URL': "http://web:8000"
    }
    with patch.dict(workflow_app.config, config_overrides):
        start("article", object_id=workflow_id)

        # Approve the workflow and resume it.
        wf = workflow_object_class.get(workflow_id)
        wf.extra_data['approved'] = True
        wf.save()
        wf.continue_workflow(delayed=False)

    first_request = mocked_external_services.request_history[0]
    collections_in_record = first_request.json()['_collections']

    assert "CDS Hidden" not in collections_in_record
    assert "HAL Hidden" not in collections_in_record
    assert "Fermilab" not in collections_in_record
    assert ["Literature"] == collections_in_record
コード例 #18
0
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """An invalid record makes ``start`` raise and leaves the workflow in ERROR.

    The stored workflow must carry an ``_error_msg`` mentioning "required",
    the validation-error callback URL, and a non-empty ``validation_errors``
    list whose first item has ``message`` and ``path`` keys.
    """
    invalid_record = {"document_type": ["article"]}
    invalid_record["titles"] = [{"title": "A title"}]

    workflow_id = build_workflow(invalid_record).id
    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    failed_wf = workflow_object_class.get(workflow_id)

    assert failed_wf.status == ObjectStatus.ERROR
    assert "_error_msg" in failed_wf.extra_data
    assert "required" in failed_wf.extra_data["_error_msg"]

    expected_url = (
        "http://localhost:5000/callback/workflows/resolve_validation_errors"
    )
    assert expected_url == failed_wf.extra_data["callback_url"]

    assert failed_wf.extra_data["validation_errors"]
    for expected_key in ("message", "path"):
        assert expected_key in failed_wf.extra_data["validation_errors"][0]
コード例 #19
0
def test_merge_without_conflicts_handles_update_without_acquisition_source_and_acts_as_rootless(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """An update lacking an acquisition source merges cleanly and stores no root.

    The workflow must complete without conflicts or callback URL, be marked
    as an update with an empty original root, and no workflow record sources
    must exist for the record afterwards.
    """
    filters_target = (
        'inspire_json_merger.config.PublisherOnArxivOperations.conflict_filters'
    )
    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(
            RECORD_WITHOUT_ACQUISITION_SOURCE_AND_NO_CONFLICTS).id
        eng_uuid = start('article', object_id=update_workflow_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_head_revision'] == 0
        assert obj.extra_data['merger_original_root'] == {}

        # The source is unknown, so no new root is saved.
        assert not read_all_wf_record_sources(factory.record_metadata.id)
コード例 #20
0
def test_validation_error_callback_with_malformed_with_invalid_types(workflow_app):
    """The validation callback answers 400 MALFORMED for wrongly typed fields.

    The callback is called with strings in place of the workflow id and the
    extra data; the response must include an ``errors`` entry.
    """
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    eng_uuid = start("article", object_id=build_workflow(record).id)
    obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

    bogus_id = "Alias Investigations"
    bogus_extra_data = "Jessica Jones"
    response = do_validation_callback(
        workflow_app, bogus_id, obj.data, bogus_extra_data)
    data = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == data["error_code"]
    assert "The workflow request is malformed." == data["message"]
    assert "errors" in data
コード例 #21
0
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """A record from ``core_record()`` is approved automatically and marked core.

    The workflow is started with the ``ARXIV_CATEGORIES`` returned by
    ``core_record()``. The processed object must be ``approved``,
    ``auto-approved`` and have ``core`` set to True in its data.
    """
    record, categories = core_record()
    extra_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        "ARXIV_CATEGORIES": categories,
    }

    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        # The config overrides are only applied while the workflow starts.
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_uuid = start("article", object_id=workflow_id)

        processed = WorkflowEngine.from_uuid(workflow_uuid).processed_objects[0]

        assert processed.extra_data["approved"] is True
        assert processed.extra_data["auto-approved"] is True
        assert processed.data["core"] is True
コード例 #22
0
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Check the restart counters read 0 after the first start and 1 after a second.

    Both ``extra_data['restart-count']`` and the copy under
    ``source_data.persistent_data.marks`` are checked each time.
    """
    record = generate_record()

    with workflow_app.app_context():
        obj_id = build_workflow(record).id
        start('article', object_id=obj_id)

        obj = workflow_object_class.get(obj_id)

        def assert_restart_count(expected):
            # Same two checks, in the same order, for each start.
            marks = obj.extra_data['source_data']['persistent_data']['marks']
            assert marks['restart-count'] == expected
            assert obj.extra_data['restart-count'] == expected

        assert_restart_count(0)

        # Set the callback position to [0] and persist it before starting again.
        obj.callback_pos = [0]
        obj.save()
        db.session.commit()

        start('article', object_id=obj_id)

        assert_restart_count(1)
コード例 #23
0
def test_validation_error_callback_with_a_valid(workflow_app):
    """Calling the validation callback for a non-errored workflow yields a 400.

    After starting, the workflow is asserted not to be in ERROR state; the
    callback must answer with ``WORKFLOW_NOT_IN_ERROR_STATE``.
    """
    valid_record = dict(
        _collections=["Literature"],
        document_type=["article"],
        titles=[{"title": "A title"}],
    )

    workflow_id = build_workflow(valid_record).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = engine.objects[0]

    assert wf_obj.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, wf_obj.id, wf_obj.data, wf_obj.extra_data)
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "WORKFLOW_NOT_IN_ERROR_STATE" == payload["error_code"]
コード例 #24
0
def test_merge_with_conflicts_rootful(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """An update with conflicts halts and records merger metadata.

    No root is inserted beforehand, so the original root is expected to be
    ``{}``. The workflow must halt with one conflict, expose a callback URL
    and store ``RECORD_WITH_CONFLICTS`` as ``merger_root``.
    """
    filters_target = (
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    )
    with patch(filters_target, ['acquisition_source.source']):
        TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        # By default the root is {}.
        eng_uuid = start('article', object_id=update_workflow_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        conflicts = obj.extra_data.get('conflicts')
        assert obj.status == ObjectStatus.HALTED
        assert len(conflicts) == 1

        assert obj.extra_data.get('callback_url') is not None
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_root'] == RECORD_WITH_CONFLICTS
        assert obj.extra_data['merger_head_revision'] == 0
        assert obj.extra_data['merger_original_root'] == {}
コード例 #25
0
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback reports VALIDATION_ERROR for a still-invalid record.

    The record has ``preprint_date`` set to a non-date string, so ``start``
    raises ``ValidationError`` and the workflow ends in ERROR. Sending the
    same data to the callback must answer 400 with exactly one validation
    error and a callback URL in the returned workflow.
    """
    invalid_record = dict(
        _collections=["Literature"],
        document_type=["article"],
        titles=[{"title": "A title"}],
        preprint_date="Jessica Jones",
    )

    workflow_id = build_workflow(invalid_record).id
    with pytest.raises(ValidationError):
        start("article", object_id=workflow_id)

    errored = workflow_object_class.get(workflow_id)
    assert errored.status == ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, errored.id, errored.data, errored.extra_data)
    data = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == data["error_code"]
    assert "Validation error." == data["message"]

    returned_extra_data = data["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
コード例 #26
0
def test_merge_without_conflicts_rootful(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """With an existing ``arxiv`` root, the update merges without conflicts.

    ``ARXIV_ROOT`` is inserted as the record's ``arxiv`` source before the
    workflow starts. The workflow must complete without conflicts, report
    ``ARXIV_ROOT`` as the original root, and leave ``RECORD_WITH_CONFLICTS``
    stored as the new ``arxiv`` root.
    """
    filters_target = (
        'inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters'
    )
    with patch(filters_target, ['acquisition_source.source']):
        factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')
        record_uuid = factory.record_metadata.id

        update_workflow_id = build_workflow(RECORD_WITH_CONFLICTS).id

        insert_wf_record_source(
            json=ARXIV_ROOT, record_uuid=record_uuid, source='arxiv')

        eng_uuid = start('article', object_id=update_workflow_id)
        obj = WorkflowEngine.from_uuid(eng_uuid).objects[0]

        conflicts = obj.extra_data.get('conflicts')

        assert obj.status == ObjectStatus.COMPLETED
        assert not conflicts

        assert obj.extra_data.get('callback_url') is None
        assert obj.extra_data.get('is-update') is True
        assert obj.extra_data['merger_head_revision'] == 0
        assert obj.extra_data['merger_original_root'] == ARXIV_ROOT

        updated_root = read_wf_record_source(factory.record_metadata.id, 'arxiv')
        assert updated_root.json == RECORD_WITH_CONFLICTS
コード例 #27
0
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Calling the validation callback with workflow id 1111 must yield a 404.

    The started workflow only provides ``data`` and ``extra_data`` for the
    request; the response must carry the ``WORKFLOW_NOT_FOUND`` error code
    and a message naming id 1111.
    """
    record = dict(
        _collections=["Literature"],
        document_type=["article"],
        titles=[{"title": "A title"}],
    )

    workflow_id = build_workflow(record).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = engine.objects[0]

    response = do_validation_callback(
        workflow_app, 1111, wf_obj.data, wf_obj.extra_data)
    payload = json.loads(response.get_data())

    assert response.status_code == 404
    assert "WORKFLOW_NOT_FOUND" == payload["error_code"]
    assert 'The workflow with id "1111" was not found.' == payload["message"]
コード例 #28
0
def test_harvesting_arxiv_workflow_core_record_auto_accepted(
    mocked_refextract_extract_refs,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Run the article workflow on a core record and expect automatic approval.

    The processed workflow object must have ``approved`` and
    ``auto-approved`` set to True in its extra data, and ``core`` set to
    True in its data.
    """
    record, categories = core_record()

    extra_config = dict(
        BEARD_API_URL="http://example.com/beard",
        MAGPIE_API_URL="http://example.com/magpie",
        ARXIV_CATEGORIES=categories,
    )
    with workflow_app.app_context():
        workflow_id = build_workflow(record).id
        with mock.patch.dict(workflow_app.config, extra_config):
            workflow_uuid = start("article", object_id=workflow_id)

        engine = WorkflowEngine.from_uuid(workflow_uuid)
        accepted = engine.processed_objects[0]

        for flag in ("approved", "auto-approved"):
            assert accepted.extra_data[flag] is True
        assert accepted.data["core"] is True
コード例 #29
0
def test_validation_error_callback_with_malformed_with_invalid_types(
        workflow_app):
    """Wrongly typed id and extra data make the validation callback answer MALFORMED.

    Strings are passed where the workflow id and the extra data are expected;
    the 400 response must contain the malformed-request message and an
    ``errors`` key.
    """
    record = dict(
        _collections=["Literature"],
        document_type=["article"],
        titles=[{"title": "A title"}],
    )

    workflow_id = build_workflow(record).id
    engine = WorkflowEngine.from_uuid(start("article", object_id=workflow_id))
    wf_obj = engine.objects[0]

    callback_args = (
        "Alias Investigations",  # id
        wf_obj.data,
        "Jessica Jones",  # extra_data
    )
    response = do_validation_callback(workflow_app, *callback_args)
    payload = json.loads(response.get_data())

    assert response.status_code == 400
    assert "MALFORMED" == payload["error_code"]
    assert "The workflow request is malformed." == payload["message"]
    assert "errors" in payload
コード例 #30
0
def test_workflow_restart_count_initialized_properly(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """The restart counter reads 0 after the first start and 1 after the
    workflow is started a second time."""
    record = generate_record()

    with workflow_app.app_context():
        wf_id = build_workflow(record).id
        start('article', object_id=wf_id)

        wf_object = workflow_object_class.get(wf_id)

        # The counter is kept both in the persistent marks and at top level.
        assert wf_object.extra_data['source_data']['persistent_data']['marks']['restart-count'] == 0
        assert wf_object.extra_data['restart-count'] == 0

        # Reset the callback position and persist it before starting again.
        wf_object.callback_pos = [0]
        wf_object.save()
        db.session.commit()

        start('article', object_id=wf_id)

        assert wf_object.extra_data['source_data']['persistent_data']['marks']['restart-count'] == 1
        assert wf_object.extra_data['restart-count'] == 1
コード例 #31
0
def test_validation_error_callback_with_validation_error(workflow_app):
    """The validation callback answers 400/VALIDATION_ERROR when the
    submitted workflow data still fails validation."""
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
        "preprint_date": "Jessica Jones",
    }
    wf_id = build_workflow(record).id

    with pytest.raises(ValidationError):
        start("article", object_id=wf_id)

    failed = workflow_object_class.get(wf_id)
    assert failed.status == ObjectStatus.ERROR

    # Send the unchanged data and extra data back through the callback.
    response = do_validation_callback(
        workflow_app, failed.id, failed.data, failed.extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "VALIDATION_ERROR" == body["error_code"]
    assert "Validation error." == body["message"]

    returned_extra_data = body["workflow"]["_extra_data"]
    assert returned_extra_data["callback_url"]
    assert len(returned_extra_data["validation_errors"]) == 1
コード例 #32
0
def test_responses_with_etag(workflow_app):
    """The holdingpen endpoint answers 304 to matching conditional requests
    and 200 when the supplied ETag does not match."""
    record_factory = TestRecordMetadata.create_from_kwargs(
        json={'titles': [{'title': 'Etag version'}]}
    )

    wf_id = build_workflow(record_factory.record_metadata.json).id
    wf_object = workflow_object_class.get(wf_id)
    wf_object.save()
    db.session.commit()

    url = '/api/holdingpen/{}'.format(wf_object.id)

    with workflow_app.test_client() as client:
        login_user_via_session(client, email='*****@*****.**')

        # Plain request: collect the validators for the conditional requests.
        plain = client.get(url)
        assert plain.status_code == 200
        etag = plain.headers['ETag']
        last_modified = plain.headers['Last-Modified']

        since = client.get(url, headers={'If-Modified-Since': last_modified})
        assert since.status_code == 304

        same_etag = client.get(url, headers={'If-None-Match': etag})
        assert same_etag.status_code == 304

        other_etag = client.get(url, headers={'If-None-Match': 'Jessica Jones'})
        assert other_etag.status_code == 200
コード例 #33
0
def test_update_exact_matched_goes_trough_the_workflow(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """A workflow built from ``record_from_db`` is flagged as an exact-matched
    update, gets approved and reaches COMPLETED."""
    record = record_from_db
    wf_id = build_workflow(record).id
    engine_uuid = start("article", object_id=wf_id)
    engine = WorkflowEngine.from_uuid(engine_uuid)
    wf_object = workflow_object_class.get(engine.objects[0].id)

    # Nothing matched in the holdingpen.
    assert wf_object.extra_data["holdingpen_matches"] == []
    assert wf_object.extra_data["previously_rejected"] is False
    assert not wf_object.extra_data.get("stopped-matched-holdingpen-wf")

    # The record's own control number is the exact and approved match.
    assert wf_object.extra_data["is-update"]
    assert wf_object.extra_data["exact-matched"]
    assert wf_object.extra_data["matches"]["exact"] == [record.get("control_number")]
    assert wf_object.extra_data["matches"]["approved"] == record.get("control_number")

    assert wf_object.extra_data["approved"]
    assert wf_object.status == ObjectStatus.COMPLETED
コード例 #34
0
def test_merge_with_disabled_merge_on_update_feature_flag(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
):
    """With FEATURE_FLAG_ENABLE_MERGER set to False the update completes
    without conflicts, callback url, merger root or stored arxiv root."""
    merger_disabled = {'FEATURE_FLAG_ENABLE_MERGER': False}

    with patch.dict(workflow_app.config, merger_disabled):
        record_factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        wf_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        engine_uuid = start('article', object_id=wf_id)

        wf_object = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        assert wf_object.status == ObjectStatus.COMPLETED

        # (key, expected value) pairs, checked by identity in this order.
        expected_extra_data = (
            ('callback_url', None),
            ('conflicts', None),
            ('merged', True),
            ('merger_root', None),
            ('is-update', True),
        )
        for key, expected in expected_extra_data:
            assert wf_object.extra_data.get(key) is expected

        stored_root = read_wf_record_source(record_factory.record_metadata.id, 'arxiv')
        assert stored_root is None
コード例 #35
0
def test_workflows_halts_on_multiple_exact_matches(workflow_app):
    """An update matching two different records exactly halts the workflow
    with the ``resolve_multiple_exact_matches`` action."""
    # Record from arxiv with just arxiv ID in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_arxiv.json", index_name="records-hep"
    )

    # Record from publisher with just DOI in DB
    TestRecordMetadata.create_from_file(
        __name__, "multiple_matches_publisher.json", index_name="records-hep"
    )

    path = pkg_resources.resource_filename(
        __name__, "fixtures/multiple_matches_arxiv_update.json"
    )
    # Read the fixture through a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(path) as fixture_file:
        update_from_arxiv = json.load(fixture_file)

    # An update from arxiv with the same arxiv and DOI as above records
    workflow_id = build_workflow(update_from_arxiv).id
    start("article", object_id=workflow_id)

    obj = workflow_object_class.get(workflow_id)

    # Two distinct exact matches are expected.
    assert len(set(obj.extra_data["matches"]["exact"])) == 2

    assert obj.status == ObjectStatus.HALTED
    assert obj.extra_data["_action"] == "resolve_multiple_exact_matches"
コード例 #36
0
def test_merge_callback_url_with_malformed_workflow(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """A malformed payload sent to the merge-conflict callback is answered
    with 400 and leaves the workflow halted with its conflicts."""
    merge_callback_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    with patch('inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters', ['acquisition_source.source']):
        record_factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        wf_id = build_workflow(RECORD_WITH_CONFLICTS).id
        engine_uuid = start('article', object_id=wf_id)

        halted = WorkflowEngine.from_uuid(engine_uuid).objects[0]
        conflicts = halted.extra_data.get('conflicts')

        # State before the callback: halted with a single conflict.
        assert halted.status == ObjectStatus.HALTED
        assert merge_callback_url == halted.extra_data.get('callback_url')
        assert len(conflicts) == 1

        assert halted.extra_data.get('is-update') is True
        assert halted.extra_data['merger_root'] == RECORD_WITH_CONFLICTS

        # 'metadata' and '_extra_data' are plain strings in this payload.
        malformed_body = json.dumps({
            'id': halted.id,
            'metadata': 'Jessica Jones',
            '_extra_data': 'Frank Castle'
        })

        with workflow_app.test_client() as client:
            response = client.put(
                halted.extra_data.get('callback_url'),
                data=malformed_body,
                content_type='application/json',
            )

        answer = json.loads(response.get_data())

        assert response.status_code == 400
        assert 'The workflow request is malformed.' == answer['message']

        # Reload the object: the rejected request must not have resolved it.
        reloaded = WorkflowEngine.from_uuid(engine_uuid).objects[0]

        assert reloaded.status == ObjectStatus.HALTED
        assert reloaded.extra_data.get('callback_url') is not None
        assert reloaded.extra_data.get('conflicts') is not None
        assert reloaded.extra_data['merger_root'] is not None

        stored_root = read_wf_record_source(record_factory.record_metadata.id, 'arxiv')
        assert stored_root is None
コード例 #37
0
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """With FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT on, the update issues
    a request to the mocked ``/literature/<control number>`` URL and the
    workflow completes as an approved exact match."""
    record = record_from_db
    wf_id = build_workflow(record).id
    control_number = record['control_number']
    head_uuid = str(record.id)

    rest_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000"
    }
    mocked_answer = {
        'metadata': {
            'control_number': control_number,
        },
        'id_': head_uuid
    }

    with mock.patch.dict(workflow_app.config, rest_config):
        with requests_mock.Mocker(real_http=True) as http_mock:
            record_url = '{url}/literature/{cn}'.format(
                url=workflow_app.config.get("INSPIREHEP_URL"),
                cn=control_number,
            )
            http_mock.register_uri(
                'PUT',
                record_url,
                headers={'content-type': 'application/json'},
                status_code=200,
                json=mocked_answer,
            )
            engine_uuid = start("article", object_id=wf_id)

            requested_paths = [request.path for request in http_mock.request_history]
            requested_hosts = [request.hostname for request in http_mock.request_history]

            assert 'web' in requested_hosts
            assert "/literature/{cn}".format(cn=control_number) in requested_paths

    matched_id = WorkflowEngine.from_uuid(engine_uuid).objects[0].id
    wf_object = workflow_object_class.get(matched_id)

    assert wf_object.data['control_number'] == control_number

    assert wf_object.extra_data["holdingpen_matches"] == []
    assert wf_object.extra_data["previously_rejected"] is False
    assert not wf_object.extra_data.get("stopped-matched-holdingpen-wf")
    assert wf_object.extra_data["is-update"]
    assert wf_object.extra_data["exact-matched"]
    assert wf_object.extra_data["matches"]["exact"] == [record.get("control_number")]
    assert wf_object.extra_data["matches"]["approved"] == record.get("control_number")
    assert wf_object.extra_data["approved"]
    assert wf_object.status == ObjectStatus.COMPLETED
コード例 #38
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A second workflow for the same record halts flagged as matching the
    holdingpen, and the first (pending) workflow ends up COMPLETED and
    marked as stopped by the second one."""
    record = generate_record()

    first_wf_id = build_workflow(record).id
    engine_uuid = start("article", object_id=first_wf_id)
    es.indices.refresh("holdingpen-hep")
    pending_wf = WorkflowEngine.from_uuid(engine_uuid).objects[0]
    pending_wf_id = pending_wf.id

    assert pending_wf.status == ObjectStatus.HALTED
    assert pending_wf.extra_data["previously_rejected"] is False

    # The very same record dict is reused; only its title is changed.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    second_wf_id = build_workflow(record).id
    start("article", object_id=second_wf_id)
    es.indices.refresh("holdingpen-hep")

    update_wf = workflow_object_class.get(second_wf_id)

    assert update_wf.status == ObjectStatus.HALTED
    # The workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    stopped_wf = workflow_object_class.get(pending_wf_id)
    assert stopped_wf.extra_data['already-in-holding-pen'] is False
    assert stopped_wf.extra_data['previously_rejected'] is False
    assert stopped_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert stopped_wf.extra_data.get('approved') is None
    assert stopped_wf.extra_data['is-update'] is False
    assert stopped_wf.status == ObjectStatus.COMPLETED
コード例 #39
0
def test_match_in_holdingpen_stops_pending_wf(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """Starting a second workflow for the same record leaves it halted with
    the holdingpen-match flags set, while the earlier pending workflow is
    completed and records which workflow stopped it."""
    holdingpen_index = "holdingpen-hep"
    record = generate_record()

    original_wf_id = build_workflow(record).id
    engine_uuid = start("article", object_id=original_wf_id)
    current_search.flush_and_refresh(holdingpen_index)
    engine = WorkflowEngine.from_uuid(engine_uuid)
    original_wf = engine.objects[0]
    original_obj_id = original_wf.id

    assert original_wf.status == ObjectStatus.HALTED
    assert original_wf.extra_data["previously_rejected"] is False

    # No copy is made: the update is the same dict with a new title.
    updated_title = "This is an update that will match the wf in the holdingpen"
    record["titles"][0]["title"] = updated_title
    update_wf_id = build_workflow(record).id
    start("article", object_id=update_wf_id)
    current_search.flush_and_refresh(holdingpen_index)

    update_wf = workflow_object_class.get(update_wf_id)

    assert update_wf.status == ObjectStatus.HALTED
    # The workflow stops (in error) before setting this
    assert update_wf.extra_data["previously_rejected"] is False
    assert update_wf.extra_data['already-in-holding-pen'] is True
    assert update_wf.extra_data["stopped-matched-holdingpen-wf"] is True
    assert update_wf.extra_data["is-update"] is False

    original_wf = workflow_object_class.get(original_obj_id)
    assert original_wf.extra_data['already-in-holding-pen'] is False
    assert original_wf.extra_data['previously_rejected'] is False
    assert original_wf.extra_data['stopped-by-wf'] == update_wf.id
    assert original_wf.extra_data.get('approved') is None
    assert original_wf.extra_data['is-update'] is False
    assert original_wf.status == ObjectStatus.COMPLETED
コード例 #40
0
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services,
):
    """Run the whole article workflow for a citing record and check that a
    reference is linked to the cited record (control number 1000) and that
    the first raw reference has 'arXiv' as source."""
    cited_eprint = {
        'categories': [
            'quant-ph', 'cond-mat.mes-hall', 'cond-mat.str-el', 'math-ph',
            'math.MP'
        ],
        'value': '1308.0815',
    }
    cited_title = {
        'source': 'arXiv',
        'title': 'Solving a two-electron quantum dot model in terms of polynomial solutions of a Biconfluent Heun equation',
    }
    cited_record = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [cited_eprint],
        'control_number': 1000,
        'document_type': ['article'],
        'titles': [cited_title],
    }

    TestRecordMetadata.create_from_kwargs(
        json=cited_record, index='records-hep', pid_type='lit'
    )
    citing, arxiv_categories = insert_citing_record()

    config_overrides = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': arxiv_categories,
    }

    # The acquisition source of the citing record must validate on its own.
    acquisition_schema = load_schema('hep')['properties']['acquisition_source']
    assert validate(citing['acquisition_source'], acquisition_schema) is None

    with mock.patch.dict(workflow_app.config, config_overrides):
        wf_id = build_workflow(citing).id
        engine_uuid = start('article', object_id=wf_id)

    processed = WorkflowEngine.from_uuid(engine_uuid).processed_objects[0]
    references = processed.data['references']

    assert references[7]['record']['$ref'] == 'http://localhost:5000/api/literature/1000'
    assert references[0]['raw_refs'][0]['source'] == 'arXiv'
コード例 #41
0
def test_refextract_from_pdf(
    mocked_indexing_task,
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_is_pdf_link,
    mocked_package_download,
    mocked_arxiv_download,
    workflow_app,
    mocked_external_services
):
    """Go through the entire article workflow with a citing record and
    verify the extracted references point at the cited record created here."""
    cited_control_number = 1000
    cited_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'arxiv_eprints': [{
            'categories': ['quant-ph', 'cond-mat.mes-hall', 'cond-mat.str-el', 'math-ph', 'math.MP'],
            'value': '1308.0815',
        }],
        'control_number': cited_control_number,
        'document_type': ['article'],
        'titles': [{
            'source': 'arXiv',
            'title': 'Solving a two-electron quantum dot model in terms of polynomial solutions of a Biconfluent Heun equation',
        }],
    }

    TestRecordMetadata.create_from_kwargs(
        json=cited_json,
        index='records-hep',
        pid_type='lit',
    )
    citing_record, categories = insert_citing_record()

    workflow_config = {
        "BEARD_API_URL": "http://example.com/beard",
        "MAGPIE_API_URL": "http://example.com/magpie",
        'ARXIV_CATEGORIES': categories,
    }

    hep_schema = load_schema('hep')
    source_subschema = hep_schema['properties']['acquisition_source']

    # validate() returning None means the acquisition source is accepted.
    assert validate(citing_record['acquisition_source'], source_subschema) is None

    with mock.patch.dict(workflow_app.config, workflow_config):
        citing_wf_id = build_workflow(citing_record).id
        citing_engine_uuid = start('article', object_id=citing_wf_id)

    citing_engine = WorkflowEngine.from_uuid(citing_engine_uuid)
    citing_obj = citing_engine.processed_objects[0]

    linked_ref = citing_obj.data['references'][7]['record']['$ref']
    assert linked_ref == 'http://localhost:5000/api/literature/1000'

    first_raw_ref_source = citing_obj.data['references'][0]['raw_refs'][0]['source']
    assert first_raw_ref_source == 'arXiv'
コード例 #42
0
def test_start_wf_with_no_source_data_fails(workflow_app):
    """start() raises ValueError for a workflow object whose extra data has
    no 'source_data' entry."""
    wf_object = build_workflow(generate_record())

    # Strip the source data and persist the object before starting it.
    del wf_object.extra_data["source_data"]
    wf_object.save()
    db.session.commit()

    with pytest.raises(ValueError):
        start("article", object_id=wf_object.id)
コード例 #43
0
def test_update_record_goes_through_api_version_of_store_record_without_issue(
    mocked_is_pdf_link,
    mocked_download_arxiv,
    mocked_api_request_beard,
    mocked_api_request_magpie,
    workflow_app,
    mocked_external_services,
    record_from_db,
):
    """Check that, with REST record management enabled, the workflow calls
    the mocked literature endpoint on host 'web' and finishes COMPLETED."""
    record = record_from_db
    update_wf_id = build_workflow(record).id
    cn = record['control_number']
    head_uuid = str(record.id)

    feature_config = {
        "FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT": True,
        "INSPIREHEP_URL": "http://web:8000"
    }

    with mock.patch.dict(workflow_app.config, feature_config):
        with requests_mock.Mocker(real_http=True) as mocker:
            # Register the endpoint the workflow is expected to call.
            endpoint = '{url}/literature/{cn}'.format(
                url=workflow_app.config.get("INSPIREHEP_URL"),
                cn=cn,
            )
            mocker.register_uri(
                'PUT',
                endpoint,
                headers={'content-type': 'application/json'},
                status_code=200,
                json={
                    'metadata': {
                        'control_number': cn,
                    },
                    'id_': head_uuid
                },
            )
            engine_uuid = start("article", object_id=update_wf_id)

            paths_seen = [req.path for req in mocker.request_history]
            hosts_seen = [req.hostname for req in mocker.request_history]

            assert 'web' in hosts_seen
            expected_path = "/literature/{cn}".format(cn=cn)
            assert expected_path in paths_seen

    engine = WorkflowEngine.from_uuid(engine_uuid)
    result = workflow_object_class.get(engine.objects[0].id)

    assert result.data['control_number'] == cn

    # No holdingpen match, not rejected before, not stopped.
    assert result.extra_data["holdingpen_matches"] == []
    assert result.extra_data["previously_rejected"] is False
    assert not result.extra_data.get("stopped-matched-holdingpen-wf")

    # Exact-matched update approved against the record's control number.
    assert result.extra_data["is-update"]
    assert result.extra_data["exact-matched"]
    assert result.extra_data["matches"]["exact"] == [record.get("control_number")]
    assert result.extra_data["matches"]["approved"] == record.get("control_number")
    assert result.extra_data["approved"]
    assert result.status == ObjectStatus.COMPLETED
コード例 #44
0
def test_match_wf_in_error_goes_in_initial_state(workflow_app):
    """Expect start() to raise WorkflowsError when a holdingpen object for
    the same record is sitting in the INITIAL state."""
    record = generate_record()

    # Holdingpen object for the record, saved with INITIAL status.
    obj = workflow_object_class.create(data=record, data_type="hep")
    obj.status = ObjectStatus.INITIAL
    obj.save()
    es.indices.refresh("holdingpen-hep")

    # Build the workflow outside the raises block: only start() is allowed
    # to satisfy the expectation, a failure during setup must not pass.
    workflow_id = build_workflow(record).id

    with pytest.raises(WorkflowsError):
        start("article", object_id=workflow_id)
コード例 #45
0
def get_halted_workflow(mocked_is_pdf_link, app, record, extra_config=None):
    """Start the article workflow for ``record`` and verify it halted.

    ``extra_config`` (optional dict) is patched into ``app.config`` while the
    workflow is built and started. Besides the HALTED status, the attached
    files, the extracted publication info and the relevance, experiment and
    keyword predictions are asserted.

    Returns the tuple ``(workflow_uuid, engine, processed object)``.
    """
    mocked_is_pdf_link.return_value = True

    config_overrides = extra_config or {}
    with mock.patch.dict(app.config, config_overrides):
        wf_id = build_workflow(record).id
        workflow_uuid = start("article", object_id=wf_id)

    eng = WorkflowEngine.from_uuid(workflow_uuid)
    halted = eng.processed_objects[0]

    assert halted.status == ObjectStatus.HALTED
    assert halted.data_type == "hep"

    # Attached files: the pdf and the tarball, plus at least one more.
    assert halted.files["1407.7587.pdf"]
    assert halted.files["1407.7587.tar.gz"]

    assert len(halted.files) > 2

    # Extracted publication info.
    publication_info = halted.data.get("publication_info")
    assert publication_info
    first_publication = publication_info[0]
    assert first_publication
    assert first_publication.get("year") == 2014
    assert first_publication.get("journal_title") == "J. Math. Phys."

    # Relevance prediction.
    relevance = halted.extra_data.get("relevance_prediction")
    assert relevance
    assert relevance["decision"] == "Non-CORE"
    assert relevance["scores"]["Non-CORE"] == 0.8358207729691823

    # Experiments prediction.
    cms_prediction = {"label": "CMS", "score": 0.75495152473449707}
    expected_experiments = {"experiments": [cms_prediction]}
    experiments = halted.extra_data.get("experiments_prediction")
    assert experiments == expected_experiments

    # Keywords prediction.
    keywords = halted.extra_data.get("keywords_prediction")
    assert keywords
    galaxy_keyword = {
        "label": "galaxy",
        "score": 0.29424679279327393,
        "accept": True,
    }
    assert galaxy_keyword in keywords["keywords"]

    # No approval decision has been recorded yet.
    assert halted.extra_data["approved"] is None

    return workflow_uuid, eng, halted
コード例 #46
0
def test_match_in_holdingpen_previously_rejected_wf_stop(
    mocked_api_request_magpie,
    mocked_api_request_beard,
    mocked_package_download,
    mocked_is_pdf_link,
    mocked_download_arxiv,
    workflow_app,
    mocked_external_services,
):
    """A workflow matching a previously rejected one is flagged as
    ``previously_rejected`` and lists the rejected workflow id."""
    record = generate_record()

    first_wf_id = build_workflow(record).id
    first_engine_uuid = start("article", object_id=first_wf_id)
    rejected_id = WorkflowEngine.from_uuid(first_engine_uuid).objects[0].id

    # Reject the first workflow and let it run to completion.
    rejected = workflow_object_class.get(rejected_id)
    rejected.extra_data["approved"] = False
    rejected.continue_workflow()

    rejected = workflow_object_class.get(rejected_id)
    assert rejected.status == ObjectStatus.COMPLETED
    assert rejected.extra_data.get("approved") is False

    es.indices.refresh("holdingpen-hep")

    # Same record with a new title: it matches the rejected workflow in the
    # holdingpen and is stopped because of that rejection.
    record["titles"][0]["title"] = "This is an update that will match the wf in the holdingpen"
    second_wf_id = build_workflow(record).id
    second_engine_uuid = start("article", object_id=second_wf_id)
    second = WorkflowEngine.from_uuid(second_engine_uuid).objects[0]

    assert second.extra_data["previously_rejected"] is True
    assert second.extra_data["previously_rejected_matches"] == [rejected_id]
コード例 #47
0
def test_article_workflow_continues_when_record_is_valid(workflow_app):
    """The article workflow neither ends in ERROR nor stores an error
    message for a minimal valid literature record."""
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    wf_id = build_workflow(record).id
    engine_uuid = start("article", object_id=wf_id)
    wf_object = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    assert wf_object.status != ObjectStatus.ERROR
    assert "_error_msg" not in wf_object.extra_data
コード例 #48
0
def test_authors_workflow_stops_when_record_is_not_valid(workflow_app):
    """The author workflow raises ValidationError for this record and leaves
    the object in ERROR with a 'required' error message."""
    author_name = {
        'preferred_name': 'John Smith',
        'value': 'Smith, John'
    }
    record = {'name': author_name}

    wf_id = build_workflow(record, data_type='authors').id

    with pytest.raises(ValidationError):
        start('author', object_id=wf_id)

    failed = workflow_object_class.get(wf_id)

    assert failed.status == ObjectStatus.ERROR
    assert '_error_msg' in failed.extra_data
    assert 'required' in failed.extra_data['_error_msg']
コード例 #49
0
def test_merge_without_conflicts_callback_url(
        mocked_api_request_magpie,
        mocked_beard_api,
        workflow_app,
        mocked_external_services,
        disable_file_upload,
        enable_merge_on_update,
):
    """An update without conflicts completes and stores the arxiv root; a
    later PUT to the merge-conflict callback is answered with 400."""
    callback_url = 'http://localhost:5000/callback/workflows/resolve_merge_conflicts'

    with patch('inspire_json_merger.config.ArxivOnArxivOperations.conflict_filters', ['acquisition_source.source']):
        record_factory = TestRecordMetadata.create_from_file(
            __name__, 'merge_record_arxiv.json', index_name='records-hep')

        wf_id = build_workflow(RECORD_WITHOUT_CONFLICTS).id
        engine_uuid = start('article', object_id=wf_id)

        completed = WorkflowEngine.from_uuid(engine_uuid).objects[0]
        conflicts = completed.extra_data.get('conflicts')

        assert completed.status == ObjectStatus.COMPLETED
        assert conflicts is None
        assert completed.extra_data.get('is-update') is True

        stored_root = read_wf_record_source(record_factory.record_metadata.id, 'arxiv')
        assert stored_root.json == RECORD_WITHOUT_CONFLICTS

        # Send the completed workflow back to the merge callback.
        request_body = json.dumps({
            'id': completed.id,
            'metadata': completed.data,
            '_extra_data': completed.extra_data
        })

        with workflow_app.test_client() as client:
            response = client.put(
                callback_url,
                data=request_body,
                content_type='application/json',
            )

        assert response.status_code == 400
コード例 #50
0
def test_authors_workflow_continues_when_record_is_valid(workflow_app, mocked_external_services):
    """The author workflow halts without an error message for a valid
    author record."""
    record = {
        '_collections': ['Authors'],
        'name': {
            'preferred_name': 'John Smith',
            'value': 'Smith, John'
        }
    }

    wf_id = build_workflow(record, data_type='authors', id_user=1).id
    created = workflow_object_class.get(wf_id)

    start('author', object_id=created.id)

    # Fetch the object again to read the state left by the workflow.
    halted = workflow_object_class.get(created.id)

    assert halted.status == ObjectStatus.HALTED
    assert '_error_msg' not in halted.extra_data
コード例 #51
0
def test_workflow_restarts_twice_if_working_with_stale_data(
    mocked__is_stale_data,
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
):
    """With the stale-data check mocked, the workflow completes as an update
    with a restart count of 2."""
    record_factory = TestRecordMetadata.create_from_file(
        __name__, 'merge_record_arxiv.json', index_name='records-hep'
    )

    wf_id = build_workflow(record_factory.record_metadata.json).id
    start('article', object_id=wf_id)

    wf_object = workflow_object_class.get(wf_id)
    restart_marks = wf_object.extra_data['source_data']['persistent_data']['marks']

    assert wf_object.extra_data['head_version_id'] == 1
    assert wf_object.extra_data['is-update']
    assert restart_marks['restart-count'] == 2
    assert wf_object.status == ObjectStatus.COMPLETED
コード例 #52
0
def test_article_workflow_stops_when_record_is_not_valid(workflow_app):
    """The article workflow raises ValidationError for a record lacking
    ``_collections`` and stores the callback url and validation errors."""
    validation_callback_url = "http://localhost:5000/callback/workflows/resolve_validation_errors"
    record = {
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    wf_id = build_workflow(record).id

    with pytest.raises(ValidationError):
        start("article", object_id=wf_id)

    failed = workflow_object_class.get(wf_id)

    assert failed.status == ObjectStatus.ERROR
    assert "_error_msg" in failed.extra_data
    assert "required" in failed.extra_data["_error_msg"]

    assert validation_callback_url == failed.extra_data["callback_url"]
    assert failed.extra_data["validation_errors"]
    # The first stored error carries both a message and a path.
    for field in ("message", "path"):
        assert field in failed.extra_data["validation_errors"][0]
コード例 #53
0
def test_workflow_restarts_goes_in_error_after_three_restarts(
    mocked__is_stale_data,
    mocked_api_request_magpie,
    mocked_beard_api,
    workflow_app,
    mocked_external_services,
):
    """start() raises WorkflowsError and the object ends in ERROR once the
    restart count has reached 3."""
    record_factory = TestRecordMetadata.create_from_file(
        __name__, 'merge_record_arxiv.json', index_name='records-hep'
    )
    wf_id = build_workflow(record_factory.record_metadata.json).id

    with pytest.raises(WorkflowsError):
        start('article', object_id=wf_id)

    failed = workflow_object_class.get(wf_id)
    restart_marks = failed.extra_data['source_data']['persistent_data']['marks']

    assert restart_marks['restart-count'] == 3
    assert 'Workflow restarted too many times' in failed.extra_data['_error_msg']
    assert failed.status == ObjectStatus.ERROR
コード例 #54
0
def test_workflow_without_validation_error(
    fake_validation,
    mocked_match,
    mocked_magpie_json_api_request,
    mocked_beard_json_api_request,
    workflow_app,
    mocked_external_services,
):
    """The mocked validation is called twice and the workflow object built
    here reports the WAITING status after start()."""
    eprint = {"categories": ["hep-lat", "hep-th"], "value": "1703.04802"}
    record = {
        "$schema": "https://labs.inspirehep.net/schemas/records/hep.json",
        "titles": [{"title": "Update without conflicts title."}],
        "arxiv_eprints": [eprint],
        "document_type": ["article"],
        "_collections": ["Literature"],
        "acquisition_source": {"source": "arXiv"},
    }

    wf_object = build_workflow(record)
    start("article", object_id=wf_object.id)

    assert fake_validation.call_count == 2
    # The status is read from the object returned by build_workflow.
    assert wf_object.status == ObjectStatus.WAITING
コード例 #55
0
def test_validation_error_callback_with_a_valid(workflow_app):
    """The validation callback answers 400/WORKFLOW_NOT_IN_ERROR_STATE for a
    workflow that is not in the ERROR state."""
    record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    wf_id = build_workflow(record).id
    engine_uuid = start("article", object_id=wf_id)
    wf_object = WorkflowEngine.from_uuid(engine_uuid).objects[0]

    assert wf_object.status != ObjectStatus.ERROR

    response = do_validation_callback(
        workflow_app, wf_object.id, wf_object.data, wf_object.extra_data
    )
    body = json.loads(response.get_data())

    assert response.status_code == 400
    assert "WORKFLOW_NOT_IN_ERROR_STATE" == body["error_code"]
コード例 #56
0
def test_validation_error_callback_with_missing_worfklow(workflow_app):
    """Check that the validation callback answers 404/WORKFLOW_NOT_FOUND
    when it is called with a workflow id (1111) that is not the id of the
    workflow started here."""
    # NOTE(review): named ``invalid_record``, yet start() below is not
    # expected to raise for it.
    invalid_record = {
        "_collections": ["Literature"],
        "document_type": ["article"],
        "titles": [{"title": "A title"}],
    }

    workflow_id = build_workflow(invalid_record).id
    eng_uuid = start("article", object_id=workflow_id)

    eng = WorkflowEngine.from_uuid(eng_uuid)
    obj = eng.objects[0]

    # Real data and extra data, but a hard-coded id instead of obj.id.
    response = do_validation_callback(workflow_app, 1111, obj.data, obj.extra_data)

    data = json.loads(response.get_data())
    expected_message = 'The workflow with id "1111" was not found.'
    expected_error_code = "WORKFLOW_NOT_FOUND"

    assert response.status_code == 404
    assert expected_error_code == data["error_code"]
    assert expected_message == data["message"]